def util_partition_adj_coo_hilbert()

in python/featgraph/util/adj_partitioning.py [0:0]


def util_partition_adj_coo_hilbert(adj_scipy_coo, hilbert_depth):
    num_rows = adj_scipy_coo.shape[0]
    num_cols = adj_scipy_coo.shape[1]
    adj_row_indices = adj_scipy_coo.row
    adj_col_indices = adj_scipy_coo.col
    nnz = adj_row_indices.shape[0]
    assert adj_col_indices.shape[0] == nnz, "length of adj_row_indices and that of adj_col_indices do not match"

    num_row_partitions = pow(2, hilbert_depth)
    num_col_partitions = pow(2, hilbert_depth)

    # Give each edge an id to record the graph traversal order
    adj_scipy_coo.data = np.arange(1, 1 + nnz, dtype='int32')
    # COO matrix is not subscriptable; we need CSR to do partitioning
    adj_scipy_csr = adj_scipy_coo.tocsr()
    edge_id_list_after_partition = np.zeros(shape=(nnz), dtype=adj_scipy_coo.data.dtype)
    adj_row_indices_after_partition = np.zeros(shape=(nnz), dtype=adj_row_indices.dtype)
    adj_col_indices_after_partition = np.zeros(shape=(nnz), dtype=adj_col_indices.dtype)

    hilbert_curve = HilbertCurve(hilbert_depth, 2)

    num_rows_per_partition = (num_rows + num_row_partitions - 1) // num_row_partitions
    num_cols_per_partition = (num_cols + num_col_partitions - 1) // num_col_partitions
    counter = 0
    for partition_idx in range(num_row_partitions * num_col_partitions):
        row_idx, col_idx = hilbert_curve.coordinates_from_distance(partition_idx)
        if row_idx < num_row_partitions - 1 and col_idx < num_col_partitions - 1:
            adj_partition_scipy_csr = adj_scipy_csr[row_idx*num_rows_per_partition:(row_idx+1)*num_rows_per_partition, \
                col_idx*num_cols_per_partition:(col_idx+1)*num_cols_per_partition]
        elif row_idx < num_row_partitions - 1 and col_idx == num_col_partitions - 1:
            adj_partition_scipy_csr = adj_scipy_csr[row_idx*num_rows_per_partition:(row_idx+1)*num_rows_per_partition, \
                col_idx*num_cols_per_partition::]
        elif row_idx == num_row_partitions - 1 and col_idx < num_col_partitions - 1:
            adj_partition_scipy_csr = adj_scipy_csr[row_idx*num_rows_per_partition::, \
                col_idx*num_cols_per_partition:(col_idx+1)*num_cols_per_partition]
        elif row_idx == num_row_partitions - 1 and col_idx == num_col_partitions - 1:
            adj_partition_scipy_csr = adj_scipy_csr[row_idx*num_rows_per_partition::, \
                col_idx*num_cols_per_partition::]
        else:
            raise RuntimeError("no condition is satisfied")
        adj_partition_scipy_coo = adj_partition_scipy_csr.tocoo()
        nnz_in_this_partition = adj_partition_scipy_coo.nnz
        edge_id_list_after_partition[counter:(counter+nnz_in_this_partition)] = adj_partition_scipy_coo.data
        adj_row_indices_after_partition[counter:(counter+nnz_in_this_partition)] = \
            adj_partition_scipy_coo.row + row_idx * num_rows_per_partition
        adj_col_indices_after_partition[counter:(counter+nnz_in_this_partition)] = \
            adj_partition_scipy_coo.col + col_idx * num_cols_per_partition
        counter += nnz_in_this_partition

    return edge_id_list_after_partition, adj_row_indices_after_partition, adj_col_indices_after_partition