def _convert_saint2shadow()

in para_graph_sampler/graph_engine/frontend/data_converter.py [0:0]


def _saint_adj_to_raw(adj, index_dtype):
    """Return a CSR matrix built from *adj* with compact values and re-typed indices.

    If every stored value equals 1 the values are replaced by booleans;
    otherwise they are cast to float32.  ``indptr`` and ``indices`` are cast
    to *index_dtype*.

    NOTE(review): reads ``adj.indptr`` / ``adj.indices`` directly, so this
    assumes the loaded matrix is in CSR format -- confirm against the
    GraphSAINT files.
    """
    values = adj.data
    # Guard the reductions: min()/max() raise ValueError on an empty array
    # (an adjacency without any stored edge).
    if values.size > 0 and values.min() == values.max() == 1:
        # Zero-stride view of a single True: avoids allocating one value per
        # edge in memory when all weights are 1.
        raw_values = np.broadcast_to(np.ones(1, dtype=bool), values.size)
    else:
        raw_values = values.astype(np.float32, copy=False)
    raw = sp.csr_matrix((raw_values, adj.indices, adj.indptr), shape=adj.shape)
    raw.indptr = raw.indptr.astype(index_dtype, copy=False)
    raw.indices = raw.indices.astype(index_dtype, copy=False)
    return raw


def _saint_class_map_to_labels(class_map, num_nodes):
    """Build a boolean ``(num_nodes, num_classes)`` label matrix from *class_map*.

    *class_map* maps an integer node id either to a list (one entry per
    class, written to the node's row as-is) or to a single integer class id
    (one-hot encoded, shifted so that the smallest class id is column 0).
    The representation is decided from the first value in the mapping.
    Nodes absent from *class_map* keep an all-False row.
    """
    first_value = next(iter(class_map.values()))
    if isinstance(first_value, list):
        labels = np.zeros((num_nodes, len(first_value)), dtype=bool)
        # Row-by-row assignment keeps peak memory at the size of the bool matrix.
        for node, row in class_map.items():
            labels[node] = row
        return labels
    # Class label is represented as an int.
    lowest = min(class_map.values())
    highest = max(class_map.values())
    labels = np.zeros((num_nodes, highest - lowest + 1), dtype=bool)
    node_ids = np.asarray(list(class_map.keys()))
    class_ids = np.asarray(list(class_map.values())) - lowest
    labels[node_ids, class_ids] = True
    return labels


def _convert_saint2shadow(data_meta, dir_shadow: str, dir_saint: str) -> None:
    """Convert a dataset stored in GraphSAINT format into shaDow-GNN format.

    Files read (from ``dir_saint``) and written (to ``dir_shadow``):

    * ``adj_full.npz``   -> ``adj_full_raw.npz``
    * ``adj_train.npz``  -> ``adj_train_raw.npz``
    * ``role.json``      -> ``split.npy``
    * ``class_map.json`` -> ``label_full.npy``
    * ``feats.npy``      -> ``feat_full.npy``

    Args:
        data_meta: Not used by this function.
        dir_shadow: Format template with one ``{}`` placeholder that is
            filled with each output file name.
        dir_saint: Format template with one ``{}`` placeholder that is
            filled with each input file name.
    """
    print("Preparing shaDow-GNN dataset from GraphSAINT format")
    adj_full = sp.load_npz(dir_saint.format('adj_full.npz'))
    # The dtype chosen for the full adjacency is reused for the training
    # adjacency indices and for the node-id arrays in the split.
    dtype = get_adj_dtype(adj=adj_full)
    # adj_full.npz -> adj_full_raw.npz
    sp.save_npz(
        dir_shadow.format('adj_full_raw.npz'),
        _saint_adj_to_raw(adj_full, dtype),
    )
    # adj_train.npz -> adj_train_raw.npz
    adj_train = sp.load_npz(dir_saint.format('adj_train.npz'))
    sp.save_npz(
        dir_shadow.format('adj_train_raw.npz'),
        _saint_adj_to_raw(adj_train, dtype),
    )
    # role.json -> split.npy
    with open(dir_saint.format('role.json')) as fr:
        role = json.load(fr)
    # A dict is not an ndarray: np.save stores it as a pickled object array.
    np.save(
        dir_shadow.format('split.npy'),
        {
            TRAIN: np.asarray(role['tr'], dtype=dtype),
            VALID: np.asarray(role['va'], dtype=dtype),
            TEST: np.asarray(role['te'], dtype=dtype),
        },
    )
    # class_map.json -> label_full.npy
    with open(dir_saint.format('class_map.json')) as fc:
        class_map = json.load(fc)
    # JSON object keys are always strings; turn them back into node ids.
    class_map = {int(k): v for k, v in class_map.items()}
    label_full = _saint_class_map_to_labels(class_map, adj_full.shape[0])
    np.save(dir_shadow.format('label_full.npy'), label_full)
    # feats.npy -> feat_full.npy
    feats = np.load(dir_saint.format('feats.npy'))
    np.save(dir_shadow.format('feat_full.npy'), feats.astype(np.float32, copy=False))
    print(f"Successfully saved shaDow-GNN dataset into {'/'.join(dir_shadow.split('/')[:-1])}")