in para_graph_sampler/graph_engine/frontend/data_converter.py [0:0]
def _saint_adj_to_raw(adj, dtype):
    """Re-pack a loaded adjacency matrix into the "raw" form that gets saved.

    If every stored value equals 1, the values are replaced by a boolean
    all-ones array (a zero-stride broadcast view, so no per-edge allocation
    happens here). Otherwise the values are cast to float32. ``indptr`` and
    ``indices`` are cast to ``dtype``.

    NOTE(review): reads ``adj.indptr`` / ``adj.indices``, so ``adj`` is
    presumably CSR -- confirm against how the GraphSAINT files were written.
    """
    values = adj.data
    # Guard on size: min()/max() raise on an empty array (graph with no edges).
    if values.size > 0 and values.min() == values.max() == 1:
        # Builtin ``bool`` instead of ``np.bool``: the alias was removed in NumPy 1.24.
        values = np.broadcast_to(np.ones(1, dtype=bool), values.size)
    else:
        values = values.astype(np.float32, copy=False)
    adj_raw = sp.csr_matrix((values, adj.indices, adj.indptr), shape=adj.shape)
    adj_raw.indptr = adj_raw.indptr.astype(dtype, copy=False)
    adj_raw.indices = adj_raw.indices.astype(dtype, copy=False)
    return adj_raw


def _build_label_matrix(class_map, num_nodes):
    """Build a boolean ``(num_nodes, num_classes)`` label matrix from ``class_map``.

    ``class_map`` maps an integer node id to either a list (written into that
    node's row as-is) or a single integer class label (one-hot encoded after
    subtracting the smallest label). The representation is decided from the
    first value only.

    Raises:
        ValueError: if ``class_map`` is empty.
    """
    if not class_map:
        raise ValueError("class_map.json contains no entries; cannot infer the number of classes")
    first_val = next(iter(class_map.values()))
    if isinstance(first_val, list):
        label_full = np.zeros((num_nodes, len(first_val)), dtype=bool)
        for node, row in class_map.items():
            label_full[node] = row
        return label_full
    # Class label is represented as an int: shift so the smallest label maps to column 0.
    labels = list(class_map.values())
    offset = min(labels)
    num_classes = max(labels) - offset + 1
    label_full = np.zeros((num_nodes, num_classes), dtype=bool)
    node_idx = np.asarray(list(class_map.keys()))
    class_idx = np.asarray(labels) - offset
    label_full[node_idx, class_idx] = 1
    return label_full


def _convert_saint2shadow(data_meta, dir_shadow: str, dir_saint: str) -> None:
    """Convert a GraphSAINT-format dataset on disk into shaDow-GNN format.

    Files read (via ``dir_saint.format(name)``) and written (via
    ``dir_shadow.format(name)``):

    * ``adj_full.npz``   -> ``adj_full_raw.npz``
    * ``adj_train.npz``  -> ``adj_train_raw.npz``
    * ``role.json``      -> ``split.npy`` (a dict saved with ``np.save``, i.e.
      pickled; loading it back needs ``allow_pickle=True``)
    * ``class_map.json`` -> ``label_full.npy`` (boolean matrix)
    * ``feats.npy``      -> ``feat_full.npy`` (float32)

    Args:
        data_meta: not used by this function.
        dir_shadow: format template with one ``{}`` placeholder for the
            output file name.
        dir_saint: format template with one ``{}`` placeholder for the
            input file name.

    Raises:
        ValueError: if ``class_map.json`` is empty.
    """
    print("Preparing shaDow-GNN dataset from GraphSAINT format")
    adj_full = sp.load_npz(dir_saint.format('adj_full.npz'))
    # Index dtype is derived from the full graph and reused for the train graph and splits.
    dtype = get_adj_dtype(adj=adj_full)
    # adj_full.npz -> adj_full_raw.npz
    sp.save_npz(dir_shadow.format('adj_full_raw.npz'), _saint_adj_to_raw(adj_full, dtype))
    # adj_train.npz -> adj_train_raw.npz
    adj_train = sp.load_npz(dir_saint.format('adj_train.npz'))
    sp.save_npz(dir_shadow.format('adj_train_raw.npz'), _saint_adj_to_raw(adj_train, dtype))
    # role.json -> split.npy
    with open(dir_saint.format('role.json'), encoding='utf-8') as fr:
        role = json.load(fr)
    np.save(
        dir_shadow.format('split.npy'),
        {
            TRAIN: np.asarray(role['tr'], dtype=dtype),
            VALID: np.asarray(role['va'], dtype=dtype),
            TEST: np.asarray(role['te'], dtype=dtype),
        },
    )
    # class_map.json -> label_full.npy
    with open(dir_saint.format('class_map.json'), encoding='utf-8') as fc:
        class_map = json.load(fc)
    # JSON object keys are always strings; convert them back to integer node ids.
    class_map = {int(k): v for k, v in class_map.items()}
    label_full = _build_label_matrix(class_map, adj_full.shape[0])
    np.save(dir_shadow.format('label_full.npy'), label_full)
    # feats.npy -> feat_full.npy
    feats = np.load(dir_saint.format('feats.npy'))
    np.save(dir_shadow.format('feat_full.npy'), feats.astype(np.float32, copy=False))
    print(f"Successfully saved shaDow-GNN dataset into {'/'.join(dir_shadow.split('/')[:-1])}")