in src/sagemaker/FD_SL_DGL/code/fd_sl_deployment_entry_point.py [0:0]
def recreate_grpha_data(graph_dict, n_feats, target_id):
    """
    From the graph dictionary, build the input graph and node features for model.

    Original node ids are re-indexed per node type: the new id of a node is its
    rank among the sorted unique original ids of that type.

    :param
        graph_dict: a Python dictionary, where key is a string made of the source type and the
                    destination type joined by '<>', like 'target<>card1', and the value is a pair
                    of two Python lists, containing the original ids of source and destination
                    nodes of every edge.
        n_feats: a Python dictionary, where key is node type string, and value is another dictionary
                 keyed by str(original node id) whose value is a list of floats (390 dimensions
                 according to the original documentation). Every node type listed here must also
                 appear in graph_dict.
        target_id: the original id of the 'target' node to be inferred. It must have the same type
                   as the ids stored in graph_dict.
    :return:
        graph: a DGL heterogeneous graph, including reversed edges and a 'self_relation' self-loop
               on every 'target' node.
        new_n_feats: a Python dictionary from node type to a feature Tensor whose rows follow the
                     new node ids. 'target' features are normalised with the module-level
                     TARGET_FEAT_MEAN / TARGET_FEAT_STD.
        new_pred_target_id: a long Tensor holding the new id of the target node in the graph.
    :raises
        KeyError: if the graph has no 'target' nodes, or a node has no entry in n_feats.
        ValueError: if target_id is not one of the 'target' nodes of the graph.
    """
    print('------------------ Convert to DLG Graph -------------------')

    # --- Step 1: parse every relation and collect the original ids per node type
    relations = []
    ids_by_type = {}
    for can_etype, src_dst_tuple in graph_dict.items():
        src_type, dst_type = can_etype.split('<>')
        src_origin = np.array(src_dst_tuple[0])
        dst_origin = np.array(src_dst_tuple[1])
        relations.append((src_type, dst_type, src_origin, dst_origin))
        ids_by_type.setdefault(src_type, []).append(src_origin)
        ids_by_type.setdefault(dst_type, []).append(dst_origin)

    # --- Step 2: for each type of node, unique their IDs and store
    # The sorted unique array doubles as the new-id -> old-id table.
    unique_ids = {}
    for ntype, chunks in ids_by_type.items():
        # np.array([]) is float64; dropping empty chunks keeps an empty edge
        # list from promoting integer ids to floats (which would turn the
        # str(old_id) feature key of Step 4 into e.g. '5.0').
        non_empty = [chunk for chunk in chunks if chunk.size] or chunks
        unique_ids[ntype] = np.unique(np.concatenate(non_empty))

    # --- Step 3: map old node IDs to new node IDs
    # Every id of a relation is present in the sorted unique array of its type,
    # so searchsorted returns exactly its new id.
    # NOTE(review): when src_type == dst_type the reverse key below is identical
    # to the forward key, so the reversed edges replace the forward ones --
    # confirm such relations never occur in the payload.
    rel_dict = {}
    for src_type, dst_type, src_origin, dst_origin in relations:
        src_new = np.searchsorted(unique_ids[src_type], src_origin)
        dst_new = np.searchsorted(unique_ids[dst_type], dst_origin)
        rel_dict[(src_type, src_type + '<>' + dst_type, dst_type)] = (th.from_numpy(src_new),
                                                                      th.from_numpy(dst_new))
        rel_dict[(dst_type, dst_type + '<>' + src_type, src_type)] = (th.from_numpy(dst_new),
                                                                      th.from_numpy(src_new))

    # Add target self-loop
    target_nid_old = unique_ids['target']
    target_nid_new = np.arange(target_nid_old.shape[0])
    rel_dict[('target', 'self_relation', 'target')] = (th.from_numpy(target_nid_new),
                                                       th.from_numpy(target_nid_new))

    # Extract the new target node id
    # searchsorted only yields an insertion point, so verify that the id found
    # at that position really is target_id instead of silently returning the
    # index of another node (or one past the end).
    target_pos = np.searchsorted(target_nid_old, target_id)
    if target_nid_old.size == 0:
        raise ValueError("Target id {} is not a 'target' node of the graph".format(target_id))
    nearest_ids = target_nid_old[np.minimum(target_pos, target_nid_old.size - 1)]
    if not np.all(nearest_ids == target_id):
        raise ValueError("Target id {} is not a 'target' node of the graph".format(target_id))
    new_pred_target_id = th.tensor(target_pos).long()
    print("New target node id: {}".format(new_pred_target_id))

    # --- Step 4: process n_feats dictionary to get feature tensor
    new_n_feats = {}
    for in_ntype, in_feat_dict in n_feats.items():
        # Rows are emitted in new-id order; feature keys are the stringified old ids.
        feats = [in_feat_dict[str(old_id)] for old_id in unique_ids[in_ntype]]
        if in_ntype == 'target':
            th_feat = th.from_numpy(np.array(feats).astype(np.float32))
            new_n_feats[in_ntype] = (th_feat - TARGET_FEAT_MEAN) / TARGET_FEAT_STD
        else:
            new_n_feats[in_ntype] = th.Tensor(feats)

    # --- Step 5: build DGL graph
    graph = dgl.heterograph(rel_dict)
    print(graph)

    return graph, new_n_feats, new_pred_target_id