def recreate_grpha_data

def recreate_grpha_data()

in src/sagemaker/FD_SL_DGL/code/fd_sl_deployment_entry_point.py [0:0]
64 lines of code
13 McCabe index (conditional complexity)

def recreate_grpha_data(graph_dict, n_feats, target_id):
    """
    Build the DGL heterogeneous graph and per-type node features for the model.

    Original node ids are re-indexed per node type: the sorted unique original
    ids of a type are assigned the new ids 0..k-1 in that order.

    :param
    graph_dict: a Python dictionary, where key is a string of the form
                '<source type><><destination type>', like 'target<>card1' (the key is split on '<>'), and the
                value is a pair of two Python lists, containing the original ids of source and destination
                nodes. Both lists of one entry describe the same edges position by position.
    n_feats: a Python dictionary, where key is node type string, and value is another dictionary that maps
             str(original node id) to that node's feature list (the original documentation states a list of
             390 floats). Every node type listed here must also occur in graph_dict.
    target_id: the original id of the 'target' node to be inferred.

    :return:
    graph: a DGL heterogeneous graph, including reversed edges and a 'self_relation' self-loop on every
           'target' node.

    new_n_feats: a dictionary from node type to a feature Tensor whose rows follow the new node id order.
                 'target' features are normalized with the module-level TARGET_FEAT_MEAN / TARGET_FEAT_STD.

    new_pred_target_id: a long Tensor holding the new id of the target node in the new graph.

    :raises
    ValueError: if target_id is not one of the 'target' node ids found in graph_dict.
    KeyError: if graph_dict has no 'target' nodes, if n_feats names a node type that is absent from
              graph_dict, or if a node has no entry in its feature dictionary.
    """
    print('------------------ Convert to DLG Graph -------------------')
    # --- Step 1: collect all types of nodes together
    rel_list = []
    id_chunks = {}
    for can_etype, src_dst_tuple in graph_dict.items():
        src_type, dst_type = can_etype.split('<>')
        src_origin, dst_origin = np.array(src_dst_tuple[0]), np.array(src_dst_tuple[1])

        rel_list.append(((src_type, dst_type), (src_origin, dst_origin)))

        # Chunks are stored source first, then destination. Step 3 slices the
        # per-type inverse index back apart in exactly this order.
        id_chunks.setdefault(src_type, []).append(src_origin)
        id_chunks.setdefault(dst_type, []).append(dst_origin)

    # --- Step 2: for each type of node, unique their IDs and store
    # node_new_list[ntype] = (sorted unique original ids, new id of every collected occurrence)
    node_new_list = {
        ntype: np.unique(np.concatenate(chunks), return_inverse=True)
        for ntype, chunks in id_chunks.items()
    }

    # ---  Step 3: map new node IDs to old node IDs
    rel_dict = {}
    node_type_idx = {}  # per node type: how many entries of the inverse index are already consumed

    def _take_new_ids(ntype, count):
        # Hand out the next `count` new ids of `ntype` and advance its cursor.
        start = node_type_idx.get(ntype, 0)
        node_type_idx[ntype] = start + count
        return node_new_list[ntype][1][start:start + count]

    for (src_type, dst_type), (src, dst) in rel_list:
        # Source must be consumed before destination (same order as Step 1),
        # which matters when both ends share one node type.
        src_new = _take_new_ids(src_type, src.size)
        dst_new = _take_new_ids(dst_type, dst.size)

        rel_dict[(src_type, src_type + '<>' + dst_type, dst_type)] = (th.from_numpy(src_new), th.from_numpy(dst_new))
        rel_dict[(dst_type, dst_type + '<>' + src_type, src_type)] = (th.from_numpy(dst_new), th.from_numpy(src_new))

    # Add target self-loop
    target_nid_old = node_new_list['target'][0]
    target_nid_new = np.arange(target_nid_old.shape[0])
    rel_dict[('target', 'self_relation', 'target')] = (th.from_numpy(target_nid_new),
                                                       th.from_numpy(target_nid_new))

    # Extract the new target node id
    target_pos = np.searchsorted(target_nid_old, target_id)
    # searchsorted only returns an insertion point, so an unknown target_id
    # would silently select a different node (or an index past the end).
    # Confirm that the slot really holds the requested id.
    if np.ndim(target_pos) == 0:
        found = False
        if target_pos < target_nid_old.shape[0]:
            candidate = target_nid_old[target_pos]
            # Ids may be numeric in the graph while arriving as strings elsewhere
            # (feature keys are looked up via str(id)), so accept either match.
            found = bool(candidate == target_id) or str(candidate) == str(target_id)
        if not found:
            raise ValueError("target_id {!r} is not among the 'target' nodes of graph_dict".format(target_id))
    new_pred_target_id = th.tensor(target_pos).long()

    print("New target node id: {}".format(new_pred_target_id))

    # --- Step 4: process n_feats dictionary to get feature tensor
    new_n_feats = {}
    for in_ntype, in_feat_dict in n_feats.items():
        old_ids, _ = node_new_list[in_ntype]

        # Rows follow the sorted unique original ids, i.e. the new id order.
        feats = [in_feat_dict[str(old_id)] for old_id in old_ids]

        if in_ntype == 'target':
            # Only target features are normalized, using the module-level statistics.
            np_feats = np.array(feats).astype(np.float32)
            th_feat = th.from_numpy(np_feats)
            new_n_feats[in_ntype] = (th_feat - TARGET_FEAT_MEAN) / TARGET_FEAT_STD
        else:
            new_n_feats[in_ntype] = th.Tensor(feats)

    # --- Step 5: build DGL graph
    graph = dgl.heterograph(rel_dict)
    print(graph)

    return graph, new_n_feats, new_pred_target_id