def get_features()

in src/sagemaker/FD_SL_DGL/gnn_fraud_detection_dgl/data.py [0:0]


def get_features(id_to_node, node_feature_files):
    """
    Load node features from comma-separated files and order them by dgl node idx.

    The first line of every file is treated as a header and skipped. Each following
    non-blank line is parsed as ``node_id,feat_1,...,feat_n``. Node ids that are not
    yet present in ``id_to_node`` are assigned the next free idx (current maximum + 1,
    or 0 when the mapping is empty) and are added to ``id_to_node`` in place.

    :param id_to_node: dictionary mapping node names(id) to dgl node idx; it is updated
        in place with every node id first seen in the feature files
    :param node_feature_files: iterable of paths to the files containing node features
    :return: (np.ndarray, list) float32 node feature matrix with rows sorted by node idx,
        and the idx values of the new nodes not yet in the graph. When no feature rows
        are found the matrix has shape (0, 0).
    """
    indices, features, new_nodes = [], [], []
    # default=-1 so that an empty mapping starts numbering new nodes at 0
    # instead of raising ValueError.
    max_node = max(id_to_node.values(), default=-1)

    for node_file in node_feature_files:
        with open(node_file, "r") as fh:
            # hard-coding to ignore the 1st line of header
            next(fh, None)
            for line in fh:
                line = line.strip()
                if not line:
                    # A blank (e.g. trailing) line carries no node; parsing it would
                    # register a bogus node with id '' and no features.
                    continue

                node_id, *raw_feats = line.split(",")
                features.append(np.array([float(value) for value in raw_feats]))
                if node_id not in id_to_node:
                    max_node += 1
                    id_to_node[node_id] = max_node
                    new_nodes.append(max_node)

                indices.append(id_to_node[node_id])

    if not features:
        # np.array([]) is 1-D, so the 2-D row indexing below would fail.
        return np.empty((0, 0), dtype="float32"), new_nodes

    features = np.array(features).astype("float32")
    # Stable sort keeps rows that share a node idx in the order they were read.
    features = features[np.argsort(indices, kind="stable"), :]
    return features, new_nodes