in source/sagemaker/sagemaker_graph_fraud_detection/dgl_fraud_detection/data.py [0:0]
def get_features(id_to_node, node_features):
    """
    Load node features from a comma-separated file, ordered by graph node index.

    Every non-blank line is parsed as ``node_id,feat_1,feat_2,...``. A node id that
    is not yet a key of ``id_to_node`` is assigned the next free index (current
    maximum index + 1, or 0 when the mapping is empty); the mapping is updated
    IN PLACE with these new entries.

    :param id_to_node: dictionary mapping node names(id) to dgl node idx (mutated in place)
    :param node_features: path to file containing node features
    :return: (np.ndarray, list) float32 node feature matrix with rows sorted by node idx,
        and the indices assigned to new nodes not yet in the graph
    """
    indices, features, new_nodes = [], [], []
    # default=-1 so an empty mapping numbers its first new node 0 instead of raising.
    max_node = max(id_to_node.values(), default=-1)
    with open(node_features, "r") as fh:
        for line in fh:
            line = line.strip()
            if not line:
                # Blank lines (e.g. a trailing newline) carry no node; skipping them
                # avoids registering a bogus '' node with a zero-length feature row.
                continue
            node_feats = line.split(",")
            node_id = node_feats[0]
            features.append(np.array([float(value) for value in node_feats[1:]]))
            if node_id not in id_to_node:
                max_node += 1
                id_to_node[node_id] = max_node
                new_nodes.append(max_node)
            indices.append(id_to_node[node_id])

    if not features:
        # No data rows: return an empty 2-D matrix so callers can still index it as
        # (rows, cols) instead of hitting an IndexError on a 1-D empty array.
        return np.empty((0, 0), dtype='float32'), new_nodes

    features = np.array(features).astype('float32')
    # Reorder rows from file order to ascending node-index order.
    features = features[np.argsort(indices), :]
    return features, new_nodes