in src/sagemaker/FD_SL_DGL/gnn_fraud_detection_dgl/data.py [0:0]
def get_features(id_to_node, node_feature_files):
    """
    Load node features from CSV files and order them by dgl node idx.

    The first line of every file is treated as a header and skipped. Every other
    non-blank line is parsed as ``node_id,feat_1,...,feat_k``.

    :param id_to_node: dictionary mapping node names(id) to dgl node idx. Node ids that are
        not in the mapping yet are added to it in place, numbered after the current maximum
        idx (starting at 0 when the mapping is empty)
    :param node_feature_files: iterable of paths to files containing node features
    :return: (np.ndarray, list) float32 node feature matrix with rows sorted by dgl node idx,
        and the idx of new nodes not yet in the graph
    """
    indices, rows, new_nodes = [], [], []
    # default=-1 so that an empty mapping starts numbering new nodes at 0
    # instead of raising ValueError from max() on an empty sequence.
    max_node = max(id_to_node.values(), default=-1)

    for node_file in node_feature_files:
        with open(node_file, "r") as fh:
            # hard-coding to ignore the 1st line of header
            next(fh, None)
            for line in fh:
                record = line.strip()
                if not record:
                    # A blank line (e.g. trailing newline) carries no node; parsing it
                    # would register a bogus '' node with an empty feature row.
                    continue
                node_id, *raw_feats = record.split(",")
                rows.append([float(value) for value in raw_feats])
                if node_id not in id_to_node:
                    max_node += 1
                    id_to_node[node_id] = max_node
                    new_nodes.append(max_node)
                indices.append(id_to_node[node_id])

    if not rows:
        # Nothing was read: return an empty 2-D matrix rather than failing on
        # 2-D indexing of a 1-D empty array.
        return np.empty((0, 0), dtype="float32"), new_nodes

    features = np.array(rows).astype("float32")
    # Stable sort keeps file order for rows that share the same node idx.
    features = features[np.argsort(indices, kind="stable"), :]
    return features, new_nodes