in src/sagemaker/FD_SL_DGL/gnn_fraud_detection_dgl/data.py [0:0]
def _iter_csv_fields(path):
    """Yield the comma-split fields of each non-blank line of ``path``."""
    with open(path, "r") as fh:
        for raw_line in fh:
            record = raw_line.strip()
            # Blank / whitespace-only lines (e.g. a trailing empty line) carry
            # no data; skipping them avoids a bogus '' node or an unpack error.
            if record:
                yield record.split(",")


def read_edges(edges, nodes=None):
    """
    Read an edge list and, optionally, node features.

    Node indices are assigned consecutively from 0 in order of first
    appearance: in the nodes file when ``nodes`` is given, otherwise in the
    edges file (source before sink on each line). Blank lines in either file
    are ignored.

    :param edges: path to comma separated file containing all edges, one
        ``source,sink`` pair per line
    :param nodes: optional path to comma separated file containing all nodes +
        features, one ``node_id[,feat_1,...,feat_k]`` record per line; only the
        first record of a repeated node id is used
    :return: (list, list, list, dict) sources, sinks, features and id_to_node
        dictionary containing mappings from node names(id) to dgl node indices.
        ``features`` holds one float array per node record that carried feature
        columns (in node-index order when every node has features) and is empty
        when ``nodes`` is None
    :raises KeyError: if ``nodes`` is given and an edge references a node id
        that is not listed in the nodes file
    :raises ValueError: if an edge line does not have exactly two fields or a
        feature value cannot be parsed as a float
    """
    id_to_node = {}
    features = []
    sources, sinks = [], []

    if nodes is not None:
        for node_id, *raw_feats in _iter_csv_fields(nodes):
            if node_id in id_to_node:
                # Duplicate record: keep the index and features of the first one.
                continue
            # The next free index always equals the number of nodes seen so far.
            id_to_node[node_id] = len(id_to_node)
            if raw_feats:
                features.append(np.array([float(value) for value in raw_feats]))
        # With an explicit node list, edges may only reference known nodes.
        for source, sink in _iter_csv_fields(edges):
            sources.append(id_to_node[source])
            sinks.append(id_to_node[sink])
    else:
        # No node list: discover nodes on the fly from the edge endpoints.
        for source, sink in _iter_csv_fields(edges):
            sources.append(id_to_node.setdefault(source, len(id_to_node)))
            sinks.append(id_to_node.setdefault(sink, len(id_to_node)))

    return sources, sinks, features, id_to_node