in graphml2csv/graphml2csv.py [0:0]
def graphml_to_csv(self, fname, delimiter, encoding):
    """Convert a GraphML file into a pair of Neptune-style CSV files.

    The input is streamed with ``etree.iterparse``. Two files are written
    next to it, ``<prefix>-nodes.csv`` and ``<prefix>-edges.csv``, where
    ``<prefix>`` is ``fname`` without its extension.

    Column layout:
      * nodes: ``~id``, ``~label``, then one ``<id>:<attr.type>`` column per
        ``<key>`` that applies to nodes;
      * edges: ``~id``, ``~from``, ``~to``, ``~label``, then one
        ``<id>:<attr.type>`` column per ``<key>`` that applies to edges.

    A ``<key>`` applies to both kinds when its ``for`` attribute is absent
    or ``"all"``. The key ids ``labelV`` and ``labelE`` are not turned into
    columns; children referencing them supply ``~label`` instead. Missing
    labels default to ``"node"`` / ``"edge"``; a missing node id falls back
    to the running node count and a missing edge id to
    ``<source>_<target>``.

    Args:
        fname: Path of the GraphML file to read.
        delimiter: Field delimiter handed to ``csv.DictWriter``.
        encoding: Passed through to ``GraphML2CSV.py_compat_str`` for every
            key name and text value read from a node or edge child.

    Returns:
        None. A two-line summary (counts and output paths) is written to
        ``sys.stderr``.

    Raises:
        KeyError: If a ``<key>`` lacks ``id`` (or ``attr.type`` when it
            applies to nodes/edges), an ``<edge>`` lacks ``source`` or
            ``target``, or a node/edge child references a key that was not
            declared for that element kind.
    """
    outfname_prefix = os.path.splitext(fname)[0]
    node_fname = outfname_prefix+'-nodes.csv'
    edge_fname = outfname_prefix+'-edges.csv'

    # Neptune CSV vertex headers; property columns are appended as <key>
    # declarations are parsed.
    vtx_header = ["~id", "~label"]
    # Neptune CSV edge headers.
    edge_header = ["~id", "~from", "~to", "~label"]
    # GraphML key id -> CSV column name ("<id>:<attr.type>").
    vtx_dict = {}
    edge_dict = {}

    # Resolve the constant tag names once instead of on every parser event.
    # NOTE(review): assumes GraphML2CSV.graphml_tag is a pure helper.
    graph_tag = GraphML2CSV.graphml_tag('graph')
    key_tag = GraphML2CSV.graphml_tag('key')
    node_tag = GraphML2CSV.graphml_tag('node')
    edge_tag = GraphML2CSV.graphml_tag('edge')

    def fill_row(elem, row, columns, label_key, default_label):
        """Copy the children of ``elem`` into ``row``; return the property count."""
        prop_cnt = 0
        has_label = False
        for data in elem:
            key = GraphML2CSV.py_compat_str(encoding, data.attrib.get('key'))
            value = GraphML2CSV.py_compat_str(encoding, data.text)
            if key == label_key:
                row["~label"] = value
                has_label = True
            else:
                row[columns[key]] = value
                prop_cnt += 1
        if not has_label:
            # Fall back to a generic label when none was specified.
            row["~label"] = default_label
        return prop_cnt

    # The input is opened first so a missing input file does not leave
    # empty output files behind.
    with open(fname, 'r') as f:
        with open(node_fname, 'w') as node_csvfile, \
                open(edge_fname, 'w') as edge_csvfile:
            # Created once the first <graph> element starts, i.e. after the
            # <key> declarations preceding it have filled the headers.
            node_writer = None
            edge_writer = None
            node_cnt = 0
            node_attr_cnt = 0
            edge_cnt = 0
            edge_attr_cnt = 0

            for event, elem in etree.iterparse(f, events=('start', 'end')):
                tag = GraphML2CSV.graphml_tag(elem.tag)

                if event == 'start':
                    # Only the first <graph> writes the header rows; nested
                    # or additional graphs must not repeat them mid-file.
                    if tag == graph_tag and node_writer is None:
                        node_writer = csv.DictWriter(
                            node_csvfile, fieldnames=vtx_header,
                            restval='', delimiter=delimiter)
                        node_writer.writeheader()
                        edge_writer = csv.DictWriter(
                            edge_csvfile, fieldnames=edge_header,
                            restval='', delimiter=delimiter)
                        edge_writer.writeheader()
                    continue

                # event == 'end': the element and its children are complete.
                if tag == key_tag:
                    key_id = elem.attrib['id']
                    # labelV / labelE carry the label, not a property column.
                    if key_id != 'labelV' and key_id != 'labelE':
                        # GraphML defaults a key's domain to "all", which
                        # covers both nodes and edges.
                        domain = elem.attrib.get('for', 'all')
                        if domain in ('all', 'node', 'edge'):
                            column = key_id + ":" + elem.attrib['attr.type']
                            if domain != 'edge':
                                vtx_dict[key_id] = column
                                vtx_header.append(column)
                            if domain != 'node':
                                edge_dict[key_id] = column
                                edge_header.append(column)
                    elem.clear()
                elif tag == node_tag:
                    node_cnt += 1
                    # If the optional ID is not present, use the node count.
                    node_d = {"~id": elem.attrib.get('id', node_cnt)}
                    node_attr_cnt += fill_row(
                        elem, node_d, vtx_dict, "labelV", "node")
                    node_writer.writerow(node_d)
                    # Release the processed subtree to keep memory flat.
                    elem.clear()
                elif tag == edge_tag:
                    edge_cnt += 1
                    # source/target attributes refer to IDs:
                    # http://graphml.graphdrawing.org/xmlns/1.1/graphml-structure.xsd
                    source = elem.attrib['source']
                    dest = elem.attrib['target']
                    edge_d = {
                        # If the optional ID is not present, use source_dest.
                        "~id": elem.attrib.get('id', source + '_' + dest),
                        "~from": source,
                        "~to": dest,
                    }
                    edge_attr_cnt += fill_row(
                        elem, edge_d, edge_dict, "labelE", "edge")
                    edge_writer.writerow(edge_d)
                    elem.clear()

    sys.stderr.write("Wrote %d nodes and %d attributes to %s.\n" % (
        node_cnt, node_attr_cnt, node_fname))
    sys.stderr.write("Wrote %d edges and %d attributes to %s.\n" % (
        edge_cnt, edge_attr_cnt, edge_fname))
    return