def graphml_to_csv()

in graphml2csv/graphml2csv.py [0:0]


    def graphml_to_csv(self, fname, delimiter, encoding):
        """Convert a GraphML file into Neptune-style node and edge CSV files.

        The input is streamed with ``etree.iterparse`` and two files are
        written next to it: ``<prefix>-nodes.csv`` and ``<prefix>-edges.csv``,
        where ``<prefix>`` is ``fname`` without its extension.  A summary of
        the number of rows and attributes written is printed to stderr.

        ``<key>`` declarations become typed property columns named
        ``<id>:<attr.type>``.  The keys ``labelV`` and ``labelE`` are not
        turned into columns; their ``<data>`` values fill the ``~label``
        column instead (falling back to ``"node"`` / ``"edge"``).

        Args:
            fname: Path of the GraphML file to read.
            delimiter: Field delimiter handed to ``csv.DictWriter``.
            encoding: Passed to ``GraphML2CSV.py_compat_str`` for every key
                reference and value read from a ``<data>`` element.

        Returns:
            None.

        Raises:
            KeyError: If a ``<data>`` element references a key that was not
                declared for that element kind, if a node/edge ``<key>``
                lacks ``id`` or ``attr.type``, or if an ``<edge>`` lacks
                ``source`` or ``target``.
        """
        outfname_prefix = os.path.splitext(fname)[0]
        node_fname = outfname_prefix + '-nodes.csv'
        edge_fname = outfname_prefix + '-edges.csv'

        graphml_tag = GraphML2CSV.graphml_tag
        to_str = GraphML2CSV.py_compat_str

        # Tag names as produced by GraphML2CSV.graphml_tag, computed once
        # rather than on every parse event.
        graph_tag = graphml_tag('graph')
        key_tag = graphml_tag('key')
        node_tag = graphml_tag('node')
        edge_tag = graphml_tag('edge')
        data_tag = graphml_tag('data')

        # Neptune CSV system columns for vertices and for edges; property
        # columns are appended as <key> declarations are read.
        vtx_header = ["~id", "~label"]
        edge_header = ["~id", "~from", "~to", "~label"]

        # Map a GraphML key id to its "<id>:<attr.type>" column name.
        vtx_dict = {}
        edge_dict = {}

        def fill_row(elem, row, label_key, columns):
            """Copy the <data> children of elem into row.

            Returns a ``(found_label, count)`` tuple: whether a child with
            key ``label_key`` set ``~label``, and how many <data> children
            were copied.
            """
            found_label = False
            count = 0
            for child in elem:
                # Nodes and edges may also hold <desc>, <port> or a nested
                # <graph>; only <data> children carry property values.
                if graphml_tag(child.tag) != data_tag:
                    continue
                key_ref = to_str(encoding, child.attrib.get('key'))
                value = to_str(encoding, child.text)
                if key_ref == label_key:
                    row["~label"] = value
                    found_label = True
                else:
                    row[columns[key_ref]] = value
                count += 1
            return found_label, count

        node_cnt = 0
        node_attr_cnt = 0
        edge_cnt = 0
        edge_attr_cnt = 0

        # Binary mode lets the XML parser honour the encoding named in the
        # XML declaration instead of the locale's default text encoding.
        with open(fname, 'rb') as f, \
                open(node_fname, 'w') as node_csvfile, \
                open(edge_fname, 'w') as edge_csvfile:

            # Created once the first <graph> element is seen, i.e. after the
            # <key> declarations that precede it have been collected.
            node_writer = None
            edge_writer = None

            for event, elem in etree.iterparse(f, events=('start', 'end')):
                tag = graphml_tag(elem.tag)

                if event == 'start':
                    # Write the CSV headers exactly once: a document may hold
                    # several (or nested) <graph> elements.
                    if tag == graph_tag and node_writer is None:
                        node_writer = csv.DictWriter(
                            node_csvfile, fieldnames=vtx_header, restval='',
                            delimiter=delimiter)
                        node_writer.writeheader()
                        edge_writer = csv.DictWriter(
                            edge_csvfile, fieldnames=edge_header, restval='',
                            delimiter=delimiter)
                        edge_writer.writeheader()
                    continue

                if event != 'end':
                    continue

                if tag == key_tag:
                    key_id = elem.attrib['id']
                    # labelV / labelE feed the ~label column, not a property.
                    if key_id != 'labelV' and key_id != 'labelE':
                        # A missing "for" attribute means the key applies to
                        # every element kind, the same as for="all".
                        domain = elem.attrib.get('for', 'all')
                        for_nodes = domain in ('all', 'node')
                        for_edges = domain in ('all', 'edge')
                        if for_nodes or for_edges:
                            column = key_id + ":" + elem.attrib['attr.type']
                            if for_nodes:
                                vtx_dict[key_id] = column
                                vtx_header.append(column)
                            if for_edges:
                                edge_dict[key_id] = column
                                edge_header.append(column)
                    elem.clear()

                elif tag == node_tag:
                    node_cnt += 1
                    # If the optional ID is not present, use the node count.
                    node_d = {"~id": elem.attrib.get('id', node_cnt)}

                    has_label, copied = fill_row(
                        elem, node_d, "labelV", vtx_dict)
                    node_attr_cnt += copied
                    if not has_label:
                        # Use node as the label if it is unspecified.
                        node_d["~label"] = "node"

                    node_writer.writerow(node_d)
                    elem.clear()

                elif tag == edge_tag:
                    edge_cnt += 1
                    # source/target attributes refer to node IDs:
                    # http://graphml.graphdrawing.org/xmlns/1.1/graphml-structure.xsd
                    source = elem.attrib['source']
                    dest = elem.attrib['target']
                    # If the optional ID is not present, use source_dest.
                    edge_id = elem.attrib.get('id', source + '_' + dest)
                    edge_d = {"~id": edge_id, "~from": source, "~to": dest}

                    has_label, copied = fill_row(
                        elem, edge_d, "labelE", edge_dict)
                    edge_attr_cnt += copied
                    if not has_label:
                        # Use edge as the label if it is unspecified.
                        edge_d["~label"] = "edge"

                    edge_writer.writerow(edge_d)
                    elem.clear()

        sys.stderr.write("Wrote %d nodes and %d attributes to %s.\n" % (
            node_cnt, node_attr_cnt, node_fname))
        sys.stderr.write("Wrote %d edges and %d attributes to %s.\n" % (
            edge_cnt, edge_attr_cnt, edge_fname))