def convert()

in assets/lambda_helper_neptune/python/rdflib/tools/csv2rdf.py [0:0]


    def convert(self, csvreader):

        start = time.time()

        if self.OUT:
            sys.stderr.write("Output to %s\n" % self.OUT.name)

        if self.IDENT != "auto" and not isinstance(self.IDENT, tuple):
            self.IDENT = (self.IDENT,)

        if not self.BASE:
            warnings.warn("No base given, using http://example.org/instances/")
            self.BASE = rdflib.Namespace("http://example.org/instances/")

        if not self.PROPBASE:
            warnings.warn(
                "No property base given, using http://example.org/property/")
            self.PROPBASE = rdflib.Namespace("http://example.org/props/")

        # skip lines at the start
        for x in range(self.SKIP):
            next(csvreader)

        # read header line
        header_labels = list(next(csvreader))
        headers = dict(
            enumerate([self.PROPBASE[toProperty(x)] for x in header_labels]))
        # override header properties if some are given
        for k, v in self.PROPS.items():
            headers[k] = v
            header_labels[k] = split_uri(v)[1]

        if self.DEFINECLASS:
            # output class/property definitions
            self.triple(self.CLASS, RDF.type, RDFS.Class)
            for i in range(len(headers)):
                h, l = headers[i], header_labels[i]
                if h == "" or l == "":
                    continue
                if self.COLUMNS.get(i) == _config_ignore:
                    continue
                self.triple(h, RDF.type, RDF.Property)
                self.triple(h, RDFS.label, rdflib.Literal(toPropertyLabel(l)))
                self.triple(h, RDFS.domain, self.CLASS)
                self.triple(h, RDFS.range,
                            self.COLUMNS.get(i, default_node_make).range())

        rows = 0
        for l in csvreader:
            try:
                if self.IDENT == 'auto':
                    uri = self.BASE["%d" % rows]
                else:
                    uri = self.BASE["_".join([urllib.parse.quote(x.encode(
                        "utf8").replace(" ", "_"), safe="")
                        for x in index(l, self.IDENT)])]

                if self.LABEL:
                    self.triple(uri, RDFS.label, rdflib.Literal(
                        " ".join(index(l, self.LABEL))))

                if self.CLASS:
                    # type triple
                    self.triple(uri, RDF.type, self.CLASS)

                for i, x in enumerate(l):
                    x = x.strip()
                    if x != '':
                        if self.COLUMNS.get(i) == _config_ignore:
                            continue
                        try:
                            o = self.COLUMNS.get(i, rdflib.Literal)(x)
                            if isinstance(o, list):
                                for _o in o:
                                    self.triple(uri, headers[i], _o)
                            else:
                                self.triple(uri, headers[i], o)

                        except Exception as e:
                            warnings.warn(
                                "Could not process value for column " +
                                "%d:%s in row %d, ignoring: %s " % (
                                i, headers[i], rows, e.message))

                rows += 1
                if rows % 100000 == 0:
                    sys.stderr.write(
                        "%d rows, %d triples, elapsed %.2fs.\n" % (
                        rows, self.triples, time.time() - start))
            except:
                sys.stderr.write("Error processing line: %d\n" % rows)
                raise

        # output types/labels for generated URIs
        classes = set()
        for l, x in uris.items():
            u, c = x
            self.triple(u, RDFS.label, rdflib.Literal(l))
            if c:
                c = rdflib.URIRef(c)
                classes.add(c)
                self.triple(u, RDF.type, c)

        for c in classes:
            self.triple(c, RDF.type, RDFS.Class)

        self.OUT.close()
        sys.stderr.write(
            "Converted %d rows into %d triples.\n" % (rows, self.triples))
        sys.stderr.write("Took %.2f seconds.\n" % (time.time() - start))