scripts_mgenre/preprocess_wikidata.py [92:121]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
                line = line.strip()
                if line[-1] == ",":
                    line = line[:-1]

                if line == "[" or line == "]":
                    continue

                line = json.loads(line)
                if line["type"] == "item":

                    if any(
                        e["mainsnak"]["datavalue"]["value"]["id"] in NOPAGE
                        for e in line["claims"].get("P31", {})
                        if "datavalue" in e["mainsnak"]
                    ):
                        continue
                    if any(
                        e["mainsnak"]["datavalue"]["value"]["id"] in NOPAGE
                        for e in line["claims"].get("P279", {})
                        if "datavalue" in e["mainsnak"]
                    ):
                        continue

                    line["sitelinks"] = {
                        k[:-4]: v["title"]
                        for k, v in line["sitelinks"].items()
                        if k.endswith("wiki")
                    }
                    if len(line["sitelinks"]) == 0:
                        continue
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -



scripts_mgenre/preprocess_wikidata.py [281:311]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
                line = line.strip()
                if line[-1] == ",":
                    line = line[:-1]

                if line == "[" or line == "]":
                    continue

                line = json.loads(line)

                if line["type"] == "item":

                    if any(
                        e["mainsnak"]["datavalue"]["value"]["id"] in NOPAGE
                        for e in line["claims"].get("P31", {})
                        if "datavalue" in e["mainsnak"]
                    ):
                        continue
                    if any(
                        e["mainsnak"]["datavalue"]["value"]["id"] in NOPAGE
                        for e in line["claims"].get("P279", {})
                        if "datavalue" in e["mainsnak"]
                    ):
                        continue

                    line["sitelinks"] = {
                        k[:-4]: v["title"]
                        for k, v in line["sitelinks"].items()
                        if k.endswith("wiki")
                    }
                    if len(line["sitelinks"]) == 0:
                        continue
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -



