in src/extract_wikidata_info.py [0:0]
import argparse


def main():
"""
For each Wikidata entity in the Wikidata dump, we extract out it's entity
type, associated Wikipedia page (used for popularity), all aliases
for the entity, and popularity of the entity's Wikipedia page, then write
this information into a compressed JSON file. We write each dictionary of entity
information in it's own line for easy readability.
"""
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-w",
        "--wikidata_dump",
        required=True,
        help="Compressed .json.bz2 Wikidata dump for information extraction",
    )
    parser.add_argument(
        "-p",
        "--popularity_dump",
        required=True,
        help="Compressed .bz2 Wikipedia popularity dump",
    )
    parser.add_argument(
        "-o",
        "--output_file",
        default="wikidata/entity_info.json.gz",
        help="Output compressed JSON file for writing Wikidata entity information",
    )
    args = parser.parse_args()
    extract_entity_information(
        popularity_dump=args.popularity_dump,
        wikidata_dump=args.wikidata_dump,
        output_file=args.output_file,
    )
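

# A minimal sketch of the extraction step, assuming extract_entity_information
# is defined elsewhere in this module. The output field names and the
# dump-parsing details below are illustrative assumptions, not the project's
# actual implementation.
def extract_entity_information_sketch(popularity_dump, wikidata_dump, output_file):
    import bz2
    import gzip
    import json

    # Build a page-title -> view-count map from the popularity dump.
    # Assumed pageview line format: "<project> <page_title> <view_count> ...".
    popularity = {}
    with bz2.open(popularity_dump, "rt", encoding="utf-8", errors="ignore") as f:
        for line in f:
            parts = line.split()
            if len(parts) >= 3 and parts[2].isdigit():
                popularity[parts[1]] = int(parts[2])

    # Wikidata JSON dumps hold one entity per line inside a giant JSON array,
    # so strip the array brackets and trailing commas before parsing each line.
    with bz2.open(wikidata_dump, "rt", encoding="utf-8") as dump, \
            gzip.open(output_file, "wt", encoding="utf-8") as out:
        for line in dump:
            line = line.strip().rstrip(",")
            if not line or line in ("[", "]"):
                continue
            entity = json.loads(line)
            title = entity.get("sitelinks", {}).get("enwiki", {}).get("title")
            info = {
                "id": entity.get("id"),
                "type": entity.get("type"),
                "wikipedia_page": title,
                "aliases": [
                    a["value"] for a in entity.get("aliases", {}).get("en", [])
                ],
                # Pageview titles use underscores where sitelink titles use spaces.
                "popularity": popularity.get(
                    title.replace(" ", "_") if title else None, 0
                ),
            }
            out.write(json.dumps(info) + "\n")  # one entity dict per line


# Assumed entry point; the real module likely defines this guard outside the
# section shown here. Example invocation (paths are hypothetical):
#   python src/extract_wikidata_info.py \
#       -w wikidata/latest-all.json.bz2 \
#       -p pageviews/pageviews-20200101.bz2 \
#       -o wikidata/entity_info.json.gz
if __name__ == "__main__":
    main()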