def main()

in src/extract_wikidata_info.py [0:0]


def main():
    """
    Extract information for every Wikidata entity in a Wikidata dump.

    For each entity we pull out its entity type, its associated Wikipedia
    page (used for popularity), all of its aliases, and the popularity of
    its Wikipedia page, then write that information to a compressed JSON
    file. Each entity's dictionary of information is written on its own
    line for easy readability.
    """
    arg_parser = argparse.ArgumentParser()
    # (flags, options) specs for each CLI argument, registered in one pass below.
    arg_specs = [
        (
            ("-w", "--wikidata_dump"),
            dict(
                required=True,
                help="Compressed .json.bz2 Wikidata dump for information extraction",
            ),
        ),
        (
            ("-p", "--popularity_dump"),
            dict(
                required=True,
                help="Compressed .bz2 Wikipedia popularity dump",
            ),
        ),
        (
            ("-o", "--output_file"),
            dict(
                default="wikidata/entity_info.json.gz",
                help="Output compressed JSON file for writing Wikidata entity information.",
            ),
        ),
    ]
    for flags, options in arg_specs:
        arg_parser.add_argument(*flags, **options)
    cli_args = arg_parser.parse_args()

    # Delegate the actual extraction work; defined elsewhere in this module.
    extract_entity_information(
        popularity_dump=cli_args.popularity_dump,
        wikidata_dump=cli_args.wikidata_dump,
        output_file=cli_args.output_file,
    )