def extract_popularities()

in src/extract_wikidata_info.py [0:0]


def extract_popularities(popularity_dump):
    """Iterate through the Wikipedia popularity dump without decompressing
    it, storing each English Wikipedia page's number of page views.

    The dump is streamed line by line straight from the compressed file, so
    it is never fully decompressed to disk or held in memory. A line is
    counted only if it has exactly six whitespace-separated fields and its
    first field is ``"en.wikipedia"``; the second field is used as the page
    key and the fifth as the view count. Counts of lines that share a page
    key are summed.

    Args:
        popularity_dump: ``str`` A path to a .BZ2 file containing Wikipedia
        page views for a day.

    Returns:
        wiki_popularity: ``collections.defaultdict`` Maps from a Wikipedia
        page to the daily page view count. Because it is a ``defaultdict``,
        indexing a missing page yields 0 and inserts that page.

    Raises:
        ValueError: If the count field of a matching line is not an integer.
        UnicodeDecodeError: If the dump contains bytes that are not valid
        UTF-8.
    """
    wiki_popularity = collections.defaultdict(int)
    # Decode explicitly as UTF-8: without ``encoding`` the locale default is
    # used, which breaks (or silently garbles) non-ASCII page titles on
    # systems whose default encoding is not UTF-8.
    with bz2.open(popularity_dump, "rt", encoding="utf-8") as bz_file:
        # Each line corresponds to the number of page views for a Wikipedia page
        for line in tqdm.tqdm(bz_file, desc="Loading Wikipedia popularity values"):
            fields = line.strip().split()
            # Skip lines w/o right len or Wikipedia pages that aren't in English
            if len(fields) != 6 or fields[0] != "en.wikipedia":
                continue
            _, page, _, _, views, _ = fields
            # The same page can appear on several lines (presumably one per
            # access method -- confirm against the dump format), so accumulate.
            wiki_popularity[page] += int(views)
    print(f"Found {len(wiki_popularity)} English Wikipedia pages")
    return wiki_popularity