def process_json()

in minihack/wiki.py


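This excerpt relies on module-level imports and on the clean_page_text helper defined elsewhere in minihack/wiki.py; the imports below are inferred from the names used in the body rather than quoted from the file.

from collections import defaultdict
from typing import List
from urllib.parse import unquote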
def process_json(wiki_json: List[dict], ignore_inpage_anchors: bool) -> dict:
    """Process a list of JSON pages of the wiki into one dict of all pages."""
    result: dict = {}
    redirects = {}
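    # "_global_counts" tracks how many times each page is linked from anywhere
    # in the wiki; every other key of result maps a page title (or redirect
    # alias) to that page's info dict.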
    result["_global_counts"] = defaultdict(int)

    def href_normalise(x: str) -> str:
        # Normalise a link target: URL-decode, lower-case, optionally drop the
        # "#section" fragment, and replace underscores with spaces.
        target = unquote(x.lower())
        if ignore_inpage_anchors:
            target = target.split("#")[0]
        return target.replace("_", " ")

    for page in wiki_json:
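        # Collect the per-page fields: lower-cased title, text length,
        # categories, raw text, and cleaned page text.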
        relevant_page_info = dict(
            title=page["wikipedia_title"].lower(),
            length=len("".join(page["text"])),
            categories=page["categories"].split(","),
            raw_text="".join(page["text"]),
            text=clean_page_text(page["page_data"]),
        )
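        # Normalise every outgoing link on the page, keeping its anchor text,
        # the page it points to, and its start position.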
        relevant_page_info["anchors"] = [
            dict(
                text=anchor["text"].lower(),
                page=href_normalise(anchor.get("title", anchor.get("href"))),
                start=anchor["start"],
            )
            for anchor in page["anchors"]
        ]
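        # Anchors that carry both an href and a title which normalise
        # differently indicate redirects; record href -> title so aliases can
        # be resolved to their target pages below.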
        redirect_anchors = [
            anchor
            for anchor in page["anchors"]
            if anchor.get("title")
            and href_normalise(anchor["href"])
            != href_normalise(anchor["title"])
        ]
        redirects.update(
            {
                href_normalise(anchor["href"]): href_normalise(anchor["title"])
                for anchor in redirect_anchors
            }
        )
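        # Count links to each target page, both within this page and across
        # the whole wiki.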
        unique_anchors: dict = defaultdict(int)
        for anchor in relevant_page_info["anchors"]:
            unique_anchors[anchor["page"]] += 1
            result["_global_counts"][anchor["page"]] += 1
        relevant_page_info["unique_anchors"] = dict(unique_anchors)
        result[relevant_page_info["title"]] = relevant_page_info
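    # Point every redirect alias at the same dict as its target page (this
    # assumes each redirect target appears as a page title in the dump).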
    for alias, page in redirects.items():
        result[alias] = result[page]
    return result
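A minimal usage sketch, assuming the wiki dump is already available as a JSON list of page dicts with the keys read above (wikipedia_title, text, page_data, categories, anchors); the file name and the example lookups are illustrative:

import json

with open("wiki_pages.json") as f:  # hypothetical dump of wiki pages
    wiki_json = json.load(f)

pages = process_json(wiki_json, ignore_inpage_anchors=True)

# Wiki-wide link counts live under the reserved "_global_counts" key.
print(dict(pages["_global_counts"]))

# Every other key is a page title or a redirect alias; aliases share the
# same dict object as the page they redirect to.
some_title = next(k for k in pages if k != "_global_counts")
print(pages[some_title]["unique_anchors"])

Because redirect aliases are inserted as references to their target page's dict, looking up either name returns the same entry.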