def get_data_for_key()

in blink/candidate_retrieval/data_ingestion.py [0:0]


def get_data_for_key(data, title):
    """Build a flat record for the entry stored under ``title`` in ``data``.

    Args:
        data: Mapping from title to a per-entry dict. Each entry must provide
            "wikipedia_id", "intro_concatenated" and "num_tokens", and must
            provide "wikidata_id_from_index" whenever no usable wikidata id
            is found under "wikidata_info". "wikidata_info",
            "num_incoming_links" and the "sent_desc_<n>" keys are optional.
        title: Key of the entry in ``data``; also stored as the "title" field.

    Returns:
        A dict with the keys "id", "title", "wikidata_id", "desc", "aliases",
        "wikidata_desc", "num_tokens" and "num_incoming_links" (0 when the
        entry has none). When the module-level ``args.add_sentence_data`` is
        truthy, "sent_desc_1" .. "sent_desc_10" are added as well, each
        defaulting to "".

    Raises:
        KeyError: If ``title`` is not in ``data`` or a required entry key is
            missing.
    """
    entry = data[title]
    # A missing, empty or None "wikidata_info" means "no wikidata fields", so
    # all three wikidata lookups below share the same guarded fallback path.
    wikidata_info = entry.get("wikidata_info") or {}

    obj = {"id": entry["wikipedia_id"], "title": title}

    # Prefer the id carried by wikidata_info; fall back to the index-derived
    # id when it is absent or explicitly None.
    wikidata_id = wikidata_info.get("wikidata_id")
    if wikidata_id is None:
        wikidata_id = entry["wikidata_id_from_index"]
    obj["wikidata_id"] = wikidata_id

    obj["desc"] = entry["intro_concatenated"]

    raw_aliases = wikidata_info.get("aliases")
    if raw_aliases is None:
        aliases = ""
    else:
        # Each alias is wrapped in double quotes and the results are
        # space-joined; aliases found in emoji.UNICODE_EMOJI are dropped.
        # NOTE(review): this assumes emoji.UNICODE_EMOJI is a flat container
        # of emoji strings; newer releases of the emoji package appear to
        # have changed/removed it -- confirm against the pinned version.
        aliases = " ".join(
            [
                '"{}"'.format(alias)
                for alias in raw_aliases
                if alias not in emoji.UNICODE_EMOJI
            ]
        )

    obj["aliases"] = aliases
    # A present-but-None description is passed through unchanged; only a
    # missing one becomes "".
    obj["wikidata_desc"] = wikidata_info.get("description", "")
    obj["num_tokens"] = entry["num_tokens"]
    obj["num_incoming_links"] = entry.get("num_incoming_links", 0)

    # ``args`` is read from module scope; it is not a parameter here.
    if args.add_sentence_data:
        for idx in range(1, 11):
            key = "sent_desc_{}".format(idx)
            obj[key] = entry.get(key, "")

    return obj