def find_longest_match()

in dataset-construction/src/ndb_data/generation/map_kelm.py [0:0]


def find_longest_match(searches, search_key, restrict_relation=False):
    """Return the latest-positioned search token that matches a record.

    The second element of every item in ``searches`` is taken as a candidate
    token. All candidates are passed to ``wikidata.find_custom`` in a single
    call, and each returned record is mapped back to the position of its
    token in the candidate list. The winner is the matched token with the
    highest position. (The function name suggests callers order candidates
    from shortest to longest; that ordering is not checked here.)

    Args:
        searches: Iterable of indexable items; ``item[1]`` is the token.
        search_key: Record field to match on. A dotted key such as
            ``"a.b"`` is split at the first dot: ``record["a"]`` is iterated
            and ``entry["b"]`` of every entry is compared with the tokens.
        restrict_relation: When true, only records whose ``wikidata_id``
            starts with ``"P"`` are considered; otherwise only those
            starting with ``"Q"``.

    Returns:
        A 3-tuple ``(token, wikidata_ids, index)`` where ``token`` is the
        winning candidate, ``wikidata_ids`` is a list (unordered, built from
        a set) of the ids of all accepted records that matched that token,
        and ``index`` is the token's position among the candidates.
        ``(None, None, -1)`` is returned when nothing matched.
    """
    search_toks = [s[1] for s in searches]
    ents = wikidata.find_custom(search_key, search_toks)

    wanted_prefix = "P" if restrict_relation else "Q"
    # Split a dotted key once instead of once per record.
    nested_key = search_key.split(".", maxsplit=1) if "." in search_key else None

    highest_query_index = None
    n_count = defaultdict(set)

    for result in ents:
        wikidata_id = result["wikidata_id"]
        # startswith() also rejects blank ids, which would otherwise raise
        # IndexError when indexing the first character.
        if not wikidata_id.strip().startswith(wanted_prefix):
            continue

        if nested_key is not None:
            first, second = nested_key
            values = (nested[second] for nested in result[first])
        else:
            values = (result[search_key],)

        for value in values:
            try:
                # index() yields the first position of the token.
                query_index = search_toks.index(value)
            except ValueError:
                # The value is not one of the candidates. Skip it for both
                # key shapes so a stray record cannot abort the lookup.
                continue

            n_count[value].add(wikidata_id)
            if highest_query_index is None or highest_query_index < query_index:
                highest_query_index = query_index

    if highest_query_index is None:
        return None, None, -1

    best_token = search_toks[highest_query_index]
    return best_token, list(n_count[best_token]), highest_query_index