in dataset-construction/src/ndb_data/generation/map_kelm.py [0:0]
def find_longest_match(searches, search_key, restrict_relation=False):
    """Return the matched search token that sits latest in ``searches``.

    The second element (``s[1]``) of every item in ``searches`` is used as a
    search token. All tokens are passed to ``wikidata.find_custom`` together
    with ``search_key``; each returned record is then matched back against
    the token list.

    NOTE(review): the function name suggests ``searches`` is ordered so that
    later entries are longer spans -- confirm against the caller.

    Args:
        searches: Iterable of indexable items; only element ``[1]`` is read.
        search_key: Record field to compare with the tokens. A dotted key
            ``"outer.inner"`` means ``record["outer"]`` is iterated and
            ``item["inner"]`` of every element is compared instead.
        restrict_relation: If true, only records whose ``wikidata_id`` starts
            with ``"P"`` are considered; otherwise only those starting with
            ``"Q"``.

    Returns:
        A 3-tuple ``(token, wikidata_ids, index)`` where ``token`` is the
        matched token with the highest position in the token list,
        ``wikidata_ids`` is a list of the distinct ids recorded for that
        token and ``index`` is its position. When nothing matched the result
        is ``(None, None, -1)``.
    """
    search_toks = [s[1] for s in searches]
    ents = wikidata.find_custom(search_key, search_toks)

    expected_prefix = "P" if restrict_relation else "Q"

    # The key layout does not depend on the record, so resolve it once
    # instead of re-splitting for every result.
    if "." in search_key:
        outer_key, inner_key = search_key.split(".", maxsplit=1)
    else:
        outer_key, inner_key = search_key, None

    highest_query_index = None
    n_count = defaultdict(set)
    for result in ents:
        wikidata_id = result["wikidata_id"]
        # startswith() also rejects an empty id, where indexing [0] would
        # raise IndexError.
        if not wikidata_id.strip().startswith(expected_prefix):
            continue

        if inner_key is None:
            candidates = (result[outer_key],)
        else:
            candidates = (nested[inner_key] for nested in result[outer_key])

        for value in candidates:
            try:
                query_index = search_toks.index(value)
            except ValueError:
                # The value is not one of the requested tokens. Both the
                # flat and the nested key layouts skip it, so a single
                # unmatched record cannot abort the whole lookup.
                continue
            n_count[value].add(wikidata_id)
            if highest_query_index is None or query_index > highest_query_index:
                highest_query_index = query_index

    if highest_query_index is None:
        return None, None, -1

    best_token = search_toks[highest_query_index]
    return best_token, list(n_count[best_token]), highest_query_index