in kilt/eval_retrieval.py [0:0]
def _get_ids_list(datapoint, rank_keys, verbose=False):
    """Gather the gold provenance ids of ``datapoint``, one list per output.

    For every element of ``datapoint["output"]`` the provenance entries (if
    the output has a ``"provenance"`` field) are scanned. An entry that
    contains every key in ``rank_keys`` yields a single id: the ``str()`` of
    each keyed value, stripped of surrounding whitespace, joined with ``"+"``
    in ``rank_keys`` order. An entry lacking any of the keys is skipped, and
    a warning naming the missing keys is printed when ``verbose`` is true.

    Args:
        datapoint: mapping with an ``"output"`` iterable of outputs.
        rank_keys: keys that together identify a provenance entry.
        verbose: when true, print a warning for each skipped entry.

    Returns:
        A list with one element per output: the result of passing that
        output's ids through ``_remove_duplicates`` (helper defined
        elsewhere; presumably it drops repeated ids -- verify there).
    """
    gold_ids_per_output = []

    for answer in datapoint["output"]:
        collected = []
        entries = answer["provenance"] if "provenance" in answer else []

        for entry in entries:
            if all(key in entry for key in rank_keys):
                # Composite id built from every rank key, in the given order.
                id_parts = [str(entry[key]).strip() for key in rank_keys]
                collected.append("+".join(id_parts))
                continue

            # Incomplete entry: work out which rank keys it lacks.
            expected = set(rank_keys)
            found = set(entry.keys()).intersection(set(rank_keys))
            missing = expected - found
            if verbose:
                print(
                    f"WARNING: missing key(s) {missing} in provenance, unable to compute retrieval for those."
                )

        # Outputs without usable provenance still contribute an entry.
        gold_ids_per_output.append(_remove_duplicates(collected))

    return gold_ids_per_output