in kilt/eval_retrieval.py
def get_ranking_metrics(guess_item, gold_item, ks, rank_keys):
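    """Compute ranking metrics for a single guess/gold item pair.

    Returns R-precision plus precision@k, recall@k, success_rate@k,
    answer_in_context@k, and answer_and_ent_in_context@k for each k in
    `ks`, along with an entity_in_input flag for the gold item.
    """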
    Rprec = 0.0
    P_at_k = {"precision@{}".format(k): 0 for k in sorted(ks) if k > 0}
    R_at_k = {"recall@{}".format(k): 0 for k in sorted(ks) if k > 1}
    S_at_k = {"success_rate@{}".format(k): 0 for k in sorted(ks) if k > 1}
    A_at_k = {"answer_in_context@{}".format(k): 0 for k in sorted(ks) if k > 0}
    AE_at_k = {
        "answer_and_ent_in_context@{}".format(k): 0 for k in sorted(ks) if k > 0
    }

    assert (
        "output" in guess_item and len(guess_item["output"]) == 1
    ), f"guess should provide exactly one output for {guess_item['id']}"

    Rprec = rprecision(guess_item, gold_item, rank_keys=rank_keys)
    eii = entity_in_input(gold_item)
    for k in ks:
        # 0. get rank
        rank, num_distinct_evidence_sets = get_rank(
            guess_item, gold_item, k, rank_keys=rank_keys
        )

        if num_distinct_evidence_sets > 0:
            # 1. precision
            P_at_k["precision@{}".format(k)] = _precision_at_k(rank, k)

            # 2. recall
            R_at_k["recall@{}".format(k)] = _recall_at_k(
                rank, num_distinct_evidence_sets, k
            )

            # 3. success rate
            S_at_k["success_rate@{}".format(k)] = _success_rate_at_k(rank, k)

            # 4. answer in context
            A_at_k["answer_in_context@{}".format(k)] = _answer_in_context_at_k(
                guess_item, gold_item, k
            )

            # 5. answer and entity in context
            AE_at_k[
                "answer_and_ent_in_context@{}".format(k)
            ] = _answer_and_ent_in_context_at_k(guess_item, gold_item, k)
    return {
        "Rprec": Rprec,
        **P_at_k,
        **R_at_k,
        **S_at_k,
        **A_at_k,
        **AE_at_k,
        "entity_in_input": eii,
    }
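
# A minimal sketch of how this function might be invoked, assuming the KILT
# convention that each output carries a "provenance" list whose entries
# expose the field named in rank_keys. The ids, provenance values, and the
# ks/rank_keys choices below are illustrative, not taken from the source;
# whether this runs as-is also depends on helper implementations not shown
# here (e.g. entity_in_input and the answer-in-context checks may require
# additional fields such as provenance text).
guess_item = {
    "id": "q1",
    "output": [
        {"provenance": [{"wikipedia_id": "123"}, {"wikipedia_id": "456"}]}
    ],
}
gold_item = {
    "id": "q1",
    "input": "example question",
    "output": [
        {"answer": "example answer", "provenance": [{"wikipedia_id": "123"}]}
    ],
}
metrics = get_ranking_metrics(
    guess_item, gold_item, ks=[1, 5], rank_keys=["wikipedia_id"]
)
# metrics contains e.g. "Rprec", "precision@1", "precision@5",
# "recall@5", "success_rate@5", "answer_in_context@5", "entity_in_input"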