in msmarco-v2-vector/track.py [0:0]
def calc_ndcg(qrels: Qrels, results: Results, k_list: list):
    """Return the mean ``ndcg_cut`` score per cutoff across evaluated queries.

    ``qrels`` and ``results`` are handed unchanged to
    ``pytrec_eval.RelevanceEvaluator`` and its ``evaluate`` method
    (presumably query id -> doc id -> relevance/score mappings; confirm
    against the ``Qrels``/``Results`` definitions).

    Args:
        qrels: Relevance judgements used to build the evaluator.
        results: Retrieved results to score.
        k_list: Cutoff values; each one yields an ``"ndcg_cut@<k>"`` entry.

    Returns:
        A ``defaultdict(float)`` mapping ``"ndcg_cut@<k>"`` to the average of
        the per-query ``ndcg_cut_<k>`` values. It is returned empty when the
        evaluator reports no queries.
    """
    # Imported at call time rather than at module load.
    import pytrec_eval as pe

    measure_names = ["ndcg_cut"]
    cutoffs = ",".join(str(k) for k in k_list)
    # One measure string per metric, e.g. "ndcg_cut.1,5,10" for k_list [1, 5, 10].
    measures = {f"{name}.{cutoffs}" for name in measure_names}
    per_query = pe.RelevanceEvaluator(qrels, measures).evaluate(results)

    # (key in the returned dict, key in pytrec_eval's per-query output)
    key_pairs = [
        (f"{name}@{k}", f"{name}_{k}")
        for name in measure_names
        for k in k_list
    ]

    totals = defaultdict(float)
    for query_scores in per_query.values():
        for out_key, trec_key in key_pairs:
            totals[out_key] += query_scores[trec_key]

    n_queries = len(per_query)
    if n_queries == 0:
        # Nothing was evaluated: skip averaging to avoid dividing by zero.
        return totals

    # Turn the accumulated sums into per-query means.
    for out_key, _ in key_pairs:
        totals[out_key] = float(totals[out_key] / n_queries)
    return totals