in ms_marco_eval.py
def compute_metrics(qids_to_relevant_passageids, qids_to_ranked_candidate_passages):
    """Compute MRR metric
    Args:
        qids_to_relevant_passageids (dict): dictionary mapping query ids to their relevant passage ids,
            as read in with load_reference or load_reference_from_stream
        qids_to_ranked_candidate_passages (dict): dictionary mapping query ids to ranked candidate passage ids
    Returns:
        dict: dictionary of metrics {'MRR @10': <MRR score>, 'QueriesRanked': <number of queries scored>}
    """
    all_scores = {}
    MRR = 0
    qids_with_relevant_passages = 0
    ranking = []
    for qid in qids_to_ranked_candidate_passages:
        if qid in qids_to_relevant_passageids:
            ranking.append(0)  # 0 marks "no relevant passage found within the top MaxMRRRank"
            target_pid = qids_to_relevant_passageids[qid]
            candidate_pid = qids_to_ranked_candidate_passages[qid]
            # MaxMRRRank is a module-level constant (10); guard against shorter candidate lists
            for i in range(min(MaxMRRRank, len(candidate_pid))):
                if candidate_pid[i] in target_pid:
                    # reciprocal rank of the first relevant passage
                    MRR += 1 / (i + 1)
                    ranking.pop()
                    ranking.append(i + 1)
                    break
    if len(ranking) == 0:
        raise IOError("No matching QIDs found. Are you sure you are scoring the evaluation set?")
    MRR = MRR / len(qids_to_relevant_passageids)
    all_scores['MRR @10'] = MRR
    all_scores['QueriesRanked'] = len(qids_to_ranked_candidate_passages)
    return all_scores
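
A minimal usage sketch follows, assuming compute_metrics is defined as above and that MaxMRRRank = 10 at module level (as in ms_marco_eval.py); the query and passage ids are made up for illustration.

MaxMRRRank = 10

if __name__ == '__main__':
    # Reference: each query id maps to the list of its relevant passage ids.
    qids_to_relevant_passageids = {
        1001: [7, 42],
        1002: [13],
    }
    # Candidates: each query id maps to its ranked list of retrieved passage ids.
    qids_to_ranked_candidate_passages = {
        1001: [42, 3, 7, 19, 25, 8, 2, 11, 30, 4],   # relevant passage at rank 1
        1002: [5, 13, 9, 1, 6, 22, 17, 14, 20, 8],   # relevant passage at rank 2
    }
    scores = compute_metrics(qids_to_relevant_passageids, qids_to_ranked_candidate_passages)
    print(scores)  # {'MRR @10': 0.75, 'QueriesRanked': 2}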