in paq/evaluation/eval_retriever.py [0:0]
# exact_match_score and metric_max_over_ground_truths are the SQuAD-style EM
# helpers used by this repo; the module path below is assumed.
from paq.evaluation.eval_utils import exact_match_score, metric_max_over_ground_truths


def eval_retriever(refs, preds, hits_at_k):
    # For each cutoff k, a prediction counts as a hit if any of its top-k
    # retrieved QA pairs exactly matches one of the reference answers.
    for k in hits_at_k:
        scores = []
        dont_print = False
        for r, p in zip(refs, preds):
            if hits_at_k[-1] > len(p['retrieved_qas']):
                print(f'Skipping hits@{k} eval as {k} is larger than number of retrieved results')
                dont_print = True
            ref_answers = r['answer']
            em = any([
                metric_max_over_ground_truths(exact_match_score, pred_answer['answer'][0], ref_answers)
                for pred_answer in p['retrieved_qas'][:k]
            ])
            scores.append(em)
        if not dont_print:
            print(f'{k}: {100 * sum(scores) / len(scores):0.1f}% \n({sum(scores)} / {len(scores)})')
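A minimal usage sketch, assuming refs and preds are lists of dicts read from JSONL files, where each ref carries an 'answer' list of gold strings and each pred carries a 'retrieved_qas' list whose entries have an 'answer' list; the file names here are hypothetical:

import json

# Hypothetical input files: gold references and retrieved QA pairs, one JSON object per line.
with open('nq_test_refs.jsonl') as f:
    refs = [json.loads(line) for line in f]
with open('nq_test_retrieved.jsonl') as f:
    preds = [json.loads(line) for line in f]

# Report hits@1, hits@10 and hits@50. Cutoffs should be sorted ascending, since
# the skip check compares hits_at_k[-1] against the number of retrieved results.
eval_retriever(refs, preds, hits_at_k=[1, 10, 50])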