def eval_retriever()

in paq/evaluation/eval_retriever.py


# NOTE: import path assumed; adjust to wherever exact_match_score and
# metric_max_over_ground_truths are defined in your checkout.
from paq.evaluation.eval_utils import exact_match_score, metric_max_over_ground_truths


def eval_retriever(refs, preds, hits_at_k):
    # Compute hits@k for each k: an example counts as a hit if any of the
    # top-k retrieved QA pairs exactly matches one of the gold answers.
    for k in hits_at_k:
        scores = []
        dont_print = False
        for r, p in zip(refs, preds):
            if hits_at_k[-1] > len(p['retrieved_qas']):
                # Too few retrieved results to evaluate the largest requested k
                print(f'Skipping hits@{k} eval as {k} is larger than number of retrieved results')
                dont_print = True
            ref_answers = r['answer']
            em = any([
                metric_max_over_ground_truths(exact_match_score, pred_answer['answer'][0], ref_answers)
                for pred_answer in p['retrieved_qas'][:k]
            ])
            scores.append(em)

        if not dont_print:
            # Report hits@k as a percentage, plus the raw hit / total counts
            print(f'{k}: {100 * sum(scores) / len(scores):0.1f}% \n({sum(scores)} / {len(scores)})')
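
A minimal usage sketch with hypothetical data, assuming the structure the function reads: each ref carries an 'answer' list of gold strings, and each pred carries a 'retrieved_qas' list whose entries hold an 'answer' list with the predicted string first.

# Hypothetical example data, shaped as eval_retriever expects.
refs = [
    {'answer': ['Paris']},
    {'answer': ['1969', 'July 1969']},
]
preds = [
    {'retrieved_qas': [{'answer': ['Paris']}, {'answer': ['Lyon']}]},
    {'retrieved_qas': [{'answer': ['1968']}, {'answer': ['1969']}]},
]

# hits@1 scores only the top retrieved QA pair; hits@2 scores the top two.
eval_retriever(refs, preds, hits_at_k=[1, 2])
# With standard SQuAD-style exact match, this should report roughly:
#   1: 50.0% (1 / 2)
#   2: 100.0% (2 / 2)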