def get_raw_scores()

in leaderboard/squad_eval_v2.py


from typing import Dict, Tuple


def get_raw_scores(dataset, preds) -> Tuple[Dict[str, float], Dict[str, float]]:
    """Compute per-question exact-match and F1 scores for a SQuAD v2-format dataset.

    Returns two dicts mapping question id to score. Relies on compute_exact and
    compute_f1, which are defined elsewhere in squad_eval_v2.py.
    """
    exact_scores: Dict[str, float] = {}
    f1_scores: Dict[str, float] = {}
    for article in dataset:
        for p in article["paragraphs"]:
            for qa in p["qas"]:
                qid = qa["id"]
                gold_answers = [a["text"] for a in qa["answers"]]
                if not gold_answers:
                    # For unanswerable questions, the only correct answer is the empty string
                    gold_answers = [""]
                if qid not in preds:
                    print(f"Missing prediction for {qid}")
                    continue
                a_pred = preds[qid]
                # Score the prediction against every gold answer and keep the best score
                exact_scores[qid] = max(compute_exact(a, a_pred) for a in gold_answers)
                f1_scores[qid] = max(compute_f1(a, a_pred) for a in gold_answers)
    return exact_scores, f1_scores
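

The helpers compute_exact and compute_f1 are defined elsewhere in squad_eval_v2.py and are not shown on this page. For reference, below is a minimal sketch of the standard SQuAD 2.0 scoring helpers they correspond to; the names and normalization steps follow the official SQuAD 2.0 evaluation script, but treat this as an illustration rather than the exact code in this repo:

import collections
import re
import string


def normalize_answer(s: str) -> str:
    """Lowercase, strip punctuation and articles, and collapse whitespace."""
    s = s.lower()
    s = "".join(ch for ch in s if ch not in set(string.punctuation))
    s = re.sub(r"\b(a|an|the)\b", " ", s)
    return " ".join(s.split())


def get_tokens(s: str):
    return normalize_answer(s).split() if s else []


def compute_exact(a_gold: str, a_pred: str) -> int:
    # 1 if the normalized strings match exactly, else 0
    return int(normalize_answer(a_gold) == normalize_answer(a_pred))


def compute_f1(a_gold: str, a_pred: str) -> float:
    # Token-level F1 between the normalized gold answer and prediction
    gold_toks = get_tokens(a_gold)
    pred_toks = get_tokens(a_pred)
    common = collections.Counter(gold_toks) & collections.Counter(pred_toks)
    num_same = sum(common.values())
    if len(gold_toks) == 0 or len(pred_toks) == 0:
        # If either side is empty, F1 is 1 when both are empty, else 0
        return float(gold_toks == pred_toks)
    if num_same == 0:
        return 0.0
    precision = num_same / len(pred_toks)
    recall = num_same / len(gold_toks)
    return (2 * precision * recall) / (precision + recall)

Example usage with a toy SQuAD-format dataset and a {question_id: answer_text} prediction dict (the ids and answers below are hypothetical; the field names match those the function indexes):

dataset = [{
    "paragraphs": [{
        "qas": [
            {"id": "q1", "answers": [{"text": "Denver Broncos"}]},
            {"id": "q2", "answers": []},  # unanswerable question
        ]
    }]
}]
preds = {"q1": "the Denver Broncos", "q2": ""}
exact, f1 = get_raw_scores(dataset, preds)
# exact == {"q1": 1, "q2": 1}; f1 == {"q1": 1.0, "q2": 1.0}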