in drop_eval.py [0:0]
def drop_metric(sample: str, reference: list[str]) -> Tuple[float, float]:
    """Score ``sample`` against each non-blank reference and keep the best.

    Args:
        sample: Predicted answer text, passed unchanged to
            ``get_drop_metrics``.
        reference: Candidate reference answers. Entries that are empty or
            whitespace-only are skipped.

    Returns:
        A ``(em, f1)`` pair holding the maximum of the first and of the
        second value returned by ``get_drop_metrics`` over the usable
        references. The two maxima are taken independently, so they may
        come from different references. If there is no usable reference
        (``reference`` is empty or all-blank), ``(0.0, 0.0)`` is returned.
    """
    em_scores = []
    f1_scores = []
    for answer in reference:
        # Blank references carry nothing to compare against.
        if not answer.strip():
            continue
        em, f1 = get_drop_metrics(sample, answer)
        em_scores.append(em)
        f1_scores.append(f1)
    # default= keeps max() from raising ValueError when every reference was
    # skipped; a sample with nothing to match scores zero.
    return (max(em_scores, default=0.0), max(f1_scores, default=0.0))