def find_closest_span_match()

in utils/span_heuristic.py [0:0]


def find_closest_span_match(passage: str, gold_answer: str) -> Tuple[str, float]:
    """Select the sub-span of a passage that best matches a gold answer by F1.

    A candidate window is first obtained from
    ``_find_approximate_matching_sequence``. Every contiguous run of
    whitespace-separated tokens inside that window is then scored against the
    gold answer with ``compute_f1_from_tokens`` (on normalized tokens, with
    empty tokens dropped from the candidate), and the first run reaching the
    highest score is kept.

    Args:
        passage: Text to search.
        gold_answer: Reference answer the span should resemble.

    Returns:
        A ``(span, f1)`` pair, where ``span`` is the chosen run of original
        (un-normalized) window tokens joined by single spaces and ``f1`` is
        ``compute_f1(gold_answer, span)``. If no run scores above zero, the
        whole candidate window (re-joined with single spaces) is returned.
    """
    # The score returned alongside the window is not needed here.
    window, _ = _find_approximate_matching_sequence(passage, gold_answer)
    window_tokens = window.split()
    gold_tokens = gold_answer.split()
    # Normalized window tokens are indexed in parallel with ``window_tokens``
    # below, so the winning indices are applied to the original tokens.
    # NOTE(review): assumes the default _normalize_tokens call keeps one
    # output per input token -- confirm against the helper.
    window_norm = _normalize_tokens(window_tokens)
    gold_norm = _normalize_tokens(gold_tokens, keep_empty_str=False)

    # ``start``/``end`` stay None until some run scores above zero; slicing
    # with None bounds then selects the entire window.
    start, end = None, None
    top_score = 0
    token_count = len(window_norm)
    for lo in range(token_count):
        for hi in range(lo + 1, token_count + 1):
            candidate = [tok for tok in window_norm[lo:hi] if tok]
            candidate_score = compute_f1_from_tokens(gold_norm, candidate)
            # Strict comparison: the earliest best-scoring run wins ties.
            if candidate_score > top_score:
                top_score = candidate_score
                start, end = lo, hi

    chosen_span = ' '.join(window_tokens[start:end])
    return chosen_span, compute_f1(gold_answer, chosen_span)