in utils/span_heuristic.py [0:0]
def find_closest_span_match(passage: str, gold_answer: str) -> Tuple[str, float]:
    """Find the span of ``passage`` that best matches ``gold_answer`` by F1.

    The search runs in two stages: a coarse window is obtained from
    ``_find_approximate_matching_sequence``, then every contiguous run of
    whitespace-separated tokens inside that window is scored against the
    gold answer and the highest-scoring run is kept.

    Args:
        passage: Text to search in.
        gold_answer: Reference answer the span should resemble.

    Returns:
        A ``(span, f1)`` pair: the selected span rebuilt from the window's raw
        (un-normalized) tokens joined by single spaces, and the value of
        ``compute_f1(gold_answer, span)``.
    """
    # The coarse matcher also returns a score; it is not used here.
    window, _window_score = _find_approximate_matching_sequence(passage, gold_answer)

    window_tokens = window.split()
    gold_tokens = gold_answer.split()
    # NOTE(review): indices found on the normalized list are applied to the raw
    # token list below, so this relies on _normalize_tokens keeping one output
    # entry per input token by default -- confirm against its definition.
    window_norm = _normalize_tokens(window_tokens)
    gold_norm = _normalize_tokens(gold_tokens, keep_empty_str=False)

    token_count = len(window_norm)
    top_score = 0
    # Unbounded slice: if no candidate scores above zero, the whole window is
    # returned as the span.
    top_slice = slice(None, None)
    for start in range(token_count):
        for stop in range(start + 1, token_count + 1):
            # Empty normalized tokens are dropped per candidate, not up front,
            # so that start/stop stay valid positions in window_tokens.
            candidate = [tok for tok in window_norm[start:stop] if len(tok) > 0]
            candidate_score = compute_f1_from_tokens(gold_norm, candidate)
            # Strict comparison: on ties the earliest candidate in scan order wins.
            if candidate_score > top_score:
                top_score = candidate_score
                top_slice = slice(start, stop)

    best_span = ' '.join(window_tokens[top_slice])
    # The returned score is recomputed from the raw strings rather than
    # reusing the token-level score from the search loop.
    return best_span, compute_f1(gold_answer, best_span)