in utils/span_heuristic.py [0:0]
def _find_approximate_matching_sequence(context: str, target: str) -> Tuple[str, float]:
"""Find some substring in the context which closely matches the target, returning this substring with a score."""
if target in context:
return target, 1.0
target_length = len(target.split())
max_sim_val = 0
max_sim_string = ''
seq_matcher = SM()
seq_matcher.set_seq2(target)
for ngram in ngrams(context.split(), target_length + int(0.05 * target_length)):
candidate_ngram = ' '.join(ngram)
seq_matcher.set_seq1(candidate_ngram)
similarity = seq_matcher.quick_ratio()
if similarity > max_sim_val:
max_sim_val = similarity
max_sim_string = candidate_ngram
if similarity == 1.0:
# early exiting
break
return max_sim_string, max_sim_val