in muss/mining/filtering.py [0:0]
def is_overlapping(source, simplification, *, overlapping_ratio_threshold=0.25):
    """Return True if the end of one text is exactly the beginning of the other.

    The check is run in both directions: the tail of ``source`` against the
    head of ``simplification``, and the tail of ``simplification`` against
    the head of ``source``.

    Args:
        source: First text.
        simplification: Second text.
        overlapping_ratio_threshold: Minimum ratio of overlapping characters
            to be considered an overlap. The shared span must be strictly
            longer than this fraction of the shorter text, and is always at
            least one character long.

    Returns:
        True if a sufficiently long suffix of one text is a prefix of the
        other, False otherwise. An empty text never overlaps with anything.

    Raises:
        ValueError: If ``overlapping_ratio_threshold`` is negative.

    Note:
        The suffix search starts at position 1 of the text whose end is being
        inspected, so a text that is *in its entirety* a prefix of the other
        (including two identical texts) is not reported as an overlap.
    """
    if overlapping_ratio_threshold < 0:
        raise ValueError('overlapping_ratio_threshold must be non-negative')

    def ordered_is_overlap(previous_text_window, next_text_window):
        """Check if the end of the previous window is exactly the beginning of the next one."""
        min_length = min(len(previous_text_window), len(next_text_window))
        min_overlap_length = int(min_length * overlapping_ratio_threshold) + 1
        if min_overlap_length > min_length:
            # An overlap cannot be longer than the shorter text. This also
            # rules out empty inputs, which would otherwise yield an empty
            # "overlap" that trivially matches.
            return False
        # Any valid overlap must begin with this prefix of the next window.
        min_possible_overlapping_text = next_text_window[:min_overlap_length]
        # Try every occurrence of the prefix (from position 1 onwards) as the
        # start of a candidate suffix of the previous window.
        index = previous_text_window.find(min_possible_overlapping_text, 1)
        while index != -1:
            if next_text_window.startswith(previous_text_window[index:]):
                return True
            index = previous_text_window.find(min_possible_overlapping_text, index + 1)
        return False  # Not found

    return ordered_is_overlap(source, simplification) or ordered_is_overlap(simplification, source)