in presidio-analyzer/presidio_analyzer/entity_recognizer.py [0:0]
def remove_duplicates(results: List[RecognizerResult]) -> List[RecognizerResult]:
    """
    Remove duplicate and redundant results.

    A result is dropped when any of the following holds:

    * it is equal to another result in the list (hash/equality based);
    * its score is 0;
    * it is contained in (``contained_in``) an already kept result of the
      same entity type. Results are visited from highest to lowest score,
      so the kept result always has a score greater than or equal to the
      dropped one.

    The returned list is ordered by descending score, then ascending start,
    then descending span length. Results that tie on all three keep the
    relative order they had in ``results``.

    :param results: List[RecognizerResult]
    :return: List[RecognizerResult]
    """
    # dict.fromkeys de-duplicates exactly like set() (same hash/equality),
    # but preserves first-seen order. set() iteration order is unrelated to
    # input order, which made the order of tied results unpredictable.
    unique_results = dict.fromkeys(results)
    ordered = sorted(
        unique_results,
        key=lambda r: (-r.score, r.start, -(r.end - r.start)),
    )

    kept = []
    for candidate in ordered:
        if candidate.score == 0:
            continue
        if candidate in kept:  # equals based comparison
            continue
        # Drop the candidate if an already kept result of the same entity
        # type fully covers it.
        covered = any(
            candidate.contained_in(existing)
            and candidate.entity_type == existing.entity_type
            for existing in kept
        )
        if not covered:
            kept.append(candidate)
    return kept