in comprehend_groundtruth_integration/src/comprehend_customer_scripts/GroundTruth/EntityRecognizer/groundtruth_to_comprehend_format_converter.py [0:0]
def _check_for_overlapping_annotations(self, annotations):
    """Fail if any two annotations on the same line cover overlapping spans.

    The list is sorted IN PLACE by begin offset (tuple index 2), so callers
    observe the reordered list afterwards. Each neighbouring pair is then
    compared: a pair overlaps when the earlier entry's end offset (index 3)
    is strictly greater than the later entry's begin offset (index 2).
    Spans that merely touch (end == next begin) are accepted.

    :param annotations: list of annotation tuples. Only these positions are
        read here: 1 (reported as ``line``), 2 (begin offset),
        3 (end offset) and 4 (entity type reported in the message).
    :raises Exception: for the first overlapping neighbouring pair, with a
        message produced by ``OVERLAPPING_ANNOTATIONS.substitute(...)``.
    """
    by_begin_offset = itemgetter(2)
    annotations.sort(key=by_begin_offset)

    # Pair every entry with its successor in begin-offset order.
    for earlier, later in zip(annotations, annotations[1:]):
        earlier_end = earlier[3]
        later_begin = later[2]
        if earlier_end > later_begin:
            message = OVERLAPPING_ANNOTATIONS.substitute(
                doc=self.groundtruth_manifest_file_name,
                line=later[1],
                annotations1=earlier[4],  # entity type of the earlier span
                annotations2=later[4],  # entity type of the span it collides with
            )
            raise Exception(message)