def _read_line_annotation()

in sagemaker/src/htr_dataset.py [0:0]


    def _read_line_annotation(self, annotation_dict):
        '''
        Build line-level annotations from one entry of the output.manifest.

        The entry is parsed with ``_read_page_annotation``; the annotations it
        returns are grouped by their ``line_num`` and every group is collapsed
        into a single line record.

        Parameter:
        ----------
        annotation_dict: {}
            line from the output.manifest

        Return:
        -------
        line_annotations: [{}]
            One dict per distinct ``line_num``, in the order each ``line_num``
            is first encountered (no explicit sort on the line number is done).
            Each dict has the keys:
              "filename":   copied from the page annotation
              "annotation": a one-element list holding a dict with
                  "text":    texts of the line as returned by
                             ``_sort_texts_on_bb_x``
                  "line_bb": result of ``_convert_bb_list_to_max_bb`` on the
                             line's bbs
                  "bb":      bbs of the line as returned by
                             ``_sort_texts_on_bb_x``
            NOTE(review): the bbs are presumably already converted from
            polygons to rectangles by ``_read_page_annotation`` - confirm.
        '''
        page = self._read_page_annotation(annotation_dict)

        # Bucket the page's annotations by line number; dict insertion order
        # keeps the lines in first-seen order.
        entries_by_line = {}
        for entry in page['annotation']:
            entries_by_line.setdefault(entry['line_num'], []).append(entry)

        line_annotations = []
        for entries in entries_by_line.values():
            boxes = [entry['bb'] for entry in entries]
            words = [entry['text'] for entry in entries]
            # Reorder boxes and texts together (helper name suggests ordering
            # along the x axis - confirm against its implementation).
            boxes, words = self._sort_texts_on_bb_x(boxes, words)
            line_record = {
                "text": words,
                "line_bb": self._convert_bb_list_to_max_bb(boxes),
                "bb": boxes
            }
            line_annotations.append({
                "filename": page["filename"],
                "annotation": [line_record]
            })

        return line_annotations