def _read_word_annotations()

in sagemaker/src/htr_dataset.py [0:0]


    def _read_word_annotations(self, annotation_dict):
        '''
        Split the page-level annotation of one output.manifest line into
        word-level records, one record per annotated entry.

        Parameter:
        ----------
        annotation_dict: {}
            line from the output.manifest; passed unchanged to
            self._read_page_annotation

        Return:
        -------
        word_annotations: [{[]}]
            One dict per entry of the page annotation, each of the form
            {"filename": ..., "annotation": [{"text": ..., "bb": ...}]}.
            The "text" and "bb" values are copied through exactly as
            returned by self._read_page_annotation; this method does not
            transform them itself.
        '''
        page = self._read_page_annotation(annotation_dict)

        # Each entry becomes its own record with a single-element
        # "annotation" list, tagged with the filename of its page.
        return [
            {
                "filename": page["filename"],
                "annotation": [{
                    "text": entry["text"],
                    "bb": entry['bb']
                }]
            }
            for entry in page["annotation"]
        ]