def _read_page_annotation()

in sagemaker/src/htr_dataset.py [0:0]


    def _read_page_annotation(self, annotation_dict):
        '''
        Extract the relevant information from one annotation (dict) of the output.manifest.

        The input is left untouched: every text annotation is shallow-copied
        before its "bb" entry is replaced, so calling this method again on the
        same annotation_dict still sees the original polygons.

        Parameter:
        ----------
        annotation_dict: {}
            line from the output.manifest. Must provide "source-ref" and
            annotation_dict["annotations"]["texts"], an iterable of dicts that
            each have a "bb" entry.

        Return:
        -------
        out: {}
            {"filename": base name of "source-ref",
             "annotation": list of copied text annotations}.
            Note that bbs are converted from polygons to rectangles
            (via self._convert_polygon_to_rects).

        Raise:
        ------
        KeyError
            if one of the keys listed above is missing.
        '''
        filename = os.path.basename(annotation_dict["source-ref"])

        annotation_list = []
        for annotation in annotation_dict["annotations"]["texts"]:
            # Plain assignment would only alias the caller's dict; copy it so
            # the manifest entry keeps its polygon after the conversion.
            converted = dict(annotation)
            converted["bb"] = self._convert_polygon_to_rects(annotation["bb"])
            annotation_list.append(converted)

        return {"filename": filename, "annotation": annotation_list}