in sagemaker/src/htr_dataset.py [0:0]
def _read_page_annotation(self, annotation_dict):
'''
Extract the relevant information from the annotation (dict) of the output.manifest.
Parameter:
----------
annotation_dict: {}
line from the output.manifest
Return:
-------
out: {[]}
formatted information.
Note that bbs are converted from polygons to rectangles.
'''
filename = os.path.basename(annotation_dict["source-ref"])
out = {"filename": filename}
annotation_list = []
for annotation in annotation_dict["annotations"]["texts"]:
tmp = annotation
tmp["bb"] = self._convert_polygon_to_rects(annotation["bb"])
annotation_list.append(tmp)
out["annotation"] = annotation_list
return out