in sagemaker/src/htr_dataset.py [0:0]
def _read_word_annotations(self, annotation_dict):
    '''
    Split the page-level annotation of one output.manifest line into
    one record per annotated word.

    Parameter:
    ----------
    annotation_dict: {}
        line from the output.manifest; handed unchanged to
        self._read_page_annotation.

    Return:
    -------
    word_annotations: [{[]}]
        One dict per entry of the page's "annotation" list, in the same
        order. Each dict has the page's "filename" and an "annotation"
        list holding exactly one {"text", "bb"} dict for that word.
        Any other keys on the page entry are not carried over.

    NOTE(review): no bounding-box conversion happens in this method; any
    polygon-to-rectangle conversion is presumably done inside
    _read_page_annotation -- confirm there.
    '''
    page = self._read_page_annotation(annotation_dict)
    # The filename lookup is kept inside the per-word expression so that a
    # page without any words never touches the "filename" key.
    return [
        {
            "filename": page["filename"],
            "annotation": [{"text": word["text"], "bb": word["bb"]}],
        }
        for word in page["annotation"]
    ]