def _decode_anno()

in src/chug/doc/doc_read_processor.py [0:0]


    def _decode_anno(self, sample):
        """Decode the annotation of ``sample`` and extract its page metadata.

        The annotation is looked up in ``sample`` with ``_get_value`` using
        ``self.text_input_key`` and handed to ``self._process_anno_pages``.
        Page details are then read from the ``'_parse'`` entry of the result.

        Args:
            sample: Sample to decode. It is accessed like a mapping; the
                ``'__url__'`` and ``'__key__'`` entries are only used to
                identify the sample in the error log.

        Returns:
            A ``(page_anno, page_indices, num_anno_pages)`` tuple, where
            ``page_anno`` is the value returned by ``_process_anno_pages``,
            ``page_indices`` falls back to ``[0]`` and ``num_anno_pages``
            falls back to ``1`` when ``'_parse'`` does not provide them.

        Raises:
            AssertionError: If no annotation is found in ``sample``.
            Exception: Whatever ``_process_anno_pages`` raises, re-raised
                unchanged after being logged.
        """
        anno = _get_value(self.text_input_key, sample)
        if anno is None:
            # Raised explicitly (rather than via `assert`) so the check is not
            # stripped under `python -O`; the exception type is kept as
            # AssertionError for callers that already handle it.
            raise AssertionError(f"No annotation found with keys ({self.text_input_key}).")

        try:
            page_anno = self._process_anno_pages(anno)
        except Exception:
            # Use .get() so a sample without '__url__' / '__key__' cannot raise
            # KeyError here and mask the original processing error.
            _logger.error(
                'Issue processing annotation for %s, %s.',
                sample.get('__url__', '<unknown>'),
                sample.get('__key__', '<unknown>'),
            )
            raise

        # extract info from the _parse
        info = page_anno.get('_parse', {})
        page_indices = info.get('page_indices', [0])  # the samples page indices
        num_anno_pages = info.get('num_pages', 1)

        # TODO support 'image info' to relay details such as text bbox, layout
        # page_image_info = info.get('image_info', None)
        # if page_image_info is not None:
        #     assert len(page_image_info) == len(page_indices)

        return page_anno, page_indices, num_anno_pages