in src/chug/doc/doc_processor.py [0:0]
def _preprocess_image_pages(self, decoded_pages, page_image_info=None):
    """Run the configured image preprocessing callable over every decoded page.

    Args:
        decoded_pages: Iterable of decoded page images.
        page_image_info: Optional iterable of per-page info objects. When
            given, pages and info entries are paired positionally and each
            entry is forwarded to the callable as the ``page_info`` keyword.

    Returns:
        ``decoded_pages`` itself (unchanged, same object) when
        ``self.image_process_fn`` is None; otherwise a new list holding the
        callable's result for each page. With ``page_image_info`` supplied,
        pairing uses ``zip``, so the result is only as long as the shorter
        of the two inputs.
    """
    # No preprocessing configured: hand the pages back untouched.
    if self.image_process_fn is None:
        return decoded_pages

    # Plain case: the callable only receives the page.
    if page_image_info is None:
        return [self.image_process_fn(page) for page in decoded_pages]

    # FIXME, WIP. If the train objective involves masking or otherwise
    # processing the image with knowledge of annotations / text content, the
    # anno info should contain mask locations, etc. For such a task, it needs
    # to be passed through to the image preprocess callable.
    processed_pages = []
    for page, info in zip(decoded_pages, page_image_info):
        processed_pages.append(self.image_process_fn(page, page_info=info))
    return processed_pages