in src/pre_human_task_lambda.py [0:0]
def is_scanned_pdf(images, page_width, page_height) -> bool:
    """Return whether a PDF page looks like a scanned page.

    The page is treated as scanned when the summed area of its images,
    divided by the page area, reaches the module-level threshold
    ``TOTAL_IMAGE_SIZE_TO_PAGE_SIZE_RATIO_THREASHOLD``.

    Args:
        images: Collection of mappings, each providing ``'width'`` and
            ``'height'`` entries for one image on the page.
        page_width: Width of the page.
        page_height: Height of the page.
            NOTE(review): image and page dimensions are presumably in the
            same units -- confirm against the caller.

    Returns:
        ``True`` when the image-area to page-area ratio is at or above the
        threshold; ``False`` when there are no images, when the page has
        zero area, or when the ratio is below the threshold.
    """
    # No images at all: nothing on the page can be a scan.
    if not images:
        return False

    print(f'Total number of images in a single PDF page {len(images)}')
    page_size = page_width * page_height
    image_size_total = sum(image['width'] * image['height'] for image in images)

    # A zero-area page would make the ratio below divide by zero; such a page
    # cannot meaningfully be covered by images, so report "not scanned".
    if page_size == 0:
        print(f"page_size = {page_size}; cannot compute image/page ratio, treating page as not scanned")
        return False

    image_size_to_page_size_ratio = image_size_total / page_size
    print(f"image_size_total = {image_size_total}, page_size = {page_size}, ratio = {image_size_to_page_size_ratio}, threshold = {TOTAL_IMAGE_SIZE_TO_PAGE_SIZE_RATIO_THREASHOLD}")
    return image_size_to_page_size_ratio >= TOTAL_IMAGE_SIZE_TO_PAGE_SIZE_RATIO_THREASHOLD