def is_scanned_pdf()

in src/pre_human_task_lambda.py [0:0]


def is_scanned_pdf(images, page_width, page_height):
    """Return whether a PDF page looks scanned, judged by image coverage.

    The summed area of all images (``width * height`` of each entry) is
    divided by the page area (``page_width * page_height``). The page is
    reported as scanned when that ratio is at least
    ``TOTAL_IMAGE_SIZE_TO_PAGE_SIZE_RATIO_THREASHOLD``.

    Args:
        images: Sequence of mappings, each providing ``'width'`` and
            ``'height'`` keys. An empty (or ``None``) value means the page
            has no images.
        page_width: Width of the page, in the same units as the image widths.
        page_height: Height of the page, in the same units as the image
            heights.

    Returns:
        ``True`` when the image-to-page area ratio reaches the threshold;
        ``False`` when there are no images, when the page area is not
        positive, or when the ratio is below the threshold.
    """
    # No images at all: nothing can cover the page.
    if not images:
        return False

    print(f'Total number of images in a single PDF page {len(images)}')

    page_size = page_width * page_height
    # A zero (or negative) page area would make the ratio undefined and
    # previously crashed with ZeroDivisionError; treat it as "not scanned".
    if page_size <= 0:
        print(f"page_size = {page_size} is not positive; cannot compute image coverage ratio")
        return False

    image_size_total = sum(image['width'] * image['height'] for image in images)
    image_size_to_page_size_ratio = image_size_total / page_size
    print(f"image_size_total = {image_size_total}, page_size = {page_size}, ratio = {image_size_to_page_size_ratio}, threshold = {TOTAL_IMAGE_SIZE_TO_PAGE_SIZE_RATIO_THREASHOLD}")
    return image_size_to_page_size_ratio >= TOTAL_IMAGE_SIZE_TO_PAGE_SIZE_RATIO_THREASHOLD