# From seed/util/preprocess.py [0:0]
def crop_image_from_pdf_page(pdf_path, page_number, bounding_box, dpi=300):
    """
    Crops a region from a given page in a PDF and returns it as an image.

    :param pdf_path: Path to the PDF file.
    :param page_number: The page number to crop from (0-indexed).
    :param bounding_box: A tuple of (x0, y0, x1, y1) coordinates for the
        bounding box, in inches. Each value is multiplied by 72 to convert
        it to PDF points before clipping.
    :param dpi: Resolution of the rendered crop in dots per inch.
        Defaults to 300.
    :return: A PIL Image (mode "RGB") of the cropped area.
    :raises ValueError: If ``dpi`` is not a positive number.
    """
    if dpi <= 0:
        raise ValueError(f"dpi must be positive, got {dpi!r}")

    # PDF user space is measured in points (72 per inch), so this zoom
    # factor renders the page at the requested DPI.
    zoom = dpi / 72

    doc = fitz.open(pdf_path)
    try:
        page = doc.load_page(page_number)
        # The rect requires the coordinates in the format (x0, y0, x1, y1),
        # expressed in points.
        clip_rect = fitz.Rect([coord * 72 for coord in bounding_box])
        # alpha=False (the default) keeps the samples as 3 bytes per pixel,
        # which is what the "RGB" mode below expects.
        pix = page.get_pixmap(
            matrix=fitz.Matrix(zoom, zoom),
            clip=clip_rect,
            alpha=False,
        )
        return Image.frombytes("RGB", (pix.width, pix.height), pix.samples)
    finally:
        # Always release the document, even when the page lookup or the
        # rendering raises.
        doc.close()