def create_output_pdfs()

in workflow2_docsplitter/sam-app/functions/docsplitter_function/index.py [0:0]


def create_output_pdfs(input_pdf_content, pages_by_class):
    """Split one PDF into per-class PDFs and bundle them into an in-memory zip.

    For every class in ``pages_by_class`` a new PDF is assembled from the
    listed pages of the input document and stored in the archive under the
    name ``<class>.pdf``.

    Args:
        input_pdf_content: Raw bytes of the source PDF.
        pages_by_class: Mapping of class name to the page numbers of the
            input PDF that belong to that class. Each number is passed
            straight to ``getPage`` (presumably zero-based -- confirm
            against the caller).

    Returns:
        A ``BytesIO`` containing the finished zip archive. The buffer is not
        rewound here, so callers should use ``getvalue()`` or ``seek(0)``
        before reading from it.
    """
    reader = PdfFileReader(BytesIO(input_pdf_content), strict=False)
    archive_buffer = BytesIO()

    with zipfile.ZipFile(archive_buffer, "w") as archive:
        for _class, class_pages in pages_by_class.items():
            # Collect this class's pages from the source document.
            writer = PdfFileWriter()
            for page_index in class_pages:
                source_page = reader.getPage(page_index)
                writer.addPage(source_page)

            # Render the PDF into its own buffer first, then copy the
            # finished bytes into the archive member.
            rendered = BytesIO()
            writer.write(rendered)
            pdf_bytes = rendered.getvalue()

            with archive.open(f"{_class}.pdf", 'w') as member:
                member.write(pdf_bytes)

            print(f"Created PDF for {_class}")

    return archive_buffer