def main()

in workflow3_local/local_docsplitter.py [0:0]


def _split_s3_uri(s3_uri):
    """Split a ``scheme://bucket/key`` URI into ``(bucket, key)``.

    Args:
        s3_uri: URI string such as ``s3://my-bucket/path/to/input.pdf``.

    Returns:
        Tuple ``(bucket_name, object_key)``.

    Raises:
        ValueError: if the URI does not contain both a bucket and a key.
    """
    # "s3://bucket/some/key" -> ["s3:", "", "bucket", "some/key"]; limiting
    # the split to 3 keeps any further "/" characters inside the key.
    parts = s3_uri.split("/", 3)
    if len(parts) < 4 or not parts[2] or not parts[3]:
        raise ValueError(
            f"Expected an S3 URI of the form s3://bucket/key, got {s3_uri!r}"
        )
    return parts[2], parts[3]


def main(endpoint_arn, choice, input_pdf_info):
    """Split an input PDF into one output PDF per class.

    The input PDF is either downloaded from S3 or read from a local path,
    its pages are grouped by class via ``split_input_pdf_by_class``, and the
    per-class PDFs are written by ``create_output_pdfs`` into a timestamped
    output directory next to this file. A timestamped temporary directory is
    created for intermediate files and removed before returning, even when
    processing fails.

    Args:
        endpoint_arn: Endpoint ARN forwarded to ``split_input_pdf_by_class``.
        choice: ``"s3"`` to download the input from S3, ``"local"`` to read
            it from the local filesystem.
        input_pdf_info: S3 URI of the input PDF when ``choice`` is ``"s3"``,
            otherwise the local path of the input PDF.

    Raises:
        ValueError: if ``choice`` is not ``"s3"`` or ``"local"``, or if the
            S3 URI does not contain both a bucket and an object key.
    """
    # Validate the arguments before touching the filesystem so that bad
    # input does not leave empty working directories behind.
    if choice not in ("s3", "local"):
        raise ValueError(f"choice must be 's3' or 'local', got {choice!r}")
    if choice == "s3":
        bucket_name, input_pdf_key = _split_s3_uri(input_pdf_info)

    root_path = os.path.dirname(os.path.abspath(__file__))
    _id = datetime.now().strftime("%Y%m%d%H%M%S")
    temp_dir_name = f"workflow2_temp_documents-{_id}"
    temp_dir_path = f"{root_path}/{temp_dir_name}"
    output_dir_name = f"workflow2_output_documents-{_id}"
    output_dir_path = f"{root_path}/{output_dir_name}"
    create_directories([temp_dir_path, output_dir_path])

    try:
        if choice == "s3":
            # The S3 client is only needed for this branch, so local runs do
            # not construct one.
            s3 = boto3.client('s3')
            input_pdf_path = f"{temp_dir_path}/input.pdf"
            with open(input_pdf_path, "wb") as data:
                s3.download_fileobj(bucket_name, input_pdf_key, data)
        else:
            input_pdf_path = input_pdf_info

        # pages_by_class is a dictionary
        # key is class name; value is list of page numbers belonging to the key class
        pages_by_class = split_input_pdf_by_class(input_pdf_path, temp_dir_path, endpoint_arn, _id)

        create_output_pdfs(input_pdf_path, pages_by_class, output_dir_path, output_dir_name)
    finally:
        # Always drop the temporary working directory, including on failure.
        rmtree(temp_dir_path)

    print("Multi-class PDFs have been created in the output folder, " + output_dir_path)