def split_pdf()

in src/pdf-splitter/main.py [0:0]


def split_pdf(input_bucket, input_file, output_bucket, output_folder, dpi):
    """Split a PDF stored in GCS into one JPEG per page and upload the pages.

    The PDF is downloaded into a private temporary directory, rendered to
    images with ``convert_from_path`` and each page is uploaded to
    ``gs://{output_bucket}/{output_folder}/page-NNNN.jpg`` (zero-based,
    zero-padded to four digits).

    Args:
        input_bucket: Name of the bucket holding the source PDF.
        input_file: Object name of the source PDF inside ``input_bucket``.
        output_bucket: Name of the bucket that receives the page images.
        output_folder: Object-name prefix under which the pages are stored.
        dpi: Rendering resolution, passed straight to ``convert_from_path``.

    Returns:
        The list of uploaded object names, in page order. Empty when the
        conversion yields no pages.

    Raises:
        FileNotFoundError: If ``input_file`` does not exist in
            ``input_bucket``.

    NOTE(review): relies on the module-level ``storage_client`` and
    ``convert_from_path`` (presumably pdf2image) -- confirm against the
    file's imports.
    """
    # Imported locally so this block stays self-contained even if the module
    # does not import ``tempfile`` at file level.
    import tempfile

    print(f"Downloading file: gs://{input_bucket}/{input_file}")

    source_blob = storage_client.get_bucket(input_bucket).get_blob(input_file)
    if source_blob is None:
        # get_blob() signals a missing object by returning None; fail with a
        # clear message instead of an AttributeError on the next line.
        raise FileNotFoundError(
            f"gs://{input_bucket}/{input_file} does not exist"
        )

    uploaded_images = []

    # Every scratch file lives in a temporary directory that is removed even
    # when download, conversion or upload raises, so nothing leaks into the
    # working directory.
    with tempfile.TemporaryDirectory() as work_dir:
        downloaded_filename = os.path.join(work_dir, "input.pdf")
        source_blob.download_to_filename(downloaded_filename)
        print(f"Input file downloaded from GCS to {downloaded_filename}")

        images = convert_from_path(downloaded_filename, dpi)

        # Save each page as a JPEG and upload it to the output bucket.
        destination_bucket = storage_client.get_bucket(output_bucket)
        for i, image in enumerate(images):
            tmp_name = os.path.join(work_dir, f"page-{i:04d}.jpg")
            image.save(tmp_name, 'JPEG')

            # Upload image to GCS
            uploaded_filename = f"{output_folder}/page-{str(i).zfill(4)}.jpg"
            destination_bucket.blob(uploaded_filename).upload_from_filename(
                tmp_name
            )

            # Keep a list of the uploaded images
            uploaded_images.append(uploaded_filename)
            print(f"Image uploaded to gs://{output_bucket}/{uploaded_filename}")

            # Remove the page right away to bound scratch-disk usage on
            # large documents; the directory cleanup covers failure paths.
            os.remove(tmp_name)

    return uploaded_images