def process_image()

in src/dlp-runner/main.py
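This function downloads an image from Cloud Storage, redacts it with Cloud DLP, uploads the redacted image plus a JSON findings file, and returns the output locations. The excerpt depends on module-level state defined elsewhere in main.py; below is a minimal sketch of the assumed imports and client setup (names inferred from the code, not confirmed by the source):

import datetime
import json
import os
import uuid

from google.cloud import storage

# Shared Cloud Storage client, reused across calls to process_image.
storage_client = storage.Client()

# Assumed helper defined elsewhere in main.py that wraps the DLP API and
# returns findings as a list of dicts:
# def redact_image(project, input_path, output_path, inspect_template,
#                  include_quotes) -> list[dict]: ...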


def process_image(input_file_bucket, input_file, output_file_bucket,
                  output_file, project, inspect_template, include_quotes,
                  labels):
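    """Redact an image stored in GCS and record the DLP findings.

    Downloads gs://{input_file_bucket}/{input_file}, redacts it with Cloud
    DLP using inspect_template, uploads the result to
    gs://{output_file_bucket}/{output_file}, writes the findings alongside
    it as JSON, and returns the locations of both outputs. Optional labels
    are attached to each finding.
    """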
    # declare names for temporary files
    # declare names for temporary files; a random UUID keeps concurrent
    # invocations from clobbering each other's local files
    _, ext = os.path.splitext(os.path.basename(input_file))
    unique_id = str(uuid.uuid4())
    tmp_file = f"{unique_id}{ext}"
    tmp_file_redacted = f"{unique_id}-redacted{ext}"

    # download file from bucket
    print(f"Downloading input file from gs://{input_file_bucket}/{input_file}")
    input_bucket = storage_client.get_bucket(input_file_bucket)
    input_blob = input_bucket.get_blob(input_file)
    input_blob.download_to_filename(tmp_file)
    print(f"Input file downloaded from GCS to {tmp_file}")

    # redact file using DLP
    findings = redact_image(project, tmp_file, tmp_file_redacted,
                            inspect_template, include_quotes)
    print(f"Redacted image saved to file {tmp_file_redacted}")

    # upload redacted image to bucket
    output_bucket = storage_client.get_bucket(output_file_bucket)
    out_blob = output_bucket.blob(output_file)
    out_blob.upload_from_filename(tmp_file_redacted)
    print(
        f"Redacted image uploaded to gs://{output_file_bucket}/{output_file}")

    for f in findings:
        # The API returns create_time as an RFC 3339 string, but the BQ table
        # expects the protobuf Timestamp shape (create_time.seconds and
        # create_time.nanos). The [0:19] slice keeps only "YYYY-MM-DDTHH:MM:SS"
        # (19 characters), dropping the fractional seconds, which the API
        # omits entirely when the timestamp falls on a round second.
        create_time = datetime.datetime.strptime(f["create_time"][0:19],
                                                 '%Y-%m-%dT%H:%M:%S')

        # The timestamp is UTC; strftime('%s') is non-portable and interprets
        # naive datetimes in local time, so compute epoch seconds explicitly.
        f["create_time"] = {
            "seconds": int(create_time.replace(
                tzinfo=datetime.timezone.utc).timestamp()),
            "nanos": 0
        }
        f["location"]["container"] = {
            "project_id": project,
            "full_path": f"gs://{input_file_bucket}/{input_file}"
        }

        # attach the caller-supplied labels to each finding, or drop the
        # field entirely when none were given
        if labels:
            f["labels"] = [{"key": key, "value": value}
                           for key, value in labels.items()]
        else:
            f.pop("labels", None)

    # upload findings to cloud storage
    # derive the findings filename from the output name itself; str.replace
    # could match the input's extension elsewhere in the path
    findings_file = os.path.splitext(output_file)[0] + ".json"
    out_blob = output_bucket.blob(findings_file)
    out_blob.upload_from_string(data=json.dumps(findings),
                                content_type='application/json')
    print(
        f"Redaction metadata successfully uploaded to gs://{output_file_bucket}/{findings_file}"
    )

    # clean up temporary local files
    os.remove(tmp_file)
    os.remove(tmp_file_redacted)

    return {
        "redacted_image": {
            "bucket": output_file_bucket,
            "file": output_file
        },
        "findings": {
            "bucket": output_file_bucket,
            "file": findings_file
        }
    }
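A hypothetical invocation, with illustrative bucket, file, and template names (none of these values come from the source):

result = process_image(
    input_file_bucket="uploads-bucket",
    input_file="scans/invoice-0042.png",
    output_file_bucket="redacted-bucket",
    output_file="scans/invoice-0042.png",
    project="my-gcp-project",
    inspect_template="projects/my-gcp-project/inspectTemplates/pii",
    include_quotes=True,
    labels={"source": "email-ingest"},
)
# result == {"redacted_image": {"bucket": "redacted-bucket",
#                               "file": "scans/invoice-0042.png"},
#            "findings": {"bucket": "redacted-bucket",
#                         "file": "scans/invoice-0042.json"}}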