# process_invoice — excerpt from
# fraud-detection-python/cloud-functions/process-invoices/main.py

def process_invoice(event, context):
    """
    Background Cloud Function: extract invoice entities and save to BigQuery.

    Triggered by a GCS object-finalize event. Batch-processes the uploaded
    file with Document AI, waits for the operation, reads the output
    Document protos back from GCS, writes each document's entities to
    BigQuery, forwards any address fields to Pub/Sub, and finally runs
    GCS cleanup (presumably archiving the input — confirm in cleanup_gcs).

    Args:
        event: GCS event payload dict; reads the "bucket", "name", and
            "contentType" keys.
        context: Cloud Functions event metadata (unused).

    Returns:
        None. Progress and errors are reported via print (Cloud Logging).
    """
    input_bucket = event.get("bucket")
    input_filename = event.get("name")
    mime_type = event.get("contentType")

    # Guard clauses: bail out on malformed events before doing any work.
    if not input_bucket or not input_filename:
        print("No bucket or filename provided")
        return

    if mime_type not in ACCEPTED_MIME_TYPES:
        # f-string instead of "+" concatenation: a missing contentType
        # (mime_type is None) lands in this branch, and str + None would
        # raise TypeError instead of logging the rejection.
        print(f"Cannot parse the file type: {mime_type}")
        return

    print(f"Mime Type: {mime_type}")

    gcs_input_uri = f"gs://{input_bucket}/{input_filename}"
    print(f"Input File: {gcs_input_uri}")

    # Kick off the Document AI batch operation (long-running).
    operation = _batch_process_documents(
        PROJECT_ID, LOCATION, PROCESSOR_ID, gcs_input_uri, destination_uri
    )

    print(f"Document Processing Operation: {operation.operation.name}")

    # Block until the long-running operation finishes (or times out).
    operation.result(timeout=timeout)

    # Output files will be in a new subdirectory with Operation ID as the name.
    # Explicit match check: fail with a descriptive error rather than the
    # opaque AttributeError that .group() on None would raise.
    match = re.search(r"operations\/(\d+)", operation.operation.name, re.IGNORECASE)
    if match is None:
        raise ValueError(
            f"Could not parse operation ID from {operation.operation.name}"
        )
    operation_id = match.group(1)

    output_directory = f"{gcs_output_uri_prefix}/{operation_id}"
    print(f"Output Path: gs://{gcs_output_bucket}/{output_directory}")

    print("Output files:")

    output_document_protos = get_document_protos_from_gcs(
        gcs_output_bucket, output_directory
    )

    # Reading all entities into a dictionary to write into a BQ table.
    for document_proto in output_document_protos:
        entities = extract_document_entities(document_proto)
        entities["input_file_name"] = input_filename

        print("Entities:", entities)
        print("Writing DocAI Entities to BQ")

        # Add entities to the DocAI extracted-entities table.
        # NOTE(review): DATSET_NAME looks misspelled, but it must match the
        # module-level constant defined elsewhere in this file — do not
        # "fix" it here without renaming the constant too.
        write_to_bq(DATSET_NAME, ENTITIES_TABLE_NAME, entities)

        # Send address data to Pub/Sub for downstream validation.
        for address_field in address_fields:
            if address_field in entities:
                process_address(address_field, entities[address_field], input_filename)

    cleanup_gcs(
        input_bucket,
        input_filename,
        gcs_output_bucket,
        output_directory,
        gcs_archive_bucket_name,
    )
    return