in fraud-detection-python/cloud-functions/process-invoices/main.py [0:0]
def process_invoice(event, context):
    """
    Extract invoice entities via Document AI and save them to BigQuery.

    Cloud Function entry point triggered by a GCS object-finalize event.
    Sends the uploaded file through a Document AI batch process, writes the
    extracted entities to BigQuery, forwards address fields to Pub/Sub, and
    finally archives/cleans up the input and output GCS objects.

    Args:
        event: GCS event payload dict; reads "bucket", "name", "contentType".
        context: Cloud Functions event context (unused).

    Returns:
        None. Progress and errors are reported via print (Cloud Logging).
    """
    input_bucket = event.get("bucket")
    input_filename = event.get("name")
    mime_type = event.get("contentType")
    if not input_bucket or not input_filename:
        print("No bucket or filename provided")
        return
    # contentType can be absent on some GCS events (mime_type is None then);
    # treat that like any unsupported type. The f-string also avoids the
    # TypeError that "str + None" concatenation would raise.
    if mime_type not in ACCEPTED_MIME_TYPES:
        print(f"Cannot parse the file type: {mime_type}")
        return
    print(f"Mime Type: {mime_type}")
    gcs_input_uri = f"gs://{input_bucket}/{input_filename}"
    print(f"Input File: {gcs_input_uri}")
    operation = _batch_process_documents(
        PROJECT_ID, LOCATION, PROCESSOR_ID, gcs_input_uri, destination_uri
    )
    print(f"Document Processing Operation: {operation.operation.name}")
    # Wait for the operation to finish
    operation.result(timeout=timeout)
    # Output files will be in a new subdirectory with Operation ID as the name
    match = re.search(r"operations\/(\d+)", operation.operation.name, re.IGNORECASE)
    if match is None:
        # Unexpected operation-name format; log and bail out instead of
        # raising AttributeError on .group() below.
        print(f"Could not parse operation ID from: {operation.operation.name}")
        return
    operation_id = match.group(1)
    output_directory = f"{gcs_output_uri_prefix}/{operation_id}"
    print(f"Output Path: gs://{gcs_output_bucket}/{output_directory}")
    print("Output files:")
    output_document_protos = get_document_protos_from_gcs(
        gcs_output_bucket, output_directory
    )
    # Reading all entities into a dictionary to write into a BQ table
    for document_proto in output_document_protos:
        entities = extract_document_entities(document_proto)
        entities["input_file_name"] = input_filename
        print("Entities:", entities)
        print("Writing DocAI Entities to BQ")
        # Add Entities to DocAI Extracted Entities Table.
        # NOTE(review): DATSET_NAME is misspelled but is the module-level
        # constant's actual name — fix the constant definition first.
        write_to_bq(DATSET_NAME, ENTITIES_TABLE_NAME, entities)
        # Send Address Data to PubSub
        for address_field in address_fields:
            if address_field in entities:
                process_address(address_field, entities[address_field], input_filename)
    cleanup_gcs(
        input_bucket,
        input_filename,
        gcs_output_bucket,
        output_directory,
        gcs_archive_bucket_name,
    )
    return