web-app-demo/Backend/api/helper.py (56 lines of code) (raw):
# Copyright 2022 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# pylint: disable-msg=too-many-locals
""" Helper file that holds DocAI API calls"""
from google.cloud.documentai_v1beta3 import Document
from google.cloud.documentai_v1beta3 import DocumentProcessorServiceClient
from google.cloud.documentai_v1beta3 import ListProcessorsRequest
from google.cloud.documentai_v1beta3 import ProcessRequest
from google.cloud.documentai_v1beta3 import ProcessResponse
def process_document(process_document_request):
"""Handles Document AI API call and returns the document proto as JSON"""
project_id = process_document_request["project_id"]
location = process_document_request["location"]
file_path = process_document_request["file_path"]
file_type = process_document_request["file_type"]
processor_id = process_document_request["processor_id"]
# Instantiates a client
client = DocumentProcessorServiceClient()
# The full resource name of the processor, e.g.:
# projects/project-id/locations/location/processor/processor-id
# You must create new processors in the Cloud Console first
name = f"projects/{project_id}/locations/{location}/processors/{processor_id}"
# TODO : change file handling if the file that was sent in the request is stored in GCS # pylint: disable=W0511
with open(file_path, "rb") as pdf:
pdf_content = pdf.read()
# Read the file into memory
document = Document()
document.content = pdf_content
document.mime_type = file_type
# Configure the process request
request = ProcessRequest()
request.name = name
request.document = document
# Use the Document AI client to process the sample form
try:
result = client.process_document(request=request)
except Exception as err: # pylint: disable=W0703
return {
"resultStatus": "ERROR",
"errorMessage": str(err),
}
document = result.document
json_result = ProcessResponse.to_json(result)
return json_result
# TODO: Store the file that was sent in the request in GCS # pylint: disable=W0511
def store_file(file):
"""Stores the file to specified destination"""
destination = "/".join(["api/test_docs", file.name])
file.save(destination)
return destination
def populate_list_source(project_id, location, processor_id_by_processor_type):
"""Gets all available processors from the specified GCP project"""
client = DocumentProcessorServiceClient()
req = ListProcessorsRequest()
req.parent = f"projects/{project_id}/locations/{location}"
try:
processor_list = client.list_processors(req)
for processor in processor_list:
# The resource name of the processor follows the following
# format `projects/{project}/locations/{location}/processors/{processor}`
parsed_path = client.parse_processor_path(processor.name)
processor_id_by_processor_type[processor.type_] = parsed_path["processor"]
except Exception as err: # pylint: disable=W0703
if location == "ENTER_YOUR_LOCATION_HERE":
str_error = "Location was not changed"
else:
str_error = str(err)
return {
"resultStatus": "ERROR",
"errorMessage": str_error,
}, 400
return {
"resultStatus": "SUCCESS",
}