tax-processing-pipeline-python/main.py (83 lines of code) (raw):
# Copyright 2022 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# type: ignore
"""Flask Web Server"""
import os
from tempfile import TemporaryDirectory
from typing import List, Tuple
from uuid import uuid4
from consts import FIRESTORE_COLLECTION_PREFIX
from consts import FIRESTORE_PROJECT_ID
from docai_pipeline import run_docai_pipeline
from firestore_utils import delete_collection
from firestore_utils import read_collection
from flask import after_this_request
from flask import Flask
from flask import render_template
from flask import request
from tax_pipeline import calculate_tax_values
from werkzeug.exceptions import HTTPException
SESSION_ID = str(uuid4())
FIRESTORE_COLLECTION = f"{FIRESTORE_COLLECTION_PREFIX}-{SESSION_ID}"
app = Flask(__name__)
UPLOAD_FOLDER = "/tmp"
ALLOWED_MIMETYPES = set(["application/pdf", "image/tiff", "image/jpeg"])
@app.route("/", methods=["GET"])
def index() -> str:
"""
Web Server, Homepage
"""
return render_template("index.html")
@app.route("/file_upload", methods=["POST"])
def file_upload() -> str:
"""
Handle file upload request
"""
# pylint: disable=consider-using-with
temp_dir = TemporaryDirectory()
@after_this_request
def cleanup(response):
temp_dir.cleanup()
return response
# Check if POST Request includes Files
if not request.files:
return render_template("index.html", message_error="No files provided")
files = request.files.getlist("files")
uploaded_filenames = save_files_to_temp_directory(files, temp_dir)
if not uploaded_filenames:
return render_template("index.html", message_error="No valid files provided")
status_messages = run_docai_pipeline(uploaded_filenames, FIRESTORE_COLLECTION)
return render_template(
"index.html",
message_success="Successfully uploaded & processed files",
status_messages=status_messages,
)
@app.route("/view_extracted_data", methods=["GET"])
def view_extracted_data() -> str:
"""
Display Raw extracted data from Documents
"""
extracted_data = read_collection(FIRESTORE_PROJECT_ID, FIRESTORE_COLLECTION)
if not extracted_data:
return render_template("index.html", message_error="No data to display")
return render_template("index.html", extracted_data=extracted_data)
@app.route("/view_tax_bill", methods=["GET"])
def view_tax_bill() -> str:
"""
Calculate Tax Return with Document Information from Firestore
"""
extracted_data = read_collection(FIRESTORE_PROJECT_ID, FIRESTORE_COLLECTION)
tax_data = calculate_tax_values(extracted_data)
if not tax_data:
return render_template("index.html", message_error="No data to display")
return render_template("index.html", tax_data=tax_data)
@app.route("/delete_data", methods=["GET"])
def delete_data() -> str:
"""
Remove Saved Data from Database
"""
delete_collection(FIRESTORE_PROJECT_ID, FIRESTORE_COLLECTION)
return render_template("index.html", message_success="Successfully deleted data")
def save_files_to_temp_directory(files, temp_dir) -> List[Tuple[str, str]]:
"""
Save files to temporary directory
Returns a list of tuples containing file paths and mimetypes
"""
uploaded_filenames: List[Tuple[str, str]] = []
for file in files:
if not file or file.filename == "":
print("Skipping corrupt file")
continue
if file.mimetype not in ALLOWED_MIMETYPES:
print(f"Invalid File Type: {file.filename}: {file.mimetype}")
continue
input_file_path = os.path.join(temp_dir.name, file.filename)
file.save(input_file_path)
uploaded_filenames.append((input_file_path, file.mimetype))
print(f"Uploaded file: {input_file_path}, {file.mimetype}")
return uploaded_filenames
@app.errorhandler(Exception)
def handle_exception(ex):
"""
Handle Application Exceptions
"""
# Pass through HTTP errors
if isinstance(ex, HTTPException):
return ex
# Non-HTTP exceptions only
return render_template(
"index.html",
message_error="An unknown error occurred, please try again later",
)
if __name__ == "__main__":
app.run(debug=True, host="0.0.0.0", port=int(os.environ.get("PORT", 8080)))