in hacks/genai-intro/artifacts/function/main.py [0:0]
def on_document_added(event):
    """Handle a Cloud Pub/Sub message announcing a newly added document.

    Do not edit until Challenge 4.

    The message data is base64-decoded and parsed as UTF-8 JSON. If the
    described file is a PDF, it is passed through the helper pipeline
    (``extract_text_from_document`` -> ``collate_pages`` ->
    ``extract_title_from_text`` / ``extract_summary_from_text``) and the
    resulting title and summary are printed.

    Args:
        event: event payload; ``event.data["message"]["data"]`` must hold
            base64-encoded JSON containing the keys ``bucket``, ``name``
            and ``contentType``.

    Raises:
        ValueError: if ``contentType`` is anything other than
            ``application/pdf``.
    """
    # Unwrap the Pub/Sub envelope: base64 -> bytes -> UTF-8 text -> dict.
    encoded_payload = event.data["message"]["data"]
    decoded_text = base64.b64decode(encoded_payload).decode("utf-8")
    notification = json.loads(decoded_text)

    bucket_name = notification["bucket"]
    file_name = notification["name"]
    print(f"Processing file: {file_name}")

    # Guard clause: refuse anything that is not a PDF before doing any work.
    is_pdf = notification["contentType"] == "application/pdf"
    if not is_pdf:
        raise ValueError("Only PDF files are supported, aborting")

    # The extraction helper is given the staging bucket as its destination
    # and returns the folder that is then handed to collate_pages.
    staging_bucket = STAGING_BUCKET
    pages_folder = extract_text_from_document(bucket_name, file_name, staging_bucket)
    print("Completed the text extraction")

    full_text = collate_pages(staging_bucket, pages_folder)
    print(f"Completed collation, #characters: {len(full_text)}")

    doc_title = extract_title_from_text(full_text)
    print(f"Title: {doc_title}")

    doc_summary = extract_summary_from_text(full_text)
    print(f"Summary: {doc_summary}")