def on_document_added()

in hacks/genai-intro/artifacts/function/main.py [0:0]


def on_document_added(event):
    """Handle a document notification delivered through a Cloud Pub/Sub topic.

    Do not edit until Challenge 4.

    The message data is base64-decoded, parsed as JSON, and its ``bucket``,
    ``name`` and ``contentType`` fields are read. For PDF documents the file
    is passed to ``extract_text_from_document`` with ``STAGING_BUCKET`` as the
    destination bucket, the result is collated with ``collate_pages``, and a
    title and a summary are derived from the collated text. Progress and
    results are printed.

    Args:
        event: event payload; ``event.data["message"]["data"]`` must hold the
            base64-encoded, UTF-8 JSON message.

    Raises:
        ValueError: if the message's ``contentType`` is missing or is anything
            other than ``application/pdf``.
    """
    encoded_payload = event.data["message"]["data"]
    pubsub_message = json.loads(base64.b64decode(encoded_payload).decode("utf-8"))
    src_bucket = pubsub_message["bucket"]
    src_fname = pubsub_message["name"]
    print(f"Processing file: {src_fname}")

    # A payload without a contentType is rejected with the same ValueError as
    # any other non-PDF input, rather than surfacing as an unrelated KeyError.
    if pubsub_message.get("contentType") != "application/pdf":
        raise ValueError("Only PDF files are supported, aborting")

    dst_bucket = STAGING_BUCKET
    dst_folder = extract_text_from_document(src_bucket, src_fname, dst_bucket)
    print("Completed the text extraction")

    complete_text = collate_pages(dst_bucket, dst_folder)
    print(f"Completed collation, #characters: {len(complete_text)}")

    title = extract_title_from_text(complete_text)
    print(f"Title: {title}")

    summary = extract_summary_from_text(complete_text)
    print(f"Summary: {summary}")