# metadata.yaml (132 lines of code) (raw)
# Copyright 2023 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Resource header: identifies this document as blueprint metadata for the
# terraform-genai-doc-summarization module.
apiVersion: blueprints.cloud.google.com/v1alpha1
kind: BlueprintMetadata
metadata:
  name: terraform-genai-doc-summarization
  annotations:
    # The annotation value is a string, so it stays quoted (not a YAML boolean).
    config.kubernetes.io/local-config: 'true'
# Blueprint specification. Sections:
#   info         - title, source, tool version, descriptions, cost, products
#   content      - architecture diagram/steps, documentation, examples
#   interfaces   - Terraform input variables and outputs
#   requirements - IAM roles and Google Cloud APIs listed for deployment
spec:
  info:
    title: Generative AI Document Summarization
    source:
      repo: https://github.com/balajismaniam/terraform-genai-doc-summarization.git
      sourceType: git
    # Quoted so the version is always read as a string, never as a number.
    version: '0.0.1'
    actuationTool:
      flavor: Terraform
      version: '>= 0.13'
    description:
      tagline: Create summaries of a large corpus of documents using Generative AI.
      # Literal block scalar: line breaks below are preserved, final newline stripped.
      detailed: |-
        This solution showcases how to summarize a large corpus of documents using Generative AI. It provides an
        end-to-end demonstration of document summarization going all the way from raw documents, detecting text
        in the documents and summarizing the documents on-demand using Vertex AI LLM APIs, Cloud Vision Optical
        Character Recognition (OCR) and BigQuery.
      preDeploy: To deploy this blueprint you must have an active billing account and billing permissions.
    icon: assets/icon.png
    deploymentDuration:
      configurationSecs: 60
      deploymentSecs: 600
    costEstimate:
      description: Cost Details
      url: https://cloud.google.com/products/calculator/#id=78888c9b-02ac-4130-9327-fecd7f4cfb11
    cloudProducts:
      - productId: VERTEX_SECTION
        pageUrl: ""
        label: Vertex AI
      # Labelled "Cloud Vision": the solution uses Cloud Vision OCR and enables
      # vision.googleapis.com (see requirements.services).
      - productId: VISION_SECTION
        pageUrl: ""
        label: Cloud Vision
      - productId: search_BIGQUERY_SECTION
        pageUrl: ""
        label: BigQuery
      - productId: FUNCTIONS_SECTION
        pageUrl: ""
        label: Cloud Functions
      - productId: STORAGE_SECTION
        pageUrl: ""
        label: Cloud Storage
  content:
    architecture:
      diagramUrl: https://www.gstatic.com/pantheon/images/solutions/gen_ai_document_summarization_architecture_v1.svg
      # One list item per numbered step of the architecture walkthrough.
      description:
        - 1. The developer follows a tutorial on a Jupyter Notebook, where they upload a PDF — either through Vertex AI Workbench or Colaboratory.
        - 2. The uploaded PDF file is sent to a function running on Cloud Functions. This function handles PDF file processing.
        - 3. The Cloud Functions function uses Cloud Vision to extract all text from the PDF file.
        - 4. The Cloud Functions function stores the extracted text inside a Cloud Storage bucket.
        - 5. The Cloud Functions function uses Vertex AI’s LLM API to summarize the extracted text.
        - 6. The Cloud Functions function stores the text summaries of PDFs in BigQuery tables.
        # Step 7 is a single item; it is folded (>-) only to keep source lines short.
        - >-
          7. As an alternative to uploading PDF files through Jupyter Notebook, the developer can
          upload a PDF file directly to a Cloud Storage bucket — for instance, through the Console UI
          or gcloud. This upload triggers Eventarc to begin the Document Processing phase.
        - 8. As a result of the direct upload to Cloud Storage, Eventarc triggers the Document Processing phase, handled by Cloud Functions.
    documentation:
      - title: Generative AI Document Summary
        url: https://cloud.google.com/architecture/ai-ml/generative-ai-document-summarization
    examples:
      - name: simple_example
        location: examples/simple_example
  interfaces:
    # Input variables; project_id is the only one marked required, all others
    # carry a defaultValue.
    variables:
      - name: bucket_name
        description: The name of the bucket to create
        varType: string
        defaultValue: genai-webhook
      - name: gcf_timeout_seconds
        description: GCF execution timeout
        varType: number
        defaultValue: 900
      - name: project_id
        description: The Google Cloud project ID to deploy to
        varType: string
        required: true
      - name: region
        description: Google Cloud region
        varType: string
        defaultValue: us-central1
      - name: time_to_enable_apis
        description: Wait time to enable APIs in new projects
        varType: string
        defaultValue: 180s
      - name: webhook_name
        description: Name of the webhook
        varType: string
        defaultValue: webhook
      - name: webhook_path
        description: Path to the webhook directory
        varType: string
        defaultValue: webhook
    outputs:
      - name: genai_doc_summary_colab_url
        description: The URL to launch the notebook tutorial for the Generative AI Document Summarization Solution
      - name: neos_walkthrough_url
        description: The URL to launch the in-console tutorial for the Generative AI Document Summarization solution
  requirements:
    # IAM roles, grouped by the level at which they are listed.
    roles:
      - level: Project
        roles:
          - roles/aiplatform.admin
          - roles/artifactregistry.reader
          - roles/bigquery.admin
          - roles/cloudfunctions.admin
          - roles/eventarc.admin
          - roles/iam.serviceAccountAdmin
          - roles/iam.serviceAccountUser
          - roles/logging.admin
          - roles/pubsub.admin
          - roles/resourcemanager.projectIamAdmin
          - roles/run.admin
          - roles/serviceusage.serviceUsageAdmin
          - roles/storage.admin
    # Google Cloud API services listed as requirements of the blueprint.
    services:
      - aiplatform.googleapis.com
      - artifactregistry.googleapis.com
      - bigquery.googleapis.com
      - cloudbuild.googleapis.com
      - cloudfunctions.googleapis.com
      - cloudresourcemanager.googleapis.com
      - eventarc.googleapis.com
      - iam.googleapis.com
      - run.googleapis.com
      - serviceusage.googleapis.com
      - storage-api.googleapis.com
      - storage.googleapis.com
      - vision.googleapis.com