gen-ai-rag-vertex-ai-vector-search/ingestion.tf (77 lines of code) (raw):
# Copyright 2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
### Bucket for ingested data
resource "google_storage_bucket" "ingest" {
  # Design consideration: Data availability.
  # The bucket is created in the region supplied through var.region.
  location = var.region

  # Bucket name is "ingest-" followed by local.unique_str.
  name = "ingest-${local.unique_str}"

  # NOTE(review): the function-source bucket in this file sets
  # uniform_bucket_level_access = true while this bucket does not —
  # confirm the difference is intentional.
}
### Pub/Sub to trigger ingestion job
# Pub/Sub topic that receives the ingest bucket's notifications and is the
# event source for the ingestion function's trigger.
resource "google_pubsub_topic" "ingest" {
  name = "ingest-${local.unique_str}"
}
# Link events on the Cloud Storage bucket to the Pub/Sub topic: a message is
# published to the ingest topic for every OBJECT_FINALIZE event on the ingest
# bucket, using the JSON_API_V1 payload format.
resource "google_storage_notification" "default" {
  bucket         = google_storage_bucket.ingest.name
  topic          = google_pubsub_topic.ingest.id
  event_types    = ["OBJECT_FINALIZE"]
  payload_format = "JSON_API_V1"

  # The notification can only be created once the Cloud Storage service
  # account is allowed to publish to the topic, so wait for that grant
  # (the "gcs" IAM member). The "pubsub" member is kept in the list to
  # preserve the ordering the configuration previously relied on.
  depends_on = [
    google_pubsub_topic_iam_member.gcs,
    google_pubsub_topic_iam_member.pubsub,
  ]
}
# Grant the Cloud Storage service account (local.gcs_service_account) the
# Pub/Sub publisher role on the ingest topic.
resource "google_pubsub_topic_iam_member" "gcs" {
  member = "serviceAccount:${local.gcs_service_account}"
  role   = "roles/pubsub.publisher"
  topic  = google_pubsub_topic.ingest.id

  # Applied only after the project_services module has finished
  # (presumably so the required APIs are enabled first — verify against
  # the module definition).
  depends_on = [module.project_services]
}
# Grant the Pub/Sub service account (local.pubsub_service_account) the
# Pub/Sub publisher role on the ingest topic.
resource "google_pubsub_topic_iam_member" "pubsub" {
  member = "serviceAccount:${local.pubsub_service_account}"
  role   = "roles/pubsub.publisher"
  topic  = google_pubsub_topic.ingest.id
}
### Cloud Run Function code
# Package the local function-source folder into a zip archive that is later
# uploaded as the Cloud Run Function source.
data "archive_file" "default" {
  type = "zip"

  # Edit Me: Replace with a path to the customized "ingestion" function code
  source_dir = "function-source/"

  # NOTE(review): source_dir is a relative path and the archive is written to
  # a fixed location under /tmp — confirm both suit the environment Terraform
  # is run from.
  output_path = "/tmp/function-source.zip"
}
# Bucket that holds the zipped Cloud Run Function source code.
resource "google_storage_bucket" "default" {
  # Name combines local.unique_str with the project ID.
  name = "gcf-source-${local.unique_str}-${var.project_id}"

  # Hard-coded to the "US" location rather than var.region.
  location = "US"

  uniform_bucket_level_access = true
}
# Upload the zipped function code to the source bucket.
resource "google_storage_bucket_object" "default" {
  # The archive's MD5 is embedded in the object name so that any change to the
  # function source yields a new object name. The function references this
  # object by name, so a new name is what makes Terraform plan a redeploy;
  # with a constant name, code changes would not reach the function.
  name   = "function-source-${data.archive_file.default.output_md5}.zip"
  bucket = google_storage_bucket.default.name
  source = data.archive_file.default.output_path
}
### Ingestion function
resource "google_cloudfunctions2_function" "default" {
  name        = "ingestion-${local.unique_str}"
  description = "Function to process Cloud Storage events"
  location    = var.region

  # Build: how the function is built from the uploaded source archive.
  build_config {
    # Note: Adjust runtime and entry point to match the language/version of
    # the ingestion app.
    entry_point = "process_data"
    runtime     = "python312"

    source {
      # Source is the zip archive uploaded to the function-source bucket.
      storage_source {
        bucket = google_storage_bucket.default.name
        object = google_storage_bucket_object.default.name
      }
    }
  }

  # Runtime service settings.
  # Note: Tune scaling, memory and timeout to the performance requirements of
  # the ingest function.
  service_config {
    # Identity: runs as the dedicated function service account.
    service_account_email = google_service_account.gcf.email

    # Scaling and resource limits.
    min_instance_count = 1
    max_instance_count = 3
    available_memory   = "256M"
    timeout_seconds    = 60

    # Traffic and network exposure.
    ingress_settings               = "ALLOW_INTERNAL_ONLY"
    all_traffic_on_latest_revision = true

    environment_variables = {
      SERVICE_CONFIG_TEST = "config_test"
    }
  }

  # Trigger: invoke the function for messages published to the ingest topic.
  # Creates a Pub/Sub subscription based on the topic.
  event_trigger {
    event_type     = "google.cloud.pubsub.topic.v1.messagePublished"
    pubsub_topic   = google_pubsub_topic.ingest.id
    trigger_region = var.region
    retry_policy   = "RETRY_POLICY_RETRY"
  }

  # Explicit ordering: wait for the project_services module and for the
  # function's service account before creating the function.
  depends_on = [module.project_services, google_service_account.gcf]
}
# Service account dedicated to the ingestion function; the function's
# service_config runs as this identity.
# Note: grant any additional permissions to the function as required.
resource "google_service_account" "gcf" {
  # NOTE(review): the fixed prefix is already 25 characters long; confirm that
  # local.unique_str keeps the full ID within the service account ID length
  # limit.
  account_id = "function-service-account-${local.unique_str}"
}