terraform/clickstream_analytics/main.tf

# Copyright 2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

locals {
  dataflow_service_account = "my-dataflow-sa"
  bigtable_instance        = "clickstream-analytics"
  bigtable_zone            = "${var.region}-a"
  bigtable_lookup_key      = "bigtable-lookup-key"
  bigquery_dataset         = "clickstream_analytics"
}

// Project
module "google_cloud_project" {
  source          = "github.com/GoogleCloudPlatform/cloud-foundation-fabric//modules/project?ref=v38.0.0"
  billing_account = var.billing_account
  project_reuse   = var.project_create ? null : {}
  name            = var.project_id
  parent          = var.organization
  services = [
    "dataflow.googleapis.com",
    "monitoring.googleapis.com",
    "pubsub.googleapis.com",
    "autoscaling.googleapis.com",
    "bigtableadmin.googleapis.com",
    "bigquery.googleapis.com"
  ]
}

// Bigtable instance used for side-input lookups by the pipeline
resource "google_bigtable_instance" "clickstream-analytics" {
  name = local.bigtable_instance
  cluster {
    cluster_id   = "${local.bigtable_instance}-c1"
    num_nodes    = 1
    storage_type = "HDD"
    zone         = local.bigtable_zone
  }
}

# Create BigQuery dataset
resource "google_bigquery_dataset" "clickstream_analytics" {
  dataset_id  = local.bigquery_dataset
  description = "Dataset for storing clickstream analytics data"
  location    = var.region
}

# Create BigQuery table
resource "google_bigquery_table" "wikipedia" {
  dataset_id          = google_bigquery_dataset.clickstream_analytics.dataset_id
  table_id            = "wikipedia"
  deletion_protection = false
  schema = jsonencode([
    { name = "prev", type = "STRING", mode = "NULLABLE" },
    { name = "curr", type = "STRING", mode = "NULLABLE" },
    { name = "type", type = "STRING", mode = "NULLABLE" },
    { name = "n", type = "INTEGER", mode = "NULLABLE" },
  ])
}

# Dead-letter table for messages that fail processing
resource "google_bigquery_table" "deadletter" {
  dataset_id          = google_bigquery_dataset.clickstream_analytics.dataset_id
  table_id            = "deadletter"
  deletion_protection = false
  schema = jsonencode([
    { name = "timestamp", type = "TIMESTAMP", mode = "REQUIRED" },
    { name = "payloadString", type = "STRING", mode = "REQUIRED" },
    { name = "payloadBytes", type = "BYTES", mode = "REQUIRED" },
    {
      name = "attributes", type = "RECORD", mode = "REPEATED",
      fields = [
        { name = "key", type = "STRING", mode = "NULLABLE" },
        { name = "value", type = "STRING", mode = "NULLABLE" }
      ]
    },
    { name = "errorMessage", type = "STRING", mode = "NULLABLE" },
    { name = "stacktrace", type = "STRING", mode = "NULLABLE" }
  ])
}

// Bucket for staging data, scripts, etc. in the selected region
module "buckets" {
  source        = "github.com/GoogleCloudPlatform/cloud-foundation-fabric//modules/gcs?ref=v38.0.0"
  project_id    = module.google_cloud_project.project_id
  name          = module.google_cloud_project.project_id
  location      = var.region
  storage_class = "STANDARD"
  force_destroy = var.destroy_all_resources
}

// Pub/Sub topic and subscription for the incoming clickstream messages
module "input_topic" {
  source     = "github.com/GoogleCloudPlatform/cloud-foundation-fabric//modules/pubsub?ref=v38.0.0"
  project_id = module.google_cloud_project.project_id
  name       = "input"
  subscriptions = {
    messages-sub = {}
  }
}

// Service account
module "dataflow_sa" {
  source     = "github.com/GoogleCloudPlatform/cloud-foundation-fabric//modules/iam-service-account?ref=v38.0.0"
  project_id = module.google_cloud_project.project_id
  name       = local.dataflow_service_account
  iam_project_roles = {
    (module.google_cloud_project.project_id) = [
      "roles/storage.admin",
      "roles/dataflow.worker",
      "roles/monitoring.metricWriter",
      "roles/pubsub.editor",
      "roles/bigtable.reader"
    ]
  }
}

// Network
module "vpc_network" {
  source     = "github.com/GoogleCloudPlatform/cloud-foundation-fabric//modules/net-vpc?ref=v38.0.0"
  project_id = module.google_cloud_project.project_id
  name       = "${var.network_prefix}-net"
  subnets = [
    {
      ip_cidr_range         = "10.1.0.0/16"
      name                  = "${var.network_prefix}-subnet"
      region                = var.region
      enable_private_access = true
      secondary_ip_ranges = {
        pods     = "10.16.0.0/14"
        services = "10.20.0.0/24"
      }
    }
  ]
}

module "firewall_rules" {
  // Default rules for internal traffic + SSH access via IAP
  source     = "github.com/GoogleCloudPlatform/cloud-foundation-fabric//modules/net-vpc-firewall?ref=v38.0.0"
  project_id = module.google_cloud_project.project_id
  network    = module.vpc_network.name
  default_rules_config = {
    admin_ranges = [
      module.vpc_network.subnet_ips["${var.region}/${var.network_prefix}-subnet"],
    ]
  }
  egress_rules = {
    allow-egress-dataflow = {
      deny        = false
      description = "Dataflow firewall rule egress"
      targets     = ["dataflow"]
      rules       = [{ protocol = "tcp", ports = [12345, 12346] }]
    }
  }
  ingress_rules = {
    allow-ingress-dataflow = {
      description = "Dataflow firewall rule ingress"
      targets     = ["dataflow"]
      rules       = [{ protocol = "tcp", ports = [12345, 12346] }]
    }
  }
}

// Generate the environment-variables script consumed by the pipeline launch scripts
resource "local_file" "variables_script" {
  filename        = "${path.module}/../../pipelines/clickstream_analytics_java/scripts/00_set_variables.sh"
  file_permission = "0644"
  content         = <<FILE
# This file is generated by the Terraform code of this Solution Guide.
# We recommend that you modify this file only through the Terraform deployment.
export PROJECT=${module.google_cloud_project.project_id}
export REGION=${var.region}
export SUBNETWORK=regions/${var.region}/subnetworks/${var.network_prefix}-subnet
export TEMP_LOCATION=gs://$PROJECT/tmp
export SERVICE_ACCOUNT=${module.dataflow_sa.email}
export BQ_DATASET=${google_bigquery_dataset.clickstream_analytics.dataset_id}
export BQ_TABLE=${google_bigquery_table.wikipedia.table_id}
export BQ_DEADLETTER_TABLE=${google_bigquery_table.deadletter.table_id}
export SUBSCRIPTION=${module.input_topic.subscriptions["messages-sub"].id}
export BIGTABLE_INSTANCE=${google_bigtable_instance.clickstream-analytics.id}
export BIGTABLE_TABLE=$BQ_TABLE
export BT_LOOKUP_KEY=${local.bigtable_lookup_key}
FILE
}
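
This main.tf references seven input variables (var.billing_account, var.organization, var.project_id, var.project_create, var.region, var.network_prefix, var.destroy_all_resources), which are declared elsewhere in the module (typically a variables.tf). A minimal terraform.tfvars sketch for supplying them could look like the following; every value below is a placeholder for illustration only, not part of the solution:

# terraform.tfvars (illustrative placeholders only)
billing_account       = "AAAAAA-BBBBBB-CCCCCC"        # placeholder billing account ID
organization          = "organizations/000000000000"  # placeholder org (or folders/...) parent
project_id            = "my-clickstream-project"      # placeholder project ID
project_create        = true                          # create the project instead of reusing it
region                = "us-central1"                 # placeholder region
network_prefix        = "clickstream"                 # placeholder prefix for VPC/subnet names
destroy_all_resources = false                         # allow bucket force_destroy when true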