# terraform-modules/resources/tf-resources.tf
####################################################################################
# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
####################################################################################
####################################################################################
# Create the GCP resources
#
# Author: Adam Paternostro
####################################################################################
terraform {
  required_providers {
    google = {
      source  = "hashicorp/google-beta"
      version = "5.35.0"
    }
  }
}
####################################################################################
# Variables
####################################################################################
variable "gcp_account_name" {}
variable "project_id" {}
variable "dataplex_region" {}
variable "multi_region" {}
variable "bigquery_non_multi_region" {}
variable "vertex_ai_region" {}
variable "data_catalog_region" {}
variable "appengine_region" {}
variable "colab_enterprise_region" {}
variable "dataflow_region" {}
variable "kafka_region" {}
variable "random_extension" {}
variable "project_number" {}
variable "deployment_service_account_name" {}
variable "terraform_service_account" {}
variable "bigquery_chocolate_ai_dataset" {}
variable "chocolate_ai_bucket" {}
variable "chocolate_ai_code_bucket" {}
variable "dataflow_staging_bucket" {}
data "google_client_config" "current" {
}
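# Used below to obtain an OAuth access token for the REST calls issued via local-exec.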
####################################################################################
# Bucket for all data (BigQuery, Spark, etc.)
# This is your "Data Lake" bucket.
# If you are using Dataplex, you should create a bucket per data lake zone
# (bronze, silver, gold, etc.); a commented sketch follows the first bucket below.
####################################################################################
resource "google_storage_bucket" "google_storage_bucket_chocolate_ai_bucket" {
project = var.project_id
name = var.chocolate_ai_bucket
location = var.multi_region
force_destroy = true
uniform_bucket_level_access = true
}
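# For illustration only (not deployed): with Dataplex you might create one bucket
# per zone instead. The "-bronze" suffix below is a hypothetical naming convention.
#
# resource "google_storage_bucket" "chocolate_ai_bronze_zone_bucket" {
#   project                     = var.project_id
#   name                        = "${var.chocolate_ai_bucket}-bronze"
#   location                    = var.multi_region
#   force_destroy               = true
#   uniform_bucket_level_access = true
# }
# (repeat for silver, gold, etc.)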
resource "google_storage_bucket" "google_storage_bucket_chocolate_ai_code_bucket" {
project = var.project_id
name = var.chocolate_ai_code_bucket
location = var.multi_region
force_destroy = true
uniform_bucket_level_access = true
}
resource "google_storage_bucket" "google_storage_bucket_dataflow_staging" {
project = var.project_id
name = var.dataflow_staging_bucket
location = var.multi_region
force_destroy = true
uniform_bucket_level_access = true
soft_delete_policy {
retention_duration_seconds = 0
}
}
####################################################################################
# Default Network
# The project was not created with the default network.
# This creates just the network/subnets we need.
####################################################################################
resource "google_compute_network" "default_network" {
project = var.project_id
name = "vpc-main"
description = "Default network"
auto_create_subnetworks = false
mtu = 1460
}
resource "google_compute_subnetwork" "colab_enterprise_subnet" {
project = var.project_id
name = "colab-enterprise-subnet"
ip_cidr_range = "10.1.0.0/16"
region = var.colab_enterprise_region
network = google_compute_network.default_network.id
private_ip_google_access = true
depends_on = [
google_compute_network.default_network
]
}
resource "google_compute_subnetwork" "dataflow_subnet" {
project = var.project_id
name = "dataflow-subnet"
ip_cidr_range = "10.2.0.0/16"
region = var.dataflow_region
network = google_compute_network.default_network.id
private_ip_google_access = true
depends_on = [
google_compute_network.default_network,
google_compute_subnetwork.colab_enterprise_subnet
]
}
resource "google_compute_subnetwork" "kafka_subnet" {
project = var.project_id
name = "kafka-subnet"
ip_cidr_range = "10.3.0.0/16"
region = var.kafka_region
network = google_compute_network.default_network.id
private_ip_google_access = true
depends_on = [
google_compute_network.default_network,
google_compute_subnetwork.colab_enterprise_subnet,
google_compute_subnetwork.kafka_subnet
]
}
# Firewall for NAT Router
resource "google_compute_firewall" "subnet_firewall_rule" {
  project = var.project_id
  name    = "subnet-nat-firewall"
  network = google_compute_network.default_network.id

  allow {
    protocol = "icmp"
  }
  allow {
    protocol = "tcp"
  }
  allow {
    protocol = "udp"
  }

  source_ranges = ["10.1.0.0/16", "10.2.0.0/16", "10.3.0.0/16"]

  depends_on = [
    google_compute_subnetwork.colab_enterprise_subnet,
    google_compute_subnetwork.dataflow_subnet,
    google_compute_subnetwork.kafka_subnet
  ]
}
# We want a NAT for every region
locals {
  distinctRegions = distinct([var.colab_enterprise_region, var.dataflow_region, var.kafka_region])
}
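# For example, if all three region variables are set to "us-central1", distinct()
# collapses them to a single element and only one router/NAT pair is created below.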
resource "google_compute_router" "nat-router-distinct-regions" {
project = var.project_id
count = length(local.distinctRegions)
name = "nat-router-${local.distinctRegions[count.index]}"
region = local.distinctRegions[count.index]
network = google_compute_network.default_network.id
depends_on = [
google_compute_firewall.subnet_firewall_rule
]
}
resource "google_compute_router_nat" "nat-config-distinct-regions" {
project = var.project_id
count = length(local.distinctRegions)
name = "nat-config-${local.distinctRegions[count.index]}"
router = google_compute_router.nat-router-distinct-regions[count.index].name
region = local.distinctRegions[count.index]
nat_ip_allocate_option = "AUTO_ONLY"
source_subnetwork_ip_ranges_to_nat = "ALL_SUBNETWORKS_ALL_IP_RANGES"
depends_on = [
google_compute_router.nat-router-distinct-regions
]
}
####################################################################################
# BigQuery Datasets
####################################################################################
resource "google_bigquery_dataset" "google_bigquery_dataset_chocolate_ai" {
project = var.project_id
dataset_id = var.bigquery_chocolate_ai_dataset
friendly_name = var.bigquery_chocolate_ai_dataset
description = "This dataset contains the data for the Chocolate A.I. demo."
location = var.multi_region
}
####################################################################################
# IAM for Cloud Build
####################################################################################
# Needed per https://cloud.google.com/build/docs/cloud-build-service-account-updates
resource "google_project_iam_member" "cloudfunction_builder" {
  project = var.project_id
  role    = "roles/cloudbuild.builds.builder"
  member  = "serviceAccount:${var.project_number}-compute@developer.gserviceaccount.com"
}
# Needed per https://cloud.google.com/build/docs/cloud-build-service-account-updates
# Allow the Cloud Function service account to read storage [V2 Function]
resource "google_project_iam_member" "cloudfunction_objectViewer" {
  project = var.project_id
  role    = "roles/storage.objectViewer"
  member  = "serviceAccount:${var.project_number}-compute@developer.gserviceaccount.com"

  depends_on = [
    google_project_iam_member.cloudfunction_builder
  ]
}
####################################################################################
# Dataplex / Data Lineage
####################################################################################
resource "google_project_iam_member" "gcp_roles_datalineage_admin" {
project = var.project_id
role = "roles/datalineage.admin"
member = "user:${var.gcp_account_name}"
}
####################################################################################
# BigQuery - Connections (BigLake, Functions, etc)
####################################################################################
# Vertex AI connection
resource "google_bigquery_connection" "vertex_ai_connection" {
  project       = var.project_id
  connection_id = "vertex-ai"
  location      = var.multi_region
  friendly_name = "vertex-ai"
  description   = "vertex-ai"
  cloud_resource {}
}
# Grant the Vertex AI connection's service account the Vertex AI User role
resource "google_project_iam_member" "vertex_ai_connection_vertex_user_role" {
  project = var.project_id
  role    = "roles/aiplatform.user"
  member  = "serviceAccount:${google_bigquery_connection.vertex_ai_connection.cloud_resource[0].service_account_id}"

  depends_on = [
    google_bigquery_connection.vertex_ai_connection
  ]
}
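# Once granted, this connection can be referenced from BigQuery SQL to create
# remote models. A minimal sketch (hypothetical dataset, model, and endpoint names):
#
# CREATE OR REPLACE MODEL `chocolate_ai.gemini_model`
#   REMOTE WITH CONNECTION `<project>.<location>.vertex-ai`
#   OPTIONS (endpoint = 'gemini-1.5-flash');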
# BigLake connection
resource "google_bigquery_connection" "biglake_connection" {
  project       = var.project_id
  connection_id = "biglake-connection"
  location      = var.multi_region
  friendly_name = "biglake-connection"
  description   = "biglake-connection"
  cloud_resource {}
}

# Wait for the connection's service account to propagate before granting it IAM roles
resource "time_sleep" "biglake_connection_time_delay" {
  depends_on      = [google_bigquery_connection.biglake_connection]
  create_duration = "30s"
}
# Allow BigLake to read storage. This grants the role at the project level;
# you can instead grant it per bucket (see the commented sketch below).
resource "google_project_iam_member" "bq_connection_iam_object_viewer" {
  project = var.project_id
  role    = "roles/storage.objectViewer"
  member  = "serviceAccount:${google_bigquery_connection.biglake_connection.cloud_resource[0].service_account_id}"

  depends_on = [
    time_sleep.biglake_connection_time_delay
  ]
}
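# A bucket-scoped alternative to the project-level grant above (commented out;
# the resource name is illustrative):
#
# resource "google_storage_bucket_iam_member" "biglake_bucket_object_viewer" {
#   bucket = google_storage_bucket.google_storage_bucket_chocolate_ai_bucket.name
#   role   = "roles/storage.objectViewer"
#   member = "serviceAccount:${google_bigquery_connection.biglake_connection.cloud_resource[0].service_account_id}"
# }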
####################################################################################
# Colab Enterprise
####################################################################################
# https://cloud.google.com/vertex-ai/docs/reference/rest/v1beta1/projects.locations.notebookRuntimeTemplates
# NOTE: If you want a "when = destroy" example TF please see:
# https://github.com/GoogleCloudPlatform/data-analytics-golden-demo/blob/main/cloud-composer/data/terraform/dataplex/terraform.tf#L147
resource "null_resource" "colab_runtime_template" {
provisioner "local-exec" {
when = create
command = <<EOF
curl -X POST \
https://${var.colab_enterprise_region}-aiplatform.googleapis.com/ui/projects/${var.project_id}/locations/${var.colab_enterprise_region}/notebookRuntimeTemplates?notebookRuntimeTemplateId=colab-enterprise-template \
--header "Authorization: Bearer ${data.google_client_config.current.access_token}" \
--header "Content-Type: application/json" \
--data '{
displayName: "colab-enterprise-template",
description: "colab-enterprise-template",
isDefault: true,
machineSpec: {
machineType: "e2-highmem-4"
},
networkSpec: {
enableInternetAccess: false,
network: "projects/${var.project_id}/global/networks/vpc-main",
subnetwork: "projects/${var.project_id}/regions/${var.colab_enterprise_region}/subnetworks/${google_compute_subnetwork.colab_enterprise_subnet.name}"
},
shieldedVmConfig: {
enableSecureBoot: true
}
}'
EOF
}
depends_on = [
google_compute_subnetwork.colab_enterprise_subnet
]
}
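# No destroy-time cleanup is implemented here (see the linked example above).
# A hypothetical sketch would add a second provisioner; destroy provisioners
# cannot reference variables, so REGION/PROJECT/TOKEN are placeholders:
#
# provisioner "local-exec" {
#   when    = destroy
#   command = "curl -X DELETE https://REGION-aiplatform.googleapis.com/ui/projects/PROJECT/locations/REGION/notebookRuntimeTemplates/colab-enterprise-template --header 'Authorization: Bearer TOKEN'"
# }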
# https://cloud.google.com/vertex-ai/docs/reference/rest/v1beta1/projects.locations.notebookRuntimes
resource "null_resource" "colab_runtime" {
provisioner "local-exec" {
when = create
command = <<EOF
curl -X POST \
https://${var.colab_enterprise_region}-aiplatform.googleapis.com/ui/projects/${var.project_id}/locations/${var.colab_enterprise_region}/notebookRuntimes:assign \
--header "Authorization: Bearer ${data.google_client_config.current.access_token}" \
--header "Content-Type: application/json" \
--data '{
notebookRuntimeTemplate: "projects/${var.project_number}/locations/${var.colab_enterprise_region}/notebookRuntimeTemplates/colab-enterprise-template",
notebookRuntime: {
displayName: "colab-enterprise-runtime",
description: "colab-enterprise-runtime",
runtimeUser: "${var.gcp_account_name}"
}
}'
EOF
}
depends_on = [
null_resource.colab_runtime_template
]
}
####################################################################################
# New Service Account - For Continuous Queries
####################################################################################
resource "google_service_account" "kafka_continuous_query_service_account" {
project = var.project_id
account_id = "kafka-continuous-query"
display_name = "kafka-continuous-query"
}
# Needs access to BigQuery
resource "google_project_iam_member" "kafka_continuous_query_service_account_bigquery_admin" {
project = var.project_id
role = "roles/bigquery.admin"
member = "serviceAccount:${google_service_account.kafka_continuous_query_service_account.email}"
depends_on = [
google_service_account.kafka_continuous_query_service_account
]
}
# Needs access to Pub/Sub
resource "google_project_iam_member" "kafka_continuous_query_service_account_pubsub_admin" {
project = var.project_id
role = "roles/pubsub.admin"
member = "serviceAccount:${google_service_account.kafka_continuous_query_service_account.email}"
depends_on = [
google_project_iam_member.kafka_continuous_query_service_account_bigquery_admin
]
}
####################################################################################
# Pub/Sub (Topic and Subscription)
####################################################################################
resource "google_pubsub_topic" "google_pubsub_topic_bq_continuous_query" {
project = var.project_id
name = "bq-continuous-query"
message_retention_duration = "86400s"
}
resource "google_pubsub_subscription" "google_pubsub_subscription_bq_continuous_query" {
project = var.project_id
name = "bq-continuous-query"
topic = google_pubsub_topic.google_pubsub_topic_bq_continuous_query.id
message_retention_duration = "86400s"
retain_acked_messages = false
expiration_policy {
ttl = "86400s"
}
retry_policy {
minimum_backoff = "10s"
}
enable_message_ordering = false
depends_on = [
google_pubsub_topic.google_pubsub_topic_bq_continuous_query
]
}
####################################################################################
# Dataflow Service Account
####################################################################################
# Service account for the Dataflow environment
resource "google_service_account" "dataflow_service_account" {
  project      = var.project_id
  account_id   = "dataflow-service-account"
  display_name = "Service Account for Dataflow Environment"
}

# Grant Editor to the service account (broader than necessary; for production,
# scope down to narrower roles such as the commented sketch below)
resource "google_project_iam_member" "dataflow_service_account_editor_role" {
  project = var.project_id
  role    = "roles/editor"
  member  = "serviceAccount:${google_service_account.dataflow_service_account.email}"

  depends_on = [
    google_service_account.dataflow_service_account
  ]
}
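# A least-privilege alternative (commented out; the exact role set depends on your
# pipelines, with roles/dataflow.worker being the documented minimum for worker VMs):
#
# resource "google_project_iam_member" "dataflow_service_account_worker_role" {
#   project = var.project_id
#   role    = "roles/dataflow.worker"
#   member  = "serviceAccount:${google_service_account.dataflow_service_account.email}"
# }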
####################################################################################
# Outputs
####################################################################################
output "dataflow_service_account" {
value = google_service_account.dataflow_service_account.email
}