# terraform/tf-main.tf
####################################################################################
# Copyright 2022 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
####################################################################################
####################################################################################
# README
# This is the main entry point into the Terraform creation script
# This script can be run in different ways:
# 1. Run "source deploy.sh" in the root folder (for when you run locally or in Cloud Shell).
# This will create the GCP project for you and deploy everything. The logged-in user needs to be
# an Org Admin so the project can be created and permissions set.
#
# 2. If you already have a GCP project created, run Terraform directly, passing in the parameters.
# Review the script deploy-use-existing-project.sh to see the required items and permissions.
# terraform apply \
# -var="gcp_account_name=${gcp_account_name}" \
# -var="project_id=${project_id}" \
# -var="project_number=${project_number}" \
# -var="deployment_service_account_name=${service_account_email}" \
# -var="org_id=${org_id}"
#
# Review the parameters. If a Project Number is passed in, it is assumed the GCP project has been created.
#
# Author: Adam Paternostro
#
# References:
# Terraform for Google: https://registry.terraform.io/providers/hashicorp/google/latest/docs
# https://www.terraform.io/language/resources/provisioners/local-exec
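#
# Example of setting the shell variables used in option 2 (placeholder values; adjust to your environment):
#   gcp_account_name="user@example.com"
#   project_id="my-demo-project"
#   project_number=""   # blank means Terraform will create the project
#   service_account_email="deployer@my-demo-project.iam.gserviceaccount.com"
#   org_id="123456789012"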
####################################################################################
terraform {
required_providers {
google = {
source = "hashicorp/google-beta"
version = "5.35.0"
configuration_aliases = [google.service_principal_impersonation]
}
}
}
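# NOTE: the local provider name "google" is intentionally mapped to the "hashicorp/google-beta"
# source, so beta-only resources can be created without declaring a second provider alias.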
####################################################################################
# Providers
# Multiple providers: https://www.terraform.io/language/providers/configuration
# The first is the default (the logged-in user); it creates the project and the service account that provisions the resources
# The second is the service account created by the first and is used to create the resources
####################################################################################
# Default provider (uses the logged-in user to create the project and the service account for deployment)
provider "google" {
project = local.local_project_id
}
# Provider that uses service account impersonation (best practice - no exported secret keys to local computers)
provider "google" {
alias = "service_principal_impersonation"
impersonate_service_account = "${local.local_project_id}@${local.local_project_id}.iam.gserviceaccount.com"
project = local.local_project_id
region = var.default_region
zone = var.default_zone
}
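# For the impersonated provider to work, the caller needs roles/iam.serviceAccountTokenCreator on the
# service account. A minimal sketch of that grant (assumed here for illustration; the service-account
# module below performs the actual grants):
/*
resource "google_service_account_iam_member" "impersonation_grant" {
  service_account_id = "projects/${local.local_project_id}/serviceAccounts/${local.local_project_id}@${local.local_project_id}.iam.gserviceaccount.com"
  role               = "roles/iam.serviceAccountTokenCreator"
  member             = "user:${var.gcp_account_name}"
}
*/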
####################################################################################
# Reusable Modules
####################################################################################
module "project" {
# Run this as the currently logged-in user or the service account (assuming DevOps)
count = var.project_number == "" ? 1 : 0
source = "../terraform-modules/project"
project_id = local.local_project_id
org_id = var.org_id
billing_account = var.billing_account
}
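# NOTE: count turns the module into a list, which is why it is referenced as module.project[0]
# below; the ternary falls back to var.project_number when the project was created in advance.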
module "service-account" {
# This creates a service account; portions of the following deployment run by impersonating it
source = "../terraform-modules/service-account"
project_id = local.local_project_id
org_id = var.org_id
impersonation_account = local.local_impersonation_account
gcp_account_name = var.gcp_account_name
environment = var.environment
depends_on = [
module.project
]
}
# Enable all the Cloud APIs that will be used, using batch mode
# Batch mode is enabled on the provider (by default)
module "apis-batch-enable" {
source = "../terraform-modules/apis-batch-enable"
project_id = local.local_project_id
project_number = var.project_number == "" ? module.project[0].output-project-number : var.project_number
depends_on = [
module.project,
module.service-account
]
}
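# A minimal sketch of what the apis-batch-enable module likely does internally
# (assumed implementation; the real module drives the full API list):
/*
resource "google_project_service" "enabled_apis" {
  for_each           = toset(["bigquery.googleapis.com", "composer.googleapis.com"])
  project            = local.local_project_id
  service            = each.key
  disable_on_destroy = false
}
*/
# Give the newly enabled APIs and the service account time to propagate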
resource "time_sleep" "service_account_api_activation_time_delay" {
create_duration = "120s"
depends_on = [
module.project,
module.service-account,
module.apis-batch-enable
]
}
# Uses the new Org Policies method (when a project is created by TF)
module "org-policies" {
count = var.environment == "GITHUB_ENVIRONMENT" && var.org_id != "0" ? 1 : 0
source = "../terraform-modules/org-policies"
# Use Service Account Impersonation for this step.
# NOTE: This step must be done using a service account (a user account cannot change these policies)
providers = { google = google.service_principal_impersonation }
project_id = local.local_project_id
depends_on = [
module.project,
module.service-account,
module.apis-batch-enable,
time_sleep.service_account_api_activation_time_delay
]
}
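# A minimal sketch of the kind of policy the org-policies module sets using the new
# project-level method (assumed example constraint):
/*
resource "google_org_policy_policy" "example_policy" {
  name   = "projects/${local.local_project_id}/policies/compute.requireShieldedVm"
  parent = "projects/${local.local_project_id}"
  spec {
    rules {
      enforce = "FALSE"
    }
  }
}
*/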
# Uses the "Old" Org Policies methods (for when a project is created in advance)
# This is used since the new method does not let you specify a project, and some orgs deploy with a
# Cloud Build account that is in a different domain/org
/*
module "org-policies-deprecated" {
count = var.project_number == "" ? 0 : 1
source = "../terraform-modules/org-policies-deprecated"
# Use Service Account Impersonation for this step.
providers = { google = google.service_principal_impersonation }
project_id = local.local_project_id
depends_on = [
module.project,
module.service-account,
module.apis-batch-enable,
time_sleep.service_account_api_activation_time_delay
]
}
*/
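# A minimal sketch of the older per-project style that module would use (assumed example):
/*
resource "google_project_organization_policy" "example_policy_deprecated" {
  project    = local.local_project_id
  constraint = "compute.requireShieldedVm"
  boolean_policy {
    enforced = false
  }
}
*/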
module "resources" {
source = "../terraform-modules/resources"
# Use Service Account Impersonation for this step.
providers = { google = google.service_principal_impersonation }
gcp_account_name = var.gcp_account_name
project_id = local.local_project_id
composer_region = var.composer_region
dataform_region = var.dataform_region
dataplex_region = var.dataplex_region
dataproc_region = var.dataproc_region
dataflow_region = var.dataflow_region
bigquery_region = var.bigquery_region
bigquery_non_multi_region = var.bigquery_non_multi_region
spanner_region = var.spanner_region
datafusion_region = var.datafusion_region
vertex_ai_region = var.vertex_ai_region
cloud_function_region = var.cloud_function_region
data_catalog_region = var.data_catalog_region
dataproc_serverless_region = var.dataproc_serverless_region
cloud_sql_region = var.cloud_sql_region
cloud_sql_zone = var.cloud_sql_zone
datastream_region = var.datastream_region
colab_enterprise_region = var.colab_enterprise_region
storage_bucket = local.local_storage_bucket
spanner_config = var.spanner_config
random_extension = random_string.project_random.result
project_number = var.project_number == "" ? module.project[0].output-project-number : var.project_number
deployment_service_account_name = var.deployment_service_account_name
curl_impersonation = local.local_curl_impersonation
aws_omni_biglake_dataset_region = var.aws_omni_biglake_dataset_region
aws_omni_biglake_dataset_name = var.aws_omni_biglake_dataset_name
azure_omni_biglake_dataset_name = var.azure_omni_biglake_dataset_name
azure_omni_biglake_dataset_region = var.azure_omni_biglake_dataset_region
terraform_service_account = module.service-account.deployment_service_account
depends_on = [
module.project,
module.service-account,
module.apis-batch-enable,
time_sleep.service_account_api_activation_time_delay,
module.org-policies,
]
}
####################################################################################
# Deploy BigQuery stored procedures / sql scripts
###################################################################################
module "sql-scripts" {
source = "../terraform-modules/sql-scripts"
# Use Service Account Impersonation for this step.
providers = { google = google.service_principal_impersonation }
gcp_account_name = var.gcp_account_name
project_id = local.local_project_id
storage_bucket = local.local_storage_bucket
random_extension = random_string.project_random.result
project_number = var.project_number == "" ? module.project[0].output-project-number : var.project_number
bigquery_region = var.bigquery_region
spanner_region = var.spanner_region
cloud_function_region = var.cloud_function_region
deployment_service_account_name = var.deployment_service_account_name
shared_demo_project_id = var.shared_demo_project_id
aws_omni_biglake_dataset_name = var.aws_omni_biglake_dataset_name
aws_omni_biglake_dataset_region = var.aws_omni_biglake_dataset_region
aws_omni_biglake_connection = var.aws_omni_biglake_connection
aws_omni_biglake_s3_bucket = var.aws_omni_biglake_s3_bucket
azure_omni_biglake_dataset_name = var.azure_omni_biglake_dataset_name
azure_omni_biglake_connection = local.local_azure_omni_biglake_connection
azure_omni_biglake_adls_name = var.azure_omni_biglake_adls_name
azure_omni_biglake_dataset_region = var.azure_omni_biglake_dataset_region
bigquery_rideshare_lakehouse_raw_dataset = module.resources.bigquery_rideshare_lakehouse_raw_dataset
gcs_rideshare_lakehouse_raw_bucket = module.resources.gcs_rideshare_lakehouse_raw_bucket
bigquery_rideshare_lakehouse_enriched_dataset = module.resources.bigquery_rideshare_lakehouse_enriched_dataset
gcs_rideshare_lakehouse_enriched_bucket = module.resources.gcs_rideshare_lakehouse_enriched_bucket
bigquery_rideshare_lakehouse_curated_dataset = module.resources.bigquery_rideshare_lakehouse_curated_dataset
gcs_rideshare_lakehouse_curated_bucket = module.resources.gcs_rideshare_lakehouse_curated_bucket
bigquery_rideshare_llm_raw_dataset = module.resources.bigquery_rideshare_llm_raw_dataset
bigquery_rideshare_llm_enriched_dataset = module.resources.bigquery_rideshare_llm_enriched_dataset
bigquery_rideshare_llm_curated_dataset = module.resources.bigquery_rideshare_llm_curated_dataset
bigquery_cleanroom_dataset = module.resources.bigquery_cleanroom_dataset
depends_on = [
module.project,
module.service-account,
module.apis-batch-enable,
time_sleep.service_account_api_activation_time_delay,
module.org-policies,
module.resources
]
}
# Deploy Dataform
module "dataform-module" {
source = "../terraform-modules/dataform"
# Use Service Account Impersonation for this step.
providers = { google = google.service_principal_impersonation }
project_id = local.local_project_id
project_number = var.project_number == "" ? module.project[0].output-project-number : var.project_number
dataform_region = var.dataform_region
storage_bucket = local.local_storage_bucket
curl_impersonation = local.local_curl_impersonation
bigquery_region = var.bigquery_region
depends_on = [
module.project,
module.service-account,
module.apis-batch-enable,
time_sleep.service_account_api_activation_time_delay,
module.org-policies,
module.resources
]
}
# Upload files and scripts
module "deploy-files-module" {
source = "../terraform-modules/deploy-files"
# Use Service Account Impersonation for this step.
providers = { google = google.service_principal_impersonation }
project_id = local.local_project_id
dataplex_region = var.dataplex_region
storage_bucket = local.local_storage_bucket
random_extension = random_string.project_random.result
deployment_service_account_name = var.deployment_service_account_name
composer_name = module.resources.composer_env_name
composer_dag_bucket = module.resources.composer_env_dag_bucket
demo_rest_api_service_uri = module.resources.demo_rest_api_service_uri
code_bucket_name = module.resources.gcs_code_bucket
bigquery_rideshare_llm_raw_dataset = module.resources.bigquery_rideshare_llm_raw_dataset
bigquery_rideshare_llm_enriched_dataset = module.resources.bigquery_rideshare_llm_enriched_dataset
bigquery_rideshare_llm_curated_dataset = module.resources.bigquery_rideshare_llm_curated_dataset
bigquery_region = var.bigquery_region
gcs_rideshare_lakehouse_raw_bucket = module.resources.gcs_rideshare_lakehouse_raw_bucket
cloud_run_service_rideshare_plus_website_url = module.resources.cloud_run_service_rideshare_plus_website_url
depends_on = [
module.project,
module.service-account,
module.apis-batch-enable,
time_sleep.service_account_api_activation_time_delay,
module.org-policies,
module.resources
]
}
####################################################################################
# Deploy notebooks to Colab -> Create the Dataform repo and files (base64 encoded)
####################################################################################
module "deploy-notebooks-module-create-files" {
source = "../terraform-modules/colab-deployment-create-files"
# Use Service Account Impersonation for this step.
providers = { google = google.service_principal_impersonation }
project_id = local.local_project_id
bigquery_rideshare_llm_raw_dataset = module.resources.bigquery_rideshare_llm_raw_dataset
bigquery_rideshare_llm_enriched_dataset = module.resources.bigquery_rideshare_llm_enriched_dataset
bigquery_rideshare_llm_curated_dataset = module.resources.bigquery_rideshare_llm_curated_dataset
gcs_rideshare_lakehouse_raw_bucket = module.resources.gcs_rideshare_lakehouse_raw_bucket
storage_bucket = local.local_storage_bucket
dataform_region = var.dataform_region
cloud_function_region = var.cloud_function_region
workflow_region = "us-central1"
random_extension = random_string.project_random.result
gcp_account_name = var.gcp_account_name
curl_impersonation = local.local_curl_impersonation
bigquery_region = var.bigquery_region
vertex_ai_region = var.vertex_ai_region
depends_on = [
module.project,
module.service-account,
module.apis-batch-enable,
time_sleep.service_account_api_activation_time_delay,
module.org-policies,
module.resources
]
}
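# A minimal sketch of how a notebook file can be base64 encoded for upload
# (assumed; the file path and template variables are illustrative only):
/*
locals {
  notebook_base64 = base64encode(templatefile("../colab-enterprise/sample-notebook.ipynb", {
    project_id = local.local_project_id
  }))
}
*/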
####################################################################################
# Deploy notebooks to Colab -> Push the notebooks
# This is a separate step since there is a race condition when the files are base64 encoded
####################################################################################
module "deploy-notebooks-module-deploy" {
source = "../terraform-modules/colab-deployment-deploy"
# Use Service Account Impersonation for this step.
providers = { google = google.service_principal_impersonation }
project_id = local.local_project_id
bigquery_rideshare_llm_raw_dataset = module.resources.bigquery_rideshare_llm_raw_dataset
bigquery_rideshare_llm_enriched_dataset = module.resources.bigquery_rideshare_llm_enriched_dataset
bigquery_rideshare_llm_curated_dataset = module.resources.bigquery_rideshare_llm_curated_dataset
gcs_rideshare_lakehouse_raw_bucket = module.resources.gcs_rideshare_lakehouse_raw_bucket
storage_bucket = local.local_storage_bucket
dataform_region = var.dataform_region
cloud_function_region = var.cloud_function_region
workflow_region = "us-central1"
random_extension = random_string.project_random.result
gcp_account_name = var.gcp_account_name
curl_impersonation = local.local_curl_impersonation
bigquery_region = var.bigquery_region
depends_on = [
module.project,
module.service-account,
module.apis-batch-enable,
time_sleep.service_account_api_activation_time_delay,
module.org-policies,
module.resources,
module.deploy-notebooks-module-create-files
]
}
####################################################################################
# Outputs (Gather from sub-modules)
# Not strictly needed, but output here for easy viewing
####################################################################################
output "gcp_account_name" {
value = var.gcp_account_name
}
output "project_id" {
value = local.local_project_id
}
output "project_number" {
value = var.project_number == "" ? module.project[0].output-project-number : var.project_number
}
output "deployment_service_account_name" {
value = var.deployment_service_account_name
}
output "org_id" {
value = var.org_id
}
output "billing_account" {
value = var.billing_account
}
output "region" {
value = var.default_region
}
output "zone" {
value = var.default_zone
}
output "composer_region" {
value = var.composer_region
}
output "dataform_region" {
value = var.dataform_region
}
output "dataplex_region" {
value = var.dataplex_region
}
output "dataproc_region" {
value = var.dataproc_region
}
output "dataflow_region" {
value = var.dataflow_region
}
output "bigquery_region" {
value = var.bigquery_region
}
output "bigquery_non_multi_region" {
value = var.bigquery_non_multi_region
}
output "spanner_region" {
value = var.spanner_region
}
output "spanner_config" {
value = var.spanner_config
}
output "datafusion_region" {
value = var.datafusion_region
}
output "vertex_ai_region" {
value = var.vertex_ai_region
}
output "cloud_function_region" {
value = var.cloud_function_region
}
output "data_catalog_region" {
value = var.data_catalog_region
}
output "dataproc_serverless_region" {
value = var.dataproc_serverless_region
}
output "cloud_sql_region" {
value = var.cloud_sql_region
}
output "cloud_sql_zone" {
value = var.cloud_sql_zone
}
output "datastream_region" {
value = var.datastream_region
}
output "shared_demo_project_id" {
value = var.shared_demo_project_id
}
output "aws_omni_biglake_dataset_region" {
value = var.aws_omni_biglake_dataset_region
}
output "aws_omni_biglake_dataset_name" {
value = var.aws_omni_biglake_dataset_name
}
output "aws_omni_biglake_connection" {
value = var.aws_omni_biglake_connection
}
output "aws_omni_biglake_s3_bucket" {
value = var.aws_omni_biglake_s3_bucket
}
output "azure_omni_biglake_adls_name" {
value = var.azure_omni_biglake_adls_name
}
output "azure_omni_biglake_dataset_name" {
value = var.azure_omni_biglake_dataset_name
}
output "azure_omni_biglake_dataset_region" {
value = var.azure_omni_biglake_dataset_region
}
output "random_string" {
value = random_string.project_random.result
}
output "local_storage_bucket" {
value = local.local_storage_bucket
}
output "local_impersonation_account" {
value = local.local_impersonation_account
}
output "local_curl_impersonation" {
value = local.local_curl_impersonation
}
output "local_azure_omni_biglake_connection" {
value = local.local_azure_omni_biglake_connection
}
output "deployment_service_account" {
value = module.service-account.deployment_service_account
}
output "gcs_raw_bucket" {
value = module.resources.gcs_raw_bucket
}
output "gcs_processed_bucket" {
value = module.resources.gcs_processed_bucket
}
output "gcs_code_bucket" {
value = module.resources.gcs_code_bucket
}
output "default_network" {
value = module.resources.default_network
}
/*
output "nat-router" {
value = module.resources.nat-router
}
*/
output "dataproc_subnet_name" {
value = module.resources.dataproc_subnet_name
}
output "dataproc_subnet_name_ip_cidr_range" {
value = module.resources.dataproc_subnet_name_ip_cidr_range
}
output "gcs_dataproc_bucket" {
value = module.resources.gcs_dataproc_bucket
}
output "dataproc_service_account" {
value = module.resources.dataproc_service_account
}
output "cloudcomposer_account_service_agent_v2_ext" {
value = module.resources.cloudcomposer_account_service_agent_v2_ext
}
output "composer_subnet" {
value = module.resources.composer_subnet
}
output "composer_subnet_ip_cidr_range" {
value = module.resources.composer_subnet_ip_cidr_range
}
output "composer_service_account" {
value = module.resources.composer_service_account
}
output "composer_env_name" {
value = module.resources.composer_env_name
}
output "composer_env_dag_bucket" {
value = module.resources.composer_env_dag_bucket
}
output "dataproc_serverless_subnet_name" {
value = module.resources.dataproc_serverless_subnet_name
}
output "dataproc_serverless_ip_cidr_range" {
value = module.resources.dataproc_serverless_ip_cidr_range
}
output "business_critical_taxonomy_aws_id" {
value = module.resources.business_critical_taxonomy_aws_id
}
output "business_critical_taxonomy_azure_id" {
value = module.resources.business_critical_taxonomy_azure_id
}
output "business_critical_taxonomy_id" {
value = module.resources.business_critical_taxonomy_id
}
output "bigquery_external_function" {
value = module.resources.bigquery_external_function
}
output "cloud_function_connection" {
value = module.resources.cloud_function_connection
}
output "biglake_connection" {
value = module.resources.biglake_connection
}
output "dataflow_subnet_name" {
value = module.resources.dataflow_subnet_name
}
output "dataflow_subnet_ip_cidr_range" {
value = module.resources.dataflow_subnet_ip_cidr_range
}
output "dataflow_service_account" {
value = module.resources.dataflow_service_account
}
output "bigquery_taxi_dataset" {
value = module.resources.bigquery_taxi_dataset
}
output "bigquery_thelook_ecommerce_dataset" {
value = module.resources.bigquery_thelook_ecommerce_dataset
}
output "bigquery_rideshare_lakehouse_raw_dataset" {
value = module.resources.bigquery_rideshare_lakehouse_raw_dataset
}
output "bigquery_rideshare_lakehouse_enriched_dataset" {
value = module.resources.bigquery_rideshare_lakehouse_enriched_dataset
}
output "bigquery_rideshare_lakehouse_curated_dataset" {
value = module.resources.bigquery_rideshare_lakehouse_curated_dataset
}
output "bigquery_rideshare_llm_raw_dataset" {
value = module.resources.bigquery_rideshare_llm_raw_dataset
}
output "bigquery_rideshare_llm_enriched_dataset" {
value = module.resources.bigquery_rideshare_llm_enriched_dataset
}
output "bigquery_rideshare_llm_curated_dataset" {
value = module.resources.bigquery_rideshare_llm_curated_dataset
}
output "gcs_rideshare_lakehouse_raw_bucket" {
value = module.resources.gcs_rideshare_lakehouse_raw_bucket
}
output "gcs_rideshare_lakehouse_enriched_bucket" {
value = module.resources.gcs_rideshare_lakehouse_enriched_bucket
}
output "gcs_rideshare_lakehouse_curated_bucket" {
value = module.resources.gcs_rideshare_lakehouse_curated_bucket
}
output "spanner_instance_id" {
value = "spanner-${random_string.project_random.result}"
}
output "dataform_repository" {
value = module.dataform-module.dataform_repository
}
output "dataplex_taxi_datalake" {
value = "taxi-data-lake-${random_string.project_random.result}"
}
output "dataplex_taxi_datalake_raw_zone" {
value = "taxi-raw-zone-${random_string.project_random.result}"
}
output "dataplex_taxi_datalake_curated_zone" {
value = "taxi-curated-zone-${random_string.project_random.result}"
}
output "dataplex_taxi_datalake_raw_bucket" {
value = "taxi-raw-bucket-${random_string.project_random.result}"
}
output "dataplex_taxi_datalake_processed_bucket" {
value = "taxi-processed-bucket-${random_string.project_random.result}"
}
output "dataplex_taxi_datalake_processed_datasets" {
value = "taxi-processed-datasets-${random_string.project_random.result}"
}
output "dataplex_ecommerce_datalake" {
value = "ecommerce-data-lake-${random_string.project_random.result}"
}
output "dataplex_ecommerce_datalake_curated_zone" {
value = "ecommerce-curated-zone-${random_string.project_random.result}"
}
output "dataplex_ecommerce_datalake_processed_datasets" {
value = "ecommerce-dataset-${random_string.project_random.result}"
}
output "demo_rest_api_service_uri" {
value = module.resources.demo_rest_api_service_uri
}
# Tells deploy.sh where to upload the Terraform output JSON file
# A file named "tf-output.json" will be placed at gs://${terraform-output-bucket}/terraform/output
output "terraform-output-bucket" {
value = module.resources.gcs_code_bucket
}
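# For example, deploy.sh can gather and upload the outputs roughly like this (assumed commands):
#   terraform output -json > tf-output.json
#   gsutil cp tf-output.json gs://<terraform-output-bucket>/terraform/output/tf-output.json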