####################################################################################
# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
####################################################################################
####################################################################################
# README
# This is the main entry point into the Terraform creation script
# This script can be run in different ways:
# 1. Run "source deploy.sh" in the root folder (this is for when you run locally or in Cloud Shell).
#    This will create the GCP project for you and deploy everything. The logged-in user needs to be
#    an Org Admin so the project can be created and permissions set.
#
# 2. If you already have a GCP project created, run Terraform directly and pass in the parameters.
#    Review the script deploy-use-existing-project.sh to see the required items and permissions.
# terraform apply \
# -var="gcp_account_name=${gcp_account_name}" \
# -var="project_id=${project_id}" \
# -var="project_number=${project_number}" \
# -var="deployment_service_account_name=${service_account_email}" \
# -var="org_id=${org_id}"
#
# Review the parameters. If a Project Number is passed in, it is assumed the GCP project has been created.
#
# Author: Adam Paternostro
#
# References:
# Terraform for Google: https://registry.terraform.io/providers/hashicorp/google/latest/docs
# https://www.terraform.io/language/resources/provisioners/local-exec
####################################################################################
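####################################################################################
# Illustrative examples (not exhaustive; exact values depend on your environment):
#
# Option 1 (new project, run from the repo root):
#   source deploy.sh
#
# Option 2 (existing project): the terraform apply shown above can also pass any
# other variable declared for this configuration with -var, for example:
#   -var="billing_account=${billing_account}" -var="default_region=us-central1"
####################################################################################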
terraform {
required_providers {
google = {
source = "hashicorp/google-beta"
version = ">= 4.52, < 6"
configuration_aliases = [google.service_principal_impersonation]
}
}
}
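# NOTE: the local provider name "google" is mapped to the "hashicorp/google-beta"
# source above, so beta features are available to the google_* resources, and the
# "service_principal_impersonation" alias is declared for use by the sub-modules.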
####################################################################################
# Providers
# Multiple providers: https://www.terraform.io/language/providers/configuration
# The first is the default (the logged-in user) and creates the project and the service principal that provisions the resources
# The second uses the service account created by the first and is used to create the resources
####################################################################################
# Default provider (uses the logged in user to create the project and service principal for deployment)
provider "google" {
project = local.local_project_id
}
####################################################################################
# Provider that uses service account impersonation (best practice - no exported secret keys to local computers)
####################################################################################
provider "google" {
alias = "service_principal_impersonation"
impersonate_service_account = "${local.local_project_id}@${local.local_project_id}.iam.gserviceaccount.com"
project = local.local_project_id
region = var.default_region
zone = var.default_zone
}
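####################################################################################
# Example: sub-modules opt into the impersonated provider with
#   providers = { google = google.service_principal_impersonation }
# (see the org-policies, resources, sql-scripts and notebook modules below)
####################################################################################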
####################################################################################
# Creates the project and grants access to the current user (skipped when a project number is passed in)
####################################################################################
module "project" {
# Run this as the currently logged in user or the service account (assuming DevOps)
count = var.project_number == "" ? 1 : 0
source = "../terraform-modules/project"
project_id = local.local_project_id
org_id = var.org_id
billing_account = var.billing_account
}
####################################################################################
# Creates a service account that will be used to deploy the subsequent artifacts
####################################################################################
module "service-account" {
# Creates a service account; portions of the following deployment run by impersonating this account
source = "../terraform-modules/service-account"
project_id = local.local_project_id
org_id = var.org_id
impersonation_account = local.local_impersonation_account
gcp_account_name = var.gcp_account_name
environment = var.environment
depends_on = [
module.project
]
}
####################################################################################
# Enable all the cloud APIs that will be used, using Batch Mode
# Batch mode is enabled on the provider (by default)
####################################################################################
module "apis-batch-enable" {
source = "../terraform-modules/apis-batch-enable"
project_id = local.local_project_id
project_number = var.project_number == "" ? module.project[0].output-project-number : var.project_number
depends_on = [
module.project,
module.service-account
]
}
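####################################################################################
# Give the new service account and the newly enabled APIs time to propagate
# before they are used by the downstream modules
####################################################################################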
resource "time_sleep" "service_account_api_activation_time_delay" {
create_duration = "120s"
depends_on = [
module.project,
module.service-account,
module.apis-batch-enable
]
}
####################################################################################
# Turns off certain Org Policies that would block the deployment
# They will be re-enabled by a second call to Terraform
# This step is skipped when deploying into an existing project (it is assumed a person disabled them by hand)
####################################################################################
module "org-policies" {
count = var.environment == "GITHUB_ENVIRONMENT" && var.org_id != "0" ? 1 : 0
source = "../terraform-modules/org-policies"
# Use Service Account Impersonation for this step.
# NOTE: This step must be done using a service account (a user account cannot change these policies)
providers = { google = google.service_principal_impersonation }
project_id = local.local_project_id
depends_on = [
module.project,
module.service-account,
module.apis-batch-enable,
time_sleep.service_account_api_activation_time_delay
]
}
####################################################################################
# This deploys the majority of the Google Cloud infrastructure
####################################################################################
module "resources" {
source = "../terraform-modules/resources"
# Use Service Account Impersonation for this step.
providers = { google = google.service_principal_impersonation }
gcp_account_name = var.gcp_account_name
project_id = local.local_project_id
local_curl_impersonation = local.local_curl_impersonation
dataplex_region = var.dataplex_region
multi_region = var.multi_region
bigquery_non_multi_region = var.bigquery_non_multi_region
vertex_ai_region = var.vertex_ai_region
data_catalog_region = var.data_catalog_region
appengine_region = var.appengine_region
colab_enterprise_region = var.colab_enterprise_region
dataflow_region = var.dataflow_region
dataproc_region = var.dataproc_region
kafka_region = var.kafka_region
random_extension = random_string.project_random.result
project_number = var.project_number == "" ? module.project[0].output-project-number : var.project_number
deployment_service_account_name = var.deployment_service_account_name
terraform_service_account = module.service-account.deployment_service_account
bigquery_governed_data_raw_dataset = var.bigquery_governed_data_raw_dataset
bigquery_governed_data_enriched_dataset = var.bigquery_governed_data_enriched_dataset
bigquery_governed_data_curated_dataset = var.bigquery_governed_data_curated_dataset
bigquery_analytics_hub_publisher_dataset = var.bigquery_analytics_hub_publisher_dataset
bigquery_analytics_hub_subscriber_dataset = var.bigquery_analytics_hub_subscriber_dataset
governed_data_raw_bucket = local.governed_data_raw_bucket
governed_data_enriched_bucket = local.governed_data_enriched_bucket
governed_data_curated_bucket = local.governed_data_curated_bucket
governed_data_code_bucket = local.code_bucket
dataflow_staging_bucket = local.dataflow_staging_bucket
governed_data_scan_bucket = local.governed_data_scan_bucket
depends_on = [
module.project,
module.service-account,
module.apis-batch-enable,
time_sleep.service_account_api_activation_time_delay,
module.org-policies,
]
}
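# TIP (illustrative): when iterating on just this module you can limit the apply to
# it with Terraform's -target flag, e.g.:
#   terraform apply -target=module.resources -var="project_id=${project_id}" ...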
####################################################################################
# Deploy BigQuery stored procedures / sql scripts
####################################################################################
module "sql-scripts" {
source = "../terraform-modules/sql-scripts"
# Use Service Account Impersonation for this step.
providers = { google = google.service_principal_impersonation }
gcp_account_name = var.gcp_account_name
project_id = local.local_project_id
dataplex_region = var.dataplex_region
multi_region = var.multi_region
bigquery_non_multi_region = var.bigquery_non_multi_region
vertex_ai_region = var.vertex_ai_region
data_catalog_region = var.data_catalog_region
appengine_region = var.appengine_region
colab_enterprise_region = var.colab_enterprise_region
random_extension = random_string.project_random.result
project_number = var.project_number == "" ? module.project[0].output-project-number : var.project_number
deployment_service_account_name = var.deployment_service_account_name
terraform_service_account = module.service-account.deployment_service_account
bigquery_governed_data_raw_dataset = var.bigquery_governed_data_raw_dataset
bigquery_governed_data_enriched_dataset = var.bigquery_governed_data_enriched_dataset
bigquery_governed_data_curated_dataset = var.bigquery_governed_data_curated_dataset
bigquery_analytics_hub_publisher_dataset = var.bigquery_analytics_hub_publisher_dataset
governed_data_raw_bucket = local.governed_data_raw_bucket
governed_data_enriched_bucket = local.governed_data_enriched_bucket
governed_data_curated_bucket = local.governed_data_curated_bucket
governed_data_code_bucket = local.code_bucket
governed_data_scan_bucket = local.governed_data_scan_bucket
depends_on = [
module.project,
module.service-account,
module.apis-batch-enable,
time_sleep.service_account_api_activation_time_delay,
module.org-policies,
module.resources
]
}
####################################################################################
# Deploy notebooks to Colab -> Create the Dataform repo and files (base64 encoded)
####################################################################################
module "deploy-notebooks-module-create-files" {
source = "../terraform-modules/colab-deployment-create-files"
# Use Service Account Impersonation for this step.
providers = { google = google.service_principal_impersonation }
project_id = local.local_project_id
multi_region = var.multi_region
vertex_ai_region = var.vertex_ai_region
bigquery_governed_data_raw_dataset = var.bigquery_governed_data_raw_dataset
bigquery_governed_data_enriched_dataset = var.bigquery_governed_data_enriched_dataset
bigquery_governed_data_curated_dataset = var.bigquery_governed_data_curated_dataset
bigquery_analytics_hub_publisher_dataset = var.bigquery_analytics_hub_publisher_dataset
governed_data_raw_bucket = local.governed_data_raw_bucket
governed_data_enriched_bucket = local.governed_data_enriched_bucket
governed_data_curated_bucket = local.governed_data_curated_bucket
governed_data_code_bucket = local.code_bucket
governed_data_scan_bucket = local.governed_data_scan_bucket
dataform_region = "us-central1"
dataproc_region = var.dataproc_region
dataplex_region = var.dataplex_region
random_extension = random_string.project_random.result
gcp_account_name = var.gcp_account_name
dataflow_staging_bucket = local.dataflow_staging_bucket
dataflow_service_account = module.resources.dataflow_service_account
depends_on = [
module.project,
module.service-account,
module.apis-batch-enable,
time_sleep.service_account_api_activation_time_delay,
module.org-policies,
module.resources
]
}
####################################################################################
# Deploy notebooks to Colab -> Push the notebooks
# This is a separate step since there is a race condition when the files are base64 encoded
####################################################################################
module "deploy-notebooks-module-deploy" {
source = "../terraform-modules/colab-deployment-deploy"
# Use Service Account Impersonation for this step.
providers = { google = google.service_principal_impersonation }
project_id = local.local_project_id
multi_region = var.multi_region
vertex_ai_region = var.vertex_ai_region
bigquery_governed_data_raw_dataset = var.bigquery_governed_data_raw_dataset
bigquery_governed_data_enriched_dataset = var.bigquery_governed_data_enriched_dataset
bigquery_governed_data_curated_dataset = var.bigquery_governed_data_curated_dataset
bigquery_analytics_hub_publisher_dataset = var.bigquery_analytics_hub_publisher_dataset
governed_data_raw_bucket = local.governed_data_raw_bucket
governed_data_enriched_bucket = local.governed_data_enriched_bucket
governed_data_curated_bucket = local.governed_data_curated_bucket
governed_data_code_bucket = local.code_bucket
governed_data_scan_bucket = local.governed_data_scan_bucket
dataform_region = "us-central1"
random_extension = random_string.project_random.result
gcp_account_name = var.gcp_account_name
dataflow_staging_bucket = local.dataflow_staging_bucket
dataflow_service_account = module.resources.dataflow_service_account
depends_on = [
module.project,
module.service-account,
module.apis-batch-enable,
time_sleep.service_account_api_activation_time_delay,
module.org-policies,
module.resources,
module.deploy-notebooks-module-create-files
]
}
####################################################################################
# Outputs (gathered from sub-modules)
# Not strictly needed, but output for reference
####################################################################################
output "gcp_account_name" {
value = var.gcp_account_name
}
output "project_id" {
value = local.local_project_id
}
output "project_number" {
value = var.project_number == "" ? module.project[0].output-project-number : var.project_number
}
output "deployment_service_account_name" {
value = var.deployment_service_account_name
}
output "org_id" {
value = var.org_id
}
output "billing_account" {
value = var.billing_account
}
output "region" {
value = var.default_region
}
output "zone" {
value = var.default_zone
}
output "dataplex_region" {
value = var.dataplex_region
}
output "multi_region" {
value = var.multi_region
}
output "bigquery_non_multi_region" {
value = var.bigquery_non_multi_region
}
output "vertex_ai_region" {
value = var.vertex_ai_region
}
output "data_catalog_region" {
value = var.data_catalog_region
}
output "appengine_region" {
value = var.appengine_region
}
output "random_string" {
value = random_string.project_random.result
}
output "local_impersonation_account" {
value = local.local_impersonation_account
}
output "deployment_service_account" {
value = module.service-account.deployment_service_account
}
# Tells deploy.sh where to upload the Terraform output json file
# A file named "tf-output.json" will be placed at gs://${terraform-output-bucket}/terraform/output
output "terraform-output-bucket" {
value = local.code_bucket
}
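####################################################################################
# Example (illustrative): deploy.sh can capture and upload these outputs with
# something like:
#   terraform output -json > tf-output.json
#   gsutil cp tf-output.json gs://<terraform-output-bucket>/terraform/output/tf-output.json
####################################################################################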