# infrastructure/terraform/main.tf
# Copyright 2023 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# This file contains the main configuration for the Marketing Analytics Jumpstart solution.
# This is the main entry point for the Terraform configuration.
# The configuration is divided into multiple modules.
# Each module contains the configuration for a specific component of the solution.
# The modules are:
# - feature_store: The feature store module contains the configuration for the feature store.
# - data_store: The data_store module contains the configuration for the marketing data store.
# - pipelines: The pipelines module contains the configuration for the ML pipelines.
# - activation: The activation module contains the configuration for the activation application.
# - monitoring: The monitoring module contains the configuration for the monitoring dashboards in Looker Studio.
#
# The configuration is unique for each environment. If you want to deploy the solution in a
# multi-environment scenario, you can create a separate Terraform configuration for each environment.
#
# This solution is designed to be deployed in a Google Cloud project.
# The Terraform backend used is Google Cloud Storage.
# The Terraform provider used is Google Cloud.
#
# As a Platform Engineer, you are responsible for maintaining the terraform.tfvars file and the backend.
# The terraform.tfvars file contains the configuration values for the solution.
# The backend stores the Terraform state file.
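#
# For reference, a GCS backend definition (typically kept in a separate backend file) looks like
# the sketch below; the bucket name and prefix are hypothetical placeholders, not values used by
# this solution:
#
# terraform {
#   backend "gcs" {
#     bucket = "my-terraform-state-bucket" # hypothetical bucket name
#     prefix = "marketing-analytics-jumpstart/state"
#   }
# }
#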
# Configure the Google Cloud provider region for this solution.
# You can set the region in the terraform.tfvars file.
# The default region is us-central1.
# You can deploy and migrate the solution across several regions; check the documentation for more information.
provider "google" {
region = var.google_default_region
}
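# For example, to override the default region, set the variable in terraform.tfvars
# (the value below is illustrative):
#
#   google_default_region = "europe-west1"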
data "google_project" "main_project" {
provider = google
project_id = var.main_project_id
}
data "google_project" "feature_store_project" {
provider = google
project_id = var.feature_store_project_id
}
data "google_project" "activation_project" {
provider = google
project_id = var.activation_project_id
}
data "google_project" "data_processing_project" {
provider = google
project_id = var.data_processing_project_id
}
data "google_project" "data_project" {
provider = google
project_id = var.data_project_id
}
# The locals block defines values that are computed once and reused throughout the configuration.
locals {
# The source_root_dir is the root directory of the project.
source_root_dir = "../.."
# The config_file_name is the name of the config file.
config_file_name = "config"
# The uv_run_alias is the alias of the uv run command.
uv_run_alias = "${var.uv_cmd} run"
# The mds_dataset_suffix is the suffix of the marketing data store dataset.
mds_dataset_suffix = var.property_id
# The project_toml_file_path is the path to the project.toml file.
project_toml_file_path = "${local.source_root_dir}/pyproject.toml"
# The project_toml_content_hash is the hash of the project.toml file.
# This is used for the triggers of the local-exec provisioner.
project_toml_content_hash = filesha512(local.project_toml_file_path)
# The generated_sql_queries_directory_path is the path to the generated sql queries directory.
generated_sql_queries_directory_path = "${local.source_root_dir}/sql/query"
# The generated_sql_queries_fileset is the list of files in the generated sql queries directory.
generated_sql_queries_fileset = [for f in fileset(local.generated_sql_queries_directory_path, "*.sqlx") : "${local.generated_sql_queries_directory_path}/${f}"]
  # The generated_sql_queries_content_hash is the sha512 hash of the concatenated per-file sha512 hashes in the generated sql queries directory.
generated_sql_queries_content_hash = sha512(join("", [for f in local.generated_sql_queries_fileset : fileexists(f) ? filesha512(f) : sha512("file-not-found")]))
# The generated_sql_procedures_directory_path is the path to the generated sql procedures directory.
generated_sql_procedures_directory_path = "${local.source_root_dir}/sql/procedure"
# The generated_sql_procedures_fileset is the list of files in the generated sql procedures directory.
generated_sql_procedures_fileset = [for f in fileset(local.generated_sql_procedures_directory_path, "*.sqlx") : "${local.generated_sql_procedures_directory_path}/${f}"]
  # The generated_sql_procedures_content_hash is the sha512 hash of the concatenated per-file sha512 hashes in the generated sql procedures directory.
generated_sql_procedures_content_hash = sha512(join("", [for f in local.generated_sql_procedures_fileset : fileexists(f) ? filesha512(f) : sha512("file-not-found")]))
}
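# Note on the hashing pattern above: fileset() returns paths relative to its first argument,
# which is why each entry is re-prefixed with the directory before hashing. A change to any
# single .sqlx file changes its filesha512(), which changes the joined string and therefore
# the outer sha512(), forcing the dependent triggers below to fire.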
# Create a configuration file for the solution.
# The template file is located at
# ${local.source_root_dir}/config/${var.global_config_env}.yaml.tftpl.
# The global_config_env variable can be set in the terraform.tfvars file; its default value is "config".
#
# The template file contains the configuration for the feature store.
# The variables that are replaced with values from the Terraform configuration are:
# project_id: The ID of the Google Cloud project that the feature store will be created in.
# project_name: The name of the Google Cloud project that the feature store will be created in.
# project_number: The number of the Google Cloud project that the feature store will be created in.
# cloud_region: The region in which the feature store will be created.
# mds_project_id: The ID of the Google Cloud project that hosts the marketing data store.
# mds_dataset: The name of the marketing data store dataset.
# pipelines_github_owner: The owner of the GitHub repository that contains the pipelines code.
# pipelines_github_repo: The name of the GitHub repository that contains the pipelines code.
# location: The location in which the feature store will be created.
resource "local_file" "global_configuration" {
filename = "${local.source_root_dir}/config/${local.config_file_name}.yaml"
content = templatefile("${local.source_root_dir}/config/${var.global_config_env}.yaml.tftpl", {
project_id = var.main_project_id
project_name = data.google_project.main_project.name
project_number = data.google_project.main_project.number
cloud_region = var.google_default_region
mds_project_id = var.data_project_id
mds_dataset = "${var.mds_dataset_prefix}_${local.mds_dataset_suffix}"
website_url = var.website_url
pipelines_github_owner = var.pipelines_github_owner
pipelines_github_repo = var.pipelines_github_repo
# TODO: this needs to be specific to environment.
location = var.destination_data_location
time_zone = var.time_zone
pipeline_configuration = var.pipeline_configuration
non_ecomm_events_list = var.non_ecomm_events_list
non_ecomm_target_event = var.non_ecomm_target_event
})
}
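# As a minimal, illustrative sketch (not the actual template shipped with the solution), a
# .yaml.tftpl file consumes the variables passed to templatefile() like this:
#
#   google_cloud_project:
#     project_id: "${project_id}"
#     project_name: "${project_name}"
#     project_number: "${project_number}"
#     region: "${cloud_region}"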
data "external" "check_ga4_property_type" {
program = ["bash", "-c", "${local.uv_run_alias} ga4-setup --ga4_resource=check_property_type --ga4_property_id=${var.ga4_property_id} --ga4_stream_id=${var.ga4_stream_id}"]
working_dir = local.source_root_dir
}
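# The external data source contract requires the program to print a JSON object of string
# values on stdout. Here the ga4-setup command is expected to emit something like
# {"supported": "True"}, which the lifecycle precondition in generate_sql_queries checks below.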
# Runs the Invoke (inv) tasks through uv to generate the sql queries and procedures.
# This command is executed before the feature store is created.
resource "null_resource" "generate_sql_queries" {
triggers = {
    # The create command generates the sql queries and procedures.
    # The command expands to: ${var.uv_cmd} run inv [task_name] --env-name=${local.config_file_name}
    # The --env-name argument is the name of the configuration file.
create_command = <<-EOT
${local.uv_run_alias} inv apply-config-parameters-to-all-queries --env-name=${local.config_file_name}
${local.uv_run_alias} inv apply-config-parameters-to-all-procedures --env-name=${local.config_file_name}
EOT
# The destroy command removes the generated sql queries and procedures.
destroy_command = <<-EOT
rm -f sql/query/*.sql
rm -f sql/procedure/*.sql
EOT
# The working directory is the root of the project.
working_dir = local.source_root_dir
    # The source_contents_hash trigger is the sha512 hash of the generated configuration file.
    # It ensures the commands re-run whenever the configuration file changes.
    # The destination hashes ensure the commands re-run whenever the generated sql queries and procedures change.
source_contents_hash = local_file.global_configuration.content_sha512
destination_queries_hash = local.generated_sql_queries_content_hash
destination_procedures_hash = local.generated_sql_procedures_content_hash
}
  # Runs on resource creation, including re-creation whenever any of the trigger values change.
provisioner "local-exec" {
when = create
command = self.triggers.create_command
working_dir = self.triggers.working_dir
}
  # The destroy-time provisioner below is currently commented out; if enabled, it would run
  # the destroy_command when `terraform destroy` removes the resource.
#provisioner "local-exec" {
# when = destroy
# command = self.triggers.destroy_command
# working_dir = self.triggers.working_dir
#}
lifecycle {
precondition {
condition = data.external.check_ga4_property_type.result["supported"] == "True"
error_message = "The configured GA4 property is not supported"
}
}
}
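# If the generated files ever need to be rebuilt without changing any of the inputs above,
# the resource can be re-created explicitly, for example:
#
#   terraform apply -replace=null_resource.generate_sql_queries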
# Create the data store module.
# The data store module creates the marketing data store in BigQuery and the ETL pipeline in Dataform
# for the marketing data from Google Ads and Google Analytics.
# The Dataform environment is created only if the `deploy_dataform` variable is set to true in the terraform.tfvars file.
# The data store is created in the `data_project_id` project.
module "data_store" {
# The source directory of the data store module.
source = "./modules/data-store"
# The google_default_region variable is set in the terraform.tfvars file. Its default value is "us-central1".
google_default_region = var.google_default_region
# The dataform_region is set in the terraform.tfvars file. Its default value is "us-central1".
dataform_region = var.dataform_region
# The source_ga4_export_project_id is set in the terraform.tfvars file.
# The source_ga4_export_dataset is set in the terraform.tfvars file.
# The source_ads_export_data is set in the terraform.tfvars file.
source_ga4_export_project_id = var.source_ga4_export_project_id
source_ga4_export_dataset = var.source_ga4_export_dataset
source_ads_export_data = var.source_ads_export_data
ga4_incremental_processing_days_back = var.ga4_incremental_processing_days_back
# The data_processing_project_id is set in the terraform.tfvars file.
# The data_project_id is set in the terraform.tfvars file.
# The destination_data_location is set in the terraform.tfvars file.
data_processing_project_id = var.data_processing_project_id
data_project_id = var.data_project_id
destination_data_location = var.destination_data_location
# The dataform_github_repo is set in the terraform.tfvars file.
# The dataform_github_token is set in the terraform.tfvars file.
dataform_github_repo = var.dataform_github_repo
dataform_github_token = var.dataform_github_token
  # The deploy_dataform variable determines whether the Dataform environment is created.
deploy_dataform = var.deploy_dataform
property_id = var.property_id
  # The dev_data_project_id is the project ID where the dev datasets will be created.
  # If not provided, data_project_id will be used.
  # The dev_destination_data_location is the location of the dev datasets.
  # If not provided, destination_data_location will be used.
dev_data_project_id = var.dev_data_project_id
dev_destination_data_location = var.dev_destination_data_location
  # The staging_data_project_id is the project ID where the staging datasets will be created.
# If not provided, data_project_id will be used.
# The staging_destination_data_location is the location of the staging datasets.
# If not provided, destination_data_location will be used.
staging_data_project_id = var.staging_data_project_id
staging_destination_data_location = var.staging_destination_data_location
  # The prod_data_project_id is the project ID where the prod datasets will be created.
  # If not provided, data_project_id will be used.
  # The prod_destination_data_location is the location of the prod datasets.
# If not provided, destination_data_location will be used.
prod_data_project_id = var.prod_data_project_id
prod_destination_data_location = var.prod_destination_data_location
# The project_owner_email is set in the terraform.tfvars file.
# An example of a valid email address is "william.mckinley@my-own-personal-domain.com".
project_owner_email = var.project_owner_email
# Set the time zone for the scheduled jobs
time_zone = var.time_zone
}
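# A minimal set of data-store inputs in terraform.tfvars might look like the following
# (all values are hypothetical placeholders):
#
#   source_ga4_export_project_id = "my-ga4-export-project"
#   source_ga4_export_dataset    = "analytics_123456789"
#   data_project_id              = "my-mds-project"
#   destination_data_location    = "US"
#   deploy_dataform              = true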
#module "purchase_propensity" {
# # The source is the path to the purchase propensity module.
# source = "./modules/purchase-propensity"
# config_file_path = local_file.global_configuration.id != "" ? local_file.global_configuration.filename : ""
# enabled = var.deploy_purchase_propensity
# # The count determines if the purchase propensity module is created or not.
# # If the count is 1, the module is created.
# # If the count is 0, the module is not created.
# # This is done to avoid creating the module if the `deploy_purchase_propensity` variable is set to false in the terraform.tfvars file.
# count = var.deploy_purchase_propensity ? 1 : 0
# project_id = var.feature_store_project_id
# # The region is the region in which the purchase propensity resources are created.
# # This is set to the default region in the terraform.tfvars file.
# region = var.google_default_region
# # The sql_dir_input is the path to the generated sql directory at the root of the repository.
# sql_dir_input = null_resource.generate_sql_queries.id != "" ? "${local.source_root_dir}/sql" : ""
#}
# Create the feature store module.
# The feature store module creates the feature store and the sql queries and procedures in BigQuery.
# The feature store is created only if the `deploy_feature_store` variable is set to true in the terraform.tfvars file.
# The feature store is created in the `feature_store_project_id` project.
module "feature_store" {
# The source is the path to the feature store module.
source = "./modules/feature-store"
config_file_path = local_file.global_configuration.id != "" ? local_file.global_configuration.filename : ""
enabled = var.deploy_feature_store
  # The count determines if the feature store is created or not.
# If the count is 1, the feature store is created.
# If the count is 0, the feature store is not created.
# This is done to avoid creating the feature store if the `deploy_feature_store` variable is set to false in the terraform.tfvars file.
count = var.deploy_feature_store ? 1 : 0
project_id = var.feature_store_project_id
# The region is the region in which the feature store is created.
# This is set to the default region in the terraform.tfvars file.
region = var.google_default_region
  # The sql_dir_input is the path to the generated sql directory at the root of the repository.
sql_dir_input = null_resource.generate_sql_queries.id != "" ? "${local.source_root_dir}/sql" : ""
}
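# Because this module uses count, its outputs must be referenced with an index, for example
# module.feature_store[0].purchase_propensity_dataset_id, as the monitoring module does below.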
# Create the pipelines module.
# The pipelines module creates the ML pipelines in Vertex AI Pipelines.
# The pipelines are created only if the `deploy_pipelines` variable is set to true in the terraform.tfvars file.
# The pipelines are created in the `data_project_id` project.
module "pipelines" {
# The source is the path to the pipelines module.
source = "./modules/pipelines"
config_file_path = local_file.global_configuration.id != "" ? local_file.global_configuration.filename : ""
uv_run_alias = local.uv_run_alias
# The count determines if the pipelines are created or not.
# If the count is 1, the pipelines are created.
# If the count is 0, the pipelines are not created.
# This is done to avoid creating the pipelines if the `deploy_pipelines` variable is set to false in the terraform.tfvars file.
count = var.deploy_pipelines ? 1 : 0
  # The mds_project_id is the project in which the marketing data store lives.
  # This is set to the data_project_id variable in the terraform.tfvars file.
mds_project_id = var.data_project_id
}
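# The deploy_* toggles for the optional modules are all set in terraform.tfvars, for example
# (illustrative values):
#
#   deploy_feature_store = true
#   deploy_pipelines     = true
#   deploy_activation    = true
#   deploy_monitoring    = true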
# Create the activation module.
# The activation module creates the activation function in Cloud Functions.
# The activation function is created only if the `deploy_activation` variable is set to true in the terraform.tfvars file.
# The activation function is created in the `activation_project_id` project.
module "activation" {
# The source is the path to the activation module.
source = "./modules/activation"
# The project_id is the project in which the activation function is created.
# This is set to the activation project ID in the terraform.tfvars file.
project_id = var.activation_project_id
  # The project number of the project in which the activation function is created.
  # This is retrieved from the activation project ID using the google_project data source.
project_number = data.google_project.activation_project.number
# The location is the google_default_region variable.
# This is set to the default region in the terraform.tfvars file.
location = var.google_default_region
# The data_location is the destination_data_location variable.
# This is set to the destination data location in the terraform.tfvars file.
data_location = var.destination_data_location
# The trigger_function_location is the location of the trigger function.
# The trigger function is used to trigger the activation function.
# The trigger function is created in the same region as the activation function.
trigger_function_location = var.google_default_region
  # The uv_run_alias is the local alias for the uv run command.
  # It is derived from the uv_cmd variable, which can be set in the terraform.tfvars file.
uv_run_alias = local.uv_run_alias
  # The ga4_measurement_id is the GA4 Measurement ID.
  # It can be set via the ga4_measurement_id variable in the terraform.tfvars file.
ga4_measurement_id = var.ga4_measurement_id
  # The ga4_measurement_secret is the GA4 Measurement Protocol API secret.
  # It can be set via the ga4_measurement_secret variable in the terraform.tfvars file.
ga4_measurement_secret = var.ga4_measurement_secret
  # The ga4_property_id is the property ID of the GA4 property.
  # It is set via the ga4_property_id variable in the terraform.tfvars file.
  # You can find the property ID in the GA4 admin console.
ga4_property_id = var.ga4_property_id
  # The ga4_stream_id is the stream ID of the GA4 data stream.
  # It is set via the ga4_stream_id variable in the terraform.tfvars file.
  # You can find the stream ID in the GA4 admin console.
ga4_stream_id = var.ga4_stream_id
# The count determines if the activation function is created or not.
# If the count is 1, the activation function is created.
# If the count is 0, the activation function is not created.
# This is done to avoid creating the activation function if the `deploy_activation` variable is set
# to false in the terraform.tfvars file.
count = var.deploy_activation ? 1 : 0
mds_project_id = var.data_project_id
mds_dataset_suffix = local.mds_dataset_suffix
# The project_owner_email is set in the terraform.tfvars file.
# An example of a valid email address is "william.mckinley@my-own-personal-domain.com".
project_owner_email = var.project_owner_email
}
# Create the monitoring module.
# The monitoring module creates the monitoring resources in Cloud Monitoring and Looker Studio.
# The monitoring resources are created only if the `deploy_monitoring` variable is set to true in the terraform.tfvars file.
# The monitoring resources are created in the `data_project_id` project.
module "monitoring" {
source = "./modules/monitor"
count = var.deploy_monitoring ? 1 : 0
project_id = var.data_project_id
location = var.google_default_region
mds_project_id = var.data_project_id
mds_dataset_suffix = local.mds_dataset_suffix
mds_location = var.google_default_region
mds_dataform_workspace = var.dataform_workspace
feature_store_project_id = var.feature_store_project_id
activation_project_id = var.activation_project_id
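  # The [0] indexes below assume the feature_store and activation modules were created, so
  # enabling deploy_monitoring implies deploy_feature_store and deploy_activation are also true.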
purchase_propensity_dataset_id = module.feature_store[0].purchase_propensity_dataset_id
smart_bidding_configuration_table = module.activation[0].configuration_table_name
}