terraform/dataform.tf (113 lines of code) (raw):

# Copyright 2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Search for and read the dataform.json file on the configured branch of each
# input Dataform repository. The GitHub repository path comes from
# local.git_path, which is defined outside this file.
data "github_repository_file" "dataform_config" {
  for_each   = var.dataform_repositories
  repository = local.git_path[each.key]
  branch     = each.value.branch
  file       = "dataform.json"
}

# Custom service account handed to the Dataform repositories created below
# (see `service_account` in module.dataform_with_external_repos), together
# with the project-level roles it is granted.
module "aef-dataform-service-account" {
  source     = "github.com/GoogleCloudPlatform/cloud-foundation-fabric/modules/iam-service-account"
  project_id = var.project
  name       = "aef-dataform-service-account"
  iam_project_roles = {
    "${var.project}" = [
      "roles/dataform.serviceAgent",
      "roles/iam.serviceAccountTokenCreator",
      "roles/bigquery.admin"
    ]
  }
}

# In order to enable Dataform to communicate with a third-party Git provider,
# an access token must be generated and stored as a secret on GCP.
# One secret (with a single enabled version holding the Git token) is created
# per repository; both the Dataform service agent and the custom service
# account are granted accessor rights on it.
module "secrets" {
  for_each   = local.dataform_repositories
  source     = "github.com/GoogleCloudPlatform/cloud-foundation-fabric/modules/secret-manager"
  project_id = var.project
  secrets = {
    "${each.value.secret_name}" = {}
  }
  versions = {
    "${each.value.secret_name}" = {
      "${each.value.secret_version}" = {
        enabled = true,
        data    = var.dataform_repositories_git_token
      }
    }
  }
  iam = {
    "${each.value.secret_name}" = {
      "roles/secretmanager.secretAccessor" = [
        "serviceAccount:service-${data.google_project.project.number}@gcp-sa-dataform.iam.gserviceaccount.com",
        module.aef-dataform-service-account.iam_email
      ]
    }
  }
  # The Dataform service agent referenced in `iam` only exists once a first
  # repository has been created (see module.dataform below).
  depends_on = [module.dataform]
}

# Grant the Dataform service agent token-creator and service-account-user
# rights on the custom service account.
resource "google_service_account_iam_member" "dataform_permissions" {
  for_each           = toset(["roles/iam.serviceAccountTokenCreator", "roles/iam.serviceAccountUser"])
  service_account_id = module.aef-dataform-service-account.id
  role               = each.key
  member             = "serviceAccount:service-${data.google_project.project.number}@gcp-sa-dataform.iam.gserviceaccount.com"
  depends_on         = [module.dataform_with_external_repos, module.secrets, module.dataform]
}

# Creates a Dataform repository with a remote repository attached to it, one
# per entry of local.dataform_repositories (none when
# var.create_dataform_repositories is false).
module "dataform_with_external_repos" {
  for_each        = var.create_dataform_repositories ? local.dataform_repositories : {}
  source          = "github.com/GoogleCloudPlatform/cloud-foundation-fabric/modules/dataform-repository"
  project_id      = var.project
  name            = each.key
  region          = var.region
  service_account = module.aef-dataform-service-account.email
  remote_repository_settings = {
    url         = each.value.remote_repo_url
    branch      = each.value.branch
    secret_name = each.value.secret_name
    # The secrets module output is looked up with a "<secret_name>:<secret_version>" key.
    secret_version = module.secrets[each.key].version_ids["${each.value.secret_name}:${each.value.secret_version}"]
  }
  depends_on = [module.dataform]
}

# Dataform has a known issue where the default service account is not created
# until the first repository is created, so a placeholder repository is
# created up front.
module "dataform" {
  source     = "github.com/GoogleCloudPlatform/cloud-foundation-fabric/modules/dataform-repository"
  project_id = var.project
  name       = "aef-default-repo"
  region     = var.region
}

/*
Create datasets defined via dataform.json variables, if any. The file should
include 3 variables for each dataset, in the following format:
  "dataset_id_<DATASET_IDENTIFIER>":"<YOUR_DATASET_NAME>",
  "dataset_projectid_<DATASET_IDENTIFIER>":"<YOUR_DATASET_PROJECT>",
  "dataset_location_<DATASET_IDENTIFIER>":"<YOUR_DATASET_LOCATION>",
*/
resource "google_bigquery_dataset" "dataform_datasets" {
  # Only the entries of local.all_created_datasets flagged `from_dataform`
  # are created here, and only when var.create_dataform_datasets is true.
  for_each = var.create_dataform_datasets ? {
    for k, v in local.all_created_datasets : k => v if v.from_dataform
  } : {}
  dataset_id  = each.value.dataset_id
  project     = each.value.project
  location    = each.value.location
  description = each.value.description
}

# Prepare the local Python virtual environment used to run the Dataform
# scripts found in the repositories.
# NOTE(review): the command does not use each.key / each.value, so every
# for_each instance builds the same `aef_dataform_executor` venv in the
# working directory; a single instance would likely be enough -- confirm
# before changing, since other files may reference the indexed addresses.
resource "null_resource" "install_dataform_dependencies" {
  for_each = var.compile_dataform_repositories ? local.dataform_repositories : {}
  provisioner "local-exec" {
    # No `interpreter` is set, so on non-Windows hosts this runs under
    # /bin/sh. The POSIX `.` builtin is used instead of the bash-only
    # `source`, which is unavailable when /bin/sh is dash; `set -e` stops at
    # the first failing step instead of installing outside the venv.
    command = <<-EOF
      set -e
      python3 -m venv aef_dataform_executor
      . aef_dataform_executor/bin/activate
      pip install google-api-core
      pip install google-cloud-dataform
      pip install google-cloud-asset
    EOF
  }
  depends_on = [
    google_service_account_iam_member.dataform_permissions,
    module.dataform_with_external_repos,
    null_resource.run_metadata_deployer
  ]
  # timestamp() changes on every run, forcing the provisioner to re-execute
  # on each apply.
  triggers = {
    always_run = timestamp()
  }
}

# Run the Dataform runner script for each repository using the Python
# interpreter of the venv prepared above.
data "external" "dataform_deploy" {
  for_each = var.compile_dataform_repositories ? local.dataform_repositories : {}
  program = [
    "aef_dataform_executor/bin/python3", "../cicd-deployers/dataform_runner.py",
    "--project_id", var.project,
    "--project_number", data.google_project.project.number,
    "--location", var.region,
    "--repository", each.key,
    "--tags", "ddl",
    "--execute", var.execute_dataform_repositories,
    "--branch", each.value.branch
  ]
  depends_on = [
    null_resource.install_dataform_dependencies,
    google_service_account_iam_member.dataform_permissions
  ]
}