# terraform/stacks/common/main.tf
# Copyright 2021 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Enable APIs
# Enable Service Usage API
resource "google_project_service" "enable_service_usage_api" {
project = var.project
service = "serviceusage.googleapis.com"
disable_on_destroy = false
}
# Enable App Engine API (required by Cloud Scheduler)
resource "google_project_service" "enable_appengine" {
project = var.project
service = "appengine.googleapis.com"
disable_dependent_services = true
disable_on_destroy = false
}
# Enable Cloud Build API
resource "google_project_service" "enable_cloud_build" {
project = var.project
service = "cloudbuild.googleapis.com"
disable_on_destroy = false
}
# Enable the Cloud Run API
resource "google_project_service" "run_api" {
project = var.project
service = "run.googleapis.com"
disable_on_destroy = false
}
locals {
project_and_domains = distinct([
for entry in var.domain_mapping : {
project = entry["project"],
domain = entry["domain"]
}
])
# Only projects with configured domains
project_and_domains_filtered = [for entry in local.project_and_domains : entry if entry["domain"] != ""]
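# Illustrative shapes (hypothetical values), assuming a var.domain_mapping such as:
#   [
#     { project = "prj-a", domain = "dwh", datasets = [{ name = "ds1", domain = "marketing" }] },
#     { project = "prj-b", domain = "",    datasets = [] }
#   ]
# project_and_domains_filtered would then be [ { project = "prj-a", domain = "dwh" } ].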
datasets_and_domains = distinct(flatten([
for entry in var.domain_mapping : [
for dataset in lookup(entry, "datasets", []) : {
project = entry["project"],
dataset = dataset["name"],
domain = dataset["domain"]
}
]
]))
# Only datasets with configured domains
datasets_and_domains_filtered = [for entry in local.datasets_and_domains : entry if entry["domain"] != ""]
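# With the illustrative mapping above, datasets_and_domains_filtered would be
# [ { project = "prj-a", dataset = "ds1", domain = "marketing" } ].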
# Get distinct domains set on project entries
project_domains = distinct([
for entry in local.project_and_domains_filtered : entry["domain"]
])
# Get distinct domains set on dataset level
dataset_domains = distinct([
for entry in local.datasets_and_domains_filtered : entry["domain"]
])
# Concat the distinct project- and dataset-level domains (empty-string domains were already filtered out above)
domains = distinct(concat(local.project_domains, local.dataset_domains))
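# With the illustrative mapping above, domains would be ["dwh", "marketing"].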
# Comma-separated string of taxonomy names
created_taxonomies = join(",", [for taxonomy in module.data-catalog[*].created_taxonomy : taxonomy.name])
# One list of all policy tags generated across domain taxonomies.
# Each element of the list is a map with five attributes: policy_tag_id, domain, classification, info_type, region.
created_policy_tags = flatten(module.data-catalog[*].created_children_tags)
created_parent_tags = flatten(module.data-catalog[*].created_parent_tags)
auto_dlp_results_latest_view = "${var.auto_dlp_results_table_name}_latest_v1"
taxonomy_numbers = distinct([for x in var.classification_taxonomy : x["taxonomy_number"]])
# This returns a list of lists, e.g. [ ["europe-west3","dwh","1"], ["europe-west3","dwh","2"], ["europe-west3","marketing","1"], ["europe-west3","marketing","2"], ... ]
taxonomies_to_be_created = setproduct(tolist(var.source_data_regions), local.domains, local.taxonomy_numbers)
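# Illustrative cardinality: with 1 source data region, 2 domains and 2 taxonomy numbers
# (as in the example above), setproduct yields 1 * 2 * 2 = 4 taxonomy module instances.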
inspection_templates_count = max([for x in var.classification_taxonomy : x["inspection_template_number"]]...)
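# The "..." spreads the list into max()'s arguments, e.g. max([1, 2, 1]...) => 2.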
info_types_map = {
for item in var.classification_taxonomy : item["info_type"] => {
classification = item["classification"],
labels = item["labels"]
}
}
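# Illustrative result, assuming a classification_taxonomy entry such as
#   { info_type = "EMAIL_ADDRESS", classification = "P1", labels = ["contact_data"], ... }:
# info_types_map would contain { "EMAIL_ADDRESS" = { classification = "P1", labels = ["contact_data"] } }.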
created_dlp_inspection_templates = module.dlp[*].created_inspection_templates
}
module "data-catalog" {
count = length(local.taxonomies_to_be_created)
source = "../../modules/data-catalog"
project = var.project
region = local.taxonomies_to_be_created[count.index][0]
domain = local.taxonomies_to_be_created[count.index][1]
taxonomy_number = local.taxonomies_to_be_created[count.index][2]
# Only use the taxonomy nodes that are marked for this taxonomy number
classification_taxonomy = [for x in var.classification_taxonomy : x if x["taxonomy_number"] == local.taxonomies_to_be_created[count.index][2]]
data_catalog_taxonomy_activated_policy_types = var.data_catalog_taxonomy_activated_policy_types
taxonomy_name_suffix = var.taxonomy_name_suffix
}
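# Illustrative indexing (hypothetical values): with taxonomies_to_be_created[0] = ["europe-west3", "dwh", "1"],
# instance 0 creates taxonomy number "1" for the "dwh" domain in the europe-west3 region.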
module "bigquery" {
source = "../../modules/bigquery"
project = var.project
region = var.data_region
dataset = var.bigquery_dataset_name
logging_sink_sa = module.cloud_logging.service_account
# Data for config views
created_policy_tags = local.created_policy_tags
dataset_domains_mapping = local.datasets_and_domains_filtered
projects_domains_mapping = local.project_and_domains_filtered
standard_dlp_results_table_name = var.standard_dlp_results_table_name
inspection_templates_count = local.inspection_templates_count
terraform_data_deletion_protection = var.terraform_data_deletion_protection
}
module "cloud_logging" {
source = "../../modules/cloud-logging"
dataset = module.bigquery.results_dataset
project = var.project
log_sink_name = var.log_sink_name
}
# DLP
# Deploy one DLP inspection template in each source data region
module "dlp" {
count = length(var.source_data_regions)
source = "../../modules/dlp"
project = var.project
region = tolist(var.source_data_regions)[count.index] # create inspection template in the same region as source data
classification_taxonomy = var.classification_taxonomy
custom_info_types_dictionaries = var.custom_info_types_dictionaries
custom_info_types_regex = var.custom_info_types_regex
}
module "cloud_scheduler" {
source = "../../modules/cloud-scheduler"
project = var.project
region = var.compute_region
scheduler_name = var.scheduler_name
target_uri = module.pubsub-tagging-dispatcher.topic-id
datasets_include_list = var.datasets_include_list
projects_include_list = var.projects_include_list
datasets_exclude_list = var.datasets_exclude_list
tables_exclude_list = var.tables_exclude_list
cron_expression = var.cron_expression
depends_on = [google_project_service.enable_appengine]
}
module "iam" {
source = "../../modules/iam"
project = var.project
sa_tagger = var.sa_tagger
sa_tagger_tasks = var.sa_tagger_tasks
taxonomy_parent_tags = local.created_parent_tags
iam_mapping = var.iam_mapping
dlp_service_account = var.dlp_service_account
tagger_role = var.tagger_role
sa_tagging_dispatcher = var.sa_tagging_dispatcher
sa_tagging_dispatcher_tasks = var.sa_tagging_dispatcher_tasks
bq_results_dataset = module.bigquery.results_dataset
}
module "cloud-run-tagging-dispatcher" {
source = "../../modules/cloud-run"
project = var.project
region = var.compute_region
service_image = var.dispatcher_service_image
service_name = var.dispatcher_service_name
service_account_email = module.iam.sa_tagging_dispatcher_email
invoker_service_account_email = module.iam.sa_tagging_dispatcher_tasks_email
# The Dispatcher could take time to list a large number of tables
timeout_seconds = var.dispatcher_service_timeout_seconds
max_containers = 1
max_cpu = 2
environment_variables = [
{
name = "TAGGER_TOPIC",
value = module.pubsub-tagger.topic-name,
},
{
name = "COMPUTE_REGION_ID",
value = var.compute_region,
},
{
name = "DATA_REGION_ID",
value = var.data_region,
},
{
name = "SOURCE_DATA_REGIONS",
value = jsonencode(var.source_data_regions),
},
{
name = "PROJECT_ID",
value = var.project,
},
{
name = "GCS_FLAGS_BUCKET",
value = var.gcs_flags_bucket_name,
},
{
name = "SOLUTION_DATASET",
value = module.bigquery.results_dataset,
},
{
name = "DLP_TABLE_STANDARD",
value = module.bigquery.results_table_standard_dlp,
},
{
name = "DLP_TABLE_AUTO",
value = local.auto_dlp_results_latest_view,
},
{
name = "IS_AUTO_DLP_MODE",
value = tostring(var.is_auto_dlp_mode),
},
{
name = "LOGGING_TABLE",
value = module.bigquery.logging_table
},
{
name = "DLP_INSPECTION_TEMPLATES_IDS",
value = jsonencode(local.created_dlp_inspection_templates),
},
]
}
module "cloud-run-tagger" {
source = "../../modules/cloud-run"
project = var.project
region = var.compute_region
service_image = var.tagger_service_image
service_name = var.tagger_service_name
service_account_email = module.iam.sa_tagger_email
invoker_service_account_email = module.iam.sa_tagger_tasks_email
# No more than 80 concurrent requests per container, to stay within BigQuery API rate limits
max_containers = 1
max_requests_per_container = 80
# The Tagger uses BigQuery BATCH queries, which could take time to start
timeout_seconds = var.tagger_service_timeout_seconds
environment_variables = [
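# Cloud Run env var values must be strings; tostring() makes the boolean conversion explicit.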
{
name = "IS_DRY_RUN_TAGS",
value = tostring(var.is_dry_run_tags),
},
{
name = "IS_DRY_RUN_LABELS",
value = tostring(var.is_dry_run_labels),
},
{
name = "TAXONOMIES",
value = local.created_taxonomies,
},
{
name = "REGION_ID",
value = var.compute_region,
},
{
name = "PROJECT_ID",
value = var.project,
},
{
name = "GCS_FLAGS_BUCKET",
value = var.gcs_flags_bucket_name,
},
{
name = "DLP_DATASET",
value = module.bigquery.results_dataset,
},
{
name = "DLP_TABLE_STANDARD",
value = module.bigquery.results_table_standard_dlp,
},
{
name = "DLP_TABLE_AUTO",
value = local.auto_dlp_results_latest_view,
},
{
name = "VIEW_INFOTYPE_POLICYTAGS_MAP",
value = module.bigquery.config_view_infotype_policytag_map
},
{
name = "VIEW_DATASET_DOMAIN_MAP",
value = module.bigquery.config_view_dataset_domain_map
},
{
name = "VIEW_PROJECT_DOMAIN_MAP",
value = module.bigquery.config_view_project_domain_map
},
{
name = "PROMOTE_MIXED_TYPES",
value = tostring(var.promote_mixed_info_types),
},
{
name = "IS_AUTO_DLP_MODE",
value = tostring(var.is_auto_dlp_mode),
},
{
name = "INFO_TYPE_MAP",
value = jsonencode(local.info_types_map),
}
]
}
// PubSub
module "pubsub-tagging-dispatcher" {
source = "../../modules/pubsub"
project = var.project
subscription_endpoint = module.cloud-run-tagging-dispatcher.service_endpoint
subscription_name = var.dispatcher_pubsub_sub
subscription_service_account = module.iam.sa_tagging_dispatcher_tasks_email
topic = var.dispatcher_pubsub_topic
topic_publishers_sa_emails = [var.cloud_scheduler_account]
# Use an ack deadline large enough to process the BigQuery listing for large scopes
subscription_ack_deadline_seconds = var.dispatcher_subscription_ack_deadline_seconds
# Avoid resending dispatcher messages if things went wrong and the message was NACKed (e.g. timeout expired, app error, etc.)
# The minimum value must be at least equal to ack_deadline_seconds
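# e.g. (illustrative) if subscription_ack_deadline_seconds is 600, the retention duration must be at least "600s".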
subscription_message_retention_duration = var.dispatcher_subscription_message_retention_duration
}
module "pubsub-tagger" {
source = "../../modules/pubsub"
project = var.project
subscription_endpoint = module.cloud-run-tagger.service_endpoint
subscription_name = var.tagger_pubsub_sub
subscription_service_account = module.iam.sa_tagger_tasks_email
topic = var.tagger_pubsub_topic
# The Tagging Dispatcher and the DLP service account must be able to publish messages to the Tagger topic
topic_publishers_sa_emails = [module.iam.sa_tagging_dispatcher_email, var.dlp_service_account]
# The Tagger uses BigQuery queries in BATCH mode to avoid INTERACTIVE query concurrency limits; they might take longer to execute under heavy load.
# 10m is the max allowed
subscription_ack_deadline_seconds = var.tagger_subscription_ack_deadline_seconds
# How long to retain unacknowledged messages in the subscription's backlog, from the moment a message is published.
# In case of unexpected problems we want to avoid a backlog buildup that re-triggers the service.
subscription_message_retention_duration = var.tagger_subscription_message_retention_duration
}