modules/secure_data_warehouse/main.tf (188 lines of code) (raw):
/**
* Copyright 2023 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
locals {
non_confidential_dataset_id = "non_confidential_dataset"
confidential_dataset_id = "secured_dataset"
taxonomy_name = "secured_taxonomy"
taxonomy_display_name = "${local.taxonomy_name}-${local.random_id}"
confidential_table_id = "re_data"
non_confidential_table_id = "de_data"
wrapped_key_secret_data = chomp(data.google_secret_manager_secret_version.wrapped_key.secret_data)
bq_schema_dl = join(", ",[ for key, value in var.data_fields : "${key}:${value.type}" ])
bigquery_non_confidential_table = "${module.project_radlab_sdw_non_conf_data.project_id}:${local.non_confidential_dataset_id}.${local.non_confidential_table_id}"
bigquery_confidential_table = "${module.project_radlab_sdw_conf_data.project_id}:${local.confidential_dataset_id}.${local.confidential_table_id}"
}
module "secured_data_warehouse" {
source = "GoogleCloudPlatform/secured-data-warehouse/google"
# version = "0.2.0"
org_id = var.organization_id
data_governance_project_id = module.project_radlab_sdw_data_govern.project_id
confidential_data_project_id = module.project_radlab_sdw_conf_data.project_id
non_confidential_data_project_id = module.project_radlab_sdw_non_conf_data.project_id
data_ingestion_project_id = module.project_radlab_sdw_data_ingest.project_id
sdx_project_number = module.template_project.sdx_project_number
terraform_service_account = var.secure_datawarehouse_service_acccount
access_context_manager_policy_id = var.access_context_manager_policy_id
bucket_name = format("radlab-bucket-%s", local.random_id)
dataset_id = local.non_confidential_dataset_id
confidential_dataset_id = local.confidential_dataset_id
cmek_keyring_name = format("radlab-keyring-%s", local.random_id)
pubsub_resource_location = var.region
location = var.region
delete_contents_on_destroy = var.delete_contents_on_destroy
perimeter_additional_members = local.perimeter_additional_members
data_engineer_group = var.data_engineer_group
data_analyst_group = var.data_analyst_group
security_analyst_group = var.security_analyst_group
network_administrator_group = var.network_administrator_group
security_administrator_group = var.security_administrator_group
depends_on = [
time_sleep.wait_120_seconds,
module.iam_projects,
module.centralized_logging,
# google_project_iam_binding.remove_owner_role
]
}
resource "local_file" "deidentification_template_file" {
filename = format("${path.module}/templates/deidentification.tpl")
content = templatefile("${path.module}/templates/deidentification_template.tpl",
{
display_name = "$${display_name}"
description = "$${description}"
crypto_key = "$${crypto_key}"
wrapped_key = "$${wrapped_key}"
template_id = "$${template_id}"
fields = var.deidentified_fields
})
}
module "de_identification_template" {
source = "GoogleCloudPlatform/secured-data-warehouse/google//modules/de-identification-template"
project_id = module.project_radlab_sdw_data_govern.project_id
terraform_service_account = var.secure_datawarehouse_service_acccount
crypto_key = module.kek.keys[local.kek_key_name]
wrapped_key = local.wrapped_key_secret_data
dlp_location = var.region
template_id_prefix = "de_identification"
template_file = local_file.deidentification_template_file.filename
dataflow_service_account = module.secured_data_warehouse.dataflow_controller_service_account_email
}
resource "google_artifact_registry_repository_iam_member" "docker_reader" {
provider = google-beta
project = module.template_project.project_id
location = var.region
repository = "flex-templates"
role = "roles/artifactregistry.reader"
member = "serviceAccount:${module.secured_data_warehouse.dataflow_controller_service_account_email}"
depends_on = [
module.template_project,
module.secured_data_warehouse
]
}
resource "google_artifact_registry_repository_iam_member" "confidential_docker_reader" {
provider = google-beta
project = module.template_project.project_id
location = var.region
repository = "flex-templates"
role = "roles/artifactregistry.reader"
member = "serviceAccount:${module.secured_data_warehouse.confidential_dataflow_controller_service_account_email}"
depends_on = [
module.template_project,
module.secured_data_warehouse
]
}
resource "google_artifact_registry_repository_iam_member" "python_reader" {
provider = google-beta
project = module.template_project.project_id
location = var.region
repository = "python-modules"
role = "roles/artifactregistry.reader"
member = "serviceAccount:${module.secured_data_warehouse.dataflow_controller_service_account_email}"
depends_on = [
module.template_project,
module.secured_data_warehouse
]
}
resource "google_artifact_registry_repository_iam_member" "confidential_python_reader" {
provider = google-beta
project = module.template_project.project_id
location = var.region
repository = "python-modules"
role = "roles/artifactregistry.reader"
member = "serviceAccount:${module.secured_data_warehouse.confidential_dataflow_controller_service_account_email}"
depends_on = [
module.template_project,
module.secured_data_warehouse
]
}
module "regional_deid_pipeline" {
source = "GoogleCloudPlatform/secured-data-warehouse/google//modules/dataflow-flex-job"
project_id = module.project_radlab_sdw_data_ingest.project_id
name = "dataflow-flex-regional-dlp-deid-job-python-query"
container_spec_gcs_path = module.template_project.python_re_identify_template_gs_path
job_language = "PYTHON"
region = var.region
service_account_email = module.secured_data_warehouse.dataflow_controller_service_account_email
subnetwork_self_link = module.dwh_networking_data_ingest.subnets_self_links[0]
kms_key_name = module.secured_data_warehouse.cmek_data_ingestion_crypto_key
temp_location = "gs://${module.secured_data_warehouse.data_ingestion_bucket_name}/tmp/"
staging_location = "gs://${module.secured_data_warehouse.data_ingestion_bucket_name}/staging/"
parameters = {
query = "SELECT ${join(", ",[ for key, value in var.data_fields : "${key}" ])} FROM [${module.project_radlab_sdw_data_ingest.project_id}:${module.sdw_data_ingest_bq_dataset.bigquery_dataset.dataset_id}.${module.sdw_data_ingest_bq_dataset.external_table_ids[0]}] "
deidentification_template_name = module.de_identification_template.template_full_path
window_interval_sec = 30
batch_size = 1000
dlp_location = var.region
dlp_project = module.project_radlab_sdw_data_govern.project_id
bq_schema = local.bq_schema_dl
output_table = local.bigquery_non_confidential_table
dlp_transform = "DE-IDENTIFY"
}
}
resource "time_sleep" "wait_de_identify_job_execution" {
create_duration = "720s"
triggers = {
time = timestamp()
}
depends_on = [
module.regional_deid_pipeline
]
}
module "regional_reid_pipeline" {
source = "GoogleCloudPlatform/secured-data-warehouse/google//modules/dataflow-flex-job"
project_id = module.project_radlab_sdw_conf_data.project_id
name = "dataflow-flex-regional-dlp-reid-job-python-query"
container_spec_gcs_path = module.template_project.python_re_identify_template_gs_path
job_language = "PYTHON"
region = var.region
service_account_email = module.secured_data_warehouse.confidential_dataflow_controller_service_account_email
subnetwork_self_link = module.dwh_networking_conf.subnets_self_links[0]
kms_key_name = module.secured_data_warehouse.cmek_reidentification_crypto_key
temp_location = "gs://${module.secured_data_warehouse.confidential_data_dataflow_bucket_name}/tmp/"
staging_location = "gs://${module.secured_data_warehouse.confidential_data_dataflow_bucket_name}/staging/"
parameters = {
input_table = "${module.project_radlab_sdw_non_conf_data.project_id}:${local.non_confidential_dataset_id}.${local.non_confidential_table_id}"
deidentification_template_name = module.de_identification_template.template_full_path
window_interval_sec = 30
batch_size = 1000
dlp_location = var.region
dlp_project = module.project_radlab_sdw_data_govern.project_id
bq_schema = local.bq_schema_dl
output_table = local.bigquery_confidential_table
dlp_transform = "RE-IDENTIFY"
}
depends_on = [
time_sleep.wait_de_identify_job_execution,
google_bigquery_table.re_id
]
}