# 5-app-infra/4-data-governance/envs/production/ddl_datasets.tf
/**
* Copyright 2024 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
##########################################
# Create bucket to store DDL data
##########################################
# Bucket that holds the rendered DDL JSON objects consumed by the BigQuery
# external tables below. Lives in the data-governance project.
resource "google_storage_bucket" "ddl_data_bucket" {
  name     = "${local.data_governance_project_id}-ddl-data"
  project  = local.data_governance_project_id
  location = var.region
  labels   = local.data_labels

  # Lock the bucket down: no public exposure, IAM-only (uniform) access.
  public_access_prevention    = "enforced"
  uniform_bucket_level_access = true

  # Allow `terraform destroy` even while DDL objects are still present.
  force_destroy = true
}
# Grant bucket-level read (listing/metadata) to each DDL data consumer.
resource "google_storage_bucket_iam_member" "ddl_data_bucket" {
  for_each = toset(local.ddl_data_access)

  bucket = google_storage_bucket.ddl_data_bucket.name
  member = each.value
  role   = "roles/storage.legacyBucketReader"
}
# Grant object-level read on the bucket's contents to the same consumers.
resource "google_storage_bucket_iam_member" "ddl_data_object" {
  for_each = toset(local.ddl_data_access)

  bucket = google_storage_bucket.ddl_data_bucket.name
  member = each.value
  role   = "roles/storage.legacyObjectReader"
}
############################################
# Development Datasets
############################################
# One rendered DDL JSON object per (domain, dataset, table, inspection
# dataset, inspection table) tuple, restricted to DLP inspections whose
# environment is "development". The for_each key mirrors that tuple:
# "<domain>/<dataset>/<table>/<inspection_dataset>/<inspection_table>".
resource "google_storage_bucket_object" "ddl_data_development" {
# merge(flatten([...])...) collapses the nested per-tuple singleton maps
# into one flat map for for_each.
for_each = merge(
flatten([
for domain, project in local.data_domain_non_conf_projects_dev : [
for dataset, tables in var.ddl_tables : [
for table in tables : [
for inspection in var.dlp_job_inspect_datasets : [
for inspection_table in inspection.inspecting_table_ids : {
"${domain}/${dataset}/${table}/${inspection.inspection_dataset}/${inspection_table}" = {
domain = domain,
project_id = project.project_id,
dataset = dataset,
table = table,
inspection_dataset = inspection.inspection_dataset
inspection_table = inspection_table
inspection_owner = inspection.owner_information
}
} if inspection.environment == "development"
]
]
]
]
])...
)
bucket = google_storage_bucket.ddl_data_bucket.name
# NOTE(review): unlike the nonproduction/production resources, every object
# here is named "<table>_<inspection_table>.json" with no per-table special
# cases — confirm this asymmetry with the sibling resources is intentional.
name = "development/${each.value.domain}/${each.value.dataset}/${each.value.inspection_dataset}/${each.value.table}_${each.value.inspection_table}.json"
# Render the static DDL template for this table, substituting the target
# project, inspection dataset/table and the owner metadata fields.
content = templatefile("${path.module}/../../static_data/ddl_tables/${each.value.domain}/${each.value.dataset}/${each.value.table}.json", {
PROJECT_ID_DATA = each.value.project_id
dataset_id = each.value.inspection_dataset
table_id = each.value.inspection_table
data_owner_email = each.value.inspection_owner["email"]
data_owner_name = each.value.inspection_owner["name"]
is_sensitive = each.value.inspection_owner["is_sensitive"]
sensitive_category = each.value.inspection_owner["sensitive_category"]
is_authoritative = each.value.inspection_owner["is_authoritative"]
})
}
# One BigQuery dataset per (data domain, DDL dataset) pair that has at least
# one DLP inspection configured for the development environment. merge(...)
# deduplicates the "<domain>/<dataset>" keys emitted once per inspection.
module "bigquery_ddl_datasets_development" {
  for_each = merge(flatten([
    for domain, project in local.data_domain_non_conf_projects_dev : [
      for dataset, tables in var.ddl_tables : [
        for inspection in var.dlp_job_inspect_datasets : {
          "${domain}/${dataset}" = {
            domain             = domain,
            project_id         = project.project_id,
            dataset            = dataset,
            tables             = tables
            inspection_dataset = inspection.inspection_dataset
          }
        } if inspection.environment == "development"
      ]
    ]
  ])...)

  source = "../../modules/bigquery"

  dataset_id                 = "${each.value.dataset}_${replace(each.value.domain, "-", "_")}_dev"
  dataset_name               = "${each.value.dataset}_${replace(each.value.domain, "-", "_")}_dev"
  dataset_labels             = local.data_labels
  delete_contents_on_destroy = true
  encryption_key             = local.bq_data_quality_kms_key
  location                   = var.region
  project_id                 = local.data_governance_project_id

  # External tables backed by the development DDL JSON objects.
  external_tables = [for table in each.value.tables : {
    autodetect    = true
    source_format = "NEWLINE_DELIMITED_JSON"
    # Bug fix: this previously indexed google_storage_bucket_object.ddl_data_nonproduction,
    # pointing development tables at nonproduction objects. It now references
    # ddl_data_development, filters to development inspections (that resource
    # only creates keys for environment == "development"), and deduplicates
    # URIs with distinct() for consistency with the nonproduction module.
    source_uris = distinct(flatten([
      for inspection in var.dlp_job_inspect_datasets : [
        for inspection_table in inspection.inspecting_table_ids : [
          "gs://${google_storage_bucket.ddl_data_bucket.name}/${google_storage_bucket_object.ddl_data_development["${each.key}/${table}/${inspection.inspection_dataset}/${inspection_table}"].name}"
        ]
      ] if inspection.domain_name == each.value.domain && inspection.environment == "development"
    ]))
    table_id = table
  }]

  # The entitlement_management dataset additionally exposes an
  # INFORMATION_SCHEMA helper view.
  views = each.value.dataset == "entitlement_management" ? [
    {
      view_id        = "_information_schema_view"
      use_legacy_sql = false
      query = templatefile("${path.module}/../../static_data/ddl_tables/${each.value.domain}/entitlement_management/view/information_schema_view.sql", {
        region = var.region
      })
      labels = {
        env = "cdmc"
      }
    }
  ] : []
}
############################################
# Non Production Datasets
############################################
# One rendered DDL JSON object per (domain, dataset, table, inspection
# dataset, inspection table) tuple, restricted to DLP inspections whose
# environment is "nonproduction". The for_each key mirrors that tuple:
# "<domain>/<dataset>/<table>/<inspection_dataset>/<inspection_table>".
resource "google_storage_bucket_object" "ddl_data_nonproduction" {
# merge(flatten([...])...) collapses the nested per-tuple singleton maps
# into one flat map for for_each.
for_each = merge(
flatten([
for domain, project in local.data_domain_non_conf_projects_nonp : [
for dataset, tables in var.ddl_tables : [
for table in tables : [
for inspection in var.dlp_job_inspect_datasets : [
for inspection_table in inspection.inspecting_table_ids : {
"${domain}/${dataset}/${table}/${inspection.inspection_dataset}/${inspection_table}" = {
domain = domain,
project_id = project.project_id,
dataset = dataset,
table = table,
inspection_dataset = inspection.inspection_dataset
inspection_table = inspection_table
inspection_owner = inspection.owner_information
}
} if inspection.environment == "nonproduction"
]
]
]
]
])...
)
bucket = google_storage_bucket.ddl_data_bucket.name
# Object path depends on the table:
#  - asset_ia_details: "<table>_<inspection_table>.json" under the inspection
#    dataset (one object per inspected table);
#  - data_asset / provider_agreement / use_purpose: "<table>.json" under the
#    inspection dataset;
#  - everything else: "<table>.json" directly under the dataset.
name = each.value.table == "asset_ia_details" ? "nonproduction/${each.value.domain}/${each.value.dataset}/${each.value.inspection_dataset}/${each.value.table}_${each.value.inspection_table}.json" : each.value.table == "data_asset" || each.value.table == "provider_agreement" || each.value.table == "use_purpose" ? "nonproduction/${each.value.domain}/${each.value.dataset}/${each.value.inspection_dataset}/${each.value.table}.json" : "nonproduction/${each.value.domain}/${each.value.dataset}/${each.value.table}.json"
# Render the static DDL template for this table, substituting the target
# project, inspection dataset/table and the owner metadata fields.
content = templatefile("${path.module}/../../static_data/ddl_tables/${each.value.domain}/${each.value.dataset}/${each.value.table}.json", {
PROJECT_ID_DATA = each.value.project_id
dataset_id = each.value.inspection_dataset
table_id = each.value.inspection_table
data_owner_email = each.value.inspection_owner["email"]
data_owner_name = each.value.inspection_owner["name"]
is_sensitive = each.value.inspection_owner["is_sensitive"]
sensitive_category = each.value.inspection_owner["sensitive_category"]
is_authoritative = each.value.inspection_owner["is_authoritative"]
})
}
# One BigQuery dataset per (data domain, DDL dataset) pair that has at least
# one DLP inspection configured for the nonproduction environment. merge(...)
# deduplicates the "<domain>/<dataset>" keys emitted once per inspection.
module "bigquery_ddl_datasets_nonproduction" {
  for_each = merge(flatten([
    for domain, project in local.data_domain_non_conf_projects_nonp : [
      for dataset, tables in var.ddl_tables : [
        for inspection in var.dlp_job_inspect_datasets : {
          "${domain}/${dataset}" = {
            domain             = domain,
            project_id         = project.project_id,
            dataset            = dataset,
            tables             = tables
            inspection_dataset = inspection.inspection_dataset
          }
        } if inspection.environment == "nonproduction"
      ]
    ]
  ])...)

  source = "../../modules/bigquery"

  dataset_id                 = "${each.value.dataset}_${replace(each.value.domain, "-", "_")}_nonp"
  dataset_name               = "${each.value.dataset}_${replace(each.value.domain, "-", "_")}_nonp"
  dataset_labels             = local.data_labels
  delete_contents_on_destroy = true
  encryption_key             = local.bq_data_quality_kms_key
  location                   = var.region
  project_id                 = local.data_governance_project_id

  # External tables backed by the nonproduction DDL JSON objects.
  external_tables = [for table in each.value.tables : {
    autodetect    = true
    source_format = "NEWLINE_DELIMITED_JSON"
    # Bug fix: filter inspections by environment as well as domain. The
    # ddl_data_nonproduction resource only creates keys for
    # environment == "nonproduction", so a domain with inspections in other
    # environments would otherwise index nonexistent for_each keys.
    source_uris = distinct(flatten([
      for inspection in var.dlp_job_inspect_datasets : [
        for inspection_table in inspection.inspecting_table_ids : [
          "gs://${google_storage_bucket.ddl_data_bucket.name}/${google_storage_bucket_object.ddl_data_nonproduction["${each.key}/${table}/${inspection.inspection_dataset}/${inspection_table}"].name}"
        ]
      ] if inspection.domain_name == each.value.domain && inspection.environment == "nonproduction"
    ]))
    table_id = table
  }]

  # The entitlement_management dataset additionally exposes an
  # INFORMATION_SCHEMA helper view.
  views = each.value.dataset == "entitlement_management" ? [
    {
      view_id        = "_information_schema_view"
      use_legacy_sql = false
      query = templatefile("${path.module}/../../static_data/ddl_tables/${each.value.domain}/entitlement_management/view/information_schema_view.sql", {
        region = var.region
      })
      labels = {
        env = "cdmc"
      }
    }
  ] : []
}
############################################
# Production Datasets
############################################
# One rendered DDL JSON object per (domain, dataset, table, inspection
# dataset, inspection table) tuple, restricted to DLP inspections whose
# environment is "production". The for_each key mirrors that tuple:
# "<domain>/<dataset>/<table>/<inspection_dataset>/<inspection_table>".
resource "google_storage_bucket_object" "ddl_data_production" {
  # merge(flatten([...])...) collapses the nested per-tuple singleton maps
  # into one flat map for for_each.
  for_each = merge(
    flatten([
      for domain, project in local.data_domain_non_conf_projects_prod : [
        for dataset, tables in var.ddl_tables : [
          for table in tables : [
            for inspection in var.dlp_job_inspect_datasets : [
              for inspection_table in inspection.inspecting_table_ids : {
                "${domain}/${dataset}/${table}/${inspection.inspection_dataset}/${inspection_table}" = {
                  domain             = domain,
                  project_id         = project.project_id,
                  dataset            = dataset,
                  table              = table,
                  inspection_dataset = inspection.inspection_dataset
                  inspection_table   = inspection_table
                  inspection_owner   = inspection.owner_information
                }
              } if inspection.environment == "production"
            ]
          ]
        ]
      ]
    ])...
  )
  bucket = google_storage_bucket.ddl_data_bucket.name
  # Object path depends on the table (same scheme as ddl_data_nonproduction,
  # under the "production/" prefix).
  # Bug fix: the second and third branches previously used the
  # "nonproduction/" prefix (copy-paste from the nonproduction resource),
  # which also collided with the nonproduction object names in the same
  # bucket; all branches now write under "production/".
  name = each.value.table == "asset_ia_details" ? "production/${each.value.domain}/${each.value.dataset}/${each.value.inspection_dataset}/${each.value.table}_${each.value.inspection_table}.json" : each.value.table == "data_asset" || each.value.table == "provider_agreement" || each.value.table == "use_purpose" ? "production/${each.value.domain}/${each.value.dataset}/${each.value.inspection_dataset}/${each.value.table}.json" : "production/${each.value.domain}/${each.value.dataset}/${each.value.table}.json"
  # Render the static DDL template for this table, substituting the target
  # project, inspection dataset/table and the owner metadata fields.
  content = templatefile("${path.module}/../../static_data/ddl_tables/${each.value.domain}/${each.value.dataset}/${each.value.table}.json", {
    PROJECT_ID_DATA    = each.value.project_id
    dataset_id         = each.value.inspection_dataset
    table_id           = each.value.inspection_table
    data_owner_email   = each.value.inspection_owner["email"]
    data_owner_name    = each.value.inspection_owner["name"]
    is_sensitive       = each.value.inspection_owner["is_sensitive"]
    sensitive_category = each.value.inspection_owner["sensitive_category"]
    is_authoritative   = each.value.inspection_owner["is_authoritative"]
  })
}
# One BigQuery dataset per (data domain, DDL dataset) pair that has at least
# one DLP inspection configured for the production environment. merge(...)
# deduplicates the "<domain>/<dataset>" keys emitted once per inspection.
module "bigquery_ddl_datasets_production" {
  for_each = merge(flatten([
    for domain, project in local.data_domain_non_conf_projects_prod : [
      for dataset, tables in var.ddl_tables : [
        for inspection in var.dlp_job_inspect_datasets : {
          "${domain}/${dataset}" = {
            domain             = domain,
            project_id         = project.project_id,
            dataset            = dataset,
            tables             = tables
            inspection_dataset = inspection.inspection_dataset
          }
        } if inspection.environment == "production"
      ]
    ]
  ])...)

  source = "../../modules/bigquery"

  dataset_id                 = "${each.value.dataset}_${replace(each.value.domain, "-", "_")}_prod"
  dataset_name               = "${each.value.dataset}_${replace(each.value.domain, "-", "_")}_prod"
  dataset_labels             = local.data_labels
  delete_contents_on_destroy = true
  encryption_key             = local.bq_data_quality_kms_key
  location                   = var.region
  project_id                 = local.data_governance_project_id

  # External tables backed by the production DDL JSON objects.
  external_tables = [for table in each.value.tables : {
    autodetect    = true
    source_format = "NEWLINE_DELIMITED_JSON"
    # Bug fix: this previously indexed google_storage_bucket_object.ddl_data_nonproduction,
    # pointing production tables at nonproduction objects. It now references
    # ddl_data_production, filters to production inspections (that resource
    # only creates keys for environment == "production"), and deduplicates
    # URIs with distinct() for consistency with the nonproduction module.
    source_uris = distinct(flatten([
      for inspection in var.dlp_job_inspect_datasets : [
        for inspection_table in inspection.inspecting_table_ids : [
          "gs://${google_storage_bucket.ddl_data_bucket.name}/${google_storage_bucket_object.ddl_data_production["${each.key}/${table}/${inspection.inspection_dataset}/${inspection_table}"].name}"
        ]
      ] if inspection.domain_name == each.value.domain && inspection.environment == "production"
    ]))
    table_id = table
  }]

  # The entitlement_management dataset additionally exposes an
  # INFORMATION_SCHEMA helper view.
  views = each.value.dataset == "entitlement_management" ? [
    {
      view_id        = "_information_schema_view"
      use_legacy_sql = false
      query = templatefile("${path.module}/../../static_data/ddl_tables/${each.value.domain}/entitlement_management/view/information_schema_view.sql", {
        region = var.region
      })
      labels = {
        env = "cdmc"
      }
    }
  ] : []
}