####################################################################################
# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
####################################################################################
####################################################################################
# Create the GCP resources
#
# Author: Adam Paternostro
####################################################################################
terraform {
required_providers {
google = {
source = "hashicorp/google-beta"
version = "5.35.0"
}
}
}
####################################################################################
# Variables
####################################################################################
variable "gcp_account_name" {}
variable "project_id" {}
variable "local_curl_impersonation" {}
variable "dataplex_region" {}
variable "multi_region" {}
variable "bigquery_non_multi_region" {}
variable "vertex_ai_region" {}
variable "data_catalog_region" {}
variable "appengine_region" {}
variable "colab_enterprise_region" {}
variable "dataflow_region" {}
variable "dataproc_region" {}
variable "kafka_region" {}
variable "random_extension" {}
variable "project_number" {}
variable "deployment_service_account_name" {}
variable "terraform_service_account" {}
variable "bigquery_governed_data_raw_dataset" {}
variable "bigquery_governed_data_enriched_dataset" {}
variable "bigquery_governed_data_curated_dataset" {}
variable "bigquery_analytics_hub_publisher_dataset" {}
variable "bigquery_analytics_hub_subscriber_dataset" {}
variable "governed_data_raw_bucket" {}
variable "governed_data_enriched_bucket" {}
variable "governed_data_curated_bucket" {}
variable "governed_data_code_bucket" {}
variable "governed_data_scan_bucket" {}
variable "dataflow_staging_bucket" {}
data "google_client_config" "current" {
}
####################################################################################
# Buckets for all data (BigQuery, Spark, etc.)
# These are your "Data Lake" buckets.
# If you are using Dataplex you should create a bucket per data lake zone (bronze,
# silver, gold, etc.); a commented sketch follows the bucket resources below.
####################################################################################
resource "google_storage_bucket" "google_storage_bucket_governed_data_raw_bucket" {
project = var.project_id
name = var.governed_data_raw_bucket
location = var.multi_region
force_destroy = true
uniform_bucket_level_access = true
}
resource "google_storage_bucket" "google_storage_bucket_governed_data_enriched_bucket" {
project = var.project_id
name = var.governed_data_enriched_bucket
location = var.multi_region
force_destroy = true
uniform_bucket_level_access = true
}
resource "google_storage_bucket" "google_storage_bucket_governed_data_curated_bucket" {
project = var.project_id
name = var.governed_data_curated_bucket
location = var.multi_region
force_destroy = true
uniform_bucket_level_access = true
}
resource "google_storage_bucket" "google_storage_bucket_governed_data_code_bucket" {
project = var.project_id
name = var.governed_data_code_bucket
location = var.multi_region
force_destroy = true
uniform_bucket_level_access = true
}
resource "google_storage_bucket" "google_storage_bucket_governed_data_scan_bucket" {
project = var.project_id
name = var.governed_data_scan_bucket
location = var.dataplex_region
force_destroy = true
uniform_bucket_level_access = true
}
resource "google_storage_bucket" "google_storage_bucket_dataflow_staging" {
project = var.project_id
name = var.dataflow_staging_bucket
location = var.multi_region
force_destroy = true
uniform_bucket_level_access = true
soft_delete_policy {
retention_duration_seconds = 0
}
}
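# OPTIONAL (commented-out sketch): the "bucket per data lake zone" layout mentioned in the
# header comment above. This is a minimal illustration only; the zone names and the
# "<raw bucket>-<zone>" naming convention are assumptions and are not deployed by this demo.
/*
locals {
  data_lake_zones = ["bronze", "silver", "gold"]
}

resource "google_storage_bucket" "google_storage_bucket_data_lake_zone" {
  for_each                    = toset(local.data_lake_zones)
  project                     = var.project_id
  name                        = "${var.governed_data_raw_bucket}-${each.key}"
  location                    = var.multi_region
  force_destroy               = true
  uniform_bucket_level_access = true
}
*/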
####################################################################################
# Default Network
# The project was not created with the default network.
# This creates just the network/subnets we need.
####################################################################################
resource "google_compute_network" "default_network" {
project = var.project_id
name = "vpc-main"
description = "Default network"
auto_create_subnetworks = false
mtu = 1460
}
resource "google_compute_subnetwork" "colab_enterprise_subnet" {
project = var.project_id
name = "colab-enterprise-subnet"
ip_cidr_range = "10.1.0.0/16"
region = var.colab_enterprise_region
network = google_compute_network.default_network.id
private_ip_google_access = true
depends_on = [
google_compute_network.default_network
]
}
resource "google_compute_subnetwork" "dataflow_subnet" {
project = var.project_id
name = "dataflow-subnet"
ip_cidr_range = "10.2.0.0/16"
region = var.dataflow_region
network = google_compute_network.default_network.id
private_ip_google_access = true
depends_on = [
google_compute_network.default_network,
google_compute_subnetwork.colab_enterprise_subnet
]
}
resource "google_compute_subnetwork" "kafka_subnet" {
project = var.project_id
name = "kafka-subnet"
ip_cidr_range = "10.3.0.0/16"
region = var.kafka_region
network = google_compute_network.default_network.id
private_ip_google_access = true
depends_on = [
google_compute_network.default_network,
google_compute_subnetwork.colab_enterprise_subnet
]
}
resource "google_compute_subnetwork" "dataproc_subnet" {
project = var.project_id
name = "dataproc-subnet"
ip_cidr_range = "10.4.0.0/16"
region = var.dataproc_region
network = google_compute_network.default_network.id
private_ip_google_access = true
depends_on = [
google_compute_network.default_network,
google_compute_subnetwork.colab_enterprise_subnet,
google_compute_subnetwork.kafka_subnet
]
}
# Firewall for NAT Router
resource "google_compute_firewall" "subnet_firewall_rule" {
project = var.project_id
name = "subnet-nat-firewall"
network = google_compute_network.default_network.id
allow {
protocol = "icmp"
}
allow {
protocol = "tcp"
}
allow {
protocol = "udp"
}
source_ranges = ["10.1.0.0/16","10.2.0.0/16","10.3.0.0/16","10.4.0.0/16"]
depends_on = [
google_compute_subnetwork.colab_enterprise_subnet,
google_compute_subnetwork.dataflow_subnet,
google_compute_subnetwork.kafka_subnet
]
}
# We want a NAT for every region
locals {
distinctRegions = distinct([var.colab_enterprise_region, var.dataflow_region, var.kafka_region])
}
resource "google_compute_router" "nat-router-distinct-regions" {
project = var.project_id
count = length(local.distinctRegions)
name = "nat-router-${local.distinctRegions[count.index]}"
region = local.distinctRegions[count.index]
network = google_compute_network.default_network.id
depends_on = [
google_compute_firewall.subnet_firewall_rule
]
}
resource "google_compute_router_nat" "nat-config-distinct-regions" {
project = var.project_id
count = length(local.distinctRegions)
name = "nat-config-${local.distinctRegions[count.index]}"
router = google_compute_router.nat-router-distinct-regions[count.index].name
region = local.distinctRegions[count.index]
nat_ip_allocate_option = "AUTO_ONLY"
source_subnetwork_ip_ranges_to_nat = "ALL_SUBNETWORKS_ALL_IP_RANGES"
depends_on = [
google_compute_router.nat-router-distinct-regions
]
}
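# The count-based routers/NATs above work, but adding or reordering regions shifts the count
# indexes and can force replacements. A commented-out sketch of the same resources keyed by
# region name with for_each (functionally equivalent under the same locals):
/*
resource "google_compute_router" "nat_router_by_region" {
  for_each = toset(local.distinctRegions)
  project  = var.project_id
  name     = "nat-router-${each.key}"
  region   = each.key
  network  = google_compute_network.default_network.id
}

resource "google_compute_router_nat" "nat_config_by_region" {
  for_each                           = toset(local.distinctRegions)
  project                            = var.project_id
  name                               = "nat-config-${each.key}"
  router                             = google_compute_router.nat_router_by_region[each.key].name
  region                             = each.key
  nat_ip_allocate_option             = "AUTO_ONLY"
  source_subnetwork_ip_ranges_to_nat = "ALL_SUBNETWORKS_ALL_IP_RANGES"
}
*/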
####################################################################################
# BigQuery Datasets
####################################################################################
resource "google_bigquery_dataset" "google_bigquery_dataset_governed_data_raw" {
project = var.project_id
dataset_id = var.bigquery_governed_data_raw_dataset
friendly_name = var.bigquery_governed_data_raw_dataset
description = "This dataset contains the raw data for the demo."
location = var.multi_region
}
resource "google_bigquery_dataset" "google_bigquery_dataset_governed_data_enriched" {
project = var.project_id
dataset_id = var.bigquery_governed_data_enriched_dataset
friendly_name = var.bigquery_governed_data_enriched_dataset
description = "This dataset contains the enriched data for the demo."
location = var.multi_region
}
resource "google_bigquery_dataset" "google_bigquery_dataset_governed_data_curated" {
project = var.project_id
dataset_id = var.bigquery_governed_data_curated_dataset
friendly_name = var.bigquery_governed_data_curated_dataset
description = "This dataset contains the curated data for the demo."
location = var.multi_region
}
resource "google_bigquery_dataset" "google_bigquery_dataset_analytics_hub_publisher" {
project = var.project_id
dataset_id = var.bigquery_analytics_hub_publisher_dataset
friendly_name = var.bigquery_analytics_hub_publisher_dataset
description = "This dataset contains the analytics hub publisher data data for the demo."
location = var.multi_region
}
####################################################################################
# IAM for cloud build
####################################################################################
# Needed per https://cloud.google.com/build/docs/cloud-build-service-account-updates
resource "google_project_iam_member" "cloudfunction_builder" {
project = var.project_id
role = "roles/cloudbuild.builds.builder"
member = "serviceAccount:${var.project_number}-compute@developer.gserviceaccount.com"
}
# Needed per https://cloud.google.com/build/docs/cloud-build-service-account-updates
# Allow cloud function service account to read storage [V2 Function]
resource "google_project_iam_member" "cloudfunction_objectViewer" {
project = var.project_id
role = "roles/storage.objectViewer"
member = "serviceAccount:${var.project_number}-compute@developer.gserviceaccount.com"
depends_on = [
google_project_iam_member.cloudfunction_builder
]
}
####################################################################################
# Dataplex / Data Lineage
####################################################################################
resource "google_project_iam_member" "gcp_roles_datalineage_admin" {
project = var.project_id
role = "roles/datalineage.admin"
member = "user:${var.gcp_account_name}"
}
####################################################################################
# BigQuery - Connections (BigLake, Functions, etc)
####################################################################################
# Vertex AI connection
resource "google_bigquery_connection" "vertex_ai_connection" {
project = var.project_id
connection_id = "vertex-ai"
location = var.multi_region
friendly_name = "vertex-ai"
description = "vertex-ai"
cloud_resource {}
}
# Grant the Vertex AI connection's service account the Vertex AI User role
resource "google_project_iam_member" "vertex_ai_connection_vertex_user_role" {
project = var.project_id
role = "roles/aiplatform.user"
member = "serviceAccount:${google_bigquery_connection.vertex_ai_connection.cloud_resource[0].service_account_id}"
depends_on = [
google_bigquery_connection.vertex_ai_connection
]
}
# BigLake connection
resource "google_bigquery_connection" "biglake_connection" {
project = var.project_id
connection_id = "biglake-connection"
location = var.multi_region
friendly_name = "biglake-connection"
description = "biglake-connection"
cloud_resource {}
}
resource "time_sleep" "biglake_connection_time_delay" {
depends_on = [google_bigquery_connection.biglake_connection]
create_duration = "30s"
}
# Allow BigLake to read storage (granted at the project level; you can instead grant each bucket individually - see the commented sketch below)
resource "google_project_iam_member" "bq_connection_iam_object_viewer" {
project = var.project_id
role = "roles/storage.objectViewer"
member = "serviceAccount:${google_bigquery_connection.biglake_connection.cloud_resource[0].service_account_id}"
depends_on = [
time_sleep.biglake_connection_time_delay
]
}
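# If you prefer the per-bucket grants mentioned above over the project-wide objectViewer,
# a minimal commented-out sketch for the raw bucket follows (repeat per bucket as needed):
/*
resource "google_storage_bucket_iam_member" "biglake_connection_raw_bucket_object_viewer" {
  bucket = google_storage_bucket.google_storage_bucket_governed_data_raw_bucket.name
  role   = "roles/storage.objectViewer"
  member = "serviceAccount:${google_bigquery_connection.biglake_connection.cloud_resource[0].service_account_id}"
  depends_on = [
    time_sleep.biglake_connection_time_delay
  ]
}
*/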
# BigLake connection (in the Dataplex region, e.g. us-central1)
resource "google_bigquery_connection" "biglake_connection_dataplex_region" {
project = var.project_id
connection_id = "biglake-connection-dataplex"
location = var.dataplex_region
friendly_name = "biglake-connection-dataplex"
description = "biglake-connection-dataplex"
cloud_resource {}
}
resource "time_sleep" "biglake_connection_dataplex_region_time_delay" {
depends_on = [google_bigquery_connection.biglake_connection_dataplex_region]
create_duration = "30s"
}
# Allow BigLake to read storage (granted at the project level; you can instead grant each bucket individually)
resource "google_project_iam_member" "bq_connection_dataplex_region_iam_object_viewer" {
project = var.project_id
role = "roles/storage.objectViewer"
member = "serviceAccount:${google_bigquery_connection.biglake_connection_dataplex_region.cloud_resource[0].service_account_id}"
depends_on = [
time_sleep.biglake_connection_dataplex_region_time_delay
]
}
####################################################################################
# Colab Enterprise
####################################################################################
# https://cloud.google.com/vertex-ai/docs/reference/rest/v1beta1/projects.locations.notebookRuntimeTemplates
# NOTE: If you want a "when = destroy" Terraform example, see:
# https://github.com/GoogleCloudPlatform/data-analytics-golden-demo/blob/main/cloud-composer/data/terraform/dataplex/terraform.tf#L147
# A commented destroy sketch also follows the Colab resources below.
resource "null_resource" "colab_runtime_template" {
provisioner "local-exec" {
when = create
command = <<EOF
curl -X POST \
https://${var.colab_enterprise_region}-aiplatform.googleapis.com/ui/projects/${var.project_id}/locations/${var.colab_enterprise_region}/notebookRuntimeTemplates?notebookRuntimeTemplateId=colab-enterprise-template \
--header "Authorization: Bearer ${data.google_client_config.current.access_token}" \
--header "Content-Type: application/json" \
--data '{
displayName: "colab-enterprise-template",
description: "colab-enterprise-template",
isDefault: true,
machineSpec: {
machineType: "e2-highmem-4"
},
networkSpec: {
enableInternetAccess: false,
network: "projects/${var.project_id}/global/networks/vpc-main",
subnetwork: "projects/${var.project_id}/regions/${var.colab_enterprise_region}/subnetworks/${google_compute_subnetwork.colab_enterprise_subnet.name}"
},
shieldedVmConfig: {
enableSecureBoot: true
}
}'
EOF
}
depends_on = [
google_compute_subnetwork.colab_enterprise_subnet
]
}
# https://cloud.google.com/vertex-ai/docs/reference/rest/v1beta1/projects.locations.notebookRuntimes
resource "null_resource" "colab_runtime" {
provisioner "local-exec" {
when = create
command = <<EOF
curl -X POST \
https://${var.colab_enterprise_region}-aiplatform.googleapis.com/ui/projects/${var.project_id}/locations/${var.colab_enterprise_region}/notebookRuntimes:assign \
--header "Authorization: Bearer ${data.google_client_config.current.access_token}" \
--header "Content-Type: application/json" \
--data '{
notebookRuntimeTemplate: "projects/${var.project_number}/locations/${var.colab_enterprise_region}/notebookRuntimeTemplates/colab-enterprise-template",
notebookRuntime: {
displayName: "colab-enterprise-runtime",
description: "colab-enterprise-runtime",
runtimeUser: "${var.gcp_account_name}"
}
}'
EOF
}
depends_on = [
null_resource.colab_runtime_template
]
}
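# Commented-out sketch of a "when = destroy" cleanup for the runtime template, following the
# triggers pattern used by the Dataplex example later in this file. The DELETE path mirrors
# the create call above; it is an assumption to verify against the Vertex AI REST reference.
/*
resource "null_resource" "colab_runtime_template_destroy" {
  triggers = {
    project_id               = var.project_id
    colab_enterprise_region  = var.colab_enterprise_region
    local_curl_impersonation = var.local_curl_impersonation
  }
  provisioner "local-exec" {
    when    = destroy
    command = <<EOF
curl -X DELETE \
https://${self.triggers.colab_enterprise_region}-aiplatform.googleapis.com/ui/projects/${self.triggers.project_id}/locations/${self.triggers.colab_enterprise_region}/notebookRuntimeTemplates/colab-enterprise-template \
--header "Authorization: Bearer $(gcloud auth print-access-token ${self.triggers.local_curl_impersonation})" \
--header "Content-Type: application/json"
EOF
  }
}
*/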
####################################################################################
# New Service Account - For Continuous Queries
####################################################################################
resource "google_service_account" "kafka_continuous_query_service_account" {
project = var.project_id
account_id = "kafka-continuous-query"
display_name = "kafka-continuous-query"
}
# Needs access to BigQuery
resource "google_project_iam_member" "kafka_continuous_query_service_account_bigquery_admin" {
project = var.project_id
role = "roles/bigquery.admin"
member = "serviceAccount:${google_service_account.kafka_continuous_query_service_account.email}"
depends_on = [
google_service_account.kafka_continuous_query_service_account
]
}
# Needs access to Pub/Sub
resource "google_project_iam_member" "kafka_continuous_query_service_account_pubsub_admin" {
project = var.project_id
role = "roles/pubsub.admin"
member = "serviceAccount:${google_service_account.kafka_continuous_query_service_account.email}"
depends_on = [
google_project_iam_member.kafka_continuous_query_service_account_bigquery_admin
]
}
####################################################################################
# Pub/Sub (Topic and Subscription)
####################################################################################
resource "google_pubsub_topic" "google_pubsub_topic_governed_data_topic" {
project = var.project_id
name = "pubsub-governed-data-topic"
message_retention_duration = "86400s"
}
resource "google_pubsub_subscription" "google_pubsub_subscription_governed_data_subscription" {
project = var.project_id
name = "pubsub-governed-data-subscription"
topic = google_pubsub_topic.google_pubsub_topic_governed_data_topic.id
message_retention_duration = "86400s"
retain_acked_messages = false
expiration_policy {
ttl = "86400s"
}
retry_policy {
minimum_backoff = "10s"
}
enable_message_ordering = false
depends_on = [
google_pubsub_topic.google_pubsub_topic_governed_data_topic
]
}
####################################################################################
# DataFlow Service Account
####################################################################################
# Service account for Dataflow jobs
resource "google_service_account" "dataflow_service_account" {
project = var.project_id
account_id = "dataflow-service-account"
display_name = "Service Account for Dataflow Environment"
}
# Grant Editor to the service account (broader than needed; see the commented sketch of narrower roles below)
resource "google_project_iam_member" "dataflow_service_account_editor_role" {
project = var.project_id
role = "roles/editor"
member = "serviceAccount:${google_service_account.dataflow_service_account.email}"
depends_on = [
google_service_account.dataflow_service_account
]
}
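# Commented-out sketch of a narrower grant set instead of roles/editor. The exact roles a
# pipeline needs depend on the job, so treat this list as an assumption, not a vetted minimum:
/*
locals {
  dataflow_sa_roles = [
    "roles/dataflow.worker",
    "roles/bigquery.dataEditor",
    "roles/storage.objectAdmin",
    "roles/pubsub.editor"
  ]
}

resource "google_project_iam_member" "dataflow_service_account_scoped_roles" {
  for_each = toset(local.dataflow_sa_roles)
  project  = var.project_id
  role     = each.key
  member   = "serviceAccount:${google_service_account.dataflow_service_account.email}"
}
*/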
####################################################################################
# Dataproc Service Account
####################################################################################
# Service account for dataproc cluster
resource "google_service_account" "dataproc_service_account" {
project = var.project_id
account_id = "dataproc-service-account"
display_name = "Service Account for Dataproc Environment"
}
# Grant Editor to the service account (broader than needed; the same narrower-role approach sketched above for Dataflow applies here)
resource "google_project_iam_member" "dataproc_service_account_editor_role" {
project = var.project_id
role = "roles/editor"
member = "serviceAccount:${google_service_account.dataproc_service_account.email}"
depends_on = [
google_service_account.dataproc_service_account
]
}
####################################################################################
# Dataplex: https://cloud.google.com/dataplex/docs/terraform
####################################################################################
/*
resource "google_dataplex_entry_group" "entry_group_customer_02" {
project = var.project_id
entry_group_id = "entry-group-customer-02"
location = "global"
labels = { "tag": "test-tf" }
display_name = "Customer 02 (entry group)"
description = "Customer 02 entry group used for customer data"
}
resource "google_dataplex_entry_group" "entry_group_customer_01" {
project = var.project_id
entry_group_id = "entry-group-customer-01"
location = "us"
labels = { "tag": "test-tf" }
display_name = "Customer 01 (entry group)"
description = "Customer 01 entry group used for customer data"
}
resource "google_dataplex_aspect_type" "aspect_type_pii_01" {
project = var.project_id
aspect_type_id = "aspect-type-pii-01"
location = "us"
labels = { "tag": "test-tf" }
display_name = "PII 01 (aspect type)"
description = "PII data aspect type"
metadata_template = <<EOF
{
"name": "tf-test-template",
"type": "record",
"recordFields": [
{
"name": "type",
"type": "enum",
"annotations": {
"displayName": "Type",
"description": "Specifies the type of view represented by the entry."
},
"index": 1,
"constraints": {
"required": true
},
"enumValues": [
{
"name": "VIEW",
"index": 1
}
]
}
]
}
EOF
}
resource "google_dataplex_entry_type" "entry_type_customer_01" {
project = var.project_id
entry_type_id = "entry-type-customer-01"
location = "us"
labels = { "tag": "test-tf" }
display_name = "Customer 01 (entry type)"
description = "Customer 01 entry type"
type_aliases = ["TABLE", "DATABASE"]
platform = "GCS"
system = "BigQuery"
required_aspects {
type = google_dataplex_aspect_type.aspect_type_pii_01.name
}
depends_on = [google_dataplex_aspect_type.aspect_type_pii_01]
}
*/
# entry_group (no children?)
# aspect_type (no children, not available)
# entry_type (pii aspect type child)
# entry (created using the curl commands below)
/*
# List the Entry Types
curl \
'https://dataplex.googleapis.com/v1/projects/governed-data-0t2p4zvntp/locations/us/entryTypes' \
--header "Authorization: Bearer $(gcloud auth application-default print-access-token)" \
--header 'Accept: application/json' \
--compressed
# List the Entry Groups
curl \
'https://dataplex.googleapis.com/v1/projects/governed-data-0t2p4zvntp/locations/us/entryGroups' \
--header "Authorization: Bearer $(gcloud auth application-default print-access-token)" \
--header 'Accept: application/json' \
--compressed
# List the Aspect Types
curl \
'https://dataplex.googleapis.com/v1/projects/governed-data-0t2p4zvntp/locations/us/aspectTypes' \
--header "Authorization: Bearer $(gcloud auth application-default print-access-token)" \
--header 'Accept: application/json' \
--compressed
# Create a 'custom' Entry, place it in the Entry Group (entry-group-customer-01), and
# assign the Entry Type (entry-type-customer-01), which uses the Aspect (aspect-type-pii-01)
curl -X POST \
https://dataplex.googleapis.com/v1/projects/governed-data-0t2p4zvntp/locations/us/entryGroups/entry-group-customer-01/entries?entry_id=navjot \
--header "Authorization: Bearer $(gcloud auth application-default print-access-token)" \
--header "Content-Type: application/json" \
--data \
"{
'entry_type': 'projects/governed-data-0t2p4zvntp/locations/us/entryTypes/entry-type-customer-01',
'aspects': {
'governed-data-0t2p4zvntp.us.aspect-type-pii-01': {
'data':{'type': 'VIEW'}
}
}
}"
# Set a name?
curl -X POST \
https://dataplex.googleapis.com/v1/projects/governed-data-wbi0cbgkhe/locations/us/entryGroups/entry-group-customer-01/entries?entry_id=adamp \
--header "Authorization: Bearer $(gcloud auth application-default print-access-token)" \
--header "Content-Type: application/json" \
--data \
"{
'name' : 'Paternostro',
'entry_type': 'projects/governed-data-wbi0cbgkhe/locations/us/entryTypes/entry-type-customer-01',
'aspects': {
'governed-data-wbi0cbgkhe.us.aspect-type-pii-01': {
'data':{'type': 'VIEW'}
}
}
}"
curl -X POST \
https://dataplex.googleapis.com/v1/projects/governed-data-0t2p4zvntp/locations/us/entryGroups/entry-group-customer-01/entries?entry_id=navjot01 \
--header "Authorization: Bearer $(gcloud auth application-default print-access-token)" \
--header "Content-Type: application/json" \
--data \
"{
'display_name' : 'Navjot Singh 01',
'description' : 'My description',
'entry_type': 'projects/governed-data-0t2p4zvntp/locations/us/entryTypes/entry-type-customer-01',
'aspects': {
'governed-data-0t2p4zvntp.us.aspect-type-pii-01': {
'data':{'type': 'VIEW'}
}
}
}"
# View the entry in the entry group
curl \
'https://dataplex.googleapis.com/v1/projects/governed-data-0t2p4zvntp/locations/us/entryGroups/entry-group-customer/entries' \
--header "Authorization: Bearer $(gcloud auth application-default print-access-token)" \
--header 'Accept: application/json' \
--compressed
# Delete a custom entry (you must create test_entry first)
curl -X DELETE \
https://dataplex.googleapis.com/v1/projects/governed-data-0t2p4zvntp/locations/us/entryGroups/entry-group-customer/entries/test_entry \
--header "Authorization: Bearer $(gcloud auth application-default print-access-token)" \
--header "Content-Type: application/json" \
--compressed
# Add Aspect Type to an existing BigQuery table
# NOTE: You can put this in the data JSON: 'entry_type': 'projects/governed-data-0t2p4zvntp/locations/us/entryTypes/entry-type-customer-01'
# This will make this aspect required and the system aspects optional (this might be a bug)
curl -X PATCH \
https://dataplex.googleapis.com/v1/projects/governed-data-0t2p4zvntp/locations/us/entryGroups/@bigquery/entries/bigquery.googleapis.com/projects/governed-data-0t2p4zvntp/datasets/governed_data/tables/campaign?update_mask=aspects \
--header "Authorization: Bearer $(gcloud auth application-default print-access-token)" \
--header "Content-Type: application/json" \
--data \
"{
'entry_type': 'projects/governed-data-0t2p4zvntp/locations/us/entryTypes/entry-type-customer-01',
'aspects': {
'governed-data-0t2p4zvntp.us.aspect-type-pii-01': {
'data':{'type': 'VIEW'}
}
}
}"
# Update metadata (built in fields) on a BigQuery table
curl -X PATCH \
https://dataplex.googleapis.com/v1/projects/governed-data-0t2p4zvntp/locations/us/entryGroups/@bigquery/entries/bigquery.googleapis.com/projects/governed-data-0t2p4zvntp/datasets/governed_data/tables/campaign?update_mask=aspects \
--header "Authorization: Bearer $(gcloud auth application-default print-access-token)" \
--header "Content-Type: application/json" \
--data \
"{
'aspects': {
'dataplex-types.global.overview': {
'data': {
'content': 'Hi This is a test'
}
},
'dataplex-types.global.contacts': {
'data': {
'identities': [
{'role':'Tech Lead','name':'Adam Paternostro'},
{'role':'SME','name':'Sam Iyer'}
]
}
}
}
}
"
# Add Aspect Type to an existing BigQuery table
curl -X PATCH \
https://dataplex.googleapis.com/v1/projects/governed-data-0t2p4zvntp/locations/us/entryGroups/@bigquery/entries/bigquery.googleapis.com/projects/governed-data-0t2p4zvntp/datasets/governed_data/tables/campaign?update_mask=aspects \
--header "Authorization: Bearer $(gcloud auth application-default print-access-token)" \
--header "Content-Type: application/json" \
--data \
"{
'aspects': {
'governed-data-0t2p4zvntp.us.aspect-type-pii-01': {
'data':{'type': 'VIEW'}
}
}
}"
# Add Aspect Type to an existing BigQuery column
# To add a global aspect use this: 'dataplex-types.$GLOBAL_LOCATION.generic@Schema.column':
curl -X PATCH \
https://dataplex.googleapis.com/v1/projects/governed-data-0t2p4zvntp/locations/us/entryGroups/@bigquery/entries/bigquery.googleapis.com/projects/governed-data-0t2p4zvntp/datasets/governed_data/tables/campaign?update_mask=aspects \
--header "Authorization: Bearer $(gcloud auth application-default print-access-token)" \
--header "Content-Type: application/json" \
--data \
"{
'aspects': {
'governed-data-0t2p4zvntp.us.aspect-type-pii-01@Schema.menu_id': {
'data':{'type': 'VIEW'}
}
}
}"
*/
/*
resource "null_resource" "dataplex_custom_entry_01" {
triggers = {
project_id = var.project_id
dataplex_region = "us"
entry_group_id = google_dataplex_entry_group.entry_group_customer_01.entry_group_id
entry_type_id = google_dataplex_entry_type.entry_type_customer_01.entry_type_id
aspect_id = google_dataplex_aspect_type.aspect_type_pii_01.aspect_type_id
entry_id = "custom-entry-01"
local_curl_impersonation = var.local_curl_impersonation
}
provisioner "local-exec" {
when = create
command = <<EOF
curl -X POST \
https://dataplex.googleapis.com/v1/projects/${self.triggers.project_id}/locations/${self.triggers.dataplex_region}/entryGroups/${self.triggers.entry_group_id}/entries?entry_id=${self.triggers.entry_id} \
--header "Authorization: Bearer $(gcloud auth print-access-token ${self.triggers.local_curl_impersonation})" \
--header "Content-Type: application/json" \
--compressed \
--data \
"{
'entry_type': 'projects/${self.triggers.project_id}/locations/${self.triggers.dataplex_region}/entryTypes/${self.triggers.entry_type_id}',
'aspects': {
'${self.triggers.project_id}.${self.triggers.dataplex_region}.${self.triggers.aspect_id}': {
'data':{'type': 'VIEW'}
}
}
}"
EOF
}
# Bash variables here do not use curly brackets, to avoid double dollar signs in the Terraform heredoc
provisioner "local-exec" {
when = destroy
command = <<EOF
curl -X DELETE \
https://dataplex.googleapis.com/v1/projects/${self.triggers.project_id}/locations/${self.triggers.dataplex_region}/entryGroups/${self.triggers.entry_group_id}/entries/${self.triggers.entry_id} \
--header "Authorization: Bearer $(gcloud auth print-access-token ${self.triggers.local_curl_impersonation})" \
--header "Content-Type: application/json" \
--compressed
EOF
}
depends_on = [
google_dataplex_entry_group.entry_group_customer_01,
google_dataplex_aspect_type.aspect_type_pii_01,
google_dataplex_entry_type.entry_type_customer_01
]
}
*/
####################################################################################
# Analytics Hub
####################################################################################
resource "google_bigquery_analytics_hub_data_exchange" "analytics_hub_data_exchange" {
project = var.project_id
location = var.multi_region
data_exchange_id = "governed_data_data_exchange"
display_name = "BigQuery Data Governance Analytics Hub Data Exchange"
description = "BigQuery Data Governance Analytics Hub Data Exchange"
primary_contact = var.gcp_account_name
}
resource "google_bigquery_analytics_hub_listing" "analytics_hub_listing" {
project = var.project_id
location = var.multi_region
data_exchange_id = google_bigquery_analytics_hub_data_exchange.analytics_hub_data_exchange.data_exchange_id
listing_id = "governed_data_data_listing"
display_name = "BigQuery Governance Data Listing"
description = "BigQuery Governance Data Listing"
primary_contact = var.gcp_account_name
bigquery_dataset {
dataset = google_bigquery_dataset.google_bigquery_dataset_analytics_hub_publisher.id
}
restricted_export_config {
enabled = true
restrict_query_result = true
}
}
####################################################################################
# Bring in Analytics Hub reference
####################################################################################
# https://cloud.google.com/bigquery/docs/reference/analytics-hub/rest/v1/projects.locations.dataExchanges.listings/subscribe
resource "null_resource" "analyticshub_daily_weather_data" {
provisioner "local-exec" {
when = create
command = <<EOF
curl --request POST \
"https://analyticshub.googleapis.com/v1/projects/${var.project_number}/locations/${var.multi_region}/dataExchanges/governed_data_data_exchange/listings/governed_data_data_listing:subscribe" \
--header "Authorization: Bearer ${data.google_client_config.current.access_token}" \
--header "Accept: application/json" \
--header "Content-Type: application/json" \
--data '{"destinationDataset":{"datasetReference":{"datasetId":"${var.bigquery_analytics_hub_subscriber_dataset}","projectId":"${var.project_id}"},"friendlyName":"${var.bigquery_analytics_hub_subscriber_dataset}","location":"${var.multi_region}","description":"${var.bigquery_analytics_hub_subscriber_dataset}"}}' \
--compressed
EOF
}
depends_on = [
google_bigquery_analytics_hub_listing.analytics_hub_listing
]
}
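# Newer provider releases also expose a native subscription resource
# (google_bigquery_analytics_hub_listing_subscription). A commented-out sketch is below; whether
# the provider version pinned above supports it, and the exact argument names, should be
# verified against the provider docs before replacing the curl call.
/*
resource "google_bigquery_analytics_hub_listing_subscription" "analytics_hub_subscription" {
  project          = var.project_id
  location         = var.multi_region
  data_exchange_id = google_bigquery_analytics_hub_data_exchange.analytics_hub_data_exchange.data_exchange_id
  listing_id       = google_bigquery_analytics_hub_listing.analytics_hub_listing.listing_id
  destination_dataset {
    dataset_reference {
      dataset_id = var.bigquery_analytics_hub_subscriber_dataset
      project_id = var.project_id
    }
    friendly_name = var.bigquery_analytics_hub_subscriber_dataset
    location      = var.multi_region
    description   = var.bigquery_analytics_hub_subscriber_dataset
  }
}
*/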
####################################################################################
# Copy raw files
####################################################################################
resource "null_resource" "copy_raw_files_customer" {
provisioner "local-exec" {
when = create
command = <<EOF
curl --request POST \
"https://storage.googleapis.com/storage/v1/b/data-analytics-golden-demo/o/data-bytes%2Fdata-governance%2Fv1%2Fcustomer%2Fcustomer.avro/copyTo/b/${var.governed_data_raw_bucket}/o/customer%2Fcustomer.avro" \
--header "Authorization: Bearer ${data.google_client_config.current.access_token}" \
--header "Accept: application/json" \
--header "Content-Type: application/json" \
--data '' \
--compressed
EOF
}
depends_on = [
google_storage_bucket.google_storage_bucket_governed_data_raw_bucket
]
}
resource "null_resource" "copy_raw_files_customer_transaction" {
provisioner "local-exec" {
when = create
command = <<EOF
curl --request POST \
"https://storage.googleapis.com/storage/v1/b/data-analytics-golden-demo/o/data-bytes%2Fdata-governance%2Fv1%2Fcustomer_transaction%2Fcustomer_transaction.parquet/copyTo/b/${var.governed_data_raw_bucket}/o/customer_transaction%2Fcustomer_transaction.parquet" \
--header "Authorization: Bearer ${data.google_client_config.current.access_token}" \
--header "Accept: application/json" \
--header "Content-Type: application/json" \
--data '' \
--compressed
EOF
}
depends_on = [
google_storage_bucket.google_storage_bucket_governed_data_raw_bucket
]
}
resource "null_resource" "copy_raw_files_product" {
provisioner "local-exec" {
when = create
command = <<EOF
curl --request POST \
"https://storage.googleapis.com/storage/v1/b/data-analytics-golden-demo/o/data-bytes%2Fdata-governance%2Fv1%2Fproduct%2Fproduct.json/copyTo/b/${var.governed_data_raw_bucket}/o/product%2Fproduct.json" \
--header "Authorization: Bearer ${data.google_client_config.current.access_token}" \
--header "Accept: application/json" \
--header "Content-Type: application/json" \
--data '' \
--compressed
EOF
}
depends_on = [
google_storage_bucket.google_storage_bucket_governed_data_raw_bucket
]
}
resource "null_resource" "copy_raw_files_product_category" {
provisioner "local-exec" {
when = create
command = <<EOF
curl --request POST \
"https://storage.googleapis.com/storage/v1/b/data-analytics-golden-demo/o/data-bytes%2Fdata-governance%2Fv1%2Fproduct_category%2Fproduct_category.csv/copyTo/b/${var.governed_data_raw_bucket}/o/product_category%2Fproduct_category.csv" \
--header "Authorization: Bearer ${data.google_client_config.current.access_token}" \
--header "Accept: application/json" \
--header "Content-Type: application/json" \
--data '' \
--compressed
EOF
}
depends_on = [
google_storage_bucket.google_storage_bucket_governed_data_raw_bucket
]
}
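# The copies above call the Cloud Storage JSON API directly so they run under the Terraform
# access token. A commented-out sketch of the same copy with gcloud storage cp (customer file
# only), assuming the local gcloud is authenticated as an account that can read the public
# demo bucket and write to the raw bucket:
/*
resource "null_resource" "copy_raw_files_customer_gcloud" {
  provisioner "local-exec" {
    when    = create
    command = <<EOF
gcloud storage cp \
"gs://data-analytics-golden-demo/data-bytes/data-governance/v1/customer/customer.avro" \
"gs://${var.governed_data_raw_bucket}/customer/customer.avro"
EOF
  }
  depends_on = [
    google_storage_bucket.google_storage_bucket_governed_data_raw_bucket
  ]
}
*/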
####################################################################################
# Spark
####################################################################################
# Spark connection
resource "google_bigquery_connection" "spark_connection" {
project = var.project_id
connection_id = "spark-connection"
location = var.multi_region
friendly_name = "spark-connection"
description = "spark-connection"
spark {}
}
resource "time_sleep" "spark_connection_time_delay" {
depends_on = [google_bigquery_connection.spark_connection]
create_duration = "30s"
}
resource "google_project_iam_member" "spark_connection_bigquery_user" {
project = var.project_id
role = "roles/bigquery.user"
member = "serviceAccount:${google_bigquery_connection.spark_connection.spark[0].service_account_id}"
depends_on = [
time_sleep.spark_connection_time_delay,
]
}
resource "google_bigquery_dataset_access" "spark_connection_raw_dataset" {
project = var.project_id
dataset_id = google_bigquery_dataset.google_bigquery_dataset_governed_data_raw.dataset_id
role = "roles/bigquery.dataOwner"
user_by_email = google_bigquery_connection.spark_connection.spark[0].service_account_id
depends_on = [
time_sleep.spark_connection_time_delay,
google_bigquery_dataset.google_bigquery_dataset_governed_data_raw
]
}
resource "google_bigquery_dataset_access" "spark_connection_enriched_dataset" {
project = var.project_id
dataset_id = google_bigquery_dataset.google_bigquery_dataset_governed_data_enriched.dataset_id
role = "roles/bigquery.dataOwner"
user_by_email = google_bigquery_connection.spark_connection.spark[0].service_account_id
depends_on = [
time_sleep.spark_connection_time_delay,
google_bigquery_dataset.google_bigquery_dataset_governed_data_enriched
]
}
# Set bucket Object Admin on the code bucket
resource "google_storage_bucket_iam_member" "spark_connection_object_admin_code_bucket" {
bucket = google_storage_bucket.google_storage_bucket_governed_data_code_bucket.name
role = "roles/storage.objectAdmin"
member = "serviceAccount:${google_bigquery_connection.spark_connection.spark[0].service_account_id}"
depends_on = [
time_sleep.spark_connection_time_delay,
google_storage_bucket.google_storage_bucket_governed_data_code_bucket
]
}
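# To show what the Spark connection is for: a commented-out sketch of a BigQuery stored
# procedure for Apache Spark that runs through this connection. The routine name, the inline
# PySpark body, and the runtime version are placeholders; verify the spark_options arguments
# against the google_bigquery_routine docs for the pinned provider version.
/*
resource "google_bigquery_routine" "spark_example_procedure" {
  project         = var.project_id
  dataset_id      = google_bigquery_dataset.google_bigquery_dataset_governed_data_enriched.dataset_id
  routine_id      = "spark_example_procedure"
  routine_type    = "PROCEDURE"
  language        = "PYTHON"
  definition_body = <<EOF
from pyspark.sql import SparkSession
spark = SparkSession.builder.appName("spark-example-procedure").getOrCreate()
spark.sql("SELECT 1 AS demo_check").show()
EOF
  spark_options {
    connection      = google_bigquery_connection.spark_connection.name
    runtime_version = "2.1"
  }
}
*/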
####################################################################################
# Outputs
####################################################################################
output "dataflow_service_account" {
value = google_service_account.dataflow_service_account.email
}