modules/data_warehouse/bigquery.tf

/**
 * Copyright 2023 Google LLC
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

# Set up BigQuery resources

## Create the BigQuery dataset
resource "google_bigquery_dataset" "ds_edw" {
  project                    = module.project-services.project_id
  dataset_id                 = "thelook"
  friendly_name              = "My EDW Dataset"
  description                = "My EDW Dataset with tables"
  location                   = var.region
  labels                     = var.labels
  delete_contents_on_destroy = var.force_destroy

  depends_on = [time_sleep.wait_after_apis]
}

## Create a BigQuery connection for Cloud Storage to create BigLake tables
resource "google_bigquery_connection" "ds_connection" {
  project       = module.project-services.project_id
  connection_id = "ds_connection"
  location      = var.region
  friendly_name = "Storage Bucket Connection"
  cloud_resource {}

  depends_on = [time_sleep.wait_after_apis]
}

## Grant IAM access to the BigQuery Connection account for Cloud Storage
resource "google_project_iam_member" "bq_connection_iam_object_viewer" {
  project = module.project-services.project_id
  role    = "roles/storage.objectViewer"
  member  = "serviceAccount:${google_bigquery_connection.ds_connection.cloud_resource[0].service_account_id}"

  depends_on = [
    google_project_iam_member.workflow_manage_sa_roles,
    google_bigquery_connection.ds_connection,
  ]
}

## Create a BigQuery connection for Vertex AI to support generative AI use cases
resource "google_bigquery_connection" "vertex_ai_connection" {
  project       = module.project-services.project_id
  connection_id = "genai_connection"
  location      = var.region
  friendly_name = "BigQuery ML Connection"
  cloud_resource {}

  depends_on = [time_sleep.wait_after_apis]
}

## Define IAM roles granted to the BigQuery Connection service account
locals {
  bq_vertex_ai_roles = [
    "roles/aiplatform.user",
    "roles/bigquery.connectionUser",
    "roles/serviceusage.serviceUsageConsumer",
  ]
}

## Grant IAM access to the BigQuery Connection account for Vertex AI
resource "google_project_iam_member" "bq_connection_iam_vertex_ai" {
  count   = length(local.bq_vertex_ai_roles)
  role    = local.bq_vertex_ai_roles[count.index]
  project = module.project-services.project_id
  member  = "serviceAccount:${google_bigquery_connection.vertex_ai_connection.cloud_resource[0].service_account_id}"

  depends_on = [
    google_bigquery_connection.vertex_ai_connection,
    google_project_iam_member.bq_connection_iam_object_viewer,
  ]
}

# Create data tables in BigQuery

## Create a BigLake table for events with metadata caching
resource "google_bigquery_table" "tbl_edw_events" {
  dataset_id          = google_bigquery_dataset.ds_edw.dataset_id
  table_id            = "events"
  project             = module.project-services.project_id
  deletion_protection = var.deletion_protection

  schema = file("${path.module}/src/schema/events_schema.json")

  external_data_configuration {
    autodetect    = true
    connection_id = google_bigquery_connection.ds_connection.name
    source_format = "PARQUET"
    source_uris   = ["gs://${google_storage_bucket.raw_bucket.name}/thelook-ecommerce/events.parquet"]
  }

  labels = var.labels
}
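## Metadata caching is mentioned in the table comments but not configured on the
## tables in this module. Below is a minimal, commented-out sketch of what
## enabling BigLake metadata caching could look like with this provider; the
## resource name, table_id, and the 4-hour staleness window are illustrative
## assumptions, not part of this module:
#
# resource "google_bigquery_table" "tbl_edw_events_cached" { # hypothetical
#   dataset_id          = google_bigquery_dataset.ds_edw.dataset_id
#   table_id            = "events_cached"
#   project             = module.project-services.project_id
#   deletion_protection = var.deletion_protection
#
#   # Serve metadata from cache when it is at most 4 hours stale (SQL INTERVAL encoding).
#   max_staleness = "0-0 0 4:0:0"
#
#   external_data_configuration {
#     autodetect          = true
#     connection_id       = google_bigquery_connection.ds_connection.name
#     source_format       = "PARQUET"
#     # Let BigQuery refresh the metadata cache automatically in the background.
#     metadata_cache_mode = "AUTOMATIC"
#     source_uris         = ["gs://${google_storage_bucket.raw_bucket.name}/thelook-ecommerce/events.parquet"]
#   }
#
#   labels = var.labels
# }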
## Create a BigLake table for inventory_items
resource "google_bigquery_table" "tbl_edw_inventory_items" {
  dataset_id          = google_bigquery_dataset.ds_edw.dataset_id
  table_id            = "inventory_items"
  project             = module.project-services.project_id
  deletion_protection = var.deletion_protection

  schema = file("${path.module}/src/schema/inventory_items_schema.json")

  external_data_configuration {
    autodetect    = true
    connection_id = google_bigquery_connection.ds_connection.name
    source_format = "PARQUET"
    source_uris   = ["gs://${google_storage_bucket.raw_bucket.name}/thelook-ecommerce/inventory_items.parquet"]
  }

  labels = var.labels
}

## Create a BigLake table with metadata caching for order_items
resource "google_bigquery_table" "tbl_edw_order_items" {
  dataset_id          = google_bigquery_dataset.ds_edw.dataset_id
  table_id            = "order_items"
  project             = module.project-services.project_id
  deletion_protection = var.deletion_protection

  schema = file("${path.module}/src/schema/order_items_schema.json")

  external_data_configuration {
    autodetect    = true
    connection_id = google_bigquery_connection.ds_connection.name
    source_format = "PARQUET"
    source_uris   = ["gs://${google_storage_bucket.raw_bucket.name}/thelook-ecommerce/order_items.parquet"]
  }

  labels = var.labels
}

## Create a BigLake table for orders
resource "google_bigquery_table" "tbl_edw_orders" {
  dataset_id          = google_bigquery_dataset.ds_edw.dataset_id
  table_id            = "orders"
  project             = module.project-services.project_id
  deletion_protection = var.deletion_protection

  schema = file("${path.module}/src/schema/orders_schema.json")

  external_data_configuration {
    autodetect    = true
    connection_id = google_bigquery_connection.ds_connection.name
    source_format = "PARQUET"
    source_uris   = ["gs://${google_storage_bucket.raw_bucket.name}/thelook-ecommerce/orders.parquet"]
  }

  labels = var.labels
}

## Create a BigLake table for products
resource "google_bigquery_table" "tbl_edw_products" {
  dataset_id          = google_bigquery_dataset.ds_edw.dataset_id
  table_id            = "products"
  project             = module.project-services.project_id
  deletion_protection = var.deletion_protection

  schema = file("${path.module}/src/schema/products_schema.json")

  external_data_configuration {
    autodetect    = true
    connection_id = google_bigquery_connection.ds_connection.name
    source_format = "PARQUET"
    source_uris   = ["gs://${google_storage_bucket.raw_bucket.name}/thelook-ecommerce/products.parquet"]
  }

  labels = var.labels
}

## Create a BigLake table for users
resource "google_bigquery_table" "tbl_edw_users" {
  dataset_id          = google_bigquery_dataset.ds_edw.dataset_id
  table_id            = "users"
  project             = module.project-services.project_id
  deletion_protection = var.deletion_protection

  schema = file("${path.module}/src/schema/users_schema.json")

  external_data_configuration {
    autodetect    = true
    connection_id = google_bigquery_connection.ds_connection.name
    source_format = "PARQUET"
    source_uris   = ["gs://${google_storage_bucket.raw_bucket.name}/thelook-ecommerce/users.parquet"]
  }

  labels = var.labels
}

# Load Queries for Stored Procedure Execution

## Load Distribution Center Lookup Data Tables
resource "google_bigquery_routine" "sp_provision_lookup_tables" {
  project      = module.project-services.project_id
  dataset_id   = google_bigquery_dataset.ds_edw.dataset_id
  routine_id   = "sp_provision_lookup_tables"
  routine_type = "PROCEDURE"
  language     = "SQL"
  definition_body = templatefile("${path.module}/src/sql/sp_provision_lookup_tables.sql", {
    project_id = module.project-services.project_id,
    dataset_id = google_bigquery_dataset.ds_edw.dataset_id
  })
}
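## A minimal, commented-out sketch (not part of this module) of invoking the
## procedure above once at apply time via `google_bigquery_job`; the resource
## name `invoke_provision` is a hypothetical. BigQuery treats CALL as a script,
## so both dispositions must be set to empty strings:
#
# resource "google_bigquery_job" "invoke_provision" { # hypothetical
#   project  = module.project-services.project_id
#   location = var.region
#   job_id   = "invoke-provision-${random_id.id.hex}"
#
#   query {
#     query              = "CALL `${module.project-services.project_id}.${google_bigquery_dataset.ds_edw.dataset_id}.sp_provision_lookup_tables`()"
#     create_disposition = "" # scripts reject the CREATE_IF_NEEDED default
#     write_disposition  = "" # scripts reject the WRITE_EMPTY default
#   }
#
#   depends_on = [google_bigquery_routine.sp_provision_lookup_tables]
# }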
## Add Looker Studio Data Report Procedure
resource "google_bigquery_routine" "sproc_sp_demo_lookerstudio_report" {
  project      = module.project-services.project_id
  dataset_id   = google_bigquery_dataset.ds_edw.dataset_id
  routine_id   = "sp_lookerstudio_report"
  routine_type = "PROCEDURE"
  language     = "SQL"
  definition_body = templatefile("${path.module}/src/sql/sp_lookerstudio_report.sql", {
    project_id = module.project-services.project_id,
    dataset_id = google_bigquery_dataset.ds_edw.dataset_id
  })

  depends_on = [
    google_bigquery_table.tbl_edw_inventory_items,
    google_bigquery_table.tbl_edw_order_items,
    google_bigquery_routine.sp_provision_lookup_tables,
  ]
}

## Add Sample Queries
resource "google_bigquery_routine" "sp_sample_queries" {
  project      = module.project-services.project_id
  dataset_id   = google_bigquery_dataset.ds_edw.dataset_id
  routine_id   = "sp_sample_queries"
  routine_type = "PROCEDURE"
  language     = "SQL"
  definition_body = templatefile("${path.module}/src/sql/sp_sample_queries.sql", {
    project_id = module.project-services.project_id,
    dataset_id = google_bigquery_dataset.ds_edw.dataset_id
  })

  depends_on = [
    google_bigquery_table.tbl_edw_inventory_items,
    google_bigquery_table.tbl_edw_order_items,
  ]
}

## Add a BigQuery ML model for clustering
resource "google_bigquery_routine" "sp_bigqueryml_model" {
  project      = module.project-services.project_id
  dataset_id   = google_bigquery_dataset.ds_edw.dataset_id
  routine_id   = "sp_bigqueryml_model"
  routine_type = "PROCEDURE"
  language     = "SQL"
  definition_body = templatefile("${path.module}/src/sql/sp_bigqueryml_model.sql", {
    project_id = module.project-services.project_id,
    dataset_id = google_bigquery_dataset.ds_edw.dataset_id
  })

  depends_on = [google_bigquery_table.tbl_edw_order_items]
}

## Create a BigQuery ML model for text generation
resource "google_bigquery_routine" "sp_bigqueryml_generate_create" {
  project      = module.project-services.project_id
  dataset_id   = google_bigquery_dataset.ds_edw.dataset_id
  routine_id   = "sp_bigqueryml_generate_create"
  routine_type = "PROCEDURE"
  language     = "SQL"
  definition_body = templatefile("${path.module}/src/sql/sp_bigqueryml_generate_create.sql", {
    project_id    = module.project-services.project_id,
    dataset_id    = google_bigquery_dataset.ds_edw.dataset_id,
    connection_id = google_bigquery_connection.vertex_ai_connection.id,
    model_name    = var.text_generation_model_name,
    region        = var.region
  })

  depends_on = [
    google_bigquery_routine.sp_bigqueryml_model,
    google_bigquery_connection.vertex_ai_connection,
  ]
}

## Query the BigQuery ML model to describe customer clusters
resource "google_bigquery_routine" "sp_bigqueryml_generate_describe" {
  project      = module.project-services.project_id
  dataset_id   = google_bigquery_dataset.ds_edw.dataset_id
  routine_id   = "sp_bigqueryml_generate_describe"
  routine_type = "PROCEDURE"
  language     = "SQL"
  definition_body = templatefile("${path.module}/src/sql/sp_bigqueryml_generate_describe.sql", {
    project_id = module.project-services.project_id,
    dataset_id = google_bigquery_dataset.ds_edw.dataset_id,
    model_name = var.text_generation_model_name
  })

  depends_on = [google_bigquery_routine.sp_bigqueryml_generate_create]
}

## Add Translation Scripts
resource "google_bigquery_routine" "sp_sample_translation_queries" {
  project      = module.project-services.project_id
  dataset_id   = google_bigquery_dataset.ds_edw.dataset_id
  routine_id   = "sp_sample_translation_queries"
  routine_type = "PROCEDURE"
  language     = "SQL"
  definition_body = templatefile("${path.module}/src/sql/sp_sample_translation_queries.sql", {
    project_id = module.project-services.project_id,
    dataset_id = google_bigquery_dataset.ds_edw.dataset_id
  })

  depends_on = [google_bigquery_table.tbl_edw_inventory_items]
}
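## For context, every routine above renders its body with templatefile(), which
## substitutes ${project_id}, ${dataset_id}, and the other map keys into the SQL
## source before it reaches BigQuery. An illustrative template body follows (an
## assumption for demonstration, not the shipped src/sql files):
#
# -- templatefile() replaces both placeholders before the routine is created
# CREATE OR REPLACE TABLE `${project_id}.${dataset_id}.lookup_example` (
#   id    INT64,
#   label STRING
# );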
# Add Scheduled Query

## Create a DTS-specific service account
resource "google_service_account" "dts" {
  project                      = module.project-services.project_id
  account_id                   = "cloud-dts-sa-${random_id.id.hex}"
  display_name                 = "Service Account for Data Transfer Service"
  description                  = "Service account used to manage Data Transfer Service"
  create_ignore_already_exists = var.create_ignore_service_accounts

  depends_on = [time_sleep.wait_after_apis]
}

## Define the IAM roles granted to the DTS service account
locals {
  dts_roles = [
    "roles/bigquery.user",
    "roles/bigquery.dataEditor",
    "roles/bigquery.connectionUser",
    "roles/iam.serviceAccountTokenCreator",
  ]
}

## Grant the DTS-specific service account access
resource "google_project_iam_member" "dts_roles" {
  project = module.project-services.project_id
  count   = length(local.dts_roles)
  role    = local.dts_roles[count.index]
  member  = "serviceAccount:${google_service_account.dts.email}"

  depends_on = [
    time_sleep.wait_after_apis,
    google_project_iam_member.bq_connection_iam_vertex_ai,
  ]
}

# Set up the scheduled query
resource "google_bigquery_data_transfer_config" "dts_config" {
  display_name   = "nightlyloadquery"
  project        = module.project-services.project_id
  location       = var.region
  data_source_id = "scheduled_query"
  schedule       = "every day 00:00"
  params = {
    query = "CALL `${module.project-services.project_id}.${google_bigquery_dataset.ds_edw.dataset_id}.sp_bigqueryml_model`()"
  }
  service_account_name = google_service_account.dts.email

  depends_on = [
    google_project_iam_member.dts_roles,
    google_bigquery_dataset.ds_edw,
    module.workflow_polling_4,
  ]
}
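## The `schedule` above uses the Data Transfer Service schedule grammar. A few
## illustrative alternatives (assumptions about that grammar, not values used by
## this module):
#
# schedule = "every 24 hours"
# schedule = "every monday 09:00"
# schedule = "first sunday of quarter 00:00"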