# terraform-modules/deploy-files/tf-deploy-files.tf
####################################################################################
# Copyright 2023 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
####################################################################################
####################################################################################
# Create the GCP resources
#
# Author: Adam Paternostro
####################################################################################
# This provider version is required for the resources used below
terraform {
required_providers {
google = {
source = "hashicorp/google-beta"
version = "5.35.0"
}
}
}
####################################################################################
# Variables
####################################################################################
variable "project_id" {}
variable "dataplex_region" {}
variable "storage_bucket" {}
variable "random_extension" {}
variable "deployment_service_account_name" {}
variable "composer_name" {}
variable "composer_dag_bucket" {}
variable "demo_rest_api_service_uri" {}
variable "code_bucket_name" {}
variable "bigquery_rideshare_llm_raw_dataset" {}
variable "bigquery_rideshare_llm_enriched_dataset" {}
variable "bigquery_rideshare_llm_curated_dataset" {}
variable "bigquery_region" {}
variable "gcs_rideshare_lakehouse_raw_bucket" {}
variable "cloud_run_service_rideshare_plus_website_url" {}
variable "bigquery_taxi_dataset" {
type = string
default = "taxi_dataset"
}
locals {
# Convert gs://composer-generated-name/dags into just the bucket name, composer-generated-name (see the hypothetical example after this block)
local_composer_bucket_name = replace(replace(replace(var.composer_dag_bucket, "gs://", ""),"/dags",""),"/","")
local_composer_dag_path = "dags"
local_composer_data_path = "data"
local_dataproc_pyspark_path = "pyspark-code"
local_dataflow_source_path = "dataflow"
local_bigspark_path = "bigspark"
}
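# A hypothetical example of the bucket-name conversion above (the real bucket name is generated by Composer):
#   var.composer_dag_bucket          = "gs://us-central1-composer-demo-abc123-bucket/dags"
#   local.local_composer_bucket_name = "us-central1-composer-demo-abc123-bucket"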
####################################################################################
# Deploy Composer DAGs and Data
###################################################################################
# Upload the initial Airflow DAGs needed to run the system (dependencies of run-all-dags)
# Uploading all of the DAGs at once can cause issues: the Airflow instance is sized so small that
# they cannot all sync before run-all-dags is launched
# Upload DAG
resource "google_storage_bucket_object" "deploy_airflow_dag_step-01-taxi-data-download" {
name = "${local.local_composer_dag_path}/step-01-taxi-data-download.py"
bucket = local.local_composer_bucket_name
source = "../cloud-composer/dags/step-01-taxi-data-download.py"
depends_on = [
]
}
resource "google_storage_bucket_object" "deploy_airflow_dag_step-01-taxi-data-download-quick-copy" {
name = "${local.local_composer_dag_path}/step-01-taxi-data-download-quick-copy.py"
bucket = local.local_composer_bucket_name
source = "../cloud-composer/dags/step-01-taxi-data-download-quick-copy.py"
depends_on = [
]
}
# Upload DAG
resource "google_storage_bucket_object" "deploy_airflow_dag_step-02-taxi-data-processing" {
name = "${local.local_composer_dag_path}/step-02-taxi-data-processing.py"
bucket = local.local_composer_bucket_name
source = "../cloud-composer/dags/step-02-taxi-data-processing.py"
depends_on = [
]
}
resource "google_storage_bucket_object" "deploy_airflow_dag_step-02-taxi-data-processing-quick-copy" {
name = "${local.local_composer_dag_path}/step-02-taxi-data-processing-quick-copy.py"
bucket = local.local_composer_bucket_name
source = "../cloud-composer/dags/step-02-taxi-data-processing-quick-copy.py"
depends_on = [
]
}
# Upload DAG
resource "google_storage_bucket_object" "deploy_airflow_dag_step-03-hydrate-tables" {
name = "${local.local_composer_dag_path}/step-03-hydrate-tables.py"
bucket = local.local_composer_bucket_name
source = "../cloud-composer/dags/step-03-hydrate-tables.py"
depends_on = [
]
}
# Upload DAG
resource "google_storage_bucket_object" "deploy_airflow_dag_sample-dataflow-start-streaming-job" {
name = "${local.local_composer_dag_path}/sample-dataflow-start-streaming-job.py"
bucket = local.local_composer_bucket_name
source = "../cloud-composer/dags/sample-dataflow-start-streaming-job.py"
depends_on = [
]
}
# Upload DAG
resource "google_storage_bucket_object" "deploy_airflow_dag_sample-dataplex-dataprofile-ridehshare-llm" {
name = "${local.local_composer_dag_path}/sample-dataplex-dataprofile-ridehshare-llm.py"
bucket = local.local_composer_bucket_name
source = "../cloud-composer/dags/sample-dataplex-dataprofile-ridehshare-llm.py"
depends_on = [
]
}
# Upload the Airflow "data/template" files
resource "google_storage_bucket_object" "deploy_airflow_data_bash_dataplex_dataprofile_ridehshare_llm" {
name = "${local.local_composer_data_path}/bash_dataplex_dataprofile_ridehshare_llm.sh"
bucket = local.local_composer_bucket_name
source = "../cloud-composer/data/bash_dataplex_dataprofile_ridehshare_llm.sh"
depends_on = [
]
}
# Upload DAG
resource "google_storage_bucket_object" "deploy_airflow_dag_sample-dataplex-dataprofile-taxi" {
name = "${local.local_composer_dag_path}/sample-dataplex-dataprofile-taxi.py"
bucket = local.local_composer_bucket_name
source = "../cloud-composer/dags/sample-dataplex-dataprofile-taxi.py"
depends_on = [
]
}
# Upload the Airflow "data/template" files
resource "google_storage_bucket_object" "deploy_airflow_data_bash_dataplex_dataprofile_taxi" {
name = "${local.local_composer_data_path}/bash_dataplex_dataprofile_taxi.sh"
bucket = local.local_composer_bucket_name
source = "../cloud-composer/data/bash_dataplex_dataprofile_taxi.sh"
depends_on = [
]
}
# Upload DAG
resource "google_storage_bucket_object" "deploy_airflow_dag_sample-dataplex-dataprofile-thelook" {
name = "${local.local_composer_dag_path}/sample-dataplex-dataprofile-thelook.py"
bucket = local.local_composer_bucket_name
source = "../cloud-composer/dags/sample-dataplex-dataprofile-thelook.py"
depends_on = [
]
}
# Upload the Airflow "data/template" files
resource "google_storage_bucket_object" "deploy_airflow_data_bash_dataplex_dataprofile_thelook" {
name = "${local.local_composer_data_path}/bash_dataplex_dataprofile_thelook.sh"
bucket = local.local_composer_bucket_name
source = "../cloud-composer/data/bash_dataplex_dataprofile_thelook.sh"
depends_on = [
]
}
# Upload DAG
resource "google_storage_bucket_object" "deploy_airflow_dag_sample-dataplex-dataprofile-rideshare-lakehouse" {
name = "${local.local_composer_dag_path}/sample-dataplex-dataprofile-rideshare-lakehouse.py"
bucket = local.local_composer_bucket_name
source = "../cloud-composer/dags/sample-dataplex-dataprofile-rideshare-lakehouse.py"
depends_on = [
]
}
# Upload the Airflow "data/template" files
resource "google_storage_bucket_object" "deploy_airflow_data_bash_dataplex_dataprofile_ridehshare_lakehouse" {
name = "${local.local_composer_data_path}/bash_dataplex_dataprofile_ridehshare_lakehouse.sh"
bucket = local.local_composer_bucket_name
source = "../cloud-composer/data/bash_dataplex_dataprofile_ridehshare_lakehouse.sh"
depends_on = [
]
}
# Upload the Airflow "data/template" files
resource "google_storage_bucket_object" "deploy_airflow_data_sample_datastream_public_ip_deploy_postgres" {
name = "${local.local_composer_data_path}/sample_datastream_public_ip_deploy_postgres.sh"
bucket = local.local_composer_bucket_name
source = "../cloud-composer/data/sample_datastream_public_ip_deploy_postgres.sh"
depends_on = [
]
}
# Upload the Airflow "data/template" files
resource "google_storage_bucket_object" "deploy_airflow_data_sample_datastream_public_ip_deploy_datastream" {
name = "${local.local_composer_data_path}/sample_datastream_public_ip_deploy_datastream.sh"
bucket = local.local_composer_bucket_name
source = "../cloud-composer/data/sample_datastream_public_ip_deploy_datastream.sh"
depends_on = [
]
}
# Upload the Airflow "data/template" files
resource "google_storage_bucket_object" "deploy_airflow_data_sample_datastream_public_ip_destroy" {
name = "${local.local_composer_data_path}/sample_datastream_public_ip_destroy.sh"
bucket = local.local_composer_bucket_name
source = "../cloud-composer/data/sample_datastream_public_ip_destroy.sh"
depends_on = [
]
}
# Upload the Airflow "data/template" files
resource "google_storage_bucket_object" "deploy_airflow_data_sample_datastream_private_ip_deploy_postgres" {
name = "${local.local_composer_data_path}/sample_datastream_private_ip_deploy_postgres.sh"
bucket = local.local_composer_bucket_name
source = "../cloud-composer/data/sample_datastream_private_ip_deploy_postgres.sh"
depends_on = [
]
}
# Upload the Airflow "data/template" files
resource "google_storage_bucket_object" "deploy_airflow_data_sample_datastream_private_ip_deploy_datastream" {
name = "${local.local_composer_data_path}/sample_datastream_private_ip_deploy_datastream.sh"
bucket = local.local_composer_bucket_name
source = "../cloud-composer/data/sample_datastream_private_ip_deploy_datastream.sh"
depends_on = [
]
}
# Upload the Airflow "data/template" files
resource "google_storage_bucket_object" "deploy_airflow_data_sample_datastream_private_ip_destroy" {
name = "${local.local_composer_data_path}/sample_datastream_private_ip_destroy.sh"
bucket = local.local_composer_bucket_name
source = "../cloud-composer/data/sample_datastream_private_ip_destroy.sh"
depends_on = [
]
}
# Upload the Airflow "data/template" files
resource "google_storage_bucket_object" "deploy_airflow_data_bash_create_spanner_connection" {
name = "${local.local_composer_data_path}/bash_create_spanner_connection.sh"
bucket = local.local_composer_bucket_name
source = "../cloud-composer/data/bash_create_spanner_connection.sh"
depends_on = [
]
}
# Upload the Airflow "data/template" files
resource "google_storage_bucket_object" "deploy_airflow_data_bash_deploy_dataplex" {
name = "${local.local_composer_data_path}/bash_deploy_dataplex.sh"
bucket = local.local_composer_bucket_name
source = "../cloud-composer/data/bash_deploy_dataplex.sh"
depends_on = [
]
}
# Upload the Airflow "data/template" files
#resource "google_storage_bucket_object" "deploy_airflow_data_bash_deploy_rideshare_website" {
# name = "${local.local_composer_data_path}/bash_deploy_rideshare_website.sh"
# bucket = local.local_composer_bucket_name
# source = "../cloud-composer/data/bash_deploy_rideshare_website.sh"
#
# depends_on = [
# ]
#}
# Upload the Airflow "data/template" files
resource "google_storage_bucket_object" "deploy_airflow_data_bash_download_rideshare_images" {
name = "${local.local_composer_data_path}/bash_download_rideshare_images.sh"
bucket = local.local_composer_bucket_name
source = "../cloud-composer/data/bash_download_rideshare_images.sh"
depends_on = [
]
}
# Upload the Airflow "data/template" files
resource "google_storage_bucket_object" "deploy_airflow_data_bash_seed_unstructured_data" {
name = "${local.local_composer_data_path}/bash_seed_unstructured_data.sh"
bucket = local.local_composer_bucket_name
source = "../cloud-composer/data/bash_seed_unstructured_data.sh"
depends_on = [
]
}
# Upload the Airflow "data/template" files
resource "google_storage_bucket_object" "deploy_airflow_data_cloud_sql_reverse_proxy_template" {
name = "${local.local_composer_data_path}/cloud_sql_reverse_proxy_template.sh"
bucket = local.local_composer_bucket_name
source = "../cloud-composer/data/cloud_sql_reverse_proxy_template.sh"
depends_on = [
]
}
# Upload the Airflow "data/template" files
resource "google_storage_bucket_object" "deploy_airflow_data_postgres_create_datastream_replication" {
name = "${local.local_composer_data_path}/postgres_create_datastream_replication.sql"
bucket = local.local_composer_bucket_name
source = "../cloud-composer/data/postgres_create_datastream_replication.sql"
depends_on = [
]
}
# Upload the Airflow "data/template" files
resource "google_storage_bucket_object" "deploy_airflow_data_postgres_create_schema" {
name = "${local.local_composer_data_path}/postgres_create_schema.sql"
bucket = local.local_composer_bucket_name
source = "../cloud-composer/data/postgres_create_schema.sql"
depends_on = [
]
}
####################################################################################
# Dataplex Terraform
####################################################################################
# Upload Deploy DAG
resource "google_storage_bucket_object" "deploy_airflow_dag_sample-dataplex-deploy" {
name = "${local.local_composer_dag_path}/sample-dataplex-deploy.py"
bucket = local.local_composer_bucket_name
source = "../cloud-composer/dags/sample-dataplex-deploy.py"
depends_on = [
time_sleep.wait_for_airflow_dag_sync
]
}
# Upload the Destroy DAG (the Deploy DAG file is reused as the Destroy DAG)
resource "google_storage_bucket_object" "deploy_airflow_dag_sample-dataplex-destroy" {
name = "${local.local_composer_dag_path}/sample-dataplex-destroy.py"
bucket = local.local_composer_bucket_name
source = "../cloud-composer/dags/sample-dataplex-deploy.py"
depends_on = [
time_sleep.wait_for_airflow_dag_sync
]
}
# Backend State file
# The bucket for the state must be substituted. We do not want to do this for the other files since we
# would need to escape all the ${var} with $${var}.
resource "google_storage_bucket_object" "deploy_airflow_data_terraform_dataplex_backend" {
name = "${local.local_composer_data_path}/terraform/dataplex/backend.tf"
bucket = local.local_composer_bucket_name
content = templatefile("../cloud-composer/data/terraform/dataplex/backend.tf",
{
code_bucket_name = var.code_bucket_name
})
depends_on = [
]
}
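# A minimal sketch of what the templated backend.tf above might contain (hypothetical contents;
# the actual file is ../cloud-composer/data/terraform/dataplex/backend.tf and the "prefix" value
# shown here is an assumption):
#
#   terraform {
#     backend "gcs" {
#       bucket = "${code_bucket_name}"
#       prefix = "terraform/dataplex"
#     }
#   }
#
# templatefile() substitutes ${code_bucket_name} with var.code_bucket_name at deploy time, which
# is why this one file is templated while the other Terraform files are uploaded verbatim.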
# Variables file
resource "google_storage_bucket_object" "deploy_airflow_data_terraform_dataplex_variables" {
name = "${local.local_composer_data_path}/terraform/dataplex/variables.tf"
bucket = local.local_composer_bucket_name
source = "../cloud-composer/data/terraform/dataplex/variables.tf"
depends_on = [
]
}
# Main Resources file
resource "google_storage_bucket_object" "deploy_airflow_data_terraform_dataplex" {
name = "${local.local_composer_data_path}/terraform/dataplex/terraform.tf"
bucket = local.local_composer_bucket_name
source = "../cloud-composer/data/terraform/dataplex/terraform.tf"
depends_on = [
]
}
# Bash script that installs and runs Terraform
resource "google_storage_bucket_object" "deploy_airflow_data_sample_terraform_dataplex" {
name = "${local.local_composer_data_path}/sample_terraform_dataplex.sh"
bucket = local.local_composer_bucket_name
source = "../cloud-composer/data/sample_terraform_dataplex.sh"
depends_on = [
]
}
####################################################################################
# Dataplex Terraform WITH Hive Metastore Service
####################################################################################
# Upload Deploy DAG
resource "google_storage_bucket_object" "deploy_airflow_dag_sample-dataplex-with-hms-deploy" {
name = "${local.local_composer_dag_path}/sample-dataplex-with-hms-deploy.py"
bucket = local.local_composer_bucket_name
source = "../cloud-composer/dags/sample-dataplex-with-hms-deploy.py"
depends_on = [
time_sleep.wait_for_airflow_dag_sync
]
}
# Upload the Destroy DAG (the Deploy DAG file is reused as the Destroy DAG)
resource "google_storage_bucket_object" "deploy_airflow_dag_sample-dataplex-with-hms-destroy" {
name = "${local.local_composer_dag_path}/sample-dataplex-with-hms-destroy.py"
bucket = local.local_composer_bucket_name
source = "../cloud-composer/dags/sample-dataplex-with-hms-deploy.py"
depends_on = [
time_sleep.wait_for_airflow_dag_sync
]
}
# Backend State file
# The bucket for the state must be substituted. We do not want to do this for the other files since we
# would need to escape all the ${var} with $${var}.
resource "google_storage_bucket_object" "deploy_airflow_data_terraform_dataplex_backend_with_hms" {
name = "${local.local_composer_data_path}/terraform/dataplex-with-hms/backend.tf"
bucket = local.local_composer_bucket_name
content = templatefile("../cloud-composer/data/terraform/dataplex-with-hms/backend.tf",
{
code_bucket_name = var.code_bucket_name
})
depends_on = [
]
}
# Variables file
resource "google_storage_bucket_object" "deploy_airflow_data_terraform_dataplex_variables_with_hms" {
name = "${local.local_composer_data_path}/terraform/dataplex-with-hms/variables.tf"
bucket = local.local_composer_bucket_name
source = "../cloud-composer/data/terraform/dataplex-with-hms/variables.tf"
depends_on = [
]
}
# Main Resources file
resource "google_storage_bucket_object" "deploy_airflow_data_terraform_dataplex_with_hms" {
name = "${local.local_composer_data_path}/terraform/dataplex-with-hms/terraform.tf"
bucket = local.local_composer_bucket_name
source = "../cloud-composer/data/terraform/dataplex-with-hms/terraform.tf"
depends_on = [
]
}
# Bash script that installs and runs Terraform
resource "google_storage_bucket_object" "deploy_airflow_data_sample_terraform_dataplex_with_hms" {
name = "${local.local_composer_data_path}/sample_terraform_dataplex_with_hms.sh"
bucket = local.local_composer_bucket_name
source = "../cloud-composer/data/sample_terraform_dataplex_with_hms.sh"
depends_on = [
]
}
####################################################################################
# Bucket Demo Terraform
####################################################################################
# Upload Deploy DAG
resource "google_storage_bucket_object" "deploy_airflow_dag_sample-terraform-bucket-demo-deploy" {
name = "${local.local_composer_dag_path}/sample-terraform-bucket-demo-deploy.py"
bucket = local.local_composer_bucket_name
source = "../cloud-composer/dags/sample-terraform-bucket-demo-deploy.py"
depends_on = [
time_sleep.wait_for_airflow_dag_sync
]
}
# Upload the Destroy DAG (the Deploy DAG file is reused as the Destroy DAG)
resource "google_storage_bucket_object" "deploy_airflow_dag_sample-terraform-bucket-demo-destroy" {
name = "${local.local_composer_dag_path}/sample-terraform-bucket-demo-destroy.py"
bucket = local.local_composer_bucket_name
source = "../cloud-composer/dags/sample-terraform-bucket-demo-deploy.py"
depends_on = [
time_sleep.wait_for_airflow_dag_sync
]
}
# Backend State file
# The bucket for the state must be substituted. We do not want to do this for the other files since we
# would need to escape all the ${var} with $${var}.
resource "google_storage_bucket_object" "deploy_airflow_data_terraform_bucket-demo_backend" {
name = "${local.local_composer_data_path}/terraform/bucket-demo/backend.tf"
bucket = local.local_composer_bucket_name
content = templatefile("../cloud-composer/data/terraform/bucket-demo/backend.tf",
{
code_bucket_name = var.code_bucket_name
})
depends_on = [
]
}
# Variables file
resource "google_storage_bucket_object" "deploy_airflow_data_terraform_bucket-demo_variables" {
name = "${local.local_composer_data_path}/terraform/bucket-demo/variables.tf"
bucket = local.local_composer_bucket_name
source = "../cloud-composer/data/terraform/bucket-demo/variables.tf"
depends_on = [
]
}
# Main Resources file
resource "google_storage_bucket_object" "deploy_airflow_data_terraform_bucket-demo" {
name = "${local.local_composer_data_path}/terraform/bucket-demo/terraform.tf"
bucket = local.local_composer_bucket_name
source = "../cloud-composer/data/terraform/bucket-demo/terraform.tf"
depends_on = [
]
}
# Bash script that installs and runs Terraform
resource "google_storage_bucket_object" "deploy_airflow_data_sample_terraform_bucket_demo" {
name = "${local.local_composer_data_path}/sample_terraform_bucket_demo.sh"
bucket = local.local_composer_bucket_name
source = "../cloud-composer/data/sample_terraform_bucket_demo.sh"
depends_on = [
]
}
####################################################################################
# Upload the PySpark scripts
###################################################################################
# Upload PySpark
resource "google_storage_bucket_object" "deploy_pyspark_compact_parquet_files" {
name = "${local.local_dataproc_pyspark_path}/compact_parquet_files.py"
bucket = "raw-${var.storage_bucket}"
source = "../dataproc/compact_parquet_files.py"
depends_on = [
]
}
# Upload PySpark
resource "google_storage_bucket_object" "deploy_pyspark_convert_taxi_to_iceberg_create_tables" {
name = "${local.local_dataproc_pyspark_path}/convert_taxi_to_iceberg_create_tables.py"
bucket = "raw-${var.storage_bucket}"
source = "../dataproc/convert_taxi_to_iceberg_create_tables.py"
depends_on = [
]
}
# Upload PySpark
resource "google_storage_bucket_object" "deploy_pyspark_convert_taxi_to_iceberg_data_updates" {
name = "${local.local_dataproc_pyspark_path}/convert_taxi_to_iceberg_data_updates.py"
bucket = "raw-${var.storage_bucket}"
source = "../dataproc/convert_taxi_to_iceberg_data_updates.py"
depends_on = [
]
}
# Upload PySpark
resource "google_storage_bucket_object" "deploy_pyspark_convert_taxi_to_parquet" {
name = "${local.local_dataproc_pyspark_path}/convert_taxi_to_parquet.py"
bucket = "raw-${var.storage_bucket}"
source = "../dataproc/convert_taxi_to_parquet.py"
depends_on = [
]
}
# Upload PySpark
resource "google_storage_bucket_object" "deploy_pyspark_export_taxi_data_biglake_storage_api" {
name = "${local.local_dataproc_pyspark_path}/export_taxi_data_biglake_storage_api.py"
bucket = "raw-${var.storage_bucket}"
source = "../dataproc/export_taxi_data_biglake_storage_api.py"
depends_on = [
]
}
# Upload PySpark
resource "google_storage_bucket_object" "deploy_pyspark_export_taxi_data_from_bq_to_gcs" {
name = "${local.local_dataproc_pyspark_path}/export_taxi_data_from_bq_to_gcs.py"
bucket = "raw-${var.storage_bucket}"
source = "../dataproc/export_taxi_data_from_bq_to_gcs.py"
depends_on = [
]
}
# Upload PySpark
resource "google_storage_bucket_object" "deploy_pyspark_rideshare_iceberg_serverless" {
name = "${local.local_dataproc_pyspark_path}/rideshare_iceberg_serverless.py"
bucket = "raw-${var.storage_bucket}"
source = "../dataproc/rideshare_iceberg_serverless.py"
depends_on = [
]
}
####################################################################################
# Download and upload the Spark JAR dependencies
###################################################################################
# Download the BigQuery Spark JAR file
# Download the Iceberg JAR File
resource "null_resource" "download_dataproc_jars" {
provisioner "local-exec" {
interpreter = ["/bin/bash", "-c"]
command = <<EOF
if [ -z "$${GOOGLE_APPLICATION_CREDENTIALS}" ]
then
echo "We are not running in a local docker container. No need to login."
else
echo "We are running in local docker container. Logging in."
gcloud auth activate-service-account "${var.deployment_service_account_name}" --key-file="$${GOOGLE_APPLICATION_CREDENTIALS}" --project="${var.project_id}"
gcloud config set account "${var.deployment_service_account_name}"
fi
curl -L https://repo.maven.apache.org/maven2/org/apache/iceberg/iceberg-spark-runtime-3.1_2.12/0.14.0/iceberg-spark-runtime-3.1_2.12-0.14.0.jar --output iceberg-spark-runtime-3.1_2.12-0.14.0.jar
curl -L https://github.com/GoogleCloudDataproc/spark-bigquery-connector/releases/download/0.26.0/spark-bigquery-with-dependencies_2.12-0.26.0.jar --output spark-bigquery-with-dependencies_2.12-0.26.0.jar
EOF
}
}
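# A hypothetical example of how GOOGLE_APPLICATION_CREDENTIALS might be supplied when this module
# runs inside a local Docker container (the image name, key file, and mount path are assumptions,
# not part of this repo):
#
#   docker run -it \
#     -v "$(pwd)/sa-key.json:/keys/sa-key.json" \
#     -e GOOGLE_APPLICATION_CREDENTIALS=/keys/sa-key.json \
#     my-terraform-image terraform apply
#
# When the variable is unset (e.g. a user-credential login in Cloud Shell), the provisioner skips
# the service account activation and only downloads the JAR files.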
# Upload PySpark JAR Files
resource "google_storage_bucket_object" "deploy_pyspark_iceberg-spark-runtime" {
name = "${local.local_dataproc_pyspark_path}/iceberg-spark-runtime-3.1_2.12-0.14.0.jar"
bucket = "raw-${var.storage_bucket}"
source = "iceberg-spark-runtime-3.1_2.12-0.14.0.jar"
depends_on = [
null_resource.download_dataproc_jars
]
}
# Upload PySpark JAR Files
resource "google_storage_bucket_object" "deploy_pyspark_spark-bigquery-with-dependencies" {
name = "${local.local_dataproc_pyspark_path}/spark-bigquery-with-dependencies_2.12-0.26.0.jar"
bucket = "raw-${var.storage_bucket}"
source = "spark-bigquery-with-dependencies_2.12-0.26.0.jar"
depends_on = [
null_resource.download_dataproc_jars
]
}
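# A hypothetical example of how a Dataproc job could reference the uploaded script and JARs
# (the cluster name and region are placeholders; the demo's Airflow DAGs presumably perform the
# actual submission):
#
#   gcloud dataproc jobs submit pyspark \
#     gs://raw-<storage_bucket>/pyspark-code/convert_taxi_to_iceberg_create_tables.py \
#     --cluster=<cluster-name> --region=<region> \
#     --jars=gs://raw-<storage_bucket>/pyspark-code/iceberg-spark-runtime-3.1_2.12-0.14.0.jar,gs://raw-<storage_bucket>/pyspark-code/spark-bigquery-with-dependencies_2.12-0.26.0.jar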
####################################################################################
# Upload the Dataflow scripts
###################################################################################
resource "google_storage_bucket_object" "deploy_dataflow_script_streaming-taxi-data" {
name = "${local.local_dataflow_source_path}/streaming-taxi-data.py"
bucket = "raw-${var.storage_bucket}"
source = "../dataflow/streaming-taxi-data.py"
depends_on = [
]
}
####################################################################################
# Upload the Dataplex scripts
####################################################################################
resource "google_storage_bucket_object" "dataplex_data_quality_yaml" {
name = "dataplex/data-quality/dataplex_data_quality_taxi.yaml"
content = templatefile("../dataplex/data-quality/dataplex_data_quality_taxi.yaml",
{
project_id = var.project_id
dataplex_region = var.dataplex_region
random_extension = var.random_extension
})
bucket = "code-${var.storage_bucket}"
}
resource "google_storage_bucket_object" "dataplex_data_quality_rideshare_yaml" {
name = "dataplex/data-quality/dataplex_data_quality_rideshare.yaml"
content = templatefile("../dataplex/data-quality/dataplex_data_quality_rideshare.yaml",
{
project_id = var.project_id
dataplex_region = var.dataplex_region
random_extension = var.random_extension
})
bucket = "code-${var.storage_bucket}"
}
resource "google_storage_bucket_object" "dataplex_data-explore-dataplex-explore-notebook" {
name = "dataplex/data-explore/dataplex-explore-notebook.ipynb"
content = templatefile("../dataplex/data-explore/dataplex-explore-notebook.ipynb",
{
project_id = var.project_id
dataplex_region = var.dataplex_region
random_extension = var.random_extension
bigquery_taxi_dataset = var.bigquery_taxi_dataset
})
bucket = "code-${var.storage_bucket}"
}
resource "google_storage_bucket_object" "dataplex_data-explore-dataplex-explore-script" {
name = "dataplex/data-explore/dataplex-explore-script.sql"
content = templatefile("../dataplex/data-explore/dataplex-explore-script.sql",
{
project_id = var.project_id
dataplex_region = var.dataplex_region
random_extension = var.random_extension
bigquery_taxi_dataset = var.bigquery_taxi_dataset
})
bucket = "code-${var.storage_bucket}"
}
####################################################################################
# Deploy BigSpark
####################################################################################
# Replace the Project and Bucket name
# Upload BigSpark script
resource "google_storage_bucket_object" "deploy_bigspark_sample-bigspark" {
name = "${local.local_bigspark_path}/sample-bigspark.py"
bucket = "raw-${var.storage_bucket}"
content = templatefile("../sample-data/bigspark/sample-bigspark.py",
{
project_id = var.project_id
bucket_name = "raw-${var.storage_bucket}"
})
depends_on = [
]
}
# Upload BigSpark sample data
resource "google_storage_bucket_object" "deploy_bigspark_sample-bigspark-discount-data" {
name = "${local.local_bigspark_path}/sample-bigspark-discount-data.csv"
bucket = "raw-${var.storage_bucket}"
source = "../sample-data/bigspark/sample-bigspark-discount-data.csv"
depends_on = [
]
}
# Random name file
resource "google_storage_bucket_object" "deploy_sample_data-random_names" {
name = "random_names/random_names.csv"
bucket = "raw-${var.storage_bucket}"
source = "../sample-data/random_names/random_names.csv"
depends_on = [
]
}
# Pickup and Dropoff location data
resource "google_storage_bucket_object" "deploy_sample_data-location" {
name = "processed/taxi-data/location/location.parquet"
bucket = "processed-${var.storage_bucket}"
source = "../sample-data/location/location.parquet"
depends_on = [
]
}
# Upload ResNet ImageNet labels sample data
resource "google_storage_bucket_object" "deploy_resnet_imagenet_labels-data" {
name = "resnet_imagenet_labels/resnet_imagenet_labels.csv"
bucket = "raw-${var.storage_bucket}"
source = "../sample-data/resnet_imagenet_labels/resnet_imagenet_labels.csv"
depends_on = [
]
}
# Upload BigLake Metastore sample data
resource "google_storage_bucket_object" "deploy_biglake-metastore-payment-type-data" {
name = "payment_type_table/payment_type_table.snappy.parquet"
bucket = "iceberg-source-data-${var.random_extension}"
source = "../sample-data/iceberg_source_data/payment_type_table.snappy.parquet"
depends_on = [
]
}
####################################################################################
# Delta IO Files
####################################################################################
# Upload the sample Delta.io files
# The manifest files need to have the GCS bucket name updated
# sample-data/rideshare_trips/_symlink_format_manifest/Rideshare_Vendor_Id=1/Pickup_Date=2021-12-01/manifest:
# 1: gs://REPLACE-BUCKET-NAME/delta_io/rideshare_trips/Rideshare_Vendor_Id=1/Pickup_Date=2021-12-01/part-00000-3bec3377-d4a1-4e29-9e1e-b106e63929a6.c000.snappy.parquet
# sample-data/rideshare_trips/_symlink_format_manifest/Rideshare_Vendor_Id=1/Pickup_Date=2021-12-02/manifest:
# 1: gs://REPLACE-BUCKET-NAME/delta_io/rideshare_trips/Rideshare_Vendor_Id=1/Pickup_Date=2021-12-02/part-00001-9dd1b37f-6e98-48c5-bb5a-613ba36b2f70.c000.snappy.parquet
# sample-data/rideshare_trips/_symlink_format_manifest/Rideshare_Vendor_Id=1/Pickup_Date=2021-12-03/manifest:
# 1: gs://REPLACE-BUCKET-NAME/delta_io/rideshare_trips/Rideshare_Vendor_Id=1/Pickup_Date=2021-12-03/part-00002-6d9993de-beb3-4c54-8aa7-a1ea576c2019.c000.snappy.parquet
# sample-data/rideshare_trips/_symlink_format_manifest/Rideshare_Vendor_Id=1/Pickup_Date=2021-12-04/manifest:
# 1: gs://REPLACE-BUCKET-NAME/delta_io/rideshare_trips/Rideshare_Vendor_Id=1/Pickup_Date=2021-12-04/part-00003-0c324b19-b541-4ae1-b958-7090e8192c62.c000.snappy.parquet
# sample-data/rideshare_trips/_symlink_format_manifest/Rideshare_Vendor_Id=2/Pickup_Date=2021-12-01/manifest:
# 1: gs://REPLACE-BUCKET-NAME/delta_io/rideshare_trips/Rideshare_Vendor_Id=2/Pickup_Date=2021-12-01/part-00004-0c280354-a13c-4b5b-9808-666ea0bcd49e.c000.snappy.parquet
# sample-data/rideshare_trips/_symlink_format_manifest/Rideshare_Vendor_Id=2/Pickup_Date=2021-12-02/manifest:
# 1: gs://REPLACE-BUCKET-NAME/delta_io/rideshare_trips/Rideshare_Vendor_Id=2/Pickup_Date=2021-12-02/part-00005-d22c4ae9-e0e6-4887-b0b6-493bf313d049.c000.snappy.parquet
# sample-data/rideshare_trips/_symlink_format_manifest/Rideshare_Vendor_Id=2/Pickup_Date=2021-12-03/manifest:
# 1: gs://REPLACE-BUCKET-NAME/delta_io/rideshare_trips/Rideshare_Vendor_Id=2/Pickup_Date=2021-12-03/part-00006-0aadcdad-a3a9-4e5c-a0f8-c5cc033f5878.c000.snappy.parquet
# sample-data/rideshare_trips/_symlink_format_manifest/Rideshare_Vendor_Id=2/Pickup_Date=2021-12-04/manifest:
# 1: gs://REPLACE-BUCKET-NAME/delta_io/rideshare_trips/Rideshare_Vendor_Id=2/Pickup_Date=2021-12-04/part-00007-327dd29b-6c62-4f56-963d-d7c0d2a235be.c000.snappy.parquet
# sample-data/rideshare_trips/Rideshare_Vendor_Id=1:
# Pickup_Date=2021-12-01 Pickup_Date=2021-12-02 Pickup_Date=2021-12-03 Pickup_Date=2021-12-04 Pickup_Date=2021-12-06
# sample-data/rideshare_trips/Rideshare_Vendor_Id=1/Pickup_Date=2021-12-01:
# part-00000-3bec3377-d4a1-4e29-9e1e-b106e63929a6.c000.snappy.parquet part-00000-7dfd2262-fa70-4593-8d3a-d82efa1b94e2.c000.snappy.parquet
resource "google_storage_bucket_object" "deploy_sample_data_parquet_part-00000-3bec3377-d4a1-4e29-9e1e-b106e63929a6_c000_snappy_parquet" {
name = "delta_io/rideshare_trips/Rideshare_Vendor_Id=1/Pickup_Date=2021-12-01/part-00000-3bec3377-d4a1-4e29-9e1e-b106e63929a6.c000.snappy.parquet"
bucket = "processed-${var.storage_bucket}"
source = "../sample-data/rideshare_trips/Rideshare_Vendor_Id=1/Pickup_Date=2021-12-01/part-00000-3bec3377-d4a1-4e29-9e1e-b106e63929a6.c000.snappy.parquet"
}
resource "google_storage_bucket_object" "deploy_sample_data_parquet_part-00000-7dfd2262-fa70-4593-8d3a-d82efa1b94e2_c000_snappy_parquet" {
name = "delta_io/rideshare_trips/Rideshare_Vendor_Id=1/Pickup_Date=2021-12-01/part-00000-7dfd2262-fa70-4593-8d3a-d82efa1b94e2.c000.snappy.parquet"
bucket = "processed-${var.storage_bucket}"
source = "../sample-data/rideshare_trips/Rideshare_Vendor_Id=1/Pickup_Date=2021-12-01/part-00000-7dfd2262-fa70-4593-8d3a-d82efa1b94e2.c000.snappy.parquet"
}
# sample-data/rideshare_trips/Rideshare_Vendor_Id=1/Pickup_Date=2021-12-02:
# part-00000-3c1d5fd2-43fe-4e9f-b51b-6089242ff338.c000.snappy.parquet part-00001-9dd1b37f-6e98-48c5-bb5a-613ba36b2f70.c000.snappy.parquet
resource "google_storage_bucket_object" "deploy_sample_data_parquet_part-00000-3c1d5fd2-43fe-4e9f-b51b-6089242ff338_c000_snappy_parquet" {
name = "delta_io/rideshare_trips/Rideshare_Vendor_Id=1/Pickup_Date=2021-12-02/part-00000-3c1d5fd2-43fe-4e9f-b51b-6089242ff338.c000.snappy.parquet"
bucket = "processed-${var.storage_bucket}"
source = "../sample-data/rideshare_trips/Rideshare_Vendor_Id=1/Pickup_Date=2021-12-02/part-00000-3c1d5fd2-43fe-4e9f-b51b-6089242ff338.c000.snappy.parquet"
}
resource "google_storage_bucket_object" "deploy_sample_data_parquet_part-00001-9dd1b37f-6e98-48c5-bb5a-613ba36b2f70_c000_snappy_parquet" {
name = "delta_io/rideshare_trips/Rideshare_Vendor_Id=1/Pickup_Date=2021-12-02/part-00001-9dd1b37f-6e98-48c5-bb5a-613ba36b2f70.c000.snappy.parquet"
bucket = "processed-${var.storage_bucket}"
source = "../sample-data/rideshare_trips/Rideshare_Vendor_Id=1/Pickup_Date=2021-12-02/part-00001-9dd1b37f-6e98-48c5-bb5a-613ba36b2f70.c000.snappy.parquet"
}
# sample-data/rideshare_trips/Rideshare_Vendor_Id=1/Pickup_Date=2021-12-03:
# part-00000-5816f690-6c3b-4d38-8266-023ce2449b70.c000.snappy.parquet part-00002-6d9993de-beb3-4c54-8aa7-a1ea576c2019.c000.snappy.parquet
resource "google_storage_bucket_object" "deploy_sample_data_parquet_part-00000-5816f690-6c3b-4d38-8266-023ce2449b70_c000_snappy_parquet" {
name = "delta_io/rideshare_trips/Rideshare_Vendor_Id=1/Pickup_Date=2021-12-03/part-00000-5816f690-6c3b-4d38-8266-023ce2449b70.c000.snappy.parquet"
bucket = "processed-${var.storage_bucket}"
source = "../sample-data/rideshare_trips/Rideshare_Vendor_Id=1/Pickup_Date=2021-12-03/part-00000-5816f690-6c3b-4d38-8266-023ce2449b70.c000.snappy.parquet"
}
resource "google_storage_bucket_object" "deploy_sample_data_parquet_part-00002-6d9993de-beb3-4c54-8aa7-a1ea576c2019_c000_snappy_parquet" {
name = "delta_io/rideshare_trips/Rideshare_Vendor_Id=1/Pickup_Date=2021-12-03/part-00002-6d9993de-beb3-4c54-8aa7-a1ea576c2019.c000.snappy.parquet"
bucket = "processed-${var.storage_bucket}"
source = "../sample-data/rideshare_trips/Rideshare_Vendor_Id=1/Pickup_Date=2021-12-03/part-00002-6d9993de-beb3-4c54-8aa7-a1ea576c2019.c000.snappy.parquet"
}
# sample-data/rideshare_trips/Rideshare_Vendor_Id=1/Pickup_Date=2021-12-04:
# part-00000-86dc7fc1-b24a-45d6-8245-b5db27040e5e.c000.snappy.parquet part-00003-0c324b19-b541-4ae1-b958-7090e8192c62.c000.snappy.parquet
resource "google_storage_bucket_object" "deploy_sample_data_parquet_part-00000-86dc7fc1-b24a-45d6-8245-b5db27040e5e_c000_snappy_parquet" {
name = "delta_io/rideshare_trips/Rideshare_Vendor_Id=1/Pickup_Date=2021-12-04/part-00000-86dc7fc1-b24a-45d6-8245-b5db27040e5e.c000.snappy.parquet"
bucket = "processed-${var.storage_bucket}"
source = "../sample-data/rideshare_trips/Rideshare_Vendor_Id=1/Pickup_Date=2021-12-04/part-00000-86dc7fc1-b24a-45d6-8245-b5db27040e5e.c000.snappy.parquet"
}
resource "google_storage_bucket_object" "deploy_sample_data_parquet_part-00003-0c324b19-b541-4ae1-b958-7090e8192c62_c000_snappy_parquet" {
name = "delta_io/rideshare_trips/Rideshare_Vendor_Id=1/Pickup_Date=2021-12-04/part-00003-0c324b19-b541-4ae1-b958-7090e8192c62.c000.snappy.parquet"
bucket = "processed-${var.storage_bucket}"
source = "../sample-data/rideshare_trips/Rideshare_Vendor_Id=1/Pickup_Date=2021-12-04/part-00003-0c324b19-b541-4ae1-b958-7090e8192c62.c000.snappy.parquet"
}
# sample-data/rideshare_trips/Rideshare_Vendor_Id=1/Pickup_Date=2021-12-06:
# part-00000-e9d227ec-236a-4090-8a2d-41ef9eda576d.c000.snappy.parquet
resource "google_storage_bucket_object" "deploy_sample_data_parquet_part-00000-e9d227ec-236a-4090-8a2d-41ef9eda576d_c000_snappy_parquet" {
name = "delta_io/rideshare_trips/Rideshare_Vendor_Id=1/Pickup_Date=2021-12-06/part-00000-e9d227ec-236a-4090-8a2d-41ef9eda576d.c000.snappy.parquet"
bucket = "processed-${var.storage_bucket}"
source = "../sample-data/rideshare_trips/Rideshare_Vendor_Id=1/Pickup_Date=2021-12-06/part-00000-e9d227ec-236a-4090-8a2d-41ef9eda576d.c000.snappy.parquet"
}
# sample-data/rideshare_trips/Rideshare_Vendor_Id=2:
# Pickup_Date=2021-12-01 Pickup_Date=2021-12-02 Pickup_Date=2021-12-03 Pickup_Date=2021-12-04 Pickup_Date=2021-12-06
# sample-data/rideshare_trips/Rideshare_Vendor_Id=2/Pickup_Date=2021-12-01:
# part-00000-4c85ae57-0beb-4bcb-b12e-3d96c12ca261.c000.snappy.parquet part-00004-0c280354-a13c-4b5b-9808-666ea0bcd49e.c000.snappy.parquet
resource "google_storage_bucket_object" "deploy_sample_data_parquet_part-00000-4c85ae57-0beb-4bcb-b12e-3d96c12ca261_c000_snappy_parquet" {
name = "delta_io/rideshare_trips/Rideshare_Vendor_Id=2/Pickup_Date=2021-12-01/part-00000-4c85ae57-0beb-4bcb-b12e-3d96c12ca261.c000.snappy.parquet"
bucket = "processed-${var.storage_bucket}"
source = "../sample-data/rideshare_trips/Rideshare_Vendor_Id=2/Pickup_Date=2021-12-01/part-00000-4c85ae57-0beb-4bcb-b12e-3d96c12ca261.c000.snappy.parquet"
}
resource "google_storage_bucket_object" "deploy_sample_data_parquet_part-00004-0c280354-a13c-4b5b-9808-666ea0bcd49e_c000_snappy_parquet" {
name = "delta_io/rideshare_trips/Rideshare_Vendor_Id=2/Pickup_Date=2021-12-01/part-00004-0c280354-a13c-4b5b-9808-666ea0bcd49e.c000.snappy.parquet"
bucket = "processed-${var.storage_bucket}"
source = "../sample-data/rideshare_trips/Rideshare_Vendor_Id=2/Pickup_Date=2021-12-01/part-00004-0c280354-a13c-4b5b-9808-666ea0bcd49e.c000.snappy.parquet"
}
# sample-data/rideshare_trips/Rideshare_Vendor_Id=2/Pickup_Date=2021-12-02:
# part-00000-08363b08-e1f1-4a9a-99ee-b2dfc6a72253.c000.snappy.parquet part-00005-d22c4ae9-e0e6-4887-b0b6-493bf313d049.c000.snappy.parquet
resource "google_storage_bucket_object" "deploy_sample_data_parquet_part-00000-08363b08-e1f1-4a9a-99ee-b2dfc6a72253_c000_snappy_parquet" {
name = "delta_io/rideshare_trips/Rideshare_Vendor_Id=2/Pickup_Date=2021-12-02/part-00000-08363b08-e1f1-4a9a-99ee-b2dfc6a72253.c000.snappy.parquet"
bucket = "processed-${var.storage_bucket}"
source = "../sample-data/rideshare_trips/Rideshare_Vendor_Id=2/Pickup_Date=2021-12-02/part-00000-08363b08-e1f1-4a9a-99ee-b2dfc6a72253.c000.snappy.parquet"
}
resource "google_storage_bucket_object" "deploy_sample_data_parquet_part-00005-d22c4ae9-e0e6-4887-b0b6-493bf313d049_c000_snappy_parquet" {
name = "delta_io/rideshare_trips/Rideshare_Vendor_Id=2/Pickup_Date=2021-12-02/part-00005-d22c4ae9-e0e6-4887-b0b6-493bf313d049.c000.snappy.parquet"
bucket = "processed-${var.storage_bucket}"
source = "../sample-data/rideshare_trips/Rideshare_Vendor_Id=2/Pickup_Date=2021-12-02/part-00005-d22c4ae9-e0e6-4887-b0b6-493bf313d049.c000.snappy.parquet"
}
# sample-data/rideshare_trips/Rideshare_Vendor_Id=2/Pickup_Date=2021-12-03:
# part-00000-cb84f5c4-33ab-4298-ae0c-2cae87cedf36.c000.snappy.parquet part-00006-0aadcdad-a3a9-4e5c-a0f8-c5cc033f5878.c000.snappy.parquet
resource "google_storage_bucket_object" "deploy_sample_data_parquet_part-00000-cb84f5c4-33ab-4298-ae0c-2cae87cedf36_c000_snappy_parquet" {
name = "delta_io/rideshare_trips/Rideshare_Vendor_Id=2/Pickup_Date=2021-12-03/part-00000-cb84f5c4-33ab-4298-ae0c-2cae87cedf36.c000.snappy.parquet"
bucket = "processed-${var.storage_bucket}"
source = "../sample-data/rideshare_trips/Rideshare_Vendor_Id=2/Pickup_Date=2021-12-03/part-00000-cb84f5c4-33ab-4298-ae0c-2cae87cedf36.c000.snappy.parquet"
}
resource "google_storage_bucket_object" "deploy_sample_data_parquet_part-00006-0aadcdad-a3a9-4e5c-a0f8-c5cc033f5878_c000_snappy_parquet" {
name = "delta_io/rideshare_trips/Rideshare_Vendor_Id=2/Pickup_Date=2021-12-03/part-00006-0aadcdad-a3a9-4e5c-a0f8-c5cc033f5878.c000.snappy.parquet"
bucket = "processed-${var.storage_bucket}"
source = "../sample-data/rideshare_trips/Rideshare_Vendor_Id=2/Pickup_Date=2021-12-03/part-00006-0aadcdad-a3a9-4e5c-a0f8-c5cc033f5878.c000.snappy.parquet"
}
# sample-data/rideshare_trips/Rideshare_Vendor_Id=2/Pickup_Date=2021-12-04:
# part-00000-8ec550ea-5ced-4dc1-8555-9fbe815fef12.c000.snappy.parquet part-00007-327dd29b-6c62-4f56-963d-d7c0d2a235be.c000.snappy.parquet
resource "google_storage_bucket_object" "deploy_sample_data_parquet_part-00000-8ec550ea-5ced-4dc1-8555-9fbe815fef12_c000_snappy_parquet" {
name = "delta_io/rideshare_trips/Rideshare_Vendor_Id=2/Pickup_Date=2021-12-04/part-00000-8ec550ea-5ced-4dc1-8555-9fbe815fef12.c000.snappy.parquet"
bucket = "processed-${var.storage_bucket}"
source = "../sample-data/rideshare_trips/Rideshare_Vendor_Id=2/Pickup_Date=2021-12-04/part-00000-8ec550ea-5ced-4dc1-8555-9fbe815fef12.c000.snappy.parquet"
}
resource "google_storage_bucket_object" "deploy_sample_data_parquet_part-00007-327dd29b-6c62-4f56-963d-d7c0d2a235be_c000_snappy_parquet" {
name = "delta_io/rideshare_trips/Rideshare_Vendor_Id=2/Pickup_Date=2021-12-04/part-00007-327dd29b-6c62-4f56-963d-d7c0d2a235be.c000.snappy.parquet"
bucket = "processed-${var.storage_bucket}"
source = "../sample-data/rideshare_trips/Rideshare_Vendor_Id=2/Pickup_Date=2021-12-04/part-00007-327dd29b-6c62-4f56-963d-d7c0d2a235be.c000.snappy.parquet"
}
# sample-data/rideshare_trips/Rideshare_Vendor_Id=2/Pickup_Date=2021-12-06:
# part-00000-e8cdd40e-4587-4ac8-b511-511d1fbce5d1.c000.snappy.parquet
resource "google_storage_bucket_object" "deploy_sample_data_parquet_part-00000-e8cdd40e-4587-4ac8-b511-511d1fbce5d1_c000_snappy_parquet" {
name = "delta_io/rideshare_trips/Rideshare_Vendor_Id=2/Pickup_Date=2021-12-06/part-00000-e8cdd40e-4587-4ac8-b511-511d1fbce5d1.c000.snappy.parquet"
bucket = "processed-${var.storage_bucket}"
source = "../sample-data/rideshare_trips/Rideshare_Vendor_Id=2/Pickup_Date=2021-12-06/part-00000-e8cdd40e-4587-4ac8-b511-511d1fbce5d1.c000.snappy.parquet"
}
# sample-data/rideshare_trips//_delta_log:
# 00000000000000000000.crc 00000000000000000001.crc 00000000000000000002.crc
# 00000000000000000000.json 00000000000000000001.json 00000000000000000002.json
resource "google_storage_bucket_object" "deploy_sample_data_delta_io_delta_log-00000000000000000000_crc" {
name = "delta_io/rideshare_trips/_delta_log/00000000000000000000.crc"
bucket = "processed-${var.storage_bucket}"
source = "../sample-data/rideshare_trips/_delta_log/00000000000000000000.crc"
}
resource "google_storage_bucket_object" "deploy_sample_data_delta_io_delta_log-00000000000000000001_crc" {
name = "delta_io/rideshare_trips/_delta_log/00000000000000000001.crc"
bucket = "processed-${var.storage_bucket}"
source = "../sample-data/rideshare_trips/_delta_log/00000000000000000001.crc"
}
resource "google_storage_bucket_object" "deploy_sample_data_delta_io_delta_log-00000000000000000002_crc" {
name = "delta_io/rideshare_trips/_delta_log/00000000000000000002.crc"
bucket = "processed-${var.storage_bucket}"
source = "../sample-data/rideshare_trips/_delta_log/00000000000000000002.crc"
}
resource "google_storage_bucket_object" "deploy_sample_data_delta_io_delta_log-00000000000000000000_json" {
name = "delta_io/rideshare_trips/_delta_log/00000000000000000000.json"
bucket = "processed-${var.storage_bucket}"
source = "../sample-data/rideshare_trips/_delta_log/00000000000000000000.json"
}
resource "google_storage_bucket_object" "deploy_sample_data_delta_io_delta_log-00000000000000000001_json" {
name = "delta_io/rideshare_trips/_delta_log/00000000000000000001.json"
bucket = "processed-${var.storage_bucket}"
source = "../sample-data/rideshare_trips/_delta_log/00000000000000000001.json"
}
resource "google_storage_bucket_object" "deploy_sample_data_delta_io_delta_log-00000000000000000002_json" {
name = "delta_io/rideshare_trips/_delta_log/00000000000000000002.json"
bucket = "processed-${var.storage_bucket}"
source = "../sample-data/rideshare_trips/_delta_log/00000000000000000002.json"
}
# sample-data/rideshare_trips//_symlink_format_manifest:
# Rideshare_Vendor_Id=1 Rideshare_Vendor_Id=2
# sample-data/rideshare_trips//_symlink_format_manifest/Rideshare_Vendor_Id=1:
# Pickup_Date=2021-12-01 Pickup_Date=2021-12-02 Pickup_Date=2021-12-03 Pickup_Date=2021-12-04
# sample-data/rideshare_trips//_symlink_format_manifest/Rideshare_Vendor_Id=1/Pickup_Date=2021-12-01:
# manifest
# Upload Sample Delta IO file with Template substitution
resource "google_storage_bucket_object" "deploy_sample_data_delta_io_Rideshare_Vendor_Id_1_Pickup_Date_2021-12-01" {
name = "delta_io/rideshare_trips/_symlink_format_manifest/Rideshare_Vendor_Id=1/Pickup_Date=2021-12-01/manifest"
bucket = "processed-${var.storage_bucket}"
content = templatefile("../sample-data/rideshare_trips/_symlink_format_manifest/Rideshare_Vendor_Id=1/Pickup_Date=2021-12-01/manifest",
{
project_id = var.project_id
bucket_name = "processed-${var.storage_bucket}"
})
}
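# Hypothetical rendered result for the manifest above (the exact placeholder syntax inside the
# source manifest is an assumption; templatefile() only substitutes ${...} style references):
#   gs://processed-<storage_bucket>/delta_io/rideshare_trips/Rideshare_Vendor_Id=1/Pickup_Date=2021-12-01/part-00000-3bec3377-d4a1-4e29-9e1e-b106e63929a6.c000.snappy.parquet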
# sample-data/rideshare_trips//_symlink_format_manifest/Rideshare_Vendor_Id=1/Pickup_Date=2021-12-02:
# manifest
# Upload Sample Delta IO file with Template substitution
resource "google_storage_bucket_object" "deploy_sample_data_delta_io_Rideshare_Vendor_Id_1_Pickup_Date_2021-12-02" {
name = "delta_io/rideshare_trips/_symlink_format_manifest/Rideshare_Vendor_Id=1/Pickup_Date=2021-12-02/manifest"
bucket = "processed-${var.storage_bucket}"
content = templatefile("../sample-data/rideshare_trips/_symlink_format_manifest/Rideshare_Vendor_Id=1/Pickup_Date=2021-12-02/manifest",
{
project_id = var.project_id
bucket_name = "processed-${var.storage_bucket}"
})
}
# sample-data/rideshare_trips//_symlink_format_manifest/Rideshare_Vendor_Id=1/Pickup_Date=2021-12-03:
# manifest
# Upload Sample Delta IO file with Template substitution
resource "google_storage_bucket_object" "deploy_sample_data_delta_io_Rideshare_Vendor_Id_1_Pickup_Date_2021-12-03" {
name = "delta_io/rideshare_trips/_symlink_format_manifest/Rideshare_Vendor_Id=1/Pickup_Date=2021-12-03/manifest"
bucket = "processed-${var.storage_bucket}"
content = templatefile("../sample-data/rideshare_trips/_symlink_format_manifest/Rideshare_Vendor_Id=1/Pickup_Date=2021-12-03/manifest",
{
project_id = var.project_id
bucket_name = "processed-${var.storage_bucket}"
})
}
# sample-data/rideshare_trips//_symlink_format_manifest/Rideshare_Vendor_Id=1/Pickup_Date=2021-12-04:
# manifest
# Upload Sample Delta IO file with Template substitution
resource "google_storage_bucket_object" "deploy_sample_data_delta_io_manifest_Rideshare_Vendor_Id_1_Pickup_Date_2021-12-04-bigspark" {
name = "delta_io/rideshare_trips/_symlink_format_manifest/Rideshare_Vendor_Id=1/Pickup_Date=2021-12-04/manifest"
bucket = "processed-${var.storage_bucket}"
content = templatefile("../sample-data/rideshare_trips/_symlink_format_manifest/Rideshare_Vendor_Id=1/Pickup_Date=2021-12-04/manifest",
{
project_id = var.project_id
bucket_name = "processed-${var.storage_bucket}"
})
}
# sample-data/rideshare_trips//_symlink_format_manifest/Rideshare_Vendor_Id=2:
# Pickup_Date=2021-12-01 Pickup_Date=2021-12-02 Pickup_Date=2021-12-03 Pickup_Date=2021-12-04
# sample-data/rideshare_trips//_symlink_format_manifest/Rideshare_Vendor_Id=2/Pickup_Date=2021-12-01:
# manifest
# Upload Sample Delta IO file with Template substitution
resource "google_storage_bucket_object" "deploy_sample_data_delta_io_Rideshare_Vendor_Id_2_Pickup_Date_2021-12-01" {
name = "delta_io/rideshare_trips/_symlink_format_manifest/Rideshare_Vendor_Id=2/Pickup_Date=2021-12-01/manifest"
bucket = "processed-${var.storage_bucket}"
content = templatefile("../sample-data/rideshare_trips/_symlink_format_manifest/Rideshare_Vendor_Id=2/Pickup_Date=2021-12-01/manifest",
{
project_id = var.project_id
bucket_name = "processed-${var.storage_bucket}"
})
}
# sample-data/rideshare_trips//_symlink_format_manifest/Rideshare_Vendor_Id=2/Pickup_Date=2021-12-02:
# manifest
# Upload Sample Delta IO file with Template substitution
resource "google_storage_bucket_object" "deploy_sample_data_delta_io_Rideshare_Vendor_Id_2_Pickup_Date_2021-12-02" {
name = "delta_io/rideshare_trips/_symlink_format_manifest/Rideshare_Vendor_Id=2/Pickup_Date=2021-12-02/manifest"
bucket = "processed-${var.storage_bucket}"
content = templatefile("../sample-data/rideshare_trips/_symlink_format_manifest/Rideshare_Vendor_Id=2/Pickup_Date=2021-12-02/manifest",
{
project_id = var.project_id
bucket_name = "processed-${var.storage_bucket}"
})
}
# sample-data/rideshare_trips//_symlink_format_manifest/Rideshare_Vendor_Id=2/Pickup_Date=2021-12-03:
# manifest
# Upload Sample Delta IO file with Template substitution
resource "google_storage_bucket_object" "deploy_sample_data_delta_io_Rideshare_Vendor_Id_2_Pickup_Date_2021-12-03" {
name = "delta_io/rideshare_trips/_symlink_format_manifest/Rideshare_Vendor_Id=2/Pickup_Date=2021-12-03/manifest"
bucket = "processed-${var.storage_bucket}"
content = templatefile("../sample-data/rideshare_trips/_symlink_format_manifest/Rideshare_Vendor_Id=2/Pickup_Date=2021-12-03/manifest",
{
project_id = var.project_id
bucket_name = "processed-${var.storage_bucket}"
})
}
# sample-data/rideshare_trips//_symlink_format_manifest/Rideshare_Vendor_Id=2/Pickup_Date=2021-12-04:
# manifest
# Upload Sample Delta IO file with Template substitution
resource "google_storage_bucket_object" "deploy_sample_data_delta_io_manifest_Rideshare_Vendor_Id_2_Pickup_Date_2021-12-04-bigspark" {
name = "delta_io/rideshare_trips/_symlink_format_manifest/Rideshare_Vendor_Id=2/Pickup_Date=2021-12-04/manifest"
bucket = "processed-${var.storage_bucket}"
content = templatefile("../sample-data/rideshare_trips/_symlink_format_manifest/Rideshare_Vendor_Id=2/Pickup_Date=2021-12-04/manifest",
{
project_id = var.project_id
bucket_name = "processed-${var.storage_bucket}"
})
}
####################################################################################
# Remaining Airflow / Composer DAGs
####################################################################################
# We need to wait for Airflow to read the DAGs that were just uploaded
# Only a few DAGs are uploaded first so that they sync more quickly
resource "time_sleep" "wait_for_airflow_dag_sync" {
depends_on = [
google_storage_bucket_object.deploy_airflow_dag_step-01-taxi-data-download-quick-copy,
google_storage_bucket_object.deploy_airflow_dag_step-02-taxi-data-processing-quick-copy,
google_storage_bucket_object.deploy_airflow_dag_step-01-taxi-data-download,
google_storage_bucket_object.deploy_airflow_dag_step-02-taxi-data-processing,
google_storage_bucket_object.deploy_airflow_dag_step-03-hydrate-tables,
google_storage_bucket_object.deploy_airflow_dag_sample-dataflow-start-streaming-job,
google_storage_bucket_object.deploy_airflow_dag_sample-rideshare-iceberg-serverless,
google_storage_bucket_object.deploy_airflow_dag_sample-rideshare-download-images,
# google_storage_bucket_object.deploy_airflow_dag_sample-rideshare-website,
google_storage_bucket_object.deploy_airflow_dag_sample-rideshare-llm-hydrate-data,
google_storage_bucket_object.deploy_airflow_dag_sample-rideshare-hydrate-data,
]
# This is just a "guess" and might need to be extended. The Composer (Airflow) cluster is sized very small, so it
# takes longer to sync the DAG files.
create_duration = "180s"
}
# Deploy all the remaining DAGs (hopefully the initial ones have synced by now)
# When run-all-dags is deployed, it should run automatically
# Upload DAG
resource "google_storage_bucket_object" "deploy_airflow_dag_run-all-dags" {
name = "${local.local_composer_dag_path}/run-all-dags.py"
bucket = local.local_composer_bucket_name
source = "../cloud-composer/dags/run-all-dags.py"
depends_on = [
time_sleep.wait_for_airflow_dag_sync
]
}
# Upload DAG
resource "google_storage_bucket_object" "deploy_airflow_dag_sample-bigquery-data-transfer-service" {
name = "${local.local_composer_dag_path}/sample-bigquery-data-transfer-service.py"
bucket = local.local_composer_bucket_name
source = "../cloud-composer/dags/sample-bigquery-data-transfer-service.py"
depends_on = [
time_sleep.wait_for_airflow_dag_sync
]
}
# Upload DAG
resource "google_storage_bucket_object" "deploy_airflow_dag_sample-bigquery-start-spanner" {
name = "${local.local_composer_dag_path}/sample-bigquery-start-spanner.py"
bucket = local.local_composer_bucket_name
source = "../cloud-composer/dags/sample-bigquery-start-spanner.py"
depends_on = [
time_sleep.wait_for_airflow_dag_sync
]
}
# Upload DAG
resource "google_storage_bucket_object" "deploy_airflow_dag_sample-bigquery-stop-spanner" {
name = "${local.local_composer_dag_path}/sample-bigquery-stop-spanner.py"
bucket = local.local_composer_bucket_name
source = "../cloud-composer/dags/sample-bigquery-stop-spanner.py"
depends_on = [
time_sleep.wait_for_airflow_dag_sync
]
}
# Upload DAG
resource "google_storage_bucket_object" "deploy_airflow_dag_sample-create-data-fusion" {
name = "${local.local_composer_dag_path}/sample-create-data-fusion.py"
bucket = local.local_composer_bucket_name
source = "../cloud-composer/dags/sample-create-data-fusion.py"
depends_on = [
time_sleep.wait_for_airflow_dag_sync
]
}
# Upload DAG
resource "google_storage_bucket_object" "deploy_airflow_dag_sample-dataflow-stop-streaming-job" {
name = "${local.local_composer_dag_path}/sample-dataflow-stop-streaming-job.py"
bucket = local.local_composer_bucket_name
source = "../cloud-composer/dags/sample-dataflow-stop-streaming-job.py"
depends_on = [
time_sleep.wait_for_airflow_dag_sync
]
}
# Upload DAG
resource "google_storage_bucket_object" "deploy_airflow_dag_sample-datastream-public-ip-deploy" {
name = "${local.local_composer_dag_path}/sample-datastream-public-ip-deploy.py"
bucket = local.local_composer_bucket_name
source = "../cloud-composer/dags/sample-datastream-public-ip-deploy.py"
depends_on = [
time_sleep.wait_for_airflow_dag_sync
]
}
# Upload DAG
resource "google_storage_bucket_object" "deploy_airflow_dag_sample-datastream-public-ip-destroy" {
name = "${local.local_composer_dag_path}/sample-datastream-public-ip-destroy.py"
bucket = local.local_composer_bucket_name
source = "../cloud-composer/dags/sample-datastream-public-ip-destroy.py"
depends_on = [
time_sleep.wait_for_airflow_dag_sync
]
}
# Upload DAG
resource "google_storage_bucket_object" "deploy_airflow_dag_sample-datastream-public-ip-generate-data" {
name = "${local.local_composer_dag_path}/sample-datastream-public-ip-generate-data.py"
bucket = local.local_composer_bucket_name
source = "../cloud-composer/dags/sample-datastream-public-ip-generate-data.py"
depends_on = [
time_sleep.wait_for_airflow_dag_sync
]
}
# Upload DAG
resource "google_storage_bucket_object" "deploy_airflow_dag_sample-datastream-private-ip-deploy" {
name = "${local.local_composer_dag_path}/sample-datastream-private-ip-deploy.py"
bucket = local.local_composer_bucket_name
source = "../cloud-composer/dags/sample-datastream-private-ip-deploy.py"
depends_on = [
time_sleep.wait_for_airflow_dag_sync
]
}
# Upload DAG
resource "google_storage_bucket_object" "deploy_airflow_dag_sample-datastream-private-ip-destroy" {
name = "${local.local_composer_dag_path}/sample-datastream-private-ip-destroy.py"
bucket = local.local_composer_bucket_name
source = "../cloud-composer/dags/sample-datastream-private-ip-destroy.py"
depends_on = [
time_sleep.wait_for_airflow_dag_sync
]
}
# Upload DAG
resource "google_storage_bucket_object" "deploy_airflow_dag_sample-datastream-private-ip-generate-data" {
name = "${local.local_composer_dag_path}/sample-datastream-private-ip-generate-data.py"
bucket = local.local_composer_bucket_name
source = "../cloud-composer/dags/sample-datastream-private-ip-generate-data.py"
depends_on = [
time_sleep.wait_for_airflow_dag_sync
]
}
# Upload DAG
resource "google_storage_bucket_object" "deploy_airflow_dag_sample-dataplex-run-data-quality" {
name = "${local.local_composer_dag_path}/sample-dataplex-run-data-quality.py"
bucket = local.local_composer_bucket_name
source = "../cloud-composer/dags/sample-dataplex-run-data-quality.py"
depends_on = [
time_sleep.wait_for_airflow_dag_sync
]
}
# Upload DAG
resource "google_storage_bucket_object" "deploy_airflow_dag_sample-export-taxi-trips-from-bq-to-gcs-cluster" {
name = "${local.local_composer_dag_path}/sample-export-taxi-trips-from-bq-to-gcs-cluster.py"
bucket = local.local_composer_bucket_name
source = "../cloud-composer/dags/sample-export-taxi-trips-from-bq-to-gcs-cluster.py"
depends_on = [
time_sleep.wait_for_airflow_dag_sync
]
}
# Upload DAG
resource "google_storage_bucket_object" "deploy_airflow_dag_sample-export-taxi-trips-from-bq-to-gcs-serverless" {
name = "${local.local_composer_dag_path}/sample-export-taxi-trips-from-bq-to-gcs-serverless.py"
bucket = local.local_composer_bucket_name
source = "../cloud-composer/dags/sample-export-taxi-trips-from-bq-to-gcs-serverless.py"
depends_on = [
time_sleep.wait_for_airflow_dag_sync
]
}
# Upload DAG
resource "google_storage_bucket_object" "deploy_airflow_dag_sample-iceberg-create-tables-update-data" {
name = "${local.local_composer_dag_path}/sample-iceberg-create-tables-update-data.py"
bucket = local.local_composer_bucket_name
source = "../cloud-composer/dags/sample-iceberg-create-tables-update-data.py"
depends_on = [
time_sleep.wait_for_airflow_dag_sync
]
}
# Upload DAG
resource "google_storage_bucket_object" "deploy_airflow_dag_sample-rideshare-download-images" {
name = "${local.local_composer_dag_path}/sample-rideshare-download-images.py"
bucket = local.local_composer_bucket_name
source = "../cloud-composer/dags/sample-rideshare-download-images.py"
depends_on = [
]
}
# Upload DAG
resource "google_storage_bucket_object" "deploy_airflow_dag_sample-rideshare-hydrate-data" {
name = "${local.local_composer_dag_path}/sample-rideshare-hydrate-data.py"
bucket = local.local_composer_bucket_name
source = "../cloud-composer/dags/sample-rideshare-hydrate-data.py"
depends_on = [
]
}
# Upload DAG
# resource "google_storage_bucket_object" "deploy_airflow_dag_sample-rideshare-hydrate-object-table" {
# name = "${local.local_composer_dag_path}/sample-rideshare-hydrate-object-table.py"
# bucket = local.local_composer_bucket_name
# source = "../cloud-composer/dags/sample-rideshare-hydrate-object-table.py"
#
# depends_on = [
# ]
# }
# Upload DAG
resource "google_storage_bucket_object" "deploy_airflow_dag_sample-rideshare-iceberg-serverless" {
name = "${local.local_composer_dag_path}/sample-rideshare-iceberg-serverless.py"
bucket = local.local_composer_bucket_name
source = "../cloud-composer/dags/sample-rideshare-iceberg-serverless.py"
depends_on = [
]
}
# Upload DAG
# resource "google_storage_bucket_object" "deploy_airflow_dag_sample-rideshare-object-table-delay" {
# name = "${local.local_composer_dag_path}/sample-rideshare-object-table-delay.py"
# bucket = local.local_composer_bucket_name
# source = "../cloud-composer/dags/sample-rideshare-object-table-delay.py"
#
# depends_on = [
# ]
# }
# Upload DAG
resource "google_storage_bucket_object" "deploy_airflow_dag_sample-rideshare-run-data-quality" {
name = "${local.local_composer_dag_path}/sample-rideshare-run-data-quality.py"
bucket = local.local_composer_bucket_name
source = "../cloud-composer/dags/sample-rideshare-run-data-quality.py"
depends_on = [
time_sleep.wait_for_airflow_dag_sync
]
}
# Upload DAG (no longer used, use Cloud Run website)
#resource "google_storage_bucket_object" "deploy_airflow_dag_sample-rideshare-website" {
# name = "${local.local_composer_dag_path}/sample-rideshare-website.py"
# bucket = local.local_composer_bucket_name
# source = "../cloud-composer/dags/sample-rideshare-website.py"
#
# depends_on = [
# ]
#}
# Upload DAG
resource "google_storage_bucket_object" "deploy_airflow_dag_sample-rideshare-llm-hydrate-data" {
name = "${local.local_composer_dag_path}/sample-rideshare-llm-hydrate-data.py"
bucket = local.local_composer_bucket_name
source = "../cloud-composer/dags/sample-rideshare-llm-hydrate-data.py"
depends_on = [
]
}
# Upload DAG
resource "google_storage_bucket_object" "deploy_airflow_dag_sample-seed-unstructured-data" {
name = "${local.local_composer_dag_path}/sample-seed-unstructured-data.py"
bucket = local.local_composer_bucket_name
source = "../cloud-composer/dags/sample-seed-unstructured-data.py"
depends_on = [
]
}
# Upload DAG
resource "google_storage_bucket_object" "deploy_airflow_dag_sample-sla-miss-task-groups" {
name = "${local.local_composer_dag_path}/sample-sla-miss-task-groups.py"
bucket = local.local_composer_bucket_name
source = "../cloud-composer/dags/sample-sla-miss-task-groups.py"
depends_on = [
time_sleep.wait_for_airflow_dag_sync
]
}
####################################################################################
# Deploy App Engine files
# No longer used; the Cloud Run website is used instead
####################################################################################
# e.g. gcloud app deploy ./app.yaml --project=data-analytics-demo-02kg6c8jm9 --quiet
# These are uploaded to the composer/data path where an Airflow job can deploy the website
# resource "google_storage_bucket_object" "deploy_rideshare_website_app_yaml" {
# name = "${local.local_composer_data_path}/rideshare-website/app.yaml"
# bucket = local.local_composer_bucket_name
# source = "../rideshare-website/app.yaml"
# }
# resource "google_storage_bucket_object" "deploy_rideshare_website_gcloudignore" {
# name = "${local.local_composer_data_path}/rideshare-website/.gcloudignore"
# bucket = local.local_composer_bucket_name
# source = "../rideshare-website/.gcloudignore"
# }
# resource "google_storage_bucket_object" "deploy_rideshare_website_www_configuration" {
# name = "${local.local_composer_data_path}/rideshare-website/www/configuration.html"
# bucket = local.local_composer_bucket_name
# content = templatefile("../rideshare-website/www/configuration.html", { project_id =var.project_id, demo_rest_api_service_uri=var.demo_rest_api_service_uri })
# }
# resource "google_storage_bucket_object" "deploy_rideshare_website_www_index" {
# name = "${local.local_composer_data_path}/rideshare-website/www/index.html"
# bucket = local.local_composer_bucket_name
# content = templatefile("../rideshare-website/www/index.html", { project_id =var.project_id, demo_rest_api_service_uri=var.demo_rest_api_service_uri,cloud_run_service_rideshare_plus_website_url=var.cloud_run_service_rideshare_plus_website_url })
# }
# resource "google_storage_bucket_object" "deploy_rideshare_website_www_predict" {
# name = "${local.local_composer_data_path}/rideshare-website/www/predict.html"
# bucket = local.local_composer_bucket_name
# content = templatefile("../rideshare-website/www/predict.html", { project_id =var.project_id, demo_rest_api_service_uri=var.demo_rest_api_service_uri })
# }
# resource "google_storage_bucket_object" "deploy_rideshare_website_www_realtime" {
# name = "${local.local_composer_data_path}/rideshare-website/www/realtime.html"
# bucket = local.local_composer_bucket_name
# content = templatefile("../rideshare-website/www/realtime.html", { project_id =var.project_id, demo_rest_api_service_uri=var.demo_rest_api_service_uri })
# }
# resource "google_storage_bucket_object" "deploy_rideshare_website_www_reports" {
# name = "${local.local_composer_data_path}/rideshare-website/www/reports.html"
# bucket = local.local_composer_bucket_name
# content = templatefile("../rideshare-website/www/reports.html", { project_id =var.project_id, demo_rest_api_service_uri=var.demo_rest_api_service_uri })
# }