infrastructure/terraform/bigquery-table.tf (203 lines of code) (raw):
# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Dataset that contains every BigQuery table declared in this file.
resource "google_bigquery_dataset" "bus-stop-image-processing" {
  location      = var.bigquery_dataset_location
  dataset_id    = "bus_stop_image_processing"
  friendly_name = "Bus Stop Image Processing"

  # Lets `terraform destroy` remove the dataset even if it still holds tables.
  delete_contents_on_destroy = true
}
# Cloud-resource connection; its service account is the identity that is
# granted bucket and Vertex AI access further down in this file.
resource "google_bigquery_connection" "image_bucket_connection" {
  project       = var.project_id
  location      = var.bigquery_dataset_location
  connection_id = "image_bucket_connection"

  # Empty block: selects the "cloud resource" connection type.
  cloud_resource {}
}
# Shared values derived from the dataset and the connection.
locals {
  # Short dataset id, used by every table resource in this file.
  dataset_id = google_bigquery_dataset.bus-stop-image-processing.dataset_id

  # "<project>.<dataset>" form, used where a fully qualified name is needed.
  fq_dataset_id = "${var.project_id}.${local.dataset_id}"

  # IAM member string for the connection's service account.
  image_bucket_connection_sa = "serviceAccount:${google_bigquery_connection.image_bucket_connection.cloud_resource[0].service_account_id}"
}
# Pauses after the connection is created so that IAM bindings referencing its
# service account are only attempted once the delay has elapsed.
resource "time_sleep" "wait_for_propagation_of_bucket_connection_sa" {
  create_duration = "45s"

  # Re-run the wait whenever the connection's service account changes.
  triggers = {
    connection_sa = google_bigquery_connection.image_bucket_connection.cloud_resource[0].service_account_id
  }

  depends_on = [google_bigquery_connection.image_bucket_connection]
}
# Grants the connection's service account object-viewer access on the image
# bucket.
resource "google_storage_bucket_iam_member" "image_bucket_connection_sa_bucket_viewer" {
  member = local.image_bucket_connection_sa
  role   = "roles/storage.objectViewer"
  bucket = google_storage_bucket.image_bucket.name

  # Only bind after the post-creation wait on the connection has finished.
  depends_on = [time_sleep.wait_for_propagation_of_bucket_connection_sa]
}
# Grants the connection's service account the Vertex AI user role at project
# level.
resource "google_project_iam_member" "image_bucket_connection_sa_vertex_ai_user" {
  member  = local.image_bucket_connection_sa
  role    = "roles/aiplatform.user"
  project = var.project_id

  # Only bind after the post-creation wait on the connection has finished.
  depends_on = [time_sleep.wait_for_propagation_of_bucket_connection_sa]
}
# Object table exposing the files under gs://<image bucket>/images/ through
# the image_bucket_connection connection.
resource "google_bigquery_table" "images" {
  description         = "Object table to access bus stop images"
  deletion_protection = false
  dataset_id          = local.dataset_id
  table_id            = "images"

  # Interval literal (years-months days h:m:s); reads as 30 minutes.
  max_staleness = "0-0 0 0:30:0"

  external_data_configuration {
    connection_id = google_bigquery_connection.image_bucket_connection.id
    source_uris   = [format("gs://%s/images/*", google_storage_bucket.image_bucket.id)]

    # Setting object_metadata is what makes this an object table.
    object_metadata     = "SIMPLE"
    metadata_cache_mode = "MANUAL"
    autodetect          = false
  }

  # The table reads the bucket through the connection's service account, so
  # make sure that account has been granted access to the bucket before the
  # table is created. Without this, Terraform may create both in parallel.
  depends_on = [google_storage_bucket_iam_member.image_bucket_connection_sa_bucket_viewer]
}
# Per-image attribute extraction results. Other tables in this file reference
# its report_id primary key through foreign keys.
resource "google_bigquery_table" "image_reports" {
  dataset_id          = local.dataset_id
  table_id            = "image_reports"
  description         = "Results of attribute extraction for an individual image"
  deletion_protection = false

  # Column definitions live in a JSON file next to this module.
  schema     = file("${path.module}/bigquery-schema/reports.json")
  clustering = ["bus_stop_id"]

  table_constraints {
    primary_key {
      columns = ["report_id"]
    }
  }
}
# Random suffix for the search-index job id. It is regenerated whenever the
# image_reports table gets a new creation_time.
resource "random_id" "reports_search_job_id_suffix" {
  keepers = {
    # NOTE(review): the key is misspelled ("itme"). Renaming it changes the
    # keepers map, which would regenerate the suffix and re-submit the job.
    table_creation_itme = google_bigquery_table.image_reports.creation_time
  }

  byte_length = 4
}
# Query job that creates a search index on the `description` column of the
# image_reports table.
resource "google_bigquery_job" "reports_search_index" {
  job_id   = "reports_search_index_${random_id.reports_search_job_id_suffix.hex}"
  location = var.bigquery_dataset_location

  query {
    query = "CREATE SEARCH INDEX IF NOT EXISTS reports_search_index ON `${local.fq_dataset_id}.${google_bigquery_table.image_reports.table_id}` (description)"

    # Both dispositions are explicitly blanked for this DDL statement;
    # presumably the provider defaults are not accepted for DDL - confirm.
    create_disposition = ""
    write_disposition  = ""
  }

  depends_on = [google_bigquery_table.image_reports]
}
# Watermark table for image processing; seeded with an initial row by the
# populate_process_watermark job.
resource "google_bigquery_table" "process_watermark" {
  dataset_id          = local.dataset_id
  table_id            = "process_watermark"
  description         = "Table with a single row which contains the timestamp the data was last processed."
  deletion_protection = false

  # Column definitions live in a JSON file next to this module.
  schema = file("${path.module}/bigquery-schema/process_watermark.json")
}
# Random suffix for the process-watermark seeding job id. It is regenerated
# whenever the process_watermark table gets a new creation_time.
resource "random_id" "populate_process_watermark_job_id_suffix" {
  keepers = {
    # NOTE(review): the key is misspelled ("itme"). Renaming it changes the
    # keepers map, which would regenerate the suffix and re-run the INSERT.
    table_creation_itme = google_bigquery_table.process_watermark.creation_time
  }

  byte_length = 4
}
# Query job that inserts the initial row (2000-01-01 UTC) into the
# process_watermark table.
resource "google_bigquery_job" "populate_process_watermark" {
  job_id   = "set_initial_process_watermark_${random_id.populate_process_watermark_job_id_suffix.hex}"
  location = var.bigquery_dataset_location

  query {
    query = "INSERT INTO ${local.dataset_id}.${google_bigquery_table.process_watermark.table_id} VALUES (TIMESTAMP('2000-01-01 00:00:00+00'))"

    # Both dispositions are explicitly blanked for this DML statement;
    # presumably the provider defaults are not accepted for DML - confirm.
    create_disposition = ""
    write_disposition  = ""
  }

  depends_on = [google_bigquery_table.process_watermark]
}
# Watermark table for report processing; seeded with an initial row by the
# populate_report_watermark job.
resource "google_bigquery_table" "report_watermark" {
  dataset_id          = local.dataset_id
  table_id            = "report_watermark"
  description         = "Table with a single row which contains the timestamp the report data was last processed."
  deletion_protection = false

  # Column definitions live in a JSON file next to this module.
  schema = file("${path.module}/bigquery-schema/report_watermark.json")
}
# Random suffix for the report-watermark seeding job id. It is regenerated
# whenever the report_watermark table gets a new creation_time.
resource "random_id" "populate_report_watermark_job_id_suffix" {
  keepers = {
    # NOTE(review): the key is misspelled ("itme"). Renaming it changes the
    # keepers map, which would regenerate the suffix and re-run the INSERT.
    table_creation_itme = google_bigquery_table.report_watermark.creation_time
  }

  byte_length = 4
}
# Query job that inserts the initial row (2000-01-01 UTC) into the
# report_watermark table.
resource "google_bigquery_job" "populate_report_watermark" {
  job_id   = "set_initial_report_watermark_${random_id.populate_report_watermark_job_id_suffix.hex}"
  location = var.bigquery_dataset_location

  query {
    query = "INSERT INTO ${local.dataset_id}.${google_bigquery_table.report_watermark.table_id} VALUES (TIMESTAMP('2000-01-01 00:00:00+00'))"

    # Both dispositions are explicitly blanked for this DML statement;
    # presumably the provider defaults are not accepted for DML - confirm.
    create_disposition = ""
    write_disposition  = ""
  }

  depends_on = [google_bigquery_table.report_watermark]
}
# Incidents table. Each row can point at two rows of image_reports: the report
# that opened the incident and the report that resolved it.
resource "google_bigquery_table" "incidents" {
  dataset_id          = local.dataset_id
  table_id            = "incidents"
  description         = "Incidents generated based on the attributes of the processed images"
  deletion_protection = false

  # Column definitions live in a JSON file next to this module.
  schema     = file("${path.module}/bigquery-schema/incidents.json")
  clustering = ["bus_stop_id"]

  table_constraints {
    # open_report_id -> image_reports.report_id
    foreign_keys {
      name = "fk_incidents_open_reports"

      referenced_table {
        project_id = var.project_id
        dataset_id = local.dataset_id
        table_id   = google_bigquery_table.image_reports.table_id
      }

      column_references {
        referenced_column  = "report_id"
        referencing_column = "open_report_id"
      }
    }

    # resolve_report_id -> image_reports.report_id
    foreign_keys {
      name = "fk_incidents_resolve_reports"

      referenced_table {
        project_id = var.project_id
        dataset_id = local.dataset_id
        table_id   = google_bigquery_table.image_reports.table_id
      }

      column_references {
        referenced_column  = "report_id"
        referencing_column = "resolve_report_id"
      }
    }
  }
}
# Text embeddings table; each row refers back to a row of image_reports
# through report_id.
resource "google_bigquery_table" "text_embeddings" {
  dataset_id          = local.dataset_id
  table_id            = "text_embeddings"
  description         = "Embeddings generated on the image description produced by the model "
  deletion_protection = false

  # Column definitions live in a JSON file next to this module.
  schema     = file("${path.module}/bigquery-schema/text_embeddings.json")
  clustering = ["report_id"]

  table_constraints {
    # report_id -> image_reports.report_id
    foreign_keys {
      name = "fk_text_embeddings_reports"

      referenced_table {
        project_id = var.project_id
        dataset_id = local.dataset_id
        table_id   = google_bigquery_table.image_reports.table_id
      }

      column_references {
        referenced_column  = "report_id"
        referencing_column = "report_id"
      }
    }
  }
}
# Multimodal image embeddings table; each row refers back to a row of
# image_reports through report_id.
resource "google_bigquery_table" "multimodal_embeddings" {
  deletion_protection = false
  dataset_id          = local.dataset_id
  table_id            = "multimodal_embeddings"

  # Fixed typo in the user-visible description ("mbeddings" -> "embeddings").
  description = "Multimodal embeddings of images"
  clustering  = ["report_id"]

  # Column definitions live in a JSON file next to this module.
  schema = file("${path.module}/bigquery-schema/multimodal_embeddings.json")

  table_constraints {
    # report_id -> image_reports.report_id
    foreign_keys {
      name = "fk_multimodal_embeddings_reports"

      referenced_table {
        project_id = var.project_id
        dataset_id = google_bigquery_dataset.bus-stop-image-processing.dataset_id
        table_id   = google_bigquery_table.image_reports.table_id
      }

      column_references {
        referencing_column = "report_id"
        referenced_column  = "report_id"
      }
    }
  }
}