infrastructure/terraform/bigquery-procedure.tf (94 lines of code) (raw):

# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Prompt configuration for image description, read from a YAML file kept
# outside the Terraform module (two directories up, under prompts/).
data "local_file" "describe_image_prompt_config" {
  filename = "${path.module}/../../prompts/describe-image.prompt.yaml"
}

locals {
  # Decoded YAML document. The `prompt`, `temperature` and `max_output_tokens`
  # keys are read by the process_images procedure below.
  prompt_config = yamldecode(data.local_file.describe_image_prompt_config.content)
}

# Scalar SQL function taking one STRING argument and returning a STRING.
# Its body is loaded verbatim with file(), so no template interpolation is
# applied to the .sql.tftpl source.
resource "google_bigquery_routine" "clean_generate_text_json_response_function" {
  dataset_id      = local.dataset_id
  routine_id      = "clean_generate_text_json_response"
  routine_type    = "SCALAR_FUNCTION"
  language        = "SQL"
  definition_body = file("${path.module}/bigquery-routines/clean-generate-text-json-response.sql.tftpl")

  arguments {
    name = "input"
    # Type descriptors are built with jsonencode(), matching the table-valued
    # functions in this file, rather than hand-escaped JSON strings.
    data_type = jsonencode({ "typeKind" : "STRING" })
  }

  return_type = jsonencode({ "typeKind" : "STRING" })
}

# Stored procedure rendered from process-images.sql.tftpl. The template
# receives fully qualified table/model names, the Vertex AI model identifiers
# from input variables, and the prompt settings decoded above.
resource "google_bigquery_routine" "process_images_procedure" {
  dataset_id   = local.dataset_id
  routine_id   = "process_images"
  routine_type = "PROCEDURE"
  language     = "SQL"

  # The models are referenced only by name inside the rendered SQL, so
  # Terraform cannot infer the ordering; it is stated explicitly via the
  # time_sleep resources declared elsewhere in this module.
  depends_on = [
    time_sleep.wait_for_default_model_creation,
    time_sleep.wait_for_text_embedding_model_creation,
    time_sleep.wait_for_multimodal_embedding_model_creation,
    google_bigquery_routine.clean_generate_text_json_response_function
  ]

  definition_body = templatefile("${path.module}/bigquery-routines/process-images.sql.tftpl", {
    # Tables
    process_watermark_table     = "${local.fq_dataset_id}.${google_bigquery_table.process_watermark.table_id}"
    images_table                = "${local.fq_dataset_id}.${google_bigquery_table.images.table_id}"
    reports_table               = "${local.fq_dataset_id}.${google_bigquery_table.image_reports.table_id}"
    text_embeddings_table       = "${local.fq_dataset_id}.${google_bigquery_table.text_embeddings.table_id}"
    multimodal_embeddings_table = "${local.fq_dataset_id}.${google_bigquery_table.multimodal_embeddings.table_id}"

    # Fully qualified model names within the dataset
    multimodal_model           = "${local.fq_dataset_id}.${local.default_model_name}"
    text_embeddings_model      = "${local.fq_dataset_id}.${local.text_embedding_model_name}"
    multimodal_embedding_model = "${local.fq_dataset_id}.${local.multimodal_embedding_model_name}"

    # Vertex AI model identifiers supplied through input variables
    multimodal_model_id            = var.default_multimodal_vertex_ai_model
    text_embeddings_model_id       = var.text_embeddings_vertex_ai_model
    multimodal_embeddings_model_id = var.multimodal_embeddings_vertex_ai_model

    # Prompt settings from the decoded YAML configuration
    prompt            = local.prompt_config.prompt
    temperature       = local.prompt_config.temperature
    max_output_tokens = local.prompt_config.max_output_tokens

    # Fully qualified name of the scalar helper function defined above
    clean_generate_text_json_function = "${local.fq_dataset_id}.${google_bigquery_routine.clean_generate_text_json_response_function.routine_id}"
  })
}

# Table-valued function rendered from semantic-search-text-embeddings.sql.tftpl.
# Takes a single STRING argument named `search_terms`; the template is given a
# result cap of 10.
resource "google_bigquery_routine" "semantic_search_text_embeddings_tvf" {
  dataset_id   = local.dataset_id
  routine_id   = "semantic_search_text_embeddings"
  routine_type = "TABLE_VALUED_FUNCTION"
  language     = "SQL"

  depends_on = [time_sleep.wait_for_text_embedding_model_creation]

  definition_body = templatefile("${path.module}/bigquery-routines/semantic-search-text-embeddings.sql.tftpl", {
    text_embeddings_table = "${local.fq_dataset_id}.${google_bigquery_table.text_embeddings.table_id}"
    text_embedding_model  = "${local.fq_dataset_id}.${local.text_embedding_model_name}"
    reports_table         = "${local.fq_dataset_id}.${google_bigquery_table.image_reports.table_id}"
    max_number_of_results = 10
  })

  arguments {
    name          = "search_terms"
    argument_kind = "FIXED_TYPE"
    data_type     = jsonencode({ "typeKind" : "STRING" })
  }
}

# Table-valued function rendered from
# semantic-search-multimodal-embeddings.sql.tftpl. Same signature as the text
# variant (one STRING argument, `search_terms`) and the same result cap of 10,
# but wired to the multimodal embeddings table and model.
resource "google_bigquery_routine" "semantic_search_multimodal_embeddings_tvf" {
  dataset_id   = local.dataset_id
  routine_id   = "semantic_search_multimodal_embeddings"
  routine_type = "TABLE_VALUED_FUNCTION"
  language     = "SQL"

  depends_on = [time_sleep.wait_for_multimodal_embedding_model_creation]

  definition_body = templatefile("${path.module}/bigquery-routines/semantic-search-multimodal-embeddings.sql.tftpl", {
    multimodal_embeddings_table = "${local.fq_dataset_id}.${google_bigquery_table.multimodal_embeddings.table_id}"
    multimodal_embedding_model  = "${local.fq_dataset_id}.${local.multimodal_embedding_model_name}"
    reports_table               = "${local.fq_dataset_id}.${google_bigquery_table.image_reports.table_id}"
    max_number_of_results       = 10
  })

  arguments {
    name          = "search_terms"
    argument_kind = "FIXED_TYPE"
    data_type     = jsonencode({ "typeKind" : "STRING" })
  }
}

# Stored procedure rendered from update-incidents-procedure.sql.tftpl. The
# template receives the report watermark, incidents and image reports tables.
resource "google_bigquery_routine" "update_incidents_procedure" {
  dataset_id   = local.dataset_id
  routine_id   = "update_incidents"
  routine_type = "PROCEDURE"
  language     = "SQL"

  definition_body = templatefile("${path.module}/bigquery-routines/update-incidents-procedure.sql.tftpl", {
    report_watermark_table = "${local.fq_dataset_id}.${google_bigquery_table.report_watermark.table_id}"
    incidents_table        = "${local.fq_dataset_id}.${google_bigquery_table.incidents.table_id}"
    reports_table          = "${local.fq_dataset_id}.${google_bigquery_table.image_reports.table_id}"
  })
}