benchmarks/benchmark/tools/locust-load-inference/main.tf (71 lines of code) (raw):

/** * Copyright 2024 Google LLC * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ locals { locust_templates = [ for f in fileset(local.locust_templates_path, "*tpl") : "${local.locust_templates_path}/${f}" ] locust_templates_path = ( var.templates_path == null ? "${path.module}/manifest-templates" : pathexpand(var.templates_path) ) hugging_face_token_secret = ( var.hugging_face_secret == null || var.hugging_face_secret_version == null ? null : "${var.hugging_face_secret}/versions/${var.hugging_face_secret_version}" ) all_locust_manifests = flatten([for manifest_file in local.locust_templates : [for data in split("---", templatefile(manifest_file, { artifact_registry = var.artifact_registry namespace = var.namespace inference_server_service = var.inference_server_service inference_server_framework = var.inference_server_framework best_of = var.best_of gcs_path = var.gcs_path ksa = var.ksa max_num_prompts = var.max_num_prompts max_output_len = var.max_output_len max_prompt_len = var.max_prompt_len num_locust_workers = var.num_locust_workers sax_model = var.sax_model tokenizer = var.tokenizer use_beam_search = var.use_beam_search hugging_face_token_secret_list = local.hugging_face_token_secret == null ? [] : [local.hugging_face_token_secret] k8s_hf_secret_list = var.k8s_hf_secret == null ? [] : [var.k8s_hf_secret] stop_timeout = var.stop_timeout request_type = var.request_type bucket = var.output_bucket })) : data] ]) } resource "google_project_service" "cloudbuild" { project = var.project_id service = "cloudbuild.googleapis.com" timeouts { create = "30m" update = "40m" } disable_on_destroy = false } resource "kubernetes_manifest" "default" { for_each = toset(local.all_locust_manifests) depends_on = [resource.null_resource.build_and_push_image] manifest = yamldecode(each.value) timeouts { create = "30m" } }