benchmarks/benchmark/tools/locust-load-inference/variables.tf

/**
 * Copyright 2024 Google LLC
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

# -----------------------------------------------------------------------------
# Cluster access and placement
# -----------------------------------------------------------------------------

# Cluster credentials. Either `fleet_host` or `kubeconfig` is supplied, never
# both and never neither.
variable "credentials_config" {
  description = "Configure how Terraform authenticates to the cluster."
  type = object({
    fleet_host = optional(string)
    kubeconfig = optional(object({
      context = optional(string)
      path    = optional(string, "~/.kube/config")
    }))
  })
  nullable = false

  validation {
    # XOR of the two "is set" checks: true only when exactly one is non-null.
    condition = (
      (var.credentials_config.fleet_host != null) !=
      (var.credentials_config.kubeconfig != null)
    )
    error_message = "Exactly one of fleet host or kubeconfig must be set."
  }
}

variable "namespace" {
  description = "Namespace used for model and benchmarking deployments."
  type        = string
  nullable    = false
  default     = "default"
}

# Required: no default is provided.
variable "project_id" {
  description = "Project id of existing or created project."
  type        = string
  nullable    = false
}

variable "ksa" {
  description = "Kubernetes Service Account used for workload."
  type        = string
  nullable    = false
  default     = "default"
}

variable "templates_path" {
  description = "Path where manifest templates will be read from. Set to null to use the default manifests"
  type        = string
  default     = null
}

variable "artifact_registry" {
  description = "Artifact registry for storing Locust container."
  type        = string
  default     = null
}

# -----------------------------------------------------------------------------
# Locust deployment
# -----------------------------------------------------------------------------

variable "num_locust_workers" {
  description = "Number of locust worker pods to deploy."
  type        = number
  default     = 1
}

# NOTE(review): the unit and the meaning of the default 0 are not visible in
# this file; confirm against the manifest templates that consume it.
variable "stop_timeout" {
  description = "Length of time before a locust job is stopped."
  type        = number
  default     = 0
}

# -----------------------------------------------------------------------------
# Inference server under test
# -----------------------------------------------------------------------------

# Required: no default is provided.
variable "inference_server_service" {
  description = "Inference server service"
  type        = string
  nullable    = false
}

variable "inference_server_framework" {
  description = "Benchmark server configuration for inference server framework. Can be one of: vllm, tgi, tensorrt_llm_triton, sax, jetstream"
  type        = string
  nullable    = false
  default     = "tgi"

  validation {
    # Membership test against the supported frameworks. The variable is
    # non-nullable, so the value checked here is always a string.
    condition = contains(
      ["vllm", "tgi", "tensorrt_llm_triton", "sax", "jetstream"],
      var.inference_server_framework
    )
    error_message = "The inference_server_framework must be one of: vllm, tgi, tensorrt_llm_triton, sax, or jetstream."
  }
}

# -----------------------------------------------------------------------------
# Benchmark request parameters
# -----------------------------------------------------------------------------

variable "best_of" {
  description = "Benchmark server configuration for best of."
  type        = number
  default     = 1
}

# Required: no default is provided.
variable "gcs_path" {
  description = "Benchmark server configuration for gcs_path for downloading prompts."
  type        = string
  nullable    = false
}

variable "max_num_prompts" {
  description = "Benchmark server configuration for max number of prompts."
  type        = number
  default     = 1000

  validation {
    condition     = var.max_num_prompts > 0
    error_message = "The max_num_prompts value must be greater than 0."
  }
}

variable "max_output_len" {
  description = "Benchmark server configuration for max output length."
  type        = number
  default     = 256

  validation {
    condition     = var.max_output_len > 4
    error_message = "The max_output_len value must be greater than 4. TGI framework throws an error for too short of sequences."
  }
}

variable "max_prompt_len" {
  description = "Benchmark server configuration for max prompt length."
  type        = number
  default     = 256

  validation {
    condition     = var.max_prompt_len > 4
    error_message = "The max_prompt_len value must be greater than 4. TGI framework throws an error for too short of sequences."
  }
}

variable "sax_model" {
  description = "Benchmark server configuration for sax model. Only required if framework is sax."
  type        = string
  default     = ""
}

variable "tokenizer" {
  description = "Benchmark server configuration for tokenizer."
  type        = string
  nullable    = false
  default     = "tiiuae/falcon-7b"
}

variable "use_beam_search" {
  description = "Benchmark server configuration for use beam search."
  type        = bool
  default     = false
}

# -----------------------------------------------------------------------------
# Results and automated test runner
# -----------------------------------------------------------------------------

# Required: no default is provided.
variable "output_bucket" {
  description = "Bucket name for storing results"
  type        = string
}

variable "locust_runner_kubernetes_service_account" {
  description = "Kubernetes Service Account to be used for Locust runner tool"
  type        = string
  default     = "sample-runner-ksa"
}

variable "runner_endpoint_ip" {
  description = "External IP assigned to Locust Runner"
  type        = string
  nullable    = true
  default     = null
}

variable "test_duration" {
  description = "Duration of automated test in seconds"
  type        = number
  default     = 120
}

variable "test_users" {
  description = "Users parameter for Locust"
  type        = number
  default     = 1
}

variable "test_rate" {
  description = "Rate parameter for Locust"
  type        = number
  default     = 5
}

variable "run_test_automatically" {
  description = "Run the test after deployment"
  type        = bool
  default     = false
}

# -----------------------------------------------------------------------------
# Hugging Face credentials
# -----------------------------------------------------------------------------

// TODO: add validation to make k8s_hf_secret & hugging_face_secret mutually exclusive once terraform is updated with: https://discuss.hashicorp.com/t/experiment-feedback-input-variable-validation-can-cross-reference-other-objects/66644
variable "k8s_hf_secret" {
  description = "Name of secret for huggingface token; stored in k8s "
  type        = string
  nullable    = true
  default     = null
}

variable "hugging_face_secret" {
  description = "name of the kubectl huggingface secret token; stored in Secret Manager. Security considerations: https://kubernetes.io/docs/concepts/security/secrets-good-practices/"
  type        = string
  nullable    = true
  default     = null
}

variable "hugging_face_secret_version" {
  description = "Secret version in Secret Manager"
  type        = string
  nullable    = true
  default     = null
}

# -----------------------------------------------------------------------------
# Request transport
# -----------------------------------------------------------------------------

# NOTE(review): declared nullable, yet the validation below only accepts the
# two literal strings, so an explicit null appears to be rejected. Confirm
# whether `nullable = false` was intended.
variable "request_type" {
  description = "The method of request used when calling the model server (http or grpc)"
  type        = string
  nullable    = true
  default     = "http"

  validation {
    condition     = var.request_type == "http" || var.request_type == "grpc"
    error_message = "The request_type must be 'http' or 'grpc'."
  }
}

benchmarks/benchmark/tools/locust-load-inference/variables.tf (202 lines of code) (raw):