ai-infrastructure/terraform-modules/gke-aiml/variables.tf (200 lines of code) (raw):

# Copyright 2023 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Input variables for the gke-aiml module.
# Every variable except `registry_config` and `vpc_ref` is declared
# `nullable = false`, so an explicit `null` from the caller falls back to the
# declared default (or is rejected when there is no default).

# Required: target project.
variable "project_id" {
  description = "The GCP project ID"
  type        = string
  nullable    = false
}

# Required: region for the environment.
variable "region" {
  description = "The region for the environment"
  type        = string
  nullable    = false
}

# Safety switch for destructive operations; defaults to protected.
variable "deletion_protection" {
  description = "Prevent Terraform from destroying data storage resources (storage buckets, GKE clusters). When this field is set, a terraform destroy or terraform apply that would delete data storage resources will fail."
  type        = bool
  default     = true
  nullable    = false
}

# Map of bucket settings. The meaning of the map key is not visible in this
# file (presumably the bucket name -- confirm against the module's resources).
variable "gcs_configs" {
  description = "The configs for GCS buckets"
  type = map(object({
    versioning    = optional(bool, false)
    location      = optional(string, "")
    storage_class = optional(string, "STANDARD")
    iam           = optional(list(string), ["storage.legacyBucketReader"])
  }))
  default  = {}
  nullable = false
}

# Optional Artifact Registry settings; null is an accepted value (default).
variable "registry_config" {
  description = "The configs for Artifact registry"
  type = object({
    name     = string
    location = string
  })
  default  = null
  nullable = true
}

# Node pool service account. The validation rejects the one combination where
# neither an existing account (email) nor a new account name is provided.
variable "node_pool_sa" {
  description = "The config for a node pool service account. If email is set the existing service account is used. If name is set a new account is created. If roles are null the default roles are used."
  type = object({
    name  = optional(string, "node-pool-sa")
    email = optional(string, "")
    roles = optional(list(string), [
      "storage.objectAdmin",
      "logging.logWriter",
      "artifactregistry.reader",
    ])
    description = optional(string, "GKE workload identity service account")
  })
  default = {}
  validation {
    condition     = !(var.node_pool_sa.email == "" && var.node_pool_sa.name == "")
    error_message = "Either email or name must be set."
  }
  nullable = false
}

# Reference to an existing VPC. Defaults to null.
variable "vpc_ref" {
  description = "Network configurations of an existing VPC to use for the environment. If null a new VPC based on the provided vpc_config will be created"
  type = object({
    host_project           = string
    network_self_link      = string
    subnet_self_link       = string
    pods_ip_range_name     = string
    services_ip_range_name = string
  })
  default = null
}

# Settings for a VPC to be created; every attribute has a default.
variable "vpc_config" {
  description = "Network configurations of a VPC to create. Must be specified if vpc_ref is null"
  type = object({
    network_name           = optional(string, "gke-cluster-network")
    subnet_name            = optional(string, "gke-cluster-subnetwork")
    subnet_ip_cidr_range   = optional(string, "10.129.0.0/20")
    pods_ip_cidr_range     = optional(string, "192.168.64.0/20")
    services_ip_cidr_range = optional(string, "192.168.80.0/20")
    routing_mode           = optional(string, "REGIONAL")
    nat_router_name        = optional(string, "")
  })
  default  = {}
  nullable = false
}

# Cluster-level settings; every attribute has a default.
variable "cluster_config" {
  description = "Base cluster configurations"
  type = object({
    name                           = optional(string, "gke-ml-cluster")
    release_channel                = optional(string, "REGULAR")
    version                        = optional(string, null)
    description                    = optional(string, "GKE ML inference cluster")
    gcs_fuse_csi_driver            = optional(bool, true)
    gce_persistent_disk_csi_driver = optional(bool, true)
    workload_identity              = optional(bool, true)
    enable_workload_logs           = optional(bool, true)
    enable_scheduler_logs          = optional(bool, true)
    enable_controller_manager_logs = optional(bool, true)
    enable_api_server_logs         = optional(bool, true)
  })
  default  = {}
  nullable = false
}

# Map of CPU node pool definitions.
variable "cpu_node_pools" {
  description = "Configurations for CPU node pools"
  type = map(object({
    zones          = list(string)
    min_node_count = number
    max_node_count = number
    machine_type   = string
    gcfs           = optional(bool, true)
    gvnic          = optional(bool, true)
    disk_type      = optional(string, "pd-standard")
    # Declared as number to match the numeric default; numeric strings such
    # as "200" are still accepted through Terraform's automatic conversion.
    disk_size_gb = optional(number, 200)
    auto_repair  = optional(bool, true)
    auto_upgrade = optional(bool, true)
    oauth_scopes = optional(list(string), ["https://www.googleapis.com/auth/cloud-platform"])
    taints = optional(map(object({
      value  = string
      effect = string
    })), {})
    labels = optional(map(string), {})
  }))
  validation {
    # Check every taint of every pool individually. Merging the taint maps of
    # all pools first would keep only the last entry per taint key, letting an
    # invalid effect slip through when two pools share a key.
    condition = alltrue(flatten([
      for pool in values(var.cpu_node_pools) : [
        for taint in values(pool.taints) :
        contains(["NO_SCHEDULE", "PREFER_NO_SCHEDULE", "NO_EXECUTE"], taint.effect)
      ]
    ]))
    error_message = "Invalid taint effect."
  }
  default  = {}
  nullable = false
}

# Map of TPU node pool definitions.
variable "tpu_node_pools" {
  description = "Configurations for TPU node pools"
  type = map(object({
    zones          = list(string)
    min_node_count = number
    max_node_count = number
    tpu_type       = string
    disk_type      = optional(string, null)
    # Declared as number to match the numeric default; numeric strings such
    # as "200" are still accepted through Terraform's automatic conversion.
    disk_size_gb = optional(number, 200)
    gvnic        = optional(bool, true)
    gcfs         = optional(bool, true)
    auto_repair  = optional(bool, true)
    auto_upgrade = optional(bool, true)
    spot         = optional(bool, false)
    reservation_affinity = optional(object({
      consume_reservation_type = string
      key                      = string
      values                   = list(string)
    }), null)
    oauth_scopes = optional(list(string), ["https://www.googleapis.com/auth/cloud-platform"])
    taints = optional(map(object({
      value  = string
      effect = string
    })), {})
    labels = optional(map(string), {})
  }))
  validation {
    # Check every taint of every pool individually. Merging the taint maps of
    # all pools first would keep only the last entry per taint key, letting an
    # invalid effect slip through when two pools share a key.
    condition = alltrue(flatten([
      for pool in values(var.tpu_node_pools) : [
        for taint in values(pool.taints) :
        contains(["NO_SCHEDULE", "PREFER_NO_SCHEDULE", "NO_EXECUTE"], taint.effect)
      ]
    ]))
    error_message = "Invalid taint effect."
  }
  default  = {}
  nullable = false
}

# Map of GPU node pool definitions.
variable "gpu_node_pools" {
  description = "Configurations for GPU node pools"
  type = map(object({
    zones             = list(string)
    min_node_count    = number
    max_node_count    = number
    accelerator_type  = string
    accelerator_count = number
    machine_type      = string
    local_ssd_count   = optional(number, 0)
    spot              = optional(bool, false)
    preemptible       = optional(bool, false)
    image_type        = optional(string, "COS_CONTAINERD")
    gcfs              = optional(bool, true)
    gvnic             = optional(bool, true)
    disk_type         = optional(string, "pd-ssd")
    # Declared as number to match the numeric default; numeric strings such
    # as "200" are still accepted through Terraform's automatic conversion.
    disk_size_gb = optional(number, 200)
    auto_repair  = optional(bool, true)
    auto_upgrade = optional(bool, true)
    oauth_scopes = optional(list(string), ["https://www.googleapis.com/auth/cloud-platform"])
    taints = optional(map(object({
      value  = string
      effect = string
    })), {})
    labels = optional(map(string), {})
  }))
  validation {
    # Check every taint of every pool individually. Merging the taint maps of
    # all pools first would keep only the last entry per taint key, letting an
    # invalid effect slip through when two pools share a key.
    condition = alltrue(flatten([
      for pool in values(var.gpu_node_pools) : [
        for taint in values(pool.taints) :
        contains(["NO_SCHEDULE", "PREFER_NO_SCHEDULE", "NO_EXECUTE"], taint.effect)
      ]
    ]))
    error_message = "Invalid taint effect."
  }
  default  = {}
  nullable = false
}