community/modules/internal/slurm-gcp/instance_template/variables.tf (335 lines of code) (raw):
# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
###########
# GENERAL #
###########
# Required input: no default is declared, so callers must always set it.
variable "project_id" {
  description = "Project ID to create resources in."
  type        = string
}
# Plain string input; no validation rule is attached in this file.
# Falls back to "MIGRATE" when unset.
variable "on_host_maintenance" {
  description = "Instance availability Policy"
  type        = string
  default     = "MIGRATE"
}
# String-to-string map of labels; an empty map is used when unset.
variable "labels" {
  description = "Labels, provided as a map"
  type        = map(string)
  default     = {}
}
# Boolean toggle for OS Login; enabled (true) unless the caller overrides it.
variable "enable_oslogin" {
  description = <<EOD
Enables Google Cloud os-login for user login and authentication for VMs.
See https://cloud.google.com/compute/docs/oslogin
EOD
  type        = bool
  default     = true
}
###########
# NETWORK #
###########
# Optional project override for the subnetwork; null when unset.
variable "subnetwork_project" {
  description = "The ID of the project in which the subnetwork belongs. If it is not provided, the provider project is used."
  type        = string
  default     = null
}
# Optional network name or self_link; null when unset.
variable "network" {
  description = <<EOD
The name or self_link of the network to attach this interface to. Use network
attribute for Legacy or Auto subnetted networks and subnetwork for custom
subnetted networks.
EOD
  type        = string
  default     = null
}
# Optional subnetwork name; null when unset.
# NOTE(review): the description says either network or subnetwork must be
# provided, but no validation in this file enforces that -- confirm it is
# checked elsewhere.
variable "subnetwork" {
  description = <<EOD
The name of the subnetwork to attach this interface to. The subnetwork must
exist in the same region this instance will be created in. Either network or
subnetwork must be provided.
EOD
  type        = string
  default     = null
}
# Optional region for the instance template; null when unset.
variable "region" {
  description = "Region where the instance template should be created."
  type        = string
  default     = null
}
# List of network tags; an empty list is used when unset.
variable "tags" {
  description = "Network tag list."
  type        = list(string)
  default     = []
}
# Boolean toggle for IP forwarding; disabled (false) unless overridden.
variable "can_ip_forward" {
  description = "Enable IP forwarding, for NAT instances for example."
  type        = bool
  default     = false
}
# Optional private IP. Note the "unset" value here is the empty string,
# not null.
variable "network_ip" {
  description = "Private IP address to assign to the instance if desired."
  type        = string
  default     = ""
}
# Prefix string for the template resource; "default" when unset.
variable "name_prefix" {
  description = "Prefix for template resource."
  type        = string
  default     = "default"
}
# Network bandwidth tier selector. Restricted by the validation rule below to
# one of four literal values; "platform_default" is used when unset.
variable "bandwidth_tier" {
  type    = string
  default = "platform_default"

  description = <<-EOD
  Tier 1 bandwidth increases the maximum egress bandwidth for VMs.
  Using the `virtio_enabled` setting will only enable VirtioNet and will not enable TIER_1.
  Using the `tier_1_enabled` setting will enable both gVNIC and TIER_1 higher bandwidth networking.
  Using the `gvnic_enabled` setting will only enable gVNIC and will not enable TIER_1.
  Note that TIER_1 only works with specific machine families & shapes and must be using an image that supports gVNIC. See [official docs](https://cloud.google.com/compute/docs/networking/configure-vm-with-high-bandwidth-configuration) for more details.
  EOD

  # Membership check against the closed set of accepted tier names.
  validation {
    condition = contains(
      ["platform_default", "virtio_enabled", "gvnic_enabled", "tier_1_enabled"],
      var.bandwidth_tier
    )
    error_message = "Allowed values for bandwidth_tier are 'platform_default', 'virtio_enabled', 'gvnic_enabled', or 'tier_1_enabled'."
  }
}
# Extra network interfaces. Every attribute of each element is required by
# the type constraint (none are wrapped in optional()); the list itself
# defaults to empty.
variable "additional_networks" {
  description = "Additional network interface details for GCE, if any."

  type = list(object({
    network            = string
    subnetwork         = string
    subnetwork_project = string
    network_ip         = string
    nic_type           = string

    access_config = list(object({
      nat_ip       = string
      network_tier = string
    }))

    ipv6_access_config = list(object({
      network_tier = string
    }))
  }))

  default = []
}
# List of access configurations, each with a nat_ip and a network_tier
# (both required strings). Empty list when unset.
variable "access_config" {
  description = "Access configurations, i.e. IPs via which the VM instance can be accessed via the Internet."

  type = list(object({
    nat_ip       = string
    network_tier = string
  }))

  default = []
}
############
# INSTANCE #
############
# Machine type string; "n1-standard-1" when unset.
variable "machine_type" {
  description = "Machine type to create."
  type        = string
  default     = "n1-standard-1"
}
# Optional minimum CPU platform (friendly name); null when unset.
variable "min_cpu_platform" {
  description = <<EOD
Specifies a minimum CPU platform. Applicable values are the friendly names of
CPU platforms, such as Intel Haswell or Intel Skylake. See the complete list:
https://cloud.google.com/compute/docs/instances/specify-min-cpu-platform
EOD
  type        = string
  default     = null
}
# Optional GPU attachment described by a { type, count } object; null (no GPU
# information supplied) when unset.
variable "gpu" {
  description = <<EOD
GPU information. Type and count of GPU to attach to the instance template. See
https://cloud.google.com/compute/docs/gpus more details.
- type : the GPU type
- count : number of GPUs
EOD

  type = object({
    type  = string
    count = number
  })

  default = null
}
# Optional service account as { email, scopes }, where scopes is a set of
# strings. Null when unset; the description points at main.tf for the
# fallback that is applied in that case.
variable "service_account" {
  description = <<EOD
Service account to attach to the instances. See
'main.tf:local.service_account' for the default.
EOD

  type = object({
    email  = string
    scopes = set(string)
  })

  default = null
}
# Shielded VM settings as three required booleans. All three default to true,
# but per the description they are only used when enable_shielded_vm is true.
variable "shielded_instance_config" {
  description = <<EOD
Shielded VM configuration for the instance. Note: not used unless
enable_shielded_vm is 'true'.
- enable_integrity_monitoring : Compare the most recent boot measurements to the
integrity policy baseline and return a pair of pass/fail results depending on
whether they match or not.
- enable_secure_boot : Verify the digital signature of all boot components, and
halt the boot process if signature verification fails.
- enable_vtpm : Use a virtualized trusted platform module, which is a
specialized computer chip you can use to encrypt objects like keys and
certificates.
EOD

  type = object({
    enable_integrity_monitoring = bool
    enable_secure_boot          = bool
    enable_vtpm                 = bool
  })

  default = {
    enable_integrity_monitoring = true
    enable_secure_boot          = true
    enable_vtpm                 = true
  }
}
# Boolean toggle for Confidential VM; disabled (false) unless overridden.
variable "enable_confidential_vm" {
  description = "Enable the Confidential VM configuration. Note: the instance image must support option."
  type        = bool
  default     = false
}
# Boolean toggle for Shielded VM; disabled (false) unless overridden.
variable "enable_shielded_vm" {
  description = "Enable the Shielded VM configuration. Note: the instance image must support option."
  type        = bool
  default     = false
}
# Boolean toggle allowing preemption; disabled (false) unless overridden.
variable "preemptible" {
  description = "Allow the instance to be preempted."
  type        = bool
  default     = false
}
# Boolean toggle for SPOT provisioning; disabled (false) unless overridden.
variable "spot" {
  type    = bool
  default = false

  description = <<-EOD
  Provision as a SPOT preemptible instance.
  See https://cloud.google.com/compute/docs/instances/spot for more details.
  EOD
}
# Optional preemption action. Null (the declared default) is accepted as-is;
# any non-null value must be exactly 'STOP' or 'DELETE'.
variable "termination_action" {
  type    = string
  default = null

  description = <<-EOD
  Which action to take when Compute Engine preempts the VM. Value can be: 'STOP', 'DELETE'. The default value is 'STOP'.
  See https://cloud.google.com/compute/docs/instances/spot for more details.
  EOD

  validation {
    # The conditional keeps contains() from ever being evaluated with null.
    condition = (
      var.termination_action != null
      ? contains(["STOP", "DELETE"], var.termination_action)
      : true
    )
    error_message = "Allowed values are: 'STOP', 'DELETE'."
  }
}
# Resource Manager tag bindings for the instances, as a map of
# tagKeys/<digits> => tagValues/<digits>. Empty map when unset.
#
# Both validation rules anchor their pattern with ^ and $. regex() matches
# substrings, so an unanchored pattern would accept inputs such as
# "x/tagKeys/1" or "tagValues/1abc" that merely contain a well-formed
# fragment.
variable "resource_manager_tags" {
  description = "(Optional) A set of key/value resource manager tag pairs to bind to the instances. Keys must be in the format tagKeys/{tag_key_id}, and values are in the format tagValues/456."
  type        = map(string)
  default     = {}

  # Every map value must be exactly "tagValues/" followed by digits.
  validation {
    condition = alltrue([
      for value in values(var.resource_manager_tags) : can(regex("^tagValues/[0-9]+$", value))
    ])
    error_message = "All Resource Manager tag values should be in the format 'tagValues/[0-9]+'."
  }

  # Every map key must be exactly "tagKeys/" followed by digits.
  validation {
    condition = alltrue([
      for key in keys(var.resource_manager_tags) : can(regex("^tagKeys/[0-9]+$", key))
    ])
    error_message = "All Resource Manager tag keys should be in the format 'tagKeys/[0-9]+'."
  }
}
############
# METADATA #
############
# String-to-string metadata map; empty when unset.
variable "metadata" {
  description = "Metadata, provided as a map."
  type        = map(string)
  default     = {}
}
################
# SOURCE IMAGE #
################
# Project hosting the source image. The "unset" value is the empty string,
# not null.
variable "source_image_project" {
  description = "Project where the source image comes from. If it is not provided, the provider project is used."
  type        = string
  default     = ""
}
# Source image family; empty string when unset.
variable "source_image_family" {
  description = "Source image family."
  type        = string
  default     = ""
}
# Source disk image; empty string when unset.
variable "source_image" {
  description = "Source disk image."
  type        = string
  default     = ""
}
########
# DISK #
########
# Boot disk type string; "pd-standard" when unset. No validation rule
# restricts the value in this file.
variable "disk_type" {
  description = "Boot disk type, can be either pd-ssd, local-ssd, or pd-standard."
  type        = string
  default     = "pd-standard"
}
# Boot disk size as a number of GB; 100 when unset.
variable "disk_size_gb" {
  description = "Boot disk size in GB."
  type        = number
  default     = 100
}
# String-to-string label map for the boot disk; empty when unset.
variable "disk_labels" {
  description = "Labels to be assigned to boot disk, provided as a map."
  type        = map(string)
  default     = {}
}
# Boolean controlling boot-disk auto-deletion; enabled (true) unless
# overridden.
variable "disk_auto_delete" {
  description = "Whether or not the boot disk should be auto-deleted."
  type        = bool
  default     = true
}
# Resource Manager tag bindings for the instance disks, as a map of
# tagKeys/<digits> => tagValues/<digits>. Empty map when unset.
#
# Both validation rules anchor their pattern with ^ and $. regex() matches
# substrings, so an unanchored pattern would accept inputs such as
# "x/tagKeys/1" or "tagValues/1abc" that merely contain a well-formed
# fragment.
variable "disk_resource_manager_tags" {
  description = "(Optional) A set of key/value resource manager tag pairs to bind to the instance disks. Keys must be in the format tagKeys/{tag_key_id}, and values are in the format tagValues/456."
  type        = map(string)
  default     = {}

  # Every map value must be exactly "tagValues/" followed by digits.
  validation {
    condition = alltrue([
      for value in values(var.disk_resource_manager_tags) : can(regex("^tagValues/[0-9]+$", value))
    ])
    error_message = "All Resource Manager tag values should be in the format 'tagValues/[0-9]+'."
  }

  # Every map key must be exactly "tagKeys/" followed by digits.
  validation {
    condition = alltrue([
      for key in keys(var.disk_resource_manager_tags) : can(regex("^tagKeys/[0-9]+$", key))
    ])
    error_message = "All Resource Manager tag keys should be in the format 'tagKeys/[0-9]+'."
  }
}
# Extra disks. Per element, device_name, disk_labels, auto_delete and boot are
# required; source, disk_name, disk_type, disk_size_gb and
# disk_resource_manager_tags are optional. The list defaults to empty.
variable "additional_disks" {
  description = "List of maps of disks."

  type = list(object({
    source                     = optional(string)
    disk_name                  = optional(string)
    device_name                = string
    disk_type                  = optional(string)
    disk_size_gb               = optional(number)
    disk_labels                = map(string)
    auto_delete                = bool
    boot                       = bool
    disk_resource_manager_tags = optional(map(string))
  }))

  default = []
}
#########
# SLURM #
#########
# Required input (no default) naming the Slurm role of the instance. The
# validation accepts exactly "controller", "login" or "compute".
#
# The description previously advertised "or null" as a legal value, which the
# validation has never accepted; the text now matches the enforced contract.
variable "slurm_instance_role" {
  description = "Slurm instance type. Must be one of: controller; login; compute."
  type        = string

  validation {
    # Guard the null case explicitly so an explicit null produces the
    # validation message below instead of depending on how contains()
    # treats a null argument.
    condition = (
      var.slurm_instance_role == null
      ? false
      : contains(["controller", "login", "compute"], var.slurm_instance_role)
    )
    error_message = "Must be one of: controller; login; compute."
  }
}
# Required input (no default). The validation pattern admits a lowercase
# letter followed by at most nine lowercase letters or digits, i.e. 1 to 10
# characters in total.
variable "slurm_cluster_name" {
  description = "Cluster name, used for resource naming."
  type        = string

  validation {
    condition = can(
      regex("^[a-z](?:[a-z0-9]{0,9})$", var.slurm_cluster_name)
    )
    error_message = "Variable 'slurm_cluster_name' must be a match of regex '^[a-z](?:[a-z0-9]{0,9})$'."
  }
}
# Required input (no default). Every attribute of the object is optional, so
# callers may pass an empty object ({}) to leave all features unset.
variable "advanced_machine_features" {
  description = "See https://registry.terraform.io/providers/hashicorp/google/latest/docs/resources/compute_instance_template#nested_advanced_machine_features"

  type = object({
    enable_nested_virtualization = optional(bool)
    threads_per_core             = optional(number)
    turbo_mode                   = optional(string)
    visible_core_count           = optional(number)
    performance_monitoring_unit  = optional(string)
    enable_uefi_networking       = optional(bool)
  })
}
# Required input (no default): the GCS bucket URI string.
variable "slurm_bucket_path" {
  type        = string
  description = "GCS Bucket URI of Slurm cluster file storage."
}
# Optional run-time limit as a number of seconds; null when unset. No
# validation rule constrains the value in this file.
variable "max_run_duration" {
  type        = number
  default     = null
  description = "The duration (in whole seconds) of the instance. Instance will run and be terminated after then."
}
# Optional provisioning model string; null when unset. No validation rule
# restricts the value in this file.
variable "provisioning_model" {
  type        = string
  default     = null
  description = "The provisioning model of the instance"
}
# Optional reservation affinity, an object with a single required string
# attribute `type`; null when unset.
variable "reservation_affinity" {
  default     = null
  description = "Specifies the reservations that this instance can consume from."

  type = object({
    type = string
  })
}