community/modules/compute/htcondor-execute-point/main.tf (184 lines of code) (raw):
/**
* Copyright 2022 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
locals {
# This label allows for billing report tracking based on module.
labels = merge(var.labels, { ghpc_module = "htcondor-execute-point", ghpc_role = "compute" })
}
module "gpu" {
source = "../../../../modules/internal/gpu-definition"
machine_type = var.machine_type
guest_accelerator = var.guest_accelerator
}
locals {
guest_accelerator = module.gpu.guest_accelerator
zones = coalescelist(var.zones, data.google_compute_zones.available.names)
network_storage_metadata = var.network_storage == null ? {} : { network_storage = jsonencode(var.network_storage) }
oslogin_api_values = {
"DISABLE" = "FALSE"
"ENABLE" = "TRUE"
}
enable_oslogin = var.enable_oslogin == "INHERIT" ? {} : { enable-oslogin = lookup(local.oslogin_api_values, var.enable_oslogin, "") }
windows_startup_ps1 = join("\n\n", flatten([var.windows_startup_ps1, local.execute_config_windows_startup_ps1]))
is_windows_image = anytrue([for l in data.google_compute_image.compute_image.licenses : length(regexall("windows-cloud", l)) > 0])
windows_startup_metadata = local.is_windows_image && local.windows_startup_ps1 != "" ? {
windows-startup-script-ps1 = local.windows_startup_ps1
} : {}
disable_automatic_updates_metadata = var.allow_automatic_updates ? {} : { google_disable_automatic_updates = "TRUE" }
metadata = merge(
local.windows_startup_metadata,
local.network_storage_metadata,
local.enable_oslogin,
local.disable_automatic_updates_metadata,
var.metadata
)
autoscaler_runner = {
"type" = "ansible-local"
"content" = file("${path.module}/files/htcondor_configure_autoscaler.yml")
"destination" = "htcondor_configure_autoscaler_${module.mig.instance_group_manager.name}.yml"
"args" = join(" ", [
"-e project_id=${var.project_id}",
"-e region=${var.region}",
"-e zone=${local.zones[0]}", # this value is required, but ignored by regional MIG autoscaler
"-e mig_id=${module.mig.instance_group_manager.name}",
"-e max_size=${var.max_size}",
"-e min_idle=${var.min_idle}",
])
}
execute_config = templatefile("${path.module}/templates/condor_config.tftpl", {
htcondor_role = "get_htcondor_execute",
central_manager_ips = var.central_manager_ips,
guest_accelerator = local.guest_accelerator,
})
execute_object = "gs://${var.htcondor_bucket_name}/${google_storage_bucket_object.execute_config.output_name}"
execute_runner = {
type = "ansible-local"
content = file("${path.module}/files/htcondor_configure.yml")
destination = "htcondor_configure.yml"
args = join(" ", [
"-e htcondor_role=get_htcondor_execute",
"-e config_object=${local.execute_object}",
])
}
native_fstype = []
startup_script_network_storage = [
for ns in var.network_storage :
ns if !contains(local.native_fstype, ns.fs_type)
]
storage_client_install_runners = [
for ns in local.startup_script_network_storage :
ns.client_install_runner if ns.client_install_runner != null
]
mount_runners = [
for ns in local.startup_script_network_storage :
ns.mount_runner if ns.mount_runner != null
]
all_runners = concat(
local.storage_client_install_runners,
local.mount_runners,
var.execute_point_runner,
[local.execute_runner],
)
execute_config_windows_startup_ps1 = templatefile(
"${path.module}/templates/download-condor-config.ps1.tftpl",
{
config_object = local.execute_object,
}
)
name_prefix = "${var.deployment_name}-${var.name_prefix}-ep"
}
data "google_compute_zones" "available" {
project = var.project_id
region = var.region
}
resource "null_resource" "execute_config" {
triggers = {
config = local.execute_config
}
}
resource "google_storage_bucket_object" "execute_config" {
name = "${local.name_prefix}-config-${substr(md5(null_resource.execute_config.id), 0, 4)}"
content = local.execute_config
bucket = var.htcondor_bucket_name
}
module "startup_script" {
source = "../../../../modules/scripts/startup-script"
project_id = var.project_id
region = var.region
labels = local.labels
deployment_name = var.deployment_name
runners = local.all_runners
}
module "execute_point_instance_template" {
source = "terraform-google-modules/vm/google//modules/instance_template"
version = "~> 12.1"
name_prefix = local.name_prefix
project_id = var.project_id
network = var.network_self_link
subnetwork = var.subnetwork_self_link
service_account = {
email = var.execute_point_service_account_email
scopes = var.service_account_scopes
}
labels = local.labels
machine_type = var.machine_type
disk_size_gb = var.disk_size_gb
disk_type = var.disk_type
gpu = one(local.guest_accelerator)
preemptible = var.spot
startup_script = local.is_windows_image ? null : module.startup_script.startup_script
metadata = local.metadata
source_image = data.google_compute_image.compute_image.self_link
# secure boot
enable_shielded_vm = var.enable_shielded_vm
shielded_instance_config = var.shielded_instance_config
}
module "mig" {
source = "terraform-google-modules/vm/google//modules/mig"
version = "~> 12.1"
project_id = var.project_id
region = var.region
distribution_policy_target_shape = var.distribution_policy_target_shape
distribution_policy_zones = local.zones
target_size = var.target_size
hostname = local.name_prefix
mig_name = local.name_prefix
instance_template = module.execute_point_instance_template.self_link
health_check_name = "health-htcondor-${local.name_prefix}"
health_check = {
type = "tcp"
initial_delay_sec = 600
check_interval_sec = 20
healthy_threshold = 2
timeout_sec = 8
unhealthy_threshold = 3
response = ""
proxy_header = "NONE"
port = 9618
request = ""
request_path = ""
host = ""
enable_logging = true
}
update_policy = [{
instance_redistribution_type = "NONE"
replacement_method = "SUBSTITUTE"
max_surge_fixed = length(local.zones)
max_unavailable_fixed = length(local.zones)
max_surge_percent = null
max_unavailable_percent = null
min_ready_sec = 300
minimal_action = "REPLACE"
type = var.update_policy
}]
}