tf/scheduler.tf (117 lines of code) (raw):

resource "azurerm_network_interface" "scheduler-nic" { name = "${local.scheduler_name}-nic" location = local.create_rg ? azurerm_resource_group.rg[0].location : data.azurerm_resource_group.rg[0].location resource_group_name = local.create_rg ? azurerm_resource_group.rg[0].name : data.azurerm_resource_group.rg[0].name ip_configuration { name = "internal" subnet_id = local.create_admin_subnet ? azurerm_subnet.admin[0].id : data.azurerm_subnet.admin[0].id private_ip_address_allocation = "Dynamic" } } resource "azurerm_linux_virtual_machine" "scheduler" { name = local.scheduler_name location = local.create_rg ? azurerm_resource_group.rg[0].location : data.azurerm_resource_group.rg[0].location resource_group_name = local.create_rg ? azurerm_resource_group.rg[0].name : data.azurerm_resource_group.rg[0].name size = try(local.configuration_yml["scheduler"].vm_size, "Standard_D2s_v3") admin_username = local.admin_username network_interface_ids = [ azurerm_network_interface.scheduler-nic.id, ] identity { type = "SystemAssigned" } admin_ssh_key { username = local.admin_username public_key = tls_private_key.internal.public_key_openssh #file("~/.ssh/id_rsa.pub") } os_disk { caching = "ReadWrite" storage_account_type = "Standard_LRS" } dynamic "source_image_reference" { for_each = local.use_linux_image_id ? [] : [1] content { publisher = local.linux_base_image_reference.publisher offer = local.linux_base_image_reference.offer sku = local.linux_base_image_reference.sku version = local.linux_base_image_reference.version } } source_image_id = local.linux_image_id dynamic "plan" { for_each = try (length(local.linux_image_plan.name) > 0, false) ? [1] : [] content { name = local.linux_image_plan.name publisher = local.linux_image_plan.publisher product = local.linux_image_plan.product } } lifecycle { ignore_changes = [ tags ] } } resource "azurerm_network_interface_application_security_group_association" "scheduler-asg-asso" { for_each = toset(local.asg_associations["scheduler"]) network_interface_id = azurerm_network_interface.scheduler-nic.id application_security_group_id = local.create_nsg ? azurerm_application_security_group.asg[each.key].id : data.azurerm_application_security_group.asg[each.key].id } resource "azurerm_virtual_machine_extension" "AzureMonitorLinuxAgent_sched" { count = local.ama_install ? 1 : 0 name = "AzureMonitorLinuxAgent" virtual_machine_id = azurerm_linux_virtual_machine.scheduler.id publisher = "Microsoft.Azure.Monitor" type = "AzureMonitorLinuxAgent" type_handler_version = "1.0" auto_upgrade_minor_version = true } resource "azurerm_monitor_data_collection_rule_association" "dcra_sched_metrics" { count = local.monitor ? 1 : 0 name = "sched-data-collection-ra" target_resource_id = azurerm_linux_virtual_machine.scheduler.id data_collection_rule_id = azurerm_monitor_data_collection_rule.vm_data_collection_rule[0].id description = "Scheduler Data Collection Rule Association for VM Metrics" } resource "azurerm_monitor_data_collection_rule_association" "dcra_sched_insights" { count = local.monitor ? 1 : 0 name = "sched-insights-collection-ra" target_resource_id = azurerm_linux_virtual_machine.scheduler.id data_collection_rule_id = azurerm_monitor_data_collection_rule.vm_insights_collection_rule[0].id description = "Scheduler Data Collection Rule Association for VM Insights" } resource "azurerm_monitor_scheduled_query_rules_alert_v2" "sched_volume_alert" { count = local.create_alerts ? 1 : 0 name = "sched-volume-alert" location = local.create_rg ? azurerm_resource_group.rg[0].location : data.azurerm_resource_group.rg[0].location resource_group_name = local.create_rg ? azurerm_resource_group.rg[0].name : data.azurerm_resource_group.rg[0].name evaluation_frequency = "PT5M" window_duration = "PT5M" scopes = [azurerm_linux_virtual_machine.scheduler.id] severity = 3 criteria { query = <<-QUERY let mountpoints = dynamic(${local.mountpoints_str}); InsightsMetrics | where TimeGenerated >= ago(5min) and Name == "FreeSpacePercentage" and Val <= ${local.local_vol_threshold} and not(Tags has_any (mountpoints) ) | project TimeGenerated, Computer, Name, Val, Tags, _ResourceId | summarize arg_max(TimeGenerated, *) by Tags | project Tags, Name, Val, Computer, _ResourceId QUERY time_aggregation_method = "Count" operator = "GreaterThan" threshold = 0 failing_periods { minimum_failing_periods_to_trigger_alert = 1 number_of_evaluation_periods = 1 } } auto_mitigation_enabled = true description = "Alert when the volumes of the scheduler VM is above ${100 - local.local_vol_threshold}%" display_name = "scheduler volumes full" enabled = true query_time_range_override = "P2D" action { action_groups = [azurerm_monitor_action_group.azhop_action_group[0].id] } }