# google_gke/cluster.tf
# TBD: things to investigate further in the future:
# * confidential nodes (encrypted in-use memory on the VMs underlying nodes; in beta)
# * GKE Sandbox (host kernel protection on nodes; in beta)
# * cluster telemetry (new monitoring / logging / metrics aggregation & dashboards for GKE clusters; in beta)
# * enable_binary_authorization (all container images validated by Google Binary Authorization; needs further impact investigation)
# * enable_l4_ilb_subsetting (needs further impact investigation)
# * shielded_instance_config.enable_secure_boot & shielded_instance_config.enable_integrity_monitoring (need further impact investigation)
# * database_encryption to be added with a CloudKMS key (postponed until CloudKMS keys are managed in Terraform or a secrets manager; see the commented sketch below)
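#
# A minimal sketch of what the database_encryption block could look like inside
# the cluster resource below, once a CloudKMS key exists (the key resource name
# is hypothetical, not defined in this repo):
#
#   database_encryption {
#     state    = "ENCRYPTED"
#     key_name = google_kms_crypto_key.gke_secrets.id # hypothetical key resource
#   }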
#
# GKE Cluster
#
resource "google_container_cluster" "primary" {
provider = google-beta
name = local.cluster_name
description = var.description
location = var.region
project = local.project_id
resource_labels = local.labels
cluster_autoscaling {
autoscaling_profile = var.autoscaling_profile
}
release_channel {
channel = var.release_channel
}
workload_identity_config {
workload_pool = "${local.project_id}.svc.id.goog"
}
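  # With Workload Identity enabled, pods reach GCP APIs through a Kubernetes
  # ServiceAccount bound to a Google ServiceAccount. A typical binding (names
  # are illustrative, not defined in this module):
  #   kubectl annotate serviceaccount my-ksa \
  #     iam.gke.io/gcp-service-account=my-gsa@<project>.iam.gserviceaccount.com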
  # Internal Networking: Defaulting to iptables & kube-proxy over Dataplane V2 (eBPF & Cilium)
  datapath_provider = local.datapath_provider
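  # Dataplane V2 ("ADVANCED_DATAPATH") enforces NetworkPolicy natively via eBPF,
  # so the Calico-based network_policy block below is only rendered on the
  # legacy datapath.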
dynamic "network_policy" {
for_each = local.datapath_provider == "ADVANCED_DATAPATH" ? [] : [1]
content {
enabled = true
provider = "CALICO"
}
}
dynamic "cost_management_config" {
for_each = var.enable_cost_allocation ? [1] : []
content {
enabled = var.enable_cost_allocation
}
}
dynamic "vertical_pod_autoscaling" {
for_each = var.enable_vertical_pod_autoscaling ? [1] : []
content {
enabled = var.enable_vertical_pod_autoscaling
}
}
default_snat_status {
disabled = var.disable_snat_status
}
# Networking: Defaulting to Shared VPC Setup
network = local.network
subnetwork = local.subnetwork
networking_mode = "VPC_NATIVE"
  ip_allocation_policy {
    cluster_secondary_range_name  = local.pods_ip_cidr_range_name
    services_secondary_range_name = local.services_ip_cidr_range_name
  }
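  # The two range names above must match secondary ranges that already exist on
  # the (shared VPC) subnetwork, e.g. (names and CIDRs illustrative):
  #
  #   resource "google_compute_subnetwork" "gke" {
  #     # ... name, network, region, primary ip_cidr_range ...
  #     secondary_ip_range {
  #       range_name    = "pods"
  #       ip_cidr_range = "10.8.0.0/14"
  #     }
  #     secondary_ip_range {
  #       range_name    = "services"
  #       ip_cidr_range = "10.12.0.0/20"
  #     }
  #   }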
  master_auth {
    client_certificate_config {
      issue_client_certificate = false
    }
  }
  master_authorized_networks_config {
    dynamic "cidr_blocks" {
      for_each = var.master_authorized_networks
      content {
        cidr_block   = lookup(cidr_blocks.value, "cidr_block", "")
        display_name = lookup(cidr_blocks.value, "display_name", "")
      }
    }
    gcp_public_cidrs_access_enabled = var.enable_public_cidrs_access
  }
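  # var.master_authorized_networks is expected to be a list of objects like
  # (values illustrative):
  #   [{ cidr_block = "203.0.113.0/24", display_name = "office-vpn" }]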
  control_plane_endpoints_config {
    dns_endpoint_config {
      allow_external_traffic = var.enable_dns_endpoint
    }
  }
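  # Note: GKE requires master_ipv4_cidr_block to be a /28 that does not overlap
  # with any other range in the VPC.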
dynamic "private_cluster_config" {
for_each = var.enable_private_cluster ? [1] : []
content {
enable_private_endpoint = var.enable_private_cluster
enable_private_nodes = var.enable_private_cluster
master_ipv4_cidr_block = local.master_ipv4_cidr_block
master_global_access_config {
enabled = var.enable_private_cluster
}
}
}
  # Observability
  logging_config {
    enable_components = [
      "APISERVER",
      "CONTROLLER_MANAGER",
      "SCHEDULER",
      "SYSTEM_COMPONENTS",
      "WORKLOADS"
    ]
  }
  monitoring_config {
    enable_components = var.monitoring_config_enable_components
    dynamic "managed_prometheus" {
      for_each = var.monitoring_enable_managed_prometheus ? [1] : []
      content {
        enabled = var.monitoring_enable_managed_prometheus
      }
    }
  }
dynamic "resource_usage_export_config" {
for_each = local.resource_usage_export_dataset_id != null ? [{
dataset_id = local.resource_usage_export_dataset_id
enable_network_egress_metering = var.enable_network_egress_export
enable_resource_consumption_metering = var.enable_resource_consumption_export
}] : []
content {
enable_network_egress_metering = resource_usage_export_config.value.enable_network_egress_metering
enable_resource_consumption_metering = resource_usage_export_config.value.enable_resource_consumption_metering
bigquery_destination {
dataset_id = resource_usage_export_config.value.dataset_id
}
}
}
  # Add-Ons
  addons_config {
    gce_persistent_disk_csi_driver_config {
      enabled = true
    }
    network_policy_config {
      disabled = local.datapath_provider == "ADVANCED_DATAPATH"
    }
    gcp_filestore_csi_driver_config {
      enabled = var.filestore_csi_driver
    }
    gcs_fuse_csi_driver_config {
      enabled = var.fuse_csi_driver
    }
    dns_cache_config {
      enabled = var.dns_cache
    }
  }
  # Gateway API
  dynamic "gateway_api_config" {
    for_each = var.gateway_api_enabled ? [1] : []
    content {
      channel = "CHANNEL_STANDARD"
    }
  }
  # Google Groups for RBAC
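  # Google requires the RBAC security group to be named
  # gke-security-groups@<your-domain>; groups to authorize are nested under it.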
dynamic "authenticator_groups_config" {
for_each = local.cluster_authenticator_security_group
content {
security_group = authenticator_groups_config.value.security_group
}
}
  # Maintenance
  maintenance_policy {
    daily_maintenance_window {
      start_time = var.maintenance_start_time
    }
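    # var.maintenance_exclusions entries are expected to be objects of the form
    # (values illustrative, timestamps in RFC 3339):
    #   { name = "winter-freeze", start_time = "2025-12-20T00:00:00Z", end_time = "2026-01-05T00:00:00Z" }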
dynamic "maintenance_exclusion" {
for_each = var.maintenance_exclusions
content {
exclusion_name = maintenance_exclusion.value.name
start_time = maintenance_exclusion.value.start_time
end_time = maintenance_exclusion.value.end_time
}
}
}
  # Configuration for the cluster's default node pool / node defaults.
  # We fully expect the default node pool to be removed immediately.
  default_max_pods_per_node = 32
  remove_default_node_pool  = true
  node_pool {
    name               = "default-pool"
    initial_node_count = 1
    node_config {
      labels = local.labels
      tags   = local.tags
    }
  }
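  # GCFS (the Google Container File System) enables image streaming, which lets
  # nodes start containers before the full image has been downloaded.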
dynamic "node_pool_defaults" {
for_each = var.enable_gcfs ? [1] : []
content {
# TODO: If we end up needing to configure more parts of `node_pool_defaults`
# we will need to make this more dynamic
node_config_defaults {
gcfs_config {
enabled = true
}
}
}
}
  lifecycle {
    ignore_changes = [
      initial_node_count,
      node_pool,
      resource_labels["asmv"],
      resource_labels["mesh_id"]
    ]
    prevent_destroy = true
  }
}
#
# GKE Node Pools as configured via Variables
#
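# local.node_pools is assumed to map pool names to settings objects; the keys
# shown here are the ones consumed below, values illustrative:
#   main = {
#     use_name_prefix    = "false"
#     machine_type       = "e2-standard-8"
#     initial_node_count = 1
#     min_count          = 1
#     max_count          = 10
#     max_pods_per_node  = 32
#     disk_size_gb       = 100
#     disk_type          = "pd-ssd"
#     max_surge          = 1
#     max_unavailable    = 0
#   }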
resource "google_container_node_pool" "pools" {
for_each = local.node_pools
provider = google-beta
name = tobool(each.value.use_name_prefix) == false ? each.key : null
name_prefix = tobool(each.value.use_name_prefix) == true ? format("%s-", each.key) : null
cluster = google_container_cluster.primary.name
initial_node_count = each.value.initial_node_count
location = var.region
max_pods_per_node = each.value.max_pods_per_node
project = local.project_id
node_locations = lookup(each.value, "node_locations", null) != null ? jsondecode(each.value.node_locations) : null
autoscaling {
min_node_count = each.value.min_count
max_node_count = each.value.max_count
}
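  # Only render network_config when this pool defines a pod_range; try() falls
  # back to an empty map (rendering no block) when the attribute is absent.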
dynamic "network_config" {
for_each = try({ (each.value.pod_range) = { enable_private_nodes = each.value.enable_private_nodes } }, {})
content {
create_pod_range = false
pod_range = network_config.key
enable_private_nodes = network_config.value.enable_private_nodes
}
}
  node_config {
    disk_size_gb = each.value.disk_size_gb
    disk_type    = each.value.disk_type
    image_type   = "COS_CONTAINERD"
    labels       = local.node_pools_labels[each.key]
    dynamic "guest_accelerator" {
      for_each = length(local.node_pools_guest_accelerator[each.key]) != 0 ? [1] : []
      content {
        type  = local.node_pools_guest_accelerator[each.key].type
        count = local.node_pools_guest_accelerator[each.key].count
      }
    }
    dynamic "linux_node_config" {
      for_each = length(local.node_pools_sysctls[each.key]) != 0 ? [1] : []
      content {
        sysctls = local.node_pools_sysctls[each.key]
      }
    }
    spot            = local.node_pools_spot_enabled[each.key]
    machine_type    = each.value.machine_type
    oauth_scopes    = local.node_pools_oauth_scopes[each.key]
    service_account = google_service_account.cluster_service_account.email
    tags            = local.node_pools_tags[each.key]
    workload_metadata_config {
      mode = "GKE_METADATA"
    }
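    # local.node_pools_taints[each.key] is assumed to be a list of objects, e.g.
    # (illustrative): [{ key = "dedicated", value = "gpu", effect = "NO_SCHEDULE" }]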
dynamic "taint" {
for_each = local.node_pools_taints[each.key]
content {
key = taint.value.key
value = taint.value.value
effect = taint.value.effect
}
}
}
  upgrade_settings {
    max_surge       = each.value.max_surge
    max_unavailable = each.value.max_unavailable
  }
  lifecycle {
    create_before_destroy = true
    ignore_changes = [
      initial_node_count,
      node_config[0].oauth_scopes,
      node_config[0].metadata,
    ]
  }
}