# iac/modules/gke_cluster/main.tf
# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Note: all configs in this directory were created by using Cloud Config
# Connector to export configs from existing Cloudflyer projects and then
# tweaking / trimming them to get to a deployable state. It is therefore
# possible that some settings here are unnecessary and hardcode values that
# could just as well be left at their defaults, so it is OK to edit these
# configs liberally rather than treating them as set in stone.
locals {
oauth_scopes = [
"https://www.googleapis.com/auth/devstorage.read_only",
"https://www.googleapis.com/auth/logging.write",
"https://www.googleapis.com/auth/monitoring",
"https://www.googleapis.com/auth/service.management.readonly",
"https://www.googleapis.com/auth/servicecontrol",
"https://www.googleapis.com/auth/trace.append"
]
}
resource "google_service_account" "cluster_service_account" {
count = var.service_account_email == "" ? 1 : 0
project = var.project_id
account_id = "${var.cluster_name}-cluster-sa"
display_name = "Service account of GKE cluster."
}
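
# If the dedicated service account above is used, it still needs the usual
# logging/monitoring roles to report node health and metrics. Those grants may
# already be handled elsewhere in the deployment, so the snippet below is only
# an illustrative sketch (hence commented out); the role list follows the
# standard guidance for a minimally-privileged GKE node service account.
#
# resource "google_project_iam_member" "cluster_sa_roles" {
#   for_each = var.service_account_email == "" ? toset([
#     "roles/logging.logWriter",
#     "roles/monitoring.metricWriter",
#     "roles/monitoring.viewer",
#     "roles/stackdriver.resourceMetadata.writer",
#   ]) : toset([])
#   project = var.project_id
#   role    = each.value
#   member  = "serviceAccount:${google_service_account.cluster_service_account[0].email}"
# }
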
# https://registry.terraform.io/providers/hashicorp/google/latest/docs/resources/container_cluster
resource "google_container_cluster" "cluster" {
addons_config {
network_policy_config {
disabled = true
}
}
cluster_autoscaling {
auto_provisioning_defaults {
oauth_scopes = local.oauth_scopes
service_account = var.service_account_email == "" ? google_service_account.cluster_service_account[0].email : var.service_account_email
}
enabled = true
resource_limits {
maximum = 6400
minimum = 1
resource_type = "cpu"
}
resource_limits {
maximum = 51200
minimum = 1
resource_type = "memory"
}
}
  # This block is set to avoid destroying and recreating the cluster on every
  # update; without it, Terraform forces replacement of the resource when
  # updating (flagged with a `# forces replacement` comment in the
  # `terraform apply` output). Blank values mean "use the default".
# https://registry.terraform.io/providers/hashicorp/google/latest/docs/resources/container_cluster#cluster_ipv4_cidr
ip_allocation_policy {
cluster_ipv4_cidr_block = ""
services_ipv4_cidr_block = ""
}
  # Enable Workload Identity for the cluster, translating the instructions in
  # https://cloud.google.com/kubernetes-engine/docs/how-to/workload-identity
  # into Terraform.
workload_identity_config {
workload_pool = "${var.project_id}.svc.id.goog"
}
default_max_pods_per_node = 110
enable_intranode_visibility = true
enable_shielded_nodes = true
location = var.zone == "" ? var.region : var.zone
master_auth {
client_certificate_config {
issue_client_certificate = false
}
}
name = "${var.cluster_name}-cluster"
network = var.network == "" ? "projects/${var.project_id}/global/networks/default" : var.network
network_policy {
enabled = false
provider = "PROVIDER_UNSPECIFIED"
}
project = var.project_id
release_channel {
channel = "REGULAR"
}
subnetwork = var.subnetwork == "" ? "projects/${var.project_id}/regions/${var.region}/subnetworks/default" : var.subnetwork
vertical_pod_autoscaling {
enabled = true
}
  # A cluster cannot be created without a node pool, so create the smallest
  # possible default pool and remove it immediately. Separately managed node
  # pools (defined below) can then be modified without recreating the cluster.
  # See https://registry.terraform.io/providers/hashicorp/google/latest/docs/resources/container_node_pool
initial_node_count = 1
remove_default_node_pool = true
resource_labels = {
    goog-packaged-solution      = "medical-imaging-suite"
    goog-packaged-solution-type = "digital-pathology"
}
}
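
# Enabling Workload Identity above only configures the workload pool; each
# workload still has to bind its Kubernetes service account (KSA) to a Google
# service account, which is expected to happen outside this module. For
# illustration only (NAMESPACE and KSA_NAME below are placeholders, not values
# defined anywhere in this module):
#
# resource "google_service_account_iam_member" "workload_identity_binding" {
#   service_account_id = google_service_account.cluster_service_account[0].name
#   role               = "roles/iam.workloadIdentityUser"
#   member             = "serviceAccount:${var.project_id}.svc.id.goog[NAMESPACE/KSA_NAME]"
# }

# General-purpose pool of small e2-medium nodes with a wide autoscaling range.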
resource "google_container_node_pool" "e2_pool" {
name = "e2-pool"
project = var.project_id
location = var.zone == "" ? var.region : var.zone
cluster = google_container_cluster.cluster.name
autoscaling {
min_node_count = 0
max_node_count = 1000
}
initial_node_count = 1
management {
auto_repair = true
auto_upgrade = true
}
max_pods_per_node = 110
node_config {
disk_size_gb = 100
disk_type = "pd-standard"
machine_type = "e2-medium"
metadata = { disable-legacy-endpoints = "true" }
    # Use the GKE metadata server on these nodes for Workload Identity, per
    # https://cloud.google.com/kubernetes-engine/docs/how-to/workload-identity.
workload_metadata_config {
mode = "GKE_METADATA"
}
oauth_scopes = local.oauth_scopes
service_account = var.service_account_email == "" ? google_service_account.cluster_service_account[0].email : var.service_account_email
shielded_instance_config {
enable_integrity_monitoring = true
enable_secure_boot = true
}
}
upgrade_settings {
max_surge = 1
max_unavailable = 0
}
depends_on = [
google_container_cluster.cluster,
]
}
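
# Dedicated pool for the transformation workloads: caller-selected machine
# type (var.transformation_machine_type), SSD boot disks, autoscaling 0-5
# nodes.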
resource "google_container_node_pool" "transformation_pool" {
name = "transformation-pool"
project = var.project_id
location = var.zone == "" ? var.region : var.zone
cluster = google_container_cluster.cluster.name
autoscaling {
min_node_count = 0
max_node_count = 5
}
initial_node_count = 1
management {
auto_repair = true
auto_upgrade = true
}
max_pods_per_node = 100
node_config {
disk_size_gb = 100
disk_type = "pd-ssd"
machine_type = var.transformation_machine_type
metadata = { disable-legacy-endpoints = "true" }
    # Use the GKE metadata server on these nodes for Workload Identity, per
    # https://cloud.google.com/kubernetes-engine/docs/how-to/workload-identity.
workload_metadata_config {
mode = "GKE_METADATA"
}
oauth_scopes = local.oauth_scopes
service_account = var.service_account_email == "" ? google_service_account.cluster_service_account[0].email : var.service_account_email
shielded_instance_config {
enable_integrity_monitoring = true
enable_secure_boot = true
}
}
upgrade_settings {
max_surge = 1
max_unavailable = 0
}
depends_on = [
google_container_cluster.cluster,
]
}
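
# Dedicated pool for the DICOM proxy workloads: caller-selected machine type
# (var.dicom_proxy_machine_type), SSD boot disks, autoscaling 0-5 nodes.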
resource "google_container_node_pool" "dicom_proxy_pool" {
name = "dicom-proxy-pool"
project = var.project_id
location = var.zone == "" ? var.region : var.zone
cluster = google_container_cluster.cluster.name
autoscaling {
min_node_count = 0
max_node_count = 5
}
initial_node_count = 1
management {
auto_repair = true
auto_upgrade = true
}
max_pods_per_node = 100
node_config {
disk_size_gb = 100
disk_type = "pd-ssd"
machine_type = var.dicom_proxy_machine_type
metadata = { disable-legacy-endpoints = "true" }
    # Use the GKE metadata server on these nodes for Workload Identity, per
    # https://cloud.google.com/kubernetes-engine/docs/how-to/workload-identity.
workload_metadata_config {
mode = "GKE_METADATA"
}
oauth_scopes = local.oauth_scopes
service_account = var.service_account_email == "" ? google_service_account.cluster_service_account[0].email : var.service_account_email
shielded_instance_config {
enable_integrity_monitoring = true
enable_secure_boot = true
}
}
upgrade_settings {
max_surge = 1
max_unavailable = 0
}
depends_on = [
google_container_cluster.cluster,
]
}
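
# Example invocation, for reference only. The source path and the machine
# types are placeholders; only variables actually referenced in this file are
# shown, and any of them that have defaults in the module's variables.tf can
# be omitted.
#
# module "gke_cluster" {
#   source                      = "../modules/gke_cluster" # depends on caller location
#   project_id                  = "my-project"
#   region                      = "us-central1"
#   zone                        = "" # empty => regional cluster in var.region
#   cluster_name                = "pathology"
#   network                     = "" # empty => the project's default network
#   subnetwork                  = "" # empty => the region's default subnetwork
#   service_account_email       = "" # empty => create a dedicated service account
#   transformation_machine_type = "n2-highmem-4"
#   dicom_proxy_machine_type    = "n2-standard-8"
# }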