terraform-provision-infra/modules/nonagones/k8s-res/main.tf (184 lines of code) (raw):
# Copyright 2023 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Provider requirements for this module. Each provider is pinned to one exact
# release, so moving to a newer version is always an explicit edit here.
terraform {
  required_providers {
    # Downloads the GPU driver installer manifest over HTTPS.
    http = {
      version = "3.3.0"
      source  = "hashicorp/http"
    }

    # Creates the in-cluster objects (storage, workloads, ingress).
    kubernetes = {
      version = "2.20.0"
      source  = "hashicorp/kubernetes"
    }

    # Looks up the GKE cluster and the caller's access token.
    google = {
      version = "4.63.1"
      source  = "hashicorp/google"
    }
  }
}
# Client configuration of the identity Terraform is running as. Only its
# access token is used in this file (by the kubernetes provider).
data "google_client_config" "default" {}

# Looks up the target GKE cluster; the kubernetes provider in this file reads
# its endpoint and CA certificate from here.
data "google_container_cluster" "my_cluster" {
  project  = var.project_id
  location = var.gke_cluster_location
  name     = var.gke_cluster_name
}
# Points the kubernetes provider at the GKE cluster looked up by
# data.google_container_cluster.my_cluster and authenticates with the access
# token from data.google_client_config.default.
#
# NOTE(review): this provider is configured inside what appears to be a
# reusable module. Terraform's documentation treats in-module provider
# configuration as a legacy pattern (such a module cannot be called with
# count, for_each or depends_on) - confirm this is intentional.
provider "kubernetes" {
  token = data.google_client_config.default.access_token
  host  = "https://${data.google_container_cluster.my_cluster.endpoint}"

  # The data source returns the cluster CA base64-encoded, so decode it before
  # handing it to the provider.
  cluster_ca_certificate = base64decode(data.google_container_cluster.my_cluster.master_auth[0].cluster_ca_certificate)

  # Opt-in switch for the kubernetes_manifest resource used throughout this
  # file. NOTE(review): possibly redundant with provider 2.20.0 - verify
  # against the provider changelog before removing.
  experiments {
    manifest_resource = true
  }
}
# StorageClass named "filestore". The PersistentVolume and the claim in this
# file both reference it by name.
#
# NOTE(review): "nfs" does not look like the name of an installed provisioner;
# presumably it is a placeholder because the volume is defined statically in
# this file - confirm.
resource "kubernetes_storage_class" "nfs" {
  storage_provisioner = "nfs"
  reclaim_policy      = "Retain"

  metadata {
    name = "filestore"
  }
}
# Statically defined 1Ti NFS PersistentVolume exporting "/vol1", mountable
# read-write by many nodes at once.
resource "kubernetes_persistent_volume_v1" "nfs_pv" {
  metadata {
    name = "filestore-nfs-pv"
  }

  spec {
    access_modes       = ["ReadWriteMany"]
    storage_class_name = kubernetes_storage_class.nfs.metadata[0].name

    capacity = {
      storage = "1Ti"
    }

    persistent_volume_source {
      nfs {
        # NOTE(review): the variable name suggests an IP *range*, but an NFS
        # server must be a single host/IP - confirm that callers pass the
        # Filestore instance address here.
        server = var.google_filestore_reserved_ip_range
        path   = "/vol1"
      }
    }
  }
}
# Claim "vol1", bound explicitly to the NFS PersistentVolume defined in this
# file. It requests the same size (1Ti), access mode and storage class as that
# volume so the two match.
resource "kubernetes_persistent_volume_claim_v1" "nfs_pvc" {
  metadata {
    name = "vol1"
  }

  spec {
    access_modes       = ["ReadWriteMany"]
    storage_class_name = kubernetes_storage_class.nfs.metadata[0].name

    # Bind to the specific volume by name. Uses bracket indexing
    # (metadata[0]) like the other references in this file, rather than the
    # legacy attribute-style index (metadata.0).
    volume_name = kubernetes_persistent_volume_v1.nfs_pv.metadata[0].name

    resources {
      requests = {
        storage = "1Ti"
      }
    }
  }
}
# Downloads the NVIDIA driver installer DaemonSet manifest for COS nodes.
#
# NOTE(review): the URL tracks the upstream "master" branch, so the manifest
# that gets applied can change between runs without any change to this module.
# Consider pinning to a tag or commit.
data "http" "gpu_driver_file" {
  url = "https://raw.githubusercontent.com/GoogleCloudPlatform/container-engine-accelerators/master/nvidia-driver-installer/cos/daemonset-preloaded-latest.yaml"

  lifecycle {
    # The http provider (3.x) does not fail on non-2xx responses; without this
    # check an error page body would be passed on to yamldecode().
    # Requires Terraform >= 1.2 (custom conditions).
    postcondition {
      condition     = self.status_code == 200
      error_message = "Downloading the NVIDIA driver installer manifest did not return HTTP 200."
    }
  }
}
# Applies the downloaded GPU driver installer manifest to the cluster. The
# response body is decoded as a single YAML document.
resource "kubernetes_manifest" "gpu_driver" {
  manifest = yamldecode(
    data.http.gpu_driver_file.response_body
  )
}
# Single-replica Deployment of the Stable Diffusion web UI.
#
# - Scheduled onto the node pool named by var.gke_cluster_nodepool.
# - Runs the image given by var.webui_image_url and requests one GPU.
# - Mounts two sub-paths of the shared NFS claim: the sd15 model directory and
#   the outputs directory.
#
# The claim name is interpolated from the PVC resource instead of being
# hard-coded, so Terraform orders the Deployment after the claim on create
# (and before it on destroy). The rendered value is unchanged ("vol1").
resource "kubernetes_manifest" "webui_deployment" {
  manifest = yamldecode(<<-EOF
    apiVersion: apps/v1
    kind: Deployment
    metadata:
      name: stable-diffusion-sd15-deployment
      namespace: default
      labels:
        app: stable-diffusion-sd15
    spec:
      replicas: 1
      selector:
        matchLabels:
          app: stable-diffusion-sd15
      template:
        metadata:
          labels:
            app: stable-diffusion-sd15
        spec:
          nodeSelector:
            cloud.google.com/gke-nodepool: "${var.gke_cluster_nodepool}"
          volumes:
          - name: stable-diffusion-storage
            persistentVolumeClaim:
              claimName: "${kubernetes_persistent_volume_claim_v1.nfs_pvc.metadata[0].name}"
          containers:
          - name: stable-diffusion-webui
            image: "${var.webui_image_url}"
            # Requests and limits are identical for every resource.
            resources:
              requests:
                cpu: 5
                memory: 24Gi
                nvidia.com/gpu: 1
              limits:
                cpu: 5
                memory: 24Gi
                nvidia.com/gpu: 1
            ports:
            - containerPort: 7860
            volumeMounts:
            - mountPath: "/stable-diffusion-webui/models/Stable-diffusion"
              name: stable-diffusion-storage
              subPath: models/Stable-diffusion/sd15
            - mountPath: "/stable-diffusion-webui/outputs"
              name: stable-diffusion-storage
              subPath: outputs
    EOF
  )
}
# GKE BackendConfig "sd-webui-backendconfig": enables generated-cookie session
# affinity with a cookie TTL of 1000 seconds. The sd-webui Service in this
# file attaches it through its backend-config annotation.
resource "kubernetes_manifest" "webui_backend_config" {
  manifest = yamldecode(<<-EOF
    kind: BackendConfig
    apiVersion: cloud.google.com/v1
    metadata:
      namespace: default
      name: sd-webui-backendconfig
    spec:
      sessionAffinity:
        affinityCookieTtlSec: 1000
        affinityType: "GENERATED_COOKIE"
    EOF
  )
}
# ClusterIP Service "sd-webui": exposes the web UI pods (selected by
# app=stable-diffusion-sd15) on port 80, forwarding to container port 7860.
# The annotation attaches the sd-webui-backendconfig BackendConfig to port 80.
resource "kubernetes_manifest" "webui_svc" {
  # The BackendConfig is referenced only by name inside the annotation string,
  # which Terraform cannot see as a dependency. Declare it explicitly so the
  # BackendConfig exists before the Service and outlives it on destroy.
  depends_on = [kubernetes_manifest.webui_backend_config]

  manifest = yamldecode(<<-EOF
    apiVersion: v1
    kind: Service
    metadata:
      name: sd-webui
      namespace: default
      annotations:
        cloud.google.com/backend-config: '{"ports": {"80":"sd-webui-backendconfig"}}'
    spec:
      ports:
      - port: 80
        protocol: TCP
        targetPort: 7860
      selector:
        app: stable-diffusion-sd15
      type: ClusterIP
    EOF
  )
}
# Ingress "sd-webui": sends all traffic to port 80 of the sd-webui Service via
# the default backend (no host or path rules).
resource "kubernetes_manifest" "webui_ingress" {
  # The backend Service is referenced only by its literal name in the YAML, so
  # Terraform has no implicit ordering. Declare it explicitly so the Service
  # is created before the Ingress and removed after it.
  depends_on = [kubernetes_manifest.webui_svc]

  manifest = yamldecode(<<-EOF
    apiVersion: networking.k8s.io/v1
    kind: Ingress
    metadata:
      name: sd-webui
      namespace: default
    spec:
      defaultBackend:
        service:
          name: sd-webui
          port:
            number: 80
    EOF
  )
}
# Reads the "sd-webui" Ingress back from the cluster once the manifest
# resource has created it.
#
# NOTE(review): the read happens right after the Ingress is created;
# presumably the load balancer address may not be present in its status yet
# on the first apply - confirm that consumers of this data source tolerate
# that.
data "kubernetes_ingress_v1" "sd_webui" {
  # Explicit ordering: the Ingress is looked up by literal name, so there is
  # no implicit reference to the resource that creates it.
  depends_on = [kubernetes_manifest.webui_ingress]

  metadata {
    namespace = "default"
    name      = "sd-webui"
  }
}
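# NOTE: Everything from here to the end of the file is commented out and is
# NOT applied by this module. It holds disabled definitions (an Agones Fleet
# and FleetAutoscaler, an IAP-enabled BackendConfig, a ManagedCertificate, and
# an nginx Service with its Ingress), apparently kept for reference.
#resource "kubernetes_manifest" "ingress_sd_agones_ingress" {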
# manifest = {
# "apiVersion" = "networking.k8s.io/v1"
# "kind" = "Ingress"
# "metadata" = {
# "annotations" = {
# "kubernetes.io/ingress.class" = "gce"
# "kubernetes.io/ingress.global-static-ip-name" = var.webui_address_name
# "networking.gke.io/managed-certificates" = "managed-cert"
# }
# "name" = "sd-agones-ingress"
# "namespace" = "default"
# }
# "spec" = {
# "defaultBackend" = {
# "service" = {
# "name" = "stable-diffusion-nginx-service"
# "port" = {
# "number" = 8080
# }
# }
# }
# }
# }
#}
#resource "kubernetes_manifest" "fleet_sd_agones_fleet" {
# manifest = {
# "apiVersion" = "agones.dev/v1"
# "kind" = "Fleet"
# "metadata" = {
# "name" = "sd-agones-fleet"
# "namespace" = "default"
# }
# "spec" = {
# "replicas" = 1
# "template" = {
# "spec" = {
# "container" = "simple-game-server"
# "ports" = [
# {
# "container" = "simple-game-server"
# "containerPort" = 7654
# "name" = "default"
# },
# {
# "container" = "stable-diffusion-webui"
# "containerPort" = 7860
# "name" = "sd"
# "protocol" = "TCP"
# },
# ]
# "template" = {
# "spec" = {
# "containers" = [
# {
# "image" = "us-docker.pkg.dev/agones-images/examples/simple-game-server:0.14"
# "name" = "simple-game-server"
# "resources" = {
# "limits" = {
# "cpu" = "20m"
# "memory" = "64Mi"
# }
# "requests" = {
# "cpu" = "20m"
# "memory" = "64Mi"
# }
# }
# },
# {
# "command" = [
# "/bin/sh",
# "start.sh",
# ]
# "image" = var.webui_image_url
# "name" = "stable-diffusion-webui"
# "resources" = {
# "limits" = {
# "nvidia.com/gpu" = "1"
# }
# }
# "volumeMounts" = [
# {
# "mountPath" = "/stable-diffusion-webui/models"
# "name" = "stable-diffusion-storage"
# "subPath" = "models"
# },
# {
# "mountPath" = "/result"
# "name" = "stable-diffusion-storage"
# "subPath" = "result"
# },
# ]
# },
# ]
# "volumes" = [
# {
# "name" = "stable-diffusion-storage"
# "persistentVolumeClaim" = {
# "claimName" = "vol1"
# }
# },
# ]
# }
# }
# }
# }
# }
# }
#}
#
#resource "kubernetes_manifest" "fleetautoscaler_fleet_autoscaler_policy" {
# depends_on = [kubernetes_manifest.fleet_sd_agones_fleet]
# manifest = {
# "apiVersion" = "autoscaling.agones.dev/v1"
# "kind" = "FleetAutoscaler"
# "metadata" = {
# "name" = "fleet-autoscaler-policy"
# "namespace" = "default"
# }
# "spec" = {
# "fleetName" = "sd-agones-fleet"
# "policy" = {
# "buffer" = {
# "bufferSize" = 1
# "maxReplicas" = 20
# "minReplicas" = 1
# }
# "type" = "Buffer"
# }
# "sync" = {
# "fixedInterval" = {
# "seconds" = 30
# }
# "type" = "FixedInterval"
# }
# }
# }
#}
#
#resource "kubernetes_manifest" "backendconfig_config_default" {
# manifest = {
# "apiVersion" = "cloud.google.com/v1"
# "kind" = "BackendConfig"
# "metadata" = {
# "name" = "config-default"
# "namespace" = "default"
# }
# "spec" = {
# "iap" = {
# "enabled" = true
# "oauthclientCredentials" = {
# "secretName" = "iap-secret"
# }
# }
# "timeoutSec" = 900
# }
# }
#}
#resource "kubernetes_manifest" "managedcertificate_managed_cert" {
# manifest = {
# "apiVersion" = "networking.gke.io/v1"
# "kind" = "ManagedCertificate"
# "metadata" = {
# "name" = "managed-cert"
# "namespace" = "default"
# }
# "spec" = {
# "domains" = [
# var.sd_webui_domain
# ]
# }
# }
#}
#
#resource "kubernetes_manifest" "service_stable_diffusion_nginx_service" {
# manifest = {
# "apiVersion" = "v1"
# "kind" = "Service"
# "metadata" = {
# "annotations" = {
# "beta.cloud.google.com/backend-config" = "{\"default\": \"config-default\"}"
# "cloud.google.com/neg" = "{\"ingress\": true}"
# }
# "labels" = {
# "app" = "stable-diffusion-nginx"
# }
# "name" = "stable-diffusion-nginx-service"
# "namespace" = "default"
# }
# "spec" = {
# "ports" = [
# {
# "port" = 8080
# "protocol" = "TCP"
# "targetPort" = 8080
# },
# ]
# "selector" = {
# "app" = "stable-diffusion-nginx"
# }
# "type" = "ClusterIP"
# }
# }
#}