platforms/gke-aiml/playground/configsync.tf (433 lines of code) (raw):
# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Values shared by the Config Sync resources declared in this file.
locals {
  # Absolute path of the per-environment manifests directory; handed to the
  # manifest scripts below through the MANIFESTS_DIRECTORY environment variable.
  configsync_manifests_directory = abspath(join("/", [path.module, "manifests", "configsync", var.environment_name]))

  # Service account names. The ray-head, ray-worker, mlflow and rag-frontend
  # values are exported to cluster_namespace_manifests.sh as
  # K8S_SERVICE_ACCOUNT_* variables.
  mlflow_kubernetes_service_account            = "mlflow"
  namespace_default_kubernetes_service_account = "default"
  rag_frontend_service_account                 = "rag-frontend"
  ray_head_kubernetes_service_account          = "ray-head"
  ray_worker_kubernetes_service_account        = "ray-worker"
}
# TEMPLATE MANIFESTS
###############################################################################
# Runs scripts/template_manifests.sh on the machine executing Terraform.
# The resource is replaced (and the script re-run) whenever the script itself
# or the content of any file under templates/configsync changes.
resource "null_resource" "template_manifests" {
  depends_on = [
    google_gke_hub_feature_membership.cluster_configmanagement,
    google_secret_manager_secret_version.git_config,
    module.configsync_repository,
  ]

  provisioner "local-exec" {
    command = "${path.module}/scripts/template_manifests.sh"
    environment = {
      GIT_CONFIG_SECRET_NAME = local.git_config_secret_name
      GIT_REPOSITORY         = local.git_repository
      MANIFESTS_DIRECTORY    = local.configsync_manifests_directory
      PROJECT_ID             = data.google_project.environment.project_id
    }
  }

  triggers = {
    # Hash file CONTENTS with filemd5(). The previous md5() call hashed the
    # path string, which only changes when a file is added, removed or renamed,
    # so edits to an existing template never re-ran the script.
    # NOTE(review): the first apply after this change replaces the resource
    # once because the stored hash differs.
    md5_files = md5(join("", [
      for f in fileset("${path.module}/templates/configsync", "**") :
      filemd5("${path.module}/templates/configsync/${f}")
    ]))
    md5_script = filemd5("${path.module}/scripts/template_manifests.sh")
  }
}
# CLUSTER MANIFESTS
###############################################################################
# Runs scripts/cluster_manifests.sh after the template manifests step.
# The resource is replaced (and the script re-run) whenever the script itself
# or the content of any file under
# templates/configsync/templates/_cluster_template changes.
resource "null_resource" "cluster_manifests" {
  depends_on = [
    google_gke_hub_feature_membership.cluster_configmanagement,
    google_secret_manager_secret_version.git_config,
    module.configsync_repository,
    null_resource.template_manifests,
  ]

  provisioner "local-exec" {
    command = "${path.module}/scripts/cluster_manifests.sh"
    environment = {
      CLUSTER_ENV            = var.environment_name
      CLUSTER_NAME           = google_container_cluster.mlp.name
      GIT_CONFIG_SECRET_NAME = local.git_config_secret_name
      GIT_REPOSITORY         = local.git_repository
      MANIFESTS_DIRECTORY    = local.configsync_manifests_directory
      PROJECT_ID             = data.google_project.environment.project_id
    }
  }

  triggers = {
    # Two fixes compared to the previous expression:
    #   1. filemd5() hashes the file contents; md5() hashed only the path
    #      string, so content edits never re-triggered the script.
    #   2. The path now has the "/" between "_cluster_template" and the
    #      relative file name, which was missing before.
    # NOTE(review): the first apply after this change replaces the resource
    # once because the stored hash differs.
    md5_files = md5(join("", [
      for f in fileset("${path.module}/templates/configsync/templates/_cluster_template", "**") :
      filemd5("${path.module}/templates/configsync/templates/_cluster_template/${f}")
    ]))
    md5_script = filemd5("${path.module}/scripts/cluster_manifests.sh")
  }
}
# GIT CREDENTIALS SECRET CONFIGSYNC
###############################################################################
# Runs scripts/git_cred_secret.sh with K8S_NAMESPACE set to
# "config-management-system", using the Connect Gateway kubeconfig.
# Replaced (script re-run) when the Git user name, the Git token or the script
# changes.
resource "null_resource" "git_cred_secret_cms" {
  triggers = {
    # Only a digest of the concatenated credentials is kept as the trigger.
    md5_credentials = md5("${var.git_user_name}${var.git_token}")
    md5_script      = filemd5(format("%s/scripts/git_cred_secret.sh", path.module))
  }

  provisioner "local-exec" {
    command = format("%s/scripts/git_cred_secret.sh", path.module)
    environment = {
      PROJECT_ID             = data.google_project.environment.project_id
      KUBECONFIG             = format("%s/%s_%s", local.kubeconfig_dir, data.google_project.environment.project_id, google_gke_hub_membership.cluster.membership_id)
      K8S_NAMESPACE          = "config-management-system"
      GIT_REPOSITORY         = local.git_repository
      GIT_CONFIG_SECRET_NAME = local.git_config_secret_name
    }
  }

  depends_on = [
    google_gke_hub_feature_membership.cluster_configmanagement,
    google_secret_manager_secret_version.git_config,
    module.configsync_repository,
    null_resource.connect_gateway_kubeconfig,
  ]
}
# KUEUE
###############################################################################
# Runs scripts/kueue_manifests.sh after the cluster manifests step.
# The resource is replaced (and the script re-run) whenever the script itself
# or the content of any file under
# templates/configsync/templates/_cluster_template changes.
resource "null_resource" "kueue" {
  depends_on = [
    google_gke_hub_feature_membership.cluster_configmanagement,
    google_secret_manager_secret_version.git_config,
    null_resource.cluster_manifests,
  ]

  provisioner "local-exec" {
    command = "${path.module}/scripts/kueue_manifests.sh"
    environment = {
      GIT_CONFIG_SECRET_NAME = local.git_config_secret_name
      GIT_REPOSITORY         = local.git_repository
      MANIFESTS_DIRECTORY    = local.configsync_manifests_directory
      PROJECT_ID             = data.google_project.environment.project_id
    }
  }

  triggers = {
    # filemd5() hashes the file contents. The previous md5() call hashed only
    # the path string, so editing a template never re-triggered the script.
    # NOTE(review): this watches the whole _cluster_template tree, unlike the
    # kuberay resource which watches only its own sub-directory; confirm
    # whether a kueue-specific sub-directory was intended.
    # NOTE(review): the first apply after this change replaces the resource
    # once because the stored hash differs.
    md5_files = md5(join("", [
      for f in fileset("${path.module}/templates/configsync/templates/_cluster_template", "**") :
      filemd5("${path.module}/templates/configsync/templates/_cluster_template/${f}")
    ]))
    md5_script = filemd5("${path.module}/scripts/kueue_manifests.sh")
  }
}
# NVIDIA DCGM
###############################################################################
# resource "null_resource" "nvidia_dcgm" {
# depends_on = [
# google_gke_hub_feature_membership.cluster_configmanagement,
# google_secret_manager_secret_version.git_config,
# module.configsync_repository,
# null_resource.kueue,
# ]
# provisioner "local-exec" {
# command = "${path.module}/scripts/nvidia_dcgm_manifests.sh"
# environment = {
# GIT_CONFIG_SECRET_NAME = local.git_config_secret_name
# GIT_REPOSITORY = local.git_repository
# MANIFESTS_DIRECTORY = local.configsync_manifests_directory
# PROJECT_ID = data.google_project.environment.project_id
# }
# }
# triggers = {
# md5_files = md5(join("", [for f in fileset("${path.module}/templates/configsync/templates/_cluster_template/gmp-public/nvidia-dcgm", "**") : md5("${path.module}/templates/configsync/templates/_cluster_template/gmp-public/nvidia-dcgm/${f}")]))
# md5_script = filemd5("${path.module}/scripts/nvidia_dcgm_manifests.sh")
# }
# }
# KUBERAY MANIFESTS
###############################################################################
# Runs scripts/kuberay_manifests.sh after the kueue step.
# The resource is replaced (and the script re-run) whenever the script itself
# or the content of any file under
# templates/configsync/templates/_cluster_template/kuberay changes.
resource "null_resource" "kuberay_manifests" {
  depends_on = [
    google_gke_hub_feature_membership.cluster_configmanagement,
    google_secret_manager_secret_version.git_config,
    module.configsync_repository,
    null_resource.kueue,
    #null_resource.nvidia_dcgm,
  ]

  provisioner "local-exec" {
    command = "${path.module}/scripts/kuberay_manifests.sh"
    environment = {
      GIT_CONFIG_SECRET_NAME = local.git_config_secret_name
      GIT_REPOSITORY         = local.git_repository
      K8S_NAMESPACE          = var.namespace
      MANIFESTS_DIRECTORY    = local.configsync_manifests_directory
      PROJECT_ID             = data.google_project.environment.project_id
    }
  }

  triggers = {
    # filemd5() hashes the file contents. The previous md5() call hashed only
    # the path string, so editing a template never re-triggered the script.
    # NOTE(review): the first apply after this change replaces the resource
    # once because the stored hash differs.
    md5_files = md5(join("", [
      for f in fileset("${path.module}/templates/configsync/templates/_cluster_template/kuberay", "**") :
      filemd5("${path.module}/templates/configsync/templates/_cluster_template/kuberay/${f}")
    ]))
    md5_script = filemd5("${path.module}/scripts/kuberay_manifests.sh")
  }
}
# NAMESPACE
###############################################################################
# Runs scripts/namespace_manifests.sh on create and
# scripts/namespace_cleanup.sh on destroy.
#
# Everything the destroy-time provisioner needs is copied into `triggers` and
# read back through `self.triggers`, because destroy-time provisioners cannot
# reference other resources, locals or variables.
#
# The resource is replaced (cleanup script, then manifests script) whenever any
# trigger value changes, including the content of any file under
# templates/configsync/templates/_cluster_template/team.
resource "null_resource" "namespace_manifests" {
  depends_on = [
    google_gke_hub_feature_membership.cluster_configmanagement,
    google_secret_manager_secret_version.git_config,
    module.configsync_repository,
    null_resource.connect_gateway_kubeconfig,
    null_resource.kuberay_manifests,
  ]

  # Create-time step.
  provisioner "local-exec" {
    command = "${path.module}/scripts/namespace_manifests.sh"
    environment = {
      CLUSTER_ENV            = var.environment_name
      CLUSTER_NAME           = google_container_cluster.mlp.name
      CONFIGSYNC_IMAGE       = self.triggers.configsync_image
      GIT_CONFIG_SECRET_NAME = local.git_config_secret_name
      GIT_REPOSITORY         = local.git_repository
      KUBECONFIG             = self.triggers.kubeconfig
      K8S_NAMESPACE          = self.triggers.namespace
      MANIFESTS_DIRECTORY    = self.triggers.manifests_directory
      PROJECT_ID             = data.google_project.environment.project_id
    }
  }

  # Destroy-time step; the relative command is resolved against working_dir.
  provisioner "local-exec" {
    command = "scripts/namespace_cleanup.sh"
    environment = {
      CONFIGSYNC_IMAGE       = self.triggers.configsync_image
      ENVIRONMENT_NAME       = self.triggers.environment_name
      GIT_CONFIG_SECRET_NAME = self.triggers.git_config_secret_name
      GIT_REPOSITORY         = self.triggers.git_repository
      KUBECONFIG             = self.triggers.kubeconfig
      K8S_NAMESPACE          = self.triggers.namespace
      MANIFESTS_DIRECTORY    = self.triggers.manifests_directory
      PROJECT_ID             = self.triggers.project_id
      REPO_SYNC_NAME         = self.triggers.repo_sync_name
      REPO_SYNC_NAMESPACE    = self.triggers.repo_sync_namespace
      ROOT_SYNC_NAME         = self.triggers.root_sync_name
    }
    when        = destroy
    working_dir = path.module
  }

  triggers = {
    configsync_image       = local.configsync_image
    environment_name       = var.environment_name
    git_config_secret_name = local.git_config_secret_name
    git_repository         = local.git_repository
    kubeconfig             = "${local.kubeconfig_dir}/${data.google_project.environment.project_id}_${google_gke_hub_membership.cluster.membership_id}"
    project_id             = data.google_project.environment.project_id
    manifests_directory    = local.configsync_manifests_directory
    # filemd5() hashes the file contents. The previous md5() call hashed only
    # the path string, so editing a template never re-triggered the scripts.
    # NOTE(review): the first apply after this change replaces the resource
    # once (running namespace_cleanup.sh and then namespace_manifests.sh)
    # because the stored hash differs.
    md5_files = md5(join("", [
      for f in fileset("${path.module}/templates/configsync/templates/_cluster_template/team", "**") :
      filemd5("${path.module}/templates/configsync/templates/_cluster_template/team/${f}")
    ]))
    md5_script          = filemd5("${path.module}/scripts/namespace_manifests.sh")
    namespace           = var.namespace
    repo_sync_name      = "${var.environment_name}-${var.namespace}"
    repo_sync_namespace = var.namespace
    root_sync_name      = "root-sync"
  }
}
# GIT CREDENTIALS SECRET NAMESPACE
###############################################################################
# Runs scripts/git_cred_secret.sh with K8S_NAMESPACE set to var.namespace,
# after the namespace manifests step.
# Replaced (script re-run) when the Git user name, the Git token or the script
# changes.
resource "null_resource" "git_cred_secret_ns" {
  triggers = {
    # Only a digest of the concatenated credentials is kept as the trigger.
    md5_credentials = md5("${var.git_user_name}${var.git_token}")
    md5_script      = filemd5(format("%s/scripts/git_cred_secret.sh", path.module))
  }

  provisioner "local-exec" {
    command = format("%s/scripts/git_cred_secret.sh", path.module)
    environment = {
      PROJECT_ID             = data.google_project.environment.project_id
      KUBECONFIG             = format("%s/%s_%s", local.kubeconfig_dir, data.google_project.environment.project_id, google_gke_hub_membership.cluster.membership_id)
      K8S_NAMESPACE          = var.namespace
      GIT_REPOSITORY         = local.git_repository
      GIT_CONFIG_SECRET_NAME = local.git_config_secret_name
    }
  }

  depends_on = [
    google_secret_manager_secret_version.git_config,
    module.configsync_repository,
    null_resource.connect_gateway_kubeconfig,
    null_resource.namespace_manifests,
  ]
}
# KUBERAY WATCH NAMESPACE MANIFESTS
###############################################################################
# Runs scripts/kuberay_watch_namespace_manifests.sh after the namespace
# manifests step. The only trigger is the script's own content hash, so the
# resource is replaced (script re-run) only when the script file changes.
resource "null_resource" "kuberay_watch_namespace_manifests" {
  triggers = {
    md5_script = filemd5(format("%s/scripts/kuberay_watch_namespace_manifests.sh", path.module))
  }

  provisioner "local-exec" {
    command = format("%s/scripts/kuberay_watch_namespace_manifests.sh", path.module)
    environment = {
      PROJECT_ID             = data.google_project.environment.project_id
      MANIFESTS_DIRECTORY    = local.configsync_manifests_directory
      K8S_NAMESPACE          = var.namespace
      GIT_REPOSITORY         = local.git_repository
      GIT_CONFIG_SECRET_NAME = local.git_config_secret_name
    }
  }

  depends_on = [
    google_gke_hub_feature_membership.cluster_configmanagement,
    google_secret_manager_secret_version.git_config,
    module.configsync_repository,
    null_resource.namespace_manifests,
  ]
}
# RAY CLUSTER IN NAMESPACE
###############################################################################
# Runs scripts/cluster_namespace_manifests.sh after the KubeRay
# watch-namespace step, passing the data bucket name and the service account
# names defined in this file's locals.
# The resource is replaced (and the script re-run) whenever the script itself
# or the content of any file under
# templates/configsync/templates/_namespace_template/app changes.
resource "null_resource" "cluster_namespace_manifests" {
  depends_on = [
    google_gke_hub_feature_membership.cluster_configmanagement,
    google_secret_manager_secret_version.git_config,
    module.configsync_repository,
    null_resource.kuberay_watch_namespace_manifests,
  ]

  provisioner "local-exec" {
    command = "${path.module}/scripts/cluster_namespace_manifests.sh"
    environment = {
      DATA_BUCKET                      = local.bucket_data_name
      GIT_CONFIG_SECRET_NAME           = local.git_config_secret_name
      GIT_REPOSITORY                   = local.git_repository
      K8S_NAMESPACE                    = var.namespace
      K8S_SERVICE_ACCOUNT_HEAD         = local.ray_head_kubernetes_service_account
      K8S_SERVICE_ACCOUNT_WORKER       = local.ray_worker_kubernetes_service_account
      K8S_SERVICE_ACCOUNT_MLFLOW       = local.mlflow_kubernetes_service_account
      K8S_SERVICE_ACCOUNT_RAG_FRONTEND = local.rag_frontend_service_account
      MANIFESTS_DIRECTORY              = local.configsync_manifests_directory
      PROJECT_ID                       = data.google_project.environment.project_id
    }
  }

  triggers = {
    # filemd5() hashes the file contents. The previous md5() call hashed only
    # the path string, so editing a template never re-triggered the script.
    # NOTE(review): the first apply after this change replaces the resource
    # once because the stored hash differs.
    md5_files = md5(join("", [
      for f in fileset("${path.module}/templates/configsync/templates/_namespace_template/app", "**") :
      filemd5("${path.module}/templates/configsync/templates/_namespace_template/app/${f}")
    ]))
    md5_script = filemd5("${path.module}/scripts/cluster_namespace_manifests.sh")
  }
}
# SYNCHRONIZE CONFIGSYNC
###############################################################################
# resource "null_resource" "synchronize_configsync" {
# depends_on = [
# google_gke_hub_feature_membership.cluster_configmanagement,
# google_secret_manager_secret_version.git_config,
# module.configsync_repository,
# null_resource.cluster_namespace_manifests,
# ]
# provisioner "local-exec" {
# command = "${path.module}/scripts/synchronize_configsync.sh"
# environment = {
# CONFIGSYNC_IMAGE = local.configsync_image
# GIT_CONFIG_SECRET_NAME = local.git_config_secret_name
# GIT_REPOSITORY = local.git_repository
# KUBECONFIG = "${local.kubeconfig_dir}/${data.google_project.environment.project_id}_${google_gke_hub_membership.cluster.membership_id}"
# MANIFESTS_DIRECTORY = local.configsync_manifests_directory
# PROJECT_ID = data.google_project.environment.project_id
# REPO_SYNC_NAME = "${var.environment_name}-${var.namespace}"
# REPO_SYNC_NAMESPACE = var.namespace
# ROOT_SYNC_NAME = "root-sync"
# }
# }
# triggers = {
# md5_script = filemd5("${path.module}/scripts/synchronize_configsync.sh")
# }
# }
# GATEWAY
###############################################################################
# Kubernetes Secret named "ray-head-client" in the team namespace. Its single
# key, "secret", holds the secret of the ray_head_client IAP client.
resource "kubernetes_secret_v1" "ray_head_client" {
  metadata {
    namespace = data.kubernetes_namespace_v1.team.metadata[0].name
    name      = "ray-head-client"
  }

  data = {
    secret = google_iap_client.ray_head_client.secret
  }
}
# Renders the shared IAP backend-policy template for the gradio service and
# writes it to policy-iap-gradio.yaml in the gateway manifests directory.
resource "local_file" "policy_iap_gradio_yaml" {
  filename = format("%s/policy-iap-gradio.yaml", local.gateway_manifests_directory)

  content = templatefile(
    format("%s/templates/gateway/gcp-backend-policy-iap-service.tftpl.yaml", path.module),
    {
      policy_name              = "gradio"
      service_name             = local.gradio_service_name
      oauth_client_id          = google_iap_client.ray_head_client.client_id
      oauth_client_secret_name = kubernetes_secret_v1.ray_head_client.metadata[0].name
    }
  )
}
# Renders the shared IAP backend-policy template for the locust service and
# writes it to policy-iap-locust.yaml in the gateway manifests directory.
resource "local_file" "policy_iap_locust_yaml" {
  filename = format("%s/policy-iap-locust.yaml", local.gateway_manifests_directory)

  content = templatefile(
    format("%s/templates/gateway/gcp-backend-policy-iap-service.tftpl.yaml", path.module),
    {
      policy_name              = "locust"
      service_name             = local.locust_service_name
      oauth_client_id          = google_iap_client.ray_head_client.client_id
      oauth_client_secret_name = kubernetes_secret_v1.ray_head_client.metadata[0].name
    }
  )
}
# Renders the shared IAP backend-policy template for the ray-head service and
# writes it to policy-iap-ray-head.yaml in the gateway manifests directory.
resource "local_file" "policy_iap_ray_head_yaml" {
  filename = format("%s/policy-iap-ray-head.yaml", local.gateway_manifests_directory)

  content = templatefile(
    format("%s/templates/gateway/gcp-backend-policy-iap-service.tftpl.yaml", path.module),
    {
      policy_name              = "ray-head"
      service_name             = local.ray_head_service_name
      oauth_client_id          = google_iap_client.ray_head_client.client_id
      oauth_client_secret_name = kubernetes_secret_v1.ray_head_client.metadata[0].name
    }
  )
}
# Renders the shared IAP backend-policy template for the MLflow tracking
# service and writes it to policy-iap-mlflow.yaml in the gateway manifests
# directory.
resource "local_file" "policy_iap_mlflow_tracking_yaml" {
  filename = format("%s/policy-iap-mlflow.yaml", local.gateway_manifests_directory)

  content = templatefile(
    format("%s/templates/gateway/gcp-backend-policy-iap-service.tftpl.yaml", path.module),
    {
      policy_name              = "mlflow"
      service_name             = local.mlflow_tracking_service_name
      oauth_client_id          = google_iap_client.ray_head_client.client_id
      oauth_client_secret_name = kubernetes_secret_v1.ray_head_client.metadata[0].name
    }
  )
}
# Renders the shared IAP backend-policy template for the rag-frontend service
# and writes it to policy-iap-rag-frontend.yaml in the gateway manifests
# directory.
resource "local_file" "policy_iap_rag_frontend_yaml" {
  filename = format("%s/policy-iap-rag-frontend.yaml", local.gateway_manifests_directory)

  content = templatefile(
    format("%s/templates/gateway/gcp-backend-policy-iap-service.tftpl.yaml", path.module),
    {
      policy_name              = "rag-frontend"
      service_name             = local.rag_frontend_service_name
      oauth_client_id          = google_iap_client.ray_head_client.client_id
      oauth_client_secret_name = kubernetes_secret_v1.ray_head_client.metadata[0].name
    }
  )
}
# Renders kustomization.yaml for the gateway manifests directory. The
# `resources` list passed to the template holds the base names of the gateway,
# IAP policy and route files generated by the other local_file resources.
resource "local_file" "gateway_kustomization_yaml" {
  filename = format("%s/kustomization.yaml", local.gateway_manifests_directory)

  content = templatefile(
    format("%s/templates/kustomize/kustomization.tftpl.yaml", path.module),
    {
      namespace = data.kubernetes_namespace_v1.team.metadata[0].name
      # Strip the directory part of every generated file path; list order is
      # preserved by the for expression.
      resources = [
        for generated_path in [
          local_file.gateway_external_https_yaml.filename,
          local_file.policy_iap_gradio_yaml.filename,
          local_file.policy_iap_locust_yaml.filename,
          local_file.policy_iap_mlflow_tracking_yaml.filename,
          local_file.policy_iap_rag_frontend_yaml.filename,
          local_file.policy_iap_ray_head_yaml.filename,
          local_file.route_gradio_https_yaml.filename,
          local_file.route_locust_https_yaml.filename,
          local_file.route_mlflow_tracking_https_yaml.filename,
          local_file.route_rag_frontend_https_yaml.filename,
          local_file.route_ray_dashboard_https_yaml.filename,
        ] : basename(generated_path)
      ]
    }
  )
}
# Runs scripts/gateway_manifests.sh on create and scripts/gateway_cleanup.sh on
# destroy, both through `bash -c` with path.module as the working directory.
#
# Both provisioners read their inputs from `self.triggers`, so any change to a
# trigger value (including the content hashes of the generated gateway files)
# replaces the resource: cleanup script first, then the manifests script.
resource "null_resource" "gateway_manifests" {
  triggers = {
    configsync_image       = local.configsync_image
    environment_name       = var.environment_name
    gateway_name           = local.gateway_name
    git_config_secret_name = local.git_config_secret_name
    git_repository         = local.git_repository
    github_email           = var.git_user_email
    # NOTE(review): the raw token is stored as a trigger value, and no
    # provisioner in this resource reads it. The git_cred_secret_* resources
    # keep only an md5 digest of the credentials; consider doing the same here.
    github_token        = var.git_token
    github_user         = var.git_user_name
    kubeconfig          = format("%s/%s_%s", local.kubeconfig_dir, data.google_project.environment.project_id, google_gke_hub_membership.cluster.membership_id)
    manifests_directory = local.configsync_manifests_directory
    md5_script          = filemd5(format("%s/scripts/gateway_manifests.sh", path.module))
    # Digest over the content hashes of every generated gateway manifest.
    md5_files = md5(join("", [
      local_file.gateway_external_https_yaml.content_md5,
      local_file.gateway_kustomization_yaml.content_md5,
      local_file.policy_iap_gradio_yaml.content_md5,
      local_file.policy_iap_locust_yaml.content_md5,
      local_file.policy_iap_mlflow_tracking_yaml.content_md5,
      local_file.policy_iap_rag_frontend_yaml.content_md5,
      local_file.policy_iap_ray_head_yaml.content_md5,
      local_file.route_gradio_https_yaml.content_md5,
      local_file.route_locust_https_yaml.content_md5,
      local_file.route_mlflow_tracking_https_yaml.content_md5,
      local_file.route_rag_frontend_https_yaml.content_md5,
      local_file.route_ray_dashboard_https_yaml.content_md5,
    ]))
    namespace           = data.kubernetes_namespace_v1.team.metadata[0].name
    project_id          = data.google_project.environment.project_id
    repo_sync_name      = format("%s-%s", var.environment_name, data.kubernetes_namespace_v1.team.metadata[0].name)
    repo_sync_namespace = data.kubernetes_namespace_v1.team.metadata[0].name
  }

  # Create-time step.
  provisioner "local-exec" {
    interpreter = ["bash", "-c"]
    working_dir = path.module
    command     = "scripts/gateway_manifests.sh"
    environment = {
      CONFIGSYNC_IMAGE       = self.triggers.configsync_image
      ENVIRONMENT_NAME       = self.triggers.environment_name
      GIT_CONFIG_SECRET_NAME = self.triggers.git_config_secret_name
      GIT_REPOSITORY         = self.triggers.git_repository
      K8S_NAMESPACE          = self.triggers.namespace
      KUBECONFIG             = self.triggers.kubeconfig
      MANIFESTS_DIRECTORY    = self.triggers.manifests_directory
      PROJECT_ID             = self.triggers.project_id
      REPO_SYNC_NAME         = self.triggers.repo_sync_name
      REPO_SYNC_NAMESPACE    = self.triggers.repo_sync_namespace
    }
  }

  # Destroy-time step.
  provisioner "local-exec" {
    when        = destroy
    interpreter = ["bash", "-c"]
    working_dir = path.module
    command     = "scripts/gateway_cleanup.sh"
    environment = {
      CONFIGSYNC_IMAGE       = self.triggers.configsync_image
      GIT_CONFIG_SECRET_NAME = self.triggers.git_config_secret_name
      GIT_REPOSITORY         = self.triggers.git_repository
      K8S_NAMESPACE          = self.triggers.namespace
      KUBECONFIG             = self.triggers.kubeconfig
      MANIFESTS_DIRECTORY    = self.triggers.manifests_directory
      PROJECT_ID             = self.triggers.project_id
      REPO_SYNC_NAME         = self.triggers.repo_sync_name
      REPO_SYNC_NAMESPACE    = self.triggers.repo_sync_namespace
    }
  }

  depends_on = [
    google_compute_managed_ssl_certificate.external_gateway,
    google_endpoints_service.ray_dashboard_https,
    google_gke_hub_feature_membership.cluster_configmanagement,
    google_secret_manager_secret_version.git_config,
    kubernetes_secret_v1.ray_head_client,
    module.configsync_repository,
    null_resource.cluster_namespace_manifests,
  ]
}
# WAIT FOR CONFIGSYNC
###############################################################################
# Runs scripts/synchronize_configsync.sh after the gateway manifests step.
# The only trigger is the script's own content hash, so the resource is
# replaced (script re-run) only when the script file changes.
resource "null_resource" "wait_for_configsync" {
  triggers = {
    md5_script = filemd5(format("%s/scripts/synchronize_configsync.sh", path.module))
  }

  provisioner "local-exec" {
    command = format("%s/scripts/synchronize_configsync.sh", path.module)
    environment = {
      ROOT_SYNC_NAME         = "root-sync"
      REPO_SYNC_NAMESPACE    = data.kubernetes_namespace_v1.team.metadata[0].name
      REPO_SYNC_NAME         = format("%s-%s", var.environment_name, data.kubernetes_namespace_v1.team.metadata[0].name)
      PROJECT_ID             = data.google_project.environment.project_id
      MANIFESTS_DIRECTORY    = local.configsync_manifests_directory
      KUBECONFIG             = format("%s/%s_%s", local.kubeconfig_dir, data.google_project.environment.project_id, google_gke_hub_membership.cluster.membership_id)
      GIT_REPOSITORY         = local.git_repository
      GIT_CONFIG_SECRET_NAME = local.git_config_secret_name
      CONFIGSYNC_IMAGE       = local.configsync_image
    }
  }

  depends_on = [
    google_gke_hub_feature_membership.cluster_configmanagement,
    google_secret_manager_secret_version.git_config,
    module.configsync_repository,
    null_resource.gateway_manifests,
  ]
}