modules/kuberay-cluster/values.yaml
# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Default values for ray-cluster.
# This is a YAML-formatted file.
# Declare variables to be passed into your templates.
# The KubeRay community welcomes PRs to expose additional configuration
# in this Helm chart.
image:
# Replace this with your own image if needed.
repository: ${image}
tag: ${image_tag}
pullPolicy: IfNotPresent
nameOverride: "kuberay"
fullnameOverride: ""
imagePullSecrets: []
# - name: an-existing-secret
head:
groupName: headgroup
# If enableInTreeAutoscaling is true, the autoscaler sidecar will be added to the Ray head pod.
# Ray autoscaler integration is supported only for Ray versions >= 1.11.0
# Ray autoscaler integration is Beta with KubeRay >= 0.3.0 and Ray >= 2.0.0.
enableInTreeAutoscaling: true
# autoscalerOptions is an OPTIONAL field specifying configuration overrides for the Ray autoscaler.
  # The example configuration shown below represents the DEFAULT values.
# autoscalerOptions:
# upscalingMode: Default
# idleTimeoutSeconds: 60
# securityContext: {}
# env: []
# envFrom: []
# resources specifies optional resource request and limit overrides for the autoscaler container.
# For large Ray clusters, we recommend monitoring container resource usage to determine if overriding the defaults is required.
# resources:
# limits:
# cpu: "500m"
# memory: "512Mi"
# requests:
# cpu: "500m"
# memory: "512Mi"
labels:
cloud.google.com/gke-ray-node-type: head
${indent(4, chomp(yamlencode(additional_labels)))}
serviceAccountName: ${k8s_service_account}
rayStartParams:
dashboard-host: '0.0.0.0'
block: 'true'
  # containerEnv specifies environment variables for the Ray container.
  # It follows the standard K8s container env schema.
containerEnv:
- name: RAY_memory_monitor_refresh_ms
value: "0"
- name: RAY_GRAFANA_IFRAME_HOST
value: http://${grafana_host}
- name: RAY_GRAFANA_HOST
value: http://grafana:80
- name: RAY_PROMETHEUS_HOST
value: http://frontend:9090
- name: CLOUDSQL_INSTANCE_CONNECTION_NAME
value: ${cloudsql_instance_connection_name}
envFrom: []
# - secretRef:
# name: my-env-secret
# ports optionally allows specifying ports for the Ray container.
# ports: []
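  # A commented-out sketch of what a ports list could look like. The port numbers
  # below are Ray's conventional defaults (GCS server, dashboard, client) and are
  # shown only as an illustrative assumption, not values set by this module.
  # ports:
  #   - containerPort: 6379
  #     name: gcs-server
  #   - containerPort: 8265
  #     name: dashboard
  #   - containerPort: 10001
  #     name: client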
# resource requests and limits for the Ray head container.
# Modify as needed for your application.
# Note that the resources in this example are much too small for production;
# we don't recommend allocating less than 8G memory for a Ray pod in production.
# Ray pods should be sized to take up entire K8s nodes when possible.
# Always set CPU and memory limits for Ray pods.
# It is usually best to set requests equal to limits.
# See https://docs.ray.io/en/latest/cluster/kubernetes/user-guides/config.html#resources
# for further guidance.
resources:
limits:
cpu: "4"
# To avoid out-of-memory issues, never allocate less than 2G memory for the Ray head.
# Ray recommends at least 8G memory for production workloads.
memory: "8G"
      # The sum of ephemeral-storage requests must be at most 10Gi on the Autopilot default compute class.
      # This includes the ray-head container, the gcsfuse sidecar, fluent-bit, and the Ray autoscaler sidecar, which requests 1Gi by default.
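      # As a rough, assumed breakdown: 3Gi (ray-head, below) + 3Gi (gcsfuse sidecar, via the
      # gke-gcsfuse/ephemeral-storage-limit annotation) + 1Gi (autoscaler) = 7Gi, leaving
      # headroom for fluent-bit within the 10Gi cap.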
ephemeral-storage: 3Gi
requests:
cpu: "4"
memory: "8G"
ephemeral-storage: 3Gi
annotations:
gke-gcsfuse/volumes: "true"
gke-gcsfuse/cpu-limit: "1"
gke-gcsfuse/memory-limit: 2Gi
gke-gcsfuse/ephemeral-storage-limit: 3Gi
nodeSelector:
iam.gke.io/gke-metadata-server-enabled: "true"
tolerations: []
affinity: {}
# Ray container security context.
securityContext:
${indent(4, chomp(yamlencode(security_context)))}
volumes:
- name: gcs-fuse-csi-ephemeral
csi:
driver: gcsfuse.csi.storage.gke.io
#readOnly: true
volumeAttributes:
bucketName: ${gcs_bucket}
mountOptions: "implicit-dirs,uid=1000,gid=100"
- name: secret-volume
secret:
secretName: ${secret_name}
optional: true
  # Ray writes logs to /tmp/ray/session_latest/logs
volumeMounts:
- name: gcs-fuse-csi-ephemeral
mountPath: /data
- name: secret-volume
mountPath: /etc/secret-volume
readOnly: true
worker:
  # If you want to disable the default worker group,
  # uncomment the line below.
# disabled: true
groupName: workerGroup
replicas: 0
minReplicas: 0
maxReplicas: 5
type: worker
labels:
cloud.google.com/gke-ray-node-type: worker
${indent(4, chomp(yamlencode(additional_labels)))}
serviceAccountName: ${k8s_service_account}
rayStartParams:
block: 'true'
  initContainerImage: 'busybox:1.28' # Allows users to specify the init container image; the busybox image can be pulled from a private repository.
# Security context for the init container.
initContainerSecurityContext: {}
  # containerEnv specifies environment variables for the Ray container.
  # It follows the standard K8s container env schema.
containerEnv:
- name: CLOUDSQL_INSTANCE_CONNECTION_NAME
value: ${cloudsql_instance_connection_name}
envFrom: []
# - secretRef:
# name: my-env-secret
# ports optionally allows specifying ports for the Ray container.
# ports: []
  # resource requests and limits for the Ray worker container.
# Modify as needed for your application.
# Note that the resources in this example are much too small for production;
# we don't recommend allocating less than 8G memory for a Ray pod in production.
# Ray pods should be sized to take up entire K8s nodes when possible.
# Always set CPU and memory limits for Ray pods.
# It is usually best to set requests equal to limits.
# See https://docs.ray.io/en/latest/cluster/kubernetes/user-guides/config.html#resources
# for further guidance.
resources:
requests:
${indent(6, chomp(yamlencode(resource_requests)))}
limits:
${indent(6, chomp(yamlencode(resource_requests)))}
annotations:
${indent(4, chomp(yamlencode(annotations)))}
nodeSelector:
${indent(4, chomp(yamlencode(node_selectors)))}
tolerations: []
affinity: {}
securityContext:
${indent(4, chomp(yamlencode(security_context)))}
volumes:
- name: gcs-fuse-csi-ephemeral
csi:
driver: gcsfuse.csi.storage.gke.io
#readOnly: true
volumeAttributes:
bucketName: ${gcs_bucket}
mountOptions: "implicit-dirs,uid=1000,gid=100"
- name: secret-volume
secret:
secretName: ${secret_name}
optional: true
  # Ray writes logs to /tmp/ray/session_latest/logs
volumeMounts:
- name: gcs-fuse-csi-ephemeral
mountPath: /data
- name: secret-volume
mountPath: /etc/secret-volume
readOnly: true
# The map's key is used as the groupName.
# For example, the key smallGroup in the map below
# will be used as the groupName.
additionalWorkerGroups:
smallGroup:
# Disabled by default
disabled: true
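  # A hedged, commented-out sketch of what an enabled extra worker group could look
  # like. The group name, replica counts, and resource figures are illustrative
  # assumptions; the fields mirror the "worker" block above.
  # mediumGroup:
  #   disabled: false
  #   replicas: 1
  #   minReplicas: 0
  #   maxReplicas: 3
  #   rayStartParams:
  #     block: 'true'
  #   resources:
  #     requests:
  #       cpu: "8"
  #       memory: "16G"
  #     limits:
  #       cpu: "8"
  #       memory: "16G"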
service:
type: ClusterIP