# aws/eks-values.yaml
# numReplicas: 1
# Container image to deploy.
# NOTE(review): how the chart combines registry/repository/name/tag into the
# final image reference is not visible in this file — verify against the
# chart templates.
image:
  # Add your custom HUGS registry here, in the form
  # XXXXXXXXXXXX.dkr.ecr.us-east-1.amazonaws.com
  registry: null
  repository: hugging-face
  name: nvidia-meta-llama-meta-llama-3.1-8b-instruct
  # Quoted so the version is always read as a string.
  tag: "0.1.0"
  pullPolicy: Always
# Service account name handed to the chart.
# NOTE(review): whether the chart creates this account or expects it to
# already exist is not visible here — confirm before deploying.
serviceAccountName: hugs-service-account
# Pod-level security context. Left empty here; uncomment the example below
# to set values.
podSecurityContext: {}
  # fsGroup: 2000
# Container-level security context. Left empty here; the commented block
# below is an example of a hardened configuration.
securityContext: {}
  # capabilities:
  #   drop:
  #     - ALL
  # readOnlyRootFilesystem: true
  # runAsNonRoot: true
  # runAsUser: 1000
# The port can be set via `env`; it defaults to 80.
# If you change the exposed port, also update the service, readiness,
# and liveness ports to match.
# env:
#   PORT: "80"
# Liveness probe settings; only `enabled` is set, the rest are examples.
livenessProbe:
  enabled: true
  # Bigger LLMs such as Llama 3.1 405B take longer to download, so the
  # default delay may not be enough for the download to complete. In that
  # case, increase initialDelaySeconds.
  # initialDelaySeconds: 360
  # periodSeconds: 15
  # timeoutSeconds: 5
  # failureThreshold: 3
# Readiness probe settings; only `enabled` is set, the rest are examples.
readinessProbe:
  enabled: true
  # Bigger LLMs such as Llama 3.1 405B take longer to download, so the
  # default delay may not be enough for the download to complete. In that
  # case, increase initialDelaySeconds.
  # initialDelaySeconds: 360
  # periodSeconds: 30
  # timeoutSeconds: 5
  # failureThreshold: 3
# Service in front of the pods. Keep `port` in sync with the port the
# container exposes (80 unless overridden through `env`).
service:
  port: 80
  type: NodePort
# Optional: also start an ingress so the service gets its own address and
# the port does not have to be forwarded to localhost.
ingress:
  enabled: true
  className: alb
  annotations:
    # NOTE(review): `internet-facing` requests a publicly reachable load
    # balancer; confirm that is intended, or use `internal` instead.
    alb.ingress.kubernetes.io/scheme: internet-facing
  hosts:
    # An empty host together with a `/` prefix path.
    - host: ""
      paths:
        - path: /
          pathType: Prefix
# GPU allocation. EKS requires both the `requests` and the `limits`
# properties within `resources`, so the same value is given for both.
resources:
  requests:
    nvidia.com/gpu: 1
  limits:
    nvidia.com/gpu: 1
# Memory-backed emptyDir volume named `dshm`, capped at 1Gi.
volumes:
  - name: dshm
    emptyDir:
      medium: Memory
      sizeLimit: 1Gi
# Mounts the `dshm` volume at /dev/shm inside the container.
volumeMounts:
  - name: dshm
    mountPath: /dev/shm
# Schedule pods only onto nodes labelled as part of the `hugs-node-group`
# EKS node group.
nodeSelector:
  eks.amazonaws.com/nodegroup: hugs-node-group
# Autoscaling between 1 and 2 replicas.
# NOTE(review): both utilization targets are empty strings while autoscaling
# is enabled — confirm how the chart's autoscaler template treats empty
# targets.
autoscaling:
  enabled: true
  minReplicas: 1
  maxReplicas: 2
  targetCPUUtilizationPercentage: ""
  targetMemoryUtilizationPercentage: ""