aws/eks-values.yaml (49 lines of code) (raw):

# Helm values override for deploying on AWS EKS.
#
# Uncomment to pin a fixed replica count.
# numReplicas: 1

image:
  # Add your custom HUGS Registry here as
  # XXXXXXXXXXXX.dkr.ecr.us-east-1.amazonaws.com
  registry: null
  repository: hugging-face
  name: nvidia-meta-llama-meta-llama-3.1-8b-instruct
  pullPolicy: Always
  # Quoted so the version is always read as a string.
  tag: "0.1.0"

serviceAccountName: hugs-service-account

podSecurityContext: {}
  # fsGroup: 2000

securityContext: {}
  # capabilities:
  #   drop:
  #     - ALL
  # readOnlyRootFilesystem: true
  # runAsNonRoot: true
  # runAsUser: 1000

# Via `env` you can specify the port; it defaults to 80.
# Note that if you update the exposed port, you should also
# update the service, readiness, and liveness ports.
# env:
#   PORT: "80"

livenessProbe:
  enabled: true
  # You may want to increase `initialDelaySeconds` for the bigger LLMs,
  # such as Llama 3.1 405B, since the download takes longer and the
  # default delay may not be enough for it to complete.
  # initialDelaySeconds: 360
  # periodSeconds: 15
  # timeoutSeconds: 5
  # failureThreshold: 3

readinessProbe:
  enabled: true
  # You may want to increase `initialDelaySeconds` for the bigger LLMs,
  # such as Llama 3.1 405B, since the download takes longer and the
  # default delay may not be enough for it to complete.
  # initialDelaySeconds: 360
  # periodSeconds: 30
  # timeoutSeconds: 5
  # failureThreshold: 3

service:
  type: NodePort
  port: 80

# Optionally, one may want to start an ingress service too, with
# a custom IP to avoid having to forward the port to localhost.
ingress:
  enabled: true
  className: alb
  annotations:
    alb.ingress.kubernetes.io/scheme: internet-facing
  hosts:
    # An empty host is kept as an explicit empty string, not null.
    - host: ""
      paths:
        - path: /
          pathType: Prefix

resources:
  requests:
    nvidia.com/gpu: 1
  # EKS requires both the `requests` and the `limits` properties
  # within the `resources`.
  limits:
    nvidia.com/gpu: 1

# Memory-backed emptyDir, mounted below at /dev/shm.
volumes:
  - name: dshm
    emptyDir:
      medium: Memory
      sizeLimit: 1Gi

volumeMounts:
  - mountPath: /dev/shm
    name: dshm

nodeSelector:
  eks.amazonaws.com/nodegroup: hugs-node-group

autoscaling:
  enabled: true
  minReplicas: 1
  maxReplicas: 2
  # NOTE(review): autoscaling is enabled but both utilization targets are
  # empty strings; confirm how the chart's HPA template treats empty
  # targets, or set at least one percentage here.
  targetMemoryUtilizationPercentage: ""
  targetCPUUtilizationPercentage: ""