tutorials-and-examples/nvidia-nim/blueprints/digitalhuman/digital-human-nimbp.yaml (482 lines of code) (raw):

---
# Digital-human blueprint: one Deployment per NVIDIA NIM container, each
# (except dighum-audio2face-3d) fronted by a LoadBalancer Service.
#
# Every Deployment below:
#   * runs a single replica on nodes labelled cloud.google.com/gke-gpu=true,
#   * reads NGC_API_KEY from the `ngc-api-key` Secret and pulls its image
#     with the `secret-nvcr` pull secret (neither is created here; both are
#     expected to exist already),
#   * mounts a memory-backed emptyDir at /dev/shm,
#   * is health-checked over HTTP on /v1/health/ready.
#
# Probes: the livenessProbe keeps its large failureThreshold (100 checks at
# a 10s period, roughly 1000s) - presumably to cover slow model start-up.
# The readinessProbe uses the same endpoint so a Pod is only added to its
# Service's endpoints once the health check answers.
#
# NOTE(review): the LoadBalancer Services configure no source-range
# restriction or internal load-balancer annotation - confirm that exposing
# these endpoints outside the cluster is intended.

# LLM NIM (llama3-8b-instruct); serves HTTP on container port 8000.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: dighum-llama3-8b
spec:
  replicas: 1
  selector:
    matchLabels:
      app: dighum-llama3-8b
  template:
    metadata:
      labels:
        app: dighum-llama3-8b
    spec:
      containers:
        - name: dighum-llama3-8b
          image: nvcr.io/nim/meta/llama3-8b-instruct:1.0.3
          ports:
            - containerPort: 8000
          resources:
            requests:
              nvidia.com/gpu: 1
            limits:
              nvidia.com/gpu: 1
          volumeMounts:
            - name: dshm
              mountPath: /dev/shm
          env:
            - name: NGC_API_KEY
              valueFrom:
                secretKeyRef:
                  name: ngc-api-key
                  key: NGC_API_KEY
            - name: NIM_PEFT_REFRESH_INTERVAL
              value: "3600"
          livenessProbe:
            httpGet:
              path: /v1/health/ready
              port: 8000
            initialDelaySeconds: 10
            timeoutSeconds: 20
            periodSeconds: 10
            failureThreshold: 100
          # Keep the Pod out of Service endpoints until the health check passes.
          readinessProbe:
            httpGet:
              path: /v1/health/ready
              port: 8000
            initialDelaySeconds: 10
            timeoutSeconds: 20
            periodSeconds: 10
            failureThreshold: 3
      imagePullSecrets:
        - name: secret-nvcr
      nodeSelector:
        cloud.google.com/gke-gpu: "true"
      volumes:
        - name: dshm
          emptyDir:
            medium: Memory
            sizeLimit: 20Gi
---
# Exposes dighum-llama3-8b on port 80.
apiVersion: v1
kind: Service
metadata:
  name: dighum-llama3-8b-lb
spec:
  type: LoadBalancer
  selector:
    app: dighum-llama3-8b
  ports:
    - protocol: TCP
      port: 80
      targetPort: 8000  # The container still listens on 8000
---
# Embedding NIM (nv-embedqa-e5-v5); serves HTTP on container port 8000.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: dighum-embedqa-e5v5
spec:
  replicas: 1
  selector:
    matchLabels:
      app: dighum-embedqa-e5v5
  template:
    metadata:
      labels:
        app: dighum-embedqa-e5v5
    spec:
      containers:
        - name: dighum-embedqa-e5v5
          image: nvcr.io/nim/nvidia/nv-embedqa-e5-v5:1.0.1
          ports:
            - containerPort: 8000
          resources:
            requests:
              nvidia.com/gpu: 1
            limits:
              nvidia.com/gpu: 1
          volumeMounts:
            - name: dshm
              mountPath: /dev/shm
          env:
            - name: NGC_API_KEY
              valueFrom:
                secretKeyRef:
                  name: ngc-api-key
                  key: NGC_API_KEY
          livenessProbe:
            httpGet:
              path: /v1/health/ready
              port: 8000
            initialDelaySeconds: 10
            timeoutSeconds: 20
            periodSeconds: 10
            failureThreshold: 100
          # Keep the Pod out of Service endpoints until the health check passes.
          readinessProbe:
            httpGet:
              path: /v1/health/ready
              port: 8000
            initialDelaySeconds: 10
            timeoutSeconds: 20
            periodSeconds: 10
            failureThreshold: 3
      imagePullSecrets:
        - name: secret-nvcr
      nodeSelector:
        cloud.google.com/gke-gpu: "true"
      volumes:
        - name: dshm
          emptyDir:
            medium: Memory
            sizeLimit: 16Gi
---
# Exposes dighum-embedqa-e5v5 on port 80 (forwarded to container port 8000).
apiVersion: v1
kind: Service
metadata:
  name: dighum-embedqa-e5v5-lb
spec:
  type: LoadBalancer
  selector:
    app: dighum-embedqa-e5v5
  ports:
    - protocol: TCP
      port: 80
      targetPort: 8000
---
# Reranking NIM (nv-rerankqa-mistral-4b-v3); serves HTTP on container port 8000.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: dighum-rerankqa-mistral4bv3
spec:
  replicas: 1
  selector:
    matchLabels:
      app: dighum-rerankqa-mistral4bv3
  template:
    metadata:
      labels:
        app: dighum-rerankqa-mistral4bv3
    spec:
      containers:
        - name: dighum-rerankqa-mistral4bv3
          image: nvcr.io/nim/nvidia/nv-rerankqa-mistral-4b-v3:1.0.1
          ports:
            - containerPort: 8000
          resources:
            requests:
              nvidia.com/gpu: 1
            limits:
              nvidia.com/gpu: 1
          volumeMounts:
            - name: dshm
              mountPath: /dev/shm
          env:
            - name: NGC_API_KEY
              valueFrom:
                secretKeyRef:
                  name: ngc-api-key
                  key: NGC_API_KEY
          livenessProbe:
            httpGet:
              path: /v1/health/ready
              port: 8000
            initialDelaySeconds: 10
            timeoutSeconds: 20
            periodSeconds: 10
            failureThreshold: 100
          # Keep the Pod out of Service endpoints until the health check passes.
          readinessProbe:
            httpGet:
              path: /v1/health/ready
              port: 8000
            initialDelaySeconds: 10
            timeoutSeconds: 20
            periodSeconds: 10
            failureThreshold: 3
      imagePullSecrets:
        - name: secret-nvcr
      nodeSelector:
        cloud.google.com/gke-gpu: "true"
      volumes:
        - name: dshm
          emptyDir:
            medium: Memory
            sizeLimit: 20Gi
---
# Exposes dighum-rerankqa-mistral4bv3 on port 80 (forwarded to 8000).
apiVersion: v1
kind: Service
metadata:
  name: dighum-rerankqa-mistral4bv3-lb
spec:
  type: LoadBalancer
  selector:
    app: dighum-rerankqa-mistral4bv3
  ports:
    - protocol: TCP
      port: 80
      targetPort: 8000
---
# Speech-recognition NIM (riva-asr); HTTP on 9000 and gRPC on 50051, matching
# the NIM_HTTP_API_PORT / NIM_GRPC_API_PORT values set in its environment.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: dighum-parakeet-asr-1-1b
spec:
  replicas: 1
  selector:
    matchLabels:
      app: dighum-parakeet-asr-1-1b
  template:
    metadata:
      labels:
        app: dighum-parakeet-asr-1-1b
    spec:
      containers:
        - name: dighum-parakeet-asr-1-1b
          image: nvcr.io/nim/nvidia/riva-asr:1.3.0
          ports:
            - containerPort: 9000
              name: http
            - containerPort: 50051
              name: grpc
          resources:
            requests:
              nvidia.com/gpu: 1
            limits:
              nvidia.com/gpu: 1
          volumeMounts:
            - name: dshm
              mountPath: /dev/shm
          env:
            - name: NGC_API_KEY
              valueFrom:
                secretKeyRef:
                  name: ngc-api-key
                  key: NGC_API_KEY
            - name: NIM_HTTP_API_PORT
              value: "9000"
            - name: NIM_GRPC_API_PORT
              value: "50051"
            - name: NIM_TAGS_SELECTOR
              value: "name=parakeet-1-1b-ctc-riva-en-us,mode=str"
          livenessProbe:
            httpGet:
              path: /v1/health/ready
              port: 9000
            initialDelaySeconds: 10
            timeoutSeconds: 20
            periodSeconds: 10
            failureThreshold: 100
          # Keep the Pod out of Service endpoints until the health check passes.
          readinessProbe:
            httpGet:
              path: /v1/health/ready
              port: 9000
            initialDelaySeconds: 10
            timeoutSeconds: 20
            periodSeconds: 10
            failureThreshold: 3
      imagePullSecrets:
        - name: secret-nvcr
      nodeSelector:
        cloud.google.com/gke-gpu: "true"
      volumes:
        - name: dshm
          emptyDir:
            medium: Memory
            sizeLimit: 8Gi
---
# Exposes dighum-parakeet-asr-1-1b: HTTP on 80 (to 9000) and gRPC on 50051.
apiVersion: v1
kind: Service
metadata:
  name: dighum-parakeet-asr-1-1b-lb
spec:
  type: LoadBalancer
  selector:
    app: dighum-parakeet-asr-1-1b
  ports:
    - protocol: TCP
      name: http
      port: 80
      targetPort: 9000
    - protocol: TCP
      name: grpc
      port: 50051
      targetPort: 50051
---
# Text-to-speech NIM (riva-tts); HTTP on 9000 and gRPC on 50051, matching
# the NIM_HTTP_API_PORT / NIM_GRPC_API_PORT values set in its environment.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: dighum-fastpitch-tts
spec:
  replicas: 1
  selector:
    matchLabels:
      app: dighum-fastpitch-tts
  template:
    metadata:
      labels:
        app: dighum-fastpitch-tts
    spec:
      containers:
        - name: dighum-fastpitch-tts
          image: nvcr.io/nim/nvidia/riva-tts:1.3.0
          ports:
            - containerPort: 9000
              name: http
            - containerPort: 50051
              name: grpc
          resources:
            requests:
              nvidia.com/gpu: 1
            limits:
              nvidia.com/gpu: 1
          volumeMounts:
            - name: dshm
              mountPath: /dev/shm
          env:
            - name: NGC_API_KEY
              valueFrom:
                secretKeyRef:
                  name: ngc-api-key
                  key: NGC_API_KEY
            - name: NIM_HTTP_API_PORT
              value: "9000"
            - name: NIM_GRPC_API_PORT
              value: "50051"
            - name: NIM_TAGS_SELECTOR
              value: "name=fastpitch-hifigan-en-us"
          livenessProbe:
            httpGet:
              path: /v1/health/ready
              port: 9000
            initialDelaySeconds: 10
            timeoutSeconds: 20
            periodSeconds: 10
            failureThreshold: 100
          # Keep the Pod out of Service endpoints until the health check passes.
          readinessProbe:
            httpGet:
              path: /v1/health/ready
              port: 9000
            initialDelaySeconds: 10
            timeoutSeconds: 20
            periodSeconds: 10
            failureThreshold: 3
      imagePullSecrets:
        - name: secret-nvcr
      nodeSelector:
        cloud.google.com/gke-gpu: "true"
      volumes:
        - name: dshm
          emptyDir:
            medium: Memory
            sizeLimit: 8Gi
---
# Exposes dighum-fastpitch-tts: HTTP on 80 (to 9000) and gRPC on 50051.
apiVersion: v1
kind: Service
metadata:
  name: dighum-fastpitch-tts-lb
spec:
  type: LoadBalancer
  selector:
    app: dighum-fastpitch-tts
  ports:
    - protocol: TCP
      name: http
      port: 80
      targetPort: 9000
    - protocol: TCP
      name: grpc
      port: 50051
      targetPort: 50051
---
# Audio2Face-3D NIM; requests 2 GPUs and declares ports 9000 (http) and
# 50051 (grpc). No Service in this file selects this Deployment; with
# hostNetwork enabled the container shares the node's network namespace.
#
# NOTE(review): hostNetwork plus a privileged container grants broad access
# to the node - confirm both settings are actually required.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: dighum-audio2face-3d
spec:
  replicas: 1
  selector:
    matchLabels:
      app: dighum-audio2face-3d
  template:
    metadata:
      labels:
        app: dighum-audio2face-3d
    spec:
      hostNetwork: true
      containers:
        - name: dighum-audio2face-3d
          image: nvcr.io/nim/nvidia/audio2face-3d:1.2
          securityContext:
            privileged: true
          ports:
            - containerPort: 9000
              name: http
            - containerPort: 50051
              name: grpc
          resources:
            requests:
              nvidia.com/gpu: 2
            limits:
              nvidia.com/gpu: 2
          volumeMounts:
            - name: dshm
              mountPath: /dev/shm
          env:
            - name: NGC_API_KEY
              valueFrom:
                secretKeyRef:
                  name: ngc-api-key
                  key: NGC_API_KEY
            # - name: NIM_MANIFEST_PROFILE
            #   value: "default"
            - name: NIM_DISABLE_MODEL_DOWNLOAD
              value: "true"
          livenessProbe:
            httpGet:
              path: /v1/health/ready
              port: 9000
            initialDelaySeconds: 10
            timeoutSeconds: 20
            periodSeconds: 10
            failureThreshold: 100
          # Mark the Pod Ready only once the health check passes.
          readinessProbe:
            httpGet:
              path: /v1/health/ready
              port: 9000
            initialDelaySeconds: 10
            timeoutSeconds: 20
            periodSeconds: 10
            failureThreshold: 3
      imagePullSecrets:
        - name: secret-nvcr
      nodeSelector:
        cloud.google.com/gke-gpu: "true"
      volumes:
        - name: dshm
          emptyDir:
            medium: Memory
            sizeLimit: 16Gi
---
# Maxine Audio2Face-2D NIM; declares ports 8000 (http) and 8001 (ws).
#
# NOTE(review): this is the only image in the file on the floating `latest`
# tag; every other image is pinned to a version. Consider pinning once the
# intended release is confirmed.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: dighum-maxine-audio2face-2d
spec:
  replicas: 1
  selector:
    matchLabels:
      app: dighum-maxine-audio2face-2d
  template:
    metadata:
      labels:
        app: dighum-maxine-audio2face-2d
    spec:
      containers:
        - name: dighum-maxine-audio2face-2d
          image: nvcr.io/nim/nvidia/maxine-audio2face-2d:latest
          ports:
            - containerPort: 8000
              name: http
            - containerPort: 8001
              name: ws
          resources:
            requests:
              nvidia.com/gpu: 1
            limits:
              nvidia.com/gpu: 1
          volumeMounts:
            - name: dshm
              mountPath: /dev/shm
          env:
            - name: NGC_API_KEY
              valueFrom:
                secretKeyRef:
                  name: ngc-api-key
                  key: NGC_API_KEY
            - name: NIM_HTTP_API_PORT
              value: "8000"
          livenessProbe:
            httpGet:
              path: /v1/health/ready
              port: 8000
            initialDelaySeconds: 10
            timeoutSeconds: 20
            periodSeconds: 10
            failureThreshold: 100
          # Keep the Pod out of Service endpoints until the health check passes.
          readinessProbe:
            httpGet:
              path: /v1/health/ready
              port: 8000
            initialDelaySeconds: 10
            timeoutSeconds: 20
            periodSeconds: 10
            failureThreshold: 3
      imagePullSecrets:
        - name: secret-nvcr
      nodeSelector:
        cloud.google.com/gke-gpu: "true"
      volumes:
        - name: dshm
          emptyDir:
            medium: Memory
            sizeLimit: 16Gi
---
# Exposes dighum-maxine-audio2face-2d: HTTP on 80 (to 8000) and ws on 8001.
apiVersion: v1
kind: Service
metadata:
  name: dighum-maxine-audio2face-2d-lb
spec:
  type: LoadBalancer
  selector:
    app: dighum-maxine-audio2face-2d
  ports:
    - protocol: TCP
      name: http
      port: 80
      targetPort: 8000
    - protocol: TCP
      name: ws
      port: 8001
      targetPort: 8001