sample_workloads/pingpong/gke/templates/pingpong.yaml
{{- $requiredVar := .Values.cluster.nNodes | required ".Values.cluster.nNodes is required" -}}
{{- $requiredVar := .Values.cluster.nodePool | required ".Values.cluster.nodePool is required" -}}
{{- $requiredVar := .Values.workload.jobTimestamp | required ".Values.workload.jobTimestamp is required" -}}
{{- $requiredVar := .Values.workload.image | required ".Values.workload.image is required" -}}
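# Headless Service (clusterIP: None) that gives the leader pod (pod index 0) a stable DNS name; the workload uses it as MASTER_ADDR.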
apiVersion: v1
kind: Service
metadata:
name: "pingpong-leader-{{$.Release.Name}}"
spec:
selector:
name: "pingpong-leader-{{$.Release.Name}}"
clusterIP: None
ports:
- name: pingpong-leader
port: 6002
---
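# One Pod per node is rendered by the range loop below; the pod with index 0 carries the leader label that the Service above selects.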
{{$node_count := .Values.cluster.nNodes | int}}
# This needs to be updated to allow uneven distribution of nodes to SBs
{{- $root := . -}}
{{range $node_index, $element := until $node_count}}
apiVersion: v1
kind: Pod
metadata:
  name: pingpong-{{$.Release.Name}}-pod{{$node_index}}
{{if eq $node_index 0}}
  labels:
    name: pingpong-leader-{{$.Release.Name}}
{{end}}
spec:
  hostNetwork: true
  dnsPolicy: ClusterFirstWithHostNet
  hostname: pingpong-pod{{$node_index}}
  subdomain: pingpong-{{$.Release.Name}}
  serviceAccountName: "default"
  restartPolicy: Never
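  # Schedule only onto GPU nodes in the configured node pool and tolerate the GPU taint GKE places on accelerator nodes.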
  affinity:
    nodeAffinity:
      requiredDuringSchedulingIgnoredDuringExecution:
        nodeSelectorTerms:
        - matchExpressions:
          - key: cloud.google.com/gke-accelerator
            operator: Exists
          - key: cloud.google.com/gke-nodepool
            operator: In
            values: [{{$.Values.cluster.nodePool}}]
  tolerations:
  - operator: "Exists"
    key: nvidia.com/gpu
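  # Host mounts for the NVIDIA driver libraries and the TCPX socket directory, plus emptyDirs for /dev/shm, the TCPX NCCL plugin, and the workload-terminated sentinel.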
  volumes:
  - name: nvidia-install-dir-host
    hostPath:
      path: /home/kubernetes/bin/nvidia/lib64
  - name: tcpd-socket
    hostPath:
      path: /run/tcpx
  - name: shared-memory
    emptyDir:
      medium: "Memory"
      sizeLimit: 200Gi
  - name: workload-terminated-volume
    emptyDir: {}
  - name: tcpx-nccl-plugin-volume
    emptyDir: {}
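  # When GPUDirect-TCPX is enabled, an init container installs the TCPX NCCL plugin into the shared emptyDir before the main containers start.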
{{if eq $root.Values.network.useGPUDirectTcpx "yes"}}
  initContainers:
  - name: tcpx-nccl-plugin-installer
    image: {{$root.Values.network.ncclPlugin}}
    imagePullPolicy: Always
    volumeMounts:
    - name: tcpx-nccl-plugin-volume
      mountPath: /var/lib/tcpx
    resources:
      requests:
        cpu: 150m
    command:
    - /bin/sh
    - -c
    - |
      /scripts/container_entry.sh install --install-nccl
{{end}}
  containers:
{{if eq $root.Values.network.useGPUDirectTcpx "yes"}}
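  # TCPX receive-data-path manager (RxDM) sidecar; it keeps running until the workload creates the workload_terminated sentinel file.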
  - name: tcpd-daemon
    image: {{$root.Values.network.rxdmContainer}}
    imagePullPolicy: Always
    command:
    - "bash"
    - "-c"
    - |
      /tcpgpudmarxd/build/app/tcpgpudmarxd --gpu_nic_preset a3vm --gpu_shmem_type fd --setup_param "--verbose 128 2 0" &
      while [ ! -e "/usr/share/pingpong/workload_terminated" ]; do sleep 10; echo "sleeping"; done
    securityContext:
      privileged: true
    volumeMounts:
    - name: nvidia-install-dir-host
      mountPath: /usr/local/nvidia/lib64
    - name: tcpd-socket
      mountPath: /tmp
    - name: workload-terminated-volume
      mountPath: /usr/share/pingpong
    env:
    - name: LD_LIBRARY_PATH
      value: /usr/local/nvidia/lib64
{{end}}
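  # Main pingpong workload container, with the NVIDIA libraries, TCPX NCCL plugin, and TCPX socket mounted from the volumes above.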
  - name: pingpong
    image: {{$root.Values.workload.image}}
    imagePullPolicy: Always
    securityContext:
      privileged: true
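    # Rendezvous and topology settings (leader address/port, node count, this pod's rank), presumably consumed by the launch script in the workload image.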
    env:
    - name: JOB_TIMESTAMP
      value: "{{$root.Values.workload.jobTimestamp}}"
    - name: MASTER_PORT
      value: "6000"
    - name: GPUS_PER_NODE
      value: "8"
    - name: MASTER_ADDR
      value: "pingpong-leader-{{$.Release.Name}}"
    - name: NNODES
      value: "{{$node_count}}"
    - name: NODE_RANK
      value: "{{ $node_index }}"
    - name: USE_GPUDIRECT_TCPX
      value: "{{$root.Values.network.useGPUDirectTcpx}}"
    - name: CLUSTER_TYPE
      value: "GKE"
    volumeMounts:
    - name: nvidia-install-dir-host
      mountPath: /usr/local/nvidia/lib64
    - name: tcpx-nccl-plugin-volume
      mountPath: /usr/local/tcpx
    - name: tcpd-socket
      mountPath: /tmp
    - name: shared-memory
      mountPath: /dev/shm
    - name: workload-terminated-volume
      mountPath: /usr/share/pingpong
    resources:
      limits:
        nvidia.com/gpu: !!int 8
---
{{end}}