# tutorials-and-examples/nvidia-bionemo/fine-tuning/job/job.yaml (33 lines of code) (raw):

---
# Kubernetes Job that runs /app/finetuning.py with python3 inside the
# NVIDIA BioNeMo framework container, requesting a single GPU.
# The script is supplied by the "finetuning-script" ConfigMap and shared data
# is mounted from the "bionemo-filestore" PersistentVolumeClaim; both objects
# (and the "bionemo-training" namespace) must exist before this Job is applied.
apiVersion: batch/v1
kind: Job
metadata:
  name: esm2-finetuning
  namespace: bionemo-training
spec:
  backoffLimit: 3  # Number of retries before marking job as failed
  template:
    spec:
      containers:
        - name: finetuning
          image: nvcr.io/nvidia/clara/bionemo-framework:2.3
          # Entry point: `python3 /app/finetuning.py`; the script file comes
          # from the "scripts" ConfigMap volume mounted at /app below.
          command: ["python3"]
          args: ["/app/finetuning.py"]
          resources:
            limits:
              # One GPU for the container (extended resources are requested
              # via limits).
              nvidia.com/gpu: 1
          volumeMounts:
            - name: bionemo-storage
              mountPath: /mnt/data
            - name: scripts
              mountPath: /app
      volumes:
        - name: bionemo-storage
          persistentVolumeClaim:
            claimName: bionemo-filestore
        - name: scripts
          configMap:
            name: finetuning-script
            # Octal file mode (rwxr-xr-x) for the projected files. This field
            # is an integer in the Kubernetes API, so it must NOT be quoted.
            defaultMode: 0755
      # Failed pods are not restarted in place; retries are governed by
      # spec.backoffLimit above.
      restartPolicy: Never
      # Schedule only onto nodes carrying this label. The value is quoted
      # because label values must be strings, not YAML booleans.
      nodeSelector:
        cloud.google.com/gke-gpu: "true"