# tutorials-and-examples/nvidia-bionemo/fine-tuning/job/job.yaml
#
# Kubernetes Job that runs /app/finetuning.py inside the BioNeMo framework
# image on a single GPU. The script is supplied by the `finetuning-script`
# ConfigMap and the `bionemo-filestore` PVC is mounted at /mnt/data.
---
apiVersion: batch/v1
kind: Job
metadata:
  name: esm2-finetuning
  namespace: bionemo-training
spec:
  backoffLimit: 3  # Number of retries before marking job as failed
  template:
    spec:
      # Pods are never restarted in place; a failed attempt is retried by the
      # Job controller with a new pod, bounded by backoffLimit above.
      restartPolicy: Never
      # Only schedule onto nodes carrying this label.
      nodeSelector:
        cloud.google.com/gke-gpu: "true"
      containers:
        - name: finetuning
          image: nvcr.io/nvidia/clara/bionemo-framework:2.3
          # The script path resolves through the `scripts` volume mounted
          # at /app below.
          command: ["python3"]
          args: ["/app/finetuning.py"]
          resources:
            limits:
              nvidia.com/gpu: 1
          volumeMounts:
            - name: bionemo-storage
              mountPath: /mnt/data
            - name: scripts
              mountPath: /app
      volumes:
        - name: bionemo-storage
          persistentVolumeClaim:
            claimName: bionemo-filestore
        - name: scripts
          configMap:
            name: finetuning-script
            # Decimal 493 == octal 0755 (rwxr-xr-x). Written in decimal so
            # the value does not depend on YAML 1.1 implicit-octal parsing.
            defaultMode: 493