gen-ai/fine-tune-llama2/job.yaml

taskGroups:
  - taskSpec:
      runnables:
        - script:
            text: |
              #!/bin/bash
              mkdir -p $WORK_DIR
              mkdir -p $RESULTS_DIR
              apt install -y python3-pip
              pip3 install bitsandbytes datasets huggingface_hub peft torch transformers
        - script:
            text: |
              #!/bin/bash
              python3 $SCRIPTS_DIR/download-model.py --output $WORK_DIR
        - script:
            text: |
              #!/bin/bash
              python3 $SCRIPTS_DIR/fine-tune.py --input $WORK_DIR --output $RESULTS_DIR
      volumes:
        - nfs:
            server: <YOUR FILESTORE IP ADDRESS>
            remotePath: <YOUR FILESTORE SHARE PATH, e.g. /share>
          mountPath: "/mnt/disks/llm"
      environment:
        variables:
          WORK_DIR: /tmp
          RESULTS_DIR: /mnt/disks/llm/fine-tune-results
          SCRIPTS_DIR: /mnt/disks/llm/scripts
          PATH: /usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
        secretVariables:
          HUGGING_FACE_HUB_TOKEN: projects/<YOUR PROJECT ID>/secrets/HuggingFaceHubToken/versions/1
allocationPolicy:
  instances:
    - policy:
        machineType: g2-standard-96
        bootDisk:
          image: projects/ml-images/global/images/c0-deeplearning-common-gpu-v20240128-debian-11-py310
          sizeGb: 150
        accelerators:
          - type: nvidia-l4
            count: 8
      installGpuDrivers: true
logsPolicy:
  destination: CLOUD_LOGGING
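
# A minimal sketch of submitting this job with the gcloud CLI. The job name and
# region below are illustrative, and it assumes the placeholders above (Filestore
# IP, share path, project ID) have been filled in and that the scripts referenced
# by SCRIPTS_DIR already exist on the Filestore share:
#
#   gcloud batch jobs submit llama2-fine-tune \
#     --location us-central1 \
#     --config job.yaml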