# use-cases/model-fine-tuning-pipeline/data-preparation/gemma-it/manifests/job.yaml (46 lines of code) (raw)
# Kubernetes Job "data-prep": runs a single container once, to completion.
# NOTE(review): every V_* value below looks like a placeholder that is
# substituted before the manifest is applied — confirm against the deploy
# tooling; they are not valid image/bucket/service-account names as written.
apiVersion: batch/v1
kind: Job
metadata:
  name: data-prep
spec:
  # No retries: the Job is marked failed after the first pod failure.
  backoffLimit: 0
  template:
    metadata:
      labels:
        app: data-prep
        ml-platform: data-prep
    spec:
      containers:
        - name: job
          image: V_IMAGE_URL
          # Always contact the registry for the image when the pod starts.
          imagePullPolicy: Always
          # Settings handed to the container as environment variables. How
          # each one is consumed is defined by the image, not by this file.
          env:
            - name: "BUCKET"
              value: "V_DATA_BUCKET"
            - name: "DATASET_INPUT_PATH"
              value: "V_DATASET_INPUT_PATH"
            - name: "DATASET_INPUT_FILE"
              value: "V_DATASET_INPUT_FILE"
            - name: "DATASET_OUTPUT_PATH"
              value: "V_DATASET_OUTPUT_PATH"
            - name: "PROJECT_ID"
              value: "V_PROJECT_ID"
            - name: "PROMPT_MODEL_ID"
              value: "V_PROMPT_MODEL_ID"
            - name: "REGION"
              value: "V_REGION"
          resources:
            requests:
              cpu: 100m
              memory: 512Mi
            limits:
              cpu: 250m
              memory: 1Gi
      # Schedule only onto nodes carrying the label resource-type=cpu.
      nodeSelector:
        resource-type: "cpu"
      # Containers are never restarted in place after they exit.
      restartPolicy: Never
      serviceAccountName: V_KSA
      # Allow scheduling onto nodes tainted with key "on-demand" and effect
      # NoSchedule, whatever the taint's value (operator: Exists).
      tolerations:
        - key: "on-demand"
          operator: "Exists"
          effect: "NoSchedule"