# auto_sizing/workflows/run.yaml
# Argo Workflow that runs `auto_sizing` once per target and then exports the
# aggregated results. All workflow-level parameters are supplied at submit time.
apiVersion: argoproj.io/v1alpha1
kind: Workflow
metadata:
  generateName: auto-sizing-
spec:
  entrypoint: auto-sizing
  # Argo's field name is `ttlStrategy` (camelCase); a kebab-case key is not
  # recognised, so the clean-up below would not be applied.
  ttlStrategy:
    # Delete successful workflows automatically after 30 days (30 * 86400 s).
    secondsAfterSuccess: 2592000
  arguments:
    parameters:
      # Set dynamically when the workflow gets deployed. Each item is expected
      # to expose a `slug` field (see `{{item.slug}}` below).
      - name: targets
      - name: project_id
      - name: dataset_id
      - name: bucket
  templates:
    # Entrypoint: fan out over all targets, then export the results.
    - name: auto-sizing
      # Run up to 5 containers in parallel at the same time.
      parallelism: 5
      inputs:
        parameters:
          - name: targets
      steps:
        # Step group 1: one `target-autosizing` run per entry in `targets`.
        - - name: target-autosizing
            template: target-autosizing
            arguments:
              parameters:
                - name: slug
                  value: "{{item.slug}}"
            # Process these targets in parallel.
            withParam: "{{inputs.parameters.targets}}"
            # Keep going to the export step even if some targets fail.
            continueOn:
              failed: true
        # Step group 2: runs after step group 1 has finished.
        - - name: export-results
            template: export-results
            continueOn:
              failed: true

    # Runs the sizing job for a single target, identified by `slug`.
    - name: target-autosizing
      inputs:
        parameters:
          - name: slug
      container:
        # NOTE(review): `latest` is a mutable tag; consider pinning a version
        # or digest if reproducible runs matter.
        image: gcr.io/moz-fx-data-experiments/auto_sizing:latest
        command:
          - auto_sizing
          - "--log_to_bigquery"
          - run
          - "--target_slug={{inputs.parameters.slug}}"
          - "--dataset_id={{workflow.parameters.dataset_id}}"
          - "--project_id={{workflow.parameters.project_id}}"
          - "--bucket={{workflow.parameters.bucket}}"
          - "--run-presets"
        resources:
          requests:
            # Make sure at least 10 GiB of memory is available for the task.
            memory: 10Gi
          limits:
            # Limit to 4 cores.
            cpu: 4
      retryStrategy:
        # Retry a failed container up to 3 times; a run might fail
        # occasionally due to limited resources.
        limit: 3
        retryPolicy: "Always"
        backoff:
          # Wait 1m before the first retry, doubling the wait each time.
          duration: "1m"
          factor: 2
          # NOTE(review): maxDuration caps the overall time spent in backoff;
          # confirm 5m leaves room for all 3 retries with the waits above.
          maxDuration: "5m"

    # Exports the aggregated results to the configured bucket.
    - name: export-results
      # This template takes no inputs; written explicitly instead of a bare key.
      inputs: {}
      container:
        image: gcr.io/moz-fx-data-experiments/auto_sizing:latest
        command:
          - auto_sizing
          - export-aggregate-results
          - "--project_id={{workflow.parameters.project_id}}"
          - "--bucket={{workflow.parameters.bucket}}"
      # Terminate the container template after 10 minutes.
      activeDeadlineSeconds: 600