auto_sizing/workflows/run.yaml (70 lines of code) (raw):

# Argo Workflow that runs auto_sizing for a list of targets and then exports
# the aggregated results.
#
# Flow (see the `auto-sizing` entrypoint template):
#   1. Fan out over the `targets` parameter, running one `target-autosizing`
#      container per item.
#   2. Run `export-results` once afterwards.
# Both steps use `continueOn.failed`, so a failed target does not stop the
# workflow from reaching the export step.
apiVersion: argoproj.io/v1alpha1
kind: Workflow
metadata:
  generateName: auto-sizing-
spec:
  entrypoint: auto-sizing
  # NOTE: the Argo field is camelCase `ttlStrategy`; a `ttl-strategy` key is
  # not part of the Workflow spec and does not configure any cleanup.
  ttlStrategy:
    secondsAfterSuccess: 2592000  # delete workflows automatically after 30 days
  arguments:
    parameters:
      # Values are set dynamically when the workflow gets deployed.
      - name: targets
      - name: project_id
      - name: dataset_id
      - name: bucket
  templates:
    # Entrypoint: process every target, then export the aggregated results.
    - name: auto-sizing
      parallelism: 5  # run up to 5 containers in parallel at the same time
      inputs:
        parameters:
          - name: targets
      steps:
        - - name: target-autosizing
            template: target-autosizing
            arguments:
              parameters:
                - name: slug
                  value: "{{item.slug}}"
            # One step instance per item in `targets`; items are processed in
            # parallel (bounded by `parallelism` above).
            withParam: "{{inputs.parameters.targets}}"
            continueOn:
              failed: true
        - - name: export-results
            template: export-results
            continueOn:
              failed: true

    # Runs auto_sizing for a single target identified by `slug`.
    - name: target-autosizing
      inputs:
        parameters:
          - name: slug
      container:
        image: gcr.io/moz-fx-data-experiments/auto_sizing:latest
        command:
          - auto_sizing
          - "--log_to_bigquery"
          - run
          - "--target_slug={{inputs.parameters.slug}}"
          - "--dataset_id={{workflow.parameters.dataset_id}}"
          - "--project_id={{workflow.parameters.project_id}}"
          - "--bucket={{workflow.parameters.bucket}}"
          - "--run-presets"
        resources:
          requests:
            memory: 10Gi  # make sure at least 10 GiB of memory is available for the task
          limits:
            cpu: 4  # limit to 4 cores
      retryStrategy:
        # Execute a container max. 3x; sometimes a container run might fail
        # due to limited resources.
        limit: 3
        retryPolicy: "Always"
        backoff:
          duration: "1m"
          factor: 2
          maxDuration: "5m"

    # Exports the aggregated results once the per-target step has finished.
    - name: export-results
      inputs: {}  # this template takes no inputs
      container:
        image: gcr.io/moz-fx-data-experiments/auto_sizing:latest
        command:
          - auto_sizing
          - export-aggregate-results
          - "--project_id={{workflow.parameters.project_id}}"
          - "--bucket={{workflow.parameters.bucket}}"
      activeDeadlineSeconds: 600  # terminate container template after 10 minutes