config/jobmgr/base.yaml (89 lines of code) (raw):
# Storage backend settings.
# NOTE(review): nesting was rebuilt from a flattened listing; in particular,
# use_cassandra and db_write_concurrency are assumed to sit directly under
# storage (not under cassandra) - confirm against the consuming config.
storage:
  cassandra:
    max_parallel_batches: 1000
    max_updates_job: 10
    connection:
      contactPoints:
        - '127.0.0.1'
      port: 9042
      consistency: LOCAL_QUORUM
      serialConsistency: LOCAL_SERIAL
      hostPolicy: TokenAwareHostPolicy
      # Timeout raised from 10s to 20s so that recovery code does not time
      # out; recovery was seen timing out while peloton was recovering from
      # a Cassandra latency spike.
      timeout: 20s
    store_name: peloton_test
    migrations: pkg/storage/cassandra/migrations/
  use_cassandra: false
  db_write_concurrency: 40
# Job manager settings: ports, goal state, task launcher/evictor, deadline
# tracking, job service limits and workflow progress checks.
job_manager:
  http_port: 5292
  grpc_port: 5392
  goal_state:
    job_batch_runtime_update_interval: 10s
    job_service_runtime_update_interval: 1s
  task_launcher:
    placement_dequeue_limit: 10
    get_placements_timeout_ms: 100
  task_evictor:
    eviction_period: 60s
    eviction_dequeue_limit: 100
    eviction_dequeue_timeout_ms: 100
  deadline:
    deadline_tracking_period: 30m
  job_service:
    # TODO (adityacb): Adjust this limit once we fix T1689063 and T1689077
    # and have a better data model
    max_tasks_per_job: 100000
    med_instance_count: 500
    high_instance_count: 1000
    low_get_workflow_events_workers: 25
    med_get_workflow_events_workers: 50
    high_get_workflow_events_workers: 100
    enable_secrets: false
    # NOTE(review): enable_secrets and thermos_executor are assumed to belong
    # to job_service; the flattened listing does not show their level.
    thermos_executor:
      path: '/usr/share/aurora/bin/thermos_executor.pex'
      flags: '--preserve_env --nosetuid-health-checks --nosetuid --no-create-user'
  # Refresh the ActiveTaskCache every 5 minutes.
  active_task_update_period: 300s
  # This option is being deprecated.
  job_runtime_calculation_via_cache: false
  workflow_progress_check:
    # Check the progress of all workflows every 30 minutes.
    workflow_progress_check_period: 30m
    # A workflow that has not been updated for 30 minutes
    # is considered stale.
    stale_workflow_threshold: 30m
# Election settings: the root used for election.
election:
  root: '/peloton'
# Health settings: heartbeat interval.
health:
  heartbeat_interval: 5s
# Metrics settings: runtime metrics are enabled with a 10s interval.
metrics:
  runtime_metrics:
    enabled: true
    interval: 10s
# Per-method rate limiting; disabled in this base config.
rate_limit:
  enabled: false
  methods:
    # Entry order is significant: a procedure is compared against the
    # entries from top to bottom, and the first entry it satisfies is
    # the one that applies.
    - name: 'peloton.api.v1alpha.job.stateless.svc.JobService:GetJob'
      rate: 100
      burst: 100
    - name: 'peloton.api.v1alpha.job.stateless.svc.JobService:Get*'
      rate: 100
      burst: 100
    - name: 'peloton.api.v1alpha.job.stateless.svc.JobService:Browse*'
      rate: 100
      burst: 100
    - name: 'peloton.api.v1alpha.job.stateless.svc.JobService:List*'
      rate: -1  # a burst or rate below 0 means no rate limit
  # # A call to a method that is not in the methods list
  # # would fall through to this branch.
  # default:
  #   rate: -1
  #   burst: -1
# TODO: find a way to generate these lists automatically.
api_lock:
  # Procedure patterns listed as read APIs.
  read_apis:
    - '*:Get*'
    - '*:Query*'
    - '*:List*'
    - '*:Browse*'
    - 'peloton.api.v1alpha.watch.svc.WatchService:*'
  # Procedure patterns listed as write APIs.
  write_apis:
    - '*:Create*'
    - '*:Delete*'
    - '*:Start*'
    - '*:Stop*'
    - '*:Restart*'
    - '*:Refresh*'
    - '*:Update*'
    - '*:Pause*'
    - '*:Resume*'
    - '*:Rollback*'
    - '*:Abort*'
    - '*:Replace*'
    - '*:Patch*'