# config/jobmgr/base.yaml (89 lines of code) (raw)

# Base configuration for the job manager.
#
# NOTE(review): this file was restored to block style from a copy whose
# line breaks and indentation had been collapsed. Keys and values are
# unchanged, but the nesting below was inferred from key order and naming;
# confirm it against the config structs that consume this file.

# Storage backend settings.
storage:
  cassandra:
    max_parallel_batches: 1000
    max_updates_job: 10
    connection:
      contactPoints: ["127.0.0.1"]
      port: 9042
      consistency: LOCAL_QUORUM
      serialConsistency: LOCAL_SERIAL
      hostPolicy: TokenAwareHostPolicy
      # Need to increase timeout from 10s to 20s to avoid recovery code
      # from timing out. We saw recovery code timing out when peloton was
      # recovering from a Cassandra latency spike issue.
      timeout: 20s
    store_name: peloton_test
    migrations: pkg/storage/cassandra/migrations/
  use_cassandra: false
  db_write_concurrency: 40

# Job manager settings: listening ports plus per-component tuning.
job_manager:
  http_port: 5292
  grpc_port: 5392

  goal_state:
    job_batch_runtime_update_interval: 10s
    job_service_runtime_update_interval: 1s

  task_launcher:
    placement_dequeue_limit: 10
    get_placements_timeout_ms: 100

  task_evictor:
    eviction_period: 60s
    eviction_dequeue_limit: 100
    eviction_dequeue_timeout_ms: 100

  deadline:
    deadline_tracking_period: 30m

  job_service:
    # TODO (adityacb): Adjust this limit once we fix T1689063 and T1689077
    # and have a better data model
    max_tasks_per_job: 100000
    med_instance_count: 500
    high_instance_count: 1000
    low_get_workflow_events_workers: 25
    med_get_workflow_events_workers: 50
    high_get_workflow_events_workers: 100
    enable_secrets: false
    # NOTE(review): thermos_executor is assumed to belong to job_service
    # (it directly follows the job_service keys); confirm.
    thermos_executor:
      path: "/usr/share/aurora/bin/thermos_executor.pex"
      flags: "--preserve_env --nosetuid-health-checks --nosetuid --no-create-user"

  # Refresh ActiveTaskCache every 5 min
  active_task_update_period: 300s

  # being deprecated
  job_runtime_calculation_via_cache: false

  workflow_progress_check:
    # check all the workflow progress every 30 min
    workflow_progress_check_period: 30m
    # if a workflow is not updated for 30min,
    # consider it to be stale
    stale_workflow_threshold: 30m

election:
  root: "/peloton"

health:
  heartbeat_interval: 5s

metrics:
  runtime_metrics:
    enabled: true
    interval: 10s

# Per-method API rate limiting (disabled in this base config).
rate_limit:
  enabled: false
  methods:
    # order of the entries matters here.
    # a procedure will try to match each entry from top to bottom,
    # and the first satisfied config will be used.
    - name: 'peloton.api.v1alpha.job.stateless.svc.JobService:GetJob'
      rate: 100
      burst: 100
    - name: 'peloton.api.v1alpha.job.stateless.svc.JobService:Get*'
      rate: 100
      burst: 100
    - name: 'peloton.api.v1alpha.job.stateless.svc.JobService:Browse*'
      rate: 100
      burst: 100
    - name: 'peloton.api.v1alpha.job.stateless.svc.JobService:List*'
      rate: -1  # if either burst or rate is < 0, no rate limit
  # # if a user calls a method not listed in methods list,
  # # it would fall through to this branch
  # default:
  #   rate: -1
  #   burst: -1

# Glob patterns classifying API methods as reads or writes.
# TODO: need to find a way to auto generate the list
api_lock:
  read_apis:
    - '*:Get*'
    - '*:Query*'
    - '*:List*'
    - '*:Browse*'
    - 'peloton.api.v1alpha.watch.svc.WatchService:*'
  write_apis:
    - '*:Create*'
    - '*:Delete*'
    - '*:Start*'
    - '*:Stop*'
    - '*:Restart*'
    - '*:Refresh*'
    - '*:Update*'
    - '*:Pause*'
    - '*:Resume*'
    - '*:Rollback*'
    - '*:Abort*'
    - '*:Replace*'
    - '*:Patch*'