pkg/hostmgr/p2k/hostcache/hostcache.go (12 lines): - line 190: // TODO: implement defrag/firstfit ranker, for now default to first fit - line 236: // TODO: metrics - line 266: // TODO: metrics - line 270: // TODO: remove held hosts. - line 283: // TODO: do this for slack resources too. - line 317: // TODO: this should return an error. But keep it the same way as in - line 431: // TODO: metrics - line 468: // TODO: evaluate locking strategy - line 489: // TODO: evaluate locking strategy - line 515: // TODO: figure out how to differemtiate mesos/k8s hosts, - line 705: // TODO: Implement - line 727: // TODO: populate capacity and version correctly pkg/jobmgr/cached/update.go (11 lines): - line 331: // TODO: do recovery automatically when read state - line 398: // TODO: do recovery automatically when read state - line 427: // TODO: do recovery automatically when read state - line 451: // TODO: do recovery automatically when read state - line 546: // TODO: Add error handling, if accurate persistence of workflow events - line 630: // TODO: optimize the recover path, since it needs to read from task store - line 654: // TODO: do recovery automatically when read state - line 678: // TODO: do recovery automatically when read state - line 935: // TODO: find the right place to put the func - line 944: // TODO: figure out if cache can be used to read task runtime - line 1114: // TODO what happens if the update does not change the instance pkg/storage/cassandra/store.go (9 lines): - line 204: // TODO: Break this up into different files (and or structs) that implement - line 458: // TODO: investigate if there are any golang library that can build lucene query - line 494: // TODO (adityacb): change this once we have query spec support - line 547: // TODO (adityacb): remove artificially enforcing default time range for - line 700: // TODO (chunyang.shen): use job/task cache to get JobConfig T1760469 - line 806: // TODO: remove the line after all tasks have desired mesos task id - line 1651: // TODO: This implementation is not perfect, as if it's getting an transient - line 1911: // TODO Remove this when QueryJobs() uses ORM. - line 2413: // TODO determine if this function should be part of storage or api handler. tools/minicluster/minicluster.py (7 lines): - line 58: # TODO: Start all apps at the same time. - line 140: # TODO: Use this namespace. - line 148: # TODO: Fix race condition between the find_free_port() and the process - line 167: # TODO: Save those to local disk, or print them to stdout. - line 383: # TODO: Enable following flags for fully authentication. - line 519: # TODO: move docker run logic into a common function for all - line 659: # TODO: It's very implicit that the first port is the HTTP pkg/resmgr/respool/respoolsvc/handler.go (6 lines): - line 137: // TODO Handle parent of the new_resource_pool_config - line 149: // TODO differentiate between n/w errors vs other data errors - line 196: //TODO temporary solution to unblock, - line 451: // TODO differentiate between n/w errors vs other data errors - line 471: // TODO differentiate between n/w errors - line 597: // TODO use query request to read filters pkg/hostmgr/p2k/hostcache/hostsummary/basehostsummary.go (6 lines): - line 58: // TODO: Make this configurable (T3312219). - line 127: // TODO: make the initial port range configs. - line 174: // TODO: Handle oversubscription - line 305: // TODO: replace this with models.HostResources. - line 324: // TODO: lease may be expired already. - line 548: // TODO: Match ports resources. pkg/hostmgr/offer/handler.go (6 lines): - line 211: //TODO: refactor OfferPruner as a background worker - line 303: // TODO: We should start a study of https://github.com/uber-common/inject - line 345: // TODO: Handle inverse offers from Mesos - line 367: // TODO: handle rescind inverse offer events - line 562: // TODO: add error handling - line 573: // TODO: add error handling protobuf/peloton/private/hostmgr/hostsvc/hostsvc.proto (6 lines): - line 346: * TODO: figure out a better name for InternalHostService - line 412: // TODO move out to separate service if scope widens - line 810: // TODO: Add errors that could fail a reserve resources request - line 818: // TODO: Add errors that could fail a unreserve resources request - line 826: // TODO: Add errors that could fail a create volumes request - line 834: // TODO: Add errors that could fail a destroy volumes request pkg/hostmgr/p2k/plugins/k8s/k8s.go (5 lines): - line 170: // TODO: catch and aggregate erros here - line 256: // TODO: Generate an alert. - line 274: // TODO: Generate an alert. - line 292: // TODO: Generate an alert. - line 340: // TODO: see if you can delete the pods actively here and get their pkg/resmgr/queue/multi_level_list.go (5 lines): - line 120: // TODO: We need to optimize the locking - line 121: // TODO: Need to take RLock on Map and Exclusive lock on individual list - line 144: // TODO: We need to optimize the locking - line 145: // TODO: Need to take RLock on Map and Excusive lock on individual list - line 343: // TODO: we also can use heap for index to scan it Which can help getting the highest level in in O(1) cmd/hostmgr/main.go (5 lines): - line 393: // TODO: Skip it when k8s is enabled. - line 430: // TODO: update Mesos url when leading mesos master changes - line 455: // TODO: Delete the outbounds from hostmgr to resmgr after switch - line 645: // TODO: start MesosPlugin after it's implemented. - line 673: // TODO: Refactor event stream handler and move it out of offer package pkg/jobmgr/cached/job.go (4 lines): - line 55: // TODO there a lot of methods in this interface. To determine if - line 143: // TODO: no config update should go through this API, divide this API into - line 2405: // TODO: move this under update cache object - line 3092: // TODO: check entity version and bump the version pkg/hostmgr/hostpool/resizer/resizer.go (4 lines): - line 159: // TODO: Get the appropriate source/dest pools to move hosts to/from. - line 216: // TODO: Get desired hosts. - line 224: // TODO: check for min pool size here once it is available as part - line 246: // TODO: handle error gracefully. We would need desired host pkg/jobmgr/task/event/update.go (4 lines): - line 120: // TODO: add config for BucketEventProcessor - line 251: // TODO p2k: can we build TerminationStatus from PodEvent? - line 475: // TODO p2k: verify v1 pod id in taskInfo - line 678: // TODO p2k: not sure which kubelet reason matches this. protobuf/peloton/api/v0/respool/respool.proto (4 lines): - line 99: // TODO use peloton.Changelog - line 196: //TODO: parent duplicated from ResourcePoolConfig - line 372: // TODO Filters - line 378: // TODO add error types pkg/hostmgr/p2k/hostcache/hostsummary/kubelethostsummary.go (4 lines): - line 71: // TODO: how can we differentiate sync with incremental changes? - line 152: // TODO validate slack too. - line 181: // TODO: populate slackAllocated from this map. - line 210: // TODO: do this for both slack and non-slack. pkg/hostmgr/p2k/plugins/mesos/mesos.go (4 lines): - line 128: //TODO: remove comment after MesosManager takes over mesos callback - line 418: // TODO: fill in implementation - line 423: // TODO: add metrics similar to what offerpool has - line 430: // TODO: extract slack and non slack resources from offer manager. tools/deploy/app.py (4 lines): - line 99: # TODO: query Peloton app endpoint to determine leader/follower role - line 171: # TODO: fix Peloton code to only take self.cluster.mesos_zk_path - line 553: # TODO: query Peloton app endpoint to find role - line 721: # TODO: find the leader/follower role of each app instance pkg/jobmgr/util/handler/thermos.go (4 lines): - line 319: // TODO: Validate parameters are set in ContainerSpec - line 320: // TODO: Right now we are overriding all the custom docker - line 475: // TODO: Fill MaxTaskFailures - line 630: // TODO: Fill Cluster pkg/placement/plugins/strategy.go (4 lines): - line 69: // TODO: Constraint - line 72: // TODO: RankingHint - line 93: // TODO: Remove this, it should definitely not be here. - line 97: // TODO: Remove this, it should definitely not be here. pkg/resmgr/respool/restree.go (4 lines): - line 62: // TODO: Redo package imports, such that a method Calculator.SuggestRefresh - line 129: // TODO cleanup the queues? - line 190: // TODO: We need to detect cycle here. - line 297: // TODO update only if leaf node ??? pkg/jobmgr/cached/job_factory.go (4 lines): - line 201: //TODO Refactor to remove the metrics loop into a separate component. - line 367: // TODO add metric for listener execution latency - line 374: // TODO add metric for listener execution latency - line 395: // TODO add metric for listener execution latency pkg/hostmgr/server.go (3 lines): - line 44: // TODO: Make these backoff configurations. - line 61: // TODO: move Mesos related fields into hostmgr.ServiceHandler - line 251: // TODO: Consider start offer handler with event stream pkg/jobmgr/jobsvc/stateless/handler.go (3 lines): - line 296: // TODO: handle secretes - line 1841: // TODO: set the rest of the fields in result - line 2081: // TODO: remove this function once eventstream is enabled in RM pkg/common/metrics/config.go (3 lines): - line 45: // TODO: reuse the global var name constant CLUSTER - line 78: // TODO: move mux setup out of metrics initialization - line 171: // TODO: make this healthcheck live, and check some kind of internal health? pkg/jobmgr/goalstate/update_run.go (3 lines): - line 55: // TODO: remove after recovery is done when reading state - line 84: // TODO: use job SLA if GetMaxFailureInstances is not set - line 171: // TODO (varung): pkg/resmgr/queue/priority.go (3 lines): - line 58: // TODO: optimize the write lock here with potential read lock - line 65: // TODO: Need to add test case for this case - line 89: // TODO: optimize the write lock here with potential read lock pkg/jobmgr/jobsvc/handler.go (3 lines): - line 1225: // TODO: Remove this restriction after authN/authZ is enabled - line 1244: // TODO: Remove this after we have separate API - line 1305: // TODO: cli send nil ranges when not specified pkg/resmgr/task/tracker.go (3 lines): - line 38: // TODO: Get rid of peloton-task-id from tracker - line 92: // TODO: Get rid of peloton-task-id from tracker - line 108: // TODO: Move `placements` and `orphanTasks` out of tracker pkg/archiver/engine/engine.go (3 lines): - line 177: // TODO: remove this delay once we move to API server - line 325: // TODO: have a reasonable threshold for tolerating such failures - line 347: // TODO (adityacb) pkg/jobmgr/goalstate/task_stop.go (3 lines): - line 76: // TODO: Due to missing atomic updates in DB, there is a race - line 100: // TODO: As of now this function supports one task - line 135: // TODO: this is write after read, should use optimistic concurrency control pkg/aurorabridge/event_publisher.go (3 lines): - line 216: // TODO (varung): Add pod filter to watch on desired labels - line 230: // TODO (varung): Explore the option of exponential backoff - line 266: // TODO (varung): in a lost leadership scenario, receive pod will block pkg/hostmgr/p2k/hostmgrsvc/handler.go (3 lines): - line 84: // TODO: call v0 AcquireHostOffers API for mesos, translate that result - line 120: // TODO: call v0 LaunchTasks API for mesos, translate that result - line 282: // TODO: kill pods in parallel. pkg/hostmgr/mesos/driver.go (3 lines): - line 83: // TODO: load framework ID from ZK or DB - line 144: // TODO: This cache variable was never used? - line 238: // TODO: Require consistent framework once all clusters are rebuilt. pkg/hostmgr/factory/task/task_builder.go (3 lines): - line 115: // TODO: Look into whether we need to prefer reserved (non-* role) - line 484: // TODO: Consider add protocol, visibility and labels. - line 495: // TODO: pkg/jobmgr/task/placement/placement.go (3 lines): - line 260: // TODO: unify this call with p.taskConfigV2Ops.GetTaskConfig(). - line 462: // TODO: Notify resmgr that the state of this task - line 650: // TODO: turn getplacement metric into gauge so we can pkg/hostmgr/goalstate/host.go (3 lines): - line 43: // TODO: remove once host state & goalState backed by host cache - line 64: // TODO: remove RequeueAction once host state & goalState - line 82: // TODO: remove RequeueAction once host state & goalState pkg/common/util/util.go (3 lines): - line 268: // TODO: add other building functions when needed - line 277: // TODO: adjust in case there are additional peloton states - line 449: // TODO: deprecate the check once mesos task id migration is complete from pkg/hostmgr/mesos/yarpc/transport/mhttp/outbound.go (3 lines): - line 18: // TODO: add an option to http outbound so that we can disable the - line 113: // TODO: Use option pattern with varargs instead - line 216: // TODO Behavior for 300-range status codes is undefined pkg/placement/plugins/helpers.go (3 lines): - line 41: // TODO: This is ok for now since this is the only place getting constraint - line 63: // TODO: Need a different way to annotate required - line 97: // TODO: It assumes PlacementNeeds.Constraint is *peloton_api_v0_task.Constraint, pkg/hostmgr/mesos/detector.go (2 lines): - line 99: // TODO: handle `Done()` from `master` so we know that underlying - line 101: // TODO: consider whether we need to Cancel (aka stop) this detector. pkg/storage/objects/host_infos.go (2 lines): - line 147: // TODO: after merging with host cache, concurrency control should be achieved - line 160: // TODO: Remove singleton once we move to host cache for concurrency control pkg/resmgr/handler.go (2 lines): - line 471: // TODO: handle the dequeue errors better - line 783: // TODO: We probably want to terminate all the tasks in gang pkg/jobmgr/jobsvc/metrics.go (2 lines): - line 59: // TODO: find a better way of organizing metrics per package - line 71: // TODO: find a better way of organizing metrics per package so we pkg/hostmgr/p2k/scalar/host.go (2 lines): - line 101: // TODO: create podMap (map of podID to resource). - line 151: // TODO: make this an interface with a noop impl for Mesos. pkg/hostmgr/mesos/yarpc/encoding/mpb/inbound.go (2 lines): - line 76: // TODO: Decode the nested event object using reflect.MethodByName - line 82: // TODO: make Request.Body in YARPC to be interface{} pkg/storage/interfaces.go (2 lines): - line 51: // TODO: Move all arguments from proto pointers to golang data types - line 62: // TODO: Move all arguments from proto pointers to golang data types pkg/common/async/queue.go (2 lines): - line 36: // TODO: This queue may be changed dramatically going forward, as the main - line 40: // TODO: Consider using circular buffer, if memory overhead can be lowered. cmd/jobmgr/main.go (2 lines): - line 187: // TODO: remove this flag and all related code after - line 674: common.PelotonResourceManager, // TODO: to be removed protobuf/peloton/api/v1alpha/pod/pod.proto (2 lines): - line 680: // TODO Avoid leaking job abstractions into public pod APIs. - line 685: // TODO Avoid leaking job abstractions into public pod APIs. pkg/jobmgr/cached/task.go (2 lines): - line 151: // TODO: remove this check, post mesostaskID migration. - line 162: // TODO: remove prevMesosTaskID len check post mesostaskID migration pkg/hostmgr/mesos/yarpc/transport/mhttp/handler.go (2 lines): - line 56: // TODO: make request.Validator public in yarpc - line 68: // TODO: capture and handle panic pkg/jobmgr/goalstate/job.go (2 lines): - line 104: // TODO: revisit the rules after new job kill - line 257: // TODO: after all job kill is controlled by job state version and desired state version, pkg/jobmgr/cached/workflow_strategy.go (2 lines): - line 47: // TODO: now a task can both get true for IsInstanceInProgress and - line 253: // TODO: reuse the function in jobmgr/util, now it would create import cycle. config/jobmgr/base.yaml (2 lines): - line 36: # TODO (adityacb): Adjust this limit once we fix T1689063 and T1689077 - line 94: # TODO: need to find a way to auto generate the list protobuf/peloton/api/v1alpha/pod/svc/pod_svc.proto (2 lines): - line 115: // TODO: distinguish files and directories in the sandbox - line 199: // TODO move to private job manager APIs. pkg/jobmgr/goalstate/update_actions.go (2 lines): - line 107: // TODO: remove after recovery is done when reading state - line 211: // TODO: remove after recovery is done when reading state pkg/hostmgr/p2k/scalar/pod.go (2 lines): - line 119: // TODO: missing ports and termination reason. - line 147: // TODO: Reason pkg/aurorabridge/handler.go (2 lines): - line 1329: "request": request, // TODO (varung): remove post PRR or as necessary - line 2007: // TODO: To be deprecated in favor of getJobCacheFromJobKey. pkg/jobmgr/goalstate/task.go (2 lines): - line 149: // TODO @avyas: remove once we have the longer term fix for this. - line 294: // TODO: remove goalState.MesosTaskID check after all tasks protobuf/peloton/api/v1alpha/job/stateless/svc/stateless_svc.proto (2 lines): - line 561: // TODO find the appropriate service to put this method in. - line 579: // TODO move to private job manager APIs. pkg/jobmgr/tasksvc/handler.go (2 lines): - line 1390: // TODO: remove this function once eventstream is enabled in RM - line 1504: // TODO: remove this function once eventstream is enabled in RM protobuf/peloton/api/v0/task/task.proto (2 lines): - line 344: // TODO: We need to correct the numbering - line 597: // TODO add reason tools/deploy/cluster.py (2 lines): - line 15: # TODO: use a separate binary for DB migration - line 37: # TODO: Add integration tests here pkg/hostmgr/hostpool/manager/manager.go (2 lines): - line 90: // TODO: Use new host cache once it merges with agent map. - line 302: // TODO: Add more implementation after required hostInfo store change is done. pkg/jobmgr/jobsvc/private/handler.go (1 line): - line 440: // TODO: set the rest of the fields in result pkg/resmgr/common/config.go (1 line): - line 20: // TODO merge resmgr config to common pkg/jobmgr/goalstate/driver.go (1 line): - line 408: // TODO find the right place to run recovery in job manager. pkg/jobmgr/cached/listener.go (1 line): - line 58: // TODO Remove once batch moves to v1 alpha apis tools/deploy/aurora/client.py (1 line): - line 127: TODO: Use @async.context here to create AuroraClient asynchronously pkg/hostmgr/mesos/yarpc/transport/mhttp/inbound.go (1 line): - line 122: } // TODO: Determine whether we need check else case. pkg/jobmgr/task/activermtask/active_rm_tasks.go (1 line): - line 83: // TODO: resmgrsvc.GetActiveTasksRequest.States takes a slice of TaskState instead of string cmd/cli/main.go (1 line): - line 52: // TODO: deprecate jobMgrURL/resMgrURL/hostMgrURL once we fix minicluster container network pkg/jobmgr/updatesvc/handler.go (1 line): - line 437: // TODO: what if the workflow in job is not what is intended to be aborted pkg/storage/cassandra/impl/resultset.go (1 line): - line 68: // TODO: slu figure out if instrumentation can be done using open source pkg/jobmgr/job/config/validate.go (1 line): - line 208: //TODO: uncomment the following once all Peloton clients have been pkg/resmgr/task/rmtask.go (1 line): - line 732: // TODO : Commenting it for now to not publish yet, Until we have solution for pkg/common/deadline_queue/priority_queue.go (1 line): - line 53: // TODO: Down-size if len(pq) < cap(pq) / 2. pkg/common/v1alpha/eventstream/client.go (1 line): - line 33: // TODO: move these into config, if necessary pkg/jobmgr/goalstate/config.go (1 line): - line 39: // TODO determine the correct value of the number of pkg/hostmgr/p2k/plugins/mesos/offer.go (1 line): - line 181: // TODO: separate slack and non slack available resources. pkg/jobmgr/task/lifecyclemgr/v1_lifecyclemgr.go (1 line): - line 104: // TODO: peloton system labels contain invalid characters for labels in pkg/common/backoff/policy.go (1 line): - line 71: // TODO: add backoff into retry. pkg/hostmgr/goalstate/host_actions.go (1 line): - line 43: // TODO: remove once host state & goalState backed by host cache cmd/placement/main.go (1 line): - line 478: // TODO avyas check mimir concurrency parameters config/migratedb/base.yaml (1 line): - line 3: # FIXME: need to increase batch size limit dynamically in cassandra (T968823) pkg/resmgr/task/reconciler.go (1 line): - line 58: // TODO do something useful with this pkg/storage/objects/store.go (1 line): - line 54: // TODO: Load up all objects automatically instead of explicitly adding pkg/common/async/pool.go (1 line): - line 62: // TODO: Take an context argument that will be associated to the job. That way pkg/jobmgr/goalstate/task_terminated_retry.go (1 line): - line 26: // TODO: use jobFactory.AddJob after GetJob and AddJob get cleaned up pkg/resmgr/respool/admission.go (1 line): - line 343: // TODO this should enqueue at the head of the queue pkg/hostmgr/offer/offerpool/pool.go (1 line): - line 96: // TODO: Add following API for viewing offers, and optionally expose pkg/common/eventstream/client.go (1 line): - line 32: // TODO: move these into config, if necessary pkg/hostmgr/host/drainer/drainer.go (1 line): - line 303: // TODO: replace with read from host cache config/hostmgr/base.yaml (1 line): - line 71: # TODO : add roles for other components pkg/resmgr/task/scheduler.go (1 line): - line 140: // TODO: we need to remove ticker and use chanel for signaling pkg/storage/objects/pod_events.go (1 line): - line 165: // TODO: remove the line after all tasks have desired mesos task id protobuf/peloton/api/v1alpha/respool/respool.proto (1 line): - line 180: //TODO: parent duplicated from ResourcePoolConfig pkg/common/background/work.go (1 line): - line 173: // TODO: Make this non-blocking. pkg/hostmgr/handler.go (1 line): - line 1025: // TODO: can release only the failed tasks, for now it is ok, since pkg/hostmgr/p2k/podeventmanager/podeventmanager.go (1 line): - line 30: // TODO: we need this flag because v1alpha event stream is only tested with pkg/jobmgr/logmanager/logmanager.go (1 line): - line 30: // TODO: (varung) Move this component to HostManger pkg/placement/plugins/mimir/strategy.go (1 line): - line 48: // TODO: mimir plugin should use plugins.Config as what batch plugin does, pkg/placement/plugins/v0/needs.go (1 line): - line 57: // TODO: It assumes PlacementNeeds.Constraint is pkg/jobmgr/goalstate/update.go (1 line): - line 170: // TODO What if instancesTotal has same length as instancesDone, but has pkg/hostmgr/p2k/plugins/k8s/util.go (1 line): - line 53: // TODO: pkg/jobmgr/task/lifecyclemgr/v0_lifecyclemgr.go (1 line): - line 495: // TODO: remove this once all Peloton clients have been modified pkg/placement/plugins/batch/strategy.go (1 line): - line 132: // TODO (pourchet): Is the above note a bug? Or is it deliberate? pkg/jobmgr/goalstate/task_start.go (1 line): - line 87: // TODO: Investigate how to create proper gangs for scheduling (currently, task are treat independently) tools/minicluster/config.yaml (1 line): - line 1: # TODO: make the configs hierarchical for each component pkg/common/leader/id.go (1 line): - line 48: // TODO: Populate the version in ID pkg/jobmgr/util/task/runtime.go (1 line): - line 121: // TODO: deprecate the check once mesos task id migration is complete protobuf/peloton/api/v0/respool/svc/respool_svc.proto (1 line): - line 176: // TODO Filters pkg/resmgr/server.go (1 line): - line 52: // TODO move these to use ServerProcess protobuf/peloton/private/resmgr/taskqueue/taskqueue.proto (1 line): - line 30: // TODO: Add error handling here protobuf/peloton/api/v1alpha/respool/svc/respool_svc.proto (1 line): - line 104: // TODO Filters pkg/common/constraints/label_values.go (1 line): - line 90: // TODO: Add support for range attributes. pkg/common/async/job.go (1 line): - line 22: // TODO: Error result? protobuf/peloton/api/v0/task/svc/task_svc.proto (1 line): - line 272: // TODO: distinguish files and directories in the sandbox pkg/storage/cassandra/models.go (1 line): - line 272: // TODO: Add versioning. pkg/storage/cassandra/impl/decorator.go (1 line): - line 37: /* TODO: slu figure out if it is possible to keep the instrumentation not using uber go-common pkg/placement/engine.go (1 line): - line 203: // TODO: Dynamically adjust this based on some signal pkg/placement/offers/v0/service.go (1 line): - line 182: // TODO: Differentiate known error types by metrics and logs. pkg/hostmgr/p2k/hostcache/hostsummary/mesoshostsummary.go (1 line): - line 50: // TODO: figure out how to handle available/allocated resources pkg/jobmgr/task/evictor/evictor.go (1 line): - line 239: // TODO: remove the below translation once job manager polls host manager pkg/cli/job_actions.go (1 line): - line 82: // TODO remove this once respool is moved out of jobconfig pkg/common/eventstream/handler.go (1 line): - line 48: // TODO: we probably should keep the recently acked events in a LRU cache. protobuf/peloton/api/v1alpha/pod/apachemesos/apachemesos.proto (1 line): - line 105: // TODO Move to PodSpec once we determine how the network namespace in pkg/storage/cassandra/impl/store.go (1 line): - line 109: if stmt.IsCAS() { // TODO: performance