src/job-exporter/src/collector.py (3 lines):
    - line 427: # TODO: this piece of code seems not corret, gpu_mem_util is
    - line 656: # TODO speed this up, since this is O(n^2)
    - line 895: cmd_timeout = 10 # TODO 99th latency is xxx
src/rest-server/src/models/v2/group.js (3 lines):
    - line 50: // TODO: workaround for circular dependencies, need redesign module structure
    - line 231: // TODO: replace updateGroup2ExnternalMapper
    - line 363: // TODO: update grouplist at initialization
subprojects/frameworklauncher/yarn/src/main/java/com/microsoft/frameworklauncher/applicationmaster/SelectionManager.java (2 lines):
    - line 156: //TODO: Node Gpu policy filter the nodes;
    - line 160: //TODO: apply other node selection policy in the future;
deployment/configCmd.py (2 lines):
    - line 34: # TODO on premise, using ip as "nodename"
    - line 90: # TODO we some k8s template still using the 'dashboard_host'
subprojects/frameworklauncher/yarn/src/main/java/com/microsoft/frameworklauncher/applicationmaster/ApplicationMaster.java (2 lines):
    - line 577: // TODO: Update TaskStatus.ContainerIsDecommissioning
    - line 1158: // TODO: Implement Service Rolling Upgrade
contrib/autoscaler/scaler.py (1 line):
    - line 98: # TODO: tell openpai / HiveD to keep idle when scaling??
src/rest-server/src/middlewares/v2/hived.js (1 line):
    - line 28: // TODO: make it a cluster-wise config
deployment/layoutCmd.py (1 line):
    - line 35: # TODO nodename == hostname on aks
src/utilities/doc_checker.py (1 line):
    - line 25: # TODO check remot links
subprojects/frameworklauncher/yarn/src/main/java/com/microsoft/frameworklauncher/webserver/WebServer.java (1 line):
    - line 73: // TODO: Only Restart WebServer instead of exit whole process and Restart by external system.
src/webportal/src/app/cluster-view/services/service-table.component.ejs (1 line):
    - line 69: // TODO: Insert code to detect warnings.
src/webportal/src/app/job/job-view/fabric/job-detail/components/clone-button.jsx (1 line):
    - line 28: // TODO: align same format of jobname with each submit ways
deployment/paiLibrary/common/docker_handler.py (1 line):
    - line 27: # TODO: Change the command with linux_shell.execute_shell to docker lib.
subprojects/frameworklauncher/yarn/src/main/java/com/microsoft/frameworklauncher/applicationmaster/StatusManager.java (1 line):
    - line 274: // TODO: Store AttemptId in AMStatus, and double check it before pushStatus
subprojects/frameworklauncher/yarn/src/main/java/com/microsoft/frameworklauncher/service/Service.java (1 line):
    - line 119: // TODO: Only Restart Service instead of exit whole process and Restart by external system.
src/webportal/src/app/home/home/utilization-chart.jsx (1 line):
    - line 81: // TODO: hardcode here, change it when has better solution
contrib/autoscaler/cloud_monitors.py (1 line):
    - line 35: # TODO: + azure information
src/webportal/src/app/job-submission/components/task-roles.jsx (1 line):
    - line 59: // TODO: use other policy to update index
src/utilities/gen-amtool-config.py (1 line):
    - line 54: host = alert_manager_hosts[0] # TODO not sure if alert manager with HA workds this way