in torchx/schedulers/local_scheduler.py [0:0]
def describe(self, app_id: str) -> Optional[DescribeAppResponse]:
    """
    Report the current status of the app registered under ``app_id``.

    When the app's stored state is not yet terminal, the state is recomputed
    from its replicas and written back via ``set_state``: ``RUNNING`` if any
    replica is alive, otherwise ``FAILED`` if any replica failed, otherwise
    ``SUCCEEDED``. Whenever the stored state is terminal (before or after
    that refresh), ``close()`` is invoked on the app.

    Args:
        app_id: key of the app in ``self._apps``.

    Returns:
        ``None`` if ``app_id`` is not present in ``self._apps``; otherwise a
        ``DescribeAppResponse`` populated with the app id, the resolved state,
        the app's structured error message, ``num_restarts`` fixed at 0, and a
        ``file://`` URL built from the app's ``log_dir``.
    """
    if app_id not in self._apps:
        return None

    app = self._apps[app_id]
    # Fetched up front, before the replicas are polled below.
    error_msg = app.get_structured_error_msg()

    if not is_terminal(app.state):
        # Probe every replica without short-circuiting so that both
        # is_alive() and failed() are called on each one, in that order.
        probes = [
            (replica.is_alive(), replica.failed())
            for group in app.role_replicas.values()
            for replica in group
        ]
        if any(alive for alive, _ in probes):
            state = AppState.RUNNING
        elif any(failed for _, failed in probes):
            state = AppState.FAILED
        else:
            # No replica is alive and none failed (also the no-replica case).
            state = AppState.SUCCEEDED
        app.set_state(state)
    else:
        # The app is already known to have finished; reuse the stored state.
        state = app.state

    if is_terminal(app.state):
        app.close()

    resp = DescribeAppResponse()
    resp.app_id = app_id
    resp.state = state
    resp.structured_error_msg = error_msg
    resp.num_restarts = 0
    resp.ui_url = f"file://{app.log_dir}"
    return resp