in torchx/schedulers/kubernetes_scheduler.py [0:0]
def describe(self, app_id: str) -> Optional[DescribeAppResponse]:
    """Build a ``DescribeAppResponse`` for the Volcano job named by ``app_id``.

    ``app_id`` is split on ``":"`` into exactly two parts, the namespace
    and the job name; any other number of parts raises ``ValueError``
    from the tuple unpacking.

    The job's status is fetched through the custom objects API
    (``batch.volcano.sh/v1alpha1``, plural ``jobs``). If the response
    carries no (or an empty) ``"status"`` entry, the app state is
    ``AppState.UNKNOWN`` and no roles are reported. Otherwise the job phase
    is mapped through ``JOB_STATE`` and, when ``"taskStatusCount"`` is
    present, one replica is recorded per task entry.

    Task names are split at their last ``"-"``: the text before it is used
    as the role name and the text after it is converted to the integer
    replica id.

    Note: the code in this method always returns a response object; it
    never returns ``None`` itself.
    """
    namespace, job_name = app_id.split(":")
    job = self._custom_objects_api().get_namespaced_custom_object_status(
        group="batch.volcano.sh",
        version="v1alpha1",
        namespace=namespace,
        plural="jobs",
        name=job_name,
    )

    role_by_name = {}
    role_status_by_name = {}

    job_status = job.get("status")
    if not job_status:
        app_state = AppState.UNKNOWN
    else:
        app_state = JOB_STATE[job_status["state"]["phase"]]

        task_counts = job_status.get("taskStatusCount", {})
        for task_name, task_status in task_counts.items():
            role_name, _, replica_idx = task_name.rpartition("-")

            # The first key of the task's "phase" mapping is taken as the
            # replica's phase name.
            replica_state = TASK_STATE[next(iter(task_status["phase"].keys()))]

            role = role_by_name.get(role_name)
            if role is None:
                # First replica seen for this role: create its (initially
                # empty) role and role-status records together.
                role = Role(name=role_name, num_replicas=0, image="")
                role_by_name[role_name] = role
                role_status_by_name[role_name] = RoleStatus(role_name, [])

            role.num_replicas += 1
            role_status_by_name[role_name].replicas.append(
                ReplicaStatus(
                    id=int(replica_idx),
                    role=role_name,
                    state=replica_state,
                    hostname="",
                )
            )

    return DescribeAppResponse(
        app_id=app_id,
        roles=list(role_by_name.values()),
        roles_statuses=list(role_status_by_name.values()),
        state=app_state,
    )