in torchx/schedulers/local_scheduler.py [0:0]
def kill(self, *, timeout: float = 10) -> None:
    """
    Terminates all procs associated with this app,
    and closes any resources (e.g. log file handles).
    Safe to call multiple times.

    The termination consists of two stages:

    1. Call ``terminate()`` on every replica (SIGTERM) and wait for the
       child processes to exit, for at most ``timeout`` seconds in total.
    2. Any child process that is still alive once the grace period is over
       is terminated via ``proc.kill()`` (SIGKILL) and then reaped.

    Args:
        timeout: total grace period, in seconds, shared by all replicas
            between the SIGTERM and SIGKILL stages. A value ``<= 0`` skips
            the wait and escalates immediately.
    """
    # Flatten the role -> replicas mapping once so that every stage below
    # operates on exactly the same replicas, in the same order.
    all_replicas = [
        replica
        for role_replicas in self.role_replicas.values()
        for replica in role_replicas
    ]

    # Stage #1: SIGTERM
    for replica in all_replicas:
        replica.terminate()

    # The deadline is shared: time spent waiting on one replica is deducted
    # from the time left for the remaining ones.
    deadline = time.monotonic() + timeout
    for replica in all_replicas:
        remaining = deadline - time.monotonic()
        if remaining <= 0:
            # Grace period exhausted; no point checking the other replicas,
            # stage #2 deals with whatever is still running.
            break
        try:
            replica.proc.wait(remaining)
        except subprocess.TimeoutExpired:
            # Ignore the timeout expired exception, since
            # the child process will be forcefully terminated via SIGKILL
            pass

    # Stage #2: SIGKILL
    # Signal every survivor first, then reap, so that one slow exit does not
    # delay killing the others.
    for replica in all_replicas:
        if replica.proc.poll() is None:
            replica.proc.kill()

    for replica in all_replicas:
        # Untimed wait: reaps the child so it does not linger as a zombie.
        replica.proc.wait()
        # NOTE(review): terminate() is called a second time here, after the
        # process has been reaped (kept from the original). Presumably this
        # releases per-replica resources -- confirm it is still required and
        # that terminate() is safe on an already-exited process.
        replica.terminate()