in torchx/apps/utils/process_monitor.py [0:0]
def main(argv: List[str]) -> None:
    """Launch the configured entrypoint and supervise it until it exits.

    The steps are:

    1. If ``args.start_on_file`` is set, poll until that file exists before
       launching. If ``args.timeout`` elapses first, exit with
       ``TIMEOUT_EXIT_CODE`` without launching anything.
    2. Start ``[args.entrypoint] + args.args`` as a child process.
    3. Poll the child every ``args.poll_rate`` seconds. If it exits on its
       own, exit with its return code.
    4. If ``args.timeout`` elapses or ``args.exit_on_file`` exists, send the
       child a terminate request, wait up to ``args.kill_timeout`` seconds,
       kill it if it is still running, and exit with its return code.

    The timeout is measured from the moment this function starts, so time
    spent waiting on ``start_on_file`` counts against it. A falsy
    ``args.timeout`` disables the timeout entirely.

    Args:
        argv: Command-line arguments, forwarded to ``parse_args``.

    Raises:
        SystemExit: Always. This function ends through ``sys.exit`` on every
            path and never returns normally.
    """
    args = parse_args(argv)
    start_time = time.time()

    if args.start_on_file:
        start_fs, start_path = fsspec.core.url_to_fs(args.start_on_file)
        while True:
            if start_fs.exists(start_path):
                print(f"{args.start_on_file} exists, starting process...")
                break
            if args.timeout and time.time() - start_time > args.timeout:
                print("reached timeout before launching, terminating...")
                sys.exit(TIMEOUT_EXIT_CODE)
            time.sleep(args.poll_rate)

    p = subprocess.Popen([args.entrypoint] + args.args)
    print(f"started process {p.pid}")

    # (filesystem, path) for exit_on_file. Resolved on the first poll that
    # needs it and then reused, instead of re-resolving the URL every poll.
    exit_target = None

    while True:
        try:
            # Doubles as the poll sleep: returns as soon as the child exits,
            # otherwise raises TimeoutExpired after poll_rate seconds.
            p.wait(args.poll_rate)
        except subprocess.TimeoutExpired:
            if args.timeout and time.time() - start_time > args.timeout:
                print("reached timeout, terminating...")
                break
            if args.exit_on_file:
                if exit_target is None:
                    exit_target = fsspec.core.url_to_fs(args.exit_on_file)
                exit_fs, exit_path = exit_target
                if exit_fs.exists(exit_path):
                    print(f"{args.exit_on_file} exists, terminating...")
                    break
        else:
            # The child finished on its own; propagate its exit code.
            print(f"process exited with exit code {p.returncode}")
            sys.exit(p.returncode)

    # Ask the child to stop first; only kill it if it ignores the request
    # for longer than kill_timeout.
    p.terminate()
    print("issued terminate, waiting for exit...")
    try:
        p.wait(args.kill_timeout)
    except subprocess.TimeoutExpired:
        print("reached safe termination timeout, killing...")
        p.kill()
        # Wait for the killed child so returncode is populated.
        p.wait()
    print(f"process exited with exit code {p.returncode}")
    sys.exit(p.returncode)