in torchx/schedulers/local_scheduler.py [0:0]
def close(self) -> None:
    """
    Shut this app down and record its final state on disk.

    ``self.kill()`` is invoked first (per the original contract of this
    method, that terminates all procs associated with this app and closes
    resources such as log file handles). Afterwards a JSON summary of the
    app and of every replica of every role is written to a ``SUCCESS``
    file inside ``self.log_dir``; the presence of that file signals that
    the log files have been flushed and closed and are ready to read.

    The same JSON summary is also emitted at debug level.

    NOT safe to call multiple times!
    """
    self.kill()

    def _io_name(stream: Optional[BinaryIO]) -> str:
        # a falsy handle (e.g. ``None``) is reported as "<CONSOLE>",
        # otherwise the handle's ``name`` attribute is used
        return stream.name if stream else "<CONSOLE>"

    # one list of replica summaries per role, in the iteration order
    # of ``self.role_replicas``
    per_role = {
        name: [
            {
                "replica_id": r.replica_id,
                "pid": r.proc.pid,
                "exitcode": r.proc.returncode,
                "stdout": _io_name(r.stdout),
                "stderr": _io_name(r.stderr),
                "error_file": r.error_file,
            }
            for r in members
        ]
        for name, members in self.role_replicas.items()
    }

    summary = json.dumps(
        {
            "app_id": self.id,
            "log_dir": self.log_dir,
            "final_state": self.state.name,
            "last_updated": self.last_updated,
            "roles": per_role,
        },
        indent=2,
    )

    # drop the SUCCESS marker into the log dir: it tells readers that
    # the log file handles were closed properly and the logs can be
    # read reliably
    success_path = os.path.join(self.log_dir, "SUCCESS")
    with open(success_path, "w") as marker:
        marker.write(summary)

    log.debug(f"Successfully closed app_id: {self.id}.\n{summary}")