in src/sagemaker_training/torch_distributed.py [0:0]
def run(self, capture_error=True, wait=True):
    """
    Launch the command built by ``_create_command`` from the code directory.

    The command and ``self._env_vars`` are passed to
    ``logging_config.log_script_invocation`` before the launch.

    Args:
        capture_error (bool): A boolean indicating whether to direct stderr to a
            stream that can later be read. Defaults to True.
        wait (bool): Selects the launcher. When True the command goes through
            ``process.check_error``; when False it goes through
            ``process.create``. Presumably the former blocks until the command
            finishes and the latter returns immediately -- confirm against the
            ``process`` module. Defaults to True.

    Returns:
        Whatever the selected launcher returns, passed through unchanged.
        Previously documented as the spawned process (subprocess.Popen);
        TODO confirm this also holds for the ``wait=True`` path.
    """
    command = self._create_command()
    logging_config.log_script_invocation(command, self._env_vars)

    # Both launchers take exactly the same arguments; only the callable differs.
    launcher = process.check_error if wait else process.create
    return launcher(
        command,
        errors.ExecuteUserScriptError,
        self._processes_per_host,
        capture_error=capture_error,
        cwd=environment.code_dir,
    )