in azure-slurm/slurmcc/topology.py [0:0]
def check_sharp_hello(self):
    """
    Run ``sharp_hello`` on the first host in ``self.hosts`` and log the result.

    The command path is built by placing ``self.sharp_cmd_path`` directly in
    front of ``sharp/bin/sharp_hello`` (no separator is inserted here). The
    command is launched through ``slutil.srun`` on ``self.hosts[0]`` only,
    using ``self.partition`` as the partition. Its standard output is logged
    at the debug level.

    Returns:
        int: 0 if the sharp_hello command passes successfully.

    Raises:
        SystemExit: With the command's exit code if sharp_hello fails, or
            with exit code 1 if the srun call times out.
    """
    cmd = f"{self.sharp_cmd_path}sharp/bin/sharp_hello"

    # Keep the try body to the single call that can raise the handled errors.
    try:
        output = slutil.srun([self.hosts[0]], cmd, partition=self.partition)
    except slutil.SrunExitCodeException as e:
        log.error("SHARP is disabled on cluster")
        if e.stderr_content:
            log.error(e.stderr_content)
        log.error(e.stderr)
        sys.exit(e.returncode)
    except subprocesslib.TimeoutExpired:
        # Previously this exited silently; leave a trace of why we stopped.
        log.error("sharp_hello command timed out on host %s", self.hosts[0])
        sys.exit(1)

    log.debug(output.stdout)

    # Defensive: a non-zero exit status is expected to surface as
    # SrunExitCodeException above, but if a result with a non-zero code is
    # ever returned, do not report it as a pass.
    if output.returncode != 0:
        log.error(
            "sharp_hello command failed with exit code %s", output.returncode
        )
        sys.exit(output.returncode)

    log.debug("sharp_hello command passed")
    return 0