in azure-slurm/slurmcc/topology.py [0:0]
def retrieve_guids(self) -> None:
    """
    Retrieve InfiniBand Port GUIDs from the hosts and map them to hostnames.

    Runs a shell pipeline on ``self.hosts`` (via ``slutil.srun`` in
    ``self.partition``) that lists the ``mlx5_ib`` devices reported by
    ``ibstatus``, queries each with ``ibstat``, extracts the "Port GUID"
    values, and prefixes every GUID with the name of the host it came from,
    producing lines of the form ``<hostname>: <guid>``.

    Each parsed GUID is stored in ``self.guid_to_host_map`` as
    ``{guid: hostname}``. A leading ``0x00`` is normalised to ``0x`` so the
    keys match the GUID format returned by Sharp.

    Blank or malformed output lines are logged and skipped rather than
    aborting the whole scan.

    Exits the process with the srun return code if the command fails, or
    with status 1 if it times out.
    """
    cmd = (
        'ibstatus | grep mlx5_ib | cut -d" " -f3 | '
        'xargs -I% ibstat "%" | grep "Port GUID" | cut -d: -f2 | '
        'while IFS= read -r line; do echo "$(hostname): $line"; done'
    )
    try:
        output = slutil.srun(self.hosts, cmd, shell=True, partition=self.partition)
    except slutil.SrunExitCodeException as e:
        log.error("Error running retrieve_guids command on hosts")
        if e.stderr_content:
            log.error(e.stderr_content)
        log.error(e.stderr)
        sys.exit(e.returncode)
    except subprocesslib.TimeoutExpired:
        # Leave a trace of why we are exiting instead of failing silently.
        log.error("Timed out running retrieve_guids command on hosts")
        sys.exit(1)

    # splitlines() keeps the final line even when stdout lacks a trailing
    # newline, unlike split('\n')[:-1] which would always drop it.
    for line in output.stdout.splitlines():
        if not line.strip():
            continue

        # Split on the first ':' only: the hostname precedes it and the
        # GUID follows it.
        node, sep, guid = line.partition(':')
        node = node.strip()
        guid = guid.strip()
        if not sep or not node or not guid:
            log.error(f"Skipping unparseable GUID line: {line!r}")
            continue

        # Querying GUIDs from ibstat will have pattern 0x0099999999999999,
        # but Sharp will return 0x99999999999999
        # - So we need to remove the leading 00 after 0x (prefix only).
        if guid.startswith('0x00'):
            guid = '0x' + guid[4:]

        self.guid_to_host_map[guid] = node