in src/common/schedulers/slurm_commands.py [0:0]
def _parse_nodes_info(slurm_node_info: str) -> List[SlurmNode]:
"""Parse slurm node info into SlurmNode objects."""
# [ec2-user@ip-10-0-0-58 ~]$ /opt/slurm/bin/scontrol show nodes compute-dy-c5xlarge-[1-3],compute-dy-c5xlarge-50001\
# | awk 'BEGIN{{RS="\n\n" ; ORS="######\n";}} {{print}}' | grep -oP "^(NodeName=\S+)|(NodeAddr=\S+)
# |(NodeHostName=\S+)|(?<!Next)(State=\S+)|(Partitions=\S+)|(SlurmdStartTime=\S+)|(LastBusyTime=\\S+)
# |(ReservationName=\S+)|(Reason=.*)|(######)"
# NodeName=compute-dy-c5xlarge-1
# NodeAddr=1.2.3.4
# NodeHostName=compute-dy-c5xlarge-1
# State=IDLE+CLOUD+POWER
# Partitions=compute,compute2
# SlurmdStartTime=2023-01-26T09:57:15
# Reason=some reason
# ReservationName=root_1
# ######
# NodeName=compute-dy-c5xlarge-2
# NodeAddr=1.2.3.4
# NodeHostName=compute-dy-c5xlarge-2
# State=IDLE+CLOUD+POWER
# Partitions=compute,compute2
# SlurmdStartTime=2023-01-26T09:57:15
# Reason=(Code:InsufficientInstanceCapacity)Failure when resuming nodes
# ######
# NodeName=compute-dy-c5xlarge-3
# NodeAddr=1.2.3.4
# NodeHostName=compute-dy-c5xlarge-3
# State=IDLE+CLOUD+POWER
# Partitions=compute,compute2
# SlurmdStartTime=2023-01-26T09:57:15
# ######
# NodeName=compute-dy-c5xlarge-50001
# NodeAddr=1.2.3.4
# NodeHostName=compute-dy-c5xlarge-50001
# State=IDLE+CLOUD+POWER
# SlurmdStartTime=None
# ######
map_slurm_key_to_arg = {
"NodeName": "name",
"NodeAddr": "nodeaddr",
"NodeHostName": "nodehostname",
"State": "state",
"Partitions": "partitions",
"Reason": "reason",
"SlurmdStartTime": "slurmdstarttime",
"LastBusyTime": "lastbusytime",
"ReservationName": "reservation_name",
}
date_fields = ["SlurmdStartTime", "LastBusyTime"]
node_info = slurm_node_info.split("######\n")
slurm_nodes = []
for node in node_info:
lines = node.splitlines()
kwargs = {}
for line in lines:
key, value = line.split("=", 1)
if key in date_fields:
if value not in ["None", "Unknown"]:
value = datetime.strptime(value, "%Y-%m-%dT%H:%M:%S").astimezone(tz=timezone.utc)
else:
value = None
kwargs[map_slurm_key_to_arg[key]] = value
if lines:
try:
if is_static_node(kwargs["name"]):
node = StaticNode(**kwargs)
slurm_nodes.append(node)
else:
node = DynamicNode(**kwargs)
slurm_nodes.append(node)
except InvalidNodenameError:
log.warning("Ignoring node %s because it has an invalid name", kwargs["name"])
return slurm_nodes