in src/slurm_plugin/instance_manager.py [0:0]
def _parse_nodes_resume_list(self, node_list: List[str]) -> defaultdict[str, defaultdict[str, List[str]]]:
    """
    Group the node names to resume by the launch configuration (queue/compute resource) they request.

    Each NodeName is decomposed with parse_nodename.
    Valid NodeName format: {queue_name}-{st/dy}-{compute_resource_name}-{number}
    Sample NodeName: queue1-st-computeres1-2

    Node names already listed in self.nodes_assigned_to_instances for their queue and compute
    resource are skipped. Node names for which parsing raises InvalidNodenameError or KeyError
    are logged and handed to self._update_failed_nodes with the "InvalidNodenameError" key.

    :param node_list: node names requested for resume
    :return: nested mapping queue_name -> compute_resource_name -> list of node names to launch
    """
    launch_requests = defaultdict(lambda: defaultdict(list))
    logger.debug("Nodes already assigned to running instances: %s", self.nodes_assigned_to_instances)

    for nodename in node_list:
        try:
            # The node type (st/dy) is parsed out but plays no role in the grouping.
            queue, _node_type, compute_resource = parse_nodename(nodename)
            assigned_in_queue = self.nodes_assigned_to_instances.get(queue, {})
            already_assigned = assigned_in_queue.get(compute_resource, [])
            if nodename not in already_assigned:
                launch_requests[queue][compute_resource].append(nodename)
                continue
            # An instance is already backing this node (oversubscribe case): nothing to launch.
            logger.info("Discarding NodeName already assigned to running instance: %s", nodename)
        except (InvalidNodenameError, KeyError):
            logger.warning("Discarding NodeName with invalid format: %s", nodename)
            self._update_failed_nodes({nodename}, "InvalidNodenameError")

    logger.debug("Launch configuration requested by nodes = %s", launch_requests)
    return launch_requests