in community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/resume.py [0:0]
def _allocate_nodes_to_placements(nodes: List[str], excl_job_id: Optional[int], lkp: util.Lookup) -> List[PlacementAndNodes]:
# canned result for no placement policies created
no_pp = [PlacementAndNodes(placement=None, nodes=nodes)]
    if excl_job_id and len(nodes) < 2:
        return no_pp # don't create a placement policy for a single node
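    # the first node is used as a representative sample; all nodes handled here are expected to share a nodeset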
model = nodes[0]
nodeset = lkp.node_nodeset(model)
if lkp.is_flex_node(model):
return no_pp # TODO(FLEX): Add support for workload policies
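    # TPU nodes don't use GCE placement policies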
if lkp.node_is_tpu(model):
return no_pp
if not (nodeset.enable_placement and valid_placement_node(model)):
return no_pp
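    # maximum number of nodes that can share a single placement policy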
max_count = calculate_chunk_size(nodeset, lkp)
name_prefix = f"{lkp.cfg.slurm_cluster_name}-slurmgcp-managed-{nodeset.nodeset_name}"
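    # placement policy names follow "<name_prefix>-<job id or 0>-<chunk index>"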
    if excl_job_id: # chunk the given nodes by the maximum placement size
return [
PlacementAndNodes(placement=f"{name_prefix}-{excl_job_id}-{i}", nodes=chunk)
for i, chunk in enumerate(chunked(nodes, n=max_count))
]
    # split the whole nodeset (not only the nodes to resume) into chunks of max placement size,
    # then create placements (which most likely already exist) for the requested nodes
chunks = collections.defaultdict(list) # chunk_id -> nodes
invalid = []
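    # map each node to a chunk by its index within the nodeset, so a node always lands in the same placement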
for node in nodes:
try:
chunk = lkp.node_index(node) // max_count
chunks[chunk].append(node)
        except Exception:
            # node name doesn't match the expected pattern, so its index within the nodeset is unknown
            invalid.append(node)
placements = [
# NOTE: use 0 instead of job_id for consistency with previous SlurmGCP behavior
PlacementAndNodes(placement=f"{name_prefix}-0-{c_id}", nodes=c_nodes)
for c_id, c_nodes in chunks.items()
]
if invalid:
placements.append(PlacementAndNodes(placement=None, nodes=invalid))
log.error(f"Could not find placement for nodes with unexpected names: {to_hostlist(invalid)}")
return placements