in src/slurm_plugin/slurm_resources.py [0:0]
def is_bootstrap_failure(self, ec2_instance_missing_max_count, nodes_without_backing_instance_count_map: dict):
    """
    Report whether this slurm node hit a bootstrap failure.

    The checks run in order and stop at the first one that matches; the match is logged as a warning:
    1. is_static_nodes_in_replacement is set and is_backing_instance_valid returns a falsy value;
    2. is_bootstrap_timeout returns a truthy value;
    3. is_failing_health_check and is_static_nodes_in_replacement are both set.

    :param ec2_instance_missing_max_count: passed through unchanged to is_backing_instance_valid
    :param nodes_without_backing_instance_count_map: passed through unchanged to is_backing_instance_valid
    :return: True when one of the checks matches, False otherwise
    """
    error_template = None

    # The backing instance check is only evaluated when the node is in replacement (short-circuit),
    # and it is asked not to emit its own warning since one is logged here.
    in_replacement_without_instance = self.is_static_nodes_in_replacement and not self.is_backing_instance_valid(
        ec2_instance_missing_max_count=ec2_instance_missing_max_count,
        nodes_without_backing_instance_count_map=nodes_without_backing_instance_count_map,
        log_warn_if_unhealthy=False,
    )

    if in_replacement_without_instance:
        error_template = (
            "Node bootstrap error: Node %s is currently in replacement and no backing instance, node state: %s"
        )
    elif self.is_bootstrap_timeout():
        error_template = "Node bootstrap error: Replacement timeout expires for node %s in replacement, node state: %s"
    elif self.is_failing_health_check and self.is_static_nodes_in_replacement:
        error_template = (
            "Node bootstrap error: Node %s failed during bootstrap when performing health check, node state: %s"
        )

    if error_template is None:
        # None of the bootstrap failure conditions applies.
        return False

    logger.warning(error_template, self, self.state_string)
    return True