in src/slurm_plugin/slurm_resources.py [0:0]
def is_bootstrap_failure(self):
    """
    Report whether this Slurm node hit a bootstrap failure.

    Three cases are checked, in this order, and the first match wins:
      1. the node is configuring a job or powering up idle, but its backing instance is not valid;
      2. the node's resume failed and its nodeaddr is set;
      3. the node is flagged as failing health check while it is powering up.

    For the matching case a warning is logged with the node and its state string.

    :return: True when one of the cases above matches, False otherwise.
    """
    # Case 1: no backing instance + [working state]# in node state.
    # NOTE(review): `is_poweing_up_idle` is spelled the way the helper is referenced; it is defined outside
    # this block, so the name is left untouched here.
    in_power_up_state = self.is_configuring_job() or self.is_poweing_up_idle()
    if in_power_up_state and not self.is_backing_instance_valid(log_warn_if_unhealthy=False):
        # log_warn_if_unhealthy=False: presumably silences the helper's own warning, since this method logs
        # its own message below -- confirm against the helper.
        warning_format = (
            "Node bootstrap error: Node %s is in power up state without valid backing instance, node state: %s"
        )
    # Case 2: dynamic node in DOWN+CLOUD+POWERED_DOWN+NOT_RESPONDING state.
    # The nodeaddr check keeps nodes that are merely powering up during cluster start/stop from being
    # counted as bootstrap failures.
    elif self.is_resume_failed() and self.is_nodeaddr_set():
        warning_format = "Node bootstrap error: Resume timeout expires for node %s, node state: %s"
    # Case 3: health check failure while the node is still powering up.
    # NOTE(review): `is_failing_health_check` is read as an attribute/property (no call) -- confirm it is
    # not a method.
    elif self.is_failing_health_check and self.is_powering_up():
        warning_format = (
            "Node bootstrap error: Node %s failed during bootstrap when performing health check, node state: %s"
        )
    else:
        return False

    # Every failure case logs the same two arguments; only the message differs.
    logger.warning(warning_format, self, self.state_string)
    return True