def get_node_action()

in community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/slurmsync.py [0:0]


def get_node_action(nodename: str) -> NodeAction:
    """Pick the action needed to reconcile a Slurm node with its instance.

    The node's Slurm state and (for regular nodes) its backing instance are
    looked up and compared. Future-reservation, flex, dynamic and TPU nodes
    are delegated to their dedicated handlers before the generic comparison
    runs.

    Args:
        nodename: Name of the Slurm node; may be an FQDN.

    Returns:
        The NodeAction to apply; NodeActionUnchanged when nothing applies.
    """
    lkp = lookup()
    state = lkp.node_state(nodename)

    # Future-reservation nodes: the handler may decline (falsy result), in
    # which case the remaining checks below still run.
    if lkp.node_is_fr(nodename):
        reservation = lkp.future_reservation(lkp.node_nodeset(nodename))
        assert reservation
        fr_action = get_fr_action(reservation, state)
        if fr_action:
            return fr_action

    # Specialised node kinds are handled entirely by their own routines.
    if lkp.is_flex_node(nodename):
        return _find_flex_node_actions(nodename, state, lkp)

    if lkp.node_is_dyn(nodename):
        return _find_dynamic_node_status()

    if lkp.node_is_tpu(nodename):
        return _find_tpu_node_action(nodename, state)

    # Workaround for VMs whose hostname is an FQDN: look up by the first label.
    short_name, _, _ = nodename.partition(".")
    inst = lkp.instance(short_name)

    # Subset of the power-related flags currently set on the node.
    known_power_flags = frozenset(
        ("POWER_DOWN", "POWERING_UP", "POWERING_DOWN", "POWERED_DOWN")
    )
    power_flags = known_power_flags & (state.flags if state is not None else set())

    if inst is None and state is None:
        # Should never happen
        return NodeActionUnknown(None, None)

    if inst is None:
        # Slurm knows the node but there is no instance behind it.
        assert state is not None  # to keep type-checker happy
        flags = state.flags
        if "POWERING_UP" in flags:
            return NodeActionUnchanged()

        node_is_down = state.base == "DOWN"
        if "POWERING_DOWN" in flags or (node_is_down and "POWERED_DOWN" in flags):
            return NodeActionIdle()
        if "COMPLETING" in flags:
            return NodeActionDown(reason="Unbacked instance")
        if not power_flags:
            # No power flag at all: the action depends only on the base state.
            if node_is_down:
                return NodeActionPowerDown()
            return NodeActionDown(reason="Unbacked instance")
        if "POWERED_DOWN" in flags and lkp.is_static_node(nodename):
            return NodeActionPowerUp()
        return NodeActionUnchanged()

    # From here on an instance exists.
    if state is None or "POWERED_DOWN" in state.flags:
        # Slurm has no state for the node, or considers it powered down.
        if inst.status == "RUNNING":
            log.info("%s is potential orphan node", nodename)
            threshold = timedelta(seconds=90)
            age = util.now() - inst.creation_timestamp
            log.info(f"{nodename} state: {state}, age: {age}")
            if age < threshold:
                log.info(f"{nodename} not marked as orphan, it started less than {threshold.seconds}s ago ({age.seconds}s)")
                return NodeActionUnchanged()
            return NodeActionDelete()
        if state is None:
            # if state is None here, the instance exists but it's not in Slurm
            return NodeActionUnknown(slurm_state=state, instance_state=inst.status)
        return NodeActionUnchanged()

    # Node is neither powered down nor (checked next) powering down in Slurm,
    # yet the instance reports TERMINATED.
    if "POWERING_DOWN" not in state.flags and inst.status == "TERMINATED":
        if inst.scheduling.preemptible:
            return NodeActionPrempt()
        if state.base != "DOWN":
            return NodeActionDown(reason="Instance terminated")

    return NodeActionUnchanged()