# def _run_computemgtd()
#
# in src/slurm_plugin/computemgtd.py [0:0]


def _run_computemgtd(config_file):
    """
    Run the computemgtd main loop; this function never returns on its own.

    The daemon config is loaded once from ``config_file`` before the loop starts
    (a failure of that initial load propagates to the caller). Every iteration then:

    1. Counts down towards a config reload; once the countdown is exhausted the
       config is loaded again. A failed reload is logged and the previous config
       is kept.
    2. Reads the clustermgtd heartbeat; if that fails, the last known heartbeat
       is kept (initially the computemgtd startup time).
    3. If the heartbeat is reported as expired, either logs that actions are
       disabled or, when ``_is_self_node_down`` reports true for the configured
       node name, calls ``_self_terminate``.
    4. Calls ``sleep_remaining_loop_time`` with the configured loop time and the
       timestamp taken at the start of the iteration.

    :param config_file: passed unchanged to ``_load_daemon_config``
    """
    # Until a real heartbeat is read, our own startup time stands in for it
    heartbeat = datetime.now(tz=timezone.utc)
    log.info("Initializing clustermgtd heartbeat to be computemgtd startup time: %s", heartbeat)
    config = _load_daemon_config(config_file)
    iterations_until_reload = RELOAD_CONFIG_ITERATIONS
    while True:
        # Timestamp of this iteration, used for both the expiry check and the sleep
        loop_start = datetime.now(tz=timezone.utc)

        if iterations_until_reload > 0:
            iterations_until_reload -= 1
        else:
            # The countdown is only reset after a successful load, so a failed
            # reload is attempted again on the next iteration.
            try:
                config = _load_daemon_config(config_file)
                iterations_until_reload = RELOAD_CONFIG_ITERATIONS
            except Exception as reload_error:
                log.warning("Unable to reload daemon config, using previous one.\nException: %s", reload_error)

        # Refresh the heartbeat, keeping the previous value when it cannot be read
        try:
            heartbeat = get_clustermgtd_heartbeat(config.clustermgtd_heartbeat_file_path)
            log.info("Latest heartbeat from clustermgtd: %s", heartbeat)
        except Exception as heartbeat_error:
            log.warning(
                "Unable to retrieve clustermgtd heartbeat. Using last known heartbeat: %s with exception: %s",
                heartbeat,
                heartbeat_error,
            )

        # Short-circuiting keeps the config flag and node check untouched unless
        # the heartbeat has actually expired.
        heartbeat_expired = expired_clustermgtd_heartbeat(heartbeat, loop_start, config.clustermgtd_timeout)
        if heartbeat_expired and config.disable_computemgtd_actions:
            log.info("All computemgtd actions currently disabled")
        elif heartbeat_expired and _is_self_node_down(config.nodename):
            _self_terminate()

        sleep_remaining_loop_time(config.loop_time, loop_start)