def main()

in src/slurm_plugin/suspend.py


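# Names not defined in this excerpt (argparse, logging, os, datetime, timezone,
# fileConfig, CONFIG_FILE_DIR, SlurmSuspendConfig, is_clustermgtd_heartbeat_valid,
# and the module logger "log") are imported or defined at module scope in suspend.py.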
def main():
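    # Bootstrap logging to a default file so startup messages are captured before
    # the plugin's own logging configuration is applied further below.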
    default_log_file = "/var/log/parallelcluster/slurm_suspend.log"
    logging.basicConfig(
        filename=default_log_file,
        level=logging.INFO,
        format="%(asctime)s - [%(name)s:%(funcName)s] - %(levelname)s - %(message)s",
    )
    log.info("SuspendProgram startup.")
    parser = argparse.ArgumentParser()
    parser.add_argument("nodes", help="Nodes to release")
    args = parser.parse_args()
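    # Load the suspend plugin configuration; the CONFIG_FILE environment variable
    # can override the default config file path.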
    config_file = os.environ.get("CONFIG_FILE", os.path.join(CONFIG_FILE_DIR, "parallelcluster_slurm_suspend.conf"))
    suspend_config = SlurmSuspendConfig(config_file)
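    # Apply the logging configuration referenced by the plugin config, falling back
    # to the basicConfig settings above if it cannot be loaded.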
    try:
        # Configure root logger
        fileConfig(suspend_config.logging_config, disable_existing_loggers=False)
    except Exception as e:
        log.warning(
            "Unable to configure logging from %s, using default settings and writing to %s.\nException: %s",
            suspend_config.logging_config,
            default_log_file,
            e,
        )

    log.info("Suspending following nodes. Clustermgtd will cleanup orphaned instances: %s", args.nodes)
    current_time = datetime.now(tz=timezone.utc)
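    # The actual EC2 instance termination is handled by clustermgtd; this program only
    # verifies, via the heartbeat file, that the daemon is alive to clean up the nodes.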
    if not is_clustermgtd_heartbeat_valid(
        current_time, suspend_config.clustermgtd_timeout, suspend_config.clustermgtd_heartbeat_file_path
    ):
        log.error(
            "No valid clustermgtd heartbeat detected, clustermgtd is down! "
            "Please check clustermgtd log for error.\n"
            "Nodes will be reset to POWER_SAVE state after SuspendTimeout. "
            "The backing EC2 instances may not be correctly terminated.\n"
            "Please check and terminate any orphaned instances in EC2!"
        )
    else:
        log.info("SuspendProgram finished. Nodes will be available after SuspendTimeout")