def main()

in cookbooks/aws-parallelcluster-slurm/files/default/config_slurm/scripts/health_check_manager.py [0:0]


def main():
    """Run the health check manager as a script entry point.

    Sets up default file logging, parses the command-line arguments, loads the
    manager configuration (path taken from the ``CONFIG_FILE`` environment
    variable, falling back to ``health_check_manager.conf`` under
    ``CONFIG_FILE_DIR``), and executes the health checks.

    Raises:
        SystemExit: always. The code is the value returned by
            ``_execute_health_checks`` on the normal path, or ``0`` when any
            ``Exception`` is raised along the way (the failure is logged).
    """
    default_log_file = "/var/log/parallelcluster/slurm_health_check.log"
    logging.basicConfig(
        filename=default_log_file,
        level=logging.INFO,
        format="%(asctime)s - [%(filename)s:%(funcName)s] - %(levelname)s - JobID %(job_id)s - %(message)s",
    )

    try:
        args = _parse_arguments()
        # Override global log object: the adapter supplies the ``job_id`` field
        # that the default log format above refers to.
        global log  # pylint: disable=W0603
        log = logging.LoggerAdapter(log, {"job_id": args.job_id})
        log.info("HealthCheckManager startup.")

        config_file = os.environ.get("CONFIG_FILE", os.path.join(CONFIG_FILE_DIR, "health_check_manager.conf"))
        health_check_manager_config = HealthCheckManagerConfig(config_file)
        try:
            # Configure root logger
            fileConfig(health_check_manager_config.logging_config, disable_existing_loggers=False)
        except Exception as err:
            # Best effort: keep the basicConfig defaults when the logging
            # configuration file cannot be applied.
            log.warning(
                "Unable to configure logging from %s, using default settings and writing to %s.\nException: %s",
                health_check_manager_config.logging_config,
                default_log_file,
                getattr(err, "message", err),
            )
        log.info("HealthCheckManager config: %s", health_check_manager_config)
        exit_code = _execute_health_checks(health_check_manager_config, args)
        log.info("HealthCheckManager finished with exit code '%s'.", exit_code)
        # SystemExit does not derive from Exception, so the handler below
        # does not intercept this exit.
        raise SystemExit(exit_code)

    except Exception as err:
        # Prefer the exception's ``message`` attribute when it has one.
        detail = getattr(err, "message", err)
        log.exception("Encountered exception when running Health Check Manager, exiting gracefully: %s", detail)
        raise SystemExit(0)