def _execute_health_checks()

in cookbooks/aws-parallelcluster-slurm/files/default/config_slurm/scripts/health_check_manager.py [0:0]


def _execute_health_checks(health_check_manager_config: HealthCheckManagerConfig, args: argparse.Namespace) -> int:
    """
    Execute every enabled Health Check and publish the outcome of each one.

    The Health Check configuration is loaded through HealthCheckConfigLoader. Each enabled check is run as a
    subprocess bounded by ``health_check_manager_config.health_check_timeout``, with stderr merged into stdout.
    The output and return code of each check are logged and forwarded to the event publisher.

    :param health_check_manager_config: manager configuration; supplies the timeout applied to each check
    :param args: parsed command line arguments; ``args.job_id`` is attached to the published events
    :return: sum of the return codes of the checks that ran to completion. Checks that time out or cannot be
        started are logged and published as exceptions, but contribute nothing to the sum.
    """
    health_check_conf = HealthCheckConfigLoader().load_configuration(health_check_manager_config, args)

    event_publisher = _get_event_publisher(args)

    exit_code_sum = 0
    # Counted separately from the list length: checks may be configured but disabled.
    enabled_count = 0

    for health_check in health_check_conf.health_checks:
        if not health_check.is_enabled:
            continue
        enabled_count += 1

        try:
            log.info(
                "Executing Health Check '%s' for queue '%s' and compute resource '%s'",
                health_check.name,
                health_check_conf.queue_name,
                health_check_conf.compute_resource_name,
            )

            # The command in this subprocess call is built as literal
            result = subprocess.run(
                health_check.check_path,
                timeout=health_check_manager_config.health_check_timeout,
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
                encoding="utf-8",
                check=False,
                shell=False,  # nosec B603
            )
            exit_code_sum += result.returncode
            output = f":\n{result.stdout}" if result.stdout else " empty"
            log.info("Output of Health Check '%s' execution is%s", health_check.name, output)
            publish_health_check_result(
                event_publisher, args.job_id, health_check.name, result.returncode, result.stdout
            )
        except (subprocess.SubprocessError, OSError) as err:
            # Prefer the exception's "message" attribute when it has one; otherwise report the exception itself.
            # A separate local is used so the exception variable is not rebound inside its own handler.
            error_detail = getattr(err, "message", err)
            log.error(
                "Failure when executing Health Check '%s' for queue '%s' and compute resource '%s', with error: %s",
                health_check.name,
                health_check_conf.queue_name,
                health_check_conf.compute_resource_name,
                error_detail,
            )
            publish_health_check_exception(event_publisher, args.job_id, health_check.name, error_detail)

    # Covers both an empty configuration and one where every configured check is disabled.
    if enabled_count == 0:
        log.info("No Health Check enabled found")

    return exit_code_sum