def _handle_protected_mode_process()

in src/slurm_plugin/clustermgtd.py [0:0]


    def _handle_protected_mode_process(self, active_nodes, partitions_name_map):
        """
        Update bootstrap failure tracking and enter protected mode when warranted.

        Steps, in order:
        1. If failure counts are already being tracked, process successfully launched nodes.
        2. Process bootstrap failure nodes among ``active_nodes``.
        3. Select the tracked partitions that are present in ``partitions_name_map``, are not inactive
           and whose summed failure count has reached ``self._config.protected_failure_count``.
        4. Enter protected mode for the selected partitions only when none of them has a running job.
        5. Emit a warning whenever the compute fleet status is protected.

        :param active_nodes: nodes handed to the bootstrap failure handler
        :param partitions_name_map: mapping from partition name to partition object
        """
        # Successfully launched nodes only need handling when failures are already being tracked
        if self._partitions_protected_failure_count_map:
            self._handle_successfully_launched_nodes(partitions_name_map)
        self._handle_bootstrap_failure_nodes(active_nodes)

        # The map is checked again on purpose: the handlers above run before this point
        if self._partitions_protected_failure_count_map:
            log.info(
                "Partitions bootstrap failure count: %s, cluster will be set into protected mode if "
                "protected failure count reaches threshold %s",
                self._partitions_protected_failure_count_map,
                self._config.protected_failure_count,
            )

        # Split the partitions that reached the threshold into the ones still running jobs
        # and the ones that can be disabled
        idle_failing_partitions = []
        busy_failing_partitions = []
        for partition_name, failure_counts in self._partitions_protected_failure_count_map.items():
            partition = partitions_name_map.get(partition_name)
            # Skip partitions missing from the name map and partitions that are already inactive
            if not partition or partition.is_inactive():
                continue
            reached_threshold = sum(failure_counts.values()) >= self._config.protected_failure_count
            if not reached_threshold:
                continue
            target = busy_failing_partitions if partition.has_running_job() else idle_failing_partitions
            target.append(partition_name)

        if not busy_failing_partitions:
            if idle_failing_partitions:
                self._enter_protected_mode(idle_failing_partitions)
        else:
            # As soon as one failing partition has running jobs, protected mode is not entered
            # in this call, even if other failing partitions have no running jobs
            log.info(
                "Bootstrap failure partitions %s currently have jobs running, not disabling them",
                busy_failing_partitions,
            )
            if not idle_failing_partitions:
                log.info("Not entering protected mode since active jobs are running in bootstrap failure partitions")

        # Keep reminding the user for as long as the fleet status is protected
        if ComputeFleetStatus.is_protected(self._compute_fleet_status):
            log.warning(
                "Cluster is in protected mode due to failures detected in node provisioning. "
                "Please investigate the issue and then use 'pcluster update-compute-fleet --status START_REQUESTED' "
                "command to re-enable the fleet."
            )