in cli/src/pcluster/config/cluster_config.py [0:0]
def _register_validators(self, context: ValidatorContext = None):  # noqa: C901
    """
    Register the validators that need a combined view of head node, queues and compute resources.

    The parent class validators are registered first. The checks added here then cover, in order:
    security groups, the optional login nodes and hosted zone, every queue (launch template for the
    first queue only, root volume, AMI), every compute resource (architecture, EFA, placement group,
    capacity reservation, tags) and every instance type of each compute resource.
    Totals accumulated while walking the queues feed the head node memory, shared EBS and capacity
    reservation size validators registered at the end.

    :param context: validator context, forwarded unchanged to the parent implementation
    """
    super()._register_validators(context)

    self._register_validator(
        MixedSecurityGroupOverwriteValidator,
        head_node_security_groups=self.head_node.networking.security_groups,
        queues=self.scheduling.queues,
    )

    if self.login_nodes:
        self._register_login_node_validators()

    # The hosted zone check only applies when the whole settings -> dns -> hosted_zone_id chain is set.
    scheduler_settings = self.scheduling.settings
    dns_settings = scheduler_settings.dns if scheduler_settings else None
    hosted_zone_id = dns_settings.hosted_zone_id if dns_settings else None
    if hosted_zone_id:
        self._register_validator(
            HostedZoneValidator,
            hosted_zone_id=self.scheduling.settings.dns.hosted_zone_id,
            cluster_vpc=self.vpc_id,
            cluster_name=self.cluster_name,
        )

    type_data_by_name = self.get_instance_types_data()
    self._register_validator(MultiNetworkInterfacesInstancesValidator, queues=self.scheduling.queues)

    seen_images = []  # AMIs already submitted to AmiOsCompatibleValidator
    reserved_count_by_cr_id = {}  # capacity reservation id -> sum of max_count of the resources using it
    total_max_compute_nodes = 0

    for queue_position, queue in enumerate(self.scheduling.queues):
        queue_image = self.image_dict[queue.name]

        # The launch template validation is executed for the first queue only.
        if queue_position == 0:
            self._register_validator(
                ComputeResourceLaunchTemplateValidator,
                queue=queue,
                ami_id=queue_image,
                root_volume_device_name=AWSApi.instance().ec2.describe_image(queue_image).device_name,
                tags=self.get_tags(),
                imds_support=self.imds.imds_support,
            )

        ami_volume_size = AWSApi.instance().ec2.describe_image(queue_image).volume_size
        root_volume = queue.compute_settings.local_storage.root_volume
        configured_size = root_volume.size
        # When the root volume size is not specified, the volume takes the size of the AMI.
        root_volume_size = ami_volume_size if configured_size is None else configured_size

        self._register_validator(
            RootVolumeSizeValidator,
            root_volume_size=root_volume_size,
            ami_volume_size=ami_volume_size,
        )
        self._register_validator(
            EbsVolumeTypeSizeValidator,
            volume_type=root_volume.volume_type,
            volume_size=root_volume_size,
        )
        self._register_validator(
            EbsVolumeIopsValidator,
            volume_type=root_volume.volume_type,
            volume_size=root_volume_size,
            volume_iops=root_volume.iops,
        )

        # Check OS compatibility once per distinct image, and only for queues that set their own AMI.
        if queue_image not in seen_images and queue.queue_ami:
            seen_images.append(queue_image)
            self._register_validator(AmiOsCompatibleValidator, os=self.image.os, image_id=queue_image)

        for compute_resource in queue.compute_resources:
            total_max_compute_nodes += compute_resource.max_count

            self._register_validator(
                InstanceArchitectureCompatibilityValidator,
                instance_type_info_list=list(compute_resource.instance_type_info_map.values()),
                architecture=self.head_node.architecture,
            )
            self._register_validator(
                EfaOsArchitectureValidator,
                efa_enabled=compute_resource.efa.enabled,
                os=self.image.os,
                architecture=self.head_node.architecture,
            )
            self._register_validator(
                PlacementGroupCapacityTypeValidator,
                capacity_type=queue.capacity_type,
                placement_group_enabled=queue.is_placement_group_enabled_for_compute_resource(compute_resource),
            )

            # The capacity reservation validations live in the cluster config class rather than in the
            # queue class so that the subnet APIs are already cached by the previous validations.
            # A target set on the compute resource takes precedence over the one set on the queue.
            cr_target = compute_resource.capacity_reservation_target or queue.capacity_reservation_target
            if cr_target:
                if cr_target.capacity_reservation_id:
                    # Accumulate the instances requested from this capacity reservation across all
                    # the configured compute resources, so the total can be checked against its size.
                    already_reserved = reserved_count_by_cr_id.get(cr_target.capacity_reservation_id, 0)
                    reserved_count_by_cr_id[cr_target.capacity_reservation_id] = (
                        already_reserved + compute_resource.max_count
                    )
                self._register_validator(
                    CapacityReservationValidator,
                    capacity_reservation_id=cr_target.capacity_reservation_id,
                    instance_types=compute_resource.instance_types,
                    is_flexible=compute_resource.is_flexible(),
                    subnet=queue.networking.subnet_ids[0],
                    capacity_type=queue.capacity_type,
                    os=self.image.os,
                )
                self._register_validator(
                    CapacityReservationResourceGroupValidator,
                    capacity_reservation_resource_group_arn=cr_target.capacity_reservation_resource_group_arn,
                    instance_types=compute_resource.instance_types,
                    subnet_ids=queue.networking.subnet_ids,
                    queue_name=queue.name,
                    subnet_id_az_mapping=queue.networking.subnet_id_az_mapping,
                )
                placement_group_settings = queue.get_placement_group_settings_for_compute_resource(
                    compute_resource
                )
                self._register_validator(
                    PlacementGroupCapacityReservationValidator,
                    placement_group=placement_group_settings.get("key"),
                    odcr=cr_target,
                    subnet=queue.networking.subnet_ids[0],
                    instance_types=compute_resource.instance_types,
                    multi_az_enabled=queue.multi_az_enabled,
                    subnet_id_az_mapping=queue.networking.subnet_id_az_mapping,
                )

            for instance_type in compute_resource.instance_types:
                # Memory information is only validated when memory-based scheduling is enabled.
                if self.scheduling.settings.enable_memory_based_scheduling:
                    self._register_validator(
                        InstanceTypeMemoryInfoValidator,
                        instance_type=instance_type,
                        instance_type_data=type_data_by_name[instance_type],
                    )
                self._register_validator(
                    InstanceTypeBaseAMICompatibleValidator,
                    instance_type=instance_type,
                    image=queue_image,
                )
                self._register_validator(
                    InstanceTypeOSCompatibleValidator,
                    instance_type=instance_type,
                    os=self.image.os,
                )
                self._register_validator(
                    InstanceTypeAcceleratorManufacturerValidator,
                    instance_type=instance_type,
                    instance_type_data=type_data_by_name[instance_type],
                )
                self._register_validator(
                    InstanceTypePlacementGroupValidator,
                    instance_type=instance_type,
                    instance_type_data=type_data_by_name[instance_type],
                    placement_group_enabled=queue.is_placement_group_enabled_for_compute_resource(compute_resource),
                )

            if isinstance(compute_resource, SlurmFlexibleComputeResource):
                # Every validator below receives the same set of keyword arguments.
                flexible_validator_kwargs = {
                    "queue_name": queue.name,
                    "multiaz_queue": queue.multi_az_enabled,
                    "capacity_type": queue.capacity_type,
                    "allocation_strategy": queue.allocation_strategy,
                    "compute_resource_name": compute_resource.name,
                    "instance_types_info": compute_resource.instance_type_info_map,
                    "disable_simultaneous_multithreading": compute_resource.disable_simultaneous_multithreading,
                    "efa_enabled": compute_resource.efa and compute_resource.efa.enabled,
                    "placement_group_enabled": queue.is_placement_group_enabled_for_compute_resource(
                        compute_resource
                    ),
                    "memory_scheduling_enabled": self.scheduling.settings.enable_memory_based_scheduling,
                }
                for flexible_validator in (
                    InstancesCPUValidator,
                    InstancesAcceleratorsValidator,
                    InstancesEFAValidator,
                    InstancesNetworkingValidator,
                    InstancesAllocationStrategyValidator,
                    InstancesMemorySchedulingWarningValidator,
                ):
                    self._register_validator(flexible_validator, **flexible_validator_kwargs)

            self._register_validator(
                ComputeResourceTagsValidator,
                queue_name=queue.name,
                compute_resource_name=compute_resource.name,
                cluster_tags=self.get_tags(),
                queue_tags=queue.get_tags(),
                compute_resource_tags=compute_resource.get_tags(),
            )

    # Cluster-wide checks that need the totals accumulated over all the queues.
    self._register_validator(
        HeadNodeMemorySizeValidator,
        head_node_instance_type=self.head_node.instance_type,
        total_max_compute_nodes=total_max_compute_nodes,
    )

    # One registration per shared EBS volume; nothing to do when no shared storage is configured.
    for storage in self.shared_storage or []:
        if isinstance(storage, SharedEbs):
            self._register_validator(
                SharedEbsPerformanceBottleNeckValidator,
                total_max_compute_nodes=total_max_compute_nodes,
            )

    for capacity_reservation_id, num_of_instances in reserved_count_by_cr_id.items():
        self._register_validator(
            CapacityReservationSizeValidator,
            capacity_reservation_id=capacity_reservation_id,
            num_of_instances=num_of_instances,
        )