in community/front-end/ofe/website/ghpcfe/views/clusters.py [0:0]
def form_valid(self, form):
logger.info("In form_valid")
context = self.get_context_data()
mountpoints = context["mountpoints_formset"]
partitions = context["cluster_partitions_formset"]
if self.object.status == "n":
# If creating a new cluster generate unique cloud id.
unique_str = secrets.token_hex(4)
self.object.cloud_id = self.object.name + "-" + unique_str
suffix = self.object.cloud_id.split("-")[-1]
self.object.cloud_id = self.object.name + "-" + suffix
self.object.cloud_region = self.object.subnet.cloud_region
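# Fetch the machine-type and persistent-disk catalogs for the selected region/zone; these drive the validation below.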
machine_info = cloud_info.get_machine_types(
"GCP",
self.object.cloud_credential.detail,
self.object.cloud_region,
self.object.cloud_zone,
)
disk_info = {
x["name"]: x
for x in cloud_info.get_disk_types(
"GCP",
self.object.cloud_credential.detail,
self.object.cloud_region,
self.object.cloud_zone,
)
if x["name"].startswith("pd-")
}
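# Only clusters that are newly created ("n") or running ("r") can be configured here.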
if self.object.status != "n" and self.object.status != "r":
form.add_error(None, "It is not newly created cluster or it is not running yet.")
return self.form_invalid(form)
# Verify Disk Types & Sizes
try:
my_info = disk_info[self.object.controller_disk_type]
if self.object.controller_disk_size < my_info["minSizeGB"]:
form.add_error(
"controller_disk_size",
"Minimum Disk Size for "
f"{self.object.controller_disk_type} is "
f"{my_info['minSizeGB']}"
)
return self.form_invalid(form)
if self.object.controller_disk_size > my_info["maxSizeGB"]:
form.add_error(
"controller_disk_size",
"Maximum Disk Size for "
f"{self.object.controller_disk_type} is "
f"{my_info['maxSizeGB']}"
)
return self.form_invalid(form)
except KeyError:
form.add_error("controller_disk_type", "Invalid Disk Type")
return self.form_invalid(form)
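# Repeat the same disk type and size validation for the login node disk.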
try:
my_info = disk_info[self.object.login_node_disk_type]
if self.object.login_node_disk_size < my_info["minSizeGB"]:
form.add_error(
"login_node_disk_size",
"Minimum Disk Size for "
f"{self.object.login_node_disk_type} is "
f"{my_info['minSizeGB']}"
)
return self.form_invalid(form)
if self.object.login_node_disk_size > my_info["maxSizeGB"]:
form.add_error(
"login_node_disk_size",
"Maximum Disk Size for "
f"{self.object.login_node_disk_type} is "
f"{my_info['maxSizeGB']}"
)
return self.form_invalid(form)
except KeyError:
form.add_error("login_node_disk_type", "Invalid Disk Type")
return self.form_invalid(form)
# Verify formset validity (surprised there's no method to do this)
for formset, formset_name in [
(mountpoints, "mountpoints"),
(partitions, "partitions"),
]:
if not formset.is_valid():
form.add_error(None, f"Error in {formset_name} section")
return self.form_invalid(form)
# Get the existing MountPoint objects associated with the cluster
existing_mount_points = MountPoint.objects.filter(cluster=self.object)
# Iterate through the existing mount points and check if they are in the updated formset
for mount_point in existing_mount_points:
if not any(mount_point_form.instance == mount_point for mount_point_form in mountpoints.forms):
# The mount point is not in the updated formset, so delete it
mount_point_path = mount_point.mount_path
mount_point_id = mount_point.pk
logger.info(f"Deleting mount point: {mount_point_path}, ID: {mount_point_id}")
mount_point.delete()
# Get the existing ClusterPartition objects associated with the cluster
existing_partitions = ClusterPartition.objects.filter(cluster=self.object)
logger.info(f"Processing total {len(partitions.forms)} partition forms.")
logger.info(f"Existing number of partitions is {len(partitions.forms)}.")
for partition in existing_partitions:
#logger.info(f"Checking existing partition: {partition.name}")
found = False
for partition_form in partitions.forms:
#logger.info(f"Checking form for partition: {partition_form.instance.name}")
if partition_form.instance == partition:
found = True
delete_status = partition_form.cleaned_data.get('DELETE', False)
if delete_status:
# Log the intent to delete then delete the partition
logger.info(f"Partition: {partition.name} (ID: {partition.pk}) marked for deletion.")
partition.delete()
else:
logger.info(f"No deletion requested for existing partition: {partition.name}.")
if not found:
# Log if no corresponding form was found for the partition
logger.info(f"No form found for Partition: {partition.name}.")
try:
with transaction.atomic():
# Save the modified Cluster object
self.object.save()
self.object = form.save()
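# Attach the formsets to the saved cluster and persist their rows.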
mountpoints.instance = self.object
mountpoints.save()
partitions.instance = self.object
parts = partitions.save()
try:
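# Track how many nodes each named reservation would need across all partitions so capacity can be checked once per reservation.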
total_nodes_requested = {}
for part in parts:
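# Usable vCPUs per node: with hyperthreading disabled only one thread per core is available, so halve the advertised vCPU count.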
part.vCPU_per_node = machine_info[part.machine_type]["vCPU"] // (1 if part.enable_hyperthreads else 2)
cpu_count = machine_info[part.machine_type]["vCPU"]
logger.info(f"{part.machine_type} CPU Count: {cpu_count}")
# Tier 1 networking validation: requires a supported machine family and at least 30 vCPUs
if part.enable_tier1_networking:
logger.info("User selected Tier1 networking, checking if nodes in partition are compatible.")
tier_1_supported_prefixes = ["n2-", "n2d-", "c2-", "c2d-", "c3-", "c3d-", "m3-", "z3-"]
is_tier_1_compatible = any(part.machine_type.startswith(prefix) for prefix in tier_1_supported_prefixes)
if not (cpu_count >= 30 and is_tier_1_compatible):
raise ValidationError(f"VM type {part.machine_type} is not compatible with Tier 1 networking.")
# Validate GPU choice
if part.GPU_type:
try:
accel_info = machine_info[part.machine_type]["accelerators"][part.GPU_type]
if (
part.GPU_per_node < accel_info["min_count"]
or part.GPU_per_node > accel_info["max_count"]
):
raise ValidationError(
"Invalid number of GPUs of type " f"{part.GPU_type}"
)
except KeyError as err:
raise ValidationError(f"Invalid GPU type {part.GPU_type}") from err
# Add validation for machine_type and disk_type combinations here
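# The list below encodes that C3 machines cannot boot from pd-standard, and H3 machines can boot from neither pd-standard nor pd-ssd.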
invalid_combinations = [
("c3-", "pd-standard"),
("h3-", "pd-standard"),
("h3-", "pd-ssd"),
]
for machine_prefix, disk_type in invalid_combinations:
if part.machine_type.startswith(machine_prefix) and part.boot_disk_type == disk_type:
logger.info("invalid disk")
raise ValidationError(
f"Invalid combination: machine_type {part.machine_type} cannot be used with disk_type {disk_type}."
)
# Sum the total nodes for each reservation
if part.reservation_name:
if part.reservation_name not in total_nodes_requested:
total_nodes_requested[part.reservation_name] = 0
total_nodes_requested[part.reservation_name] += part.dynamic_node_count + part.static_node_count
# Validate the total requested nodes against the nodes available in each reservation
for reservation_name, requested_nodes in total_nodes_requested.items():
reservations = cloud_info.get_vm_reservations(
"GCP",
self.object.cloud_credential.detail,
None,
self.object.cloud_zone
)
matching_reservation = reservations.get(reservation_name)
if not matching_reservation:
raise ValidationError(f"Reservation {reservation_name} was not found in zone {self.object.cloud_zone}.")
available_nodes = int(matching_reservation["instanceProperties"].get("availableCount", 0))
if requested_nodes > available_nodes:
raise ValidationError(
f"Reservation {reservation_name} does not have enough available nodes. "
f"Requested: {requested_nodes}, Available: {available_nodes}"
)
except KeyError as err:
raise ValidationError("Error in Partition - invalid machine type: " f"{part.machine_type}") from err
# Continue with saving the 'parts' if no validation errors were raised
parts = partitions.save()
except ValidationError as ve:
form.add_error(None, ve)
return self.form_invalid(form)
msg = (
"Provisioning a new cluster. This may take up to 15 minutes."
)
if self.object.status == "r":
msg = "Reconfiguring running cluster, this may take few minutes."
messages.success(self.request, msg)
# Be kind... warn if any mounted filesystem is in a different zone than the cluster.
for mp in self.object.mount_points.exclude(
export__filesystem__impl_type=FilesystemImpl.BUILT_IN
):
if mp.export.filesystem.cloud_zone != self.object.cloud_zone:
messages.warning(
self.request,
"Possibly expensive: Filesystem "
f"{mp.export.filesystem.name} is in a different zone "
f"({mp.export.filesystem.cloud_zone}) than the cluster!",
)
return super().form_valid(form)