sub capture()

in managementnode/lib/VCL/Module/Provisioning/xCAT.pm [466:629]


# capture
#
# Captures the reservation's image from the reservation's computer using xCAT.
#
# Parameters  : none (must be called as an object method on an xCAT object)
# Returns     : 1 if the capture completed; a bare return (undef in scalar
#               context, empty list in list context) on any failure
# Description : 1. Calls the OS module's pre_capture() with end_state 'off' and
#                  makes sure the computer is powered off.
#               2. Points xCAT at the new image (_edit_nodetype,
#                  _create_template, _edit_nodelist, _nodeset 'image').
#               3. Powers the computer on and monitors the capture until one of:
#                  - the "<image>.img.capturedone" file appears (success)
#                  - the "<image>.img.capturefailed" file appears (failure)
#                  - the node status changes to something matching /boot/
#                    while the image size is > 0 (assumed success)
#                  - no change in image size or node status is seen for
#                    $nochange_timeout_minutes minutes (failure)
#                  - $overall_timeout_hours hours elapse in total (failure)
#               4. Sets permissions on the captured image files.
sub capture {
	my $self = shift;
	if (ref($self) !~ /xCAT/i) {
		notify($ERRORS{'CRITICAL'}, 0, "subroutine was called as a function, it must be called as a class method");
		return;
	}
	
	my $image_name          = $self->data->get_image_name();
	my $computer_node_name  = $self->data->get_computer_node_name();
	
	# Get the image repository path
	my $image_repository_path = $self->get_image_repository_directory_path($image_name);
	if (!$image_repository_path) {
		notify($ERRORS{'CRITICAL'}, 0, "xCAT image repository information could not be determined");
		return;
	}
	
	# Marker files checked by the monitoring loop below to detect the end of the capture
	my $capture_done_file_path = "$image_repository_path/$image_name.img.capturedone";
	my $capture_failed_file_path = "$image_repository_path/$image_name.img.capturefailed";
	
	# Print some preliminary information
	notify($ERRORS{'OK'}, 0, "attempting to capture image '$image_name' on $computer_node_name");
	
	# Check if pre_capture() subroutine has been implemented by the OS module
	if ($self->os->can("pre_capture")) {
		# Call OS pre_capture() - it should perform all OS steps necessary to capture an image
		# pre_capture() should shut down the computer when it is done
		if (!$self->os->pre_capture({end_state => 'off'})) {
			notify($ERRORS{'WARNING'}, 0, "OS module pre_capture() failed");
			return;
		}
	
		# The OS module should turn the computer power off
		# Wait up to 2 minutes for the computer's power status to be off
		if ($self->_wait_for_off($computer_node_name, 120)) {
			notify($ERRORS{'OK'}, 0, "computer $computer_node_name power is off");
		}
		else {
			notify($ERRORS{'WARNING'}, 0, "$computer_node_name power is still on, turning computer off");
	
			# Attempt to power off computer
			if ($self->power_off()) {
				notify($ERRORS{'OK'}, 0, "$computer_node_name was powered off");
			}
			else {
				notify($ERRORS{'WARNING'}, 0, "failed to power off $computer_node_name");
				return;
			}
		}
	}
	else {
		# The capture cannot proceed without the OS-specific preparation steps
		notify($ERRORS{'WARNING'}, 0, "OS module does not implement a pre_capture() subroutine");
		return;
	}
	
	# Set the xCAT nodetype to the new image for the node
	$self->_edit_nodetype($computer_node_name, $image_name) || return;

	# Create the .tmpl file for the image
	$self->_create_template($image_name) || return;

	# Edit xCAT's nodelist table to set the correct node groups
	$self->_edit_nodelist($computer_node_name, $image_name) || return;

	# Call xCAT's nodeset to configure xCAT to save image on next reboot
	$self->_nodeset($computer_node_name, 'image') || return;
	
	# Power on the node in order to capture the image
	if (!$self->power_on()) {
		notify($ERRORS{'WARNING'}, 0, "failed to power on computer before monitoring image capture");
		return;
	}
	
	# Fail the capture if neither the image size nor the node status changes for this long
	my $nochange_timeout_minutes = 20;
	my $nochange_timeout_seconds = ($nochange_timeout_minutes * 60);
	
	# Number of seconds to sleep between monitoring checks
	my $monitor_delay_seconds = 30;
	
	my $monitor_start_time = time;
	my $last_change_time = $monitor_start_time;
	my $nochange_timeout_time = ($last_change_time + $nochange_timeout_seconds);
	
	# Sanity check, unconditionally time out the monitoring after $overall_timeout_hours hours
	my $overall_timeout_hours = 6;
	my $overall_timeout_minutes = ($overall_timeout_hours * 60);
	my $overall_timeout_time = ($monitor_start_time + $overall_timeout_minutes * 60);
	
	my $previous_status;
	my $previous_image_size = 0;
	my $current_time;
	
	# $current_time is refreshed by the loop condition on every iteration
	# It is also used after the loop to determine which timeout (if any) ended the monitoring
	MONITOR_CAPTURE: while (($current_time = time) < $nochange_timeout_time && $current_time < $overall_timeout_time) {
		my $total_elapsed_seconds = ($current_time - $monitor_start_time);
		my $nochange_elapsed_seconds = ($current_time - $last_change_time);
		my $nochange_remaining_seconds = ($nochange_timeout_time - $current_time);
		my $overall_remaining_seconds = ($overall_timeout_time - $current_time);
		notify($ERRORS{'DEBUG'}, 0, "monitoring capture of $image_name on $computer_node_name:\n" .
			"seconds since monitor start/until unconditional timeout: $total_elapsed_seconds/$overall_remaining_seconds\n" .
			"seconds since last change/until no change timeout: $nochange_elapsed_seconds/$nochange_remaining_seconds"
		);
		
		# The marker files are removed once detected so they don't affect a later capture
		if ($self->mn_os->file_exists($capture_done_file_path)) {
			notify($ERRORS{'OK'}, 0, "capture of $image_name on $computer_node_name complete, file exists: $capture_done_file_path");
			$self->mn_os->delete_file($capture_done_file_path);
			last MONITOR_CAPTURE;
		}
		elsif ($self->mn_os->file_exists($capture_failed_file_path)) {
			notify($ERRORS{'WARNING'}, 0, "failed to capture $image_name on $computer_node_name, file exists: $capture_failed_file_path");
			$self->mn_os->delete_file($capture_failed_file_path);
			return;
		}
		
		# Check if the image size has changed
		my $current_image_size = $self->get_image_size($image_name);
		if (!defined($current_image_size)) {
			# NOTE(review): presumably get_image_size() returns undef when the size can't be determined - confirm
			# A failed lookup is not progress, reuse the previous size so the no-change timeout is not reset
			notify($ERRORS{'DEBUG'}, 0, "unable to determine size of $image_name, assuming size has not changed: $previous_image_size");
			$current_image_size = $previous_image_size;
		}
		
		if ($current_image_size ne $previous_image_size) {
			notify($ERRORS{'DEBUG'}, 0, "size of $image_name changed: $previous_image_size --> $current_image_size, reset monitoring timeout to $nochange_timeout_seconds seconds");
			
			# Set previous image size to the current image size
			$previous_image_size = $current_image_size;
			
			$last_change_time = $current_time;
			$nochange_timeout_time = ($last_change_time + $nochange_timeout_seconds);
		}
		else {
			# The node status is only checked when the image size did not change
			# Get the current status of the node
			my $current_status = $self->_nodestat($computer_node_name);
			if (!defined($current_status)) {
				# NOTE(review): presumably _nodestat() returns undef on failure - confirm
				# An unknown status is not compared so it can't be mistaken for a status change
				notify($ERRORS{'DEBUG'}, 0, "unable to determine status of $computer_node_name, not treating this as a status change");
			}
			else {
				# Set previous status to current status the first time a status is retrieved
				$previous_status = $current_status if !defined($previous_status);
				
				if ($current_status ne $previous_status) {
					
					# If the node status changed to 'boot' and the image size > 0, assume image capture complete
					if ($current_status =~ /boot/ && $current_image_size > 0) {
						notify($ERRORS{'DEBUG'}, 0, "image capture appears to be complete, node status changed: $previous_status --> $current_status, image size > 0: $current_image_size");
						last MONITOR_CAPTURE;
					}
					
					notify($ERRORS{'DEBUG'}, 0, "status of $computer_node_name changed: $previous_status --> $current_status, reset monitoring timeout to $nochange_timeout_seconds seconds");
					
					# Set previous status to the current status
					$previous_status = $current_status;
					
					$last_change_time = $current_time;
					$nochange_timeout_time = ($last_change_time + $nochange_timeout_seconds);
				}
			}
		}
		
		notify($ERRORS{'DEBUG'}, 0, "sleeping for $monitor_delay_seconds seconds");
		sleep $monitor_delay_seconds;
	}
	
	# Check if timeout was reached
	# If the loop was exited via 'last', $current_time is still less than both timeout times
	if ($current_time >= $nochange_timeout_time) {
		notify($ERRORS{'WARNING'}, 0, "failed to capture $image_name on $computer_node_name, timed out because no progress was detected for $nochange_timeout_minutes minutes");
		return;
	}
	elsif ($current_time >= $overall_timeout_time) {
		notify($ERRORS{'CRITICAL'}, 0, "failed to capture $image_name on $computer_node_name, timed out because capture took longer than $overall_timeout_hours hours");
		return;
	}
	
	# Set the permissions on the captured image files
	# The return value is not checked, a failure here does not fail the capture
	$self->mn_os->set_file_permissions("$image_repository_path/$image_name\*", 644, 1);
	
	notify($ERRORS{'OK'}, 0, "successfully captured $image_name on $computer_node_name");
	return 1;
}