You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@vcl.apache.org by ar...@apache.org on 2013/09/05 21:52:30 UTC

svn commit: r1520403 - in /vcl/trunk/managementnode/lib/VCL: Module.pm Module/OS/Linux.pm Module/Provisioning/xCAT.pm Module/State.pm image.pm inuse.pm utils.pm

Author: arkurth
Date: Thu Sep  5 19:52:30 2013
New Revision: 1520403

URL: http://svn.apache.org/r1520403
Log:
VCL-16
Added wait_for_child_reservations_to_exit subroutine to State.pm. This is called by cluster parent reservations when they exit the inuse processing.

Added State.pm::state_exit. This is currently called from inuse.pm whenever it exits. It makes sure an 'exited' computerloadlog entry is added when reservation processes exit. It also calls wait_for_child_reservations_to_exit if the reservation is the parent of a cluster request.

Updated inuse.pm to call state_exit instead of exit.

VCL-699
Moved creation of the mn_os object from State.pm::initialize to Module.pm::new. This gives non-state objects access to the management node (MN) OS functions. Changed mn_os from being stored in $self for each object to being stored in $ENV{mn_os}.

VCL-682
Reordered xCAT.pm::node_status to immediately check currentimage.txt for 'new' reservations instead of first checking if the computer is powered on, etc. The xCAT steps were overloading the management node for large cluster reservations.

Other
Fixed typo in utils.pm - sleep_uninterrupted subroutine name was misspelled.

Removed a chunk of code from image.pm which was supposed to change the request state to 'completed'. This is not a valid state name, so the code was not doing anything. The state is properly changed later on.

Modified:
    vcl/trunk/managementnode/lib/VCL/Module.pm
    vcl/trunk/managementnode/lib/VCL/Module/OS/Linux.pm
    vcl/trunk/managementnode/lib/VCL/Module/Provisioning/xCAT.pm
    vcl/trunk/managementnode/lib/VCL/Module/State.pm
    vcl/trunk/managementnode/lib/VCL/image.pm
    vcl/trunk/managementnode/lib/VCL/inuse.pm
    vcl/trunk/managementnode/lib/VCL/utils.pm

Modified: vcl/trunk/managementnode/lib/VCL/Module.pm
URL: http://svn.apache.org/viewvc/vcl/trunk/managementnode/lib/VCL/Module.pm?rev=1520403&r1=1520402&r2=1520403&view=diff
==============================================================================
--- vcl/trunk/managementnode/lib/VCL/Module.pm (original)
+++ vcl/trunk/managementnode/lib/VCL/Module.pm Thu Sep  5 19:52:30 2013
@@ -171,7 +171,7 @@ sub new {
 		$self->{$arg_key} = $args->{$arg_key};
 		#notify($ERRORS{'DEBUG'}, 0, "set '$arg_key' key for $class object from arguments");
 	}
-
+	
 	# Bless the object as the class which new was called with
 	bless $self, $class;
 	
@@ -195,6 +195,19 @@ sub new {
 		notify($ERRORS{'DEBUG'}, 0, ref($self) . " object created, address: $address");
 	}
 	
+	# Create a management node OS object
+	# Check to make sure the object currently being created is not a MN OS object to avoid endless loop
+	if (!$self->isa('VCL::Module::OS::Linux::ManagementNode')) {
+		if (my $mn_os = $self->create_mn_os_object()) {
+			$self->set_mn_os($mn_os);
+			$self->data->set_mn_os($mn_os);
+		}
+		else {
+			notify($ERRORS{'WARNING'}, 0, "failed to create management node OS object");
+			return;
+		}
+	}
+	
 	# Check if not running in setup mode and if initialize() subroutine is defined for this module
 	if (!$SETUP_MODE && $self->can("initialize")) {
 		# Call the initialize() subroutine, if it returns 0, return 0
@@ -671,14 +684,14 @@ sub mn_os {
 		$display_warning = 1;
 	}
 	
-	if (!$self->{mn_os}) {
+	if (!$ENV{mn_os}) {
 		if ($display_warning) {
-			notify($ERRORS{'WARNING'}, 0, "unable to return management node OS object, \$self->{mn_os} is not set");
+			notify($ERRORS{'WARNING'}, 0, "unable to return management node OS object, \$ENV{mn_os} is not set");
 		}
 		return;
 	}
 	else {
-		return $self->{mn_os};
+		return $ENV{mn_os};
 	}
 }
 
@@ -804,7 +817,7 @@ sub set_mn_os {
 	my $type = ref($self);
 	my $mn_os_address = sprintf('%x', $mn_os);
 	notify($ERRORS{'DEBUG'}, 0, "storing reference to managment node OS object (address: $mn_os_address) in this $type object (address: $address)");
-	$self->{mn_os} = $mn_os;
+	$ENV{mn_os} = $mn_os;
 }
 
 #/////////////////////////////////////////////////////////////////////////////

Modified: vcl/trunk/managementnode/lib/VCL/Module/OS/Linux.pm
URL: http://svn.apache.org/viewvc/vcl/trunk/managementnode/lib/VCL/Module/OS/Linux.pm?rev=1520403&r1=1520402&r2=1520403&view=diff
==============================================================================
--- vcl/trunk/managementnode/lib/VCL/Module/OS/Linux.pm (original)
+++ vcl/trunk/managementnode/lib/VCL/Module/OS/Linux.pm Thu Sep  5 19:52:30 2013
@@ -149,7 +149,7 @@ sub get_init_modules {
 		# initialize will check the computer to determine if it contains the corresponding Linux init daemon installed
 		# If not installed, the constructor will return false
 		my $init;
-		eval { $init = ($init_perl_package)->new({data_structure => $self->data, os => $self}) };
+		eval { $init = ($init_perl_package)->new({data_structure => $self->data, os => $self, mn_os => $self->mn_os}) };
 		if ($init) {
 			my @required_commands = eval "@" . $init_perl_package . "::REQUIRED_COMMANDS";
 			if ($EVAL_ERROR) {
@@ -530,7 +530,12 @@ sub update_public_hostname {
 	}
 	
 	# Set the node's hostname to public hostname
-	my $hostname_command = "hostname -v $public_hostname; sed -i -e \"/^HOSTNAME=/d\" /etc/sysconfig/network; echo \"HOSTNAME=$public_hostname\" >> /etc/sysconfig/network";
+	my $network_file_path = '/etc/sysconfig/network';
+	if (!$self->file_exists($network_file_path)) {
+		return 1;
+	}
+	
+	my $hostname_command = "hostname -v $public_hostname; sed -i -e \"/^HOSTNAME=/d\" $network_file_path; echo \"HOSTNAME=$public_hostname\" >> $network_file_path";
 	my ($hostname_exit_status, $hostname_output) = $self->execute($hostname_command);
 	if (!defined($hostname_output)) {
 		notify($ERRORS{'WARNING'}, 0, "failed to SSH command to set hostname on $computer_node_name to $public_hostname, command: '$hostname_command'");
@@ -951,15 +956,15 @@ sub grant_access {
 
 #/////////////////////////////////////////////////////////////////////////////
 
-=head2 sync_date
+=head2 synchronize_time
 
- Parameters  : called as an object
- Returns     : 1 - success , 0 - failure
- Description : updates and sets date on node
+ Parameters  : none
+ Returns     : boolean
+ Description : 
 
 =cut
 
-sub sync_date {
+sub synchronize_time {
 	my $self = shift;
 	if (ref($self) !~ /linux/i) {
 		notify($ERRORS{'CRITICAL'}, 0, "subroutine was called as a function, it must be called as a class method");
@@ -967,48 +972,63 @@ sub sync_date {
 	}
 	
 	my $computer_node_name = $self->data->get_computer_node_name();
+	my $management_node_hostname = $self->data->get_management_node_hostname();
 	
-	# get Throttle source value from database if set
-	my $time_source;
-	my $variable_name        = "timesource|" . $self->data->get_management_node_hostname();
+	my $variable_name = "timesource|$management_node_hostname";
 	my $variable_name_global = "timesource|global";
+	
+	my $time_source_variable;
 	if ($self->data->is_variable_set($variable_name)) {
-		# fetch variable
-		$time_source = $self->data->get_variable($variable_name);
-		notify($ERRORS{'DEBUG'}, 0, "time_source is $time_source  set for $variable_name");
+		$time_source_variable = $self->data->get_variable($variable_name);
+		notify($ERRORS{'DEBUG'}, 0, "retrieved time source variable '$variable_name': $time_source_variable");
 	}
 	elsif ($self->data->is_variable_set($variable_name_global)) {
-		# fetch variable
-		$time_source = $self->data->get_variable($variable_name_global);
-		notify($ERRORS{'DEBUG'}, 0, "time_source is $time_source  set for $variable_name");
+		$time_source_variable = $self->data->get_variable($variable_name_global);
+		notify($ERRORS{'DEBUG'}, 0, "retrieved global time source variable '$variable_name_global': $time_source_variable");
 	}
 	else {
-		notify($ERRORS{'DEBUG'}, 0, "time_source is not set for $variable_name or $variable_name_global not able update time");
+		notify($ERRORS{'DEBUG'}, 0, "unable to sync time, neither '$variable_name' or '$variable_name_global' time source variable is set in database");
 		return;
 	}
 	
-	# Replace commas with single whitespace
-	$time_source =~ s/,/ /g;
-	
-	# Assemble the time command
-	my $time_command = "rdate -s $time_source";
-	
-	my ($exit_status, $output) = $self->execute($time_command, 1, 180);
-	
-	# update ntpservers file
+	# Split the time source variable into an array
+	my @time_sources = split(/[,; ]+/, $time_source_variable);
 	
-	my @time_array = split(/ /, $time_source);
-	my $ntp_command = "cp /dev/null /etc/ntp/ntpservers; ";
-	foreach my $i (@time_array) {
-		$ntp_command .= "echo $i >> /etc/ntp/ntpservers; ";
+	# Assemble the rdate command
+	# Ubuntu doesn't accept multiple servers in a single command
+	my $rdate_command;
+	for my $time_source (@time_sources) {
+		$rdate_command .= "rdate -s $time_source || ";
+	}
+	$rdate_command =~ s/[ \|]+$//g;
+	my ($rdate_exit_status, $rdate_output) = $self->execute($rdate_command, 0, 180);
+	if (!defined($rdate_output)) {
+		notify($ERRORS{'WARNING'}, 0, "failed to execute rdate command to synchronize time on $computer_node_name");
+		return;
+	}
+	elsif (grep(/not found/i, @$rdate_output)) {
+		notify($ERRORS{'DEBUG'}, 0, "unable to synchronize time on $computer_node_name, rdate is not installed");
+	}
+	elsif ($rdate_exit_status > 0) {
+		notify($ERRORS{'WARNING'}, 0, "failed to synchronize time on $computer_node_name using rdate, exit status: $rdate_exit_status, command:\n$rdate_command\noutput:\n" . join("\n", @$rdate_output));
+	}
+	else {
+		notify($ERRORS{'DEBUG'}, 0, "synchronized time on $computer_node_name using rdate");
 	}
 	
-	($exit_status, $output) = $self->execute($ntp_command, 1, 180);
-	
-	$self->restart_service('ntpd');
+	# Check if the ntpd service exists before attempting to configure it
+	if (!$self->service_exists('ntpd')) {
+		notify($ERRORS{'DEBUG'}, 0, "skipping ntpd configuration, ntpd service does not exist");
+		return 1;
+	}
 	
-	return 1;
+	# Update ntpservers file
+	my $ntpservers_contents = join("\n", @time_sources);
+	if (!$self->create_text_file('/etc/ntp/ntpservers', $ntpservers_contents)) {
+		return;
+	}
 	
+	return $self->restart_service('ntpd');
 }
 
 #/////////////////////////////////////////////////////////////////////////////
@@ -2371,13 +2391,16 @@ sub create_user {
 	my ($useradd_exit_status, $useradd_output) = $self->execute($useradd_command);
 	
 	# Check if the output indicates that the user already exists
-	if ($useradd_output && grep(/exists/, @$useradd_output)) {
+	# useradd: warning: the home directory already exists
+	# useradd: user ibuser exists
+	if ($useradd_output && grep(/ exists(\s|$)/i, @$useradd_output)) {
 		if (!$self->delete_user($user_login_id)) {
 			notify($ERRORS{'WARNING'}, 0, "failed to add user '$user_login_id' to $computer_node_name, user with same name already exists and could not be deleted");
 			return;
 		}
 		($useradd_exit_status, $useradd_output) = $self->execute($useradd_command);
 	}
+	
 	if (!defined($useradd_output)) {
 		notify($ERRORS{'WARNING'}, 0, "failed to execute command to add user '$user_login_id' to $computer_node_name: '$useradd_command'");
 		return;
@@ -2493,7 +2516,7 @@ sub delete_user {
 	# Make sure the user exists
 	if (!$self->user_exists($username)) {
 		notify($ERRORS{'DEBUG'}, 0, "user NOT deleted from $computer_node_name because it does not exist: $username");
-		#return 1;
+		return 1;
 	}
 	
 	# Check if the user is logged in
@@ -2517,9 +2540,9 @@ sub delete_user {
 		my @exclude_list = $self->get_exclude_list();
 
 		if (!(grep(/\/home\/$username/, @exclude_list))) {
-			notify($ERRORS{'DEBUG'}, 0, "home directory will be deleted: $home_directory_path");
-			$userdel_command .= ' -r -f';
-		}
+		notify($ERRORS{'DEBUG'}, 0, "home directory will be deleted: $home_directory_path");
+		$userdel_command .= ' -r';
+	}
 	}
 	$userdel_command .= " $username";
 	
@@ -2532,8 +2555,9 @@ sub delete_user {
 	elsif (grep(/does not exist/i, @$userdel_output)) {
 		notify($ERRORS{'DEBUG'}, 0, "user '$username' NOT deleted from $computer_node_name because it does not exist");
 	}
-	elsif (grep(/warning/i, @$userdel_output)) {
-		notify($ERRORS{'WARNING'}, 0, "warning from to delete user cmd for '$username' from $computer_node_name, command: '$userdel_command', output:\n" . join("\n", @$userdel_output));
+	elsif (grep(/userdel: /i, @$userdel_output)) {
+		notify($ERRORS{'WARNING'}, 0, "failed to delete user '$username' from $computer_node_name, command: '$userdel_command', exit status: $userdel_exit_status, output:\n" . join("\n", @$userdel_output));
+		return;
 	}
 	else {
 		notify($ERRORS{'OK'}, 0, "deleted user '$username' from $computer_node_name");
@@ -3196,7 +3220,7 @@ sub enable_firewall_port {
 	# Check to see if this distro has iptables
 	# If not return 1 so it does not fail
 	if (!($self->service_exists("iptables"))) {
-		notify($ERRORS{'WARNING'}, 0, "iptables does not exist on this OS");
+		notify($ERRORS{'DEBUG'}, 0, "iptables does not exist on this OS");
 		return 1;
 	}
 	
@@ -4402,7 +4426,6 @@ sub configure_default_sshd {
 	my $computer_node_name = $self->data->get_computer_node_name();
 	
 	# Stop existing external sshd process if it is running
-	$self->stop_service('ext_sshd');
 	if (!$self->stop_external_sshd()) {
 		notify($ERRORS{'WARNING'}, 0, "unable to configure default sshd state, problem occurred attempting to kill external sshd process");
 		return;

Modified: vcl/trunk/managementnode/lib/VCL/Module/Provisioning/xCAT.pm
URL: http://svn.apache.org/viewvc/vcl/trunk/managementnode/lib/VCL/Module/Provisioning/xCAT.pm?rev=1520403&r1=1520402&r2=1520403&view=diff
==============================================================================
--- vcl/trunk/managementnode/lib/VCL/Module/Provisioning/xCAT.pm (original)
+++ vcl/trunk/managementnode/lib/VCL/Module/Provisioning/xCAT.pm Thu Sep  5 19:52:30 2013
@@ -637,15 +637,56 @@ sub node_status {
 	}
 	
 	# Get the computer name argument
-	my $computer_node_name = shift || $self->data->get_computer_node_name();
-	if (!$computer_node_name) {
-		notify($ERRORS{'WARNING'}, 0, "computer name argument was not specified");
-		return;
+	my $computer_node_name = shift;
+	
+	if ($computer_node_name) {
+		notify($ERRORS{'DEBUG'}, 0, "checking status of node specified by argument: $computer_node_name");
+	}
+	else {
+		$computer_node_name = $self->data->get_computer_node_name();
+		if ($computer_node_name) {
+			notify($ERRORS{'DEBUG'}, 0, "checking status of node assigned to reservation: $computer_node_name");
+		}
+		else {
+			notify($ERRORS{'WARNING'}, 0, "computer name argument was not specified and could not be retrieved from the DataStructure object");
+			return;
+		}
 	}
-	notify($ERRORS{'DEBUG'}, 0, "checking status of node: $computer_node_name");
 	
+	my $request_state_name = $self->data->get_request_state_name(0);
 	my $image_name = $self->data->get_image_name(0);
 	
+	# If request state is 'new' and necessary objects are available, go straight to checking currentimage.txt
+	# Calling the xCAT functions can overload the management node when several computers are being loaded concurrently (cluster requests)
+	if ($image_name && $request_state_name && $request_state_name eq 'new' && $self->os(0)) {
+		# Check image name reported from OS
+		my $current_image_name = $self->os->get_current_image_info('current_image_name');
+		if (!defined($current_image_name)) {
+			my $return_value = 'RELOAD';
+			notify($ERRORS{'WARNING'}, 0, "unable to determine status of $computer_node_name, failed to retrieve current image name from OS, returning '$return_value'");
+			return 'RELOAD';
+		}
+		
+		if ($current_image_name ne $image_name) {
+			my $return_value = 'RELOAD';
+			notify($ERRORS{'DEBUG'}, 0, "current image reported by OS '$current_image_name' does NOT match the reservation image name: '$image_name', returning '$return_value'"); 
+			return $return_value;
+		}
+		
+		# Check if currentimage.txt contains a 'vcld_post_load' line
+		my $vcld_post_load_status = $self->data->get_computer_currentimage_vcld_post_load(0);
+		if ($vcld_post_load_status) {
+			my $return_value = 'READY';
+			notify($ERRORS{'DEBUG'}, 0, "OS module post_load tasks have been completed on VM $computer_node_name, returning '$return_value'");
+			return $return_value;
+		}
+		else {
+			my $return_value = 'POST_LOAD';
+			notify($ERRORS{'OK'}, 0, "OS module post_load tasks have not been completed on VM $computer_node_name, returning '$return_value'");
+			return $return_value;
+		}
+	}
+	
 	# Check if the node is powered on
 	my $power_status = $self->power_status($computer_node_name);
 	if (!defined($power_status)) {
@@ -716,8 +757,9 @@ sub node_status {
 	# Check image name reported from OS
 	my $current_image_name = $os->get_current_image_info('current_image_name');
 	if (!defined($current_image_name)) {
-		notify($ERRORS{'WARNING'}, 0, "unable to determine status of $computer_node_name, failed to retrieve current image name from OS");
-		return;
+		my $return_value = 'UNRESPONSIVE';
+		notify($ERRORS{'WARNING'}, 0, "unable to determine status of $computer_node_name, failed to retrieve current image name from OS, returning '$return_value'");
+		return $return_value;
 	}
 	
 	# Check if OS's current image matches the reservation image name
@@ -2424,7 +2466,7 @@ sub DESTROY {
 		my $request_state_name = $self->data->get_request_state_name(0);
 		
 		if (!defined($node) || !defined($request_state_name)) {
-			notify($ERRORS{'WARNING'}, 0, "skipping xCAT DESTROY tasks, unable to retrieve node name and request state name from DataStructure");
+			notify($ERRORS{'DEBUG'}, 0, "skipping xCAT DESTROY tasks, unable to retrieve node name and request state name from DataStructure");
 		}
 		elsif ($request_state_name =~ /^(new|reload|image)$/) {
 			notify($ERRORS{'DEBUG'}, 0, "request state is '$request_state_name', attempting to set nodeset state of $node to 'boot'");

Modified: vcl/trunk/managementnode/lib/VCL/Module/State.pm
URL: http://svn.apache.org/viewvc/vcl/trunk/managementnode/lib/VCL/Module/State.pm?rev=1520403&r1=1520402&r2=1520403&view=diff
==============================================================================
--- vcl/trunk/managementnode/lib/VCL/Module/State.pm (original)
+++ vcl/trunk/managementnode/lib/VCL/Module/State.pm Thu Sep  5 19:52:30 2013
@@ -123,17 +123,6 @@ sub initialize {
 	# Set the parent PID and this process's PID in the hash
 	set_hash_process_id($self->data->get_request_data);
 	
-	# Create a management node OS object
-	# Check to make sure the object currently being created is not a MN OS object to avoid endless loop
-	if (my $mn_os = $self->create_mn_os_object()) {
-		$self->set_mn_os($mn_os);
-		$self->data->set_mn_os($mn_os);
-	}
-	else {
-		notify($ERRORS{'WARNING'}, 0, "failed to create management node OS object");
-		return;
-	}
-	
 	# Create an OS object
 	if (my $os = $self->create_os_object()) {
 		$self->set_os($os);
@@ -187,7 +176,7 @@ sub initialize {
 		if ($reservation_count > 1) {
 			# Wait for all child processes to begin
 			if (!$self->wait_for_child_reservations_to_begin('begin', 60, 3)) {
-				$self->reservation_failed("child reservation processes failed begin");
+				$self->reservation_failed("child reservation processes failed to begin");
 			}
 		}
 		
@@ -459,6 +448,129 @@ sub wait_for_child_reservations_to_begin
 
 #/////////////////////////////////////////////////////////////////////////////
 
+=head2 wait_for_child_reservations_to_exit
+
+ Parameters  : $total_wait_seconds (optional), $attempt_delay_seconds (optional)
+ Returns     : boolean
+ Description : Loops until an 'exited' computerloadlog entry exists for all
+               child reservations. Returns false if the loop times out. The
+               default $total_wait_seconds value is 300 seconds. The default
+               $attempt_delay_seconds value is 15 seconds.
+
+=cut
+
+sub wait_for_child_reservations_to_exit {
+	my $self = shift;
+	if (ref($self) !~ /VCL/) {
+		notify($ERRORS{'CRITICAL'}, 0, "subroutine can only be called as a class method of a VCL object");
+		return;
+	}
+	
+	my $total_wait_seconds = shift || 300;
+	my $attempt_delay_seconds = shift || 15;
+	
+	my $request_id = $self->data->get_request_id();
+	my $request_state_name = $self->data->get_request_state_name();
+	
+	return $self->code_loop_timeout(
+		\&does_loadstate_entry_exist,
+		[$self, 'exited', 1],
+		"waiting for child reservation processes to exit", $total_wait_seconds, $attempt_delay_seconds
+	);
+}
+
+#/////////////////////////////////////////////////////////////////////////////
+
+=head2 state_exit
+
+ Parameters  : $request_state_name_new (optional), $computer_state_name_new (optional), $request_log_ending (optional)
+ Returns     : none, exits
+ Description : Performs common tasks before a reservation process exits and then
+               exits.
+
+=cut
+
+sub state_exit {
+	my $self = shift;
+	if (ref($self) !~ /VCL/) {
+		notify($ERRORS{'CRITICAL'}, 0, "subroutine can only be called as a class method of a VCL object");
+		return;
+	}
+	
+	my ($request_state_name_new, $computer_state_name_new, $request_log_ending) = @_;
+	
+	my $request_id                 = $self->data->get_request_id();
+	my $request_logid              = $self->data->get_request_log_id();
+	my $reservation_id             = $self->data->get_reservation_id();
+	my @reservation_ids            = $self->data->get_reservation_ids();
+	my $reservation_count          = $self->data->get_reservation_count();
+	my $is_parent_reservation      = $self->data->is_parent_reservation();
+	my $request_state_name_old     = $self->data->get_request_state_name();
+	my $request_laststate_name_old = $self->data->get_request_laststate_name();
+	my $computer_id                = $self->data->get_computer_id();
+	my $computer_state_name_old    = $self->data->get_computer_state_name();
+	my $computer_shortname         = $self->data->get_computer_short_name();
+	
+	# If new request state name argument was not supplied, set it back to the previous state
+	if (!$request_state_name_new) {
+		$request_state_name_new = $request_state_name_old;
+	}
+	
+	if ($is_parent_reservation) {
+		# If parent of a cluster request, wait for child processes to exit before switching the state
+		if ($reservation_count > 1) {
+			$self->wait_for_child_reservations_to_exit();
+		}
+		
+		# Never set request state to failed if previous state is image
+		if ($request_state_name_old eq 'image' && $request_state_name_new !~ /(complete|maintenance)/) {
+			notify($ERRORS{'CRITICAL'}, 0, "previous request state is $request_state_name_old, not setting request state to $request_state_name_new, setting request and computer state to maintenance");
+			$request_state_name_new = 'maintenance';
+			$computer_state_name_new = 'maintenance';
+		}
+		elsif ($request_state_name_old eq 'inuse' && $request_state_name_new !~ /(inuse|timeout|maintenance)/) {
+			notify($ERRORS{'CRITICAL'}, 0, "previous request state is $request_state_name_old, not setting request state to $request_state_name_new, setting request and computer state to inuse");
+			$request_state_name_new = 'inuse';
+			$computer_state_name_new = 'inuse';
+		}
+		
+		# Update the reservation.lastcheck time to now if the next request state is inuse
+		# Do this to ensure that reservations are not processed again quickly after this process exits
+		# For cluster requests, the parent may have had to wait a while for child processes to exit
+		# Resetting reservation.lastcheck causes reservations to wait the full interval between inuse checks
+		if ($request_state_name_new =~ /(inuse)/) {
+			update_reservation_lastcheck(@reservation_ids);
+		}
+		
+		# Update the request state
+		if (!update_request_state($request_id, $request_state_name_new, $request_state_name_old)) {
+			notify($ERRORS{'CRITICAL'}, 0, "failed to change request state: $request_state_name_old/$request_laststate_name_old --> $request_state_name_new/$request_state_name_old");
+		}
+		
+		# Update log.ending if this is the parent reservation and argument was supplied
+		if ($request_log_ending && !update_log_ending($request_logid, $request_log_ending)) {
+			notify($ERRORS{'CRITICAL'}, 0, "failed to set log ending to $request_log_ending, log ID: $request_logid");
+		}
+	}
+	
+	# Update the computer state if argument was supplied
+	if ($computer_state_name_new) {
+		if ($computer_state_name_new eq $computer_state_name_old) {
+			notify($ERRORS{'DEBUG'}, 0, "state of computer $computer_shortname not updated, already set to $computer_state_name_old");
+		}
+		elsif (!update_computer_state($computer_id, $computer_state_name_new)) {
+			notify($ERRORS{'CRITICAL'}, 0, "failed update state of computer $computer_shortname: $computer_state_name_old->$computer_state_name_new");
+		}
+	}
+	
+	# Insert a computerloadlog 'exited' entry
+	# This is used by the parent cluster reservation
+	insertloadlog($reservation_id, $computer_id, "exited", "vcld process exiting");
+	exit;
+}
+
+#/////////////////////////////////////////////////////////////////////////////
+
 =head2 DESTROY
 
  Parameters  : none

Modified: vcl/trunk/managementnode/lib/VCL/image.pm
URL: http://svn.apache.org/viewvc/vcl/trunk/managementnode/lib/VCL/image.pm?rev=1520403&r1=1520402&r2=1520403&view=diff
==============================================================================
--- vcl/trunk/managementnode/lib/VCL/image.pm (original)
+++ vcl/trunk/managementnode/lib/VCL/image.pm Thu Sep  5 19:52:30 2013
@@ -226,15 +226,7 @@ END
 	if ($create_image_result) {
 		# Success
 		notify($ERRORS{'OK'}, 0, "$image_name image files successfully saved");
-
-		# Update the request state to completed, laststate to image
-		if (update_request_state($request_id, "completed", "image")) {
-			notify($ERRORS{'OK'}, 0, "request state updated to completed, laststate to image");
-		}
-		else {
-			notify($ERRORS{'CRITICAL'}, 0, "unable to update request state to completed, laststate to image");
-		}
-
+		
 		# Get the new image size
 		my $image_size_new;
 		if ($image_size_new = $self->provisioner->get_image_size($image_name)) {
@@ -354,20 +346,12 @@ END
 	}
 	
 	# Insert reload request data into the datbase
-	if (insert_reload_request($request_data)) {
-		notify($ERRORS{'OK'}, 0, "inserted reload request into database for computer id=$computer_id");
-
-		# Switch the request state to complete, leave the computer state as is, update log ending to EOR, exit
-		switch_state($request_data, 'complete', '', 'EOR', '1');
-	}
-	else {
+	if (!insert_reload_request($request_data)) {
 		notify($ERRORS{'CRITICAL'}, 0, "failed to insert reload request into database for computer id=$computer_id");
-
-		# Switch the request and computer states to failed, set log ending to failed, exit
-		switch_state($request_data, 'failed', 'failed', 'failed', '1');
 	}
-
-	notify($ERRORS{'OK'}, 0, "exiting");
+	
+	# Switch the request state to complete, leave the computer state as is, update log ending to EOR, exit
+	switch_state($request_data, 'complete', '', 'EOR', '1');
 	exit;
 } ## end sub reservation_successful
 

Modified: vcl/trunk/managementnode/lib/VCL/inuse.pm
URL: http://svn.apache.org/viewvc/vcl/trunk/managementnode/lib/VCL/inuse.pm?rev=1520403&r1=1520402&r2=1520403&view=diff
==============================================================================
--- vcl/trunk/managementnode/lib/VCL/inuse.pm (original)
+++ vcl/trunk/managementnode/lib/VCL/inuse.pm Thu Sep  5 19:52:30 2013
@@ -117,7 +117,7 @@ sub process {
 	my $computer_id             = $self->data->get_computer_id();
 	my $computer_short_name     = $self->data->get_computer_short_name();
 	my $connect_timeout_seconds = $self->data->get_variable('connecttimeout') || (15 * 60);
-	
+
 	# Make sure connect timeout is long enough
 	# It has to be a bit longer than the ~5 minute period between inuse checks due to cluster reservations
 	# If too short, a user may be connected to one computer in a cluster and another inuse process times out before the connected computer is checked
@@ -140,8 +140,8 @@ sub process {
 		else {
 			notify($ERRORS{'CRITICAL'}, 0, "'$request_state_name' operation requested, " . ref($self->os) . " does not implement a 'reboot' subroutine");
 		}
-		switch_state($request_data, 'inuse', 'inuse');
-		exit;
+		
+		$self->state_exit('inuse', 'inuse');
 	}
 	
 	# Check if server reservation has been modified
@@ -149,8 +149,8 @@ sub process {
 		if (!$self->os->manage_server_access()) {
 			notify($ERRORS{'CRITICAL'}, 0, "failed to update server access");
       }
-		switch_state($request_data, 'inuse', 'inuse');
-      exit;
+		
+		$self->state_exit('inuse', 'inuse');
 	}
 	
 	# Remove rows from computerloadlog for this reservation, don't remove the loadstate=begin row
@@ -182,7 +182,7 @@ sub process {
 		# Only 1 reservation needs to handle the end time countdown
 		if (!$is_parent_reservation) {
 			notify($ERRORS{'OK'}, 0, "request end time countdown handled by parent reservation, exiting");
-			exit;
+			$self->state_exit();
 		}
 		
 		my $sleep_seconds = ($request_remaining_seconds - $end_time_notify_seconds);
@@ -214,7 +214,7 @@ sub process {
 			notify($ERRORS{'OK'}, 0, "minutes until end of end of request: $request_remaining_minutes");
 			
 			# Check if user deleted the request
-			exit if is_request_deleted($request_id);
+			$self->state_exit() if is_request_deleted($request_id);
 			
 			# Check if this is an imaging request, causes process to exit if state or laststate = image
 			$self->_check_imaging_request();
@@ -226,8 +226,7 @@ sub process {
 			# Check if the user extended the request
 			if ($current_request_end_epoch_seconds > $request_end_epoch_seconds) {
 				notify($ERRORS{'OK'}, 0, "user extended request, end time: $request_end --> $current_request_end, returning request to inuse state");
-				switch_state($request_data, 'inuse', 'inuse');
-				exit;
+				$self->state_exit('inuse', 'inuse');
 			}
 			
 			# Notify user when 5 or 10 minutes remain
@@ -249,13 +248,11 @@ sub process {
 			notify($ERRORS{'OK'}, 0, "initiating image auto-capture process");
 			if (!$self->_start_imaging_request()) {
 				notify($ERRORS{'CRITICAL'}, 0, "failed to initiate image auto-capture process, changing request and computer state to maintenance");
-				switch_state($request_data, 'maintenance', 'maintenance');
-				exit;
+				$self->state_exit('maintenance', 'maintenance');
 			}
 		}
 		
-		switch_state($request_data, 'timeout', 'timeout', 'EOR', 1);
-		exit;
+		$self->state_exit('timeout', 'timeout', 'EOR');
 	}
 	
 	# If duration is greater than 24 hours perform end time notice checks
@@ -282,8 +279,7 @@ sub process {
 			"notify time      : $end_time_notify_string\n" . 
 			"connect timeout  : $connect_timeout_string"
 		);
-		switch_state($request_data, 'inuse', 'inuse');
-		exit;
+		$self->state_exit('inuse', 'inuse');
 	}
 	
 	# Check if the computer is responding to SSH
@@ -291,8 +287,7 @@ sub process {
 	# This prevents a reservatino from timing out if the user is actually connected but SSH from the management node isn't working
 	if (!$self->os->is_ssh_responding()) {
 		notify($ERRORS{'OK'}, 0, "$computer_short_name is not responding to SSH, skipping user connection check");
-		switch_state($request_data, 'inuse', 'inuse');
-		exit;
+		$self->state_exit('inuse', 'inuse');
 	}
 	
 	# Update the firewall if necessary - this is what allows a user to click Connect from different locations
@@ -319,18 +314,18 @@ sub process {
 			notify($ERRORS{'OK'}, 0, "never detected user connection, skipping timeout, request duration: $request_duration_hours hours");
 		}
 		else {
-			exit if is_request_deleted($request_id);
+			$self->state_exit() if is_request_deleted($request_id);
 			
 			# Update reservation lastcheck, otherwise request will be processed immediately again
 			update_reservation_lastcheck($reservation_id);
 			
 			if ($request_laststate_name eq 'reserved') {
 				$self->_notify_user_no_login();
-				switch_state($request_data, 'timeout', 'timeout', 'nologin', 1);
+				$self->state_exit('timeout', 'timeout', 'nologin');
 			}
 			else {
 				$self->_notify_user_timeout();
-				switch_state($request_data, 'timeout', 'timeout', 'timeout', 1);
+				$self->state_exit('timeout', 'timeout', 'timeout');
 			}
 		}
 	}
@@ -344,8 +339,7 @@ sub process {
 		}
 	}
 	
-	switch_state($request_data, 'inuse', 'inuse');
-	exit;
+	$self->state_exit('inuse', 'inuse'); # normal end of the check, same states as the switch_state call being replaced
 }
 
 #/////////////////////////////////////////////////////////////////////////////
@@ -377,7 +371,7 @@ sub user_connected {
 	my $computer_short_name = $self->data->get_computer_short_name();
 	
 	# Check if user deleted the request
-	exit if is_request_deleted($request_id);
+	$self->state_exit() if is_request_deleted($request_id);
 	
 	# Check if this is an imaging request, causes process to exit if state or laststate = image
 	$self->_check_imaging_request();
@@ -450,7 +444,7 @@ sub _notify_user_endtime {
 	my $user_emailnotices               = $self->data->get_user_emailnotices();
 	my $user_imtype_name                = $self->data->get_user_imtype_name();
 	my $user_im_id                      = $self->data->get_user_im_id();
-	my $request_forimaging 		    = $self->_check_imaging_request();	
+	my $request_forimaging 		         = $self->_check_imaging_request();	
 	my $request_id                      = $self->data->get_request_id();
 	
 	my $message;
@@ -565,7 +559,7 @@ sub _notify_user_disconnect {
 	my $user_imtype_name                = $self->data->get_user_imtype_name();
 	my $user_im_id                      = $self->data->get_user_im_id();
 	my $is_parent_reservation           = $self->data->is_parent_reservation();
-	my $request_forimaging		    = $self->_check_imaging_request();
+	my $request_forimaging              = $self->_check_imaging_request();
 	
 	my $disconnect_string;
 	if ($disconnect_time == 0) {
@@ -926,9 +920,10 @@ sub _check_imaging_request {
 	my $imaging_result = is_request_imaging($request_id);
 	if ($imaging_result eq 'image') {
 		notify($ERRORS{'OK'}, 0, "image creation process has begun, exiting");
-		exit;
+		$self->state_exit();
 	}
 }
+
 #/////////////////////////////////////////////////////////////////////////////
 
 =head2 _start_imaging_request

Modified: vcl/trunk/managementnode/lib/VCL/utils.pm
URL: http://svn.apache.org/viewvc/vcl/trunk/managementnode/lib/VCL/utils.pm?rev=1520403&r1=1520402&r2=1520403&view=diff
==============================================================================
--- vcl/trunk/managementnode/lib/VCL/utils.pm (original)
+++ vcl/trunk/managementnode/lib/VCL/utils.pm Thu Sep  5 19:52:30 2013
@@ -106,6 +106,7 @@ our @EXPORT = qw(
   disablesshd
   escape_file_path
   format_data
+  format_hash_keys
   format_number
   get_affiliation_info
   get_array_summary_string
@@ -210,7 +211,7 @@ our @EXPORT = qw(
   setup_get_hash_choice
   setup_get_input_string
   setup_print_wrap
-  sleep_uninterupted
+  sleep_uninterrupted
   sort_by_file_name
   stopwatch
   string_to_ascii
@@ -6873,8 +6874,6 @@ EOF
 
 =cut
 
-#/////////////////////////////////////////////////////////////////////////////
-
 sub switch_state {
 	my ($request_data, $request_state_name_new, $computer_state_name_new, $request_log_ending, $exit) = @_;
 
@@ -8285,6 +8284,87 @@ sub format_data {
 
 #/////////////////////////////////////////////////////////////////////////////
 
+=head2 format_hash_keys
+
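+ Parameters  : $hash_ref, $display_parent_keys (optional), $display_values_hashref (optional), $parent_keys (optional, used for recursion)
+ Returns     : string (nothing is returned if $hash_ref is not a hash reference)
+ Description : Lists the keys of a nested hash which contain references, one per line, sorted and indented with 3 dashes per nesting level. Parent keys are prepended if $display_parent_keys is true. $display_values_hashref maps a parent key name to a child key name whose scalar value is also displayed.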
+
+=cut
+
+sub format_hash_keys {
+	my ($hash_ref, $display_parent_keys, $display_values_hashref, $parent_keys) = @_;
+	if (!$hash_ref) {
+		notify($ERRORS{'WARNING'}, 0, "hash reference argument was not supplied");
+		return;
+	}
+	elsif (ref($hash_ref) ne 'HASH') {
+		notify($ERRORS{'WARNING'}, 0, "first argument is not a hash reference");
+		return;
+	}
+	
+	# $parent_keys is only passed by the recursive calls made below. It contains
+	# the chain of keys leading to $hash_ref, its length is the nesting level.
+	if (!defined($parent_keys)) {
+		$parent_keys = [];
+	}
+	my $level = scalar(@$parent_keys);
+	
+	# Each line begins with 3 dashes per nesting level, optionally followed by
+	# the chain of parent keys. Neither changes while this level is processed
+	# because every push to @$parent_keys below is followed by a pop.
+	my $indent = '-' x ($level * 3);
+	my $parent_key_string = '';
+	if ($display_parent_keys) {
+		$parent_key_string = join('', map { "{$_}" } @$parent_keys);
+	}
+	
+	my $return_string = '';
+	
+	# Add specific values specified in $display_values_hashref to the return string
+	# The hash maps the name of a parent key to the name of the child key whose
+	# value should be displayed, so a direct lookup finds the only possible match
+	if (@$parent_keys && ref($display_values_hashref) eq 'HASH') {
+		my $display_key = $display_values_hashref->{$parent_keys->[-1]};
+		if (defined($display_key) && exists($hash_ref->{$display_key})) {
+			my $value = $hash_ref->{$display_key};
+			# Only undefined values are displayed as <NULL>, 0 and '' are displayed as is
+			$value = '<NULL>' if !defined($value);
+			
+			# References are listed by the loop below
+			if (!ref($value)) {
+				$return_string .= $indent . $parent_key_string . "{$display_key} => '$value'\n";
+			}
+		}
+	}
+	
+	# List the keys which contain references, scalar values are not listed
+	for my $key (sort { lc($a) cmp lc($b) || $a cmp $b } keys %$hash_ref) {
+		my $value = $hash_ref->{$key};
+		my $type = ref($value);
+		next if !$type;
+		
+		if ($type eq 'HASH') {
+			$return_string .= $indent . $parent_key_string . "{$key}\n";
+			
+			# Descend into the child hash with this key appended to the chain
+			push @$parent_keys, $key;
+			$return_string .= format_hash_keys($value, $display_parent_keys, $display_values_hashref, $parent_keys);
+			pop @$parent_keys;
+		}
+		elsif ($type eq 'ARRAY') {
+			$return_string .= $indent . "[$key]\n";
+		}
+		else {
+			$return_string .= $indent . "<$type: $key>\n";
+		}
+	}
+	
+	return $return_string;
+}
+
+#/////////////////////////////////////////////////////////////////////////////
+
 =head2 get_caller_trace
 
  Parameters  : $level_limit - number of previous calls to return
@@ -8592,8 +8672,7 @@ sub reservations_ready {
  Description : Retrieves the computerloadlog entries for all reservations
                belonging to the request. A hash is constructed with keys set to
                the reservation IDs. The data of each key is a reference to an
-               array containing the computerloadstate names, sorted from newest
-               computerloadlog entry to oldest.
+               array containing the computerloadstate names.
 
 =cut
 
@@ -8621,7 +8700,7 @@ AND computerloadlog.loadstateid = comput
 WHERE
 request.id = $request_id
 AND reservation.requestid = request.id
-ORDER BY computerloadlog.timestamp DESC
+ORDER BY computerloadlog.timestamp ASC
 EOF
 
 	my @rows = database_select($select_statement);
@@ -8634,7 +8713,14 @@ EOF
 		push @{$computerloadlog_info->{$reservation_id}}, $loadstatename if defined($loadstatename);
 	}
 	
-	notify($ERRORS{'DEBUG'}, 0, "retrieved computerloadlog info for request $request_id:\n" . format_data($computerloadlog_info));
+	my $computerloadlog_string = '';
+	for my $reservation_id (keys %$computerloadlog_info) {
+		$computerloadlog_string .= "$reservation_id: ";
+		$computerloadlog_string .= join(', ', @{$computerloadlog_info->{$reservation_id}});
+		$computerloadlog_string .= "\n";
+	}
+	
+	notify($ERRORS{'DEBUG'}, 0, "retrieved computerloadstate names for request $request_id:\n$computerloadlog_string");
 	return $computerloadlog_info;
 }
 
@@ -10844,7 +10930,7 @@ EOF
 
 #/////////////////////////////////////////////////////////////////////////////
 
-=head2 sleep_uninterupted
+=head2 sleep_uninterrupted
 
  Parameters  : $seconds
  Returns     : none
@@ -10856,7 +10942,7 @@ EOF
 
 =cut
 
-sub sleep_uninterupted {
+sub sleep_uninterrupted {
 	my $seconds = shift;
 	my $start_time = Time::HiRes::time;
 	my $end_time = ($start_time + $seconds);