You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@vcl.apache.org by fa...@apache.org on 2011/05/25 19:31:49 UTC

svn commit: r1127593 - in /incubator/vcl/trunk/managementnode: bin/vcld lib/VCL/Module/OS.pm lib/VCL/Module/OS/Linux.pm lib/VCL/inuse.pm

Author: fapeeler
Date: Wed May 25 17:31:48 2011
New Revision: 1127593

URL: http://svn.apache.org/viewvc?rev=1127593&view=rev
Log:
VCL-463

Server loads:
add the ability to process a reboot request.
The inuse module calls the os->reboot routine;
each OS module is responsible for handling it.

If the OS module cannot reboot the machine, the inuse module returns the request
to the inuse state without taking any further action.



Modified:
    incubator/vcl/trunk/managementnode/bin/vcld
    incubator/vcl/trunk/managementnode/lib/VCL/Module/OS.pm
    incubator/vcl/trunk/managementnode/lib/VCL/Module/OS/Linux.pm
    incubator/vcl/trunk/managementnode/lib/VCL/inuse.pm

Modified: incubator/vcl/trunk/managementnode/bin/vcld
URL: http://svn.apache.org/viewvc/incubator/vcl/trunk/managementnode/bin/vcld?rev=1127593&r1=1127592&r2=1127593&view=diff
==============================================================================
--- incubator/vcl/trunk/managementnode/bin/vcld (original)
+++ incubator/vcl/trunk/managementnode/bin/vcld Wed May 25 17:31:48 2011
@@ -202,7 +202,7 @@ sub main () {
 			$ENV{state}     = $request_state_name;
 
 			# Make sure the request state is valid
-			if ($request_state_name !~ /inuse|reserved|deleted|timeout|reclaim|reload|new|tomaintenance|image|imageprep|makeproduction|imageinuse|complete|failed|pending|maintenance|tovmhostinuse/) {
+			if ($request_state_name !~ /inuse|reserved|deleted|timeout|reclaim|reload|new|tomaintenance|image|imageprep|makeproduction|imageinuse|complete|failed|pending|maintenance|tovmhostinuse|rebootsoft|reboothard|reinstall/) {
 				notify($ERRORS{'WARNING'}, $LOGFILE, "assigned request in unsupported state: $request_state_name");
 				next REQUEST;
 			}
@@ -497,7 +497,7 @@ sub make_new_child {
 	}
 
 	# The imageinuse state is now handled by inuse.pm
-	if ($state =~ /^(imageinuse)$/) {
+	if ($state =~ /^(imageinuse|rebootsoft|reboothard|reinstall)$/) {
 		notify($ERRORS{'DEBUG'}, $LOGFILE, "request will be processed by inuse.pm");
 		$state_module = "VCL::inuse";
 	}

Modified: incubator/vcl/trunk/managementnode/lib/VCL/Module/OS.pm
URL: http://svn.apache.org/viewvc/incubator/vcl/trunk/managementnode/lib/VCL/Module/OS.pm?rev=1127593&r1=1127592&r2=1127593&view=diff
==============================================================================
--- incubator/vcl/trunk/managementnode/lib/VCL/Module/OS.pm (original)
+++ incubator/vcl/trunk/managementnode/lib/VCL/Module/OS.pm Wed May 25 17:31:48 2011
@@ -220,6 +220,86 @@ sub get_current_image_name {
 
 #/////////////////////////////////////////////////////////////////////////////
 
+=head2 wait_for_reboot
+
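+ Parameters  : Maximum number of wait attempts (optional, default 2)
+ Returns     : 1 if the computer responds to ssh within the attempt limit; 0 if a power reset cannot be initiated or every attempt fails
+ Description : Waits for the computer to stop responding to ping, to respond to ping again, and then to respond to ssh. Every attempt after the first begins with a hard power reset through the provisioning module.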
+
+=cut
+
+sub wait_for_reboot {
+        my $self = shift;
+        if (ref($self) !~ /VCL::Module/i) {
+                notify($ERRORS{'CRITICAL'}, 0, "subroutine was called as a function, it must be called as a class method");
+                return;
+        }
+	# Wait for a reboot to finish: ping must drop, ping must return, then ssh must respond
+        my $computer_node_name   = $self->data->get_computer_node_name();
+
+	# Maximum number of wait attempts is taken from the first argument
+        my $wait_attempt_limit = shift;
+	# Default to 2 attempts when no limit was passed
+	if (!defined($wait_attempt_limit)) {
+                $wait_attempt_limit = 2;
+        }
+	# The first attempt only waits; every later attempt begins with a hard power reset
+        WAIT_ATTEMPT:
+        for (my $wait_attempt = 1; $wait_attempt <= $wait_attempt_limit; $wait_attempt++) {
+                if ($wait_attempt > 1) {
+                        # Computer did not become fully responsive on previous wait attempt
+                        notify($ERRORS{'OK'}, 0, "$computer_node_name reboot failed to complete on previous attempt, attempting hard power reset");
+                        # Call provisioning module's power_reset() subroutine
+                        if ($self->provisioner->power_reset()) {
+                                notify($ERRORS{'OK'}, 0, "reboot attempt $wait_attempt/$wait_attempt_limit: initiated power reset on $computer_node_name");
+                        }
+                        else {
+                                notify($ERRORS{'WARNING'}, 0, "reboot failed, failed to initiate power reset on $computer_node_name");
+                                return 0;
+                        }
+                } ## end if ($wait_attempt > 1)
+
+                # Wait maximum of 3 minutes for the computer to become unresponsive
+                if (!$self->wait_for_no_ping(180, 3)) {
+                        # Computer never stopped responding to ping
+                        notify($ERRORS{'WARNING'}, 0, "$computer_node_name never became unresponsive to ping");
+                        next WAIT_ATTEMPT;
+                }
+
+                # Computer is unresponsive, reboot has begun
+                # Wait for 5 seconds before beginning to check if computer is back online
+                notify($ERRORS{'DEBUG'}, 0, "$computer_node_name reboot has begun, sleeping for 5 seconds");
+                sleep 5;
+
+                # Wait maximum of 6 minutes for the computer to come back up
+                if (!$self->wait_for_ping(360, 5)) {
+                        # Computer went offline but did not respond to ping again within the wait period
+                        notify($ERRORS{'WARNING'}, 0, "$computer_node_name never responded to ping");
+                        next WAIT_ATTEMPT;
+                }
+
+                notify($ERRORS{'DEBUG'}, 0, "$computer_node_name is pingable, waiting for ssh to respond");
+
+                # Wait maximum of 3 minutes for ssh to respond
+                if (!$self->wait_for_ssh(180, 5)) {
+                        notify($ERRORS{'WARNING'}, 0, "ssh never responded on $computer_node_name");
+                        next WAIT_ATTEMPT;
+                }
+
+                notify($ERRORS{'DEBUG'}, 0, "$computer_node_name responded to ssh");
+
+                return 1;
+        } ## end for (my $wait_attempt = 1; $wait_attempt <=...
+
+        # If loop completed, maximum number of reboot attempts was reached
+        notify($ERRORS{'WARNING'}, 0, "reboot failed on $computer_node_name, made $wait_attempt_limit attempts");
+        return 0;
+
+
+}
+
+#/////////////////////////////////////////////////////////////////////////////
+
 =head2 wait_for_ping
 
  Parameters  : Maximum number of seconds to wait (optional), delay between attempts (optional)

Modified: incubator/vcl/trunk/managementnode/lib/VCL/Module/OS/Linux.pm
URL: http://svn.apache.org/viewvc/incubator/vcl/trunk/managementnode/lib/VCL/Module/OS/Linux.pm?rev=1127593&r1=1127592&r2=1127593&view=diff
==============================================================================
--- incubator/vcl/trunk/managementnode/lib/VCL/Module/OS/Linux.pm (original)
+++ incubator/vcl/trunk/managementnode/lib/VCL/Module/OS/Linux.pm Wed May 25 17:31:48 2011
@@ -2787,6 +2787,111 @@ sub get_public_ip_address {
 
 #/////////////////////////////////////////////////////////////////////////////
 
+
+=head2 reboot
+
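+ Parameters  : $wait_for_reboot (optional) - the reboot is waited for unless the value matches /0/
+ Returns     : 1 on success; 0 or undefined on failure
+ Description : Runs "/sbin/shutdown -r now" over ssh. Falls back to the provisioning module's power_reset() if the computer does not respond to ssh or the command exits non-zero, then optionally calls wait_for_reboot().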
+
+=cut
+
+sub reboot {
+        my $self = shift;
+        if (ref($self) !~ /linux/i) {
+                notify($ERRORS{'CRITICAL'}, 0, "subroutine was called as a function, it must be called as a class method");
+                return;
+        }
+        # Reboot the computer gracefully over ssh, falling back to a power reset
+        my $management_node_keys = $self->data->get_management_node_keys();
+        my $computer_node_name   = $self->data->get_computer_node_name();
+
+        # Optional argument: wait for the reboot to finish unless a value matching /0/ was passed
+        my $wait_for_reboot = shift;
+        if (!defined($wait_for_reboot) || $wait_for_reboot !~ /0/) {
+                notify($ERRORS{'DEBUG'}, 0, "rebooting $computer_node_name and waiting for ssh to become active");
+                $wait_for_reboot = 1;
+        }
+        else {
+                notify($ERRORS{'DEBUG'}, 0, "rebooting $computer_node_name and NOT waiting");
+                $wait_for_reboot = 0;
+        }
+        # NOTE(review): the unanchored /0/ test also treats values such as 10 as "do not wait" - confirm intent
+        my $reboot_start_time = time();
+        notify($ERRORS{'DEBUG'}, 0, "reboot will be attempted on $computer_node_name");
+
+        # Check if computer responds to ssh before preparing for reboot
+        if ($self->wait_for_ssh(0)) {
+
+                # Use /sbin/shutdown for the reboot; give up if it is not present on the computer
+                my $reboot_command;
+                if ( $self->file_exists("/sbin/shutdown")) {
+                        $reboot_command = "/sbin/shutdown -r now";
+                }
+                else {
+                        notify($ERRORS{'WARNING'}, 0, "reboot not attempted, /sbin/shutdown did not exists on OS");
+                        return 0;
+                }
+
+                my ($reboot_exit_status, $reboot_output) = run_ssh_command($computer_node_name, $management_node_keys, $reboot_command);
+                if (!defined($reboot_output)) {
+                        notify($ERRORS{'WARNING'}, 0, "failed to execute ssh command to reboot $computer_node_name");
+                        return;
+                }
+                # NOTE(review): the failure path above uses a bare return while other failure paths return 0
+                if ($reboot_exit_status == 0) {
+                        notify($ERRORS{'OK'}, 0, "executed reboot command on $computer_node_name");
+                }
+                else {
+                        notify($ERRORS{'WARNING'}, 0, "failed to reboot $computer_node_name, attempting power reset, output:\n" . join("\n", @$reboot_output));
+
+                        # Call provisioning module's power_reset() subroutine
+                        if ($self->provisioner->power_reset()) {
+                                notify($ERRORS{'OK'}, 0, "initiated power reset on $computer_node_name");
+                        }
+                        else {
+                                notify($ERRORS{'WARNING'}, 0, "reboot failed, failed to initiate power reset on $computer_node_name");
+                                return;
+                        }
+                }
+        }
+        else {
+                # Computer did not respond to ssh
+                notify($ERRORS{'WARNING'}, 0, "$computer_node_name did not respond to ssh, graceful reboot cannot be performed, attempting hard reset");
+
+                # Call provisioning module's power_reset() subroutine
+                if ($self->provisioner->power_reset()) {
+                        notify($ERRORS{'OK'}, 0, "initiated power reset on $computer_node_name");
+                }
+                else {
+                        notify($ERRORS{'WARNING'}, 0, "reboot failed, failed to initiate power reset on $computer_node_name");
+                        return 0;
+                }
+        } ## end else [ if ($self->wait_for_ssh(0))
+	# Attempt limit passed to wait_for_reboot() and reported in the failure message
+	my $wait_attempt_limit = 2;
+        # Return immediately when the caller asked not to wait for the reboot to complete
+        if (!$wait_for_reboot) {
+                return 1;
+        }
+	else {
+		if($self->wait_for_reboot($wait_attempt_limit)){
+			# Reboot was successful, calculate how long reboot took
+                	my $reboot_end_time = time();
+                	my $reboot_duration = ($reboot_end_time - $reboot_start_time);
+                	notify($ERRORS{'OK'}, 0, "reboot complete on $computer_node_name, took $reboot_duration seconds");
+			return 1;
+		}
+		else {
+        		notify($ERRORS{'WARNING'}, 0, "reboot failed on $computer_node_name, made $wait_attempt_limit attempts");
+			return 0;
+		}
+	}
+
+} ## end sub reboot
+
+#/////////////////////////////////////////////////////////////////////////////
+
 1;
 __END__
 

Modified: incubator/vcl/trunk/managementnode/lib/VCL/inuse.pm
URL: http://svn.apache.org/viewvc/incubator/vcl/trunk/managementnode/lib/VCL/inuse.pm?rev=1127593&r1=1127592&r2=1127593&view=diff
==============================================================================
--- incubator/vcl/trunk/managementnode/lib/VCL/inuse.pm (original)
+++ incubator/vcl/trunk/managementnode/lib/VCL/inuse.pm Wed May 25 17:31:48 2011
@@ -122,7 +122,32 @@ sub process {
 	my $reservation_count     = $self->data->get_reservation_count();
 	my $is_parent_reservation = $self->data->is_parent_reservation();
 	my $identity_key          = $self->data->get_image_identity();
+	my $request_state_name    = $self->data->get_request_state_name();
 
+	if ($request_state_name =~ /reboot|rebootsoft|reboothard/) {
+		notify($ERRORS{'OK'}, 0, "this is a 'reboot' request");
+		if($self->os->can('reboot')){
+			if($self->os->reboot()){
+				notify($ERRORS{'OK'}, 0, "successfuly rebooted $computer_nodename");
+			
+			}
+			else {
+				notify($ERRORS{'WARNING'}, 0, "failed to reboot $computer_nodename");
+				#do not fail request or machine
+			}
+			# Put this request back into the inuse state
+                       	if (update_request_state($request_id, "inuse", "inuse")) {
+                               	notify($ERRORS{'OK'}, 0, "request state set back to inuse");
+                        }
+                        else {
+                               	notify($ERRORS{'WARNING'}, 0, "unable to set request state back to inuse");
+                        }
+			notify($ERRORS{'OK'}, 0, "exiting");
+        		exit;
+		}
+		
+	}
+	
 	# Set the user connection timeout limit in minutes
 	my $connect_timeout_limit = 15;