You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@vcl.apache.org by fa...@apache.org on 2011/05/25 19:31:49 UTC
svn commit: r1127593 - in /incubator/vcl/trunk/managementnode: bin/vcld
lib/VCL/Module/OS.pm lib/VCL/Module/OS/Linux.pm lib/VCL/inuse.pm
Author: fapeeler
Date: Wed May 25 17:31:48 2011
New Revision: 1127593
URL: http://svn.apache.org/viewvc?rev=1127593&view=rev
Log:
VCL-463
Server loads:
add the ability to process a reboot request.
The inuse module calls the os->reboot routine;
each OS module is responsible for handling it.
If the OS module cannot reboot the machine, the inuse module returns to the inuse state
without taking any action.
Modified:
incubator/vcl/trunk/managementnode/bin/vcld
incubator/vcl/trunk/managementnode/lib/VCL/Module/OS.pm
incubator/vcl/trunk/managementnode/lib/VCL/Module/OS/Linux.pm
incubator/vcl/trunk/managementnode/lib/VCL/inuse.pm
Modified: incubator/vcl/trunk/managementnode/bin/vcld
URL: http://svn.apache.org/viewvc/incubator/vcl/trunk/managementnode/bin/vcld?rev=1127593&r1=1127592&r2=1127593&view=diff
==============================================================================
--- incubator/vcl/trunk/managementnode/bin/vcld (original)
+++ incubator/vcl/trunk/managementnode/bin/vcld Wed May 25 17:31:48 2011
@@ -202,7 +202,7 @@ sub main () {
$ENV{state} = $request_state_name;
# Make sure the request state is valid
- if ($request_state_name !~ /inuse|reserved|deleted|timeout|reclaim|reload|new|tomaintenance|image|imageprep|makeproduction|imageinuse|complete|failed|pending|maintenance|tovmhostinuse/) {
+ if ($request_state_name !~ /inuse|reserved|deleted|timeout|reclaim|reload|new|tomaintenance|image|imageprep|makeproduction|imageinuse|complete|failed|pending|maintenance|tovmhostinuse|rebootsoft|reboothard|reinstall/) {
notify($ERRORS{'WARNING'}, $LOGFILE, "assigned request in unsupported state: $request_state_name");
next REQUEST;
}
@@ -497,7 +497,7 @@ sub make_new_child {
}
# The imageinuse state is now handled by inuse.pm
- if ($state =~ /^(imageinuse)$/) {
+ if ($state =~ /^(imageinuse|rebootsoft|reboothard|reinstall)$/) {
notify($ERRORS{'DEBUG'}, $LOGFILE, "request will be processed by inuse.pm");
$state_module = "VCL::inuse";
}
Modified: incubator/vcl/trunk/managementnode/lib/VCL/Module/OS.pm
URL: http://svn.apache.org/viewvc/incubator/vcl/trunk/managementnode/lib/VCL/Module/OS.pm?rev=1127593&r1=1127592&r2=1127593&view=diff
==============================================================================
--- incubator/vcl/trunk/managementnode/lib/VCL/Module/OS.pm (original)
+++ incubator/vcl/trunk/managementnode/lib/VCL/Module/OS.pm Wed May 25 17:31:48 2011
@@ -220,6 +220,86 @@ sub get_current_image_name {
#/////////////////////////////////////////////////////////////////////////////
+=head2 wait_for_reboot
+
+ Parameters  : $wait_attempt_limit (optional) - maximum number of wait
+               attempts to make, default is 2
+ Returns     : 1         - computer stopped responding to ping, became pingable
+                           again and responded to ssh during one of the attempts
+               0         - a power reset could not be initiated or all of the
+                           attempts failed
+               undefined - subroutine was not called as a class method
+ Description : Waits for a reboot which has already been initiated to complete.
+               Each attempt waits for the computer to stop responding to ping,
+               then to respond to ping again, then to respond to ssh. If an
+               attempt fails, the provisioning module's power_reset() subroutine
+               is called before the next attempt begins.
+
+=cut
+
+sub wait_for_reboot {
+    my $self = shift;
+    if (ref($self) !~ /VCL::Module/i) {
+        notify($ERRORS{'CRITICAL'}, 0, "subroutine was called as a function, it must be called as a class method");
+        return;
+    }
+
+    my $computer_node_name = $self->data->get_computer_node_name();
+
+    # Maximum number of attempts to wait for the reboot to complete
+    my $default_wait_attempt_limit = 2;
+    my $wait_attempt_limit = shift;
+
+    if (!defined($wait_attempt_limit)) {
+        $wait_attempt_limit = $default_wait_attempt_limit;
+    }
+    elsif ($wait_attempt_limit !~ /^\d+$/ || $wait_attempt_limit < 1) {
+        # A non-numeric or non-positive limit would cause the loop below to be
+        # skipped and a reboot failure to be reported without checking anything
+        notify($ERRORS{'WARNING'}, 0, "wait attempt limit argument is not a positive integer: '$wait_attempt_limit', using default value: $default_wait_attempt_limit");
+        $wait_attempt_limit = $default_wait_attempt_limit;
+    }
+
+    WAIT_ATTEMPT:
+    for (my $wait_attempt = 1; $wait_attempt <= $wait_attempt_limit; $wait_attempt++) {
+        if ($wait_attempt > 1) {
+            # Computer did not become fully responsive on previous wait attempt
+            notify($ERRORS{'OK'}, 0, "$computer_node_name reboot failed to complete on previous attempt, attempting hard power reset");
+            # Call provisioning module's power_reset() subroutine
+            if ($self->provisioner->power_reset()) {
+                notify($ERRORS{'OK'}, 0, "reboot attempt $wait_attempt/$wait_attempt_limit: initiated power reset on $computer_node_name");
+            }
+            else {
+                # Nothing else can be done if the power reset cannot be initiated
+                notify($ERRORS{'WARNING'}, 0, "reboot failed, failed to initiate power reset on $computer_node_name");
+                return 0;
+            }
+        } ## end if ($wait_attempt > 1)
+
+        # Wait maximum of 3 minutes for the computer to become unresponsive
+        if (!$self->wait_for_no_ping(180, 3)) {
+            # Computer never stopped responding to ping
+            notify($ERRORS{'WARNING'}, 0, "$computer_node_name never became unresponsive to ping");
+            next WAIT_ATTEMPT;
+        }
+
+        # Computer is unresponsive, reboot has begun
+        # Wait for 5 seconds before beginning to check if computer is back online
+        notify($ERRORS{'DEBUG'}, 0, "$computer_node_name reboot has begun, sleeping for 5 seconds");
+        sleep 5;
+
+        # Wait maximum of 6 minutes for the computer to come back up
+        if (!$self->wait_for_ping(360, 5)) {
+            # Computer went offline but did not come back up, try again
+            notify($ERRORS{'WARNING'}, 0, "$computer_node_name never responded to ping");
+            next WAIT_ATTEMPT;
+        }
+
+        notify($ERRORS{'DEBUG'}, 0, "$computer_node_name is pingable, waiting for ssh to respond");
+
+        # Wait maximum of 3 minutes for ssh to respond
+        if (!$self->wait_for_ssh(180, 5)) {
+            notify($ERRORS{'WARNING'}, 0, "ssh never responded on $computer_node_name");
+            next WAIT_ATTEMPT;
+        }
+
+        notify($ERRORS{'DEBUG'}, 0, "$computer_node_name responded to ssh");
+
+        return 1;
+    } ## end for (my $wait_attempt = 1; $wait_attempt <=...
+
+    # If loop completed, maximum number of reboot attempts was reached
+    notify($ERRORS{'WARNING'}, 0, "reboot failed on $computer_node_name, made $wait_attempt_limit attempts");
+    return 0;
+}
+
+#/////////////////////////////////////////////////////////////////////////////
+
=head2 wait_for_ping
Parameters : Maximum number of seconds to wait (optional), delay between attempts (optional)
Modified: incubator/vcl/trunk/managementnode/lib/VCL/Module/OS/Linux.pm
URL: http://svn.apache.org/viewvc/incubator/vcl/trunk/managementnode/lib/VCL/Module/OS/Linux.pm?rev=1127593&r1=1127592&r2=1127593&view=diff
==============================================================================
--- incubator/vcl/trunk/managementnode/lib/VCL/Module/OS/Linux.pm (original)
+++ incubator/vcl/trunk/managementnode/lib/VCL/Module/OS/Linux.pm Wed May 25 17:31:48 2011
@@ -2787,6 +2787,111 @@ sub get_public_ip_address {
#/////////////////////////////////////////////////////////////////////////////
+
+=head2 reboot
+
+ Parameters  : $wait_for_reboot (optional) - pass 0 to return immediately
+               after the reboot has been initiated, any other value or no
+               argument causes the subroutine to wait for the reboot to
+               complete
+ Returns     : 1         - reboot was initiated and, if waiting was requested,
+                           wait_for_reboot() reported success
+               0         - /sbin/shutdown does not exist on the computer, the
+                           power reset could not be initiated when ssh was not
+                           responding, or wait_for_reboot() failed
+               undefined - subroutine was not called as a class method, the
+                           ssh reboot command could not be executed, or the
+                           power reset failed after the reboot command returned
+                           a non-zero exit status
+ Description : Reboots the computer. If the computer responds to ssh,
+               '/sbin/shutdown -r now' is executed on it. If the computer does
+               not respond to ssh or the shutdown command returns a non-zero
+               exit status, the provisioning module's power_reset() subroutine
+               is called instead. Unless the argument is 0, wait_for_reboot()
+               is then called with a limit of 2 attempts.
+
+=cut
+
+sub reboot {
+    my $self = shift;
+    if (ref($self) !~ /linux/i) {
+        notify($ERRORS{'CRITICAL'}, 0, "subroutine was called as a function, it must be called as a class method");
+        return;
+    }
+
+    my $management_node_keys = $self->data->get_management_node_keys();
+    my $computer_node_name   = $self->data->get_computer_node_name();
+
+    # Check if an argument was supplied
+    # Only an argument which is exactly 0 disables waiting, the pattern is
+    # anchored so values which merely contain a 0 (10, 20, 100...) still wait
+    my $wait_for_reboot = shift;
+    if (!defined($wait_for_reboot) || $wait_for_reboot !~ /^0$/) {
+        notify($ERRORS{'DEBUG'}, 0, "rebooting $computer_node_name and waiting for ssh to become active");
+        $wait_for_reboot = 1;
+    }
+    else {
+        notify($ERRORS{'DEBUG'}, 0, "rebooting $computer_node_name and NOT waiting");
+        $wait_for_reboot = 0;
+    }
+
+    # Record the start time so the reboot duration can be reported
+    my $reboot_start_time = time();
+    notify($ERRORS{'DEBUG'}, 0, "reboot will be attempted on $computer_node_name");
+
+    # Check if computer responds to ssh before preparing for reboot
+    if ($self->wait_for_ssh(0)) {
+
+        # Check if shutdown exists on the computer
+        my $reboot_command;
+        if ($self->file_exists("/sbin/shutdown")) {
+            $reboot_command = "/sbin/shutdown -r now";
+        }
+        else {
+            notify($ERRORS{'WARNING'}, 0, "reboot not attempted, /sbin/shutdown did not exists on OS");
+            return 0;
+        }
+
+        my ($reboot_exit_status, $reboot_output) = run_ssh_command($computer_node_name, $management_node_keys, $reboot_command);
+        if (!defined($reboot_output)) {
+            notify($ERRORS{'WARNING'}, 0, "failed to execute ssh command to reboot $computer_node_name");
+            return;
+        }
+
+        if ($reboot_exit_status == 0) {
+            notify($ERRORS{'OK'}, 0, "executed reboot command on $computer_node_name");
+        }
+        else {
+            # Graceful reboot command failed, fall back to a hard reset
+            notify($ERRORS{'WARNING'}, 0, "failed to reboot $computer_node_name, attempting power reset, output:\n" . join("\n", @$reboot_output));
+
+            # Call provisioning module's power_reset() subroutine
+            if ($self->provisioner->power_reset()) {
+                notify($ERRORS{'OK'}, 0, "initiated power reset on $computer_node_name");
+            }
+            else {
+                notify($ERRORS{'WARNING'}, 0, "reboot failed, failed to initiate power reset on $computer_node_name");
+                return;
+            }
+        }
+    }
+    else {
+        # Computer did not respond to ssh
+        notify($ERRORS{'WARNING'}, 0, "$computer_node_name did not respond to ssh, graceful reboot cannot be performed, attempting hard reset");
+
+        # Call provisioning module's power_reset() subroutine
+        if ($self->provisioner->power_reset()) {
+            notify($ERRORS{'OK'}, 0, "initiated power reset on $computer_node_name");
+        }
+        else {
+            notify($ERRORS{'WARNING'}, 0, "reboot failed, failed to initiate power reset on $computer_node_name");
+            return 0;
+        }
+    } ## end else [ if ($self->wait_for_ssh(0))
+
+    # Maximum number of attempts wait_for_reboot() is allowed to make
+    my $wait_attempt_limit = 2;
+    # Check if wait for reboot is set
+    if (!$wait_for_reboot) {
+        # Reboot was initiated, caller did not ask to wait for it to complete
+        return 1;
+    }
+    else {
+        if ($self->wait_for_reboot($wait_attempt_limit)) {
+            # Reboot was successful, calculate how long reboot took
+            my $reboot_end_time = time();
+            my $reboot_duration = ($reboot_end_time - $reboot_start_time);
+            notify($ERRORS{'OK'}, 0, "reboot complete on $computer_node_name, took $reboot_duration seconds");
+            return 1;
+        }
+        else {
+            notify($ERRORS{'WARNING'}, 0, "reboot failed on $computer_node_name, made $wait_attempt_limit attempts");
+            return 0;
+        }
+    }
+
+} ## end sub reboot
+
+#/////////////////////////////////////////////////////////////////////////////
+
1;
__END__
Modified: incubator/vcl/trunk/managementnode/lib/VCL/inuse.pm
URL: http://svn.apache.org/viewvc/incubator/vcl/trunk/managementnode/lib/VCL/inuse.pm?rev=1127593&r1=1127592&r2=1127593&view=diff
==============================================================================
--- incubator/vcl/trunk/managementnode/lib/VCL/inuse.pm (original)
+++ incubator/vcl/trunk/managementnode/lib/VCL/inuse.pm Wed May 25 17:31:48 2011
@@ -122,7 +122,32 @@ sub process {
my $reservation_count = $self->data->get_reservation_count();
my $is_parent_reservation = $self->data->is_parent_reservation();
my $identity_key = $self->data->get_image_identity();
+ my $request_state_name = $self->data->get_request_state_name();
+ if ($request_state_name =~ /reboot|rebootsoft|reboothard/) {
+ notify($ERRORS{'OK'}, 0, "this is a 'reboot' request");
+ if($self->os->can('reboot')){
+ if($self->os->reboot()){
+ notify($ERRORS{'OK'}, 0, "successfuly rebooted $computer_nodename");
+
+ }
+ else {
+ notify($ERRORS{'WARNING'}, 0, "failed to reboot $computer_nodename");
+ #do not fail request or machine
+ }
+ # Put this request back into the inuse state
+ if (update_request_state($request_id, "inuse", "inuse")) {
+ notify($ERRORS{'OK'}, 0, "request state set back to inuse");
+ }
+ else {
+ notify($ERRORS{'WARNING'}, 0, "unable to set request state back to inuse");
+ }
+ notify($ERRORS{'OK'}, 0, "exiting");
+ exit;
+ }
+
+ }
+
# Set the user connection timeout limit in minutes
my $connect_timeout_limit = 15;