You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@vcl.apache.org by ar...@apache.org on 2011/08/23 18:04:10 UTC
svn commit: r1160762 - in /incubator/vcl/trunk/managementnode/lib/VCL: Module/OS.pm Module/OS/Windows.pm utils.pm

Author: arkurth
Date: Tue Aug 23 16:04:09 2011
New Revision: 1160762

URL: http://svn.apache.org/viewvc?rev=1160762&view=rev
Log:
VCL-503
Added code to utils.pm::run_ssh_command to allow a timeout value to be specified in order to prevent hung SSH processes from hanging for a very long time or indefinitely. The default value for now is 0 meaning that the code never times out the command.

Added utils.pm kill_child_processes subroutine. If the run_ssh_command timeout value is reached, the SSH process is still running so the kill_child_processes subroutine is called to kill all child processes belonging to the reservation process.

Added timeout_seconds argument set to 15 seconds to the run_ssh_command call in OS.pm::is_ssh_responding. Also added 20 second timeout argument to the call to execute qwinsta.exe in Windows.pm::user_logged_in. These are the calls which most often cause an SSH process to hang.

VCL-465
Updated OS.pm::get_public_interface_name to ignore interfaces named 'lo' and 'sit[0-9]'. Changed logic to not ignore interfaces without a bound IP address. This fixes problems where static addresses are being assigned under Linux. The correct public interface does not come up with an autogenerated IP address as it does under Windows and the code was ignoring this interface because of this.

Other
Updated OS.pm::create_text_file to correct the line endings depending on the OS type.

Modified:
    incubator/vcl/trunk/managementnode/lib/VCL/Module/OS.pm
    incubator/vcl/trunk/managementnode/lib/VCL/Module/OS/Windows.pm
    incubator/vcl/trunk/managementnode/lib/VCL/utils.pm

Modified: incubator/vcl/trunk/managementnode/lib/VCL/Module/OS.pm
URL: http://svn.apache.org/viewvc/incubator/vcl/trunk/managementnode/lib/VCL/Module/OS.pm?rev=1160762&r1=1160761&r2=1160762&view=diff
==============================================================================
--- incubator/vcl/trunk/managementnode/lib/VCL/Module/OS.pm (original)
+++ incubator/vcl/trunk/managementnode/lib/VCL/Module/OS.pm Tue Aug 23 16:04:09 2011
@@ -471,6 +471,7 @@ sub is_ssh_responding {
 			command => "echo testing ssh on $computer_node_name",
 			max_attempts => $max_attempts,
 			output_level => 0,
+			timeout_seconds => 15,
 		});
 		
 		# The exit status will be 0 if the command succeeded
@@ -957,7 +958,11 @@ sub get_public_interface_name {
 		my $description = $network_configuration->{$check_interface_name}{description} || '';
 		
 		# Check if the interface should be ignored based on the name or description
-		if ($check_interface_name =~ /(loopback|vmnet|afs|tunnel|6to4|isatap|teredo)/i) {
+		if ($check_interface_name =~ /^(lo|sit\d)$/i) {
+			notify($ERRORS{'DEBUG'}, 0, "interface '$check_interface_name' ignored because its name is '$1'");
+			next INTERFACE;
+		}
+		elsif ($check_interface_name =~ /(loopback|vmnet|afs|tunnel|6to4|isatap|teredo)/i) {
 			notify($ERRORS{'DEBUG'}, 0, "interface '$check_interface_name' ignored because its name contains '$1'");
 			next INTERFACE;
 		}
@@ -966,15 +971,6 @@ sub get_public_interface_name {
 			next INTERFACE;
 		}
 		
-		# Get the IP addresses assigned to the interface
-		my @check_ip_addresses  = keys %{$network_configuration->{$check_interface_name}{ip_address}};
-		
-		# Ignore interface if it doesn't have an IP address
-		if (!@check_ip_addresses) {
-			notify($ERRORS{'DEBUG'}, 0, "interface '$check_interface_name' ignored because it is not assigned an IP address");
-			next INTERFACE;
-		}
-		
 		# If $public_interface_name hasn't been set yet, set it and continue checking the next interface
 		if (!$public_interface_name) {
 			$public_interface_name = $check_interface_name;
@@ -1501,13 +1497,17 @@ sub get_public_default_gateway {
 
 =head2 create_text_file
 
- Parameters  : $file_path, $file_contents
+ Parameters  : $file_path, $file_contents, $no_correct_line_endings (optional)
  Returns     : boolean
  Description : Creates a text file on the computer. The $file_contents
                string argument is converted to ASCII hex values. These values
                are echo'd on the computer which avoids problems with special
                characters and escaping. If the file already exists it is
                overwritten.
+               The line endings within the $file_contents string are corrected
+               by default to Windows-style (\r\n) or Linux-style (\n) depending
+               on the OS. An optional boolean 3rd argument can be specified to
+               prevent the string from being altered.
 
 =cut
 
@@ -1524,8 +1524,16 @@ sub create_text_file {
 		return;
 	}
 	
-	my $management_node_keys = $self->data->get_management_node_keys();
-	my $computer_node_name   = $self->data->get_computer_node_name();
+	my $computer_node_name = $self->data->get_computer_node_name();
+	my $image_os_type = $self->data->get_image_os_type();
+	
+	# Remove Windows-style carriage returns if the image OS isn't Windows
+	if ($image_os_type =~ /windows/) {
+		$file_contents_string =~ s/\r*\n/\r\n/g;
+	}
+	else {
+		$file_contents_string =~ s/\r//g;
+	}
 	
 	# Convert the string to a string containing the hex value of each character
 	# This is done to avoid problems with special characters in the file contents
@@ -1541,8 +1549,8 @@ sub create_text_file {
 	
 	# Create a command to echo the hex string to the file
 	# Use -e to enable interpretation of backslash escapes
-	my $command .= "echo -e \"$hex_string\" > $file_path";
-	my ($exit_status, $output) = run_ssh_command($computer_node_name, $management_node_keys, $command, '', '', 0);
+	my $command .= "echo -n -e \"$hex_string\" > $file_path";
+	my ($exit_status, $output) = $self->execute($command);
 	if (!defined($output)) {
 		notify($ERRORS{'WARNING'}, 0, "failed to execute ssh command to create file on $computer_node_name: $file_path");
 		return;

Modified: incubator/vcl/trunk/managementnode/lib/VCL/Module/OS/Windows.pm
URL: http://svn.apache.org/viewvc/incubator/vcl/trunk/managementnode/lib/VCL/Module/OS/Windows.pm?rev=1160762&r1=1160761&r2=1160762&view=diff
==============================================================================
--- incubator/vcl/trunk/managementnode/lib/VCL/Module/OS/Windows.pm (original)
+++ incubator/vcl/trunk/managementnode/lib/VCL/Module/OS/Windows.pm Tue Aug 23 16:04:09 2011
@@ -8038,7 +8038,8 @@ sub user_logged_in {
 	notify($ERRORS{'DEBUG'}, 0, "checking if $username is logged in to $computer_node_name");
 
 	# Run qwinsta.exe to display terminal session information
-	my ($exit_status, $output) = run_ssh_command($computer_node_name, $management_node_keys, "$system32_path/qwinsta.exe");
+	# Set command timeout argument because this command occasionally hangs
+	my ($exit_status, $output) = run_ssh_command($computer_node_name, $management_node_keys, "$system32_path/qwinsta.exe", '', '', 1, 20);
 	if ($exit_status > 0) {
 		notify($ERRORS{'WARNING'}, 0, "failed to run qwinsta.exe on $computer_node_name, exit status: $exit_status, output:\n@{$output}");
 		return;

Modified: incubator/vcl/trunk/managementnode/lib/VCL/utils.pm
URL: http://svn.apache.org/viewvc/incubator/vcl/trunk/managementnode/lib/VCL/utils.pm?rev=1160762&r1=1160761&r2=1160762&view=diff
==============================================================================
--- incubator/vcl/trunk/managementnode/lib/VCL/utils.pm (original)
+++ incubator/vcl/trunk/managementnode/lib/VCL/utils.pm Tue Aug 23 16:04:09 2011
@@ -164,6 +164,7 @@ our @EXPORT = qw(
   is_valid_ip_address
   isconnected
   isfilelocked
+  kill_child_processes
   kill_reservation_process
   known_hosts
   lockfile
@@ -5393,16 +5394,17 @@ EOF
 
 =head2 run_ssh_command
 
- Parameters  : $node, $identity_path, $command, $user, $port
+ Parameters  : $node, $identity_path, $command, $user, $port, $output_level, $timeout_seconds
 					-or-
 					Hash reference with the following keys:
-					node - node name (required)
-					command - command to be executed remotely (required)
-					identity_paths - string containing paths to identity key files separated by commas (optional)
-					user - user to run remote command as (optional, default is 'root')
-					port - SSH port number (optional, default is 22)
-					output_level - allows the amount of output to be controlled: 0, 1, or 2 (optional)
-					max_attempts - maximum number of SSH attempts to make
+						node - node name (required)
+						command - command to be executed remotely (required)
+						identity_paths - string containing paths to identity key files separated by commas (optional)
+						user - user to run remote command as (optional, default is 'root')
+						port - SSH port number (optional, default is 22)
+						output_level - allows the amount of output to be controlled: 0, 1, or 2 (optional)
+						max_attempts - maximum number of SSH attempts to make
+						timeout_seconds - maximum number seconds SSH process can run before being terminated
  Returns     : If successful: array:
                   $array[0] = the exit status of the command
 					   $array[1] = reference to array containing lines of output
@@ -5412,7 +5414,7 @@ EOF
 =cut
 
 sub run_ssh_command {
-	my ($node, $identity_paths, $command, $user, $port, $output_level) = @_;
+	my ($node, $identity_paths, $command, $user, $port, $output_level, $timeout_seconds) = @_;
 
 	my $max_attempts = 3;
 	
@@ -5426,7 +5428,7 @@ sub run_ssh_command {
 		$port = $arguments->{port} || '22';
 		$output_level = $arguments->{output_level};
 		$max_attempts = $arguments->{max_attempts} || 3;
-		
+		$timeout_seconds = $arguments->{timeout_seconds};
 	}
 	
 	# Determine the output level if it was specified
@@ -5456,7 +5458,8 @@ sub run_ssh_command {
 
 	# Set default values if not passed as an argument
 	$user = "root" if (!$user);
-	$port = 22     if (!$port);
+	$port = 22 if (!$port);
+	$timeout_seconds = 0 if (!$timeout_seconds);
 	$identity_paths = $ENV{management_node_info}{keys} if (!defined $identity_paths || length($identity_paths) == 0);
 	
 	# TODO: Add ssh path to config file and set global variable
@@ -5511,9 +5514,9 @@ sub run_ssh_command {
 	my $ssh_command = "$ssh_path $identity_paths -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o ConnectionAttempts=1 -o ConnectTimeout=3 -l $user -p $port -x $node '$command' 2>&1";
 	
 	# Execute the command
-	my $ssh_output;
-	my $ssh_output_formatted;
-	my $attempts        = 0;
+	my $ssh_output = '';
+	my $ssh_output_formatted = '';
+	my $attempts = 0;
 	my $exit_status = 255;
 
 	# Make multiple attempts if failure occurs
@@ -5538,28 +5541,53 @@ sub run_ssh_command {
 			notify($ERRORS{'DEBUG'}, 0, "attempt $attempts/$max_attempts: executing SSH command on $node:\n$ssh_command") if $output_level;
 		}
 		
-		# Execute the command
-		$ssh_output = `$ssh_command`;
-
-		# Bits 0-7 of $? are set to the signal the child process received that caused it to die
-		my $signal_number = $? & 127;
-		
-		# Bit 8 of $? will be true if a core dump occurred
-		my $core_dump = $? & 128;
-		
-		# Bits 9-16 of $? contain the child process exit status
-		$exit_status = $? >> 8;
+		# Enclose SSH command in an eval block and use alarm to eventually timeout the SSH command if it hangs
+		my $start_time = time;
+		eval {
+			# Override the die and alarm handlers
+			local $SIG{__DIE__} = sub{};
+			local $SIG{ALRM} = sub { die "alarm\n" };
+			
+			if ($timeout_seconds) {
+				notify($ERRORS{'DEBUG'}, 0, "waiting up to $timeout_seconds seconds for SSH process to finish");
+				alarm $timeout_seconds;
+			}
+			
+			# Execute the command
+			$ssh_output = `$ssh_command`;
+			
+			# Save the exit status
+			$exit_status = $? >> 8;
+			
+			# Ignore the returned value of $? if it is -1
+			# This likely means a Perl bug was encountered
+			# Assume command was successful
+			if ($? == -1) {
+				notify($ERRORS{'DEBUG'}, 0, "exit status changed from $exit_status to 0, Perl bug likely encountered") if $output_level;
+				$exit_status = 0;
+			}
+			
+			if ($timeout_seconds) {
+				# Cancel the timer
+				alarm 0;
+			}
+		};
+	
+		my $duration = (time - $start_time);
 		
-		# Ignore the returned value of $? if it is -1
-		# This likely means a Perl bug was encountered
-		# Assume command was successful
-		if ($? == -1) {
-			notify($ERRORS{'DEBUG'}, 0, "exit status changed from $exit_status to 0, Perl bug likely encountered") if $output_level;
-			$exit_status = 0;
+		# Check if the timeout was reached
+		if ($EVAL_ERROR && $EVAL_ERROR eq "alarm\n") {
+			notify($ERRORS{'CRITICAL'}, 0, "attempt $attempts/$max_attempts: SSH command timed out after $duration seconds, timeout threshold: $timeout_seconds seconds, command: $node:\n$ssh_command");
+			
+			# Kill the child processes of this reservation process
+			kill_child_processes($PID);
+			next;
+		}
+		elsif ($EVAL_ERROR) {
+			notify($ERRORS{'CRITICAL'}, 0, "attempt $attempts/$max_attempts: eval error was generated attempting to run SSH command: $node:\n$ssh_command, error: $EVAL_ERROR");
+			next;
 		}
 		
-		#notify($ERRORS{'DEBUG'}, 0, "\$?: $?, signal: $signal_number, core dump: $core_dump, exit status: $exit_status");
-
 		# Strip out the key warning message from the output
 		$ssh_output =~ s/\@{10,}.*man-in-the-middle attacks\.//igs;
 		
@@ -10443,6 +10471,66 @@ EOF
 
 #/////////////////////////////////////////////////////////////////////////////
 
+=head2 kill_child_processes
+
+ Parameters  : @parent_pids (ancestor PID chain, children of the last PID are killed)
+ Returns     : boolean (false if $ENV{vcld} is set or the PID is not numeric)
+ Description : Recursively kills all descendant processes of the last PID
+               specified in the argument list using signal 9.
+
+=cut
+
+sub kill_child_processes {
+	my @parent_pids = @_;
+	my $parent_pid = $parent_pids[-1];
+	# The PID is interpolated into a shell command below, so require it to be numeric
+	if (!defined($parent_pid) || $parent_pid !~ /^\d+$/) {
+		notify($ERRORS{'WARNING'}, 0, "parent PID argument was not specified or is not numeric, not killing any processes");
+		return;
+	}
+	my $parent_process_string = "parent PID: " . join(">", @parent_pids);
+	
+	# Refuse to run from the parent vcld daemon process, this prevents all reservations being processed from being killed
+	if ($ENV{vcld}) {
+		notify($ERRORS{'CRITICAL'}, 0, "kill_child_processes subroutine called from the parent vcld process, not killing any processes for safety");
+		return;
+	}
+	
+	notify($ERRORS{'DEBUG'}, 0, "$parent_process_string: attempting to kill child processes");
+	
+	my $command = "pgrep -flP $parent_pid | sort -r";
+	my ($exit_status, $output) = run_command($command, 1);
+	
+	for my $line (@$output) {
+		# Each line is expected to contain a PID followed by the process command line
+		my ($child_pid, $child_command) = $line =~ /^(\d+)\s+(.*)/;
+		if (!defined($child_pid) || !defined($child_command)) {
+			notify($ERRORS{'WARNING'}, 0, "$parent_process_string: pgrep output line does not contain a PID and command:\nline: '$line'\ncommand: '$command'");
+			next;
+		}
+		# Ignore the pgrep command called to determine child processes
+		next if ($child_command =~ /$command/);
+		
+		# Summarize the command as 'beginning...end' to make log output more readable, short commands are shown in full
+		my $child_command_summary = join('...', ($child_command =~ /^(.{10,20}).*(.{20,30})$/)) || $child_command;
+		notify($ERRORS{'DEBUG'}, 0, "$parent_process_string, found child process: $child_pid '$child_command_summary'");
+		
+		# Recursively kill the child processes of the child process before killing the child itself
+		kill_child_processes(@parent_pids, $child_pid);
+		my $kill_count = kill 9, $child_pid;
+		if ($kill_count) {
+			notify($ERRORS{'DEBUG'}, 0, "$parent_process_string, killed child process: $child_pid (kill count: $kill_count)");
+		}
+		else {
+			notify($ERRORS{'WARNING'}, 0, "$parent_process_string, kill command returned 0 attempting to kill child process: $child_pid");
+		}
+	}
+	
+	return 1;
+}
+
+#/////////////////////////////////////////////////////////////////////////////
+
 1;
 __END__