You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@vcl.apache.org by ar...@apache.org on 2009/01/30 18:21:22 UTC

svn commit: r739338 - /incubator/vcl/trunk/managementnode/bin/vcld

Author: arkurth
Date: Fri Jan 30 17:21:21 2009
New Revision: 739338

URL: http://svn.apache.org/viewvc?rev=739338&view=rev
Log:
VCL-56
Changed 'use English;' to 'use English qw( -no_match_vars );' in vcld. Documentation for the English module states this improves performance.

VCL-71
Reworked REAPER and HUNTSMAN in vcld. HUNTSMAN was killing all child processes under some circumstances. If kill was issued without -9, any VCL process would attempt to kill all of the child processes of the parent vcld process. I removed this capability altogether for safety. Kill or kill -9 now kills only the specified process. REAPER was modified to correctly handle the exit status of the process which was reaped. It had not been setting $? correctly if wait() reaped a process. I also changed the wait() command to waitpid() per recommendations in the Perl books.

VCL-72
Added die_handler and warning_handler subroutines. These catch die and warning signals which weren't caught at all before. A die signal could happen if any part of the code attempted to call an undefined subroutine. When this occurred, the process would exit without sending a notification via notify(). The die_handler subroutine causes a critical message to be sent. The warning_handler subroutine causes warning messages to be logged via notify() when a warn signal occurs.

Made some other minor spelling corrections to vcld.

Modified:
    incubator/vcl/trunk/managementnode/bin/vcld

Modified: incubator/vcl/trunk/managementnode/bin/vcld
URL: http://svn.apache.org/viewvc/incubator/vcl/trunk/managementnode/bin/vcld?rev=739338&r1=739337&r2=739338&view=diff
==============================================================================
--- incubator/vcl/trunk/managementnode/bin/vcld (original)
+++ incubator/vcl/trunk/managementnode/bin/vcld Fri Jan 30 17:21:21 2009
@@ -56,7 +56,7 @@
 use Symbol;
 use POSIX;
 use Getopt::Long;
-use English;
+use English qw( -no_match_vars );
 
 use VCL::utils;
 use VCL::DataStructure;
@@ -86,6 +86,8 @@
 $SIG{QUIT} = \&HUNTSMAN;
 $SIG{HUP}  = \&HUNTSMAN;
 $SIG{TERM} = \&HUNTSMAN;
+$SIG{__WARN__} = \&warning_handler;
+$SIG{__DIE__} = \&die_handler;
 
 # Call main subroutine
 &main();
@@ -126,7 +128,7 @@
 		notify($ERRORS{'OK'}, $LOGFILE, "retrieved management node information from database");
 	}
 	else {
-		notify($ERRORS{'CRITICAL'}, $LOGFILE, "unable to retieve management node information from database");
+		notify($ERRORS{'CRITICAL'}, $LOGFILE, "unable to retrieve management node information from database");
 		exit;
 	}
 
@@ -172,10 +174,10 @@
 		# Get all the requests assigned to this management node
 		# get_management_node_requests() gets a subset of the information available
 		if ($info{request} = {get_management_node_requests($management_node_id)}) {
-			#notify($ERRORS{'DEBUG'}, $LOGFILE, "retieved request information for management node $management_node_id");
+			#notify($ERRORS{'DEBUG'}, $LOGFILE, "retrieved request information for management node $management_node_id");
 		}
 		else {
-			notify($ERRORS{'CRITICAL'}, $LOGFILE, "could not retieve request information for management node $management_node_id");
+			notify($ERRORS{'CRITICAL'}, $LOGFILE, "could not retrieve request information for management node $management_node_id");
 		}
 
 		# See if there's anything to do
@@ -268,17 +270,21 @@
 				}
 				
 				# Make sure reservation is not currently being processed
-				if (reservation_being_processed($reservation_id)) {
+				my $being_processed = reservation_being_processed($reservation_id);
+				if ($being_processed && $request_state_name ne 'deleted') {
 					notify($ERRORS{'WARNING'}, $LOGFILE, "reservation $reservation_id is already being processed");
 					next RESERVATION;
 				}
+				elsif ($being_processed) {
+					notify($ERRORS{'DEBUG'}, $LOGFILE, "$request_state_name processing delayed, reservation $reservation_id is currently being processed");
+				}
 				else {
 					notify($ERRORS{'DEBUG'}, $LOGFILE, "reservation $reservation_id is NOT already being processed");
 				}
 
 				# Get the full set of database data for this request
 				if (%request_info = get_request_info($request_id)) {
-					notify($ERRORS{'DEBUG'}, $LOGFILE, "retieved request information from database");
+					notify($ERRORS{'DEBUG'}, $LOGFILE, "retrieved request information from database");
 
 					# Set request variables that may have changed by other processes to their original values
 					# They may change if this is a cluster reservation
@@ -287,7 +293,7 @@
 					$request_info{preload}         = $request_preload;
 				}
 				else {
-					notify($ERRORS{'WARNING'}, $LOGFILE, "could not retieve request information from database");
+					notify($ERRORS{'WARNING'}, $LOGFILE, "could not retrieve request information from database");
 					next RESERVATION;
 				}
 
@@ -566,8 +572,9 @@
 			sigprocmask(SIG_UNBLOCK, $sigset) or die "can't unblock SIGINT for fork: $!\n";
 			
 			# Parent process records the child's PID and returns
+			# Store the reservation ID so REAPER can clean up the reservation when it dies
 			$child_count++;
-			$child_pids{$pid} = 1;
+			$child_pids{$pid} = $reservation_id;
 			notify($ERRORS{'OK'}, $LOGFILE, "current number of forked kids: $child_count");
 			return;
 		}
@@ -621,23 +628,78 @@
 
 #/////////////////////////////////////////////////////////////////////////////
 
+=head2 warning_handler
+
+ Parameters  : None
+ Returns     : Nothing
+ Description : Handles __WARN__ signals. This signal is generated when warn()
+               is called. This may occur when the VCL code encounters an
+					error such as:
+					Use of uninitialized value in concatenation (.) or string at
+					
+					If the signal isn't handled, the warning message is dumped
+					to STDERR and will appear in the log file. This handler
+					causes WARN signals to be logged by the notify() subroutine.
+
+=cut
+
+sub warning_handler {
+	# Log the warning via notify(); "@_" interpolates the handler's arguments into one string
+	notify($ERRORS{'WARNING'}, $LOGFILE, "@_");
+	
+	# Reinstall this handler for __WARN__ in case of unreliable signals
+	$SIG{__WARN__} = \&warning_handler;
+}
+
+#/////////////////////////////////////////////////////////////////////////////
+
+=head2 die_handler
+
+ Parameters  : None
+ Returns     : Nothing
+ Description : Handles __DIE__ signals. This signal is generated when die()
+               is called. This may occur when the VCL code encounters an
+					error such as:
+					Uncaught exception from user code:
+               Undefined subroutine ... called at ...
+					
+					If the signal isn't handled, the output is dumped to STDERR
+					and the process exits quietly.
+
+
+=cut
+
+sub die_handler {
+	# Send a critical notification containing the die message held in @_
+	notify($ERRORS{'CRITICAL'}, $LOGFILE, "@_");
+	
+	# Reinstall this handler (die_handler, not warning_handler) for __DIE__
+	$SIG{__DIE__} = \&die_handler;
+	
+	exit;
+}
+
+#/////////////////////////////////////////////////////////////////////////////
+
 =head2 HUNTSMAN
 
- Parameters  : 
- Returns     : 
- Description :
+ Parameters  : None
+ Returns     : Nothing, process exits
+ Description : Signal handler for:
+               $SIG{INT}
+               $SIG{QUIT}
+               $SIG{HUP}
+               $SIG{TERM}
 
 =cut
 
-sub HUNTSMAN {   
-	# Temporarily override the the SIGCHLD signal handler
-	# Set SIGCHLD handler to IGNORE, meaning nothing happens when a child process exits
-	local ($SIG{CHLD}) = 'IGNORE';
+sub HUNTSMAN {
+	my $signal = shift;
 	
-	# Send SIGINT to child processes
-	kill 'INT' => keys %child_pids;
+	local ($SIG{CHLD}) = 'IGNORE';
 	
-	notify($ERRORS{'OK'}, $LOGFILE, "vcld process exiting, pid=$$");
+	# Display a message and exit
+	notify($ERRORS{'DEBUG'}, 0, "HUNTSMAN called: signal: $signal, pid: $PID, process exiting");
 	exit;
 }
 
@@ -661,6 +723,11 @@
 =cut
 
 sub REAPER {
+	my $signal = shift;
+	
+	# Don't overwrite current error
+	local $!;
+
 	# Save the information saved in $? before proceeding
 	# This is done to save the exit status of the child process which died
 	# If you don't save it, wait() will overwrite it
@@ -668,30 +735,39 @@
 	my $child_exit_status = $? >> 8;
 	my $signal_number = $? & 127;
 	my $dumped_core = $? & 128;
-	#notify($ERRORS{'DEBUG'}, 0, "\$?: $?, signal: $signal_number, dumped core: $dumped_core, child exit status: $child_exit_status");
-	
-	# Configure the REAPER() subroutine to handle SIGCHLD signals
-	$SIG{CHLD} = \&REAPER;
-	
-	# Wait for a child process to terminate
-	# Should have already happened since this subroutine is only called when CHLD signals are sent
-	my $dead_pid = wait;
+	#notify($ERRORS{'DEBUG'}, 0, "REAPER called: signal: $signal, initial value of \$?: $status_save");
 	
-	# Check if the child PID hash contains the pid of the process which just died
-	if (exists $child_pids{$dead_pid}) {
-		# Child which died was a VCL state process since its pid is in the hash
-		$child_count--;
-		delete $child_pids{$dead_pid};
-		notify($ERRORS{'OK'}, $LOGFILE, "VCL state process exited, pid=$dead_pid");
-	}
-	else {
-		# Child which died was some other process
-		#notify($ERRORS{'DEBUG'}, $LOGFILE, "child process exited, pid=$dead_pid");
+	# Reap all child processes which have died, without blocking
+	my $dead_pid = -1;
+	my $wait_pid;
+	while (($wait_pid = waitpid(-1, WNOHANG)) > 0) {
+		$status_save = $?;
+		$child_exit_status = $? >> 8;
+		$signal_number = $? & 127;
+		$dumped_core = $? & 128;
+		$dead_pid = $wait_pid;
+		
+		# Assemble a string containing the dead process info
+		notify($ERRORS{'DEBUG'}, 0, "process reaped: pid: $dead_pid, \$?: $status_save, exit status: $child_exit_status");
+		
+		# Check if the child PID hash contains the pid of the process which just died
+		if (exists $child_pids{$dead_pid}) {
+			my $dead_reservation_id = $child_pids{$dead_pid};
+			notify($ERRORS{'DEBUG'}, 0, "VCL process exited for reservation $dead_reservation_id");
+			
+			# Child which died was a VCL state process since its pid is in the hash
+			$child_count--;
+			delete $child_pids{$dead_pid};
+		}
+		
+		# Reinstall the signal handler in case of unreliable signals
+		$SIG{CHLD} = \&REAPER;
 	}
 	
 	# Set the special $? variable back to the exit status of the child which died
 	# This is useful when utilities such as SSH are run in other places in the code
 	# The code which called the utility can check the exit status to see if it was successful
+	notify($ERRORS{'DEBUG'}, 0, "setting \$? to $status_save, exit status: $child_exit_status");
 	$? = $status_save;
 	
 	return;