You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@httpd.apache.org by "Roy T. Fielding" <fi...@kiwi.ics.uci.edu> on 1997/10/05 08:17:33 UTC

Re: [PATCH] reclaim_child_processes (was Re: Proxy bug?)

>The reclaim_child_processes() code is too sensitive, probably because of how
>I rearranged it.  I think the delay below is a far more reasonable one to
>allow before declaring children too slow to exit.

Just changing the waittime will make it too slow.  It is an exponential
backoff, so the first few tries should not be considered an error at all.
The following patch fixes it by letting the first couple of tries pass
silently before any signal is re-sent.  Note that I also got pissed off at
the APLOG_* levels in the process and adjusted several of them.  +1 on
anything that improves the APLOG code so that APLOG_NOTICE is always written
to error_log.  I have a patch for that too, but Dean's and/or Ken's patch
should be applied first.

....Roy

Index: http_main.c
===================================================================
RCS file: /export/home/cvs/apachen/src/main/http_main.c,v
retrieving revision 1.230
diff -u -r1.230 http_main.c
--- http_main.c	1997/10/05 02:06:37	1.230
+++ http_main.c	1997/10/05 06:11:06
@@ -719,7 +719,7 @@
     }
 
     if (!current_conn->keptalive)
-	aplog_error(APLOG_MARK, APLOG_DEBUG, current_conn->server, errstr);
+	aplog_error(APLOG_MARK, APLOG_WARNING, current_conn->server, errstr);
 
     if (timeout_req) {
 	/* Someone has asked for this transaction to just be aborted
@@ -1713,11 +1713,11 @@
     return -1;
 }
 
-static void reclaim_child_processes(int start_tries)
+static void reclaim_child_processes(int terminate)
 {
 #ifndef MULTITHREAD
     int i, status;
-    long int waittime = 4096;	/* in usecs */
+    long int waittime = 1024 * 16;	/* in usecs */
     struct timeval tv;
     int waitret, tries;
     int not_dead_yet;
@@ -1727,17 +1727,14 @@
 
     sync_scoreboard_image();
 
-    tries = 0;
-    for (tries = start_tries; tries < 4; ++tries) {
+    for (tries = terminate ? 4 : 1; tries <= 9; ++tries) {
 	/* don't want to hold up progress any more than 
 	 * necessary, but we need to allow children a few moments to exit.
-	 * delay with an exponential backoff.
-	 * Currently set for a maximum wait of a bit over
-	 * four seconds.
+	 * Set delay with an exponential backoff.
 	 */
 	tv.tv_sec = waittime / 1000000;
 	tv.tv_usec = waittime % 1000000;
-	waittime = waittime * 2;
+	waittime = waittime * 4;
 	ap_select(0, NULL, NULL, NULL, &tv);
 
 	/* now see who is done */
@@ -1755,28 +1752,36 @@
 	    }
 	    ++not_dead_yet;
 	    switch (tries) {
-	    case 1:
+	    case 1:     /*  16ms */
+	    case 2:     /*  82ms */
+		break;
+	    case 3:     /* 344ms */
 		/* perhaps it missed the SIGHUP, lets try again */
-		aplog_error(APLOG_MARK, APLOG_ERR, server_conf,
+		aplog_error(APLOG_MARK, APLOG_WARNING, server_conf,
 		    "child process %d did not exit, sending another SIGHUP",
 			    pid);
 		kill(pid, SIGHUP);
+		waittime = 1024 * 16;
+		break;
+	    case 4:     /*  16ms */
+	    case 5:     /*  82ms */
+	    case 6:     /* 344ms */
 		break;
-	    case 2:
+	    case 7:     /* 1.4sec */
 		/* ok, now it's being annoying */
-		aplog_error(APLOG_MARK, APLOG_ERR, server_conf,
+		aplog_error(APLOG_MARK, APLOG_WARNING, server_conf,
 		   "child process %d still did not exit, sending a SIGTERM",
 			    pid);
 		kill(pid, SIGTERM);
 		break;
-	    case 3:
+	    case 8:     /*  6 sec */
 		/* die child scum */
 		aplog_error(APLOG_MARK, APLOG_ERR, server_conf,
 		   "child process %d still did not exit, sending a SIGKILL",
 			    pid);
 		kill(pid, SIGKILL);
 		break;
-	    case 4:
+	    case 9:     /* 22 sec */
 		/* gave it our best shot, but alas...  If this really 
 		 * is a child we are trying to kill and it really hasn't
 		 * exited, we will likely fail to bind to the port
@@ -1952,7 +1957,7 @@
     ap_snprintf(emsg, sizeof(emsg),
 		"httpd: caught %s, attempting to dump core in %s",
 		s, coredump_dir);
-    aplog_error(APLOG_MARK, APLOG_INFO, server_conf, emsg);
+    aplog_error(APLOG_MARK, APLOG_NOTICE, server_conf, emsg);
     chdir(coredump_dir);
     abort();
     exit(1);
@@ -3423,12 +3428,11 @@
 	    hold_off_on_exponential_spawning = 10;
 	}
 
-	aplog_error(APLOG_MARK, APLOG_INFO, server_conf,
-		    "Apache HTTP Server version: %s", SERVER_VERSION);
+	aplog_error(APLOG_MARK, APLOG_NOTICE, server_conf,
+		    "%s configured -- resuming normal operations",
+		    SERVER_VERSION);
 	aplog_error(APLOG_MARK, APLOG_INFO, server_conf,
 		    "Server built: %s", SERVER_BUILT);
-	aplog_error(APLOG_MARK, APLOG_INFO, server_conf,
-		    "Server configured -- resuming normal operations");
 	restart_pending = shutdown_pending = 0;
 
 	while (!restart_pending && !shutdown_pending) {
@@ -3501,7 +3505,7 @@
 	    if (ap_killpg(pgrp, SIGTERM) < 0) {
 		aplog_error(APLOG_MARK, APLOG_WARNING, server_conf, "killpg SIGTERM");
 	    }
-	    reclaim_child_processes(2);		/* Start with SIGTERM */
+	    reclaim_child_processes(1);		/* Start with SIGTERM */
 	    aplog_error(APLOG_MARK, APLOG_NOTICE, server_conf,
 			"httpd: caught SIGTERM, shutting down");
 
@@ -3558,7 +3562,7 @@
 	    if (ap_killpg(pgrp, SIGHUP) < 0) {
 		aplog_error(APLOG_MARK, APLOG_WARNING, server_conf, "killpg SIGHUP");
 	    }
-	    reclaim_child_processes(1);		/* Not when just starting up */
+	    reclaim_child_processes(0);		/* Not when just starting up */
 	    aplog_error(APLOG_MARK, APLOG_NOTICE, server_conf,
 			"SIGHUP received.  Attempting to restart");
 	}