You are viewing a plain text version of this content. The canonical link for it is here.
Posted to cvs@httpd.apache.org by tr...@apache.org on 2004/09/18 02:33:56 UTC
cvs commit: httpd-2.0/server mpm_common.c
trawick 2004/09/17 17:33:56
Modified: . CHANGES
server mpm_common.c
Log:
Unix MPMs: Shut down the server more quickly when child processes are
slow to exit.
Revision Changes Path
1.1589 +3 -0 httpd-2.0/CHANGES
Index: CHANGES
===================================================================
RCS file: /home/cvs/httpd-2.0/CHANGES,v
retrieving revision 1.1588
retrieving revision 1.1589
diff -u -r1.1588 -r1.1589
--- CHANGES 17 Sep 2004 15:39:29 -0000 1.1588
+++ CHANGES 18 Sep 2004 00:33:55 -0000 1.1589
@@ -2,6 +2,9 @@
[Remove entries to the current 2.0 section below, when backported]
+ *) Unix MPMs: Shut down the server more quickly when child processes are
+ slow to exit. [Joe Orton, Jeff Trawick]
+
*) mod_info: Added listing of the Request Hooks and added more build
information like 'httpd -V' contains. Changed output to XHTML.
[Paul Querna]
1.122 +60 -33 httpd-2.0/server/mpm_common.c
Index: mpm_common.c
===================================================================
RCS file: /home/cvs/httpd-2.0/server/mpm_common.c,v
retrieving revision 1.121
retrieving revision 1.122
diff -u -r1.121 -r1.122
--- mpm_common.c 14 Aug 2004 10:49:43 -0000 1.121
+++ mpm_common.c 18 Sep 2004 00:33:56 -0000 1.122
@@ -61,22 +61,55 @@
#ifdef AP_MPM_WANT_RECLAIM_CHILD_PROCESSES
void ap_reclaim_child_processes(int terminate)
{
- int i;
- long int waittime = 1024 * 16; /* in usecs */
+ apr_time_t waittime = 1024 * 16;
apr_status_t waitret;
- int tries;
+ int i;
int not_dead_yet;
int max_daemons;
+ apr_time_t starttime = apr_time_now();
+ /* this table of actions and elapsed times tells what action is taken
+ * at which elapsed time from starting the reclaim
+ */
+ struct {
+ enum {DO_NOTHING, SEND_SIGTERM, SEND_SIGKILL, GIVEUP} action;
+ apr_time_t action_time;
+ } action_table[] = {
+ {DO_NOTHING, 0}, /* dummy entry for iterations where we reap
+ * children but take no action against
+ * stragglers
+ */
+ {SEND_SIGTERM, apr_time_from_sec(3)},
+ {SEND_SIGTERM, apr_time_from_sec(5)},
+ {SEND_SIGTERM, apr_time_from_sec(7)},
+ {SEND_SIGKILL, apr_time_from_sec(9)},
+ {GIVEUP, apr_time_from_sec(10)}
+ };
+ int cur_action; /* index of action we decided to take this
+ * iteration
+ */
+ int next_action = 1; /* index of first real action */
ap_mpm_query(AP_MPMQ_MAX_DAEMON_USED, &max_daemons);
- for (tries = terminate ? 4 : 1; tries <= 9; ++tries) {
- /* don't want to hold up progress any more than
- * necessary, but we need to allow children a few moments to exit.
- * Set delay with an exponential backoff.
- */
+ do {
apr_sleep(waittime);
+ /* don't let waittime get longer than 1 second; otherwise, we don't
+ * react quickly to the last child exiting, and taking action can
+ * be delayed
+ */
waittime = waittime * 4;
+ if (waittime > apr_time_from_sec(1)) {
+ waittime = apr_time_from_sec(1);
+ }
+
+ /* see what action to take, if any */
+ if (action_table[next_action].action_time <= apr_time_now() - starttime) {
+ cur_action = next_action;
+ ++next_action;
+ }
+ else {
+ cur_action = 0; /* nothing to do */
+ }
/* now see who is done */
not_dead_yet = 0;
@@ -95,32 +128,28 @@
}
++not_dead_yet;
- switch (tries) {
- case 1: /* 16ms */
- case 2: /* 82ms */
- case 3: /* 344ms */
- case 4: /* 16ms */
+ switch(action_table[cur_action].action) {
+ case DO_NOTHING:
break;
-
- case 5: /* 82ms */
- case 6: /* 344ms */
- case 7: /* 1.4sec */
+
+ case SEND_SIGTERM:
/* ok, now it's being annoying */
ap_log_error(APLOG_MARK, APLOG_WARNING,
0, ap_server_conf,
- "child process %ld still did not exit, "
+ "child process %" APR_PID_T_FMT
+ " still did not exit, "
"sending a SIGTERM",
- (long)pid);
+ pid);
kill(pid, SIGTERM);
break;
-
- case 8: /* 6 sec */
- /* die child scum */
+
+ case SEND_SIGKILL:
ap_log_error(APLOG_MARK, APLOG_ERR,
0, ap_server_conf,
- "child process %ld still did not exit, "
+ "child process %" APR_PID_T_FMT
+ " still did not exit, "
"sending a SIGKILL",
- (long)pid);
+ pid);
#ifndef BEOS
kill(pid, SIGKILL);
#else
@@ -133,8 +162,8 @@
kill_thread(pid);
#endif
break;
-
- case 9: /* 14 sec */
+
+ case GIVEUP:
/* gave it our best shot, but alas... If this really
* is a child we are trying to kill and it really hasn't
* exited, we will likely fail to bind to the port
@@ -142,9 +171,10 @@
*/
ap_log_error(APLOG_MARK, APLOG_ERR,
0, ap_server_conf,
- "could not make child process %ld exit, "
+ "could not make child process %" APR_PID_T_FMT
+ " exit, "
"attempting to continue anyway",
- (long)pid);
+ pid);
break;
}
}
@@ -153,11 +183,8 @@
apr_proc_other_child_refresh_all(APR_OC_REASON_RESTART);
#endif
- if (!not_dead_yet) {
- /* nothing left to wait for */
- break;
- }
- }
+ } while (not_dead_yet > 0 &&
+ action_table[cur_action].action != GIVEUP);
}
#endif /* AP_MPM_WANT_RECLAIM_CHILD_PROCESSES */