You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@httpd.apache.org by Dean Gaudet <dg...@arctic.org> on 1997/09/11 20:46:20 UTC

[PATCH] reliable piped logs (take 2)

This is essentially the same as the last patch except I have removed
the cross-platform issues by protecting the register_other_child
API with NO_OTHER_CHILD, and protecting reliable piped logs with
NO_RELIABLE_PIPED_LOGS.

Systems without waitpid have to define NO_OTHER_CHILD.  WIN32 and EMX
have to define NO_RELIABLE_PIPED_LOGS.

Dean

Index: htdocs/manual/new_features_1_3.html
===================================================================
RCS file: /export/home/cvs/apachen/htdocs/manual/new_features_1_3.html,v
retrieving revision 1.20
diff -u -r1.20 new_features_1_3.html
--- new_features_1_3.html	1997/08/28 15:24:05	1.20
+++ new_features_1_3.html	1997/09/11 18:41:22
@@ -258,6 +258,17 @@
     identifier is available in the environment variable
     <code>UNIQUE_ID</code>.
 
+<li><strong>Reliable Piped Logs</strong><br>
+    On almost all Unix architectures Apache now implements "reliable"
+    piped logs in <a href="mod/mod_log_config.html">mod_log_config</a>.
+    Here "reliable" means that if the logging child dies for whatever
+    reason, Apache will recover and respawn it without having to restart
+    the entire server.  Furthermore, if the logging child becomes "stuck"
+    and isn't reading its pipe frequently enough, Apache will also restart
+    it.  This opens up more opportunities for log rotation, hit filtering,
+    real-time splitting of multiple vhosts into separate logs, and
+    asynchronous DNS resolving on the fly.
+
 </ul>
 
 <!--#include virtual="footer.html" -->
Index: src/CHANGES
===================================================================
RCS file: /export/home/cvs/apachen/src/CHANGES,v
retrieving revision 1.432
diff -u -r1.432 CHANGES
--- CHANGES	1997/09/10 17:43:17	1.432
+++ CHANGES	1997/09/11 18:41:26
@@ -1,5 +1,15 @@
 Changes with Apache 1.3b1
 
+  *) API: New register_other_child() API (see http_main.h) which allows
+     modules to register children with the parent for maintenance.  It
+     is disabled by defining NO_OTHER_CHILD.  [Dean Gaudet]
+
+  *) API: New piped_log API (see http_log.h) which implements piped logs,
+     and will use register_other_child to implement reliable piped logs
+     when it is available.  The reliable piped logs part can be disabled
+     by defining NO_RELIABLE_PIPED_LOGS.  At the moment reliable piped
+     logs are only available on Unix. [Dean Gaudet]
+
   *) API: New function child_terminate() triggers the child process to
      exit, while allowing the child finish what it needs to for the
      current request first.  
Index: src/PORTING
===================================================================
RCS file: /export/home/cvs/apachen/src/PORTING,v
retrieving revision 1.11
diff -u -r1.11 PORTING
--- PORTING	1997/09/01 21:47:46	1.11
+++ PORTING	1997/09/11 18:41:26
@@ -256,6 +256,16 @@
        an int *len on some architectures and a size_t *len on others.
        If left undefined apache will default it to int.
 
+      NO_OTHER_CHILD:
+       Do not implement the register_other_child API, usually because
+       certain system calls aren't available.
+
+      NO_RELIABLE_PIPED_LOGS:
+       Do not use reliable piped logs, which happen to also require
+       the register_other_child API.  The reliable piped log code
+       requires another child spawning interface which hasn't been
+       generalised yet.
+
 -----------
 Conclusion:
 -----------
Index: src/main/conf.h
===================================================================
RCS file: /export/home/cvs/apachen/src/main/conf.h,v
retrieving revision 1.132
diff -u -r1.132 conf.h
--- conf.h	1997/09/09 22:33:55	1.132
+++ conf.h	1997/09/11 18:41:26
@@ -442,6 +442,7 @@
 #undef NO_KILLPG
 #define NO_SETSID
 #define NEED_WAITPID
+#define NO_OTHER_CHILD
 #define STDIN_FILENO 0
 #define STDOUT_FILENO 1
 #define STDERR_FILENO 2
@@ -537,6 +538,7 @@
 #include <sys/time.h>     
 #define MAXSOCKETS 4096
 #define HAVE_MMAP
+#define NO_RELIABLE_PIPED_LOGS
     
 #elif defined(__MACHTEN__)
 typedef int rlim_t;
@@ -591,6 +593,7 @@
 #define NO_SETSID
 #define NO_USE_SIGACTION
 #define NEED_WAITPID
+#define NO_OTHER_CHILD
 #define HAVE_SYSLOG
 #include <sys/time.h>
 #include <stdlib.h>
@@ -646,6 +649,7 @@
 #define HAVE_MMAP
 #define MULTITHREAD
 #define HAVE_CANONICAL_FILENAME
+#define NO_RELIABLE_PIPED_LOGS
 typedef int uid_t;
 typedef int gid_t;
 typedef int pid_t;
@@ -897,6 +901,10 @@
 /* so that we can use inline on some critical functions */
 #if !defined(__GNUC__)
 #define inline
+#endif
+
+#ifdef NO_OTHER_CHILD
+#define NO_RELIABLE_PIPED_LOGS
 #endif
 
 /* Finding offsets of elements within structures.
Index: src/main/http_config.h
===================================================================
RCS file: /export/home/cvs/apachen/src/main/http_config.h,v
retrieving revision 1.49
diff -u -r1.49 http_config.h
--- http_config.h	1997/09/10 17:43:21	1.49
+++ http_config.h	1997/09/11 18:41:27
@@ -246,7 +246,7 @@
  * handle it back-compatibly, or at least signal an error).
  */
 
-#define MODULE_MAGIC_NUMBER 19970909
+#define MODULE_MAGIC_NUMBER 19970911
 #define STANDARD_MODULE_STUFF MODULE_MAGIC_NUMBER, -1, __FILE__, NULL
 
 /* Generic accessors for other modules to get at their own module-specific
Index: src/main/http_log.c
===================================================================
RCS file: /export/home/cvs/apachen/src/main/http_log.c,v
retrieving revision 1.32
diff -u -r1.32 http_log.c
--- http_log.c	1997/09/10 02:35:37	1.32
+++ http_log.c	1997/09/11 18:41:27
@@ -63,6 +63,7 @@
 #include "http_config.h"
 #include "http_core.h"
 #include "http_log.h"
+#include "http_main.h"
 
 #include <stdarg.h>
 
@@ -352,3 +353,200 @@
     exit(1);
 #endif
 }
+
+/* piped log support */
+
+#ifndef NO_RELIABLE_PIPED_LOGS
+/* forward declaration */
+static void piped_log_maintenance (int reason, void *data, int status);
+
+/* Spawn pl->program reading the pipe on stdin; returns 0, or -1 with errno set. */
+static int piped_log_spawn (piped_log *pl)
+{
+    int pid;
+
+    block_alarms();
+    pid = fork();
+    if (pid == 0) {
+	/* XXX: this needs porting to OS2 and WIN32 */
+	/* XXX: need to check what open fds the logger is actually passed,
+	 * XXX: and CGIs for that matter ... cleanup_for_exec *should*
+	 * XXX: close all the relevant stuff, but hey, it could be broken. */
+	dup2 (pl->fds[0], STDIN_FILENO);	/* child: dup2 closes the old stdin itself */
+	cleanup_for_exec ();
+	signal (SIGCHLD, SIG_DFL);	/* for HPUX */
+	signal (SIGHUP, SIG_IGN);
+	execl (SHELL_PATH, SHELL_PATH, "-c", pl->program, NULL);
+	fprintf (stderr,
+	    "piped_log_spawn: unable to exec %s -c '%s': %s\n",
+	    SHELL_PATH, pl->program, strerror (errno));
+	exit (1);
+    }
+    if (pid == -1) {
+	int save_errno = errno;	/* the calls below may clobber errno */
+	fprintf (stderr,
+	    "piped_log_spawn: unable to fork(): %s\n", strerror (save_errno));
+	unblock_alarms ();
+	errno = save_errno;	/* callers report or save errno after a -1 return */
+	return -1;
+    }
+    unblock_alarms();
+    pl->pid = pid;
+    register_other_child (pid, piped_log_maintenance, pl, pl->fds[1]);
+    return 0;
+}
+
+
+/* other_child callback for a piped logger: respawn on death, SIGTERM when stuck. */
+static void piped_log_maintenance (int reason, void *data, int status)
+{
+    piped_log *pl = data;
+
+    switch (reason) {
+    case OC_REASON_DEATH:
+    case OC_REASON_LOST:
+	pl->pid = -1;
+	unregister_other_child (pl);
+	if (pl->program == NULL) {
+	    /* during a restart */
+	    break;
+	}
+	if (piped_log_spawn (pl) == -1) {
+	    /* what can we do?  This could be the error log we're having
+	     * problems opening up... */
+	    fprintf (stderr,
+		"piped_log_maintenance: unable to respawn '%s': %s\n",
+		pl->program, strerror (errno));
+	}
+	break;
+    
+    case OC_REASON_UNWRITABLE:
+	if (pl->pid != -1) {
+	    kill (pl->pid, SIGTERM);
+	}
+	break;
+    
+    case OC_REASON_RESTART:
+	pl->program = NULL;
+	if (pl->pid != -1) {
+	    kill (pl->pid, SIGTERM);
+	}
+	break;
+
+    case OC_REASON_UNREGISTER:
+	break;
+    }
+}
+
+
+static void piped_log_cleanup (void *data)	/* data is the piped_log */
+{
+    piped_log *pl = data;
+
+    if (pl->pid != -1) {	/* -1 means no logger child is currently known */
+	kill (pl->pid, SIGTERM);
+    }
+    unregister_other_child (pl);	/* records are keyed by the data pointer */
+    close (pl->fds[0]);	/* release both ends of the pipe */
+    close (pl->fds[1]);
+}
+
+
+static void piped_log_cleanup_for_exec (void *data)
+{
+    piped_log *pl = data;	/* registered alongside piped_log_cleanup */
+
+    close (pl->fds[0]);	/* only drops the pipe fds; unlike piped_log_cleanup */
+    close (pl->fds[1]);	/* it neither signals nor unregisters the logger */
+}
+
+
+/* Create the pipe and start the logger; returns NULL with errno set on failure. */
+API_EXPORT(piped_log *) open_piped_log (pool *p, const char *program)
+{
+    piped_log *pl;
+
+    pl = palloc (p, sizeof (*pl));
+    pl->p = p;
+    pl->program = pstrdup (p, program);
+    pl->pid = -1;
+    block_alarms ();
+    if (pipe (pl->fds) == -1) {
+	int save_errno = errno;
+	unblock_alarms();
+	errno = save_errno;
+	return NULL;
+    }
+    register_cleanup (p, pl, piped_log_cleanup, piped_log_cleanup_for_exec);
+    if (piped_log_spawn (pl) == -1) {
+	int save_errno = errno;
+	kill_cleanup (p, pl, piped_log_cleanup);
+	close (pl->fds[0]);
+	close (pl->fds[1]);
+	unblock_alarms ();
+	errno = save_errno;
+	return NULL;
+    }
+    unblock_alarms ();
+    return pl;
+}
+
+API_EXPORT(void) close_piped_log (piped_log *pl)
+{
+    block_alarms ();
+    piped_log_cleanup (pl);	/* SIGTERM the logger, unregister it, close the pipe */
+    kill_cleanup (pl->p, pl, piped_log_cleanup);	/* pairs with register_cleanup in open_piped_log */
+    unblock_alarms ();
+}
+
+#else
+static int piped_log_child (void *cmd)	/* cmd is the logger command line */
+{
+    /* Child process code for 'TransferLog "|..."';
+     * may want a common framework for this, since I expect it will
+     * be common for other foo-loggers to want this sort of thing...
+     */
+    int child_pid = 1;	/* returned as-is unless the WIN32 spawnl overwrites it */
+
+    cleanup_for_exec();
+#ifdef SIGHUP
+    signal (SIGHUP, SIG_IGN);
+#endif
+#if defined(WIN32)
+    child_pid = spawnl (_P_NOWAIT, SHELL_PATH, SHELL_PATH, "/c", (char *)cmd, NULL);
+    return(child_pid);
+#elif defined(__EMX__)
+    /* For OS/2 we need to use a '/' */
+    execl (SHELL_PATH, SHELL_PATH, "/c", (char *)cmd, NULL);
+#else
+    execl (SHELL_PATH, SHELL_PATH, "-c", (char *)cmd, NULL);
+#endif
+    perror ("exec");	/* non-WIN32: reached only if execl returned, i.e. failed */
+    fprintf (stderr, "Exec of shell for logging failed!!!\n");
+    return(child_pid);
+}
+
+
+API_EXPORT(piped_log *) open_piped_log (pool *p, const char *program)
+{
+    piped_log *pl;
+    FILE *dummy;	/* receives the stream spawn_child hands back */
+    
+    if (!spawn_child (p, piped_log_child, (void *)program,
+		kill_after_timeout, &dummy, NULL)) {
+	perror ("spawn_child");
+	fprintf (stderr, "Couldn't fork child for piped log process\n");
+	exit (1);	/* unlike the reliable variant, failure here is fatal */
+    }
+    pl = palloc (p, sizeof (*pl));
+    pl->p = p;
+    pl->write_f = dummy;	/* piped_log_write_fd() takes fileno() of this */
+    return pl;
+}
+
+
+API_EXPORT(void) close_piped_log (piped_log *pl)
+{
+    pfclose (pl->p, pl->write_f);	/* close the stream stored by open_piped_log */
+}
+#endif
Index: src/main/http_log.h
===================================================================
RCS file: /export/home/cvs/apachen/src/main/http_log.h,v
retrieving revision 1.12
diff -u -r1.12 http_log.h
--- http_log.h	1997/08/27 05:45:37	1.12
+++ http_log.h	1997/09/11 18:41:27
@@ -79,3 +79,23 @@
 API_EXPORT(void) log_reason(const char *reason, const char *fname,
 			    request_rec *r);
 
+typedef struct piped_log {
+    pool *p;	/* pool passed to open_piped_log */
+#ifndef NO_RELIABLE_PIPED_LOGS
+    char *program;	/* logger command; set to NULL while a restart is in progress */
+    int pid;	/* pid of the current logger child, or -1 if none */
+    int fds[2];	/* pipe: fds[0] read end (logger stdin), fds[1] write end */
+#else
+    FILE *write_f;	/* stream filled in by spawn_child for writing to the logger */
+#endif
+} piped_log;
+
+API_EXPORT(piped_log *) open_piped_log (pool *p, const char *program);
+API_EXPORT(void) close_piped_log (piped_log *);
+#ifndef NO_RELIABLE_PIPED_LOGS
+#define piped_log_read_fd(pl)	((pl)->fds[0])
+#define piped_log_write_fd(pl)	((pl)->fds[1])
+#else
+#define piped_log_read_fd(pl)	(-1)	/* no read end is kept in this variant */
+#define piped_log_write_fd(pl)	(fileno((pl)->write_f))
+#endif
Index: src/main/http_main.c
===================================================================
RCS file: /export/home/cvs/apachen/src/main/http_main.c,v
retrieving revision 1.216
diff -u -r1.216 http_main.c
--- http_main.c	1997/09/10 17:43:22	1.216
+++ http_main.c	1997/09/11 18:41:30
@@ -234,6 +234,19 @@
 
 int one_process = 0;
 
+#ifndef NO_OTHER_CHILD
+/* used to maintain list of children which aren't part of the scoreboard */
+typedef struct other_child_rec other_child_rec;
+struct other_child_rec {
+    other_child_rec *next;	/* singly linked list headed by other_children */
+    int pid;	/* child pid, or -1 once it has been reaped or lost */
+    void (*maintenance)(int, void *, int);	/* callback: (reason, data, status) */
+    void *data;	/* opaque caller data; also the key for unregister_other_child */
+    int write_fd;	/* fd probed for writability, or -1 to disable probing */
+};
+static other_child_rec *other_children;
+#endif
+
 pool *pconf;			/* Pool for config stuff */
 pool *ptrans;			/* Pool for per-transaction stuff */
 
@@ -1043,6 +1056,107 @@
 #endif /* ndef NO_LINGCLOSE */
 
 /*****************************************************************
+ * dealing with other children
+ */
+
+#ifndef NO_OTHER_CHILD
+/* Add a child to other_children; see http_main.h for the callback contract. */
+API_EXPORT(void) register_other_child (int pid,
+    void (*maintenance)(int reason, void *, int status), void *data, int write_fd)
+{
+    other_child_rec *ocr;
+
+    ocr = palloc (pconf, sizeof (*ocr));
+    ocr->pid = pid;
+    ocr->maintenance = maintenance;
+    ocr->data = data;
+    ocr->write_fd = write_fd;
+    ocr->next = other_children;	/* push on the head of the list */
+    other_children = ocr;
+}
+
+/* note that since this can be called by a maintenance function while we're
+ * scanning the other_children list, all scanners should protect themselves
+ * by loading ocr->next before calling any maintenance function.
+ */
+API_EXPORT(void) unregister_other_child (void *data)
+{
+    other_child_rec **pocr, *nocr;
+
+    for (pocr = &other_children; *pocr; pocr = &(*pocr)->next) {
+	if ((*pocr)->data == data) {
+	    nocr = (*pocr)->next;	/* saved before the callback runs */
+	    (*(*pocr)->maintenance)(OC_REASON_UNREGISTER, (*pocr)->data, -1);
+	    *pocr = nocr;	/* unlink; only the first match is removed */
+	    /* XXX: um, well we've just wasted some space in pconf ? */
+	    return;
+	}
+    }
+}
+
+/* test to ensure that the write_fds are all still writable, otherwise
+ * invoke the maintenance functions as appropriate */
+static void probe_writable_fds (void)
+{
+    fd_set writable_fds;
+    int fd_max;
+    other_child_rec *ocr, *nocr;
+    struct timeval tv;
+    int rc;
+
+    if (other_children == NULL) return;
+
+    fd_max = -1;	/* -1 = nothing to probe; 0 is a legitimate descriptor */
+    FD_ZERO (&writable_fds);
+    for (ocr = other_children; ocr; ocr = ocr->next) {
+	if (ocr->write_fd == -1) continue;
+	FD_SET (ocr->write_fd, &writable_fds);
+	if (ocr->write_fd > fd_max) {
+	    fd_max = ocr->write_fd;
+	}
+    }
+    if (fd_max == -1) return;
+
+    do {
+	tv.tv_sec = 0;	/* zero timeout: poll, never block the parent */
+	tv.tv_usec = 0;
+	rc = ap_select (fd_max + 1, NULL, &writable_fds, NULL, &tv);
+    } while (rc == -1 && errno == EINTR);
+
+    if (rc == -1) {
+	/* XXX: uhh this could be really bad, we could have a bad file
+	 * descriptor due to a bug in one of the maintenance routines */
+	log_unixerr ("probe_writable_fds", "select",
+	    "could not probe writable fds", server_conf);
+	return;
+    }
+    if (rc == 0) FD_ZERO (&writable_fds);	/* none writable: report them all */
+
+    for (ocr = other_children; ocr; ocr = nocr) {
+	nocr = ocr->next;
+	if (ocr->write_fd == -1) continue;
+	if (FD_ISSET (ocr->write_fd, &writable_fds)) continue;
+	(*ocr->maintenance)(OC_REASON_UNWRITABLE, ocr->data, -1);
+    }
+}
+
+/* possibly reap an other_child, return 0 if yes, -1 if not */
+static int reap_other_child (int pid, int status)
+{
+    other_child_rec *ocr, *nocr;
+
+    for (ocr = other_children; ocr; ocr = nocr) {
+	nocr = ocr->next;	/* load first: maintenance may unregister ocr */
+	if (ocr->pid != pid) continue;
+	ocr->pid = -1;	/* already reaped, so it must not be waited on again */
+	(*ocr->maintenance)(OC_REASON_DEATH, ocr->data, status);
+	return 0;
+    }
+    return -1;
+}
+#endif
+
+/*****************************************************************
  *
  * Dealing with the scoreboard... a lot of these variables are global
  * only to avoid getting clobbered by the longjmp() that happens when
@@ -1599,74 +1713,104 @@
 {
 #ifndef MULTITHREAD
     int i, status;
+    long int waittime = 4096; /* in usecs */
+    struct timeval tv;
+    int waitret, tries;
+    int not_dead_yet;
+#ifndef NO_OTHER_CHILD
+    other_child_rec *ocr, *nocr;
+#endif
 
     sync_scoreboard_image();
-    for (i = 0; i < max_daemons_limit; ++i) {
-	int pid = scoreboard_image->servers[i].x.pid;
 
-	if (pid != my_pid && pid != 0) { 
-	    int waitret = 0,
-		tries = start_tries;
-
-	    while (waitret == 0 && tries <= 4) {
-		long int waittime = 4096; /* in usecs */
-		struct timeval tv;
-	    
-		/* don't want to hold up progress any more than 
-		 * necessary, so keep checking to see if the child
-		 * has exited with an exponential backoff.
-		 * Currently set for a maximum wait of a bit over
-		 * four seconds.
-		 */
-		while (((waitret = waitpid(pid, &status, WNOHANG)) == 0) &&
-			 waittime < 3000000) {
-		       tv.tv_sec = waittime / 1000000;
-		       tv.tv_usec = waittime % 1000000;
-		       waittime = waittime * 2;
-		       ap_select(0, NULL, NULL, NULL, &tv);
-		}
-		if (waitret == 0) {
-		    switch (tries) {
-		    case 1:
-			/* perhaps it missed the SIGHUP, lets try again */
-			aplog_error(APLOG_MARK, APLOG_ERR, server_conf,
-				    "child process %d did not exit, sending another SIGHUP",
-				    pid);
-			kill(pid, SIGHUP);
-			break;
-		    case 2:
-			/* ok, now it's being annoying */
-			aplog_error(APLOG_MARK, APLOG_ERR, server_conf,
-				    "child process %d still did not exit, sending a SIGTERM",
-				    pid);
-			kill(pid, SIGTERM);
-			break;
-		    case 3:
-			/* die child scum */
-			aplog_error(APLOG_MARK, APLOG_ERR, server_conf,
-				    "child process %d still did not exit, sending a SIGKILL",
-				    pid);
-			kill(pid, SIGKILL);
-			break;
-		    case 4:
-			/* gave it our best shot, but alas...  If this really 
-			 * is a child we are trying to kill and it really hasn't
-			 * exited, we will likely fail to bind to the port
-			 * after the restart.
-			 */
-			aplog_error(APLOG_MARK, APLOG_ERR, server_conf,
-				    "could not make child process %d exit, attempting to continue anyway",
-				    pid);
-			break;
-		    }
-		}
-		tries++;
+    tries = 0;
+    for(tries = start_tries; tries < 4; ++tries) {
+	/* don't want to hold up progress any more than
+	 * necessary, but we need to allow children a few moments to exit.
+	 * delay with an exponential backoff.
+	 * NOTE: waittime starts at 4096 usec and only doubles once per pass,
+	 * so the total wait is now far below the old "four seconds".
+	 */
+	tv.tv_sec = waittime / 1000000;
+	tv.tv_usec = waittime % 1000000;
+	waittime = waittime * 2;
+	ap_select(0, NULL, NULL, NULL, &tv);
+
+	/* now see who is done */
+	not_dead_yet = 0;
+	for (i = 0; i < max_daemons_limit; ++i) {
+	    int pid = scoreboard_image->servers[i].x.pid;
+
+	    if (pid == my_pid || pid == 0) continue;
+
+	    waitret = waitpid (pid, &status, WNOHANG);
+	    if (waitret == pid || waitret == -1) {
+		scoreboard_image->servers[i].x.pid = 0;
+		continue;
+	    }
+	    ++not_dead_yet;
+	    switch (tries) {
+	    case 1:
+		/* perhaps it missed the SIGHUP, lets try again */
+		aplog_error(APLOG_MARK, APLOG_ERR, server_conf,
+		    "child process %d did not exit, sending another SIGHUP",
+		    pid);
+		kill(pid, SIGHUP);
+		break;
+	    case 2:
+		/* ok, now it's being annoying */
+		aplog_error(APLOG_MARK, APLOG_ERR, server_conf,
+		    "child process %d still did not exit, sending a SIGTERM",
+		    pid);
+		kill(pid, SIGTERM);
+		break;
+	    case 3:
+		/* die child scum */
+		aplog_error(APLOG_MARK, APLOG_ERR, server_conf,
+		    "child process %d still did not exit, sending a SIGKILL",
+		    pid);
+		kill(pid, SIGKILL);
+		break;
+	    case 4:
+		/* gave it our best shot, but alas...  If this really 
+		    * is a child we are trying to kill and it really hasn't
+		    * exited, we will likely fail to bind to the port
+		    * after the restart.
+		    */
+		aplog_error(APLOG_MARK, APLOG_ERR, server_conf,
+		    "could not make child process %d exit, "
+		    "attempting to continue anyway", pid);
+		break;
+	    }
+	}
+#ifndef NO_OTHER_CHILD
+	for (ocr = other_children; ocr; ocr = nocr) {
+	    nocr = ocr->next;
+	    if (ocr->pid == -1) continue;
+
+	    waitret = waitpid (ocr->pid, &status, WNOHANG);
+	    if (waitret == ocr->pid) {
+		ocr->pid = -1;
+		(*ocr->maintenance)(OC_REASON_DEATH, ocr->data, status);
+	    } else if (waitret == 0) {
+		(*ocr->maintenance)(OC_REASON_RESTART, ocr->data, -1);
+		++not_dead_yet;
+	    } else if (waitret == -1) {
+		/* uh what the heck? they didn't call unregister? */
+		ocr->pid = -1;
+		(*ocr->maintenance)(OC_REASON_LOST, ocr->data, -1);
 	    }
 	}
+#endif
+	if (!not_dead_yet) {
+	    /* nothing left to wait for */
+	    break;
+	}
     }
 #endif /* ndef MULTITHREAD */
 }
 
+
 #if defined(BROKEN_WAIT) || defined(NEED_WAITPID)
 /*
 Some systems appear to fail to deliver dead children to wait() at times.
@@ -1681,9 +1825,9 @@
 
     for (n = 0; n < max_daemons_limit; ++n) {
 	if (scoreboard_image->servers[n].status != SERVER_DEAD
-		&& waitpid (scoreboard_image->servers[n].x.pid, &status, WNOHANG)
-		    == -1
-		&& errno == ECHILD) {
+	    && waitpid (scoreboard_image->servers[n].x.pid, &status, WNOHANG)
+		== -1
+	    && errno == ECHILD) {
 	    sync_scoreboard_image ();
 	    update_child_status (n, SERVER_DEAD, NULL);
 	    ret = 1;
@@ -1697,7 +1841,13 @@
  * a while...
  */
 
-static int wait_or_timeout (void)
+/* number of calls to wait_or_timeout between writable probes */
+#ifndef INTERVAL_OF_WRITABLE_PROBES
+#define INTERVAL_OF_WRITABLE_PROBES 10
+#endif
+static int wait_or_timeout_counter;
+
+static int wait_or_timeout (int *status)
 {
 #ifdef WIN32
 #define MAXWAITOBJ MAXIMUM_WAIT_OBJECTS
@@ -1734,21 +1884,22 @@
 
 #else /* WIN32 */
     struct timeval tv;
-#ifndef NEED_WAITPID
     int ret;
 
-    ret = waitpid (-1, NULL, WNOHANG);
+    ++wait_or_timeout_counter;
+    if (wait_or_timeout_counter == INTERVAL_OF_WRITABLE_PROBES) {
+	wait_or_timeout_counter = 0;
+#ifndef NO_OTHER_CHILD
+	probe_writable_fds();
+#endif
+    }
+    ret = waitpid (-1, status, WNOHANG);
     if (ret == -1 && errno == EINTR) {
 	return -1;
     }
     if (ret > 0) {
 	return ret;
     }
-#else
-    if (reap_children ()) {
-	return -1;
-    }
-#endif
     tv.tv_sec = SCOREBOARD_MAINTENANCE_INTERVAL / 1000000;
     tv.tv_usec = SCOREBOARD_MAINTENANCE_INTERVAL % 1000000;
     ap_select(0, NULL, NULL, NULL, &tv);
@@ -2003,6 +2154,10 @@
     return (suexec_enabled);
 }
 
+/*****************************************************************
+ * Connection structures and accounting...
+ */
+
 /* This hashing function is designed to get good distribution in the cases
  * where the server is handling entire "networks" of servers.  i.e. a
  * whack of /24s.  This is probably the most common configuration for
@@ -3177,7 +3332,8 @@
 
 	while (!restart_pending && !shutdown_pending) {
 	    int child_slot;
-	    int pid = wait_or_timeout();
+	    int status;
+	    int pid = wait_or_timeout (&status);
 
 	    /* XXX: if it takes longer than 1 second for all our children
 	     * to start up and get into IDLE state then we may spawn an
@@ -3201,6 +3357,10 @@
 			/* don't perform idle maintenance yet */
 			continue;
 		    }
+#ifndef NO_OTHER_CHILD
+		} else if (reap_other_child (pid, status) == 0) {
+		    /* handled */
+#endif
 		} else if (is_graceful) {
 		    /* Great, we've probably just lost a slot in the
 		     * scoreboard.  Somehow we don't know about this
Index: src/main/http_main.h
===================================================================
RCS file: /export/home/cvs/apachen/src/main/http_main.h,v
retrieving revision 1.17
diff -u -r1.17 http_main.h
--- http_main.h	1997/09/10 17:43:23	1.17
+++ http_main.h	1997/09/11 18:41:30
@@ -97,3 +97,43 @@
 void time_process_request (int child_num, int status);
 unsigned int set_callback_and_alarm(void (*fn)(int), int x);
 int check_alarm(void);
+
+#ifndef NO_OTHER_CHILD
+/*
+ * register an other_child -- a child which the main loop keeps track of
+ * and knows it is different than the rest of the scoreboard.
+ *
+ * pid is the pid of the child.
+ *
+ * maintenance is a function that is invoked with a reason, the data
+ * pointer passed here, and when appropriate a status result from waitpid().
+ *
+ * write_fd is an fd that is probed for writing by select(); if it is ever
+ * unwritable, then maintenance is invoked with reason OC_REASON_UNWRITABLE.
+ * This is useful for log pipe children, to know when they've blocked.  To
+ * disable this feature, use -1 for write_fd.
+ */
+API_EXPORT(void) register_other_child (int pid,
+    void (*maintenance)(int reason, void *data, int status), void *data,
+    int write_fd);
+#define OC_REASON_DEATH		0	/* child has died, caller must call
+                                         * unregister still */
+#define OC_REASON_UNWRITABLE	1	/* write_fd is unwritable */
+#define OC_REASON_RESTART	2	/* a restart is occurring, perform
+					 * any necessary cleanup (including
+					 * sending a special signal to child)
+					 */
+#define OC_REASON_UNREGISTER	3	/* unregister has been called, do
+					 * whatever is necessary (including
+					 * kill the child) */
+#define OC_REASON_LOST		4	/* somehow the child exited without
+					 * us knowing ... buggy os? */
+
+/*
+ * unregister an other_child.  Note that the data pointer is used here, and
+ * is assumed to be unique per other_child.  This is because the pid and
+ * write_fd are possibly killed off separately.
+ */
+API_EXPORT(void) unregister_other_child (void *data);
+
+#endif
Index: src/modules/standard/mod_log_config.c
===================================================================
RCS file: /export/home/cvs/apachen/src/modules/standard/mod_log_config.c,v
retrieving revision 1.36
diff -u -r1.36 mod_log_config.c
--- mod_log_config.c	1997/08/23 02:59:45	1.36
+++ mod_log_config.c	1997/09/11 18:41:31
@@ -164,6 +164,7 @@
 #include "httpd.h"
 #include "http_config.h"
 #include "http_core.h" /* For REMOTE_NAME */
+#include "http_log.h"
 
 module MODULE_VAR_EXPORT config_log_module;
 
@@ -680,32 +681,6 @@
 { NULL }
 };
 
-static int config_log_child (void *cmd)
-{
-    /* Child process code for 'TransferLog "|..."';
-     * may want a common framework for this, since I expect it will
-     * be common for other foo-loggers to want this sort of thing...
-     */
-    int child_pid = 1;
-
-    cleanup_for_exec();
-#ifdef SIGHUP
-    signal (SIGHUP, SIG_IGN);
-#endif
-#if defined(WIN32)
-    child_pid = spawnl (_P_NOWAIT, SHELL_PATH, SHELL_PATH, "/c", (char *)cmd, NULL);
-    return(child_pid);
-#elif defined(__EMX__)
-    /* For OS/2 we need to use a '/' */
-    execl (SHELL_PATH, SHELL_PATH, "/c", (char *)cmd, NULL);
-#else
-    execl (SHELL_PATH, SHELL_PATH, "-c", (char *)cmd, NULL);
-#endif
-    perror ("exec");
-    fprintf (stderr, "Exec of shell for logging failed!!!\n");
-    return(child_pid);
-}
-
 static config_log_state *open_config_log (server_rec *s, pool *p,
 				   config_log_state *cls,
 				   array_header *default_format) {
@@ -716,16 +691,13 @@
     }
 
     if (*cls->fname == '|') {
-        FILE *dummy;
-        
-        if (!spawn_child (p, config_log_child, (void *)(cls->fname+1),
-                    kill_after_timeout, &dummy, NULL)) {
-	    perror ("spawn_child");
-            fprintf (stderr, "Couldn't fork child for TransferLog process\n");
-            exit (1);
-        }
+	piped_log *pl;
 
-        cls->log_fd = fileno (dummy);
+	pl = open_piped_log (p, cls->fname + 1);
+	if (pl == NULL) {
+	    exit (1);
+	}
+	cls->log_fd = piped_log_write_fd (pl);
     }
     else {
         char *fname = server_root_relative (p, cls->fname);