You are viewing a plain text version of this content. The canonical link for it is here.

Posted to dev@httpd.apache.org by Dean Gaudet <dg...@arctic.org> on 1997/07/27 21:12:09 UTC

RFC: register_other_child

Ok this hasn't been tested ... and I haven't implemented any reliable logs
with it yet.  I'm just looking for comments on the api.  Skip down to the
http_main.h diff to see the interface.

The one addition that we haven't talked about before is the "writable fd" 
test.  I see the following modes of failure for a logging child: 

- it dies
- it stops reading

We talked about the first, but not the second.  The approach I use here is
to select() on the pipe to see if it's writable.  If it isn't, then I tell
the child's owner. 

Oh yeah, the below removes reap_children() because it is totally bogus. 
It's supposed to be used when NEED_WAITPID is defined, but it uses waitpid
itself.  This means that UTS21 and NEWSOS have been broken for some time. 

Dean

Index: http_main.c
===================================================================
RCS file: /export/home/cvs/apache/src/http_main.c,v
retrieving revision 1.187
diff -u -r1.187 http_main.c
--- http_main.c	1997/07/24 04:35:47	1.187
+++ http_main.c	1997/07/27 19:06:07
@@ -216,6 +216,20 @@
 
 int one_process = 0;
 
+/* used to maintain list of children which aren't part of the scoreboard */
+typedef struct other_child_rec other_child_rec;
+struct other_child_rec {
+    other_child_rec *next;
+    int pid;
+    void (*maintenance)(int, void *, int);
+    void *data;
+    int write_fd;
+};
+static other_child_rec *other_children;
+
+pool *pconf;			/* Pool for config stuff */
+pool *ptrans;			/* Pool for per-transaction stuff */
+
 /* small utility macros to make things easier to read */
 
 #ifdef WIN32
@@ -745,6 +759,105 @@
 #endif /* ndef NO_LINGCLOSE */
 
 /*****************************************************************
+ * dealing with other children
+ */
+
+void register_other_child (int pid,
+    void (*maintenance)(int reason, void *, int status),
+    void *data, int write_fd)
+{
+    other_child_rec *ocr;
+
+    ocr = palloc (pconf, sizeof (*ocr));
+    ocr->pid = pid;
+    ocr->maintenance = maintenance;
+    ocr->data = data;
+    ocr->write_fd = write_fd;
+    ocr->next = other_children;
+    other_children = ocr;
+}
+
+/* note that since this can be called by a maintenance function while we're
+ * scanning the other_children list, all scanners should protect themself
+ * by loading ocr->next before calling any maintenance function.
+ */
+void unregister_other_child (void *data)
+{
+    other_child_rec **pocr, *nocr;
+
+    for (pocr = &other_children; *pocr; pocr = &(*pocr)->next) {
+	if ((*pocr)->data == data) {
+	    nocr = (*pocr)->next;
+	    (*(*pocr)->maintenance)(OC_REASON_UNREGISTER, (*pocr)->data, -1);
+	    *pocr = nocr;
+	    /* XXX: um, well we've just wasted some space in pconf ? */
+	    return;
+	}
+    }
+}
+
+/* test to ensure that the write_fds are all still writable, otherwise
+ * invoke the maintenance functions as appropriate */
+static void probe_writable_fds (void)
+{
+    fd_set writable_fds;
+    int fd_max;
+    other_child_rec *ocr, *nocr;
+    struct timeval tv;
+    int rc;
+
+    if (other_children == NULL) return;
+
+    fd_max = 0;
+    FD_ZERO (&writable_fds);
+    for (ocr = other_children; ocr; ocr = ocr->next) {
+	if (ocr->write_fd == -1) continue;
+	FD_SET (ocr->write_fd, &writable_fds);
+	if (ocr->write_fd > fd_max) {
+	    fd_max = ocr->write_fd;
+	}
+    }
+    if (fd_max == 0) return;
+
+    do {
+	tv.tv_sec = 0;
+	tv.tv_usec = 0;
+	rc = ap_select (fd_max + 1, NULL, &writable_fds, NULL, &tv);
+    } while (rc == -1 && errno == EINTR);
+
+    if (rc == -1) {
+	/* XXX: uhh this could be really bad, we could have a bad file
+	 * descriptor due to a bug in one of the maintenance routines */
+	log_unixerr ("probe_writable_fds", "select", "could not probe
+	    writable fds", server_conf);
+	return;
+    }
+    if (rc == 0) return;
+
+    for (ocr = other_children; ocr; ocr = nocr) {
+	nocr = ocr->next;
+	if (ocr->write_fd == -1) continue;
+	if (FD_ISSET (ocr->write_fd, &writable_fds)) continue;
+	(*ocr->maintenance)(OC_REASON_UNWRITABLE, ocr->data, -1);
+    }
+}
+
+/* possibly reap an other_child, return 0 if yes, -1 if not */
+static int reap_other_child (int pid, int status)
+{
+    other_child_rec *ocr, *nocr;
+
+    for (ocr = other_children; ocr; ocr = nocr) {
+	nocr = ocr->next;
+	if (ocr->pid != pid) continue;
+	ocr->pid = -1;
+	(*ocr->maintenance)(OC_REASON_DEATH, ocr->data, status);
+	return 0;
+    }
+    return -1;
+}
+
+/*****************************************************************
  *
  * Dealing with the scoreboard... a lot of these variables are global
  * only to avoid getting clobbered by the longjmp() that happens when
@@ -1276,102 +1389,117 @@
     return -1;
 }
 
+
+/* reclaim child processes after a SIGHUP ... the scoreboard is going to
+ * be tossed away in this case */
 static void reclaim_child_processes (void)
 {
 #ifndef MULTITHREAD
     int i, status;
     int my_pid = getpid();
-
+    long int waittime = 4096; /* in usecs */
+    struct timeval tv;
+    int waitret, tries;
+    int not_dead_yet;
+    other_child_rec *ocr, *nocr;
+
+    /* totally ignore these signals while doing this */
+    signal (SIGUSR1, SIG_IGN);
+    signal (SIGHUP, SIG_IGN);
     sync_scoreboard_image();
-    for (i = 0; i < max_daemons_limit; ++i) {
-	int pid = scoreboard_image->servers[i].x.pid;
 
-	if (pid != my_pid && pid != 0) { 
-	    int waitret = 0,
-		tries = 1;
-
-	    while (waitret == 0 && tries <= 4) {
-		long int waittime = 4096; /* in usecs */
-		struct timeval tv;
-	    
-		/* don't want to hold up progress any more than 
-		 * necessary, so keep checking to see if the child
-		 * has exited with an exponential backoff.
-		 * Currently set for a maximum wait of a bit over
-		 * four seconds.
-		 */
-		while (((waitret = waitpid(pid, &status, WNOHANG)) == 0) &&
-			 waittime < 3000000) {
-		       tv.tv_sec = waittime / 1000000;
-		       tv.tv_usec = waittime % 1000000;
-		       waittime = waittime * 2;
-		       ap_select(0, NULL, NULL, NULL, &tv);
-		}
-		if (waitret == 0) {
-		    switch (tries) {
-		    case 1:
-			/* perhaps it missed the SIGHUP, lets try again */
-			log_printf(server_conf, "child process %d did not exit, sending another SIGHUP", pid);
-			kill(pid, SIGHUP);
-			break;
-		    case 2:
-			/* ok, now it's being annoying */
-			log_printf(server_conf, "child process %d still did not exit, sending a SIGTERM", pid);
-			kill(pid, SIGTERM);
-			break;
-		    case 3:
-			/* die child scum */
-			log_printf(server_conf, "child process %d still did not exit, sending a SIGKILL", pid);
-			kill(pid, SIGKILL);
-			break;
-		    case 4:
-			/* gave it our best shot, but alas...  If this really 
-			 * is a child we are trying to kill and it really hasn't
-			 * exited, we will likely fail to bind to the port
-			 * after the restart.
-			 */
-			log_printf(server_conf, "could not make child process %d exit, attempting to continue anyway", pid);
-			break;
-		    }
-		}
-		tries++;
+    tries = 0;
+    for(tries = 0; tries < 4; ++tries) {
+	/* don't want to hold up progress any more than 
+	* necessary, but we need to allow children a few moments to exit.
+	* delay with an exponential backoff.
+	* Currently set for a maximum wait of a bit over
+	* four seconds.
+	*/
+	tv.tv_sec = waittime / 1000000;
+	tv.tv_usec = waittime % 1000000;
+	waittime = waittime * 2;
+	ap_select(0, NULL, NULL, NULL, &tv);
+
+	/* now see who is done */
+	not_dead_yet = 0;
+	for (i = 0; i < max_daemons_limit; ++i) {
+	    int pid = scoreboard_image->servers[i].x.pid;
+
+	    if (pid == my_pid || pid == 0) continue;
+
+	    waitret = waitpid (pid, &status, WNOHANG);
+	    if (waitret == pid || waitret == -1) {
+		scoreboard_image->servers[i].x.pid = 0;
+		continue;
+	    }
+	    ++not_dead_yet;
+	    switch (tries) {
+	    case 1:
+		/* perhaps it missed the SIGHUP, lets try again */
+		log_printf(server_conf, "child process %d did not exit, "
+					"sending another SIGHUP", pid);
+		kill(pid, SIGHUP);
+		break;
+	    case 2:
+		/* ok, now it's being annoying */
+		log_printf(server_conf, "child process %d still did not exit, "
+					"sending a SIGTERM", pid);
+		kill(pid, SIGTERM);
+		break;
+	    case 3:
+		/* die child scum */
+		log_printf(server_conf, "child process %d still did not exit, "
+					"sending a SIGKILL", pid);
+		kill(pid, SIGKILL);
+		break;
+	    case 4:
+		/* gave it our best shot, but alas...  If this really 
+		    * is a child we are trying to kill and it really hasn't
+		    * exited, we will likely fail to bind to the port
+		    * after the restart.
+		    */
+		log_printf(server_conf, "could not make child process %d exit, "
+					"attempting to continue anyway", pid);
+		break;
 	    }
 	}
-    }
-#endif /* ndef MULTITHREAD */
-}
-
-#if defined(BROKEN_WAIT) || defined(NEED_WAITPID)
-/*
-Some systems appear to fail to deliver dead children to wait() at times.
-This sorts them out. In fact, this may have been caused by a race condition
-in wait_or_timeout(). But this routine is still useful for systems with no
-waitpid().
-*/
-int reap_children (void)
-{
-    int status, n;
-    int ret = 0;
-
-    for (n = 0; n < max_daemons_limit; ++n) {
-	if (scoreboard_image->servers[n].status != SERVER_DEAD
-		&& waitpid (scoreboard_image->servers[n].x.pid, &status, WNOHANG)
-		    == -1
-		&& errno == ECHILD) {
-	    sync_scoreboard_image ();
-	    update_child_status (n, SERVER_DEAD, NULL);
-	    ret = 1;
+	for (ocr = other_children; ocr; ocr = nocr) {
+	    nocr = ocr->next;
+	    if (ocr->pid == -1) continue;
+
+	    waitret = waitpid (ocr->pid, &status, WNOHANG);
+	    if (waitret == ocr->pid) {
+		ocr->pid = -1;
+		(*ocr->maintenance)(OC_REASON_DEATH, ocr->data, status);
+	    } else if (waitret == 0) {
+		(*ocr->maintenance)(OC_REASON_RESTART, ocr->data, -1);
+		++not_dead_yet;
+	    } else if (waitret == -1) {
+		/* uh what the heck? they didn't call unregister? */
+		ocr->pid = -1;
+		(*ocr->maintenance)(OC_REASON_LOST, ocr->data, -1);
+	    }
+	}
+	if (!not_dead_yet) {
+	    /* nothing left to wait for */
+	    break;
 	}
     }
-    return ret;
+#endif /* ndef MULTITHREAD */
 }
-#endif
 
 /* Finally, this routine is used by the caretaker process to wait for
  * a while...
  */
 
-static int wait_or_timeout (void)
+/* number of calls to wait_or_timeout between writable probes */
+#ifndef INTERVAL_OF_WRITABLE_PROBES
+#define INTERVAL_OF_WRITABLE_PROBES 10
+#endif
+static int wait_or_timeout_counter;
+
+static int wait_or_timeout (int *status)
 {
 #ifdef WIN32
 #define MAXWAITOBJ MAXIMUM_WAIT_OBJECTS
@@ -1408,21 +1536,20 @@
 
 #else /* WIN32 */
     struct timeval tv;
-#ifndef NEED_WAITPID
     int ret;
 
-    ret = waitpid (-1, NULL, WNOHANG);
+    ++wait_or_timeout_counter;
+    if (wait_or_timeout_counter == INTERVAL_OF_WRITABLE_PROBES) {
+	wait_or_timeout_counter = 0;
+	probe_writable_fds();
+    }
+    ret = waitpid (-1, status, WNOHANG);
     if (ret == -1 && errno == EINTR) {
 	return -1;
     }
     if (ret > 0) {
 	return ret;
     }
-#else
-    if (reap_children ()) {
-	return -1;
-    }
-#endif
     tv.tv_sec = SCOREBOARD_MAINTENANCE_INTERVAL / 1000000;
     tv.tv_usec = SCOREBOARD_MAINTENANCE_INTERVAL % 1000000;
     ap_select(0, NULL, NULL, NULL, &tv);
@@ -1672,12 +1799,8 @@
 
 /*****************************************************************
  * Connection structures and accounting...
- * Should these be global?  Only to this file, at least...
  */
 
-pool *pconf;			/* Pool for config stuff */
-pool *ptrans;			/* Pool for per-transaction stuff */
-
 static server_rec *find_virtual_server (struct in_addr server_ip,
 				unsigned port, server_rec *server)
 {
@@ -2659,7 +2782,8 @@
 
 	while (!restart_pending) {
 	    int child_slot;
-	    int pid = wait_or_timeout ();
+	    int status;
+	    int pid = wait_or_timeout (&status);
 
 	    /* XXX: if it takes longer than 1 second for all our children
 	     * to start up and get into IDLE state then we may spawn an
@@ -2683,6 +2807,8 @@
 			/* don't perform idle maintenance yet */
 			continue;
 		    }
+		} else if (reap_other_child (pid, status) == 0) {
+		    /* handled */
 		} else if (is_graceful) {
 		    /* Great, we've probably just lost a slot in the
 		     * scoreboard.  Somehow we don't know about this
Index: http_main.h
===================================================================
RCS file: /export/home/cvs/apache/src/http_main.h,v
retrieving revision 1.16
diff -u -r1.16 http_main.h
--- http_main.h	1997/07/21 05:53:46	1.16
+++ http_main.h	1997/07/27 19:06:07
@@ -96,3 +96,40 @@
 void time_process_request (int child_num, int status);
 unsigned int set_callback_and_alarm(void (*fn)(int), int x);
 int check_alarm(void);
+
+/*
+ * register an other_child -- a child which the main loop keeps track of
+ * and knows it is different than the rest of the scoreboard.
+ *
+ * pid is the pid of the child.
+ *
+ * maintenance is a function that is invoked with a reason, the data
+ * pointer passed here, and when appropriate a status result from waitpid().
+ *
+ * write_fd is an fd that is probed for writing by select() if it is ever
+ * unwritable, then maintenance is invoked with reason OC_REASON_UNWRITABLE.
+ * This is useful for log pipe children, to know when they've blocked.  To
+ * disable this feature, use -1 for write_fd.
+ */
+API_EXPORT(void) register_other_child (int pid,
+    void (*maintenance)(int reason, void *data, int status), void *data,
+    int write_fd);
+#define OC_REASON_DEATH		0	/* child has died, caller must call
+                                         * unregister still */
+#define OC_REASON_UNWRITABLE	1	/* write_fd is unwritable */
+#define OC_REASON_RESTART	2	/* a restart is occuring, perform
+					 * any necessary cleanup (including
+					 * sending a special signal to child)
+					 */
+#define OC_REASON_UNREGISTER	3	/* unregister has been called, do
+					 * whatever is necessary (including
+					 * kill the child) */
+#define OC_REASON_LOST		4	/* somehow the child exited without
+					 * us knowing ... buggy os? */
+
+/*
+ * unregister an other_child.  Note that the data pointer is used here, and
+ * is assumed to be unique per other_child.  This is because the pid and
+ * write_fd are possibly killed off separately.
+ */
+API_EXPORT(void) unregister_other_child (void *data);

Re: RFC: register_other_child

Posted by Dean Gaudet <dg...@arctic.org>.

Nothing uses BROKEN_WAIT any longer ... 

Yeah you're right I didn't notice the waitpid wrapper.  Ugh the waitpid
wrapper is blocking.  That won't do at all.  Not at all. 

Dean

On Sun, 27 Jul 1997, Ben Laurie wrote:

> Dean Gaudet wrote:
> > 
> > Ok this hasn't been tested ... and I haven't implemented any reliable logs
> > with it yet.  I'm just looking for comments on the api.  Skip down to the
> > http_main.h diff to see the interface.
> > 
> > The one addition that we haven't talked about before is the "writable fd"
> > test.  I see the following modes of failure for a logging child:
> > 
> > - it dies
> > - it stops reading
> > 
> > We talked about the first, but not the second.  The approach I use here is
> > to select() on the pipe to see if it's writable.  If it isn't, then I tell
> > the child's owner.
> > 
> > Oh yeah, the below removes reap_children() because it is totally bogus.
> > It's supposed to be used when NEED_WAITPID is defined, but it uses waitpid
> > itself.  This means that UTS21 and NEWSOS have been broken for some time.
> > 
> > Dean
> 
> Actually, it is supposed to be used when BROKEN_WAIT is defined. I don't
> know how "|| defined(NEED_WAITPID)" snuck in. Getting rid of it
> altogether would seem unwise, as wait() is sometimes, err, broken.
> 
> Besides, using waitpid() when NEED_WAITPID is defined doesn't seem like
> an error - presumably NEED_WAITPID causes a substitute waitpid() to be
> used? (I know; I should check. It's getting late and I'm hungry).
> 
> Cheers,
> 
> Ben.
> 
> -- 
> Ben Laurie                Phone: +44 (181) 994 6435  Email:
> ben@algroup.co.uk
> Freelance Consultant and  Fax:   +44 (181) 994 6472
> Technical Director        URL: http://www.algroup.co.uk/Apache-SSL
> A.L. Digital Ltd,         Apache Group member (http://www.apache.org)
> London, England.          Apache-SSL author
>

Re: RFC: register_other_child

Posted by Ben Laurie <be...@algroup.co.uk>.

Dean Gaudet wrote:
> 
> Ok this hasn't been tested ... and I haven't implemented any reliable logs
> with it yet.  I'm just looking for comments on the api.  Skip down to the
> http_main.h diff to see the interface.
> 
> The one addition that we haven't talked about before is the "writable fd"
> test.  I see the following modes of failure for a logging child:
> 
> - it dies
> - it stops reading
> 
> We talked about the first, but not the second.  The approach I use here is
> to select() on the pipe to see if it's writable.  If it isn't, then I tell
> the child's owner.
> 
> Oh yeah, the below removes reap_children() because it is totally bogus.
> It's supposed to be used when NEED_WAITPID is defined, but it uses waitpid
> itself.  This means that UTS21 and NEWSOS have been broken for some time.
> 
> Dean

Actually, it is supposed to be used when BROKEN_WAIT is defined. I don't
know how "|| defined(NEED_WAITPID)" snuck in. Getting rid of it
altogether would seem unwise, as wait() is sometimes, err, broken.

Besides, using waitpid() when NEED_WAITPID is defined doesn't seem like
an error - presumably NEED_WAITPID causes a substitute waitpid() to be
used? (I know; I should check. It's getting late and I'm hungry).

Cheers,

Ben.

-- 
Ben Laurie                Phone: +44 (181) 994 6435  Email:
ben@algroup.co.uk
Freelance Consultant and  Fax:   +44 (181) 994 6472
Technical Director        URL: http://www.algroup.co.uk/Apache-SSL
A.L. Digital Ltd,         Apache Group member (http://www.apache.org)
London, England.          Apache-SSL author