You are viewing a plain text version of this content. The canonical link for it is here.
Posted to cvs@httpd.apache.org by re...@apache.org on 2001/04/06 22:12:11 UTC

cvs commit: httpd-2.0/server/mpm/prefork prefork.c

rederpj     01/04/06 13:12:11

  Modified:    include  ap_mpm.h scoreboard.h
               modules/generators mod_status.c
               server   mpm_common.c
               server/mpm/prefork prefork.c
  Log:
  Changes required to make prefork clean up idle children properly. There was a window during which a starting worker deadlocks when an idle cleanup arrives before it completes init. Apache then keeps trying to cleanup the same deadlocked worker forever (until higher pids come along, but it still will never reduce below the deadlocked pid). Thus the number of children would not reduce to the correct idle level.
  
  Revision  Changes    Path
  1.25      +1 -1      httpd-2.0/include/ap_mpm.h
  
  Index: ap_mpm.h
  ===================================================================
  RCS file: /home/cvs/httpd-2.0/include/ap_mpm.h,v
  retrieving revision 1.24
  retrieving revision 1.25
  diff -u -r1.24 -r1.25
  --- ap_mpm.h	2001/03/02 22:46:31	1.24
  +++ ap_mpm.h	2001/04/06 20:11:58	1.25
  @@ -121,7 +121,7 @@
    * predicate indicating if a graceful stop has been requested ...
    * used by the connection loop 
    * @return 1 if a graceful stop has been requested, 0 otherwise
  - * @deffunc int ap_graceful_stop_signalled*void)
  + * @deffunc int ap_graceful_stop_signalled(*void)
    */
   AP_DECLARE(int) ap_graceful_stop_signalled(void);
   
  
  
  
  1.15      +7 -1      httpd-2.0/include/scoreboard.h
  
  Index: scoreboard.h
  ===================================================================
  RCS file: /home/cvs/httpd-2.0/include/scoreboard.h,v
  retrieving revision 1.14
  retrieving revision 1.15
  diff -u -r1.14 -r1.15
  --- scoreboard.h	2001/02/24 03:40:48	1.14
  +++ scoreboard.h	2001/04/06 20:11:59	1.15
  @@ -100,7 +100,8 @@
   #define SERVER_GRACEFUL 8	/* server is gracefully finishing request */
   #define SERVER_ACCEPTING 9	/* thread is accepting connections */
   #define SERVER_QUEUEING	10      /* thread is putting connection on the queue */
  -#define SERVER_NUM_STATUS 11	/* number of status settings */
  +#define SERVER_IDLE_KILL 11     /* Server is cleaning up idle children. */
  +#define SERVER_NUM_STATUS 12	/* number of status settings */
   
   /* A "virtual time" is simply a counter that indicates that a child is
    * making progress.  The parent checks up on each child, and when they have
  @@ -148,6 +149,10 @@
       SB_NOT_SHARED = 2
   } ap_scoreboard_e;
   
  +#define SB_WORKING  0  /* The server is busy and the child is useful. */
  +#define SB_IDLE_DIE 1  /* The server is idle and the child is superfluous. */
  +                       /*   The child should check for this and exit gracefully. */
  +
   /* stuff which is thread/process specific */
   typedef struct {
   #ifdef OPTIMIZE_TIMEOUTS
  @@ -165,6 +170,7 @@
       unsigned long my_bytes_served;
       unsigned long conn_bytes;
       unsigned short conn_count;
  +    unsigned short life_status;    /* Either SB_WORKING or SB_IDLE_DIE */
       apr_time_t start_time;
       apr_time_t stop_time;
   #ifdef HAVE_TIMES
  
  
  
  1.34      +2 -0      httpd-2.0/modules/generators/mod_status.c
  
  Index: mod_status.c
  ===================================================================
  RCS file: /home/cvs/httpd-2.0/modules/generators/mod_status.c,v
  retrieving revision 1.33
  retrieving revision 1.34
  diff -u -r1.33 -r1.34
  --- mod_status.c	2001/02/28 18:41:44	1.33
  +++ mod_status.c	2001/04/06 20:12:02	1.34
  @@ -466,6 +466,7 @@
   	ap_rputs("\"<B><code>D</code></B>\" DNS Lookup,<BR>\n", r);
   	ap_rputs("\"<B><code>L</code></B>\" Logging, \n", r);
   	ap_rputs("\"<B><code>G</code></B>\" Gracefully finishing, \n", r);
  +        ap_rputs("\"<B><code>I</code></B>\" Idle cleanup of worker, \n", r);
   	ap_rputs("\"<B><code>.</code></B>\" Open slot with no current process<P>\n", r);
   	ap_rputs("<P>\n", r);
   	if (!ap_extended_status) {
  @@ -764,6 +765,7 @@
       status_flags[SERVER_BUSY_LOG] = 'L';
       status_flags[SERVER_BUSY_DNS] = 'D';
       status_flags[SERVER_GRACEFUL] = 'G';
  +    status_flags[SERVER_IDLE_KILL] = 'I';
   }
   
   static void register_hooks(apr_pool_t *p)
  
  
  
  1.45      +1 -1      httpd-2.0/server/mpm_common.c
  
  Index: mpm_common.c
  ===================================================================
  RCS file: /home/cvs/httpd-2.0/server/mpm_common.c,v
  retrieving revision 1.44
  retrieving revision 1.45
  diff -u -r1.44 -r1.45
  --- mpm_common.c	2001/03/19 13:07:26	1.44
  +++ mpm_common.c	2001/04/06 20:12:05	1.45
  @@ -126,9 +126,9 @@
               switch (tries) {
               case 1:     /*  16ms */
               case 2:     /*  82ms */
  -                break;
               case 3:     /* 344ms */
               case 4:     /*  16ms */
  +                break;
               case 5:     /*  82ms */
               case 6:     /* 344ms */
               case 7:     /* 1.4sec */
  
  
  
  1.171     +50 -42    httpd-2.0/server/mpm/prefork/prefork.c
  
  Index: prefork.c
  ===================================================================
  RCS file: /home/cvs/httpd-2.0/server/mpm/prefork/prefork.c,v
  retrieving revision 1.170
  retrieving revision 1.171
  diff -u -r1.170 -r1.171
  --- prefork.c	2001/03/02 22:46:32	1.170
  +++ prefork.c	2001/04/06 20:12:09	1.171
  @@ -232,6 +232,7 @@
       if (pchild) {
   	apr_pool_destroy(pchild);
       }
  +    ap_scoreboard_image->servers[my_child_num][0].life_status = SB_WORKING;
       chdir_for_gprof();
       exit(code);
   }
  @@ -360,20 +361,18 @@
    * Connection structures and accounting...
    */
   
  -static void just_die(int sig)
  +static void please_die_gracefully(int sig)
   {
  -    clean_child_exit(0);
  -}
  -
  -static int volatile deferred_die;
  -static int volatile usr1_just_die;
  -
  -static void usr1_handler(int sig)
  -{
  -    if (usr1_just_die) {
  -	just_die(sig);
  +    /* clean_child_exit(0); */
  +    ap_scoreboard_image->servers[my_child_num][0].life_status = SB_IDLE_DIE;
  +    if (sig == SIGHUP) {
  +        (void) ap_update_child_status(AP_CHILD_THREAD_FROM_ID(my_child_num),
  +                                      SERVER_GRACEFUL, (request_rec *) NULL);
       }
  -    deferred_die = 1;
  +    else {
  +        (void) ap_update_child_status(AP_CHILD_THREAD_FROM_ID(my_child_num),
  +                                      SERVER_IDLE_KILL, (request_rec *) NULL);
  +    }
   }
   
   /* volatile just in case */
  @@ -520,16 +519,16 @@
   static int requests_this_child;
   static fd_set main_fds;
   
  +#define I_AM_TO_SHUTDOWN()                                                   \
  +(ap_scoreboard_image->servers[my_child_num][0].life_status != SB_WORKING)
  +   
   int ap_graceful_stop_signalled(void)
   {
  -    ap_sync_scoreboard_image();
  -    if (deferred_die ||
  -	ap_scoreboard_image->global.running_generation != ap_my_generation) {
  -	return 1;
  -    }
  +    /* not ever called anymore... */
       return 0;
   }
   
  +
   static void child_main(int child_num_arg)
   {
       ap_listen_rec *lr;
  @@ -540,9 +539,9 @@
       apr_status_t stat = APR_EINIT;
       int sockdes;
   
  +    my_child_num = child_num_arg;
       ap_my_pid = getpid();
       csd = NULL;
  -    my_child_num = child_num_arg;
       requests_this_child = 0;
       last_lr = NULL;
   
  @@ -565,16 +564,16 @@
   
       (void) ap_update_child_status(AP_CHILD_THREAD_FROM_ID(my_child_num), SERVER_READY, (request_rec *) NULL);
   
  -    apr_signal(SIGHUP, just_die);
  -    apr_signal(SIGTERM, just_die);
  +    apr_signal(SIGHUP, please_die_gracefully);
  +    apr_signal(SIGTERM, please_die_gracefully);
   
  -    while (!ap_graceful_stop_signalled()) {
  +    ap_sync_scoreboard_image();
  +    while (!I_AM_TO_SHUTDOWN()) {
   
   	/* Prepare to receive a SIGWINCH due to graceful restart so that
   	 * we can exit cleanly.
   	 */
  -	usr1_just_die = 1;
  -	apr_signal(SIGWINCH, usr1_handler);
  +	apr_signal(SIGWINCH, please_die_gracefully);
   
   	/*
   	 * (Re)initialize this child to a pre-connection state.
  @@ -654,9 +653,9 @@
   	    /* if we accept() something we don't want to die, so we have to
   	     * defer the exit
   	     */
  -	    usr1_just_die = 0;
   	    for (;;) {
  -		if (deferred_die) {
  +                ap_sync_scoreboard_image();
  +		if (I_AM_TO_SHUTDOWN()) {
   		    /* we didn't get a socket, and we were told to die */
   		    clean_child_exit(0);
   		}
  @@ -763,10 +762,10 @@
   		}
   	    }
   
  -	    if (ap_graceful_stop_signalled()) {
  +            ap_sync_scoreboard_image();
  +	    if (I_AM_TO_SHUTDOWN()) {
   		clean_child_exit(0);
   	    }
  -	    usr1_just_die = 1;
   	}
   
   	SAFE_ACCEPT(accept_mutex_off());	/* unlock after "accept" */
  @@ -791,12 +790,15 @@
                            "(currently %d)", 
                            sockdes, FD_SETSIZE);
   	    apr_socket_close(csd);
  +            ap_sync_scoreboard_image();
   	    continue;
           }
   
   #ifdef TPF
  -	if (sockdes == 0)                   /* 0 is invalid socket for TPF */
  -	    continue;
  +	if (sockdes == 0) {                  /* 0 is invalid socket for TPF */
  +	    ap_sync_scoreboard_image();
  +            continue;
  +        }
   #endif
   
   	ap_sock_disable_nagle(csd);
  @@ -807,6 +809,8 @@
               ap_process_connection(current_conn);
               ap_lingering_close(current_conn);
           }
  +        
  +        ap_sync_scoreboard_image();
       }
       clean_child_exit(0);
   }
  @@ -821,12 +825,13 @@
       }
   
       if (one_process) {
  -	apr_signal(SIGHUP, just_die);
  -	apr_signal(SIGINT, just_die);
  +	apr_signal(SIGHUP, please_die_gracefully);
  +	apr_signal(SIGINT, please_die_gracefully);
   #ifdef SIGQUIT
   	apr_signal(SIGQUIT, SIG_DFL);
   #endif
  -	apr_signal(SIGTERM, just_die);
  +	apr_signal(SIGTERM, please_die_gracefully);
  +        ap_scoreboard_image->servers[slot][0].life_status = SB_WORKING;
   	child_main(slot);
       }
   
  @@ -870,13 +875,14 @@
   	}
   #endif
   	RAISE_SIGSTOP(MAKE_CHILD);
  -	/* Disable the restart signal handlers and enable the just_die stuff.
  +	/* Disable the restart signal handlers and enable the please_die_gracefully stuff.
   	 * Note that since restart() just notes that a restart has been
   	 * requested there's no race condition here.
   	 */
  -	apr_signal(SIGHUP, just_die);
  -	apr_signal(SIGWINCH, just_die);
  -	apr_signal(SIGTERM, just_die);
  +	apr_signal(SIGHUP, please_die_gracefully);
  +	apr_signal(SIGWINCH, please_die_gracefully);
  +	apr_signal(SIGTERM, please_die_gracefully);
  +        ap_scoreboard_image->servers[slot][0].life_status = SB_WORKING;
   	child_main(slot);
       }
   
  @@ -1066,6 +1072,7 @@
   
   int ap_mpm_run(apr_pool_t *_pconf, apr_pool_t *plog, server_rec *s)
   {
  +    int index;
       int remaining_children_to_start;
   
       pconf = _pconf;
  @@ -1236,11 +1243,12 @@
       ++ap_my_generation;
       ap_scoreboard_image->global.running_generation = ap_my_generation;
       update_scoreboard_global();
  +    
  +    for (index = 0; index < ap_daemons_limit; ++index) {
  +        ap_scoreboard_image->servers[index][0].life_status = SB_IDLE_DIE;
  +    }
   
       if (is_graceful) {
  -#ifndef SCOREBOARD_FILE
  -	int i;
  -#endif
   	ap_log_error(APLOG_MARK, APLOG_NOERRNO|APLOG_NOTICE, 0, ap_server_conf,
   		    "SIGWINCH received.  Doing graceful restart");
   
  @@ -1255,9 +1263,9 @@
   	    * corruption too easily.
   	    */
   	ap_sync_scoreboard_image();
  -	for (i = 0; i < ap_daemons_limit; ++i) {
  -	    if (ap_scoreboard_image->servers[i][0].status != SERVER_DEAD) {
  -		ap_scoreboard_image->servers[i][0].status = SERVER_GRACEFUL;
  +	for (index = 0; index < ap_daemons_limit; ++index) {
  +	    if (ap_scoreboard_image->servers[index][0].status != SERVER_DEAD) {
  +		ap_scoreboard_image->servers[index][0].status = SERVER_GRACEFUL;
   	    }
   	}
   #endif
  
  
  

Re: cvs commit: httpd-2.0/server/mpm/prefork prefork.c

Posted by "Paul J. Reder" <re...@raleigh.ibm.com>.
Bill Stoddard wrote:
> 
> What is this change about?
> 
> >   --- mpm_common.c 2001/03/19 13:07:26 1.44
> >   +++ mpm_common.c 2001/04/06 20:12:05 1.45
> >   @@ -126,9 +126,9 @@
> >                switch (tries) {
> >                case 1:     /*  16ms */
> >                case 2:     /*  82ms */
> >   -                break;
> >                case 3:     /* 344ms */
> >                case 4:     /*  16ms */
> >   +                break;
> >                case 5:     /*  82ms */
> >                case 6:     /* 344ms */
> >                case 7:     /* 1.4sec */
> >
> 
> Bill

Because the idle cleanup no longer happens immediately based on a signal but is
now delayed based on a flag. Some of the kids were taking slightly longer to go
away and were triggering the annoying logs and additional kills that resulted
from this code. By moving it slightly the kids had time to go away naturally.

To be honest though, this was put in with the first patch. After the optimizations
suggested by Greg Ames I am not entirely sure this is still needed (I forgot to
test without it), but I suspect it is still needed because the delay comes not from
the checking overhead but from the delay while the child finishes its work before
checking the flag again.

-- 
Paul J. Reder
-----------------------------------------------------------
"The strength of the Constitution lies entirely in the determination of each
citizen to defend it.  Only if every single citizen feels duty bound to do
his share in this defense are the constitutional rights secure."
-- Albert Einstein

Re: cvs commit: httpd-2.0/server/mpm/prefork prefork.c

Posted by Bill Stoddard <bi...@wstoddard.com>.
What is this change about?

>   --- mpm_common.c 2001/03/19 13:07:26 1.44
>   +++ mpm_common.c 2001/04/06 20:12:05 1.45
>   @@ -126,9 +126,9 @@
>                switch (tries) {
>                case 1:     /*  16ms */
>                case 2:     /*  82ms */
>   -                break;
>                case 3:     /* 344ms */
>                case 4:     /*  16ms */
>   +                break;
>                case 5:     /*  82ms */
>                case 6:     /* 344ms */
>                case 7:     /* 1.4sec */
>   

Bill