You are viewing a plain text version of this content. The canonical link for it is here.
Posted to cvs@httpd.apache.org by tr...@apache.org on 2004/09/18 02:33:56 UTC

cvs commit: httpd-2.0/server mpm_common.c

trawick     2004/09/17 17:33:56

  Modified:    .        CHANGES
               server   mpm_common.c
  Log:
  Unix MPMs: Shut down the server more quickly when child processes are
  slow to exit.
  
  Revision  Changes    Path
  1.1589    +3 -0      httpd-2.0/CHANGES
  
  Index: CHANGES
  ===================================================================
  RCS file: /home/cvs/httpd-2.0/CHANGES,v
  retrieving revision 1.1588
  retrieving revision 1.1589
  diff -u -r1.1588 -r1.1589
  --- CHANGES	17 Sep 2004 15:39:29 -0000	1.1588
  +++ CHANGES	18 Sep 2004 00:33:55 -0000	1.1589
  @@ -2,6 +2,9 @@
   
     [Remove entries to the current 2.0 section below, when backported]
   
  +  *) Unix MPMs: Shut down the server more quickly when child processes are
  +     slow to exit.  [Joe Orton, Jeff Trawick]
  +
     *) mod_info: Added listing of the Request Hooks and added more build 
        information like 'httpd -V' contains. Changed output to XHTML. 
        [Paul Querna]
  
  
  
  1.122     +60 -33    httpd-2.0/server/mpm_common.c
  
  Index: mpm_common.c
  ===================================================================
  RCS file: /home/cvs/httpd-2.0/server/mpm_common.c,v
  retrieving revision 1.121
  retrieving revision 1.122
  diff -u -r1.121 -r1.122
  --- mpm_common.c	14 Aug 2004 10:49:43 -0000	1.121
  +++ mpm_common.c	18 Sep 2004 00:33:56 -0000	1.122
  @@ -61,22 +61,55 @@
   #ifdef AP_MPM_WANT_RECLAIM_CHILD_PROCESSES
   void ap_reclaim_child_processes(int terminate)
   {
  -    int i;
  -    long int waittime = 1024 * 16;      /* in usecs */
  +    apr_time_t waittime = 1024 * 16;
       apr_status_t waitret;
  -    int tries;
  +    int i;
       int not_dead_yet;
       int max_daemons;
  +    apr_time_t starttime = apr_time_now();
  +    /* this table of actions and elapsed times tells what action is taken
  +     * at which elapsed time from starting the reclaim
  +     */
  +    struct {
  +        enum {DO_NOTHING, SEND_SIGTERM, SEND_SIGKILL, GIVEUP} action;
  +        apr_time_t action_time;
  +    } action_table[] = {
  +        {DO_NOTHING, 0}, /* dummy entry for iterations where we reap
  +                          * children but take no action against
  +                          * stragglers
  +                          */
  +        {SEND_SIGTERM, apr_time_from_sec(3)},
  +        {SEND_SIGTERM, apr_time_from_sec(5)},
  +        {SEND_SIGTERM, apr_time_from_sec(7)},
  +        {SEND_SIGKILL, apr_time_from_sec(9)},
  +        {GIVEUP,       apr_time_from_sec(10)}
  +    };
  +    int cur_action;      /* index of action we decided to take this
  +                          * iteration
  +                          */
  +    int next_action = 1; /* index of first real action */
   
       ap_mpm_query(AP_MPMQ_MAX_DAEMON_USED, &max_daemons);
   
  -    for (tries = terminate ? 4 : 1; tries <= 9; ++tries) {
  -        /* don't want to hold up progress any more than
  -         * necessary, but we need to allow children a few moments to exit.
  -         * Set delay with an exponential backoff.
  -         */
  +    do {
           apr_sleep(waittime);
  +        /* don't let waittime get longer than 1 second; otherwise, we don't
  +         * react quickly to the last child exiting, and taking action can
  +         * be delayed
  +         */
           waittime = waittime * 4;
  +        if (waittime > apr_time_from_sec(1)) {
  +            waittime = apr_time_from_sec(1);
  +        }
  +
  +        /* see what action to take, if any */
  +        if (action_table[next_action].action_time <= apr_time_now() - starttime) {
  +            cur_action = next_action;
  +            ++next_action;
  +        }
  +        else {
  +            cur_action = 0; /* nothing to do */
  +        }
   
           /* now see who is done */
           not_dead_yet = 0;
  @@ -95,32 +128,28 @@
               }
   
               ++not_dead_yet;
  -            switch (tries) {
  -            case 1:     /*  16ms */
  -            case 2:     /*  82ms */
  -            case 3:     /* 344ms */
  -            case 4:     /*  16ms */
  +            switch(action_table[cur_action].action) {
  +            case DO_NOTHING:
                   break;
  -
  -            case 5:     /*  82ms */
  -            case 6:     /* 344ms */
  -            case 7:     /* 1.4sec */
  +                
  +            case SEND_SIGTERM:
                   /* ok, now it's being annoying */
                   ap_log_error(APLOG_MARK, APLOG_WARNING,
                                0, ap_server_conf,
  -                             "child process %ld still did not exit, "
  +                             "child process %" APR_PID_T_FMT
  +                             " still did not exit, "
                                "sending a SIGTERM",
  -                             (long)pid);
  +                             pid);
                   kill(pid, SIGTERM);
                   break;
  -
  -            case 8:     /*  6 sec */
  -                /* die child scum */
  +                
  +            case SEND_SIGKILL:
                   ap_log_error(APLOG_MARK, APLOG_ERR,
                                0, ap_server_conf,
  -                             "child process %ld still did not exit, "
  +                             "child process %" APR_PID_T_FMT
  +                             "  still did not exit, "
                                "sending a SIGKILL",
  -                             (long)pid);
  +                             pid);
   #ifndef BEOS
                   kill(pid, SIGKILL);
   #else
  @@ -133,8 +162,8 @@
                   kill_thread(pid);
   #endif
                   break;
  -
  -            case 9:     /* 14 sec */
  +                
  +            case GIVEUP:
                   /* gave it our best shot, but alas...  If this really
                    * is a child we are trying to kill and it really hasn't
                    * exited, we will likely fail to bind to the port
  @@ -142,9 +171,10 @@
                    */
                   ap_log_error(APLOG_MARK, APLOG_ERR,
                                0, ap_server_conf,
  -                             "could not make child process %ld exit, "
  +                             "could not make child process %" APR_PID_T_FMT
  +                             " exit, "
                                "attempting to continue anyway",
  -                             (long)pid);
  +                             pid);
                   break;
               }
           }
  @@ -153,11 +183,8 @@
           apr_proc_other_child_refresh_all(APR_OC_REASON_RESTART);
   #endif
   
  -        if (!not_dead_yet) {
  -            /* nothing left to wait for */
  -            break;
  -        }
  -    }
  +    } while (not_dead_yet > 0 &&
  +             action_table[cur_action].action != GIVEUP);
   }
   #endif /* AP_MPM_WANT_RECLAIM_CHILD_PROCESSES */