You are viewing a plain text version of this content. The canonical link for it is here.
Posted to cvs@httpd.apache.org by tr...@apache.org on 2012/07/19 23:31:52 UTC

svn commit: r1363557 - in /httpd/httpd/trunk: CHANGES docs/log-message-tags/next-number server/mpm/event/event.c server/mpm/worker/worker.c

Author: trawick
Date: Thu Jul 19 21:31:52 2012
New Revision: 1363557

URL: http://svn.apache.org/viewvc?rev=1363557&view=rev
Log:
mpm_event, mpm_worker: Remain active amidst prevalent child process
resource shortages.

This is a somewhat different direction than r168182 ("transient thread
creation errors shouldn't take down the whole server").

r168182: If APEXIT_CHILDSICK is received and there aren't any
         active children at the time, exit.

Now:     If APEXIT_CHILDSICK is received and we never successfully
         initialized a child, exit.

The issue seen with the r168182 handling is that it is rather easy
to be left with no active child processes (which causes the server
to exit completely) during a resource shortage that lasts for some
measurable period of time, as contrasted with a resource shortage 
that results in only a handful of allocation failures.

Now the server will remain active, though as long as the resource
shortage exists children may continually fail and the parent will
try once per second to create a replacement.  The existing logic
to reduce the spawn rate after such errors will prevent the 
parent from trying to create children more rapidly.

Modified:
    httpd/httpd/trunk/CHANGES
    httpd/httpd/trunk/docs/log-message-tags/next-number
    httpd/httpd/trunk/server/mpm/event/event.c
    httpd/httpd/trunk/server/mpm/worker/worker.c

Modified: httpd/httpd/trunk/CHANGES
URL: http://svn.apache.org/viewvc/httpd/httpd/trunk/CHANGES?rev=1363557&r1=1363556&r2=1363557&view=diff
==============================================================================
--- httpd/httpd/trunk/CHANGES [utf-8] (original)
+++ httpd/httpd/trunk/CHANGES [utf-8] Thu Jul 19 21:31:52 2012
@@ -1,6 +1,9 @@
                                                          -*- coding: utf-8 -*-
 Changes with Apache 2.5.0
 
+  *) mpm_event, mpm_worker: Remain active amidst prevalent child process
+     resource shortages.  [Jeff Trawick]
+
   *) mpm_event, mpm_worker: Fix cases where the spawn rate wasn't reduced
      after child process resource shortages.  [Jeff Trawick]
 

Modified: httpd/httpd/trunk/docs/log-message-tags/next-number
URL: http://svn.apache.org/viewvc/httpd/httpd/trunk/docs/log-message-tags/next-number?rev=1363557&r1=1363556&r2=1363557&view=diff
==============================================================================
--- httpd/httpd/trunk/docs/log-message-tags/next-number (original)
+++ httpd/httpd/trunk/docs/log-message-tags/next-number Thu Jul 19 21:31:52 2012
@@ -1 +1 @@
-2324
+2326

Modified: httpd/httpd/trunk/server/mpm/event/event.c
URL: http://svn.apache.org/viewvc/httpd/httpd/trunk/server/mpm/event/event.c?rev=1363557&r1=1363556&r2=1363557&view=diff
==============================================================================
--- httpd/httpd/trunk/server/mpm/event/event.c (original)
+++ httpd/httpd/trunk/server/mpm/event/event.c Thu Jul 19 21:31:52 2012
@@ -183,6 +183,7 @@ static apr_uint32_t lingering_count = 0;
 static apr_uint32_t suspended_count = 0;    /* Number of suspended connections */
 static apr_uint32_t clogged_count = 0;      /* Number of threads processing ssl conns */
 static int resource_shortage = 0;
+static int had_healthy_child = 0;
 static fd_queue_t *worker_queue;
 static fd_queue_info_t *worker_queue_info;
 static int mpm_state = AP_MPMQ_STARTING;
@@ -2403,6 +2404,7 @@ static void perform_idle_server_maintena
         int any_dying_threads = 0;
         int any_dead_threads = 0;
         int all_dead_threads = 1;
+        int child_threads_active = 0;
 
         if (i >= retained->max_daemons_limit
             && totally_free_length == retained->idle_spawn_rate)
@@ -2438,6 +2440,7 @@ static void perform_idle_server_maintena
                 }
                 if (status >= SERVER_READY && status < SERVER_GRACEFUL) {
                     ++active_thread_count;
+                    ++child_threads_active;
                 }
             }
         }
@@ -2464,6 +2467,9 @@ static void perform_idle_server_maintena
             }
             ++free_length;
         }
+        else if (child_threads_active == threads_per_child) {
+            had_healthy_child = 1;
+        }
         /* XXX if (!ps->quiescing)     is probably more reliable  GLA */
         if (!any_dying_threads) {
             last_non_dead = i;
@@ -2472,21 +2478,23 @@ static void perform_idle_server_maintena
     }
 
     if (retained->sick_child_detected) {
-        if (active_thread_count > 0) {
-            /* some child processes appear to be working.  don't kill the
-             * whole server.
+        if (had_healthy_child) {
+            /* Assume this is a transient error, even though it may not be.  Leave
+             * the server up in case it is able to serve some requests or the
+             * problem will be resolved.
              */
             retained->sick_child_detected = 0;
         }
         else {
-            /* looks like a basket case.  give up.
+            /* looks like a basket case, as no child ever fully initialized; give up.
              */
             shutdown_pending = 1;
             child_fatal = 1;
             ap_log_error(APLOG_MARK, APLOG_ALERT, 0,
-                         ap_server_conf, APLOGNO(00483)
-                         "No active workers found..."
-                         " Apache is exiting!");
+                         ap_server_conf, APLOGNO(02324)
+                         "A resource shortage or other unrecoverable failure "
+                         "was encountered before any child process initialized "
+                         "successfully... httpd is exiting!");
             /* the child already logged the failure details */
             return;
         }

Modified: httpd/httpd/trunk/server/mpm/worker/worker.c
URL: http://svn.apache.org/viewvc/httpd/httpd/trunk/server/mpm/worker/worker.c?rev=1363557&r1=1363556&r2=1363557&view=diff
==============================================================================
--- httpd/httpd/trunk/server/mpm/worker/worker.c (original)
+++ httpd/httpd/trunk/server/mpm/worker/worker.c Thu Jul 19 21:31:52 2012
@@ -129,6 +129,7 @@ static int listener_may_exit = 0;
 static int requests_this_child;
 static int num_listensocks = 0;
 static int resource_shortage = 0;
+static int had_healthy_child = 0;
 static fd_queue_t *worker_queue;
 static fd_queue_info_t *worker_queue_info;
 static int mpm_state = AP_MPMQ_STARTING;
@@ -1473,6 +1474,7 @@ static void perform_idle_server_maintena
         int any_dying_threads = 0;
         int any_dead_threads = 0;
         int all_dead_threads = 1;
+        int child_threads_active = 0;
 
         if (i >= retained->max_daemons_limit && totally_free_length == retained->idle_spawn_rate)
             /* short cut if all active processes have been examined and
@@ -1507,6 +1509,7 @@ static void perform_idle_server_maintena
                 }
                 if (status >= SERVER_READY && status < SERVER_GRACEFUL) {
                     ++active_thread_count;
+                    ++child_threads_active;
                 }
             }
         }
@@ -1532,6 +1535,9 @@ static void perform_idle_server_maintena
             }
             ++free_length;
         }
+        else if (child_threads_active == threads_per_child) {
+            had_healthy_child = 1;
+        }
         /* XXX if (!ps->quiescing)     is probably more reliable  GLA */
         if (!any_dying_threads) {
             last_non_dead = i;
@@ -1540,21 +1546,23 @@ static void perform_idle_server_maintena
     }
 
     if (retained->sick_child_detected) {
-        if (active_thread_count > 0) {
-            /* some child processes appear to be working.  don't kill the
-             * whole server.
+        if (had_healthy_child) {
+            /* Assume this is a transient error, even though it may not be.  Leave
+             * the server up in case it is able to serve some requests or the
+             * problem will be resolved.
              */
             retained->sick_child_detected = 0;
         }
         else {
-            /* looks like a basket case.  give up.
+            /* looks like a basket case, as no child ever fully initialized; give up.
              */
             shutdown_pending = 1;
             child_fatal = 1;
             ap_log_error(APLOG_MARK, APLOG_ALERT, 0,
-                         ap_server_conf, APLOGNO(00285)
-                         "No active workers found..."
-                         " Apache is exiting!");
+                         ap_server_conf, APLOGNO(02325)
+                         "A resource shortage or other unrecoverable failure "
+                         "was encountered before any child process initialized "
+                         "successfully... httpd is exiting!");
             /* the child already logged the failure details */
             return;
         }



Re: svn commit: r1363557 - in /httpd/httpd/trunk: CHANGES docs/log-message-tags/next-number server/mpm/event/event.c server/mpm/worker/worker.c

Posted by Jeff Trawick <tr...@gmail.com>.
On Thu, Jul 19, 2012 at 5:31 PM,  <tr...@apache.org> wrote:
> Author: trawick
> Date: Thu Jul 19 21:31:52 2012
> New Revision: 1363557
>
> URL: http://svn.apache.org/viewvc?rev=1363557&view=rev
> Log:
> mpm_event, mpm_worker: Remain active amidst prevalent child process
> resource shortages.
>
> This is a somewhat different direction than r168182 ("transient thread
> creation errors shouldn't take down the whole server").
>
> r168182: If APEXIT_CHILDSICK is received and there aren't any
>          active children at the time, exit.
>
> Now:     If APEXIT_CHILDSICK is received and we never successfully
>          initialized a child, exit.
>
> The issue seen with the r168182 handling is that it is rather easy
> to be left with no active child processes (which causes the server
> to exit completely) during a resource shortage that lasts for some
> measurable period of time, as contrasted with a resource shortage
> that results in only a handful of allocation failures.
>
> Now the server will remain active, though as long as the resource
> shortage exists children may continually fail and the parent will
> try once per second to create a replacement.  The existing logic
> to reduce the spawn rate after such errors will prevent the
> parent from trying to create children more rapidly.

This change in strategies is worth some (other) thought.  In some
situations it may be trading one basket case for another, but there
are other situations where a resource exhaustion attack on the system
(or some other self-inflicted fiasco with similar results) is resolved
but with httpd notably absent afterwards until manual intervention.

>
> Modified:
>     httpd/httpd/trunk/CHANGES
>     httpd/httpd/trunk/docs/log-message-tags/next-number
>     httpd/httpd/trunk/server/mpm/event/event.c
>     httpd/httpd/trunk/server/mpm/worker/worker.c
>
> Modified: httpd/httpd/trunk/CHANGES
> URL: http://svn.apache.org/viewvc/httpd/httpd/trunk/CHANGES?rev=1363557&r1=1363556&r2=1363557&view=diff
> ==============================================================================
> --- httpd/httpd/trunk/CHANGES [utf-8] (original)
> +++ httpd/httpd/trunk/CHANGES [utf-8] Thu Jul 19 21:31:52 2012
> @@ -1,6 +1,9 @@
>                                                           -*- coding: utf-8 -*-
>  Changes with Apache 2.5.0
>
> +  *) mpm_event, mpm_worker: Remain active amidst prevalent child process
> +     resource shortages.  [Jeff Trawick]
> +
>    *) mpm_event, mpm_worker: Fix cases where the spawn rate wasn't reduced
>       after child process resource shortages.  [Jeff Trawick]
>
>
> Modified: httpd/httpd/trunk/docs/log-message-tags/next-number
> URL: http://svn.apache.org/viewvc/httpd/httpd/trunk/docs/log-message-tags/next-number?rev=1363557&r1=1363556&r2=1363557&view=diff
> ==============================================================================
> --- httpd/httpd/trunk/docs/log-message-tags/next-number (original)
> +++ httpd/httpd/trunk/docs/log-message-tags/next-number Thu Jul 19 21:31:52 2012
> @@ -1 +1 @@
> -2324
> +2326
>
> Modified: httpd/httpd/trunk/server/mpm/event/event.c
> URL: http://svn.apache.org/viewvc/httpd/httpd/trunk/server/mpm/event/event.c?rev=1363557&r1=1363556&r2=1363557&view=diff
> ==============================================================================
> --- httpd/httpd/trunk/server/mpm/event/event.c (original)
> +++ httpd/httpd/trunk/server/mpm/event/event.c Thu Jul 19 21:31:52 2012
> @@ -183,6 +183,7 @@ static apr_uint32_t lingering_count = 0;
>  static apr_uint32_t suspended_count = 0;    /* Number of suspended connections */
>  static apr_uint32_t clogged_count = 0;      /* Number of threads processing ssl conns */
>  static int resource_shortage = 0;
> +static int had_healthy_child = 0;
>  static fd_queue_t *worker_queue;
>  static fd_queue_info_t *worker_queue_info;
>  static int mpm_state = AP_MPMQ_STARTING;
> @@ -2403,6 +2404,7 @@ static void perform_idle_server_maintena
>          int any_dying_threads = 0;
>          int any_dead_threads = 0;
>          int all_dead_threads = 1;
> +        int child_threads_active = 0;
>
>          if (i >= retained->max_daemons_limit
>              && totally_free_length == retained->idle_spawn_rate)
> @@ -2438,6 +2440,7 @@ static void perform_idle_server_maintena
>                  }
>                  if (status >= SERVER_READY && status < SERVER_GRACEFUL) {
>                      ++active_thread_count;
> +                    ++child_threads_active;
>                  }
>              }
>          }
> @@ -2464,6 +2467,9 @@ static void perform_idle_server_maintena
>              }
>              ++free_length;
>          }
> +        else if (child_threads_active == threads_per_child) {
> +            had_healthy_child = 1;
> +        }
>          /* XXX if (!ps->quiescing)     is probably more reliable  GLA */
>          if (!any_dying_threads) {
>              last_non_dead = i;
> @@ -2472,21 +2478,23 @@ static void perform_idle_server_maintena
>      }
>
>      if (retained->sick_child_detected) {
> -        if (active_thread_count > 0) {
> -            /* some child processes appear to be working.  don't kill the
> -             * whole server.
> +        if (had_healthy_child) {
> +            /* Assume this is a transient error, even though it may not be.  Leave
> +             * the server up in case it is able to serve some requests or the
> +             * problem will be resolved.
>               */
>              retained->sick_child_detected = 0;
>          }
>          else {
> -            /* looks like a basket case.  give up.
> +            /* looks like a basket case, as no child ever fully initialized; give up.
>               */
>              shutdown_pending = 1;
>              child_fatal = 1;
>              ap_log_error(APLOG_MARK, APLOG_ALERT, 0,
> -                         ap_server_conf, APLOGNO(00483)
> -                         "No active workers found..."
> -                         " Apache is exiting!");
> +                         ap_server_conf, APLOGNO(02324)
> +                         "A resource shortage or other unrecoverable failure "
> +                         "was encountered before any child process initialized "
> +                         "successfully... httpd is exiting!");
>              /* the child already logged the failure details */
>              return;
>          }
>
> Modified: httpd/httpd/trunk/server/mpm/worker/worker.c
> URL: http://svn.apache.org/viewvc/httpd/httpd/trunk/server/mpm/worker/worker.c?rev=1363557&r1=1363556&r2=1363557&view=diff
> ==============================================================================
> --- httpd/httpd/trunk/server/mpm/worker/worker.c (original)
> +++ httpd/httpd/trunk/server/mpm/worker/worker.c Thu Jul 19 21:31:52 2012
> @@ -129,6 +129,7 @@ static int listener_may_exit = 0;
>  static int requests_this_child;
>  static int num_listensocks = 0;
>  static int resource_shortage = 0;
> +static int had_healthy_child = 0;
>  static fd_queue_t *worker_queue;
>  static fd_queue_info_t *worker_queue_info;
>  static int mpm_state = AP_MPMQ_STARTING;
> @@ -1473,6 +1474,7 @@ static void perform_idle_server_maintena
>          int any_dying_threads = 0;
>          int any_dead_threads = 0;
>          int all_dead_threads = 1;
> +        int child_threads_active = 0;
>
>          if (i >= retained->max_daemons_limit && totally_free_length == retained->idle_spawn_rate)
>              /* short cut if all active processes have been examined and
> @@ -1507,6 +1509,7 @@ static void perform_idle_server_maintena
>                  }
>                  if (status >= SERVER_READY && status < SERVER_GRACEFUL) {
>                      ++active_thread_count;
> +                    ++child_threads_active;
>                  }
>              }
>          }
> @@ -1532,6 +1535,9 @@ static void perform_idle_server_maintena
>              }
>              ++free_length;
>          }
> +        else if (child_threads_active == threads_per_child) {
> +            had_healthy_child = 1;
> +        }
>          /* XXX if (!ps->quiescing)     is probably more reliable  GLA */
>          if (!any_dying_threads) {
>              last_non_dead = i;
> @@ -1540,21 +1546,23 @@ static void perform_idle_server_maintena
>      }
>
>      if (retained->sick_child_detected) {
> -        if (active_thread_count > 0) {
> -            /* some child processes appear to be working.  don't kill the
> -             * whole server.
> +        if (had_healthy_child) {
> +            /* Assume this is a transient error, even though it may not be.  Leave
> +             * the server up in case it is able to serve some requests or the
> +             * problem will be resolved.
>               */
>              retained->sick_child_detected = 0;
>          }
>          else {
> -            /* looks like a basket case.  give up.
> +            /* looks like a basket case, as no child ever fully initialized; give up.
>               */
>              shutdown_pending = 1;
>              child_fatal = 1;
>              ap_log_error(APLOG_MARK, APLOG_ALERT, 0,
> -                         ap_server_conf, APLOGNO(00285)
> -                         "No active workers found..."
> -                         " Apache is exiting!");
> +                         ap_server_conf, APLOGNO(02325)
> +                         "A resource shortage or other unrecoverable failure "
> +                         "was encountered before any child process initialized "
> +                         "successfully... httpd is exiting!");
>              /* the child already logged the failure details */
>              return;
>          }
>
>



-- 
Born in Roswell... married an alien...
http://emptyhammock.com/

Re: svn commit: r1363557 - in /httpd/httpd/trunk: CHANGES docs/log-message-tags/next-number server/mpm/event/event.c server/mpm/worker/worker.c

Posted by Jeff Trawick <tr...@gmail.com>.
On Sun, Jul 22, 2012 at 8:49 AM, Rainer Jung <ra...@kippdata.de> wrote:
> On 19.07.2012 23:31, trawick@apache.org wrote:
>>
>> Author: trawick
>> Date: Thu Jul 19 21:31:52 2012
>> New Revision: 1363557
>>
>> URL: http://svn.apache.org/viewvc?rev=1363557&view=rev
>> Log:
>> mpm_event, mpm_worker: Remain active amidst prevalent child process
>> resource shortages.
>>
>> This is a somewhat different direction than r168182 ("transient thread
>> creation errors shouldn't take down the whole server").
>>
>> r168182: If APEXIT_CHILDSICK is received and there aren't any
>>           active children at the time, exit.
>>
>> Now:     If APEXIT_CHILDSICK is received and we never successfully
>>           initialized a child, exit.
>>
>> The issue seen with the r168182 handling is that it is rather easy
>> to be left with no active child processes (which causes the server
>> to exit completely) during a resource shortage that lasts for some
>> measurable period of time, as contrasted with a resource shortage
>> that results in only a handful of allocation failures.
>>
>> Now the server will remain active, though as long as the resource
>> shortage exists children may continually fail and the parent will
>> try once per second to create a replacement.  The existing logic
>> to reduce the spawn rate after such errors will prevent the
>> parent from trying to create children more rapidly.
>>
>> Modified:
>>      httpd/httpd/trunk/CHANGES
>>      httpd/httpd/trunk/docs/log-message-tags/next-number
>>      httpd/httpd/trunk/server/mpm/event/event.c
>>      httpd/httpd/trunk/server/mpm/worker/worker.c
>>
>> Modified: httpd/httpd/trunk/CHANGES
>> URL:
>> http://svn.apache.org/viewvc/httpd/httpd/trunk/CHANGES?rev=1363557&r1=1363556&r2=1363557&view=diff
>>
>> ==============================================================================
>> --- httpd/httpd/trunk/CHANGES [utf-8] (original)
>> +++ httpd/httpd/trunk/CHANGES [utf-8] Thu Jul 19 21:31:52 2012
>> @@ -1,6 +1,9 @@
>>                                                            -*- coding:
>> utf-8 -*-
>>   Changes with Apache 2.5.0
>>
>> +  *) mpm_event, mpm_worker: Remain active amidst prevalent child process
>> +     resource shortages.  [Jeff Trawick]
>> +
>>     *) mpm_event, mpm_worker: Fix cases where the spawn rate wasn't
>> reduced
>>        after child process resource shortages.  [Jeff Trawick]
>>
>>
>> Modified: httpd/httpd/trunk/docs/log-message-tags/next-number
>> URL:
>> http://svn.apache.org/viewvc/httpd/httpd/trunk/docs/log-message-tags/next-number?rev=1363557&r1=1363556&r2=1363557&view=diff
>>
>> ==============================================================================
>> --- httpd/httpd/trunk/docs/log-message-tags/next-number (original)
>> +++ httpd/httpd/trunk/docs/log-message-tags/next-number Thu Jul 19
>> 21:31:52 2012
>> @@ -1 +1 @@
>> -2324
>> +2326
>>
>> Modified: httpd/httpd/trunk/server/mpm/event/event.c
>> URL:
>> http://svn.apache.org/viewvc/httpd/httpd/trunk/server/mpm/event/event.c?rev=1363557&r1=1363556&r2=1363557&view=diff
>>
>> ==============================================================================
>> --- httpd/httpd/trunk/server/mpm/event/event.c (original)
>> +++ httpd/httpd/trunk/server/mpm/event/event.c Thu Jul 19 21:31:52 2012
>> @@ -183,6 +183,7 @@ static apr_uint32_t lingering_count = 0;
>>   static apr_uint32_t suspended_count = 0;    /* Number of suspended
>> connections */
>>   static apr_uint32_t clogged_count = 0;      /* Number of threads
>> processing ssl conns */
>>   static int resource_shortage = 0;
>> +static int had_healthy_child = 0;
>>   static fd_queue_t *worker_queue;
>>   static fd_queue_info_t *worker_queue_info;
>>   static int mpm_state = AP_MPMQ_STARTING;
>> @@ -2403,6 +2404,7 @@ static void perform_idle_server_maintena
>>           int any_dying_threads = 0;
>>           int any_dead_threads = 0;
>>           int all_dead_threads = 1;
>> +        int child_threads_active = 0;
>>
>>           if (i >= retained->max_daemons_limit
>>               && totally_free_length == retained->idle_spawn_rate)
>> @@ -2438,6 +2440,7 @@ static void perform_idle_server_maintena
>>                   }
>>                   if (status >= SERVER_READY && status < SERVER_GRACEFUL)
>> {
>>                       ++active_thread_count;
>> +                    ++child_threads_active;
>
>
> Couldn't we now simplify by adding child_threads_active to
> active_thread_count outside the loop instead of incrementing
> active_thread_count for each thread?

yes/thanks!

>
>
>>                   }
>>               }
>>           }
>> @@ -2464,6 +2467,9 @@ static void perform_idle_server_maintena
>>               }
>>               ++free_length;
>>           }
>> +        else if (child_threads_active == threads_per_child) {
>> +            had_healthy_child = 1;
>> +        }
>
>
> As I understand it had_healthy_child is never reset. So when we do an
> "apachectl restart" (or graceful) the children started before that event
> still count. I'd say after a restart the condition should be reset, because
> often the config will have changed.

Ahh, I had loadable-MPM in my mind, and had_healthy_child would be
reset when the MPM was reloaded.  But with a statically linked MPM
had_healthy_child doesn't work right.  I'll fix that too.  (Generally
I see some potential confusion between the flags/counters used in
children, for which a forked child always gets a clean, initialized
copy, and flags/counters used in the parent that aren't intended to be
retained across unload/load, which can have initialization issues.
Maybe some rearrangement will help.)

>
> ...
>
> The same comments apply to the worker MPM patch.

Yep.

Thanks again for looking...

>
> Regards,
>
> Rainer



-- 
Born in Roswell... married an alien...
http://emptyhammock.com/

Re: svn commit: r1363557 - in /httpd/httpd/trunk: CHANGES docs/log-message-tags/next-number server/mpm/event/event.c server/mpm/worker/worker.c

Posted by Rainer Jung <ra...@kippdata.de>.
On 19.07.2012 23:31, trawick@apache.org wrote:
> Author: trawick
> Date: Thu Jul 19 21:31:52 2012
> New Revision: 1363557
>
> URL: http://svn.apache.org/viewvc?rev=1363557&view=rev
> Log:
> mpm_event, mpm_worker: Remain active amidst prevalent child process
> resource shortages.
>
> This is a somewhat different direction than r168182 ("transient thread
> creation errors shouldn't take down the whole server").
>
> r168182: If APEXIT_CHILDSICK is received and there aren't any
>           active children at the time, exit.
>
> Now:     If APEXIT_CHILDSICK is received and we never successfully
>           initialized a child, exit.
>
> The issue seen with the r168182 handling is that it is rather easy
> to be left with no active child processes (which causes the server
> to exit completely) during a resource shortage that lasts for some
> measurable period of time, as contrasted with a resource shortage
> that results in only a handful of allocation failures.
>
> Now the server will remain active, though as long as the resource
> shortage exists children may continually fail and the parent will
> try once per second to create a replacement.  The existing logic
> to reduce the spawn rate after such errors will prevent the
> parent from trying to create children more rapidly.
>
> Modified:
>      httpd/httpd/trunk/CHANGES
>      httpd/httpd/trunk/docs/log-message-tags/next-number
>      httpd/httpd/trunk/server/mpm/event/event.c
>      httpd/httpd/trunk/server/mpm/worker/worker.c
>
> Modified: httpd/httpd/trunk/CHANGES
> URL: http://svn.apache.org/viewvc/httpd/httpd/trunk/CHANGES?rev=1363557&r1=1363556&r2=1363557&view=diff
> ==============================================================================
> --- httpd/httpd/trunk/CHANGES [utf-8] (original)
> +++ httpd/httpd/trunk/CHANGES [utf-8] Thu Jul 19 21:31:52 2012
> @@ -1,6 +1,9 @@
>                                                            -*- coding: utf-8 -*-
>   Changes with Apache 2.5.0
>
> +  *) mpm_event, mpm_worker: Remain active amidst prevalent child process
> +     resource shortages.  [Jeff Trawick]
> +
>     *) mpm_event, mpm_worker: Fix cases where the spawn rate wasn't reduced
>        after child process resource shortages.  [Jeff Trawick]
>
>
> Modified: httpd/httpd/trunk/docs/log-message-tags/next-number
> URL: http://svn.apache.org/viewvc/httpd/httpd/trunk/docs/log-message-tags/next-number?rev=1363557&r1=1363556&r2=1363557&view=diff
> ==============================================================================
> --- httpd/httpd/trunk/docs/log-message-tags/next-number (original)
> +++ httpd/httpd/trunk/docs/log-message-tags/next-number Thu Jul 19 21:31:52 2012
> @@ -1 +1 @@
> -2324
> +2326
>
> Modified: httpd/httpd/trunk/server/mpm/event/event.c
> URL: http://svn.apache.org/viewvc/httpd/httpd/trunk/server/mpm/event/event.c?rev=1363557&r1=1363556&r2=1363557&view=diff
> ==============================================================================
> --- httpd/httpd/trunk/server/mpm/event/event.c (original)
> +++ httpd/httpd/trunk/server/mpm/event/event.c Thu Jul 19 21:31:52 2012
> @@ -183,6 +183,7 @@ static apr_uint32_t lingering_count = 0;
>   static apr_uint32_t suspended_count = 0;    /* Number of suspended connections */
>   static apr_uint32_t clogged_count = 0;      /* Number of threads processing ssl conns */
>   static int resource_shortage = 0;
> +static int had_healthy_child = 0;
>   static fd_queue_t *worker_queue;
>   static fd_queue_info_t *worker_queue_info;
>   static int mpm_state = AP_MPMQ_STARTING;
> @@ -2403,6 +2404,7 @@ static void perform_idle_server_maintena
>           int any_dying_threads = 0;
>           int any_dead_threads = 0;
>           int all_dead_threads = 1;
> +        int child_threads_active = 0;
>
>           if (i >= retained->max_daemons_limit
>               && totally_free_length == retained->idle_spawn_rate)
> @@ -2438,6 +2440,7 @@ static void perform_idle_server_maintena
>                   }
>                   if (status >= SERVER_READY && status < SERVER_GRACEFUL) {
>                       ++active_thread_count;
> +                    ++child_threads_active;

Couldn't we now simplify by adding child_threads_active to 
active_thread_count outside the loop instead of incrementing 
active_thread_count for each thread?

>                   }
>               }
>           }
> @@ -2464,6 +2467,9 @@ static void perform_idle_server_maintena
>               }
>               ++free_length;
>           }
> +        else if (child_threads_active == threads_per_child) {
> +            had_healthy_child = 1;
> +        }

As I understand it had_healthy_child is never reset. So when we do an 
"apachectl restart" (or graceful) the children started before that event 
still count. I'd say after a restart the condition should be reset, 
because often the config will have changed.

...

The same comments apply to the worker MPM patch.

Regards,

Rainer