You are viewing a plain text version of this content. The canonical link for it is here.
Posted to cvs@httpd.apache.org by ji...@apache.org on 2013/03/01 16:23:59 UTC

svn commit: r1451614 - /httpd/httpd/trunk/server/mpm/eventopt/eventopt.c

Author: jim
Date: Fri Mar  1 15:23:58 2013
New Revision: 1451614

URL: http://svn.apache.org/r1451614
Log:
sync w/ current event bug-fixes

Modified:
    httpd/httpd/trunk/server/mpm/eventopt/eventopt.c

Modified: httpd/httpd/trunk/server/mpm/eventopt/eventopt.c
URL: http://svn.apache.org/viewvc/httpd/httpd/trunk/server/mpm/eventopt/eventopt.c?rev=1451614&r1=1451613&r2=1451614&view=diff
==============================================================================
--- httpd/httpd/trunk/server/mpm/eventopt/eventopt.c (original)
+++ httpd/httpd/trunk/server/mpm/eventopt/eventopt.c Fri Mar  1 15:23:58 2013
@@ -169,13 +169,18 @@ static int ap_daemons_limit = 0;
 static int max_workers = 0;
 static int server_limit = 0;
 static int thread_limit = 0;
+static int had_healthy_child = 0;
 static int dying = 0;
 static int workers_may_exit = 0;
 static int start_thread_may_exit = 0;
 static int listener_may_exit = 0;
-static int conns_this_child;
 static int num_listensocks = 0;
-static apr_uint32_t connection_count = 0;
+static apr_int32_t conns_this_child;        /* MaxConnectionsPerChild, only access
+                                               in listener thread */
+static apr_uint32_t connection_count = 0;   /* Number of open connections */
+static apr_uint32_t lingering_count = 0;    /* Number of connections in lingering close */
+static apr_uint32_t suspended_count = 0;    /* Number of suspended connections */
+static apr_uint32_t clogged_count = 0;      /* Number of threads processing ssl conns */
 static int resource_shortage = 0;
 static fd_queue_t *worker_queue;
 static fd_queue_info_t *worker_queue_info;
@@ -400,8 +405,12 @@ static void enable_listensocks(int proce
     int i;
     ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, ap_server_conf, APLOGNO(00457)
                  "Accepting new connections again: "
-                 "%u active conns, %u idle workers",
+                 "%u active conns (%u lingering/%u clogged/%u suspended), "
+                 "%u idle workers",
                  apr_atomic_read32(&connection_count),
+                 apr_atomic_read32(&lingering_count),
+                 apr_atomic_read32(&clogged_count),
+                 apr_atomic_read32(&suspended_count),
                  ap_queue_info_get_idlers(worker_queue_info));
     for (i = 0; i < num_listensocks; i++)
         apr_pollset_add(event_pollset, &listener_pollfd[i]);
@@ -618,7 +627,20 @@ static int child_fatal;
 static int volatile shutdown_pending;
 static int volatile restart_pending;
 
-static apr_status_t decrement_connection_count(void *dummy) {
+static apr_status_t decrement_connection_count(void *cs_)
+{
+    event_conn_state_t *cs = cs_;
+    switch (cs->pub.state) {
+        case CONN_STATE_LINGER_NORMAL:
+        case CONN_STATE_LINGER_SHORT:
+            apr_atomic_dec32(&lingering_count);
+            break;
+        case CONN_STATE_SUSPENDED:
+            apr_atomic_dec32(&suspended_count);
+            break;
+        default:
+            break;
+    }
     apr_atomic_dec32(&connection_count);
     return APR_SUCCESS;
 }
@@ -766,7 +788,7 @@ static void set_signals(void)
 #endif
 }
 
-static void process_pollop(pollset_op_t *op)
+static int process_pollop(pollset_op_t *op)
 {
     apr_status_t rv;
     event_conn_state_t *cs = op->cs;
@@ -791,7 +813,12 @@ static void process_pollop(pollset_op_t 
     if (rv != APR_SUCCESS && !APR_STATUS_IS_EEXIST(rv)) {
         ap_log_error(APLOG_MARK, APLOG_ERR, rv, ap_server_conf, APLOGNO(00467)
                      "%s: apr_pollset_add failure", op->tag);
+        apr_socket_close(cs->pfd.desc.s);
+        apr_pool_clear(cs->p);
+        ap_push_pool(worker_queue_info, cs->p);
+        return 0;
     }
+    return 1;
 }
 
 /*
@@ -807,63 +834,101 @@ static void process_pollop(pollset_op_t 
 static int start_lingering_close(event_conn_state_t *cs, ap_equeue_t *eq)
 {
     apr_status_t rv;
+    int ret = 1;
 
-    if (ap_start_lingering_close(cs->c)) {
-        apr_pool_clear(cs->p);
-        ap_push_pool(worker_queue_info, cs->p);
-        return 0;
+    apr_socket_t *csd = ap_get_conn_socket(cs->c);
+    pollset_op_t localv;
+    pollset_op_t *v;
+
+    if (eq) {
+        v = ap_equeue_writer_value(eq);
     }
     else {
-        apr_socket_t *csd = ap_get_conn_socket(cs->c);
-        pollset_op_t localv;
-        pollset_op_t *v;
-
-        if (eq) {
-            v = ap_equeue_writer_value(eq);
-        }
-        else {
-            v = &localv;
-        }
+        v = &localv;
+    }
 
 #ifdef AP_DEBUG
-        {
-            rv = apr_socket_timeout_set(csd, 0);
-            AP_DEBUG_ASSERT(rv == APR_SUCCESS);
-        }
+    {
+        rv = apr_socket_timeout_set(csd, 0);
+        AP_DEBUG_ASSERT(rv == APR_SUCCESS);
+    }
 #else
-        apr_socket_timeout_set(csd, 0);
+    apr_socket_timeout_set(csd, 0);
 #endif
-        /*
-         * If some module requested a shortened waiting period, only wait for
-         * 2s (SECONDS_TO_LINGER). This is useful for mitigating certain
-         * DoS attacks.
-         */
-        if (apr_table_get(cs->c->notes, "short-lingering-close")) {
-            cs->expiration_time =
-                apr_time_now() + apr_time_from_sec(SECONDS_TO_LINGER);
-            v->timeout_type = TIMEOUT_SHORT_LINGER;
-            v->tag = "start_lingering_close(short)";
-            cs->pub.state = CONN_STATE_LINGER_SHORT;
-        }
-        else {
-            cs->expiration_time =
-                apr_time_now() + apr_time_from_sec(MAX_SECS_TO_LINGER);
-            v->timeout_type = TIMEOUT_LINGER;
-            v->tag = "start_lingering_close(normal)";
-            cs->pub.state = CONN_STATE_LINGER_NORMAL;
-        }
+    /*
+     * If some module requested a shortened waiting period, only wait for
+     * 2s (SECONDS_TO_LINGER). This is useful for mitigating certain
+     * DoS attacks.
+     */
+    if (apr_table_get(cs->c->notes, "short-lingering-close")) {
+        cs->expiration_time =
+            apr_time_now() + apr_time_from_sec(SECONDS_TO_LINGER);
+        v->timeout_type = TIMEOUT_SHORT_LINGER;
+        v->tag = "start_lingering_close(short)";
+        cs->pub.state = CONN_STATE_LINGER_SHORT;
+    }
+    else {
+        cs->expiration_time =
+            apr_time_now() + apr_time_from_sec(MAX_SECS_TO_LINGER);
+        v->timeout_type = TIMEOUT_LINGER;
+        v->tag = "start_lingering_close(normal)";
+        cs->pub.state = CONN_STATE_LINGER_NORMAL;
+    }
+    apr_atomic_inc32(&lingering_count);
+    cs->pfd.reqevents = APR_POLLIN | APR_POLLHUP | APR_POLLERR;
+    v->cs = cs;
+    if (eq != NULL) {
+        ap_equeue_writer_onward(eq);
+        apr_pollset_wakeup(event_pollset);
+    }
+    else {
+        ret = process_pollop(v);
+    }
 
-        cs->pfd.reqevents = APR_POLLIN | APR_POLLHUP | APR_POLLERR;
-        v->cs = cs;
-        if (eq != NULL) {
-            ap_equeue_writer_onward(eq);
-            apr_pollset_wakeup(event_pollset);
-        }
-        else {
-            process_pollop(v);
-        }
+    return ret;
+}
+
+/*
+ * Close our side of the connection, flushing data to the client first.
+ * Pre-condition: cs is not in any timeout queue and not in the pollset,
+ *                timeout_mutex is not locked
+ * return: 0 if connection is fully closed,
+ *         1 if connection is lingering
+ * May only be called by worker thread.
+ */
+static int start_lingering_close_blocking(event_conn_state_t *cs, ap_equeue_t *eq)
+{
+    if (ap_start_lingering_close(cs->c)) {
+        apr_pool_clear(cs->p);
+        ap_push_pool(worker_queue_info, cs->p);
+        return 0;
     }
-    return 1;
+    return start_lingering_close(cs, eq);
+}
+
+/*
+ * Close our side of the connection, NOT flushing data to the client.
+ * This should only be called if there has been an error or if we know
+ * that our send buffers are empty.
+ * Pre-condition: cs is not in any timeout queue and not in the pollset,
+ *                timeout_mutex is not locked
+ * return: 0 if connection is fully closed,
+ *         1 if connection is lingering
+ * may be called by listener thread
+ */
+static int start_lingering_close_nonblocking(event_conn_state_t *cs, ap_equeue_t *eq)
+{
+    conn_rec *c = cs->c;
+    apr_socket_t *csd = cs->pfd.desc.s;
+
+    if (c->aborted
+        || apr_socket_shutdown(csd, APR_SHUTDOWN_WRITE) != APR_SUCCESS) {
+        apr_socket_close(csd);
+        apr_pool_clear(cs->p);
+        ap_push_pool(worker_queue_info, cs->p);
+        return 0;
+    }
+    return start_lingering_close(cs, eq);
 }
 
 /*
@@ -890,10 +955,8 @@ static int stop_lingering_close(event_co
 
 /*
  * process one connection in the worker
- * return: 1 if the connection has been completed,
- *         0 if it is still open and waiting for some event
  */
-static int process_socket(apr_thread_t *thd, apr_pool_t * p, apr_socket_t * sock,
+static void process_socket(apr_thread_t *thd, apr_pool_t * p, apr_socket_t * sock,
                           event_conn_state_t * cs,
                           ap_equeue_t *eq,
                           int my_child_num,
@@ -916,10 +979,11 @@ static int process_socket(apr_thread_t *
             apr_bucket_alloc_destroy(cs->bucket_alloc);
             apr_pool_clear(p);
             ap_push_pool(worker_queue_info, p);
-            return 1;
+            return;
         }
         apr_atomic_inc32(&connection_count);
-        apr_pool_cleanup_register(c->pool, NULL, decrement_connection_count, apr_pool_cleanup_null);
+        apr_pool_cleanup_register(c->pool, cs, decrement_connection_count,
+                                  apr_pool_cleanup_null);
         c->current_thread = thd;
         cs->c = c;
         c->cs = &(cs->pub);
@@ -967,10 +1031,12 @@ static int process_socket(apr_thread_t *
          * like mod_ssl, lets just do the normal read from input filters,
          * like the Worker MPM does.
          */
+        apr_atomic_inc32(&clogged_count);
         ap_run_process_connection(c);
         if (cs->pub.state != CONN_STATE_SUSPENDED) {
             cs->pub.state = CONN_STATE_LINGER;
         }
+        apr_atomic_dec32(&clogged_count);
     }
 
 read_request:
@@ -1017,7 +1083,7 @@ read_request:
 
             ap_equeue_writer_onward(eq);
             apr_pollset_wakeup(event_pollset);
-            return 1;
+            return;
         }
         else if (c->keepalive != AP_CONN_KEEPALIVE || c->aborted ||
             listener_may_exit) {
@@ -1033,8 +1099,8 @@ read_request:
     }
 
     if (cs->pub.state == CONN_STATE_LINGER) {
-        if (!start_lingering_close(cs, eq)) {
-            return 0;
+        if (!start_lingering_close_blocking(cs, eq)) {
+            return;
         }
     }
     else if (cs->pub.state == CONN_STATE_CHECK_REQUEST_LINE_READABLE) {
@@ -1060,7 +1126,9 @@ read_request:
         ap_equeue_writer_onward(eq);
         apr_pollset_wakeup(event_pollset);
     }
-
+    else if (cs->pub.state == CONN_STATE_SUSPENDED) {
+        apr_atomic_inc32(&suspended_count);
+    }
     /*
      * Prevent this connection from writing to our connection state after it
      * is no longer associated with this thread. This would happen if the EOR
@@ -1068,8 +1136,7 @@ read_request:
      * or timeout.
      */
     c->sbh = NULL;
-
-    return 1;
+    return;
 }
 
 /* conns_this_child has gone to zero or below.  See if the admin coded
@@ -1078,10 +1145,13 @@ read_request:
 static void check_infinite_requests(void)
 {
     if (ap_max_requests_per_child) {
+        ap_log_error(APLOG_MARK, APLOG_TRACE1, 0, ap_server_conf,
+                     "Stopping process due to MaxConnectionsPerChild");
         signal_threads(ST_GRACEFUL);
     }
     else {
-        conns_this_child = INT_MAX;  /* keep going */
+        /* keep going */
+        conns_this_child = APR_INT32_MAX;
     }
 }
 
@@ -1462,9 +1532,8 @@ static void * APR_THREAD_FUNC listener_t
                 break;
         }
 
-        if (conns_this_child <= 0) {
+        if (conns_this_child <= 0)
             check_infinite_requests();
-        }
 
         now = apr_time_now();
         if (APLOGtrace6(ap_server_conf)) {
@@ -1472,11 +1541,14 @@ static void * APR_THREAD_FUNC listener_t
             if (now - last_log > apr_time_from_msec(1000)) {
                 last_log = now;
                 ap_log_error(APLOG_MARK, APLOG_TRACE6, 0, ap_server_conf,
-                             "connections: %d (write-completion: %d "
-                             "keep-alive: %d lingering: %d)",
-                             connection_count, write_completion_q.count,
+                             "connections: %u (clogged: %u write-completion: %d "
+                             "keep-alive: %d lingering: %d suspended: %u)",
+                             apr_atomic_read32(&connection_count),
+                             apr_atomic_read32(&clogged_count),
+                             write_completion_q.count,
                              keepalive_q.count,
-                             linger_q.count + short_linger_q.count);
+                             apr_atomic_read32(&lingering_count),
+                             apr_atomic_read32(&suspended_count));
             }
         }
 
@@ -1539,7 +1611,7 @@ static void * APR_THREAD_FUNC listener_t
                 /* one of the sockets is readable */
                 struct timeout_queue *remove_from_q = &write_completion_q;
                 int blocking = 1;
-                cs = (event_conn_state_t *)pt->baton;
+                cs = (event_conn_state_t *) pt->baton;
                 switch (cs->pub.state) {
                 case CONN_STATE_CHECK_REQUEST_LINE_READABLE:
                     cs->pub.state = CONN_STATE_READ_REQUEST_LINE;
@@ -1562,7 +1634,7 @@ static void * APR_THREAD_FUNC listener_t
                     if (rc != APR_SUCCESS && !APR_STATUS_IS_NOTFOUND(rc)) {
                         ap_log_error(APLOG_MARK, APLOG_ERR, rc, ap_server_conf,
                                      "pollset remove failed");
-                        start_lingering_close(cs, NULL);
+                        start_lingering_close_nonblocking(cs, NULL);
                         break;
                     }
 
@@ -1572,7 +1644,7 @@ static void * APR_THREAD_FUNC listener_t
                      * re-connect to a different process.
                      */
                     if (!have_idle_worker) {
-                        start_lingering_close(cs, NULL);
+                        start_lingering_close_nonblocking(cs, NULL);
                         break;
                     }
                     rc = push2worker(out_pfd, event_pollset);
@@ -1663,6 +1735,7 @@ static void * APR_THREAD_FUNC listener_t
                     }
 
                     if (csd != NULL) {
+                        conns_this_child--;
                         rc = ap_queue_push(worker_queue, csd, NULL, ptrans);
                         if (rc != APR_SUCCESS) {
                             /* trash the connection; we couldn't queue the connected
@@ -1735,14 +1808,15 @@ static void * APR_THREAD_FUNC listener_t
                                       timeout_time +
                                       ap_server_conf->keep_alive_timeout +
                                       apr_time_from_sec(2),
-                                      start_lingering_close);
+                                      start_lingering_close_nonblocking);
             }
             else {
                 process_timeout_queue(&keepalive_q, timeout_time,
-                                      start_lingering_close);
+                                      start_lingering_close_nonblocking);
             }
             /* Step 2: write completion timeouts */
-            process_timeout_queue(&write_completion_q, timeout_time, start_lingering_close);
+            process_timeout_queue(&write_completion_q, timeout_time,
+                                  start_lingering_close_nonblocking);
             /* Step 3: (normal) lingering close completion timeouts */
             process_timeout_queue(&linger_q, timeout_time, stop_lingering_close);
             /* Step 4: (short) lingering close completion timeouts */
@@ -1750,11 +1824,11 @@ static void * APR_THREAD_FUNC listener_t
 
             ps = ap_get_scoreboard_process(process_slot);
             ps->write_completion = write_completion_q.count;
-            ps->lingering_close = linger_q.count + short_linger_q.count;
             ps->keep_alive = keepalive_q.count;
 
             ps->connections = apr_atomic_read32(&connection_count);
-            /* XXX: should count CONN_STATE_SUSPENDED and set ps->suspended */
+            ps->suspended = apr_atomic_read32(&suspended_count);
+            ps->lingering_close = apr_atomic_read32(&lingering_count);
         }
         if (listeners_disabled && !workers_were_busy &&
             (int)apr_atomic_read32(&connection_count) <
@@ -1868,10 +1942,7 @@ static void *APR_THREAD_FUNC worker_thre
         else {
             is_idle = 0;
             worker_sockets[thread_slot] = csd;
-            rv = process_socket(thd, ptrans, csd, cs, eq, process_slot, thread_slot);
-            if (!rv) {
-                conns_this_child--;
-            }
+            process_socket(thd, ptrans, csd, cs, eq, process_slot, thread_slot);
             worker_sockets[thread_slot] = NULL;
         }
     }
@@ -1995,7 +2066,6 @@ static void *APR_THREAD_FUNC start_threa
 
     ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, ap_server_conf, APLOGNO()
                  "start_threads: Using %s", apr_pollset_method_name(event_pollset));
-
     worker_sockets = apr_pcalloc(pchild, threads_per_child
                                  * sizeof(apr_socket_t *));
 
@@ -2211,7 +2281,7 @@ static void child_main(int child_num_arg
     }
     else {
         /* coding a value of zero means infinity */
-        conns_this_child = INT_MAX;
+        conns_this_child = APR_INT32_MAX;
     }
 
     /* Setup worker threads */
@@ -2436,6 +2506,7 @@ static void perform_idle_server_maintena
         int any_dying_threads = 0;
         int any_dead_threads = 0;
         int all_dead_threads = 1;
+        int child_threads_active = 0;
 
         if (i >= retained->max_daemons_limit
             && totally_free_length == retained->idle_spawn_rate)
@@ -2470,10 +2541,11 @@ static void perform_idle_server_maintena
                     ++idle_thread_count;
                 }
                 if (status >= SERVER_READY && status < SERVER_GRACEFUL) {
-                    ++active_thread_count;
+                    ++child_threads_active;
                 }
             }
         }
+        active_thread_count += child_threads_active;
         if (any_dead_threads
             && totally_free_length < retained->idle_spawn_rate
             && free_length < MAX_SPAWN_RATE
@@ -2497,6 +2569,9 @@ static void perform_idle_server_maintena
             }
             ++free_length;
         }
+        else if (child_threads_active == threads_per_child) {
+            had_healthy_child = 1;
+        }
         /* XXX if (!ps->quiescing)     is probably more reliable  GLA */
         if (!any_dying_threads) {
             last_non_dead = i;
@@ -2505,14 +2580,15 @@ static void perform_idle_server_maintena
     }
 
     if (retained->sick_child_detected) {
-        if (active_thread_count > 0) {
-            /* some child processes appear to be working.  don't kill the
-             * whole server.
+        if (had_healthy_child) {
+            /* Assume this is a transient error, even though it may not be.  Leave
+             * the server up in case it is able to serve some requests or the
+             * problem will be resolved.
              */
             retained->sick_child_detected = 0;
         }
         else {
-            /* looks like a basket case.  give up.
+            /* looks like a basket case, as no child ever fully initialized; give up.
              */
             shutdown_pending = 1;
             child_fatal = 1;
@@ -2652,6 +2728,11 @@ static void server_main_loop(int remaini
 
                 event_note_child_killed(-1, /* already out of the scoreboard */
                                         pid.pid, old_gen);
+                if (processed_status == APEXIT_CHILDSICK
+                    && old_gen == retained->my_generation) {
+                    /* resource shortage, minimize the fork rate */
+                    retained->idle_spawn_rate = 1;
+                }
 #if APR_HAS_OTHER_CHILD
             }
             else if (apr_proc_other_child_alert(&pid, APR_OC_REASON_DEATH,
@@ -2979,6 +3060,7 @@ static int event_pre_config(apr_pool_t *
     ap_daemons_limit = server_limit;
     threads_per_child = DEFAULT_THREADS_PER_CHILD;
     max_workers = ap_daemons_limit * threads_per_child;
+    had_healthy_child = 0;
     ap_extended_status = 0;
 
     return OK;