You are viewing a plain text version of this content. The canonical link for it is here.

Posted to cvs@httpd.apache.org by mt...@apache.org on 2009/03/30 10:08:00 UTC

svn commit: r759862 - /httpd/httpd/trunk/modules/cluster/mod_heartmonitor.c

Author: mturk
Date: Mon Mar 30 08:07:59 2009
New Revision: 759862

URL: http://svn.apache.org/viewvc?rev=759862&view=rev
Log:
Use named watchdog for heartmonitor.
The watchdog has zero interval, leaving to the callback to determine the running loop.

Modified:
    httpd/httpd/trunk/modules/cluster/mod_heartmonitor.c

Modified: httpd/httpd/trunk/modules/cluster/mod_heartmonitor.c
URL: http://svn.apache.org/viewvc/httpd/httpd/trunk/modules/cluster/mod_heartmonitor.c?rev=759862&r1=759861&r2=759862&view=diff
==============================================================================
--- httpd/httpd/trunk/modules/cluster/mod_heartmonitor.c (original)
+++ httpd/httpd/trunk/modules/cluster/mod_heartmonitor.c Mon Mar 30 08:07:59 2009
@@ -19,16 +19,20 @@
 #include "http_log.h"
 #include "apr_strings.h"
 #include "apr_hash.h"
+#include "apr_time.h"
 #include "ap_mpm.h"
 #include "scoreboard.h"
+#include "mod_watchdog.h"
 
 
-#ifndef HN_UPDATE_SEC
+#ifndef HM_UPDATE_SEC
 /* How often we update the stats file */
 /* TODO: Make a runtime config */
-#define HN_UPDATE_SEC (5)
+#define HM_UPDATE_SEC (5)
 #endif
 
+#define HM_WATHCHDOG_NAME ("_heartmonitor_")
+
 module AP_MODULE_DECLARE_DATA heartmonitor_module;
 
 typedef struct hm_server_t
@@ -43,16 +47,15 @@
 {
     int active;
     const char *storage_path;
-    apr_proc_mutex_t *mutex;
-    const char *mutex_path;
+    ap_watchdog_t *watchdog;
+    apr_interval_time_t interval;
     apr_sockaddr_t *mcast_addr;
     apr_status_t status;
     volatile int keep_running;
-    apr_thread_mutex_t *start_mtx;
-    apr_thread_t *thread;
     apr_socket_t *sock;
     apr_pool_t *p;
     apr_hash_t *servers;
+    server_rec *s;
 } hm_ctx_t;
 
 static apr_status_t hm_listen(hm_ctx_t *ctx)
@@ -299,194 +302,114 @@
 #define apr_time_from_msec(x) (x * 1000)
 #endif
 
-static void* APR_THREAD_FUNC hm_worker(apr_thread_t *thd, void *data)
-{
-    apr_time_t last;
-    hm_ctx_t *ctx = (hm_ctx_t *) data;
-    apr_status_t rv;
-
-    ctx->p = apr_thread_pool_get(thd);
-    ctx->status = APR_SUCCESS;
-    ctx->keep_running = 1;
-    apr_thread_mutex_unlock(ctx->start_mtx);
-
-    while (ctx->keep_running) {
-        rv = apr_proc_mutex_trylock(ctx->mutex);
-        if (rv == APR_SUCCESS) {
-            break;
-        }
-        apr_sleep(apr_time_from_msec(200));
-    }
-
-    rv = hm_listen(ctx);
-
-    if (rv) {
-        ctx->status = rv;
-        ap_log_error(APLOG_MARK, APLOG_CRIT, rv, NULL,
-                     "Heartmonitor: Unable to listen for connections!");
-        apr_proc_mutex_unlock(ctx->mutex);
-        apr_thread_exit(ctx->thread, rv);
-        return NULL;
-    }
-
-
-    last = apr_time_now();
-    while (ctx->keep_running) {
-        int n;
-        apr_pool_t *p;
-        apr_pollfd_t pfd;
-        apr_interval_time_t timeout;
-        apr_time_t now;
-        apr_pool_create(&p, ctx->p);
-
-        now = apr_time_now();
-
-        if (apr_time_sec((now - last)) > HN_UPDATE_SEC) {
-            hm_update_stats(ctx, p);
-            apr_pool_clear(p);
-            last = now;
-        }
-
-        pfd.desc_type = APR_POLL_SOCKET;
-        pfd.desc.s = ctx->sock;
-        pfd.p = p;
-        pfd.reqevents = APR_POLLIN;
-
-        timeout = apr_time_from_sec(1);
-
-        rv = apr_poll(&pfd, 1, &n, timeout);
-
-        if (!ctx->keep_running) {
-            break;
-        }
-
-        if (rv) {
-            apr_pool_destroy(p);
-            continue;
-        }
-
-        if (pfd.rtnevents & APR_POLLIN) {
-            hm_recv(ctx, p);
-        }
-
-        apr_pool_destroy(p);
-    }
-
-    apr_proc_mutex_unlock(ctx->mutex);
-    apr_thread_exit(ctx->thread, APR_SUCCESS);
-
-    return NULL;
-}
-
-static apr_status_t hm_pool_cleanup(void *baton)
-{
-    apr_status_t rv;
-    hm_ctx_t *ctx = (hm_ctx_t *) baton;
-
-    ctx->keep_running = 0;
 
-    apr_thread_join(&rv, ctx->thread);
-
-    return rv;
-}
-
-static void start_hm_worker(apr_pool_t *p, hm_ctx_t *ctx)
+static apr_status_t hm_watchdog_callback(int state, void *data,
+                                         apr_pool_t *pool)
 {
-    apr_status_t rv;
-
-    rv = apr_thread_mutex_create(&ctx->start_mtx, APR_THREAD_MUTEX_UNNESTED,
-                                 p);
-
-    if (rv) {
-        ap_log_error(APLOG_MARK, APLOG_CRIT, rv, NULL,
-                     "Heartmonitor: apr_thread_mutex_create failed");
-        ctx->status = rv;
-        return;
-    }
-
-    /* This mutex fixes problems with a fast start/fast end, where the pool 
-     * cleanup was being invoked before the thread completely spawned. 
-     */
-    apr_thread_mutex_lock(ctx->start_mtx);
-
-    apr_pool_cleanup_register(p, ctx, hm_pool_cleanup, apr_pool_cleanup_null);
-
-    rv = apr_thread_create(&ctx->thread, NULL, hm_worker, ctx, p);
-    if (rv) {
-        apr_pool_cleanup_kill(p, ctx, hm_pool_cleanup);
-        ap_log_error(APLOG_MARK, APLOG_CRIT, rv, NULL,
-                     "Heartmonitor: apr_thread_create failed");
-        ctx->status = rv;
-    }
-
-    apr_thread_mutex_lock(ctx->start_mtx);
-    apr_thread_mutex_unlock(ctx->start_mtx);
-    apr_thread_mutex_destroy(ctx->start_mtx);
-}
-
-static void hm_child_init(apr_pool_t *p, server_rec *s)
-{
-    hm_ctx_t *ctx =
-        ap_get_module_config(s->module_config, &heartmonitor_module);
+    apr_status_t rv = APR_SUCCESS;
+    apr_time_t cur, now;
+    hm_ctx_t *ctx = (hm_ctx_t *)data;
 
     if (!ctx->active) {
-        return;
+        return rv;
     }
 
-    apr_proc_mutex_child_init(&ctx->mutex, ctx->mutex_path, p);
-
-    ap_log_error(APLOG_MARK, APLOG_NOTICE, 0, s,
-                 "Heartmonitor: Starting Listener Thread. mcast=%pI",
-                 ctx->mcast_addr);
-
-    ctx->status = APR_EGENERAL;
-
-    start_hm_worker(p, ctx);
-
-    if (ctx->status) {
-        ap_log_error(APLOG_MARK, APLOG_CRIT, ctx->status, s,
-                     "Heartmonitor: Failed to start listener thread.");
-        return;
+    switch (state) {
+        case AP_WATCHDOG_STATE_STARTING:
+            rv = hm_listen(ctx);
+            if (rv) {
+                ctx->status = rv;
+                ap_log_error(APLOG_MARK, APLOG_CRIT, rv, NULL,
+                             "Heartmonitor: Unable to listen for connections!");
+            }
+            else {
+                ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, ctx->s,
+                             "Heartmonitor: %s listener started.",
+                             HM_WATHCHDOG_NAME);
+            }
+        break;
+        case AP_WATCHDOG_STATE_RUNNING:
+            hm_update_stats(ctx, pool);
+            cur = now = apr_time_sec(apr_time_now());
+            /* TODO: Insted HN_UPDATE_SEC use
+             * the ctx->interval
+             */
+            while ((now - cur) < apr_time_sec(ctx->interval)) {
+                int n;
+                apr_status_t rc;
+                apr_pool_t *p;
+                apr_pollfd_t pfd;
+                apr_interval_time_t timeout;
+
+                apr_pool_create(&p, pool);
+
+                pfd.desc_type = APR_POLL_SOCKET;
+                pfd.desc.s = ctx->sock;
+                pfd.p = p;
+                pfd.reqevents = APR_POLLIN;
+
+                timeout = apr_time_from_sec(1);
+
+                rc = apr_poll(&pfd, 1, &n, timeout);
+
+                if (!ctx->keep_running) {
+                    break;
+                }
+                if (rc == APR_SUCCESS && (pfd.rtnevents & APR_POLLIN)) {
+                    hm_recv(ctx, p);
+                }
+                now = apr_time_sec(apr_time_now());
+                apr_pool_destroy(p);
+            }
+        break;
+        case AP_WATCHDOG_STATE_STOPPING:
+            ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, ctx->s,
+                         "Heartmonitor: stopping %s listener.",
+                         HM_WATHCHDOG_NAME);
+
+            ctx->keep_running = 0;
+            if (ctx->sock) {
+                apr_socket_close(ctx->sock);
+                ctx->sock = NULL;
+            }
+        break;
     }
-
-    return;
+    return rv;
 }
 
 static int hm_post_config(apr_pool_t *p, apr_pool_t *plog,
                           apr_pool_t *ptemp, server_rec *s)
 {
-    apr_lockmech_e mech;
     apr_status_t rv;
     hm_ctx_t *ctx = ap_get_module_config(s->module_config,
                                          &heartmonitor_module);
 
-
     if (!ctx->active) {
         return OK;
     }
-
-#if APR_HAS_FCNTL_SERIALIZE
-    mech = APR_LOCK_FCNTL;
-#else
-#if APR_HAS_FLOCK_SERIALIZE
-    mech = APR_LOCK_FLOCK;
-#else
-#error port me to a non crap platform.
-#endif
-#endif
-    
-    rv = apr_proc_mutex_create(&ctx->mutex,
-                                            ctx->mutex_path,
-                                            mech,
-                                            p);
-
+    apr_pool_create(&ctx->p, p);
+    rv = ap_watchdog_get_instance(&ctx->watchdog,
+                                  HM_WATHCHDOG_NAME,
+                                  0, 1, p);
     if (rv) {
         ap_log_error(APLOG_MARK, APLOG_CRIT, rv, s,
-                     "Heartmonitor: Failed to create listener "
-                     "mutex at %s (type=%d)", ctx->mutex_path,
-                     mech);
+                     "Heartmonitor: Failed to create watchdog "
+                     "instance (%s)", HM_WATHCHDOG_NAME);
         return !OK;
     }
+    /* Register a callback with zero interval. */
+    rv = ap_watchdog_register_callback(ctx->watchdog,
+                                       0,
+                                       ctx,
+                                       hm_watchdog_callback);
+    if (rv) {
+        ap_log_error(APLOG_MARK, APLOG_CRIT, rv, s,
+                     "Heartmonitor: Failed to register watchdog "
+                     "callback (%s)", HM_WATHCHDOG_NAME);
+        return !OK;
+    }
+    ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, NULL,
+                 "Heartmonitor: wd callback %s", HM_WATHCHDOG_NAME);
 
     return OK;
 }
@@ -494,7 +417,6 @@
 static void hm_register_hooks(apr_pool_t *p)
 {
     ap_hook_post_config(hm_post_config, NULL, NULL, APR_HOOK_MIDDLE);
-    ap_hook_child_init(hm_child_init, NULL, NULL, APR_HOOK_MIDDLE);
 }
 
 static void *hm_create_config(apr_pool_t *p, server_rec *s)
@@ -503,9 +425,10 @@
 
     ctx->active = 0;
     ctx->storage_path = ap_server_root_relative(p, "logs/hb.dat");
-    ctx->mutex_path = 
-        ap_server_root_relative(p, apr_pstrcat(p, ctx->storage_path, ".hm-lock", NULL));
-
+    /* TODO: Add directive for tuning the update interval
+     */
+    ctx->interval = apr_time_from_sec(HM_UPDATE_SEC);
+    ctx->s = s;
     return ctx;
 }
 
@@ -523,8 +446,6 @@
     }
 
     ctx->storage_path = ap_server_root_relative(p, path);
-    ctx->mutex_path =
-        ap_server_root_relative(p, apr_pstrcat(p, path, ".hm-lock", NULL));
 
     return NULL;
 }

Re: svn commit: r759862 - /httpd/httpd/trunk/modules/cluster/mod_heartmonitor.c

Posted by Jim Jagielski <ji...@jaguNET.com>.

On Mar 30, 2009, at 2:39 PM, Ruediger Pluem wrote:
>
> IMHO we should do apr_pool_create *once * before the loop and
> apr_pool_destroy *after* the lookp. In the loop we should only use
> apr_pool_clear. apr_pool_create and apr_pool_destroy require locking
> which seems to be an unneeded overhead here.
>

+1

Re: svn commit: r759862 - /httpd/httpd/trunk/modules/cluster/mod_heartmonitor.c

Posted by Ruediger Pluem <rp...@apache.org>.


On 03/30/2009 10:08 AM, mturk@apache.org wrote:
> Author: mturk
> Date: Mon Mar 30 08:07:59 2009
> New Revision: 759862
> 
> URL: http://svn.apache.org/viewvc?rev=759862&view=rev
> Log:
> Use named watchdog for heartmonitor.
> The watchdog has zero interval, leaving to the callback to determine the running loop.
> 
> Modified:
>     httpd/httpd/trunk/modules/cluster/mod_heartmonitor.c
> 
> Modified: httpd/httpd/trunk/modules/cluster/mod_heartmonitor.c
> URL: http://svn.apache.org/viewvc/httpd/httpd/trunk/modules/cluster/mod_heartmonitor.c?rev=759862&r1=759861&r2=759862&view=diff
> ==============================================================================
> --- httpd/httpd/trunk/modules/cluster/mod_heartmonitor.c (original)
> +++ httpd/httpd/trunk/modules/cluster/mod_heartmonitor.c Mon Mar 30 08:07:59 2009

> @@ -299,194 +302,114 @@
>  #define apr_time_from_msec(x) (x * 1000)
>  #endif
>  
> -static void* APR_THREAD_FUNC hm_worker(apr_thread_t *thd, void *data)
> -{
> -    apr_time_t last;
> -    hm_ctx_t *ctx = (hm_ctx_t *) data;
> -    apr_status_t rv;
> -
> -    ctx->p = apr_thread_pool_get(thd);
> -    ctx->status = APR_SUCCESS;
> -    ctx->keep_running = 1;
> -    apr_thread_mutex_unlock(ctx->start_mtx);
> -
> -    while (ctx->keep_running) {
> -        rv = apr_proc_mutex_trylock(ctx->mutex);
> -        if (rv == APR_SUCCESS) {
> -            break;
> -        }
> -        apr_sleep(apr_time_from_msec(200));
> -    }
> -
> -    rv = hm_listen(ctx);
> -
> -    if (rv) {
> -        ctx->status = rv;
> -        ap_log_error(APLOG_MARK, APLOG_CRIT, rv, NULL,
> -                     "Heartmonitor: Unable to listen for connections!");
> -        apr_proc_mutex_unlock(ctx->mutex);
> -        apr_thread_exit(ctx->thread, rv);
> -        return NULL;
> -    }
> -
> -
> -    last = apr_time_now();
> -    while (ctx->keep_running) {
> -        int n;
> -        apr_pool_t *p;
> -        apr_pollfd_t pfd;
> -        apr_interval_time_t timeout;
> -        apr_time_t now;
> -        apr_pool_create(&p, ctx->p);
> -
> -        now = apr_time_now();
> -
> -        if (apr_time_sec((now - last)) > HN_UPDATE_SEC) {
> -            hm_update_stats(ctx, p);
> -            apr_pool_clear(p);
> -            last = now;
> -        }
> -
> -        pfd.desc_type = APR_POLL_SOCKET;
> -        pfd.desc.s = ctx->sock;
> -        pfd.p = p;
> -        pfd.reqevents = APR_POLLIN;
> -
> -        timeout = apr_time_from_sec(1);
> -
> -        rv = apr_poll(&pfd, 1, &n, timeout);
> -
> -        if (!ctx->keep_running) {
> -            break;
> -        }
> -
> -        if (rv) {
> -            apr_pool_destroy(p);
> -            continue;
> -        }
> -
> -        if (pfd.rtnevents & APR_POLLIN) {
> -            hm_recv(ctx, p);
> -        }
> -
> -        apr_pool_destroy(p);
> -    }
> -
> -    apr_proc_mutex_unlock(ctx->mutex);
> -    apr_thread_exit(ctx->thread, APR_SUCCESS);
> -
> -    return NULL;
> -}
> -
> -static apr_status_t hm_pool_cleanup(void *baton)
> -{
> -    apr_status_t rv;
> -    hm_ctx_t *ctx = (hm_ctx_t *) baton;
> -
> -    ctx->keep_running = 0;
>  
> -    apr_thread_join(&rv, ctx->thread);
> -
> -    return rv;
> -}
> -
> -static void start_hm_worker(apr_pool_t *p, hm_ctx_t *ctx)
> +static apr_status_t hm_watchdog_callback(int state, void *data,
> +                                         apr_pool_t *pool)
>  {
> -    apr_status_t rv;
> -
> -    rv = apr_thread_mutex_create(&ctx->start_mtx, APR_THREAD_MUTEX_UNNESTED,
> -                                 p);
> -
> -    if (rv) {
> -        ap_log_error(APLOG_MARK, APLOG_CRIT, rv, NULL,
> -                     "Heartmonitor: apr_thread_mutex_create failed");
> -        ctx->status = rv;
> -        return;
> -    }
> -
> -    /* This mutex fixes problems with a fast start/fast end, where the pool 
> -     * cleanup was being invoked before the thread completely spawned. 
> -     */
> -    apr_thread_mutex_lock(ctx->start_mtx);
> -
> -    apr_pool_cleanup_register(p, ctx, hm_pool_cleanup, apr_pool_cleanup_null);
> -
> -    rv = apr_thread_create(&ctx->thread, NULL, hm_worker, ctx, p);
> -    if (rv) {
> -        apr_pool_cleanup_kill(p, ctx, hm_pool_cleanup);
> -        ap_log_error(APLOG_MARK, APLOG_CRIT, rv, NULL,
> -                     "Heartmonitor: apr_thread_create failed");
> -        ctx->status = rv;
> -    }
> -
> -    apr_thread_mutex_lock(ctx->start_mtx);
> -    apr_thread_mutex_unlock(ctx->start_mtx);
> -    apr_thread_mutex_destroy(ctx->start_mtx);
> -}
> -
> -static void hm_child_init(apr_pool_t *p, server_rec *s)
> -{
> -    hm_ctx_t *ctx =
> -        ap_get_module_config(s->module_config, &heartmonitor_module);
> +    apr_status_t rv = APR_SUCCESS;
> +    apr_time_t cur, now;
> +    hm_ctx_t *ctx = (hm_ctx_t *)data;
>  
>      if (!ctx->active) {
> -        return;
> +        return rv;
>      }
>  
> -    apr_proc_mutex_child_init(&ctx->mutex, ctx->mutex_path, p);
> -
> -    ap_log_error(APLOG_MARK, APLOG_NOTICE, 0, s,
> -                 "Heartmonitor: Starting Listener Thread. mcast=%pI",
> -                 ctx->mcast_addr);
> -
> -    ctx->status = APR_EGENERAL;
> -
> -    start_hm_worker(p, ctx);
> -
> -    if (ctx->status) {
> -        ap_log_error(APLOG_MARK, APLOG_CRIT, ctx->status, s,
> -                     "Heartmonitor: Failed to start listener thread.");
> -        return;
> +    switch (state) {
> +        case AP_WATCHDOG_STATE_STARTING:
> +            rv = hm_listen(ctx);
> +            if (rv) {
> +                ctx->status = rv;
> +                ap_log_error(APLOG_MARK, APLOG_CRIT, rv, NULL,
> +                             "Heartmonitor: Unable to listen for connections!");
> +            }
> +            else {
> +                ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, ctx->s,
> +                             "Heartmonitor: %s listener started.",
> +                             HM_WATHCHDOG_NAME);
> +            }
> +        break;
> +        case AP_WATCHDOG_STATE_RUNNING:
> +            hm_update_stats(ctx, pool);
> +            cur = now = apr_time_sec(apr_time_now());
> +            /* TODO: Insted HN_UPDATE_SEC use
> +             * the ctx->interval
> +             */
> +            while ((now - cur) < apr_time_sec(ctx->interval)) {
> +                int n;
> +                apr_status_t rc;
> +                apr_pool_t *p;
> +                apr_pollfd_t pfd;
> +                apr_interval_time_t timeout;
> +
> +                apr_pool_create(&p, pool);
> +
> +                pfd.desc_type = APR_POLL_SOCKET;
> +                pfd.desc.s = ctx->sock;
> +                pfd.p = p;
> +                pfd.reqevents = APR_POLLIN;
> +
> +                timeout = apr_time_from_sec(1);
> +
> +                rc = apr_poll(&pfd, 1, &n, timeout);
> +
> +                if (!ctx->keep_running) {
> +                    break;
> +                }
> +                if (rc == APR_SUCCESS && (pfd.rtnevents & APR_POLLIN)) {
> +                    hm_recv(ctx, p);
> +                }
> +                now = apr_time_sec(apr_time_now());
> +                apr_pool_destroy(p);
> +            }

IMHO we should do apr_pool_create *once * before the loop and
apr_pool_destroy *after* the lookp. In the loop we should only use
apr_pool_clear. apr_pool_create and apr_pool_destroy require locking
which seems to be an unneeded overhead here.

Regards

Rüdiger