You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@tomcat.apache.org by rj...@apache.org on 2006/05/14 22:08:24 UTC

svn commit: r406411 - in /tomcat/connectors/trunk/jk/native/common: jk_lb_worker.c jk_lb_worker.h jk_shm.h jk_status.c

Author: rjung
Date: Sun May 14 13:08:24 2006
New Revision: 406411

URL: http://svn.apache.org/viewcvs?rev=406411&view=rev
Log:
Rework load balancer:
- Add a global maintenance method that is called
  periodically in only one process.
- Only check for recovery during global maintenance.
- Use lb_value for all strategies and change it to 64-bit.
- Decay lb_value during global maintenance
  for the Request and Traffic strategies.
- Use lb_mult to reflect balancing factors.
- Add lb_mult to the status worker.
- Make all strategies use the same function, find_best_byvalue().
- Set lb_value to the current maximum when a worker recovers or
  is started/enabled in the status worker.
- Improve locking.

Modified:
    tomcat/connectors/trunk/jk/native/common/jk_lb_worker.c
    tomcat/connectors/trunk/jk/native/common/jk_lb_worker.h
    tomcat/connectors/trunk/jk/native/common/jk_shm.h
    tomcat/connectors/trunk/jk/native/common/jk_status.c

Modified: tomcat/connectors/trunk/jk/native/common/jk_lb_worker.c
URL: http://svn.apache.org/viewcvs/tomcat/connectors/trunk/jk/native/common/jk_lb_worker.c?rev=406411&r1=406410&r2=406411&view=diff
==============================================================================
--- tomcat/connectors/trunk/jk/native/common/jk_lb_worker.c (original)
+++ tomcat/connectors/trunk/jk/native/common/jk_lb_worker.c Sun May 14 13:08:24 2006
@@ -51,6 +51,73 @@
 typedef struct lb_endpoint lb_endpoint_t;
 
 
+/* Calculate the greatest common divisor of two positive integers */
+static int gcd(int a, int b)
+{
+    int r;
+    if (b > a) {
+        r = a;
+        a = b;
+        b = r;
+    }
+    while (b > 0) {
+        r = a % b;
+        a = b;
+        b = r;
+    }
+    return a;
+}
+
+/* Calculate the least common multiple of two positive integers */
+static jk_uint64_t scm(jk_uint64_t a, jk_uint64_t b)
+{
+    return a*b/gcd(a,b);
+}
+
+/* Update the load multipliers wrt. lb_factor */
+void update_mult(lb_worker_t *p, jk_logger_t *l)
+{
+    int i = 0;
+    jk_uint64_t s = 1;
+    JK_TRACE_ENTER(l);
+    for (i = 0; i < p->num_of_workers; i++) {
+        s = scm(s, p->lb_workers[i].s->lb_factor);
+    }
+    for (i = 0; i < p->num_of_workers; i++) {
+        p->lb_workers[i].s->lb_mult = s / p->lb_workers[i].s->lb_factor;
+        if (JK_IS_DEBUG_LEVEL(l))
+            jk_log(l, JK_LOG_DEBUG,
+                   "worker %s gets multiplicity %"
+                   JK_UINT64_T_FMT,
+                   p->lb_workers[i].s->name,
+                   p->lb_workers[i].s->lb_mult);
+    }
+    JK_TRACE_EXIT(l);
+}
+
+/* Get the correct lb_value when recovering/starting/enabling a worker */
+/* This function needs to be externally synchronized! */
+jk_uint64_t restart_value(lb_worker_t *p, jk_logger_t *l)
+{
+    int i = 0;
+    jk_uint64_t curmax = 0;
+    JK_TRACE_ENTER(l);
+    if (p->lbmethod != JK_LB_BYBUSYNESS) {
+        for (i = 0; i < p->num_of_workers; i++) {
+            if (p->lb_workers[i].s->lb_value > curmax) {
+                curmax = p->lb_workers[i].s->lb_value;
+            }
+        }
+    }
+    if (JK_IS_DEBUG_LEVEL(l))
+        jk_log(l, JK_LOG_DEBUG,
+               "restarting worker with lb_value %"
+               JK_UINT64_T_FMT,
+               curmax);
+    JK_TRACE_EXIT(l);
+    return curmax;
+}
+
 /* Retrieve the parameter with the given name                                */
 static char *get_path_param(jk_ws_service_t *s, const char *name)
 {
@@ -159,42 +226,116 @@
     }
 }
 
-static int JK_METHOD maintain_workers(jk_worker_t *p, jk_logger_t *l)
+/* If the worker is in error state run
+ * retry on that worker. It will be marked as
+ * operational if the retry timeout is elapsed.
+ * The worker might still be unusable, but we try
+ * anyway.
+ */
+static void recover_workers(lb_worker_t *p,
+                            jk_uint64_t curmax,
+                            jk_logger_t *l)
 {
-    unsigned int i = 0;
-    lb_worker_t *lb = (lb_worker_t *)p->worker_private;
-    for (i = 0; i < lb->num_of_workers; i++) {
-        if (lb->lb_workers[i].w->maintain) {
-            lb->lb_workers[i].w->maintain(lb->lb_workers[i].w, l);
+    int i;
+    time_t now = time(NULL);
+    int elapsed;
+    worker_record_t *w = NULL;
+    JK_TRACE_ENTER(l);
+
+    for (i = 0; i < p->num_of_workers; i++) {
+        w = &p->lb_workers[i];
+        if (JK_WORKER_IN_ERROR(w->s)) {
+            elapsed = (int)difftime(now, w->s->error_time);
+            if (elapsed <= p->s->recover_wait_time) {
+                if (JK_IS_DEBUG_LEVEL(l))
+                    jk_log(l, JK_LOG_DEBUG,
+                           "worker %s will recover in %d seconds",
+                           w->s->name, p->s->recover_wait_time - elapsed);
+            }
+            else {
+                if (JK_IS_DEBUG_LEVEL(l))
+                    jk_log(l, JK_LOG_DEBUG,
+                           "worker %s is marked for recovery",
+                           w->s->name);
+                w->s->lb_value = curmax;
+                w->s->in_recovering = JK_TRUE;
+                w->s->in_error_state = JK_FALSE;
+                w->s->is_busy = JK_FALSE;
+            }
         }
     }
-    return JK_TRUE;
+
+    JK_TRACE_EXIT(l);
 }
 
-static void retry_worker(worker_record_t *w,
-                         int recover_wait_time,
-                         jk_logger_t *l)
+/* Divide old load values by the decay factor, */
+/* such that older values get less important */
+/* for the routing decisions. */
+static jk_uint64_t decay_load(lb_worker_t *p,
+                              int exponent,
+                              jk_logger_t *l)
 {
-    int elapsed = (int)difftime(time(NULL), w->s->error_time);
+    int i;
+    jk_uint64_t curmax = 0;
     JK_TRACE_ENTER(l);
+    if (p->lbmethod != JK_LB_BYBUSYNESS) {
+        for (i = 0; i < p->num_of_workers; i++) {
+            p->lb_workers[i].s->lb_value >>= exponent;
+            if (p->lb_workers[i].s->lb_value > curmax) {
+                curmax = p->lb_workers[i].s->lb_value;
+            }
+        }
+    }
+    JK_TRACE_EXIT(l);
+    return curmax;
+}
+
+static int JK_METHOD maintain_workers(jk_worker_t *p, jk_logger_t *l)
+{
+    unsigned int i = 0;
+    jk_uint64_t curmax = 0;
+    int delta;
+    time_t now = time(NULL);
+    JK_TRACE_ENTER(l);
+
+    if (p && p->worker_private) {
+        lb_worker_t *lb = (lb_worker_t *)p->worker_private;
+
+        for (i = 0; i < lb->num_of_workers; i++) {
+            if (lb->lb_workers[i].w->maintain) {
+                lb->lb_workers[i].w->maintain(lb->lb_workers[i].w, l);
+            }
+        }
+
+        jk_shm_lock();
+
+/* Now we check for global maintenance (once for all processes).
+ * Checking workers for recovery and applying decay to the
+ * load values should not be done by each process individually.
+ * Therefore we globally sync and we use a global timestamp.
+ * Since it's possible that we come here a few milliseconds
+ * before the interval has passed, we allow a little tolerance.
+ */
+        delta = difftime(now, lb->s->last_maintain_time) + JK_LB_MAINTAIN_TOLERANCE;
+        if (delta >= lb->maintain_time) {
+            lb->s->last_maintain_time = now;
+            if (JK_IS_DEBUG_LEVEL(l))
+                jk_log(l, JK_LOG_DEBUG,
+                       "decay with 2^%d",
+                       JK_LB_DECAY_MULT * delta / lb->maintain_time);
+            curmax = decay_load(lb, JK_LB_DECAY_MULT * delta / lb->maintain_time, l);
+            recover_workers(lb, curmax, l);
+        }
+
+        jk_shm_unlock();
 
-    if (elapsed <= recover_wait_time) {
-        if (JK_IS_DEBUG_LEVEL(l))
-            jk_log(l, JK_LOG_DEBUG,
-                    "worker %s will recover in %d seconds",
-                    w->s->name, recover_wait_time - elapsed);
     }
     else {
-        if (JK_IS_DEBUG_LEVEL(l))
-            jk_log(l, JK_LOG_DEBUG,
-                    "worker %s is marked for recover",
-                    w->s->name);
-        w->s->in_recovering  = JK_TRUE;
-        w->s->in_error_state = JK_FALSE;
-        w->s->is_busy = JK_FALSE;
+        JK_LOG_NULL_PARAMS(l);
     }
 
     JK_TRACE_EXIT(l);
+    return JK_TRUE;
 }
 
 static worker_record_t *find_by_session(lb_worker_t *p,
@@ -220,27 +361,10 @@
                                            jk_logger_t *l)
 {
     unsigned int i;
-    int total_factor = 0;
-    jk_uint64_t mytraffic = 0;
     jk_uint64_t curmin = 0;
-    int bfn = 1;
-    int bfd = 1;
 
     worker_record_t *candidate = NULL;
 
-    if (p->lbmethod == JK_LB_BYTRAFFIC) {
-        double diff;
-        time_t now = time(NULL);
-        /* Update transfer rate for each worker */
-        for (i = 0; i < p->num_of_workers; i++) {
-            diff = difftime(now, p->lb_workers[i].s->service_time);
-            if (diff > JK_SERVICE_TRANSFER_INTERVAL) {
-                p->lb_workers[i].s->service_time = now;
-                p->lb_workers[i].s->readed /= JK_SERVICE_TRANSFER_INTERVAL;
-                p->lb_workers[i].s->transferred /= JK_SERVICE_TRANSFER_INTERVAL;
-            }
-        }
-    }
     /* First try to see if we have available candidate */
     for (i = 0; i < p->num_of_workers; i++) {
         /* Skip all workers that are not member of domain */
@@ -248,41 +372,17 @@
             strcmp(p->lb_workers[i].s->domain, domain))
             continue;
         /* Take into calculation only the workers that are
-         * not in error state, stopped or not disabled.
+         * not in error state, stopped, disabled or busy.
          */
         if (JK_WORKER_USABLE(p->lb_workers[i].s)) {
-            if (p->lbmethod == JK_LB_BYREQUESTS) {
-                p->lb_workers[i].s->lb_value += p->lb_workers[i].s->lb_factor;
-                total_factor += p->lb_workers[i].s->lb_factor;
-                if (!candidate || p->lb_workers[i].s->lb_value > candidate->s->lb_value)
-                    candidate = &p->lb_workers[i];
-            }
-            else if (p->lbmethod == JK_LB_BYTRAFFIC) {
-                mytraffic = (p->lb_workers[i].s->transferred +
-                             p->lb_workers[i].s->readed ) / p->lb_workers[i].s->lb_factor;
-                if (!candidate || mytraffic < curmin) {
-                    candidate = &p->lb_workers[i];
-                    curmin = mytraffic;
-                }
-            }
-            else {
-                /* compare rational numbers: (a/b) < (c/d) iff a*d < c*b
-    			 */
-                int left  = p->lb_workers[i].s->busy * bfd;
-                int right = bfn * p->lb_workers[i].s->lb_factor;
-
-                if (!candidate || (left < right)) {
-                    candidate = &p->lb_workers[i];
-                    bfn = p->lb_workers[i].s->busy;
-                    bfd = p->lb_workers[i].s->lb_factor;
-                }
+            if (!candidate || p->lb_workers[i].s->lb_value < curmin) {
+                candidate = &p->lb_workers[i];
+                curmin = p->lb_workers[i].s->lb_value;
             }
         }
     }
 
     if (candidate) {
-        if (p->lbmethod == JK_LB_BYREQUESTS)
-            candidate->s->lb_value -= total_factor;
         candidate->r = &(candidate->s->domain[0]);
     }
 
@@ -290,130 +390,31 @@
 }
 
 
-static worker_record_t *find_best_byrequests(lb_worker_t *p,
-                                             jk_logger_t *l)
-{
-    unsigned int i;
-    int total_factor = 0;
-    worker_record_t *candidate = NULL;
-
-    /* First try to see if we have available candidate */
-    for (i = 0; i < p->num_of_workers; i++) {
-        /* If the worker is in error state run
-         * retry on that worker. It will be marked as
-         * operational if the retry timeout is elapsed.
-         * The worker might still be unusable, but we try
-         * anyway.
-         */
-        if (JK_WORKER_IN_ERROR(p->lb_workers[i].s)) {
-            retry_worker(&p->lb_workers[i], p->s->recover_wait_time, l);
-        }
-        /* Take into calculation only the workers that are
-         * not in error state, stopped or not disabled.
-         */
-        if (JK_WORKER_USABLE(p->lb_workers[i].s)) {
-            p->lb_workers[i].s->lb_value += p->lb_workers[i].s->lb_factor;
-            total_factor += p->lb_workers[i].s->lb_factor;
-            if (!candidate || p->lb_workers[i].s->lb_value > candidate->s->lb_value)
-                candidate = &p->lb_workers[i];
-        }
-    }
-
-    if (candidate)
-        candidate->s->lb_value -= total_factor;
-
-    return candidate;
-}
-
-static worker_record_t *find_best_bytraffic(lb_worker_t *p,
-                                             jk_logger_t *l)
-{
-    unsigned int i;
-    jk_uint64_t mytraffic = 0;
-    jk_uint64_t curmin = 0;
-    worker_record_t *candidate = NULL;
-    double diff;
-    time_t now = time(NULL);
-
-    for (i = 0; i < p->num_of_workers; i++) {
-        diff = difftime(now, p->lb_workers[i].s->service_time);
-        if (diff > JK_SERVICE_TRANSFER_INTERVAL) {
-            p->lb_workers[i].s->service_time = now;
-            p->lb_workers[i].s->readed /= JK_SERVICE_TRANSFER_INTERVAL;
-            p->lb_workers[i].s->transferred /= JK_SERVICE_TRANSFER_INTERVAL;
-        }
-    }
-    /* First try to see if we have available candidate */
-    for (i = 0; i < p->num_of_workers; i++) {
-        /* If the worker is in error state run
-         * retry on that worker. It will be marked as
-         * operational if the retry timeout is elapsed.
-         * The worker might still be unusable, but we try
-         * anyway.
-         */
-        if (JK_WORKER_IN_ERROR(p->lb_workers[i].s)) {
-            retry_worker(&p->lb_workers[i], p->s->recover_wait_time, l);
-        }
-        /* Take into calculation only the workers that are
-         * not in error state, stopped or not disabled.
-         */
-        if (JK_WORKER_USABLE(p->lb_workers[i].s)) {
-            mytraffic = (p->lb_workers[i].s->transferred/p->lb_workers[i].s->lb_factor) +
-                        (p->lb_workers[i].s->readed/p->lb_workers[i].s->lb_factor);
-            if (!candidate || mytraffic < curmin) {
-                candidate = &p->lb_workers[i];
-                curmin = mytraffic;
-            }
-        }
-    }
-    return candidate;
-}
-
-static worker_record_t *find_best_bybusyness(lb_worker_t *p,
-                                             jk_logger_t *l)
+static worker_record_t *find_best_byvalue(lb_worker_t *p,
+                                          jk_logger_t *l)
 {
     static unsigned int next_offset = 0;
     unsigned int i;
     unsigned int j;
     unsigned int offset;
-    int bfn = 1;  /* Numerator of best busy factor */
-    int bfd = 1;  /* Denominator of best busy factor */
-
-    int left; /* left and right are used to compare rational numbers */
-    int right;
+    jk_uint64_t curmin = 0;
 
     /* find the least busy worker */
     worker_record_t *candidate = NULL;
 
     offset = next_offset;
 
-    /* First try to see if we have available candidate
-	 */
-    for (j = 0; j < p->num_of_workers; j++) {
-        i = (j + offset) % p->num_of_workers;
-
-        /* If the worker is in error state run
-         * retry on that worker. It will be marked as
-         * operational if the retry timeout is elapsed.
-         * The worker might still be unusable, but we try
-         * anyway.
-         */
-        if (JK_WORKER_IN_ERROR(p->lb_workers[i].s)) {
-            retry_worker(&p->lb_workers[i], p->s->recover_wait_time, l);
-        }
+    /* First try to see if we have available candidate */
+    for (j = offset; j < p->num_of_workers + offset; j++) {
+        i = j % p->num_of_workers;
+
         /* Take into calculation only the workers that are
-         * not in error state, stopped or not disabled.
+         * not in error state, stopped, disabled or busy.
          */
         if (JK_WORKER_USABLE(p->lb_workers[i].s)) {
-            /* compare rational numbers: (a/b) < (c/d) iff a*d < c*b
-			 */
-            left  = p->lb_workers[i].s->busy * bfd;
-            right = bfn * p->lb_workers[i].s->lb_factor;
-
-            if (!candidate || (left < right)) {
+            if (!candidate || (p->lb_workers[i].s->lb_value < curmin)) {
                 candidate = &p->lb_workers[i];
-                bfn = p->lb_workers[i].s->busy;
-                bfd = p->lb_workers[i].s->lb_factor;
+                curmin = p->lb_workers[i].s->lb_value;
                 next_offset = i + 1;
             }
         }
@@ -426,7 +427,6 @@
                                              jk_logger_t *l)
 {
     unsigned int i;
-    int total_factor = 0;
     int uses_domain  = 0;
     worker_record_t *candidate = NULL;
 
@@ -436,9 +436,6 @@
         candidate = find_best_bydomain(p, name, l);
     }
     if (candidate) {
-        if (JK_WORKER_IN_ERROR(candidate->s)) {
-            retry_worker(candidate, p->s->recover_wait_time, l);
-        }
         if (candidate->s->in_error_state || candidate->s->is_stopped ) {
             /* We have a worker that is error state or stopped.
              * If it has a redirection set use that redirection worker.
@@ -458,21 +455,6 @@
                 candidate = NULL;
         }
     }
-    if (candidate && !uses_domain &&
-        p->lbmethod == JK_LB_BYREQUESTS) {
-
-        for (i = 0; i < p->num_of_workers; i++) {
-            if (JK_WORKER_USABLE(p->lb_workers[i].s)) {
-                /* Skip all workers that are not member of candidate domain */
-                if (*candidate->s->domain &&
-                    strcmp(p->lb_workers[i].s->domain, candidate->s->domain))
-                    continue;
-                p->lb_workers[i].s->lb_value += p->lb_workers[i].s->lb_factor;
-                total_factor += p->lb_workers[i].s->lb_factor;
-            }
-        }
-        candidate->s->lb_value -= total_factor;
-    }
     return candidate;
 }
 
@@ -499,12 +481,7 @@
 {
     worker_record_t *rc = NULL;
 
-    if (p->lbmethod == JK_LB_BYREQUESTS)
-        rc = find_best_byrequests(p, l);
-    else if (p->lbmethod == JK_LB_BYTRAFFIC)
-        rc = find_best_bytraffic(p, l);
-    else if (p->lbmethod == JK_LB_BYBUSYNESS)
-        rc = find_best_bybusyness(p, l);
+    rc = find_best_byvalue(p, l);
     /* By default use worker name as session route */
     if (rc)
         rc->r = &(rc->s->name[0]);
@@ -527,10 +504,6 @@
         /* No need to find the best worker
          * if there is a single one
          */
-        if (JK_WORKER_IN_ERROR(p->lb_workers[0].s)) {
-            retry_worker(&p->lb_workers[0], p->s->recover_wait_time, l);
-        }
-        /* Check if worker is marked for retry */
         if(!p->lb_workers[0].s->in_error_state && !p->lb_workers[0].s->is_stopped) {
             p->lb_workers[0].r = &(p->lb_workers[0].s->name[0]);
             JK_TRACE_EXIT(l);
@@ -564,7 +537,7 @@
         if (JK_IS_DEBUG_LEVEL(l)) {
             jk_log(l, JK_LOG_DEBUG,
                    "total sessionid is %s",
-                    sessionid ? sessionid : "empty");
+                   sessionid ? sessionid : "empty");
         }
         while (sessionid) {
             char *next = strchr(sessionid, ';');
@@ -582,7 +555,7 @@
                 if (JK_IS_DEBUG_LEVEL(l))
                     jk_log(l, JK_LOG_DEBUG,
                            "searching worker for session route %s",
-                            session_route);
+                           session_route);
 
                 /* We have a session route. Whow! */
                 rc = find_bysession_route(p, session_route, l);
@@ -612,7 +585,7 @@
             }
             jk_log(l, JK_LOG_INFO,
                    "all workers are in error state for session %s",
-                    session);
+                   session);
             JK_TRACE_EXIT(l);
             return NULL;
         }
@@ -691,6 +664,10 @@
                     if (p->worker->s->busy > p->worker->s->max_busy)
                         p->worker->s->max_busy = p->worker->s->busy;
                     rec->s->busy++;
+                    if (p->worker->lbmethod == JK_LB_BYREQUESTS)
+                        rec->s->lb_value += rec->s->lb_mult;
+                    else if (p->worker->lbmethod == JK_LB_BYBUSYNESS)
+                        rec->s->lb_value += rec->s->lb_mult;
                     if (rec->s->busy > rec->s->max_busy)
                         rec->s->max_busy = rec->s->busy;
                     if (p->worker->lblock == JK_LB_LOCK_PESSIMISTIC)
@@ -707,6 +684,25 @@
                     /* Update partial reads and writes if any */
                     rec->s->readed += rd;
                     rec->s->transferred += wr;
+                    if (p->worker->lbmethod == JK_LB_BYTRAFFIC)
+                        rec->s->lb_value += (rd+wr)*rec->s->lb_mult;
+                    else if (p->worker->lbmethod == JK_LB_BYBUSYNESS)
+                        if (rec->s->lb_value >= rec->s->lb_mult)
+                            rec->s->lb_value -= rec->s->lb_mult;
+                        else {
+                            rec->s->lb_value = 0;
+                            if (JK_IS_DEBUG_LEVEL(l))
+                                jk_log(l, JK_LOG_DEBUG,
+                                       "worker %s has load value to low (%"
+                                       JK_UINT64_T_FMT
+                                       " < %"
+                                       JK_UINT64_T_FMT
+                                       ") ",
+                                       "- correcting to 0",
+                                       rec->s->name,
+                                       rec->s->lb_value,
+                                       rec->s->lb_mult);
+                        }
 
                     /* When returning the endpoint mark the worker as not busy.
                      * We have at least one endpoint free
@@ -919,7 +915,7 @@
                 if ((s = jk_get_worker_redirect(props, worker_names[i], NULL)))
                     strncpy(p->lb_workers[i].s->redirect, s, JK_SHM_STR_SIZ);
 
-                p->lb_workers[i].s->lb_value = p->lb_workers[i].s->lb_factor;
+                p->lb_workers[i].s->lb_value = 0;
                 p->lb_workers[i].s->in_error_state = JK_FALSE;
                 p->lb_workers[i].s->in_recovering = JK_FALSE;
                 p->lb_workers[i].s->is_busy = JK_FALSE;
@@ -949,14 +945,15 @@
                 close_workers(p, i, l);
             }
             else {
-                if (JK_IS_DEBUG_LEVEL(l)) {
-                    for (i = 0; i < num_of_workers; i++) {
+                for (i = 0; i < num_of_workers; i++) {
+                    if (JK_IS_DEBUG_LEVEL(l)) {
                         jk_log(l, JK_LOG_DEBUG,
                                "Balanced worker %i has name %s in domain %s",
                                i, p->lb_workers[i].s->name, p->lb_workers[i].s->domain);
                     }
                 }
                 p->num_of_workers = num_of_workers;
+                update_mult(p, l);
                 JK_TRACE_EXIT(l);
                 return JK_TRUE;
             }
@@ -984,6 +981,8 @@
                                                             WAIT_BEFORE_RECOVER);
     if (p->s->recover_wait_time < WAIT_BEFORE_RECOVER)
         p->s->recover_wait_time = WAIT_BEFORE_RECOVER;
+    p->maintain_time = jk_get_worker_maintain_time(props);
+    p->s->last_maintain_time = time(NULL);
 
     p->lbmethod = jk_get_lb_method(props, p->s->name);
     p->lblock   = jk_get_lb_lock(props, p->s->name);

Modified: tomcat/connectors/trunk/jk/native/common/jk_lb_worker.h
URL: http://svn.apache.org/viewcvs/tomcat/connectors/trunk/jk/native/common/jk_lb_worker.h?rev=406411&r1=406410&r2=406411&view=diff
==============================================================================
--- tomcat/connectors/trunk/jk/native/common/jk_lb_worker.h (original)
+++ tomcat/connectors/trunk/jk/native/common/jk_lb_worker.h Sun May 14 13:08:24 2006
@@ -48,10 +48,14 @@
 #define JK_LB_LM_DEFAULT       ("Optimistic")
 #define JK_LB_LM_PESSIMISTIC   ("Pessimistic")
 
-/*
- * Time to wait before retry...
- */
+/* Time to wait before retry. */
 #define WAIT_BEFORE_RECOVER   (60)
+/* We accept doing global maintenance if we are */
+/* JK_LB_MAINTAIN_TOLERANCE seconds early. */
+#define JK_LB_MAINTAIN_TOLERANCE (2)
+/* We divide load values by 2^x during global maintenance. */
+/* The exponent x is JK_LB_DECAY_MULT*#MAINT_INTV_SINCE_LAST_MAINT */
+#define JK_LB_DECAY_MULT         (1)
 
 static const char *lb_method_type[] = {
     JK_LB_METHOD_REQUESTS,
@@ -77,6 +81,7 @@
     unsigned int num_of_workers;
     int          lbmethod;
     int          lblock;
+    time_t       maintain_time;
 
     jk_pool_t p;
     jk_pool_atom_t buf[TINY_POOL_SIZE];
@@ -91,6 +96,9 @@
 
 int JK_METHOD lb_worker_factory(jk_worker_t **w,
                                 const char *name, jk_logger_t *l);
+
+jk_uint64_t restart_value(lb_worker_t *p, jk_logger_t *l);
+void update_mult(lb_worker_t * p, jk_logger_t *l);
 
 #ifdef __cplusplus
 }

Modified: tomcat/connectors/trunk/jk/native/common/jk_shm.h
URL: http://svn.apache.org/viewcvs/tomcat/connectors/trunk/jk/native/common/jk_shm.h?rev=406411&r1=406410&r2=406411&view=diff
==============================================================================
--- tomcat/connectors/trunk/jk/native/common/jk_shm.h (original)
+++ tomcat/connectors/trunk/jk/native/common/jk_shm.h Sun May 14 13:08:24 2006
@@ -53,9 +53,6 @@
 #define JK_SHM_ALIGNMENT    64
 #define JK_SHM_ALIGN(x)     JK_ALIGN(x, JK_SHM_ALIGNMENT)
 
-/* Use 1 minute for measuring read/write data */
-#define JK_SERVICE_TRANSFER_INTERVAL    60
-
 /** jk shm worker record structure */
 struct jk_shm_worker
 {
@@ -78,8 +75,10 @@
     volatile int is_busy;
     /* Current lb factor */
     volatile int lb_factor;
+    /* Current lb reciprocal factor */
+    volatile jk_uint64_t lb_mult;
     /* Current lb value  */
-    volatile int lb_value;
+    volatile jk_uint64_t lb_value;
     volatile int in_error_state;
     volatile int in_recovering;
     int     sticky_session;
@@ -89,7 +88,7 @@
     /* Statistical data */
     volatile time_t  error_time;
     /* Service transfer rate time */
-    volatile time_t  service_time;
+    volatile time_t  last_maintain_time;
     /* Number of bytes read from remote */
     volatile jk_uint64_t readed;
     /* Number of bytes transferred to remote */

Modified: tomcat/connectors/trunk/jk/native/common/jk_status.c
URL: http://svn.apache.org/viewcvs/tomcat/connectors/trunk/jk/native/common/jk_status.c?rev=406411&r1=406410&r2=406411&view=diff
==============================================================================
--- tomcat/connectors/trunk/jk/native/common/jk_status.c (original)
+++ tomcat/connectors/trunk/jk/native/common/jk_status.c Sun May 14 13:08:24 2006
@@ -448,7 +448,7 @@
             jk_puts(s, "</tr>\n</table>\n<br/>\n");
             jk_puts(s, "<table><tr>"
                     "<th>Name</th><th>Type</th><th>Host</th><th>Addr</th>"
-                    "<th>Stat</th><th>F</th><th>V</th><th>Acc</th><th>Err</th>"
+                    "<th>Stat</th><th>F</th><th>M</th><th>V</th><th>Acc</th><th>Err</th>"
                     "<th>Wr</th><th>Rd</th><th>Busy</th><th>Max</th><th>RR</th><th>Cd</th></tr>\n");
             for (j = 0; j < lb->num_of_workers; j++) {
                 worker_record_t *wr = &(lb->lb_workers[j]);
@@ -472,7 +472,8 @@
                                           wr->s->is_busy),
                         "</td>", NULL);
                 jk_printf(s, "<td>%d</td>", wr->s->lb_factor);
-                jk_printf(s, "<td>%d</td>", wr->s->lb_value);
+                jk_printf(s, "<td>%" JK_UINT64_T_FMT "</td>", wr->s->lb_mult);
+                jk_printf(s, "<td>%" JK_UINT64_T_FMT "</td>", wr->s->lb_value);
                 jk_printf(s, "<td>%u</td>", wr->s->elected);
                 jk_printf(s, "<td>%u</td>", wr->s->errors);
                 jk_putv(s, "<td>", status_strfsize(wr->s->transferred, buf),
@@ -579,6 +580,7 @@
             "<tr><th>Addr</th><td>Backend Address info</td></tr>\n"
             "<tr><th>Stat</th><td>Worker status</td></tr>\n"
             "<tr><th>F</th><td>Load Balancer Factor</td></tr>\n"
+            "<tr><th>M</th><td>Load Balancer Multiplicity</td></tr>\n"
             "<tr><th>V</th><td>Load Balancer Value</td></tr>\n"
             "<tr><th>Acc</th><td>Number of requests</td></tr>\n"
             "<tr><th>Err</th><td>Number of failed requests</td></tr>\n"
@@ -652,7 +654,8 @@
                                   wr->s->is_busy) );
 
             jk_printf(s, " lbfactor=\"%d\"", wr->s->lb_factor);
-            jk_printf(s, " lbvalue=\"%d\"", wr->s->lb_value);
+            jk_printf(s, " lbmult=\"%" JK_UINT64_T_FMT "\"", wr->s->lb_mult);
+            jk_printf(s, " lbvalue=\"%" JK_UINT64_T_FMT "\"", wr->s->lb_value);
             jk_printf(s, " elected=\"%u\"", wr->s->elected);
             jk_printf(s, " errors=\"%u\"", wr->s->errors);
             jk_printf(s, " transferred=\"%" JK_UINT64_T_FMT "\"", wr->s->transferred);
@@ -678,6 +681,7 @@
                           const char *dworker, jk_logger_t *l)
 {
     int i;
+    int j;
     char buf[1024];
     const char *b;
     lb_worker_t *lb;
@@ -731,11 +735,30 @@
             strncpy(wr->s->domain, b, JK_SHM_STR_SIZ);
         else
             memset(wr->s->domain, 0, JK_SHM_STR_SIZ);
-        wr->s->is_disabled = status_bool("wd", s->query_string);
-        wr->s->is_stopped = status_bool("ws", s->query_string);
+        i = status_bool("wd", s->query_string);
+        j = status_bool("ws", s->query_string);
+        if (wr->s->is_disabled!=i || wr->s->is_stopped!=j) {
+            /* lock shared memory */
+            jk_shm_lock();
+            wr->s->is_disabled = i;
+            wr->s->is_stopped = j;
+            wr->s->lb_value = restart_value(lb, l);
+            if (i+j==0) {
+                jk_log(l, JK_LOG_INFO,
+                       "worker %s restarted in status worker with lb_value %"
+                       JK_UINT64_T_FMT,
+                       wr->s->name,
+                       wr->s->lb_value);
+            }
+            /* unlock the shared memory */
+            jk_shm_unlock();
+        }
         i = status_int("wf", s->query_string, wr->s->lb_factor);
-        if (i > 0)
+        if (i > 0 && wr->s->lb_factor != i) {
             wr->s->lb_factor = i;
+/* Recalculate the load multipliers wrt. lb_factor */
+            update_mult(lb, l);
+        }
     }
 }
 



---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe@tomcat.apache.org
For additional commands, e-mail: dev-help@tomcat.apache.org