You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@tomcat.apache.org by rj...@apache.org on 2006/05/14 22:08:24 UTC
svn commit: r406411 - in /tomcat/connectors/trunk/jk/native/common:
jk_lb_worker.c jk_lb_worker.h jk_shm.h jk_status.c
Author: rjung
Date: Sun May 14 13:08:24 2006
New Revision: 406411
URL: http://svn.apache.org/viewcvs?rev=406411&view=rev
Log:
Rework load balancer:
- Add a global maintenance method that is
called periodically in only one process.
- Only check for recovery during global maintenance
- Use lb_value for all strategies and widen it to 64 bits
- Decay lb_value during global maintenance
for the Request and Traffic strategies
- Use lb_mult to reflect balancing factors
- Add lb_mult to the status worker
- Make all strategies use the same function find_best_byvalue()
- Set lb_value to the current maximum when a worker recovers or
is started/enabled in the status worker
- Improve locking
Modified:
tomcat/connectors/trunk/jk/native/common/jk_lb_worker.c
tomcat/connectors/trunk/jk/native/common/jk_lb_worker.h
tomcat/connectors/trunk/jk/native/common/jk_shm.h
tomcat/connectors/trunk/jk/native/common/jk_status.c
Modified: tomcat/connectors/trunk/jk/native/common/jk_lb_worker.c
URL: http://svn.apache.org/viewcvs/tomcat/connectors/trunk/jk/native/common/jk_lb_worker.c?rev=406411&r1=406410&r2=406411&view=diff
==============================================================================
--- tomcat/connectors/trunk/jk/native/common/jk_lb_worker.c (original)
+++ tomcat/connectors/trunk/jk/native/common/jk_lb_worker.c Sun May 14 13:08:24 2006
@@ -51,6 +51,73 @@
typedef struct lb_endpoint lb_endpoint_t;
+/* Calculate the greatest common divisor of two positive integers.
+ * The operands are 64 bit wide because scm() passes jk_uint64_t
+ * values that can exceed the range of an int; narrowing them
+ * would yield a wrong divisor.
+ * Returns the other operand if one operand is 0, and 0 if both are 0.
+ */
+static jk_uint64_t gcd(jk_uint64_t a, jk_uint64_t b)
+{
+    jk_uint64_t r;
+    /* Euclid's algorithm; a first iteration with b > a swaps them. */
+    while (b > 0) {
+        r = a % b;
+        a = b;
+        b = r;
+    }
+    return a;
+}
+
+/* Calculate the least common multiple of two positive integers.
+ * Returns 0 if either argument is 0.
+ */
+static jk_uint64_t scm(jk_uint64_t a, jk_uint64_t b)
+{
+    jk_uint64_t g = gcd(a, b);
+    /* A zero divisor would crash the process. */
+    if (g == 0)
+        return 0;
+    /* The gcd divides a, so dividing first gives the same result
+     * without overflowing in the intermediate product a*b. */
+    return a / g * b;
+}
+
+/* Update the load multipliers wrt. lb_factor.
+ * Every worker gets lb_mult = S / lb_factor, where S is the least
+ * common multiple of all lb_factors, so a worker with a larger
+ * factor gets a smaller multiplier.
+ * A factor below 1 is treated as 1 to avoid a division by zero.
+ */
+void update_mult(lb_worker_t *p, jk_logger_t *l)
+{
+    unsigned int i;
+    int factor;
+    jk_uint64_t s = 1;
+    JK_TRACE_ENTER(l);
+    /* First pass: common multiple of all (clamped) factors. */
+    for (i = 0; i < p->num_of_workers; i++) {
+        factor = p->lb_workers[i].s->lb_factor;
+        if (factor < 1)
+            factor = 1;
+        s = scm(s, factor);
+    }
+    /* Second pass: derive the multiplier of every worker. */
+    for (i = 0; i < p->num_of_workers; i++) {
+        factor = p->lb_workers[i].s->lb_factor;
+        if (factor < 1)
+            factor = 1;
+        p->lb_workers[i].s->lb_mult = s / factor;
+        if (JK_IS_DEBUG_LEVEL(l))
+            jk_log(l, JK_LOG_DEBUG,
+                   "worker %s gets multiplicity %"
+                   JK_UINT64_T_FMT,
+                   p->lb_workers[i].s->name,
+                   p->lb_workers[i].s->lb_mult);
+    }
+    JK_TRACE_EXIT(l);
+}
+
+/* Determine the lb_value to hand to a worker that is recovering,
+ * being started or being enabled: the largest lb_value currently
+ * held by any member, or 0 when balancing by busyness.
+ * The caller has to provide the synchronization.
+ */
+jk_uint64_t restart_value(lb_worker_t *p, jk_logger_t *l)
+{
+    int idx = 0;
+    jk_uint64_t highest = 0;
+    JK_TRACE_ENTER(l);
+    if (p->lbmethod != JK_LB_BYBUSYNESS) {
+        while (idx < p->num_of_workers) {
+            if (highest < p->lb_workers[idx].s->lb_value)
+                highest = p->lb_workers[idx].s->lb_value;
+            idx++;
+        }
+    }
+    if (JK_IS_DEBUG_LEVEL(l)) {
+        jk_log(l, JK_LOG_DEBUG,
+               "restarting worker with lb_value %"
+               JK_UINT64_T_FMT,
+               highest);
+    }
+    JK_TRACE_EXIT(l);
+    return highest;
+}
+
/* Retrieve the parameter with the given name */
static char *get_path_param(jk_ws_service_t *s, const char *name)
{
@@ -159,42 +226,116 @@
}
}
-static int JK_METHOD maintain_workers(jk_worker_t *p, jk_logger_t *l)
+/* Check every member of the balancer that is in error state.
+ * Once more than recover_wait_time seconds have passed since its
+ * error_time, mark it as recovering, clear its error and busy
+ * flags and set its lb_value to curmax.
+ * The worker might still be unusable, but we try anyway.
+ */
+static void recover_workers(lb_worker_t *p,
+ jk_uint64_t curmax,
+ jk_logger_t *l)
{
- unsigned int i = 0;
- lb_worker_t *lb = (lb_worker_t *)p->worker_private;
- for (i = 0; i < lb->num_of_workers; i++) {
- if (lb->lb_workers[i].w->maintain) {
- lb->lb_workers[i].w->maintain(lb->lb_workers[i].w, l);
+ int i;
+ time_t now = time(NULL);
+ int elapsed;
+ worker_record_t *w = NULL;
+ JK_TRACE_ENTER(l);
+
+ for (i = 0; i < p->num_of_workers; i++) {
+ w = &p->lb_workers[i];
+ if (JK_WORKER_IN_ERROR(w->s)) {
+ elapsed = (int)difftime(now, w->s->error_time); /* whole seconds since error_time */
+ if (elapsed <= p->s->recover_wait_time) { /* still waiting: only report the remaining time */
+ if (JK_IS_DEBUG_LEVEL(l))
+ jk_log(l, JK_LOG_DEBUG,
+ "worker %s will recover in %d seconds",
+ w->s->name, p->s->recover_wait_time - elapsed);
+ }
+ else {
+ if (JK_IS_DEBUG_LEVEL(l))
+ jk_log(l, JK_LOG_DEBUG,
+ "worker %s is marked for recovery",
+ w->s->name);
+ w->s->lb_value = curmax; /* start from the value passed in by the caller */
+ w->s->in_recovering = JK_TRUE;
+ w->s->in_error_state = JK_FALSE;
+ w->s->is_busy = JK_FALSE;
+ }
}
}
- return JK_TRUE;
+
+ JK_TRACE_EXIT(l);
}
-static void retry_worker(worker_record_t *w,
- int recover_wait_time,
- jk_logger_t *l)
+/* Divide old load values by the decay factor 2^exponent,
+ * such that older values get less important
+ * for the routing decisions.
+ * Nothing is decayed when balancing by busyness.
+ * Returns the largest lb_value left after the decay
+ * (0 when balancing by busyness).
+ */
+static jk_uint64_t decay_load(lb_worker_t *p,
+                              int exponent,
+                              jk_logger_t *l)
{
- int elapsed = (int)difftime(time(NULL), w->s->error_time);
+    unsigned int i;
+    jk_uint64_t curmax = 0;
+    /* Shifting by a negative count or by at least the width of the
+     * type is undefined in C, so both cases are handled explicitly. */
+    const int width = (int)(sizeof(jk_uint64_t) * 8);
JK_TRACE_ENTER(l);
+    if (p->lbmethod != JK_LB_BYBUSYNESS) {
+        for (i = 0; i < p->num_of_workers; i++) {
+            if (exponent >= width) {
+                /* Dividing by 2^width or more leaves nothing. */
+                p->lb_workers[i].s->lb_value = 0;
+            }
+            else if (exponent > 0) {
+                p->lb_workers[i].s->lb_value >>= exponent;
+            }
+            if (p->lb_workers[i].s->lb_value > curmax) {
+                curmax = p->lb_workers[i].s->lb_value;
+            }
+        }
+    }
+    JK_TRACE_EXIT(l);
+    return curmax;
+}
+
+static int JK_METHOD maintain_workers(jk_worker_t *p, jk_logger_t *l)
+{
+ unsigned int i = 0;
+ jk_uint64_t curmax = 0;
+ int delta;
+ time_t now = time(NULL);
+ JK_TRACE_ENTER(l);
+
+ if (p && p->worker_private) {
+ lb_worker_t *lb = (lb_worker_t *)p->worker_private;
+
+ for (i = 0; i < lb->num_of_workers; i++) {
+ if (lb->lb_workers[i].w->maintain) {
+ lb->lb_workers[i].w->maintain(lb->lb_workers[i].w, l); /* forward maintenance to the member worker */
+ }
+ }
+
+ jk_shm_lock();
+
+/* Global maintenance, done once for all processes: checking
+ * workers for recovery and decaying the load values must not
+ * be repeated by each process individually. Therefore the check
+ * runs between jk_shm_lock() and jk_shm_unlock() against one timestamp.
+ * Since it's possible that we come here a few milliseconds
+ * before the interval has passed, we allow a little tolerance.
+ */
+ delta = difftime(now, lb->s->last_maintain_time) + JK_LB_MAINTAIN_TOLERANCE; /* double result is truncated to int */
+ if (delta >= lb->maintain_time) {
+ lb->s->last_maintain_time = now;
+ if (JK_IS_DEBUG_LEVEL(l))
+ jk_log(l, JK_LOG_DEBUG,
+ "decay with 2^%d",
+ JK_LB_DECAY_MULT * delta / lb->maintain_time);
+ curmax = decay_load(lb, JK_LB_DECAY_MULT * delta / lb->maintain_time, l); /* NOTE(review): divides by maintain_time; assumes it is never 0 - confirm */
+ recover_workers(lb, curmax, l);
+ }
+
+ jk_shm_unlock();
- if (elapsed <= recover_wait_time) {
- if (JK_IS_DEBUG_LEVEL(l))
- jk_log(l, JK_LOG_DEBUG,
- "worker %s will recover in %d seconds",
- w->s->name, recover_wait_time - elapsed);
}
else {
- if (JK_IS_DEBUG_LEVEL(l))
- jk_log(l, JK_LOG_DEBUG,
- "worker %s is marked for recover",
- w->s->name);
- w->s->in_recovering = JK_TRUE;
- w->s->in_error_state = JK_FALSE;
- w->s->is_busy = JK_FALSE;
+ JK_LOG_NULL_PARAMS(l);
}
JK_TRACE_EXIT(l);
+ return JK_TRUE; /* returned on the NULL-parameter path as well */
}
static worker_record_t *find_by_session(lb_worker_t *p,
@@ -220,27 +361,10 @@
jk_logger_t *l)
{
unsigned int i;
- int total_factor = 0;
- jk_uint64_t mytraffic = 0;
jk_uint64_t curmin = 0;
- int bfn = 1;
- int bfd = 1;
worker_record_t *candidate = NULL;
- if (p->lbmethod == JK_LB_BYTRAFFIC) {
- double diff;
- time_t now = time(NULL);
- /* Update transfer rate for each worker */
- for (i = 0; i < p->num_of_workers; i++) {
- diff = difftime(now, p->lb_workers[i].s->service_time);
- if (diff > JK_SERVICE_TRANSFER_INTERVAL) {
- p->lb_workers[i].s->service_time = now;
- p->lb_workers[i].s->readed /= JK_SERVICE_TRANSFER_INTERVAL;
- p->lb_workers[i].s->transferred /= JK_SERVICE_TRANSFER_INTERVAL;
- }
- }
- }
/* First try to see if we have available candidate */
for (i = 0; i < p->num_of_workers; i++) {
/* Skip all workers that are not member of domain */
@@ -248,41 +372,17 @@
strcmp(p->lb_workers[i].s->domain, domain))
continue;
/* Take into calculation only the workers that are
- * not in error state, stopped or not disabled.
+ * not in error state, stopped, disabled or busy.
*/
if (JK_WORKER_USABLE(p->lb_workers[i].s)) {
- if (p->lbmethod == JK_LB_BYREQUESTS) {
- p->lb_workers[i].s->lb_value += p->lb_workers[i].s->lb_factor;
- total_factor += p->lb_workers[i].s->lb_factor;
- if (!candidate || p->lb_workers[i].s->lb_value > candidate->s->lb_value)
- candidate = &p->lb_workers[i];
- }
- else if (p->lbmethod == JK_LB_BYTRAFFIC) {
- mytraffic = (p->lb_workers[i].s->transferred +
- p->lb_workers[i].s->readed ) / p->lb_workers[i].s->lb_factor;
- if (!candidate || mytraffic < curmin) {
- candidate = &p->lb_workers[i];
- curmin = mytraffic;
- }
- }
- else {
- /* compare rational numbers: (a/b) < (c/d) iff a*d < c*b
- */
- int left = p->lb_workers[i].s->busy * bfd;
- int right = bfn * p->lb_workers[i].s->lb_factor;
-
- if (!candidate || (left < right)) {
- candidate = &p->lb_workers[i];
- bfn = p->lb_workers[i].s->busy;
- bfd = p->lb_workers[i].s->lb_factor;
- }
+ if (!candidate || p->lb_workers[i].s->lb_value < curmin) {
+ candidate = &p->lb_workers[i];
+ curmin = p->lb_workers[i].s->lb_value;
}
}
}
if (candidate) {
- if (p->lbmethod == JK_LB_BYREQUESTS)
- candidate->s->lb_value -= total_factor;
candidate->r = &(candidate->s->domain[0]);
}
@@ -290,130 +390,31 @@
}
-static worker_record_t *find_best_byrequests(lb_worker_t *p,
- jk_logger_t *l)
-{
- unsigned int i;
- int total_factor = 0;
- worker_record_t *candidate = NULL;
-
- /* First try to see if we have available candidate */
- for (i = 0; i < p->num_of_workers; i++) {
- /* If the worker is in error state run
- * retry on that worker. It will be marked as
- * operational if the retry timeout is elapsed.
- * The worker might still be unusable, but we try
- * anyway.
- */
- if (JK_WORKER_IN_ERROR(p->lb_workers[i].s)) {
- retry_worker(&p->lb_workers[i], p->s->recover_wait_time, l);
- }
- /* Take into calculation only the workers that are
- * not in error state, stopped or not disabled.
- */
- if (JK_WORKER_USABLE(p->lb_workers[i].s)) {
- p->lb_workers[i].s->lb_value += p->lb_workers[i].s->lb_factor;
- total_factor += p->lb_workers[i].s->lb_factor;
- if (!candidate || p->lb_workers[i].s->lb_value > candidate->s->lb_value)
- candidate = &p->lb_workers[i];
- }
- }
-
- if (candidate)
- candidate->s->lb_value -= total_factor;
-
- return candidate;
-}
-
-static worker_record_t *find_best_bytraffic(lb_worker_t *p,
- jk_logger_t *l)
-{
- unsigned int i;
- jk_uint64_t mytraffic = 0;
- jk_uint64_t curmin = 0;
- worker_record_t *candidate = NULL;
- double diff;
- time_t now = time(NULL);
-
- for (i = 0; i < p->num_of_workers; i++) {
- diff = difftime(now, p->lb_workers[i].s->service_time);
- if (diff > JK_SERVICE_TRANSFER_INTERVAL) {
- p->lb_workers[i].s->service_time = now;
- p->lb_workers[i].s->readed /= JK_SERVICE_TRANSFER_INTERVAL;
- p->lb_workers[i].s->transferred /= JK_SERVICE_TRANSFER_INTERVAL;
- }
- }
- /* First try to see if we have available candidate */
- for (i = 0; i < p->num_of_workers; i++) {
- /* If the worker is in error state run
- * retry on that worker. It will be marked as
- * operational if the retry timeout is elapsed.
- * The worker might still be unusable, but we try
- * anyway.
- */
- if (JK_WORKER_IN_ERROR(p->lb_workers[i].s)) {
- retry_worker(&p->lb_workers[i], p->s->recover_wait_time, l);
- }
- /* Take into calculation only the workers that are
- * not in error state, stopped or not disabled.
- */
- if (JK_WORKER_USABLE(p->lb_workers[i].s)) {
- mytraffic = (p->lb_workers[i].s->transferred/p->lb_workers[i].s->lb_factor) +
- (p->lb_workers[i].s->readed/p->lb_workers[i].s->lb_factor);
- if (!candidate || mytraffic < curmin) {
- candidate = &p->lb_workers[i];
- curmin = mytraffic;
- }
- }
- }
- return candidate;
-}
-
-static worker_record_t *find_best_bybusyness(lb_worker_t *p,
- jk_logger_t *l)
+static worker_record_t *find_best_byvalue(lb_worker_t *p,
+ jk_logger_t *l)
{
static unsigned int next_offset = 0;
unsigned int i;
unsigned int j;
unsigned int offset;
- int bfn = 1; /* Numerator of best busy factor */
- int bfd = 1; /* Denominator of best busy factor */
-
- int left; /* left and right are used to compare rational numbers */
- int right;
+ jk_uint64_t curmin = 0;
/* find the least busy worker */
worker_record_t *candidate = NULL;
offset = next_offset;
- /* First try to see if we have available candidate
- */
- for (j = 0; j < p->num_of_workers; j++) {
- i = (j + offset) % p->num_of_workers;
-
- /* If the worker is in error state run
- * retry on that worker. It will be marked as
- * operational if the retry timeout is elapsed.
- * The worker might still be unusable, but we try
- * anyway.
- */
- if (JK_WORKER_IN_ERROR(p->lb_workers[i].s)) {
- retry_worker(&p->lb_workers[i], p->s->recover_wait_time, l);
- }
+ /* First try to see if we have available candidate */
+ for (j = offset; j < p->num_of_workers + offset; j++) {
+ i = j % p->num_of_workers;
+
/* Take into calculation only the workers that are
- * not in error state, stopped or not disabled.
+ * not in error state, stopped, disabled or busy.
*/
if (JK_WORKER_USABLE(p->lb_workers[i].s)) {
- /* compare rational numbers: (a/b) < (c/d) iff a*d < c*b
- */
- left = p->lb_workers[i].s->busy * bfd;
- right = bfn * p->lb_workers[i].s->lb_factor;
-
- if (!candidate || (left < right)) {
+ if (!candidate || (p->lb_workers[i].s->lb_value < curmin)) {
candidate = &p->lb_workers[i];
- bfn = p->lb_workers[i].s->busy;
- bfd = p->lb_workers[i].s->lb_factor;
+ curmin = p->lb_workers[i].s->lb_value;
next_offset = i + 1;
}
}
@@ -426,7 +427,6 @@
jk_logger_t *l)
{
unsigned int i;
- int total_factor = 0;
int uses_domain = 0;
worker_record_t *candidate = NULL;
@@ -436,9 +436,6 @@
candidate = find_best_bydomain(p, name, l);
}
if (candidate) {
- if (JK_WORKER_IN_ERROR(candidate->s)) {
- retry_worker(candidate, p->s->recover_wait_time, l);
- }
if (candidate->s->in_error_state || candidate->s->is_stopped ) {
/* We have a worker that is error state or stopped.
* If it has a redirection set use that redirection worker.
@@ -458,21 +455,6 @@
candidate = NULL;
}
}
- if (candidate && !uses_domain &&
- p->lbmethod == JK_LB_BYREQUESTS) {
-
- for (i = 0; i < p->num_of_workers; i++) {
- if (JK_WORKER_USABLE(p->lb_workers[i].s)) {
- /* Skip all workers that are not member of candidate domain */
- if (*candidate->s->domain &&
- strcmp(p->lb_workers[i].s->domain, candidate->s->domain))
- continue;
- p->lb_workers[i].s->lb_value += p->lb_workers[i].s->lb_factor;
- total_factor += p->lb_workers[i].s->lb_factor;
- }
- }
- candidate->s->lb_value -= total_factor;
- }
return candidate;
}
@@ -499,12 +481,7 @@
{
worker_record_t *rc = NULL;
- if (p->lbmethod == JK_LB_BYREQUESTS)
- rc = find_best_byrequests(p, l);
- else if (p->lbmethod == JK_LB_BYTRAFFIC)
- rc = find_best_bytraffic(p, l);
- else if (p->lbmethod == JK_LB_BYBUSYNESS)
- rc = find_best_bybusyness(p, l);
+ rc = find_best_byvalue(p, l);
/* By default use worker name as session route */
if (rc)
rc->r = &(rc->s->name[0]);
@@ -527,10 +504,6 @@
/* No need to find the best worker
* if there is a single one
*/
- if (JK_WORKER_IN_ERROR(p->lb_workers[0].s)) {
- retry_worker(&p->lb_workers[0], p->s->recover_wait_time, l);
- }
- /* Check if worker is marked for retry */
if(!p->lb_workers[0].s->in_error_state && !p->lb_workers[0].s->is_stopped) {
p->lb_workers[0].r = &(p->lb_workers[0].s->name[0]);
JK_TRACE_EXIT(l);
@@ -564,7 +537,7 @@
if (JK_IS_DEBUG_LEVEL(l)) {
jk_log(l, JK_LOG_DEBUG,
"total sessionid is %s",
- sessionid ? sessionid : "empty");
+ sessionid ? sessionid : "empty");
}
while (sessionid) {
char *next = strchr(sessionid, ';');
@@ -582,7 +555,7 @@
if (JK_IS_DEBUG_LEVEL(l))
jk_log(l, JK_LOG_DEBUG,
"searching worker for session route %s",
- session_route);
+ session_route);
/* We have a session route. Whow! */
rc = find_bysession_route(p, session_route, l);
@@ -612,7 +585,7 @@
}
jk_log(l, JK_LOG_INFO,
"all workers are in error state for session %s",
- session);
+ session);
JK_TRACE_EXIT(l);
return NULL;
}
@@ -691,6 +664,10 @@
if (p->worker->s->busy > p->worker->s->max_busy)
p->worker->s->max_busy = p->worker->s->busy;
rec->s->busy++;
+ if (p->worker->lbmethod == JK_LB_BYREQUESTS)
+ rec->s->lb_value += rec->s->lb_mult;
+ else if (p->worker->lbmethod == JK_LB_BYBUSYNESS)
+ rec->s->lb_value += rec->s->lb_mult;
if (rec->s->busy > rec->s->max_busy)
rec->s->max_busy = rec->s->busy;
if (p->worker->lblock == JK_LB_LOCK_PESSIMISTIC)
@@ -707,6 +684,25 @@
/* Update partial reads and writes if any */
rec->s->readed += rd;
rec->s->transferred += wr;
+    /* Account for the finished request in lb_value:
+     * by traffic, add the bytes read and written, weighted with lb_mult;
+     * by busyness, take back the lb_mult that was added for the request.
+     */
+    if (p->worker->lbmethod == JK_LB_BYTRAFFIC) {
+        rec->s->lb_value += (rd+wr)*rec->s->lb_mult;
+    }
+    else if (p->worker->lbmethod == JK_LB_BYBUSYNESS) {
+        if (rec->s->lb_value >= rec->s->lb_mult) {
+            rec->s->lb_value -= rec->s->lb_mult;
+        }
+        else {
+            /* lb_value is unsigned, so the subtraction would wrap around.
+             * The value is logged before it is reset, so that the
+             * message shows the offending value. The format is one
+             * concatenated string literal; its arguments follow it. */
+            if (JK_IS_DEBUG_LEVEL(l))
+                jk_log(l, JK_LOG_DEBUG,
+                       "worker %s has load value to low (%"
+                       JK_UINT64_T_FMT
+                       " < %"
+                       JK_UINT64_T_FMT
+                       ") "
+                       "- correcting to 0",
+                       rec->s->name,
+                       rec->s->lb_value,
+                       rec->s->lb_mult);
+            rec->s->lb_value = 0;
+        }
+    }
/* When returning the endpoint mark the worker as not busy.
* We have at least one endpoint free
@@ -919,7 +915,7 @@
if ((s = jk_get_worker_redirect(props, worker_names[i], NULL)))
strncpy(p->lb_workers[i].s->redirect, s, JK_SHM_STR_SIZ);
- p->lb_workers[i].s->lb_value = p->lb_workers[i].s->lb_factor;
+ p->lb_workers[i].s->lb_value = 0;
p->lb_workers[i].s->in_error_state = JK_FALSE;
p->lb_workers[i].s->in_recovering = JK_FALSE;
p->lb_workers[i].s->is_busy = JK_FALSE;
@@ -949,14 +945,15 @@
close_workers(p, i, l);
}
else {
- if (JK_IS_DEBUG_LEVEL(l)) {
- for (i = 0; i < num_of_workers; i++) {
+ for (i = 0; i < num_of_workers; i++) {
+ if (JK_IS_DEBUG_LEVEL(l)) {
jk_log(l, JK_LOG_DEBUG,
"Balanced worker %i has name %s in domain %s",
i, p->lb_workers[i].s->name, p->lb_workers[i].s->domain);
}
}
p->num_of_workers = num_of_workers;
+ update_mult(p, l);
JK_TRACE_EXIT(l);
return JK_TRUE;
}
@@ -984,6 +981,8 @@
WAIT_BEFORE_RECOVER);
if (p->s->recover_wait_time < WAIT_BEFORE_RECOVER)
p->s->recover_wait_time = WAIT_BEFORE_RECOVER;
+ p->maintain_time = jk_get_worker_maintain_time(props);
+ p->s->last_maintain_time = time(NULL);
p->lbmethod = jk_get_lb_method(props, p->s->name);
p->lblock = jk_get_lb_lock(props, p->s->name);
Modified: tomcat/connectors/trunk/jk/native/common/jk_lb_worker.h
URL: http://svn.apache.org/viewcvs/tomcat/connectors/trunk/jk/native/common/jk_lb_worker.h?rev=406411&r1=406410&r2=406411&view=diff
==============================================================================
--- tomcat/connectors/trunk/jk/native/common/jk_lb_worker.h (original)
+++ tomcat/connectors/trunk/jk/native/common/jk_lb_worker.h Sun May 14 13:08:24 2006
@@ -48,10 +48,14 @@
#define JK_LB_LM_DEFAULT ("Optimistic")
#define JK_LB_LM_PESSIMISTIC ("Pessimistic")
-/*
- * Time to wait before retry...
- */
+/* Time to wait before retry. */
#define WAIT_BEFORE_RECOVER (60)
+/* We accept doing global maintenance if we are */
+/* JK_LB_MAINTAIN_TOLERANCE seconds early. */
+#define JK_LB_MAINTAIN_TOLERANCE (2)
+/* We divide load values by 2^x during global maintenance. */
+/* The exponent x is JK_LB_DECAY_MULT*#MAINT_INTV_SINCE_LAST_MAINT */
+#define JK_LB_DECAY_MULT (1)
static const char *lb_method_type[] = {
JK_LB_METHOD_REQUESTS,
@@ -77,6 +81,7 @@
unsigned int num_of_workers;
int lbmethod;
int lblock;
+ time_t maintain_time;
jk_pool_t p;
jk_pool_atom_t buf[TINY_POOL_SIZE];
@@ -91,6 +96,9 @@
int JK_METHOD lb_worker_factory(jk_worker_t **w,
const char *name, jk_logger_t *l);
+
+jk_uint64_t restart_value(lb_worker_t *p, jk_logger_t *l);
+void update_mult(lb_worker_t * p, jk_logger_t *l);
#ifdef __cplusplus
}
Modified: tomcat/connectors/trunk/jk/native/common/jk_shm.h
URL: http://svn.apache.org/viewcvs/tomcat/connectors/trunk/jk/native/common/jk_shm.h?rev=406411&r1=406410&r2=406411&view=diff
==============================================================================
--- tomcat/connectors/trunk/jk/native/common/jk_shm.h (original)
+++ tomcat/connectors/trunk/jk/native/common/jk_shm.h Sun May 14 13:08:24 2006
@@ -53,9 +53,6 @@
#define JK_SHM_ALIGNMENT 64
#define JK_SHM_ALIGN(x) JK_ALIGN(x, JK_SHM_ALIGNMENT)
-/* Use 1 minute for measuring read/write data */
-#define JK_SERVICE_TRANSFER_INTERVAL 60
-
/** jk shm worker record structure */
struct jk_shm_worker
{
@@ -78,8 +75,10 @@
volatile int is_busy;
/* Current lb factor */
volatile int lb_factor;
+ /* Current lb reciprocal factor */
+ volatile jk_uint64_t lb_mult;
/* Current lb value */
- volatile int lb_value;
+ volatile jk_uint64_t lb_value;
volatile int in_error_state;
volatile int in_recovering;
int sticky_session;
@@ -89,7 +88,7 @@
/* Statistical data */
volatile time_t error_time;
/* Service transfer rate time */
- volatile time_t service_time;
+ volatile time_t last_maintain_time;
/* Number of bytes read from remote */
volatile jk_uint64_t readed;
/* Number of bytes transferred to remote */
Modified: tomcat/connectors/trunk/jk/native/common/jk_status.c
URL: http://svn.apache.org/viewcvs/tomcat/connectors/trunk/jk/native/common/jk_status.c?rev=406411&r1=406410&r2=406411&view=diff
==============================================================================
--- tomcat/connectors/trunk/jk/native/common/jk_status.c (original)
+++ tomcat/connectors/trunk/jk/native/common/jk_status.c Sun May 14 13:08:24 2006
@@ -448,7 +448,7 @@
jk_puts(s, "</tr>\n</table>\n<br/>\n");
jk_puts(s, "<table><tr>"
"<th>Name</th><th>Type</th><th>Host</th><th>Addr</th>"
- "<th>Stat</th><th>F</th><th>V</th><th>Acc</th><th>Err</th>"
+ "<th>Stat</th><th>F</th><th>M</th><th>V</th><th>Acc</th><th>Err</th>"
"<th>Wr</th><th>Rd</th><th>Busy</th><th>Max</th><th>RR</th><th>Cd</th></tr>\n");
for (j = 0; j < lb->num_of_workers; j++) {
worker_record_t *wr = &(lb->lb_workers[j]);
@@ -472,7 +472,8 @@
wr->s->is_busy),
"</td>", NULL);
jk_printf(s, "<td>%d</td>", wr->s->lb_factor);
- jk_printf(s, "<td>%d</td>", wr->s->lb_value);
+ jk_printf(s, "<td>%" JK_UINT64_T_FMT "</td>", wr->s->lb_mult);
+ jk_printf(s, "<td>%" JK_UINT64_T_FMT "</td>", wr->s->lb_value);
jk_printf(s, "<td>%u</td>", wr->s->elected);
jk_printf(s, "<td>%u</td>", wr->s->errors);
jk_putv(s, "<td>", status_strfsize(wr->s->transferred, buf),
@@ -579,6 +580,7 @@
"<tr><th>Addr</th><td>Backend Address info</td></tr>\n"
"<tr><th>Stat</th><td>Worker status</td></tr>\n"
"<tr><th>F</th><td>Load Balancer Factor</td></tr>\n"
+ "<tr><th>M</th><td>Load Balancer Multiplicity</td></tr>\n"
"<tr><th>V</th><td>Load Balancer Value</td></tr>\n"
"<tr><th>Acc</th><td>Number of requests</td></tr>\n"
"<tr><th>Err</th><td>Number of failed requests</td></tr>\n"
@@ -652,7 +654,8 @@
wr->s->is_busy) );
jk_printf(s, " lbfactor=\"%d\"", wr->s->lb_factor);
- jk_printf(s, " lbvalue=\"%d\"", wr->s->lb_value);
+ jk_printf(s, " lbmult=\"%" JK_UINT64_T_FMT "\"", wr->s->lb_mult);
+ jk_printf(s, " lbvalue=\"%" JK_UINT64_T_FMT "\"", wr->s->lb_value);
jk_printf(s, " elected=\"%u\"", wr->s->elected);
jk_printf(s, " errors=\"%u\"", wr->s->errors);
jk_printf(s, " transferred=\"%" JK_UINT64_T_FMT "\"", wr->s->transferred);
@@ -678,6 +681,7 @@
const char *dworker, jk_logger_t *l)
{
int i;
+ int j;
char buf[1024];
const char *b;
lb_worker_t *lb;
@@ -731,11 +735,30 @@
strncpy(wr->s->domain, b, JK_SHM_STR_SIZ);
else
memset(wr->s->domain, 0, JK_SHM_STR_SIZ);
- wr->s->is_disabled = status_bool("wd", s->query_string);
- wr->s->is_stopped = status_bool("ws", s->query_string);
+ i = status_bool("wd", s->query_string);
+ j = status_bool("ws", s->query_string);
+ if (wr->s->is_disabled!=i || wr->s->is_stopped!=j) {
+ /* lock shared memory */
+ jk_shm_lock();
+ wr->s->is_disabled = i;
+ wr->s->is_stopped = j;
+ wr->s->lb_value = restart_value(lb, l);
+ if (i+j==0) {
+ jk_log(l, JK_LOG_INFO,
+ "worker %s restarted in status worker with lb_value %"
+ JK_UINT64_T_FMT,
+ wr->s->name,
+ wr->s->lb_value);
+ }
+ /* unlock the shared memory */
+ jk_shm_unlock();
+ }
i = status_int("wf", s->query_string, wr->s->lb_factor);
- if (i > 0)
+ if (i > 0 && wr->s->lb_factor != i) {
wr->s->lb_factor = i;
+/* Recalculate the load multiplicators wrt. lb_factor */
+ update_mult(lb, l);
+ }
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe@tomcat.apache.org
For additional commands, e-mail: dev-help@tomcat.apache.org