You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@tomcat.apache.org by co...@apache.org on 2002/05/04 00:12:18 UTC
cvs commit: jakarta-tomcat-connectors/jk/native2/common jk_channel_apr_socket.c jk_endpoint.c jk_msg_ajp.c jk_requtil.c jk_workerEnv.c jk_worker_ajp13.c jk_worker_lb.c
costin 02/05/03 15:12:18
Modified: jk/native2/common jk_channel_apr_socket.c jk_endpoint.c
jk_msg_ajp.c jk_requtil.c jk_workerEnv.c
jk_worker_ajp13.c jk_worker_lb.c
Log:
Few more lb changes.
This is getting a bit more aggressive in trying to recover
failed workers and simplifies the logic that is used.
I tested it and it seems to work very well with 'lbfactor=0' -
if the default tomcat goes down, the request goes to the
backup, when the default tomcat goes up ( after the timeout )
it'll be tried again.
As soon as shm is finished, the default will go up when it
re-registers itself in the shm.
Revision Changes Path
1.14 +4 -5 jakarta-tomcat-connectors/jk/native2/common/jk_channel_apr_socket.c
Index: jk_channel_apr_socket.c
===================================================================
RCS file: /home/cvs/jakarta-tomcat-connectors/jk/native2/common/jk_channel_apr_socket.c,v
retrieving revision 1.13
retrieving revision 1.14
diff -u -r1.13 -r1.14
--- jk_channel_apr_socket.c 3 May 2002 18:44:03 -0000 1.13
+++ jk_channel_apr_socket.c 3 May 2002 22:12:17 -0000 1.14
@@ -426,8 +426,6 @@
#ifdef HAVE_UNIXSOCKETS
unixsock=chD->unixsock;
#endif
- env->l->jkLog(env, env->l, JK_LOG_ERROR,
- "jk2_channel_apr_send %d\n",chD->type);
if (chD->type==TYPE_NET) {
length = (apr_size_t) len;
@@ -443,10 +441,11 @@
#ifdef HAVE_UNIXSOCKETS
while(sent < len) {
/* this_time = send(unixsock, (char *)b + sent , len - sent, 0); */
+ errno=0;
this_time = write(unixsock, (char *)b + sent , len - sent);
env->l->jkLog(env, env->l, JK_LOG_INFO,
- "channel.apr:send() send() %d %d %s\n", this_time, errno,
+ "channel.apr:send() write() %d %d %s\n", this_time, errno,
strerror( errno));
/* if( errno != 0 ) { */
/* env->l->jkLog(env, env->l, JK_LOG_ERROR, */
@@ -551,7 +550,7 @@
blen=msg->checkHeader( env, msg, endpoint );
if( blen < 0 ) {
env->l->jkLog(env, env->l, JK_LOG_ERROR,
- "channelAprArp.receive(): Bad header\n" );
+ "channelApr.receive(): Bad header\n" );
return JK_ERR;
}
@@ -559,7 +558,7 @@
if(rc < 0) {
env->l->jkLog(env, env->l, JK_LOG_ERROR,
- "channelAprApr.receive(): Error receiving message body %d %d\n",
+ "channelApr.receive(): Error receiving message body %d %d\n",
rc, errno);
return JK_ERR;
}
1.9 +14 -1 jakarta-tomcat-connectors/jk/native2/common/jk_endpoint.c
Index: jk_endpoint.c
===================================================================
RCS file: /home/cvs/jakarta-tomcat-connectors/jk/native2/common/jk_endpoint.c,v
retrieving revision 1.8
retrieving revision 1.9
diff -u -r1.8 -r1.9
--- jk_endpoint.c 25 Apr 2002 18:50:22 -0000 1.8
+++ jk_endpoint.c 3 May 2002 22:12:17 -0000 1.9
@@ -73,6 +73,8 @@
#include "jk_objCache.h"
#include "jk_registry.h"
+static char *myAttInfo[]={ "channel", "active", NULL };
+
/** Will return endpoint specific runtime properties
*
* uri The uri that is beeing processed, NULL if the endpoing is inactive
@@ -83,6 +85,16 @@
*
*/
static void * JK_METHOD jk2_endpoint_getAttribute(jk_env_t *env, jk_bean_t *bean, char *name ) {
+ jk_endpoint_t *ep=(jk_endpoint_t *)bean->object;
+
+ if( strcmp( name, "channel" )==0 ) {
+ return ep->worker->channel->mbean->name;
+ } else if (strcmp( name, "active" )==0 ) {
+ if( ep->currentRequest != NULL )
+ return ep->currentRequest->req_uri;
+ } else {
+ return NULL;
+ }
return NULL;
}
@@ -107,7 +119,8 @@
e->request = jk2_msg_ajp_create( env, e->pool, 0);
e->reply = jk2_msg_ajp_create( env, e->pool, 0);
e->post = jk2_msg_ajp_create( env, e->pool, 0);
-
+ result->getAttributeInfo=myAttInfo;
+ result->getAttribute= jk2_endpoint_getAttribute;
e->reuse = JK_FALSE;
e->cPool=endpointPool->create(env, endpointPool, HUGE_POOL_SIZE );
1.10 +2 -1 jakarta-tomcat-connectors/jk/native2/common/jk_msg_ajp.c
Index: jk_msg_ajp.c
===================================================================
RCS file: /home/cvs/jakarta-tomcat-connectors/jk/native2/common/jk_msg_ajp.c,v
retrieving revision 1.9
retrieving revision 1.10
diff -u -r1.9 -r1.10
--- jk_msg_ajp.c 3 May 2002 17:41:52 -0000 1.9
+++ jk_msg_ajp.c 3 May 2002 22:12:17 -0000 1.10
@@ -60,7 +60,7 @@
* Author: Costin Manolache
* Author: Gal Shachor <sh...@il.ibm.com> *
* Author: Henri Gomez <hg...@slib.fr> *
- * Version: $Revision: 1.9 $ *
+ * Version: $Revision: 1.10 $ *
***************************************************************************/
#include "jk_pool.h"
@@ -379,6 +379,7 @@
env->l->jkLog(env, env->l, JK_LOG_ERROR,
"msgAjp.receive(): Bad signature %x%x\n",
head[0], head[1]);
+ msg->dump( env, msg, "BAD MESSAGE: " );
return -1;
}
1.14 +2 -2 jakarta-tomcat-connectors/jk/native2/common/jk_requtil.c
Index: jk_requtil.c
===================================================================
RCS file: /home/cvs/jakarta-tomcat-connectors/jk/native2/common/jk_requtil.c,v
retrieving revision 1.13
retrieving revision 1.14
diff -u -r1.13 -r1.14
--- jk_requtil.c 3 May 2002 18:44:03 -0000 1.13
+++ jk_requtil.c 3 May 2002 22:12:17 -0000 1.14
@@ -288,8 +288,8 @@
break;
default:
- env->l->jkLog(env, env->l, JK_LOG_INFO,
- "requtil.getHeaderId() long header %s\n", header_name);
+/* env->l->jkLog(env, env->l, JK_LOG_INFO, */
+/* "requtil.getHeaderId() long header %s\n", header_name); */
return JK_ERR;
}
1.34 +4 -3 jakarta-tomcat-connectors/jk/native2/common/jk_workerEnv.c
Index: jk_workerEnv.c
===================================================================
RCS file: /home/cvs/jakarta-tomcat-connectors/jk/native2/common/jk_workerEnv.c,v
retrieving revision 1.33
retrieving revision 1.34
diff -u -r1.33 -r1.34
--- jk_workerEnv.c 3 May 2002 17:45:57 -0000 1.33
+++ jk_workerEnv.c 3 May 2002 22:12:17 -0000 1.34
@@ -59,7 +59,7 @@
* Description: Workers controller *
* Author: Gal Shachor <sh...@il.ibm.com> *
* Author: Henri Gomez <hg...@slib.fr> *
- * Version: $Revision: 1.33 $ *
+ * Version: $Revision: 1.34 $ *
***************************************************************************/
#include "jk_env.h"
@@ -387,7 +387,8 @@
handler=NULL;
env->l->jkLog(env, env->l, JK_LOG_INFO,
- "ajp14.processCallbacks() Waiting reply\n");
+ "ajp14.processCallbacks() Waiting reply %s\n",
+ ep->worker->channel->mbean->name);
msg->reset(env, msg);
rc= ep->worker->channel->recv( env, ep->worker->channel, ep,
@@ -399,7 +400,7 @@
return JK_ERR;
}
- ep->reply->dump(env, ep->reply, "Received ");
+ /* ep->reply->dump(env, ep->reply, "Received "); */
code = (int)msg->getByte(env, msg);
rc=jk2_workerEnv_dispatch( env, wEnv, req, ep, code, msg );
1.13 +14 -6 jakarta-tomcat-connectors/jk/native2/common/jk_worker_ajp13.c
Index: jk_worker_ajp13.c
===================================================================
RCS file: /home/cvs/jakarta-tomcat-connectors/jk/native2/common/jk_worker_ajp13.c,v
retrieving revision 1.12
retrieving revision 1.13
diff -u -r1.12 -r1.13
--- jk_worker_ajp13.c 3 May 2002 17:47:31 -0000 1.12
+++ jk_worker_ajp13.c 3 May 2002 22:12:17 -0000 1.13
@@ -87,7 +87,7 @@
/* -------------------- Impl -------------------- */
static char *myAttInfo[]={ "lb_factor", "lb_value", "reqCnt", "errCnt",
"route", "errorState", "recovering",
- "epCount", NULL };
+ "epCount", "errorTime", NULL };
static void * JK_METHOD jk2_worker_ajp14_getAttribute(jk_env_t *env, jk_bean_t *bean, char *name ) {
jk_worker_t *worker=(jk_worker_t *)bean->object;
@@ -99,6 +99,10 @@
return worker->channelName;
} else if (strcmp( name, "route" )==0 ) {
return worker->route;
+ } else if (strcmp( name, "errorTime" )==0 ) {
+ char *buf=env->tmpPool->calloc( env, env->tmpPool, 20 );
+ sprintf( buf, "%d", worker->error_time );
+ return buf;
} else if (strcmp( name, "lb_value" )==0 ) {
char *buf=env->tmpPool->calloc( env, env->tmpPool, 20 );
sprintf( buf, "%f", worker->lb_value );
@@ -381,7 +385,7 @@
}
env->l->jkLog(env, env->l, JK_LOG_INFO,
- "ajp14.service() processing callbacks\n");
+ "ajp14.service() processing callbacks %s\n", e->worker->channel->mbean->name);
err = e->worker->workerEnv->processCallbacks(env, e->worker->workerEnv,
e, s);
@@ -506,10 +510,10 @@
jk_worker_t *w;
w= e->worker;
-
+
if( e->cPool != NULL )
e->cPool->reset(env, e->cPool);
- if (w->endpointCache != NULL ) {
+ if (! w->in_error_state && w->endpointCache != NULL ) {
int err=0;
err=w->endpointCache->put( env, w->endpointCache, e );
if( err==JK_OK ) {
@@ -548,7 +552,8 @@
if (e!=NULL) {
env->l->jkLog(env, env->l, JK_LOG_INFO,
- "ajp14.getEndpoint(): Reusing endpoint\n");
+ "ajp14.getEndpoint(): Reusing endpoint %s %s\n",
+ e->mbean->name, e->worker->mbean->name);
*eP = e;
return JK_OK;
}
@@ -579,6 +584,9 @@
err=jk2_worker_ajp14_service1( env, w, s, e );
+ if( err!=JK_OK ) {
+ w->in_error_state=JK_TRUE;
+ }
jk2_worker_ajp14_done( env, w, e);
return err;
}
@@ -725,6 +733,7 @@
w->service = jk2_worker_ajp14_service;
result->setAttribute= jk2_worker_ajp14_setAttribute;
+ result->getAttributeInfo=myAttInfo;
result->getAttribute= jk2_worker_ajp14_getAttribute;
result->object = w;
w->mbean=result;
@@ -732,7 +741,6 @@
w->workerEnv=env->getByName( env, "workerEnv" );
w->workerEnv->addWorker( env, w->workerEnv, w );
- result->getAttributeInfo=myAttInfo;
return JK_OK;
}
1.8 +53 -39 jakarta-tomcat-connectors/jk/native2/common/jk_worker_lb.c
Index: jk_worker_lb.c
===================================================================
RCS file: /home/cvs/jakarta-tomcat-connectors/jk/native2/common/jk_worker_lb.c,v
retrieving revision 1.7
retrieving revision 1.8
diff -u -r1.7 -r1.8
--- jk_worker_lb.c 3 May 2002 18:23:26 -0000 1.7
+++ jk_worker_lb.c 3 May 2002 22:12:17 -0000 1.8
@@ -74,7 +74,8 @@
#define DEFAULT_LB_FACTOR (1.0)
/* Time to wait before retry... */
-#define WAIT_BEFORE_RECOVER (60*1)
+/* XXX make it longer - debugging only */
+#define WAIT_BEFORE_RECOVER (5)
#define ADDITINAL_WAIT_LOAD (20)
@@ -132,21 +133,28 @@
/** Get one worker that is ready */
for(i = 0 ; i < lb->num_of_workers ; i++) {
if(lb->lb_workers[i]->in_error_state) {
- if(!lb->lb_workers[i]->in_recovering) {
- if( now==0 )
- now = time(NULL);
+ /* Check if it's ready for recovery */
+ /* if(!lb->lb_workers[i]->in_recovering) { */
+ if( now==0 )
+ now = time(NULL);
- if((now - lb->lb_workers[i]->error_time) > WAIT_BEFORE_RECOVER) {
-
- lb->lb_workers[i]->in_recovering = JK_TRUE;
- lb->lb_workers[i]->error_time = now;
- lb->lb_workers[i]->retry_count++;
- rc = lb->lb_workers[i];
+ if((now - lb->lb_workers[i]->error_time) > WAIT_BEFORE_RECOVER) {
+ env->l->jkLog(env, env->l, JK_LOG_ERROR,
+ "lb.getWorker() timeout expired, reenable again %s\n", lb->
+ lb_workers[i]->mbean->name);
+
+ lb->lb_workers[i]->in_recovering = JK_TRUE;
+ lb->lb_workers[i]->in_error_state=JK_FALSE;
+ /* lb->lb_workers[i]->error_time = now; */
+ /* lb->lb_workers[i]->retry_count++; */
+ /* rc = lb->lb_workers[i]; */
- break;
- }
+ /* Don't give bigger priority to recovered workers
+ break;
+ */
}
- } else {
+ }
+ if( ! lb->lb_workers[i]->in_error_state ) {
if(lb->lb_workers[i]->lb_value == 0 ) {
/* That's the 'default' worker, it'll take all requests.
* All other workers are not used unless this is in error state.
@@ -160,22 +168,25 @@
if(lb->lb_workers[i]->lb_value < lb_min ||
( rc==NULL ) ) {
lb_min = lb->lb_workers[i]->lb_value;
- rc = lb->lb_workers[i];
+ rc = lb->lb_workers[i];
}
- }
+ }
}
-
+
if ( rc==NULL ) {
/* no workers found (rc is null), now try as hard as possible to get a
worker anyway, pick one with largest error time.. */
+ env->l->jkLog(env, env->l, JK_LOG_ERROR,
+ "lb.getWorker() All workers in error state, use the one with oldest error\n");
+
for(i = 0 ; i < lb->num_of_workers ; i++) {
- if(lb->lb_workers[i]->in_error_state) {
- if(!lb->lb_workers[i]->in_recovering) {
+/* if(lb->lb_workers[i]->in_error_state) { */
+/* if(!lb->lb_workers[i]->in_recovering) { */
/* if the retry count is zero, that means the worker only
failed once, this is to e that the failed worker will
not continue to be retried over and over again.
*/
- if ( lb->lb_workers[i]->retry_count == 0 ) {
+/* if ( lb->lb_workers[i]->retry_count == 0 ) { */
if ( rc != NULL ) {
/* pick the oldest failed worker */
if ( lb->lb_workers[i]->error_time < rc->error_time ) {
@@ -184,24 +195,25 @@
} else {
rc = lb->lb_workers[i];
}
- }
- }
- } else {
- /* This is a good worker - it may have come to life */
- if(lb->lb_workers[i]->lb_value < lb_min || rc != NULL) {
- lb_min = lb->lb_workers[i]->lb_value;
- rc = lb->lb_workers[i];
- break;
- }
- }
+/* } */
+/* } */
+/* } else { */
+ /* This is a good worker - it may have come to life */
+/* if(lb->lb_workers[i]->lb_value < lb_min || rc != NULL) { */
+/* lb_min = lb->lb_workers[i]->lb_value; */
+/* rc = lb->lb_workers[i]; */
+/* break; */
+/* } */
+/* } */
}
-
+
if ( rc && rc->in_error_state ) {
- if(now==0)
- now = time(0);
+/* if(now==0) */
+/* now = time(0); */
rc->in_recovering = JK_TRUE;
- rc->error_time = now;
- rc->retry_count++;
+ rc->in_error_state = JK_FALSE;
+/* rc->error_time = now; */
+/* rc->retry_count++; */
}
}
@@ -299,9 +311,9 @@
s->realWorker=NULL;
/* reset all the retry counts to 0. XXX may be a problem if we have many workers ? */
- for(i = 0 ; i < lb->num_of_workers ; i++) {
- lb->lb_workers[i]->retry_count = 0;
- }
+/* for(i = 0 ; i < lb->num_of_workers ; i++) { */
+/* lb->lb_workers[i]->retry_count = 0; */
+/* } */
if( wEnv->shm != NULL && wEnv->shm->head != NULL ) {
/* We have shm, let's check for updates. This is just checking one
@@ -344,8 +356,8 @@
env->l->jkLog(env, env->l, JK_LOG_INFO,
"lb.service() try %s\n", rec->mbean->name );
- s->jvm_route = s->pool->pstrdup(env, s->pool, rec->mbean->name);
-
+ s->jvm_route = rec->route;
+
rec->reqCnt++;
rc = rec->service(env, rec, s);
@@ -366,6 +378,8 @@
return JK_OK;
}
+ env->l->jkLog(env, env->l, JK_LOG_ERROR,
+ "lb.service() worker failed\n");
/*
* Service failed !!!
*
--
To unsubscribe, e-mail: <ma...@jakarta.apache.org>
For additional commands, e-mail: <ma...@jakarta.apache.org>