You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@tomcat.apache.org by mt...@apache.org on 2012/03/19 15:40:16 UTC

svn commit: r1302479 - /tomcat/jk/trunk/native/common/jk_lb_worker.c

Author: mturk
Date: Mon Mar 19 14:40:15 2012
New Revision: 1302479

URL: http://svn.apache.org/viewvc?rev=1302479&view=rev
Log:
Make sure we pull only if the sequence is above us

Modified:
    tomcat/jk/trunk/native/common/jk_lb_worker.c

Modified: tomcat/jk/trunk/native/common/jk_lb_worker.c
URL: http://svn.apache.org/viewvc/tomcat/jk/trunk/native/common/jk_lb_worker.c?rev=1302479&r1=1302478&r2=1302479&view=diff
==============================================================================
--- tomcat/jk/trunk/native/common/jk_lb_worker.c (original)
+++ tomcat/jk/trunk/native/common/jk_lb_worker.c Mon Mar 19 14:40:15 2012
@@ -295,7 +295,7 @@ void jk_lb_pull(lb_worker_t *p, int lock
                p->name, p->sequence, p->s->h.sequence);
     if (locked == JK_FALSE)
         jk_shm_lock();
-    if (p->sequence > p->s->h.sequence) {
+    if (p->sequence == p->s->h.sequence) {
         if (locked == JK_FALSE)
             jk_shm_unlock();
         return;
@@ -315,7 +315,7 @@ void jk_lb_pull(lb_worker_t *p, int lock
 
     for (i = 0; i < p->num_of_workers; i++) {
         lb_sub_worker_t *w = &p->lb_workers[i];
-        if (w->sequence != w->s->h.sequence) {
+        if (w->sequence < w->s->h.sequence) {
             jk_worker_t *jw = w->worker;
             ajp_worker_t *aw = (ajp_worker_t *)jw->worker_private;
 
@@ -365,13 +365,12 @@ void jk_lb_push(lb_worker_t *p, int lock
     p->s->lbmethod = p->lbmethod;
     p->s->lblock = p->lblock;
     p->s->max_packet_size = p->max_packet_size;
-    p->s->h.sequence = p->sequence;
     strncpy(p->s->session_cookie, p->session_cookie, JK_SHM_STR_SIZ);
     strncpy(p->s->session_path, p->session_path, JK_SHM_STR_SIZ);
 
     for (i = 0; i < p->num_of_workers; i++) {
         lb_sub_worker_t *w = &p->lb_workers[i];
-        if (w->sequence != w->s->h.sequence) {
+        if (w->sequence < w->s->h.sequence) {
             jk_worker_t *jw = w->worker;
             ajp_worker_t *aw = (ajp_worker_t *)jw->worker_private;
 
@@ -391,6 +390,7 @@ void jk_lb_push(lb_worker_t *p, int lock
             w->s->h.sequence = w->sequence;
         }
     }
+    p->s->h.sequence = p->sequence;
     if (locked == JK_FALSE)
         jk_shm_unlock();
 
@@ -562,7 +562,7 @@ static int recover_workers(lb_worker_t *
     ajp_worker_t *aw = NULL;
     JK_TRACE_ENTER(l);
 
-    if (p->sequence != p->s->h.sequence)
+    if (p->sequence < p->s->h.sequence)
         jk_lb_pull(p, JK_TRUE, l);
     for (i = 0; i < p->num_of_workers; i++) {
         w = &p->lb_workers[i];
@@ -1146,7 +1146,7 @@ static int JK_METHOD service(jk_endpoint
     /* Set returned error to OK */
     *is_error = JK_HTTP_OK;
 
-    if (p->worker->sequence != p->worker->s->h.sequence)
+    if (p->worker->sequence < p->worker->s->h.sequence)
         jk_lb_pull(p->worker, JK_FALSE, l);
     for (i = 0; i < num_of_workers; i++) {
         lb_sub_worker_t *rec = &(p->worker->lb_workers[i]);
@@ -1206,7 +1206,7 @@ static int JK_METHOD service(jk_endpoint
                        retry, p->worker->retry_interval);
             jk_sleep(p->worker->retry_interval);
             /* Pull shared memory if something changed during sleep */
-            if (p->worker->sequence != p->worker->s->h.sequence)
+            if (p->worker->sequence < p->worker->s->h.sequence)
                 jk_lb_pull(p->worker, JK_FALSE, l);
             for (i = 0; i < num_of_workers; i++) {
                 /* Copy the shared state info */
@@ -1631,7 +1631,6 @@ static int JK_METHOD validate(jk_worker_
                 strncpy(p->lb_workers[i].s->h.name, worker_names[i],
                         JK_SHM_STR_SIZ);
                 p->lb_workers[i].sequence = 0;
-                p->lb_workers[i].s->h.sequence = 0;
                 p->lb_workers[i].lb_factor =
                     jk_get_lb_factor(props, worker_names[i]);
                 if (p->lb_workers[i].lb_factor < 1) {
@@ -1786,7 +1785,7 @@ static int JK_METHOD init(jk_worker_t *p
     }
 
     p->sequence++;
-    jk_lb_push(p, JK_FALSE, log);
+    jk_lb_push(p, JK_TRUE, log);
 
     JK_TRACE_EXIT(log);
     return JK_TRUE;
@@ -1878,7 +1877,6 @@ int JK_METHOD lb_worker_factory(jk_worke
         private_data->error_escalation_time = private_data->recover_wait_time / 2;
         private_data->max_reply_timeouts = 0;
         private_data->sequence = 0;
-        private_data->s->h.sequence = 0;
         private_data->next_offset = 0;
         *w = &private_data->worker;
         JK_TRACE_EXIT(l);



---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe@tomcat.apache.org
For additional commands, e-mail: dev-help@tomcat.apache.org


Re: svn commit: r1302479 - /tomcat/jk/trunk/native/common/jk_lb_worker.c

Posted by Mladen Turk <mt...@apache.org>.
On 03/26/2012 11:46 PM, Rainer Jung wrote:
> Hi Mladen,
>
>>
>> for (i = 0; i< p->num_of_workers; i++) {
>> lb_sub_worker_t *w =&p->lb_workers[i];
>> - if (w->sequence != w->s->h.sequence) {
>> + if (w->sequence< w->s->h.sequence) {
>
> I think this one is wrong. It is inside push not pull, so it should be if (local > shared) and not "<".
>

... continue from first mail

Inside lb_worker::init we have

1787: p->sequence++;
1788: jk_lb_push(p, JK_TRUE, log);

Now, this is completely wrong.
It will always set sequence to 1, so in case worker process
gets recycled we have things out of sync.
Anyhow why push at init stage?



... next inside lb_push we have

393: p->s->h.sequence = p->sequence;

Now this should actually be
p->s->h.sequence++;

The shm sequence can be generations ahead of heap sequence number.

Anyhow, like said, *->s->h.sequence should be updated
only when someone explicitly clicks on the jkstatus page.
Other then that I see no reason for its update.
All other runtime params are intrinsic and volatile should
guarantee its consistency.


Regards
-- 
^TM

---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe@tomcat.apache.org
For additional commands, e-mail: dev-help@tomcat.apache.org


Re: svn commit: r1302479 - /tomcat/jk/trunk/native/common/jk_lb_worker.c

Posted by Mladen Turk <mt...@apache.org>.
On 03/26/2012 11:46 PM, Rainer Jung wrote:
> Hi Mladen,
>
>>
>> for (i = 0; i< p->num_of_workers; i++) {
>> lb_sub_worker_t *w =&p->lb_workers[i];
>> - if (w->sequence != w->s->h.sequence) {
>> + if (w->sequence< w->s->h.sequence) {
>
> I think this one is wrong. It is inside push not pull, so it should be if (local > shared) and not "<".
>

Might be.
The real question is why we need a difference in sequences for a push?
IMO if the push is needed it should be unconditional, and heap
sequence number should be updated *only* on pull (never directly)

>
> The other changes in this commit look right.
>

I'm not sure that's the case :)

Think that the entire sequence push/pull should be
reviewed. IMO the only reason for it is to lower the
number of lock/unlock calls.

The reason for this I see only in case of explicit
batch update trough status worker. All other operations
should just depend on volatile shared mem parameters
which should ensure enough atomicity.

Also we should apply the check in this way

if (w->sequence < w->s->h.sequence) {
    jk_shm_lock();
    /* Now check the value again after we obtained a lock */
    if (w->sequence < w->s->h.sequence) {

    }
}


The reason for IIS crash is the fact that we can  have
N processes updating the shared memory (no parent process,
so no one that would set initial values).
Lock is done per worker basis, and this means that one worker
can update the sequence, other will see it as next generation
although the first one was not done updating all workers from
config and then you have ka-boom.

This makes the entire sequence based push/pull conceptually
wrong, and we need to think of something else.


Regards
-- 
^TM

---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe@tomcat.apache.org
For additional commands, e-mail: dev-help@tomcat.apache.org


Re: svn commit: r1302479 - /tomcat/jk/trunk/native/common/jk_lb_worker.c

Posted by Rainer Jung <ra...@kippdata.de>.
Hi Mladen,

On 19.03.2012 15:40, mturk@apache.org wrote:
> Author: mturk
> Date: Mon Mar 19 14:40:15 2012
> New Revision: 1302479
>
> URL: http://svn.apache.org/viewvc?rev=1302479&view=rev
> Log:
> Make sure we pull only if the sequence is above us
>
> Modified:
>      tomcat/jk/trunk/native/common/jk_lb_worker.c
>
> Modified: tomcat/jk/trunk/native/common/jk_lb_worker.c
> URL: http://svn.apache.org/viewvc/tomcat/jk/trunk/native/common/jk_lb_worker.c?rev=1302479&r1=1302478&r2=1302479&view=diff
> ==============================================================================
> --- tomcat/jk/trunk/native/common/jk_lb_worker.c (original)
> +++ tomcat/jk/trunk/native/common/jk_lb_worker.c Mon Mar 19 14:40:15 2012

...

> @@ -365,13 +365,12 @@ void jk_lb_push(lb_worker_t *p, int lock
>       p->s->lbmethod = p->lbmethod;
>       p->s->lblock = p->lblock;
>       p->s->max_packet_size = p->max_packet_size;
> -    p->s->h.sequence = p->sequence;
>       strncpy(p->s->session_cookie, p->session_cookie, JK_SHM_STR_SIZ);
>       strncpy(p->s->session_path, p->session_path, JK_SHM_STR_SIZ);
>
>       for (i = 0; i<  p->num_of_workers; i++) {
>           lb_sub_worker_t *w =&p->lb_workers[i];
> -        if (w->sequence != w->s->h.sequence) {
> +        if (w->sequence<  w->s->h.sequence) {

I think this one is wrong. It is inside push not pull, so it should be 
if (local > shared) and not "<".

>               jk_worker_t *jw = w->worker;
>               ajp_worker_t *aw = (ajp_worker_t *)jw->worker_private;

The other changes in this commit look right.

Regards,

Rainer


---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe@tomcat.apache.org
For additional commands, e-mail: dev-help@tomcat.apache.org