You are viewing a plain text version of this content. The canonical link for it is here.
Posted to user@zookeeper.apache.org by ashitha velayudhan <ve...@gmail.com> on 2019/10/18 19:30:09 UTC

zookeeper_close: Does not make sure that the close session request is sent

Hi Everyone,

I have been hitting this issue
https://issues.apache.org/jira/browse/ZOOKEEPER-1105 in
zookeeper-3.5.1-alpha. It was causing ephemeral nodes to linger even
after the application had exited. The application calls zookeeper_close()
followed by quick_exit(0). The lingering ephemeral nodes had some
unintended side effects, and I received the ephemeral node deletion callback
only after the session expired due to timeout. The timeout is 180 seconds for
me.

I added the fix below and it is working as intended. I noticed that a
similar fix was committed to address ZOOKEEPER-1105 and was then
reverted. Why was it reverted? Are there any side effects to the changes
below?

diff --git a/zookeeper-3.5.1-alpha/src/c/src/zookeeper.c
b/zookeeper-3.5.1-alpha_fix/src/c/src/zookeeper.c
index cbf55c7..fe874ec 100644
--- a/zookeeper-3.5.1-alpha/src/c/src/zookeeper.c
+++ b/zookeeper-3.5.1-alpha_fix/src/c/src/zookeeper.c
@@ -3250,6 +3250,7 @@ int zookeeper_close(zhandle_t *zh)
     if (is_connected(zh)){
         struct oarchive *oa;
         struct RequestHeader h = {get_xid(), ZOO_CLOSE_OP};
+        struct timeval started;
         LOG_INFO(LOGCALLBACK(zh), "Closing zookeeper sessionId=%#llx to
[%s]\n",
                 zh->client_id.client_id,zoo_get_current_server(zh));
         oa = create_buffer_oarchive();
@@ -3266,6 +3267,45 @@ int zookeeper_close(zhandle_t *zh)
         /* make sure the close request is sent; we set timeout to an
arbitrary
          * (but reasonable) number of milliseconds since we want the call
to block*/
         rc=adaptor_send_queue(zh, 3000);
+        get_system_time(&started);
+        while(1) {
+            /* Make sure that zookeeper closeSession request is sent to
the server.
+             * To assure that all data is sent to server, wait until a
read returns <= 0
+             * This indicates that the server has closed the socket. Or
return a timeout.
+             * */
+            fd_set rfds;
+            struct timeval waittime = {0, 50000};
+            int ret;
+            int elapsed;
+            int timeout_msecs = 1000;
+            struct timeval now;
+            get_system_time(&now);
+            elapsed=calculate_interval(&started,&now);
+            if (elapsed>timeout_msecs) {
+                rc = ZOPERATIONTIMEOUT;
+                break;
+            }
+            FD_ZERO(&rfds);
+            FD_SET( zh->fd , &rfds);
+            ret = select(zh->fd+1, &rfds, NULL, NULL, &waittime);
+
+            if (ret<=0) {
+                /* timed out or an error or POLLERR */
+                LOG_DEBUG(LOGCALLBACK(zh),"Timeout or System error when
waiting"
+                        " for server's reply after sending a close
request, "
+                        "sessionId=%#lx\n", zh->client_id.client_id);
+                rc = ret==0 ? ZOPERATIONTIMEOUT : ZSYSTEMERROR;
+            } else {
+                char buf[1000];
+                int ret = read(zh->fd, buf, sizeof(buf));
+                if (ret<=0) {
+                    LOG_DEBUG(LOGCALLBACK(zh),"Read error %d, errno %d
sessionId=%#lx\n",
+                            ret, errno, zh->client_id.client_id);
+                    rc = ZOK;
+                    break;
+                }
+            }
+        }
     }else{
         LOG_INFO(LOGCALLBACK(zh), "Freeing zookeeper resources for
sessionId=%#llx\n",
                 zh->client_id.client_id);

Thanks
Ashitha