You are viewing a plain text version of this content. The canonical link for it is here.
Posted to user@zookeeper.apache.org by ashitha velayudhan <ve...@gmail.com> on 2019/10/18 19:30:09 UTC
zookeeper_close: Does not make sure that the close session request is sent
Hi Everyone,
I have been hitting this issue
https://issues.apache.org/jira/browse/ZOOKEEPER-1105 in
zookeeper-3.5.1-alpha. This was causing ephemeral nodes to linger even
after the application had exited. The application does a zookeeper_close()
followed by a quick_exit(0). The lingering ephemeral nodes had some
unintended side effects and I got the ephemeral node deletion callback only
after the session expired due to timeout. The timeout is 180 seconds for
me.
I added the below fix and it is working as intended. I noticed that a
similar fix was committed to address ZOOKEEPER-1105 and then it was
reverted. Why was it reverted? Are there any side effects to the changes
below?
diff --git a/zookeeper-3.5.1-alpha/src/c/src/zookeeper.c
b/zookeeper-3.5.1-alpha_fix/src/c/src/zookeeper.c
index cbf55c7..fe874ec 100644
--- a/zookeeper-3.5.1-alpha/src/c/src/zookeeper.c
+++ b/zookeeper-3.5.1-alpha_fix/src/c/src/zookeeper.c
@@ -3250,6 +3250,7 @@ int zookeeper_close(zhandle_t *zh)
if (is_connected(zh)){
struct oarchive *oa;
struct RequestHeader h = {get_xid(), ZOO_CLOSE_OP};
+ struct timeval started;
LOG_INFO(LOGCALLBACK(zh), "Closing zookeeper sessionId=%#llx to
[%s]\n",
zh->client_id.client_id,zoo_get_current_server(zh));
oa = create_buffer_oarchive();
@@ -3266,6 +3267,45 @@ int zookeeper_close(zhandle_t *zh)
/* make sure the close request is sent; we set timeout to an
arbitrary
* (but reasonable) number of milliseconds since we want the call
to block*/
rc=adaptor_send_queue(zh, 3000);
+ get_system_time(&started);
+ while(1) {
+ /* Make sure that zookeeper closeSession request is sent to
the server.
+ * To assure that all data is sent to server, wait until a
read returns <= 0
+ * This indicates that the server has closed the socket. Or
return a timeout.
+ * */
+ fd_set rfds;
+ struct timeval waittime = {0, 50000};
+ int ret;
+ int elapsed;
+ int timeout_msecs = 1000;
+ struct timeval now;
+ get_system_time(&now);
+ elapsed=calculate_interval(&started,&now);
+ if (elapsed>timeout_msecs) {
+ rc = ZOPERATIONTIMEOUT;
+ break;
+ }
+ FD_ZERO(&rfds);
+ FD_SET( zh->fd , &rfds);
+ ret = select(zh->fd+1, &rfds, NULL, NULL, &waittime);
+
+ if (ret<=0) {
+ /* timed out or an error or POLLERR */
+ LOG_DEBUG(LOGCALLBACK(zh),"Timeout or System error when
waiting"
+ " for server's reply after sending a close
request, "
+ "sessionId=%#lx\n", zh->client_id.client_id);
+ rc = ret==0 ? ZOPERATIONTIMEOUT : ZSYSTEMERROR;
+ } else {
+ char buf[1000];
+ int ret = read(zh->fd, buf, sizeof(buf));
+ if (ret<=0) {
+ LOG_DEBUG(LOGCALLBACK(zh),"Read error %d, errno %d
sessionId=%#lx\n",
+ ret, errno, zh->client_id.client_id);
+ rc = ZOK;
+ break;
+ }
+ }
+ }
}else{
LOG_INFO(LOGCALLBACK(zh), "Freeing zookeeper resources for
sessionId=%#llx\n",
zh->client_id.client_id);
Thanks
Ashitha