You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@helix.apache.org by xy...@apache.org on 2023/04/27 20:36:44 UTC

[helix] 35/37: Add comment and description for ZkMetaClient reconnect handling (#2443)

This is an automated email from the ASF dual-hosted git repository.

xyuanlu pushed a commit to branch metaclient
in repository https://gitbox.apache.org/repos/asf/helix.git

commit 9af5dc99ffa4474baec1a104848536b3edb15467
Author: xyuanlu <xy...@gmail.com>
AuthorDate: Thu Apr 13 17:34:55 2023 -0700

    Add comment and description for ZkMetaClient reconnect handling (#2443)
    
    Add comment and description for ZkMetaClient reconnect handling
---
 .../helix/metaclient/impl/zk/ZkMetaClient.java     | 59 ++++++++++++++++++++--
 1 file changed, 55 insertions(+), 4 deletions(-)

diff --git a/meta-client/src/main/java/org/apache/helix/metaclient/impl/zk/ZkMetaClient.java b/meta-client/src/main/java/org/apache/helix/metaclient/impl/zk/ZkMetaClient.java
index af31423d0..984766254 100644
--- a/meta-client/src/main/java/org/apache/helix/metaclient/impl/zk/ZkMetaClient.java
+++ b/meta-client/src/main/java/org/apache/helix/metaclient/impl/zk/ZkMetaClient.java
@@ -449,12 +449,63 @@ public class ZkMetaClient<T> implements MetaClientInterface<T>, AutoCloseable {
     }
   }
 
+  /**
+   * MetaClient uses Helix ZkClient ({@link org.apache.helix.zookeeper.impl.client.ZkClient}) to
+   * connect to ZK. The current implementation of ZkClient auto-reconnects indefinitely. We use a
+   * monitor thread in ZkMetaClient to track reconnect status and close ZkClient if the client is
+   * still disconnected when the reconnect timeout is reached.
+   *
+   *
+   * case 1: Start the monitor thread when ZkMetaClient gets a disconnected event, to check the
+   *         connect state when the timeout is reached. If not reconnected by then, kill the
+   *         monitor thread and close ZkClient.
+   * [MetaClient thread]        ---------------------------------------------------------------
+   *                              ( When disconnected, schedule an event
+   *                              to check connect state after timeout)
+   * [Reconnect monitor thread]          --------------------------------------
+   *                                   ^                                     |  not reconnected when timed out
+   *                                  /                                      |
+   *                                 | disconnected event                    v
+   * [ZkClient]               -------X---------------------------------------X zkClient.close()
+   * [ZkClient exp back              |         X            X
+   *  -off retry connection]         |--------|--------------|--------------
+   *
+   *
+   * case 2: Start the monitor thread when ZkMetaClient gets a disconnected event, to check the
+   *         connect state at timeout. If reconnected before then, cancel the delayed monitor thread.
+   *
+   * [MetaClient thread]       ---------------------------------------------------------------
+   *                            (cancel scheduled task when reconnected)
+   * [Reconnect monitor]               ---------------------------------X
+   *                                  ^                                ^
+   *                                 /                                /
+   *                                | disconnected event             |  reconnected event
+   * [ZkClient]                -----X------------------------------------------------------
+   * [ZkClient exp back             |        X                      Y  Reconnected before timed out
+   *  -off retry connection]        |--------| ---------------------|
+   *
+   *
+   * case 3: Start the monitor thread when ZkMetaClient gets a disconnected event, to check the
+   *         connect state when the timeout is reached. If reconnecting fails with an error,
+   *         cancel the delayed monitor thread and close ZkClient.
+   * [MetaClient thread]       ---------------------------------------------------------------
+   *                          (cancel scheduled task and close ZkClient on reconnect error)
+   * [Reconnect monitor]              ----------------------------------X
+   *                                 ^                               ^  |
+   *                                /                           err /   |
+   *                               | disconnected event            |    v close ZkClient
+   * [ZkClient]               -----X-------------------------------X ---X
+   * [ZkClient exp back            |        X                     ^ Reconnect error
+   *  -off retry connection]       |--------| --------------------|
+   *
+   */
+
   private class ReconnectStateChangeListener implements IZkStateListener {
     // Schedule a monitor to track ZkClient auto reconnect when Disconnected
     // Cancel the monitor thread when connected.
     @Override
     public void handleStateChanged(Watcher.Event.KeeperState state) throws Exception {
-      if (state == Watcher.Event.KeeperState.Disconnected) {
+      if (state == Watcher.Event.KeeperState.Disconnected) {                        // ------case 1
         // Expired. start a new event monitoring retry
         _zkClientConnectionMutex.lockInterruptibly();
         try {
@@ -472,7 +523,7 @@ public class ZkMetaClient<T> implements MetaClientInterface<T>, AutoCloseable {
           _zkClientConnectionMutex.unlock();
         }
       } else if (state == Watcher.Event.KeeperState.SyncConnected
-          || state == Watcher.Event.KeeperState.ConnectedReadOnly) {
+          || state == Watcher.Event.KeeperState.ConnectedReadOnly) {               // ------ case 2
         cleanUpAndClose(true, false);
         LOG.info("ZkClient is SyncConnected, reconnect monitor thread is canceled (if any)");
       }
@@ -480,14 +531,14 @@ public class ZkMetaClient<T> implements MetaClientInterface<T>, AutoCloseable {
 
     // Cancel the monitor thread when connected.
     @Override
-    public void handleNewSession(String sessionId) throws Exception {
+    public void handleNewSession(String sessionId) throws Exception {             // ------ case 2
       cleanUpAndClose(true, false);
       LOG.info("New session initiated in ZkClient, reconnect monitor thread is canceled (if any)");
     }
 
     // Cancel the monitor thread and close ZkClient when connect error.
     @Override
-    public void handleSessionEstablishmentError(Throwable error) throws Exception {
+    public void handleSessionEstablishmentError(Throwable error) throws Exception {    // -- case 3
       cleanUpAndClose(true, true);
       LOG.info("New session initiated in ZkClient, reconnect monitor thread is canceled (if any)");
     }