You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@druid.apache.org by ka...@apache.org on 2023/03/21 09:15:43 UTC

[druid] branch master updated: Make zk connection retries configurable (#13913)

This is an automated email from the ASF dual-hosted git repository.

karan pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/druid.git


The following commit(s) were added to refs/heads/master by this push:
     new 617c325c70 Make zk connection retries configurable (#13913)
617c325c70 is described below

commit 617c325c70bf33e4e9e7ae8016c8f2b777869526
Author: Atul Mohan <at...@gmail.com>
AuthorDate: Tue Mar 21 02:15:28 2023 -0700

    Make zk connection retries configurable (#13913)
    
    * This makes the ZooKeeper connection retry count configurable. It is presently hardcoded to 29 retries, which causes a Druid node to take a long time to shut down in case of ZK connectivity loss.
    Having a shorter retry count helps k8s deployments to fail fast. In situations where the underlying k8s node loses network connectivity or is no longer able to talk to zookeeper, failing fast can trigger pod restarts which can then reassign the pod to a healthy k8s node.
    Existing behavior is preserved, but users can override this property if needed.
---
 .../src/main/java/org/apache/druid/curator/CuratorConfig.java  | 10 ++++++++++
 .../src/main/java/org/apache/druid/curator/CuratorModule.java  |  3 +--
 .../test/java/org/apache/druid/curator/CuratorConfigTest.java  |  3 +++
 3 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/server/src/main/java/org/apache/druid/curator/CuratorConfig.java b/server/src/main/java/org/apache/druid/curator/CuratorConfig.java
index 68ce0812de..7a53ee941d 100644
--- a/server/src/main/java/org/apache/druid/curator/CuratorConfig.java
+++ b/server/src/main/java/org/apache/druid/curator/CuratorConfig.java
@@ -58,6 +58,11 @@ public class CuratorConfig
   @JsonProperty("authScheme")
   private String authScheme = "digest";
 
+  // Configures the maximum number of retries for attempting connection to Zookeeper.
+  // Smaller retry counts help nodes fail fast in case of ZK connection loss.
+  @JsonProperty("maxZkRetries")
+  private int maxZkRetries = 29;
+
   public static CuratorConfig create(String hosts)
   {
     CuratorConfig config = new CuratorConfig();
@@ -131,4 +136,9 @@ public class CuratorConfig
   {
     return authScheme;
   }
+
+  public int getMaxZkRetries()
+  {
+    return maxZkRetries;
+  }
 }
diff --git a/server/src/main/java/org/apache/druid/curator/CuratorModule.java b/server/src/main/java/org/apache/druid/curator/CuratorModule.java
index 07c0ad8478..201c96bfdf 100644
--- a/server/src/main/java/org/apache/druid/curator/CuratorModule.java
+++ b/server/src/main/java/org/apache/druid/curator/CuratorModule.java
@@ -49,7 +49,6 @@ public class CuratorModule implements Module
 
   static final int BASE_SLEEP_TIME_MS = 1000;
   static final int MAX_SLEEP_TIME_MS = 45000;
-  private static final int MAX_RETRIES = 29;
 
   private final boolean haltOnFailedStart;
 
@@ -89,7 +88,7 @@ public class CuratorModule implements Module
       );
     }
 
-    RetryPolicy retryPolicy = new BoundedExponentialBackoffRetry(BASE_SLEEP_TIME_MS, MAX_SLEEP_TIME_MS, MAX_RETRIES);
+    RetryPolicy retryPolicy = new BoundedExponentialBackoffRetry(BASE_SLEEP_TIME_MS, MAX_SLEEP_TIME_MS, config.getMaxZkRetries());
 
     return builder
         .ensembleProvider(new FixedEnsembleProvider(config.getZkHosts()))
diff --git a/server/src/test/java/org/apache/druid/curator/CuratorConfigTest.java b/server/src/test/java/org/apache/druid/curator/CuratorConfigTest.java
index b3719d85d1..ef4d4fc354 100644
--- a/server/src/test/java/org/apache/druid/curator/CuratorConfigTest.java
+++ b/server/src/test/java/org/apache/druid/curator/CuratorConfigTest.java
@@ -33,6 +33,7 @@ public class CuratorConfigTest extends JsonConfigTesterBase<CuratorConfig>
     propertyValues.put(getPropertyKey("user"), "test-zk-user");
     propertyValues.put(getPropertyKey("pwd"), "test-zk-pwd");
     propertyValues.put(getPropertyKey("authScheme"), "auth");
+    propertyValues.put(getPropertyKey("maxZkRetries"), "20");
     testProperties.putAll(propertyValues);
     configProvider.inject(testProperties, configurator);
     CuratorConfig config = configProvider.get().get();
@@ -41,6 +42,7 @@ public class CuratorConfigTest extends JsonConfigTesterBase<CuratorConfig>
     Assert.assertEquals("test-zk-user", config.getZkUser());
     Assert.assertEquals("test-zk-pwd", config.getZkPwd());
     Assert.assertEquals("auth", config.getAuthScheme());
+    Assert.assertEquals(20, config.getMaxZkRetries());
   }
 
   @Test
@@ -51,5 +53,6 @@ public class CuratorConfigTest extends JsonConfigTesterBase<CuratorConfig>
     Assert.assertEquals(false, config.getEnableAcl());
     Assert.assertNull(config.getZkUser());
     Assert.assertEquals("digest", config.getAuthScheme());
+    Assert.assertEquals(29, config.getMaxZkRetries());
   }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@druid.apache.org
For additional commands, e-mail: commits-help@druid.apache.org