You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@zookeeper.apache.org by an...@apache.org on 2019/07/31 09:04:18 UTC

[zookeeper] branch master updated: Revert "ZOOKEEPER-3320: Leader election port stop listen when hostname unresolvable for some time"

This is an automated email from the ASF dual-hosted git repository.

andor pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/zookeeper.git


The following commit(s) were added to refs/heads/master by this push:
     new a89c094  Revert "ZOOKEEPER-3320: Leader election port stop listen when hostname unresolvable for some time"
a89c094 is described below

commit a89c0942e45bb16e5282eee9d3a56ebbddbaae15
Author: Andor Molnar <an...@apache.org>
AuthorDate: Wed Jul 31 11:02:00 2019 +0200

    Revert "ZOOKEEPER-3320: Leader election port stop listen when hostname unresolvable for some time"
    
    This reverts commit 05ee9413e7a31703395b81fb8d72baf1cb09a46d.
---
 .../src/main/resources/markdown/zookeeperAdmin.md  | 12 ------
 .../zookeeper/server/quorum/QuorumCnxManager.java  | 46 ++++++----------------
 .../zookeeper/server/quorum/CnxManagerTest.java    | 29 --------------
 3 files changed, 13 insertions(+), 74 deletions(-)

diff --git a/zookeeper-docs/src/main/resources/markdown/zookeeperAdmin.md b/zookeeper-docs/src/main/resources/markdown/zookeeperAdmin.md
index 835e357..5c71832 100644
--- a/zookeeper-docs/src/main/resources/markdown/zookeeperAdmin.md
+++ b/zookeeper-docs/src/main/resources/markdown/zookeeperAdmin.md
@@ -1076,18 +1076,6 @@ As an example, this will enable all four letter word commands:
     properly, check your operating system's options regarding TCP
     keepalive for more information.  Defaults to
     **false**.
-    
-* *zookeeper.electionPortBindRetry* :
-    (Java system property only: **zookeeper.electionPortBindRetry**)
-    Property set max retry count when Zookeeper server fails to bind 
-    leader election port. Such errors can be temporary and recoverable, 
-    such as DNS issue described in [ZOOKEEPER-3320](https://issues.apache.org/jira/projects/ZOOKEEPER/issues/ZOOKEEPER-3320),
-    or non-retryable, such as port already in use.  
-    In case of transient errors, this property can improve availability 
-    of Zookeeper server and help it to self recover. 
-    Default value 3. In container environment, especially in Kubernetes, 
-    this value should be increased to overcome issues related to DNS name resolving.
-    
 
 * *observer.reconnectDelayMs* :
     (Java system property: **zookeeper.observer.reconnectDelayMs**)
diff --git a/zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/QuorumCnxManager.java b/zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/QuorumCnxManager.java
index 4be8fa6..d97da2a 100644
--- a/zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/QuorumCnxManager.java
+++ b/zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/QuorumCnxManager.java
@@ -18,8 +18,6 @@
 
 package org.apache.zookeeper.server.quorum;
 
-import static org.apache.zookeeper.common.NetUtils.formatInetAddr;
-
 import java.io.BufferedInputStream;
 import java.io.BufferedOutputStream;
 import java.io.DataInputStream;
@@ -38,7 +36,6 @@ import java.util.Collections;
 import java.util.Enumeration;
 import java.util.HashSet;
 import java.util.Map;
-import java.util.NoSuchElementException;
 import java.util.Set;
 import java.util.concurrent.ArrayBlockingQueue;
 import java.util.concurrent.ConcurrentHashMap;
@@ -46,20 +43,24 @@ import java.util.concurrent.SynchronousQueue;
 import java.util.concurrent.ThreadFactory;
 import java.util.concurrent.ThreadPoolExecutor;
 import java.util.concurrent.TimeUnit;
+import java.util.NoSuchElementException;
 import java.util.concurrent.atomic.AtomicInteger;
 import java.util.concurrent.atomic.AtomicLong;
-import javax.net.ssl.SSLSocket;
+
 import org.apache.zookeeper.common.X509Exception;
 import org.apache.zookeeper.server.ExitCode;
-import org.apache.zookeeper.server.ZooKeeperThread;
 import org.apache.zookeeper.server.quorum.QuorumPeerConfig.ConfigException;
+import org.apache.zookeeper.server.util.ConfigUtils;
+import org.apache.zookeeper.server.ZooKeeperThread;
 import org.apache.zookeeper.server.quorum.auth.QuorumAuthLearner;
 import org.apache.zookeeper.server.quorum.auth.QuorumAuthServer;
 import org.apache.zookeeper.server.quorum.flexible.QuorumVerifier;
-import org.apache.zookeeper.server.util.ConfigUtils;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import javax.net.ssl.SSLSocket;
+import static org.apache.zookeeper.common.NetUtils.formatInetAddr;
+
 /**
  * This class implements a connection manager for leader election using TCP. It
  * maintains one connection for every pair of servers. The tricky part is to
@@ -847,30 +848,12 @@ public class QuorumCnxManager {
      */
     public class Listener extends ZooKeeperThread {
 
-        private static final String ELECTION_PORT_BIND_RETRY = "zookeeper.electionPortBindRetry";
-        private static final int DEFAULT_PORT_BIND_MAX_RETRY = 3;
-
-        private final int portBindMaxRetry;
         volatile ServerSocket ss = null;
 
         public Listener() {
             // During startup of thread, thread name will be overridden to
             // specific election address
             super("ListenerThread");
-
-            // maximum retry count while trying to bind to election port
-            // see ZOOKEEPER-3320 for more details
-            final Integer maxRetry = Integer.getInteger(ELECTION_PORT_BIND_RETRY,
-                                                        DEFAULT_PORT_BIND_MAX_RETRY);
-            if (maxRetry >= 0) {
-                LOG.info("Election port bind maximum retries is {}", maxRetry);
-                portBindMaxRetry = maxRetry;
-            } else {
-                LOG.info("'{}' contains invalid value: {}(must be >= 0). "
-                         + "Use default value of {} instead.",
-                         ELECTION_PORT_BIND_RETRY, maxRetry, DEFAULT_PORT_BIND_MAX_RETRY);
-                portBindMaxRetry = DEFAULT_PORT_BIND_MAX_RETRY;
-            }
         }
 
         /**
@@ -882,7 +865,7 @@ public class QuorumCnxManager {
             InetSocketAddress addr;
             Socket client = null;
             Exception exitException = null;
-            while((!shutdown) && (numRetries < portBindMaxRetry)){
+            while((!shutdown) && (numRetries < 3)){
                 try {
                     if (self.shouldUsePortUnification()) {
                         LOG.info("Creating TLS-enabled quorum server socket");
@@ -952,14 +935,11 @@ public class QuorumCnxManager {
             }
             LOG.info("Leaving listener");
             if (!shutdown) {
-                LOG.error("As I'm leaving the listener thread after "
-                          + numRetries + " errors. "
-                          + "I won't be able to participate in leader "
-                          + "election any longer: "
-                          + formatInetAddr(self.getElectionAddress())
-                          + ". Use " + ELECTION_PORT_BIND_RETRY + " property to "
-                          + "increase retry count.");
-                if (exitException instanceof SocketException) {
+                LOG.error("As I'm leaving the listener thread, "
+                        + "I won't be able to participate in leader "
+                        + "election any longer: "
+                        + formatInetAddr(self.getElectionAddress()));
+                if (exitException instanceof BindException) {
                     // After leaving listener thread, the host cannot join the
                     // quorum anymore, this is a severe error that we cannot
                     // recover from, so we need to exit
diff --git a/zookeeper-server/src/test/java/org/apache/zookeeper/server/quorum/CnxManagerTest.java b/zookeeper-server/src/test/java/org/apache/zookeeper/server/quorum/CnxManagerTest.java
index 200ed99..878e41b 100644
--- a/zookeeper-server/src/test/java/org/apache/zookeeper/server/quorum/CnxManagerTest.java
+++ b/zookeeper-server/src/test/java/org/apache/zookeeper/server/quorum/CnxManagerTest.java
@@ -291,35 +291,6 @@ public class CnxManagerTest extends ZKTestCase {
     }
 
     /**
-     * Test for bug described in {@link https://issues.apache.org/jira/browse/ZOOKEEPER-3320}.
-     * Test create peer with address which contains unresolvable DNS name,
-     * leader election listener thread should stop after N errors.
-     *
-     * @throws Exception
-     */
-    @Test
-    public void testCnxManagerListenerThreadConfigurableRetry() throws Exception {
-        final Map<Long,QuorumServer> unresolvablePeers = new HashMap<>();
-        final long myid = 1L;
-        unresolvablePeers.put(myid, new QuorumServer(myid, "unresolvable-domain.org:2182:2183;2181"));
-        final QuorumPeer peer = new QuorumPeer(unresolvablePeers,
-                                               ClientBase.createTmpDir(),
-                                               ClientBase.createTmpDir(),
-                                               2181, 3, myid, 1000, 2, 2, 2);
-        final QuorumCnxManager cnxManager = peer.createCnxnManager();
-        QuorumCnxManager.Listener listener = cnxManager.listener;
-        listener.start();
-        // listener thread should stop and throws error which notify QuorumPeer about error.
-        // QuorumPeer should start shutdown process
-        listener.join(15000); // set wait time, if listener contains bug and thread not stops.
-        Assert.assertFalse(listener.isAlive());
-        Assert.assertFalse(peer.isRunning());
-        peer.join(15000);
-        Assert.assertFalse(QuorumPeer.class.getSimpleName() + " not stopped after "
-                           + "listener thread death", listener.isAlive());
-    }
-
-    /**
      * Tests a bug in QuorumCnxManager that causes a NPE when a 3.4.6
      * observer connects to a 3.5.0 server. 
      * see https://issues.apache.org/jira/browse/ZOOKEEPER-1789