You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@zookeeper.apache.org by mi...@apache.org on 2015/03/15 08:54:59 UTC

svn commit: r1666784 - in /zookeeper/trunk: CHANGES.txt src/java/main/org/apache/zookeeper/server/quorum/Learner.java src/java/test/org/apache/zookeeper/server/quorum/LearnerTest.java

Author: michim
Date: Sun Mar 15 07:54:59 2015
New Revision: 1666784

URL: http://svn.apache.org/r1666784
Log:
ZOOKEEPER-1865 Fix retry logic in Learner.connectToLeader() (Edward Carter via michim)

Modified:
    zookeeper/trunk/CHANGES.txt
    zookeeper/trunk/src/java/main/org/apache/zookeeper/server/quorum/Learner.java
    zookeeper/trunk/src/java/test/org/apache/zookeeper/server/quorum/LearnerTest.java

Modified: zookeeper/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/zookeeper/trunk/CHANGES.txt?rev=1666784&r1=1666783&r2=1666784&view=diff
==============================================================================
--- zookeeper/trunk/CHANGES.txt (original)
+++ zookeeper/trunk/CHANGES.txt Sun Mar 15 07:54:59 2015
@@ -48,9 +48,12 @@ BUGFIXES:
   (Michi Mutsuzaki via rakeshr)
 
   ZOOKEEPER-2137 Make testPortChange() less flaky (Hongchao Deng via michim)
-  
+
   ZOOKEEPER-1893. automake: use serial-tests option (michim via camille)
 
+  ZOOKEEPER-1865 Fix retry logic in Learner.connectToLeader() (Edward Carter
+  via michim)
+
 IMPROVEMENTS:
   ZOOKEEPER-1660 Documentation for Dynamic Reconfiguration (Reed Wanderman-Milne via shralex)  
 

Modified: zookeeper/trunk/src/java/main/org/apache/zookeeper/server/quorum/Learner.java
URL: http://svn.apache.org/viewvc/zookeeper/trunk/src/java/main/org/apache/zookeeper/server/quorum/Learner.java?rev=1666784&r1=1666783&r2=1666784&view=diff
==============================================================================
--- zookeeper/trunk/src/java/main/org/apache/zookeeper/server/quorum/Learner.java (original)
+++ zookeeper/trunk/src/java/main/org/apache/zookeeper/server/quorum/Learner.java Sun Mar 15 07:54:59 2015
@@ -214,10 +214,27 @@ public class Learner {
         }
         return addr;
     }
-    
+   
+    /**
+     * Overridable helper method to return the System.nanoTime().
+     * This method behaves identical to System.nanoTime().
+     */
+    protected long nanoTime() {
+        return System.nanoTime();
+    }
+
+    /**
+     * Overridable helper method to simply call sock.connect(). This can be
+     * overriden in tests to fake connection success/failure for connectToLeader. 
+     */
+    protected void sockConnect(Socket sock, InetSocketAddress addr, int timeout) 
+    throws IOException {
+        sock.connect(addr, timeout);
+    }
+
     /**
      * Establish a connection with the Leader found by findLeader. Retries
-     * 5 times before giving up. 
+     * until either initLimit time has elapsed or 5 tries have happened. 
      * @param addr - the address of the Leader to connect to.
      * @throws IOException - if the socket connection fails on the 5th attempt
      * @throws ConnectException
@@ -227,17 +244,39 @@ public class Learner {
     throws IOException, ConnectException, InterruptedException {
         sock = new Socket();        
         sock.setSoTimeout(self.tickTime * self.initLimit);
+
+        int initLimitTime = self.tickTime * self.initLimit;
+        int remainingInitLimitTime = initLimitTime;
+        long startNanoTime = nanoTime();
+
         for (int tries = 0; tries < 5; tries++) {
             try {
-                sock.connect(addr, self.tickTime * self.syncLimit);
+                // recalculate the init limit time because retries sleep for 1000 milliseconds
+                remainingInitLimitTime = initLimitTime - (int)((nanoTime() - startNanoTime) / 1000000);
+                if (remainingInitLimitTime <= 0) {
+                    LOG.error("initLimit exceeded on retries.");
+                    throw new IOException("initLimit exceeded on retries.");
+                }
+
+                sockConnect(sock, addr, Math.min(self.tickTime * self.syncLimit, remainingInitLimitTime));
                 sock.setTcpNoDelay(nodelay);
                 break;
             } catch (IOException e) {
-                if (tries == 4) {
-                    LOG.error("Unexpected exception",e);
+                remainingInitLimitTime = initLimitTime - (int)((nanoTime() - startNanoTime) / 1000000);
+
+                if (remainingInitLimitTime <= 1000) {
+                    LOG.error("Unexpected exception, initLimit exceeded. tries=" + tries +
+                             ", remaining init limit=" + remainingInitLimitTime +
+                             ", connecting to " + addr,e);
+                    throw e;
+                } else if (tries >= 4) {
+                    LOG.error("Unexpected exception, retries exceeded. tries=" + tries +
+                             ", remaining init limit=" + remainingInitLimitTime +
+                             ", connecting to " + addr,e);
                     throw e;
                 } else {
-                    LOG.warn("Unexpected exception, tries="+tries+
+                    LOG.warn("Unexpected exception, tries=" + tries +
+                            ", remaining init limit=" + remainingInitLimitTime +
                             ", connecting to " + addr,e);
                     sock = new Socket();
                     sock.setSoTimeout(self.tickTime * self.initLimit);

Modified: zookeeper/trunk/src/java/test/org/apache/zookeeper/server/quorum/LearnerTest.java
URL: http://svn.apache.org/viewvc/zookeeper/trunk/src/java/test/org/apache/zookeeper/server/quorum/LearnerTest.java?rev=1666784&r1=1666783&r2=1666784&view=diff
==============================================================================
--- zookeeper/trunk/src/java/test/org/apache/zookeeper/server/quorum/LearnerTest.java (original)
+++ zookeeper/trunk/src/java/test/org/apache/zookeeper/server/quorum/LearnerTest.java Sun Mar 15 07:54:59 2015
@@ -24,6 +24,7 @@ import java.io.ByteArrayOutputStream;
 import java.io.EOFException;
 import java.io.File;
 import java.io.IOException;
+import java.net.InetSocketAddress;
 import java.net.Socket;
 import java.util.ArrayList;
 
@@ -77,6 +78,77 @@ public class LearnerTest extends ZKTestC
         }
     }
 
+    static class TimeoutLearner extends Learner {
+        int passSocketConnectOnAttempt = 10;
+        int socketConnectAttempt = 0;
+        long timeMultiplier = 0;
+
+        public void setTimeMultiplier(long multiplier) {
+            timeMultiplier = multiplier;
+        }
+
+        public void setPassConnectAttempt(int num) {
+            passSocketConnectOnAttempt = num;
+        }
+
+        protected long nanoTime() {
+            return socketConnectAttempt * timeMultiplier;
+        }
+        
+        protected int getSockConnectAttempt() {
+            return socketConnectAttempt;
+        }
+
+        @Override
+        protected void sockConnect(Socket sock, InetSocketAddress addr, int timeout) 
+        throws IOException {
+            if (++socketConnectAttempt < passSocketConnectOnAttempt)    {
+                throw new IOException("Test injected Socket.connect() error.");
+            }
+        }
+    }
+
+    @Test(expected=IOException.class)
+    public void connectionRetryTimeoutTest() throws Exception {
+        Learner learner = new TimeoutLearner();
+        learner.self = new QuorumPeer();
+        learner.self.setTickTime(2000);
+        learner.self.setInitLimit(5);
+        learner.self.setSyncLimit(2);
+
+        // this addr won't even be used since we fake the Socket.connect
+        InetSocketAddress addr = new InetSocketAddress(1111);
+
+        // we expect this to throw an IOException since we're faking socket connect errors every time
+        learner.connectToLeader(addr);
+    }
+    @Test
+    public void connectionInitLimitTimeoutTest() throws Exception {
+        TimeoutLearner learner = new TimeoutLearner();
+        learner.self = new QuorumPeer();
+        learner.self.setTickTime(2000);
+        learner.self.setInitLimit(5);
+        learner.self.setSyncLimit(2);
+
+        // this addr won't even be used since we fake the Socket.connect
+        InetSocketAddress addr = new InetSocketAddress(1111);
+        
+        // pretend each connect attempt takes 4000 milliseconds
+        learner.setTimeMultiplier((long)4000 * 1000000);
+        
+        learner.setPassConnectAttempt(5);
+
+        // we expect this to throw an IOException since we're faking socket connect errors every time
+        try {
+            learner.connectToLeader(addr);
+            Assert.fail("should have thrown IOException!");
+        } catch (IOException e) {
+            //good, wanted to see that, let's make sure we ran out of time
+            Assert.assertTrue(learner.nanoTime() > 2000*5*1000000);
+            Assert.assertEquals(3, learner.getSockConnectAttempt());
+        }
+    }
+
     @Test
     public void syncTest() throws Exception {
         File tmpFile = File.createTempFile("test", ".dir", testData);