You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@zookeeper.apache.org by fp...@apache.org on 2015/09/04 01:00:20 UTC

svn commit: r1701146 - in /zookeeper/branches/branch-3.4: CHANGES.txt src/java/main/org/apache/zookeeper/server/quorum/LearnerHandler.java src/java/test/org/apache/zookeeper/server/quorum/QuorumPeerMainTest.java

Author: fpj
Date: Thu Sep  3 23:00:20 2015
New Revision: 1701146

URL: http://svn.apache.org/r1701146
Log:
ZOOKEEPER-2033: zookeeper follower fails to start after a restart immediately following a new epoch (Asad Saeed via fpj)

Modified:
    zookeeper/branches/branch-3.4/CHANGES.txt
    zookeeper/branches/branch-3.4/src/java/main/org/apache/zookeeper/server/quorum/LearnerHandler.java
    zookeeper/branches/branch-3.4/src/java/test/org/apache/zookeeper/server/quorum/QuorumPeerMainTest.java

Modified: zookeeper/branches/branch-3.4/CHANGES.txt
URL: http://svn.apache.org/viewvc/zookeeper/branches/branch-3.4/CHANGES.txt?rev=1701146&r1=1701145&r2=1701146&view=diff
==============================================================================
--- zookeeper/branches/branch-3.4/CHANGES.txt (original)
+++ zookeeper/branches/branch-3.4/CHANGES.txt Thu Sep  3 23:00:20 2015
@@ -111,6 +111,9 @@ BUGFIXES:
   from zoo.cfg on Solaris 10 (grep issue, manifests as FAILED TO WRITE PID)
   (Chris Nauroth via rgs)
 
+  ZOOKEEPER-2033: zookeeper follower fails to start after
+  a restart immediately following a new epoch (Asad Saeed via fpj)
+
 IMPROVEMENTS:
 
   ZOOKEEPER-1575. adding .gitattributes to prevent CRLF and LF mismatches for

Modified: zookeeper/branches/branch-3.4/src/java/main/org/apache/zookeeper/server/quorum/LearnerHandler.java
URL: http://svn.apache.org/viewvc/zookeeper/branches/branch-3.4/src/java/main/org/apache/zookeeper/server/quorum/LearnerHandler.java?rev=1701146&r1=1701145&r2=1701146&view=diff
==============================================================================
--- zookeeper/branches/branch-3.4/src/java/main/org/apache/zookeeper/server/quorum/LearnerHandler.java (original)
+++ zookeeper/branches/branch-3.4/src/java/main/org/apache/zookeeper/server/quorum/LearnerHandler.java Thu Sep  3 23:00:20 2015
@@ -388,7 +388,13 @@ public class LearnerHandler extends ZooK
 
                 LinkedList<Proposal> proposals = leader.zk.getZKDatabase().getCommittedLog();
 
-                if (proposals.size() != 0) {
+                if (peerLastZxid == leader.zk.getZKDatabase().getDataTreeLastProcessedZxid()) {
+                    // Follower is already in sync with us, send an empty diff
+                    LOG.info("leader and follower are in sync, zxid=0x{}",
+                            Long.toHexString(peerLastZxid));
+                    packetToSend = Leader.DIFF;
+                    zxidToSend = peerLastZxid;
+                } else if (proposals.size() != 0) {
                     LOG.debug("proposal size is {}", proposals.size());
                     if ((maxCommittedLog >= peerLastZxid)
                             && (minCommittedLog <= peerLastZxid)) {
@@ -444,15 +450,6 @@ public class LearnerHandler extends ZooK
                     } else {
                         LOG.warn("Unhandled proposal scenario");
                     }
-                } else if (peerLastZxid == leader.zk.getZKDatabase().getDataTreeLastProcessedZxid()) {
-                    // The leader may recently take a snapshot, so the committedLog
-                    // is empty. We don't need to send snapshot if the follow
-                    // is already sync with in-memory db.
-                    LOG.debug("committedLog is empty but leader and follower "
-                            + "are in sync, zxid=0x{}",
-                            Long.toHexString(peerLastZxid));
-                    packetToSend = Leader.DIFF;
-                    zxidToSend = peerLastZxid;
                 } else {
                     // just let the state transfer happen
                     LOG.debug("proposals is empty");

Modified: zookeeper/branches/branch-3.4/src/java/test/org/apache/zookeeper/server/quorum/QuorumPeerMainTest.java
URL: http://svn.apache.org/viewvc/zookeeper/branches/branch-3.4/src/java/test/org/apache/zookeeper/server/quorum/QuorumPeerMainTest.java?rev=1701146&r1=1701145&r2=1701146&view=diff
==============================================================================
--- zookeeper/branches/branch-3.4/src/java/test/org/apache/zookeeper/server/quorum/QuorumPeerMainTest.java (original)
+++ zookeeper/branches/branch-3.4/src/java/test/org/apache/zookeeper/server/quorum/QuorumPeerMainTest.java Thu Sep  3 23:00:20 2015
@@ -795,4 +795,55 @@ public class QuorumPeerMainTest extends
         Assert.assertFalse("updatingEpoch file should get deleted",
                            updatingEpochFile.exists());
     }
+
+    @Test
+    public void testNewFollowerRestartAfterNewEpoch() throws Exception {
+        numServers = 3;
+
+        servers = LaunchServers(numServers);
+        waitForAll(servers.zk, States.CONNECTED);
+        String inputString = "test";
+        byte[] input = inputString.getBytes();
+        byte[] output;
+        String path = "/newepochzxidtest";
+
+        // Create a couple of nodes
+        servers.zk[0].create(path, input, Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT);
+        servers.zk[0].setData(path, input, -1);
+
+        // Make sure the updates were indeed committed. If they were not,
+        // the following statement will throw.
+        output = servers.zk[1].getData(path, false, null);
+
+        // Shut down everyone
+        for (int i=0; i < numServers; i++) {
+            servers.mt[i].shutdown();
+        }
+
+        LOG.info("resetting follower");
+        MainThread follower = servers.mt[0];
+        // delete the follower's information
+        File followerDataDir = new File(follower.dataDir, "version-2");
+        for(File file: followerDataDir.listFiles()) {
+            LOG.info("deleting " + file.getName());
+            file.delete();
+        }
+
+        // Start up everyone except the follower, wait for election.
+        for (int i=1; i < numServers; i++) {
+            servers.mt[i].start();
+        }
+        for (int i=1; i < numServers; i++) {
+            waitForOne(servers.zk[i], States.CONNECTED);
+        }
+
+        follower.start();
+        waitForAll(servers.zk, States.CONNECTED); // snapshot should be received
+
+        follower.shutdown();
+        follower.start();
+
+        Assert.assertFalse(follower.mainFailed.await(10, TimeUnit.SECONDS));
+        waitForAll(servers.zk, States.CONNECTED);
+    }
 }