You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@zookeeper.apache.org by ma...@apache.org on 2011/12/09 20:03:26 UTC

svn commit: r1212578 - in /zookeeper/trunk: ./ src/java/main/org/apache/zookeeper/server/quorum/ src/java/test/org/apache/zookeeper/server/quorum/ src/java/test/org/apache/zookeeper/test/

Author: mahadev
Date: Fri Dec  9 19:03:26 2011
New Revision: 1212578

URL: http://svn.apache.org/viewvc?rev=1212578&view=rev
Log:
ZOOKEEPER-1319. Missing data after restarting+expanding a cluster. (phunt and breed via mahadev)

Modified:
    zookeeper/trunk/CHANGES.txt
    zookeeper/trunk/src/java/main/org/apache/zookeeper/server/quorum/Leader.java
    zookeeper/trunk/src/java/test/org/apache/zookeeper/server/quorum/Zab1_0Test.java
    zookeeper/trunk/src/java/test/org/apache/zookeeper/test/FollowerResyncConcurrencyTest.java

Modified: zookeeper/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/zookeeper/trunk/CHANGES.txt?rev=1212578&r1=1212577&r2=1212578&view=diff
==============================================================================
--- zookeeper/trunk/CHANGES.txt (original)
+++ zookeeper/trunk/CHANGES.txt Fri Dec  9 19:03:26 2011
@@ -73,6 +73,9 @@ BUGFIXES:
   (Akira Kitada via mahadev)
 
   ZOOKEEPER-1317. Possible segfault in zookeeper_init. (Akira Kitada via mahadev)
+
+  ZOOKEEPER-1319. Missing data after restarting+expanding a cluster.
+  (phunt and breed via mahadev)
  
 IMPROVEMENTS:
 

Modified: zookeeper/trunk/src/java/main/org/apache/zookeeper/server/quorum/Leader.java
URL: http://svn.apache.org/viewvc/zookeeper/trunk/src/java/main/org/apache/zookeeper/server/quorum/Leader.java?rev=1212578&r1=1212577&r2=1212578&view=diff
==============================================================================
--- zookeeper/trunk/src/java/main/org/apache/zookeeper/server/quorum/Leader.java (original)
+++ zookeeper/trunk/src/java/main/org/apache/zookeeper/server/quorum/Leader.java Fri Dec  9 19:03:26 2011
@@ -313,11 +313,9 @@ public class Leader {
 
             zk.setZxid(ZxidUtils.makeZxid(epoch, 0));
 
-            /*
             synchronized(this){
                 lastProposed = zk.getZxid();
             }
-            */
 
             newLeaderProposal.packet = new QuorumPacket(NEWLEADER, zk.getZxid(),
                     null, null);

Modified: zookeeper/trunk/src/java/test/org/apache/zookeeper/server/quorum/Zab1_0Test.java
URL: http://svn.apache.org/viewvc/zookeeper/trunk/src/java/test/org/apache/zookeeper/server/quorum/Zab1_0Test.java?rev=1212578&r1=1212577&r2=1212578&view=diff
==============================================================================
--- zookeeper/trunk/src/java/test/org/apache/zookeeper/server/quorum/Zab1_0Test.java (original)
+++ zookeeper/trunk/src/java/test/org/apache/zookeeper/server/quorum/Zab1_0Test.java Fri Dec  9 19:03:26 2011
@@ -18,6 +18,8 @@
 
 package org.apache.zookeeper.server.quorum;
 
+import static org.junit.Assert.assertEquals;
+
 import java.io.ByteArrayOutputStream;
 import java.io.File;
 import java.io.FileNotFoundException;
@@ -36,8 +38,8 @@ import org.apache.jute.InputArchive;
 import org.apache.jute.OutputArchive;
 import org.apache.zookeeper.WatchedEvent;
 import org.apache.zookeeper.Watcher;
-import org.apache.zookeeper.ZooDefs;
 import org.apache.zookeeper.Watcher.Event.EventType;
+import org.apache.zookeeper.ZooDefs;
 import org.apache.zookeeper.data.Stat;
 import org.apache.zookeeper.server.ByteBufferInputStream;
 import org.apache.zookeeper.server.ByteBufferOutputStream;
@@ -506,7 +508,8 @@ public class Zab1_0Test {
                 oa.writeRecord(qp, null);
                 
                 readPacketSkippingPing(ia, qp);
-                Assert.assertEquals(Leader.DIFF, qp.getType());
+                Assert.assertEquals(Leader.SNAP, qp.getType());
+                deserializeSnapshot(ia);
                
                 readPacketSkippingPing(ia, qp);
                 Assert.assertEquals(Leader.NEWLEADER, qp.getType());
@@ -518,11 +521,28 @@ public class Zab1_0Test {
                 oa.writeRecord(qp, null);
 
                 readPacketSkippingPing(ia, qp);
+                Assert.assertEquals(Leader.NEWLEADER, qp.getType());
+                Assert.assertEquals(ZxidUtils.makeZxid(1, 0), qp.getZxid());
+                Assert.assertEquals(1, l.self.getAcceptedEpoch());
+                Assert.assertEquals(1, l.self.getCurrentEpoch());
+                
+                qp = new QuorumPacket(Leader.ACK, qp.getZxid(), null, null);
+                oa.writeRecord(qp, null);
+
+                readPacketSkippingPing(ia, qp);
                 Assert.assertEquals(Leader.UPTODATE, qp.getType());
             }
         });
     }
-    
+
+    private void deserializeSnapshot(InputArchive ia)
+            throws IOException {
+        ZKDatabase zkdb = new ZKDatabase(null);
+        zkdb.deserializeSnapshot(ia);
+        String signature = ia.readString("signature");
+        assertEquals("BenWasHere", signature);
+    }
+
     @Test
     public void testLeaderBehind() throws Exception {
         testLeaderConversation(new LeaderConversation() {
@@ -545,12 +565,23 @@ public class Zab1_0Test {
                 qp = new QuorumPacket(Leader.ACKEPOCH, 0, new byte[4], null);
                 oa.writeRecord(qp, null);
                 readPacketSkippingPing(ia, qp);
-                Assert.assertEquals(Leader.DIFF, qp.getType());
+                Assert.assertEquals(Leader.SNAP, qp.getType());
+                deserializeSnapshot(ia);
+
                 readPacketSkippingPing(ia, qp);
                 Assert.assertEquals(Leader.NEWLEADER, qp.getType());
                 Assert.assertEquals(ZxidUtils.makeZxid(21, 0), qp.getZxid());
+
                 qp = new QuorumPacket(Leader.ACK, qp.getZxid(), null, null);
                 oa.writeRecord(qp, null);
+
+                readPacketSkippingPing(ia, qp);
+                Assert.assertEquals(Leader.NEWLEADER, qp.getType());
+                Assert.assertEquals(ZxidUtils.makeZxid(21, 0), qp.getZxid());
+
+                qp = new QuorumPacket(Leader.ACK, qp.getZxid(), null, null);
+                oa.writeRecord(qp, null);
+
                 readPacketSkippingPing(ia, qp);
                 Assert.assertEquals(Leader.UPTODATE, qp.getType());
             }

Modified: zookeeper/trunk/src/java/test/org/apache/zookeeper/test/FollowerResyncConcurrencyTest.java
URL: http://svn.apache.org/viewvc/zookeeper/trunk/src/java/test/org/apache/zookeeper/test/FollowerResyncConcurrencyTest.java?rev=1212578&r1=1212577&r2=1212578&view=diff
==============================================================================
--- zookeeper/trunk/src/java/test/org/apache/zookeeper/test/FollowerResyncConcurrencyTest.java (original)
+++ zookeeper/trunk/src/java/test/org/apache/zookeeper/test/FollowerResyncConcurrencyTest.java Fri Dec  9 19:03:26 2011
@@ -40,6 +40,7 @@ import org.apache.zookeeper.ZooKeeper;
 import org.apache.zookeeper.server.ZKDatabase;
 import org.apache.zookeeper.server.quorum.Leader;
 import org.apache.zookeeper.test.ClientBase.CountdownWatcher;
+import org.junit.Assert;
 import org.junit.Test;
 
 
@@ -50,6 +51,83 @@ public class FollowerResyncConcurrencyTe
     private volatile int counter = 0;
 
     /**
+     * See ZOOKEEPER-1319 - verify that a lagging follower resyncs correctly
+     * 
+     * 1) start with down quorum
+     * 2) start leader/follower1, add some data
+     * 3) restart leader/follower1
+     * 4) start follower2
+     * 5) verify data consistency across the ensemble
+     * 
+     * @throws Exception
+     */
+    @Test
+    public void testLaggingFollowerResyncsUnderNewEpoch() throws Exception {
+        CountdownWatcher watcher1 = new CountdownWatcher();
+        CountdownWatcher watcher2 = new CountdownWatcher();
+        CountdownWatcher watcher3 = new CountdownWatcher();
+
+        QuorumUtil qu = new QuorumUtil(1);
+        qu.shutdownAll();
+
+        qu.start(1);
+        qu.start(2);
+        Assert.assertTrue("Waiting for server up", ClientBase.waitForServerUp("127.0.0.1:"
+                + qu.getPeer(1).clientPort, ClientBase.CONNECTION_TIMEOUT));
+        Assert.assertTrue("Waiting for server up", ClientBase.waitForServerUp("127.0.0.1:"
+                + qu.getPeer(2).clientPort, ClientBase.CONNECTION_TIMEOUT));
+
+        ZooKeeper zk1 =
+                createClient(qu.getPeer(1).peer.getClientPort(), watcher1);
+        LOG.info("zk1 has session id 0x" + Long.toHexString(zk1.getSessionId()));
+
+        final String resyncPath = "/resyncundernewepoch";
+        zk1.create(resyncPath, null, ZooDefs.Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT);
+        zk1.close();
+
+        qu.shutdown(1);
+        qu.shutdown(2);
+        Assert.assertTrue("Waiting for server down", ClientBase.waitForServerDown("127.0.0.1:"
+                + qu.getPeer(1).clientPort, ClientBase.CONNECTION_TIMEOUT));
+        Assert.assertTrue("Waiting for server down", ClientBase.waitForServerDown("127.0.0.1:"
+                + qu.getPeer(2).clientPort, ClientBase.CONNECTION_TIMEOUT));
+        
+        qu.start(1);
+        qu.start(2);
+        Assert.assertTrue("Waiting for server up", ClientBase.waitForServerUp("127.0.0.1:"
+                + qu.getPeer(1).clientPort, ClientBase.CONNECTION_TIMEOUT));
+        Assert.assertTrue("Waiting for server up", ClientBase.waitForServerUp("127.0.0.1:"
+                + qu.getPeer(2).clientPort, ClientBase.CONNECTION_TIMEOUT));
+
+        qu.start(3);
+        Assert.assertTrue("Waiting for server up", ClientBase.waitForServerUp("127.0.0.1:"
+                + qu.getPeer(3).clientPort, ClientBase.CONNECTION_TIMEOUT));
+
+        zk1 = createClient(qu.getPeer(1).peer.getClientPort(), watcher1);
+        LOG.info("zk1 has session id 0x" + Long.toHexString(zk1.getSessionId()));
+        
+        assertNotNull("zk1 has data", zk1.exists(resyncPath, false));
+
+        final ZooKeeper zk2 =
+                createClient(qu.getPeer(2).peer.getClientPort(), watcher2);
+        LOG.info("zk2 has session id 0x" + Long.toHexString(zk2.getSessionId()));
+
+        assertNotNull("zk2 has data", zk2.exists(resyncPath, false));
+
+        final ZooKeeper zk3 =
+            createClient(qu.getPeer(3).peer.getClientPort(), watcher3);
+        LOG.info("zk3 has session id 0x" + Long.toHexString(zk3.getSessionId()));
+
+        assertNotNull("zk3 has data", zk3.exists(resyncPath, false));
+
+        zk1.close();
+        zk2.close();
+        zk3.close();
+        
+        qu.shutdownAll();
+    }      
+
+    /**
      * See ZOOKEEPER-962. This tests for one of the bugs hit while fixing this,
      * setting the ZXID of the SNAP packet
      * Starts up 3 ZKs. Shut down F1, write a node, restart the one that was shut down