You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@zookeeper.apache.org by an...@apache.org on 2018/11/08 18:56:12 UTC

zookeeper git commit: ZOOKEEPER-3125: Fixing pzxid consistent issue when replaying a txn for a deleted node

Repository: zookeeper
Updated Branches:
  refs/heads/branch-3.5 bdfa704a6 -> 14364ebb4


ZOOKEEPER-3125: Fixing pzxid consistent issue when replaying a txn for a deleted node

Port this to 3.5, this also fixed the issue where the pzxid might be overwritten by a smaller one when replaying deleteNode txn, which will be fixed in a separate PR for 3.6.

Author: Fangmin Lyu <al...@fb.com>

Reviewers: andor@apache.org

Closes #647 from lvfangmin/ZOOKEEPER-3125-3.5 and squashes the following commits:

53670f9c7 [Fangmin Lyu] remove RetryRule in the patch
cee39c742 [Fangmin Lyu] move RetryRule to the zookeeper-server/src/test folder
a8b8d6669 [Fangmin Lyu] waiting States.CONNECTING after shutdown
7528d1559 [Fangmin Lyu] retry test if it is due to the connection loss
5fbf7da3b [Fangmin Lyu] ZOOKEEPER-3125: Fixing pzxid consistent issue when replaying a txn for a deleted node


Project: http://git-wip-us.apache.org/repos/asf/zookeeper/repo
Commit: http://git-wip-us.apache.org/repos/asf/zookeeper/commit/14364ebb
Tree: http://git-wip-us.apache.org/repos/asf/zookeeper/tree/14364ebb
Diff: http://git-wip-us.apache.org/repos/asf/zookeeper/diff/14364ebb

Branch: refs/heads/branch-3.5
Commit: 14364ebb4ce6eacabca7dee22b442fe42519f703
Parents: bdfa704
Author: Fangmin Lyu <al...@fb.com>
Authored: Thu Nov 8 10:56:08 2018 -0800
Committer: Andor Molnar <an...@apache.org>
Committed: Thu Nov 8 10:56:08 2018 -0800

----------------------------------------------------------------------
 .../org/apache/zookeeper/server/DataTree.java   |  38 ++++--
 .../apache/zookeeper/server/DataTreeTest.java   |  25 ++++
 .../server/quorum/FuzzySnapshotRelatedTest.java | 121 +++++++++++++++++++
 3 files changed, 176 insertions(+), 8 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/zookeeper/blob/14364ebb/zookeeper-server/src/main/java/org/apache/zookeeper/server/DataTree.java
----------------------------------------------------------------------
diff --git a/zookeeper-server/src/main/java/org/apache/zookeeper/server/DataTree.java b/zookeeper-server/src/main/java/org/apache/zookeeper/server/DataTree.java
index 90e92c5..2e24d63 100644
--- a/zookeeper-server/src/main/java/org/apache/zookeeper/server/DataTree.java
+++ b/zookeeper-server/src/main/java/org/apache/zookeeper/server/DataTree.java
@@ -527,6 +527,24 @@ public class DataTree {
         int lastSlash = path.lastIndexOf('/');
         String parentName = path.substring(0, lastSlash);
         String childName = path.substring(lastSlash + 1);
+
+        // The child might already be deleted during taking fuzzy snapshot,
+        // but we still need to update the pzxid here before throw exception
+        // for no such child
+        DataNode parent = nodes.get(parentName);
+        if (parent == null) {
+            throw new KeeperException.NoNodeException();
+        }
+        synchronized (parent) {
+            parent.removeChild(childName);
+            // Only update pzxid when the zxid is larger than the current pzxid,
+            // otherwise we might override higher pzxid set by a following create 
+            // Txn, which could cause the cversion and pzxid inconsistent
+            if (zxid > parent.stat.getPzxid()) {
+                parent.stat.setPzxid(zxid);
+            }
+        }
+
         DataNode node = nodes.get(path);
         if (node == null) {
             throw new KeeperException.NoNodeException();
@@ -535,13 +553,11 @@ public class DataTree {
         synchronized (node) {
             aclCache.removeUsage(node.acl);
         }
-        DataNode parent = nodes.get(parentName);
-        if (parent == null) {
-            throw new KeeperException.NoNodeException();
-        }
+
+        // Synchronized to sync the containers and ttls change, probably
+        // only need to sync on containers and ttls, will update it in a
+        // separate patch.
         synchronized (parent) {
-            parent.removeChild(childName);
-            parent.stat.setPzxid(zxid);
             long eowner = node.stat.getEphemeralOwner();
             EphemeralType ephemeralType = EphemeralType.get(eowner);
             if (ephemeralType == EphemeralType.CONTAINER) {
@@ -557,6 +573,7 @@ public class DataTree {
                 }
             }
         }
+
         if (parentName.startsWith(procZookeeper) && Quotas.limitNode.equals(childName)) {
             // delete the node in the trie.
             // we need to update the trie as well
@@ -1178,8 +1195,7 @@ public class DataTree {
             Set<String> childs = node.getChildren();
             children = childs.toArray(new String[childs.size()]);
         }
-        oa.writeString(pathString, "path");
-        oa.writeRecord(nodeCopy, "node");
+        serializeNodeData(oa, pathString, nodeCopy);
         path.append('/');
         int off = path.length();
         for (String child : children) {
@@ -1192,6 +1208,12 @@ public class DataTree {
         }
     }
 
+    // visiable for test
+    public void serializeNodeData(OutputArchive oa, String path, DataNode node) throws IOException {
+        oa.writeString(path, "path");
+        oa.writeRecord(node, "node");
+    }
+
     public void serialize(OutputArchive oa, String tag) throws IOException {
         aclCache.serialize(oa);
         serializeNode(oa, new StringBuilder(""));

http://git-wip-us.apache.org/repos/asf/zookeeper/blob/14364ebb/zookeeper-server/src/test/java/org/apache/zookeeper/server/DataTreeTest.java
----------------------------------------------------------------------
diff --git a/zookeeper-server/src/test/java/org/apache/zookeeper/server/DataTreeTest.java b/zookeeper-server/src/test/java/org/apache/zookeeper/server/DataTreeTest.java
index 57497ce..def2921 100644
--- a/zookeeper-server/src/test/java/org/apache/zookeeper/server/DataTreeTest.java
+++ b/zookeeper-server/src/test/java/org/apache/zookeeper/server/DataTreeTest.java
@@ -153,6 +153,31 @@ public class DataTreeTest extends ZKTestCase {
                 (newCversion == prevCversion + 1 && newPzxid == prevPzxid + 1));
     }
 
+    @Test
+    public void testPzxidUpdatedWhenDeletingNonExistNode() throws Exception {
+        DataNode root = dt.getNode("/");
+        long currentPzxid = root.stat.getPzxid();
+
+        // pzxid updated with deleteNode on higher zxid
+        long zxid = currentPzxid + 1;
+        try {
+            dt.deleteNode("/testPzxidUpdatedWhenDeletingNonExistNode", zxid);
+        } catch (NoNodeException e) { /* expected */ }
+        root = dt.getNode("/");
+        currentPzxid = root.stat.getPzxid();
+        Assert.assertEquals(currentPzxid, zxid);
+
+        // pzxid not updated with smaller zxid
+        long prevPzxid = currentPzxid;
+        zxid = prevPzxid - 1;
+        try {
+            dt.deleteNode("/testPzxidUpdatedWhenDeletingNonExistNode", zxid);
+        } catch (NoNodeException e) { /* expected */ }
+        root = dt.getNode("/");
+        currentPzxid = root.stat.getPzxid();
+        Assert.assertEquals(currentPzxid, prevPzxid);
+    }
+
     @Test(timeout = 60000)
     public void testPathTrieClearOnDeserialize() throws Exception {
 

http://git-wip-us.apache.org/repos/asf/zookeeper/blob/14364ebb/zookeeper-server/src/test/java/org/apache/zookeeper/server/quorum/FuzzySnapshotRelatedTest.java
----------------------------------------------------------------------
diff --git a/zookeeper-server/src/test/java/org/apache/zookeeper/server/quorum/FuzzySnapshotRelatedTest.java b/zookeeper-server/src/test/java/org/apache/zookeeper/server/quorum/FuzzySnapshotRelatedTest.java
index b49a426..1499c8c 100644
--- a/zookeeper-server/src/test/java/org/apache/zookeeper/server/quorum/FuzzySnapshotRelatedTest.java
+++ b/zookeeper-server/src/test/java/org/apache/zookeeper/server/quorum/FuzzySnapshotRelatedTest.java
@@ -18,22 +18,28 @@
 
 package org.apache.zookeeper.server.quorum;
 
+import java.io.IOException;
 import java.util.Arrays;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 import javax.security.sasl.SaslException;
 
+import org.apache.jute.OutputArchive;
+
 import org.apache.zookeeper.CreateMode;
+import org.apache.zookeeper.KeeperException.ConnectionLossException;
 import org.apache.zookeeper.KeeperException.NoNodeException;
 import org.apache.zookeeper.KeeperException.NodeExistsException;
 import org.apache.zookeeper.Op;
+
 import org.apache.zookeeper.PortAssignment;
 import org.apache.zookeeper.ZooDefs.Ids;
 import org.apache.zookeeper.ZooKeeper;
 import org.apache.zookeeper.ZooKeeper.States;
 import org.apache.zookeeper.data.ACL;
 import org.apache.zookeeper.data.Stat;
+import org.apache.zookeeper.server.DataNode;
 import org.apache.zookeeper.server.DataTree;
 import org.apache.zookeeper.server.ZKDatabase;
 import org.apache.zookeeper.server.ZooKeeperServer;
@@ -42,6 +48,7 @@ import org.apache.zookeeper.test.ClientBase;
 import org.junit.Assert;
 import org.junit.After;
 import org.junit.Before;
+import org.junit.Rule;
 import org.junit.Test;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -162,6 +169,100 @@ public class FuzzySnapshotRelatedTest extends QuorumPeerTestBase {
                 new String(zk[followerA].getData(node2, null, null)));
     }
 
+    /**
+     * It's possibel during SNAP sync, the parent is serialized before the
+     * child get deleted during sending the snapshot over.
+     *
+     * In which case, we need to make sure the pzxid get correctly updated
+     * when applying the txns received.
+     */
+    @Test
+    public void testPZxidUpdatedDuringSnapSyncing() throws Exception {
+        LOG.info("Enable force snapshot sync");
+        System.setProperty(LearnerHandler.FORCE_SNAP_SYNC, "true");
+
+        final String parent = "/testPZxidUpdatedWhenDeletingNonExistNode";
+        final String child = parent + "/child";
+        createEmptyNode(zk[leaderId], parent);
+        createEmptyNode(zk[leaderId], child);
+
+        LOG.info("shutdown follower {}", followerA);
+        mt[followerA].shutdown();
+        QuorumPeerMainTest.waitForOne(zk[followerA], States.CONNECTING);
+
+        LOG.info("Set up ZKDatabase to catch the node serializing in DataTree");
+        addSerializeListener(leaderId, parent, child);
+
+        LOG.info("Restart follower A to trigger a SNAP sync with leader");
+        mt[followerA].start();
+        QuorumPeerMainTest.waitForOne(zk[followerA], States.CONNECTED);
+
+        LOG.info("Check and make sure the pzxid of the parent is the same " +
+                "on leader and follower A");
+        compareStat(parent, leaderId, followerA);
+    }
+
+    /**
+     * It's possible during taking fuzzy snapshot, the parent is serialized
+     * before the child get deleted in the fuzzy range.
+     *
+     * In which case, we need to make sure the pzxid get correctly updated
+     * when replaying the txns.
+     */
+    @Test
+    public void testPZxidUpdatedWhenLoadingSnapshot() throws Exception {
+
+        final String parent = "/testPZxidUpdatedDuringTakingSnapshot";
+        final String child = parent + "/child";
+        createEmptyNode(zk[followerA], parent);
+        createEmptyNode(zk[followerA], child);
+
+        LOG.info("Set up ZKDatabase to catch the node serializing in DataTree");
+        addSerializeListener(followerA, parent, child);
+
+        LOG.info("Take snapshot on follower A");
+        ZooKeeperServer zkServer = mt[followerA].main.quorumPeer.getActiveServer();
+        zkServer.takeSnapshot();
+
+        LOG.info("Restarting follower A to load snapshot");
+        mt[followerA].shutdown();
+        QuorumPeerMainTest.waitForOne(zk[followerA], States.CONNECTING);
+
+        mt[followerA].start();
+        QuorumPeerMainTest.waitForOne(zk[followerA], States.CONNECTED);
+
+        LOG.info("Check and make sure the pzxid of the parent is the same " +
+                "on leader and follower A");
+        compareStat(parent, leaderId, followerA);
+    }
+
+    private void addSerializeListener(int sid, String parent, String child) {
+        final ZooKeeper zkClient = zk[followerA];
+        CustomDataTree dt =
+                (CustomDataTree) mt[sid].main.quorumPeer.getZkDb().getDataTree();
+        dt.addListener(parent, new NodeSerializeListener() {
+            @Override
+            public void nodeSerialized(String path) {
+                try {
+                    zkClient.delete(child, -1);
+                    LOG.info("Deleted the child node after the parent is serialized");
+                } catch (Exception e) {
+                    LOG.error("Error when deleting node {}", e);
+                }
+            }
+        });
+    }
+
+    private void compareStat(String path, int sid, int compareWithSid) throws Exception {
+        Stat stat1 = new Stat();
+        zk[sid].getData(path, null, stat1);
+
+        Stat stat2 = new Stat();
+        zk[compareWithSid].getData(path, null, stat2);
+
+        Assert.assertEquals(stat1, stat2);
+    }
+
     private void createEmptyNode(ZooKeeper zk, String path) throws Exception {
         zk.create(path, new byte[0], Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT);
     }
@@ -174,6 +275,22 @@ public class FuzzySnapshotRelatedTest extends QuorumPeerTestBase {
     static class CustomDataTree extends DataTree {
         Map<String, NodeCreateListener> nodeCreateListeners =
                 new HashMap<String, NodeCreateListener>();
+        Map<String, NodeSerializeListener> listeners =
+                new HashMap<String, NodeSerializeListener>();
+
+        @Override
+        public void serializeNodeData(OutputArchive oa, String path,
+                DataNode node) throws IOException {
+            super.serializeNodeData(oa, path, node);
+            NodeSerializeListener listener = listeners.get(path);
+            if (listener != null) {
+                listener.nodeSerialized(path);
+            }
+        }
+
+        public void addListener(String path, NodeSerializeListener listener) {
+            listeners.put(path, listener);
+        }
 
         @Override
         public void createNode(final String path, byte data[], List<ACL> acl,
@@ -193,6 +310,10 @@ public class FuzzySnapshotRelatedTest extends QuorumPeerTestBase {
         }
     }
 
+    static interface NodeSerializeListener {
+        public void nodeSerialized(String path);
+    }
+
     static class CustomizedQPMain extends TestQPMain {
         @Override
         protected QuorumPeer getQuorumPeer() throws SaslException {