You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@zookeeper.apache.org by an...@apache.org on 2018/11/08 18:56:12 UTC
zookeeper git commit: ZOOKEEPER-3125: Fixing pzxid consistent issue
when replaying a txn for a deleted node
Repository: zookeeper
Updated Branches:
refs/heads/branch-3.5 bdfa704a6 -> 14364ebb4
ZOOKEEPER-3125: Fixing pzxid consistent issue when replaying a txn for a deleted node
Port this to 3.5, this also fixed the issue where the pzxid might be overwritten by a smaller one when replaying deleteNode txn, which will be fixed in a separate PR for 3.6.
Author: Fangmin Lyu <al...@fb.com>
Reviewers: andor@apache.org
Closes #647 from lvfangmin/ZOOKEEPER-3125-3.5 and squashes the following commits:
53670f9c7 [Fangmin Lyu] remove RetryRule in the patch
cee39c742 [Fangmin Lyu] move RetryRule to the zookeeper-server/src/test folder
a8b8d6669 [Fangmin Lyu] waiting States.CONNECTING after shutdown
7528d1559 [Fangmin Lyu] retry test if it is due to the connection loss
5fbf7da3b [Fangmin Lyu] ZOOKEEPER-3125: Fixing pzxid consistent issue when replaying a txn for a deleted node
Project: http://git-wip-us.apache.org/repos/asf/zookeeper/repo
Commit: http://git-wip-us.apache.org/repos/asf/zookeeper/commit/14364ebb
Tree: http://git-wip-us.apache.org/repos/asf/zookeeper/tree/14364ebb
Diff: http://git-wip-us.apache.org/repos/asf/zookeeper/diff/14364ebb
Branch: refs/heads/branch-3.5
Commit: 14364ebb4ce6eacabca7dee22b442fe42519f703
Parents: bdfa704
Author: Fangmin Lyu <al...@fb.com>
Authored: Thu Nov 8 10:56:08 2018 -0800
Committer: Andor Molnar <an...@apache.org>
Committed: Thu Nov 8 10:56:08 2018 -0800
----------------------------------------------------------------------
.../org/apache/zookeeper/server/DataTree.java | 38 ++++--
.../apache/zookeeper/server/DataTreeTest.java | 25 ++++
.../server/quorum/FuzzySnapshotRelatedTest.java | 121 +++++++++++++++++++
3 files changed, 176 insertions(+), 8 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/zookeeper/blob/14364ebb/zookeeper-server/src/main/java/org/apache/zookeeper/server/DataTree.java
----------------------------------------------------------------------
diff --git a/zookeeper-server/src/main/java/org/apache/zookeeper/server/DataTree.java b/zookeeper-server/src/main/java/org/apache/zookeeper/server/DataTree.java
index 90e92c5..2e24d63 100644
--- a/zookeeper-server/src/main/java/org/apache/zookeeper/server/DataTree.java
+++ b/zookeeper-server/src/main/java/org/apache/zookeeper/server/DataTree.java
@@ -527,6 +527,24 @@ public class DataTree {
int lastSlash = path.lastIndexOf('/');
String parentName = path.substring(0, lastSlash);
String childName = path.substring(lastSlash + 1);
+
+ // The child might already be deleted during taking fuzzy snapshot,
+ // but we still need to update the pzxid here before throw exception
+ // for no such child
+ DataNode parent = nodes.get(parentName);
+ if (parent == null) {
+ throw new KeeperException.NoNodeException();
+ }
+ synchronized (parent) {
+ parent.removeChild(childName);
+ // Only update pzxid when the zxid is larger than the current pzxid,
+ // otherwise we might override higher pzxid set by a following create
+ // Txn, which could cause the cversion and pzxid inconsistent
+ if (zxid > parent.stat.getPzxid()) {
+ parent.stat.setPzxid(zxid);
+ }
+ }
+
DataNode node = nodes.get(path);
if (node == null) {
throw new KeeperException.NoNodeException();
@@ -535,13 +553,11 @@ public class DataTree {
synchronized (node) {
aclCache.removeUsage(node.acl);
}
- DataNode parent = nodes.get(parentName);
- if (parent == null) {
- throw new KeeperException.NoNodeException();
- }
+
+ // Synchronized to sync the containers and ttls change, probably
+ // only need to sync on containers and ttls, will update it in a
+ // separate patch.
synchronized (parent) {
- parent.removeChild(childName);
- parent.stat.setPzxid(zxid);
long eowner = node.stat.getEphemeralOwner();
EphemeralType ephemeralType = EphemeralType.get(eowner);
if (ephemeralType == EphemeralType.CONTAINER) {
@@ -557,6 +573,7 @@ public class DataTree {
}
}
}
+
if (parentName.startsWith(procZookeeper) && Quotas.limitNode.equals(childName)) {
// delete the node in the trie.
// we need to update the trie as well
@@ -1178,8 +1195,7 @@ public class DataTree {
Set<String> childs = node.getChildren();
children = childs.toArray(new String[childs.size()]);
}
- oa.writeString(pathString, "path");
- oa.writeRecord(nodeCopy, "node");
+ serializeNodeData(oa, pathString, nodeCopy);
path.append('/');
int off = path.length();
for (String child : children) {
@@ -1192,6 +1208,12 @@ public class DataTree {
}
}
+ // visiable for test
+ public void serializeNodeData(OutputArchive oa, String path, DataNode node) throws IOException {
+ oa.writeString(path, "path");
+ oa.writeRecord(node, "node");
+ }
+
public void serialize(OutputArchive oa, String tag) throws IOException {
aclCache.serialize(oa);
serializeNode(oa, new StringBuilder(""));
http://git-wip-us.apache.org/repos/asf/zookeeper/blob/14364ebb/zookeeper-server/src/test/java/org/apache/zookeeper/server/DataTreeTest.java
----------------------------------------------------------------------
diff --git a/zookeeper-server/src/test/java/org/apache/zookeeper/server/DataTreeTest.java b/zookeeper-server/src/test/java/org/apache/zookeeper/server/DataTreeTest.java
index 57497ce..def2921 100644
--- a/zookeeper-server/src/test/java/org/apache/zookeeper/server/DataTreeTest.java
+++ b/zookeeper-server/src/test/java/org/apache/zookeeper/server/DataTreeTest.java
@@ -153,6 +153,31 @@ public class DataTreeTest extends ZKTestCase {
(newCversion == prevCversion + 1 && newPzxid == prevPzxid + 1));
}
+ @Test
+ public void testPzxidUpdatedWhenDeletingNonExistNode() throws Exception {
+ DataNode root = dt.getNode("/");
+ long currentPzxid = root.stat.getPzxid();
+
+ // pzxid updated with deleteNode on higher zxid
+ long zxid = currentPzxid + 1;
+ try {
+ dt.deleteNode("/testPzxidUpdatedWhenDeletingNonExistNode", zxid);
+ } catch (NoNodeException e) { /* expected */ }
+ root = dt.getNode("/");
+ currentPzxid = root.stat.getPzxid();
+ Assert.assertEquals(currentPzxid, zxid);
+
+ // pzxid not updated with smaller zxid
+ long prevPzxid = currentPzxid;
+ zxid = prevPzxid - 1;
+ try {
+ dt.deleteNode("/testPzxidUpdatedWhenDeletingNonExistNode", zxid);
+ } catch (NoNodeException e) { /* expected */ }
+ root = dt.getNode("/");
+ currentPzxid = root.stat.getPzxid();
+ Assert.assertEquals(currentPzxid, prevPzxid);
+ }
+
@Test(timeout = 60000)
public void testPathTrieClearOnDeserialize() throws Exception {
http://git-wip-us.apache.org/repos/asf/zookeeper/blob/14364ebb/zookeeper-server/src/test/java/org/apache/zookeeper/server/quorum/FuzzySnapshotRelatedTest.java
----------------------------------------------------------------------
diff --git a/zookeeper-server/src/test/java/org/apache/zookeeper/server/quorum/FuzzySnapshotRelatedTest.java b/zookeeper-server/src/test/java/org/apache/zookeeper/server/quorum/FuzzySnapshotRelatedTest.java
index b49a426..1499c8c 100644
--- a/zookeeper-server/src/test/java/org/apache/zookeeper/server/quorum/FuzzySnapshotRelatedTest.java
+++ b/zookeeper-server/src/test/java/org/apache/zookeeper/server/quorum/FuzzySnapshotRelatedTest.java
@@ -18,22 +18,28 @@
package org.apache.zookeeper.server.quorum;
+import java.io.IOException;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import javax.security.sasl.SaslException;
+import org.apache.jute.OutputArchive;
+
import org.apache.zookeeper.CreateMode;
+import org.apache.zookeeper.KeeperException.ConnectionLossException;
import org.apache.zookeeper.KeeperException.NoNodeException;
import org.apache.zookeeper.KeeperException.NodeExistsException;
import org.apache.zookeeper.Op;
+
import org.apache.zookeeper.PortAssignment;
import org.apache.zookeeper.ZooDefs.Ids;
import org.apache.zookeeper.ZooKeeper;
import org.apache.zookeeper.ZooKeeper.States;
import org.apache.zookeeper.data.ACL;
import org.apache.zookeeper.data.Stat;
+import org.apache.zookeeper.server.DataNode;
import org.apache.zookeeper.server.DataTree;
import org.apache.zookeeper.server.ZKDatabase;
import org.apache.zookeeper.server.ZooKeeperServer;
@@ -42,6 +48,7 @@ import org.apache.zookeeper.test.ClientBase;
import org.junit.Assert;
import org.junit.After;
import org.junit.Before;
+import org.junit.Rule;
import org.junit.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -162,6 +169,100 @@ public class FuzzySnapshotRelatedTest extends QuorumPeerTestBase {
new String(zk[followerA].getData(node2, null, null)));
}
+ /**
+ * It's possibel during SNAP sync, the parent is serialized before the
+ * child get deleted during sending the snapshot over.
+ *
+ * In which case, we need to make sure the pzxid get correctly updated
+ * when applying the txns received.
+ */
+ @Test
+ public void testPZxidUpdatedDuringSnapSyncing() throws Exception {
+ LOG.info("Enable force snapshot sync");
+ System.setProperty(LearnerHandler.FORCE_SNAP_SYNC, "true");
+
+ final String parent = "/testPZxidUpdatedWhenDeletingNonExistNode";
+ final String child = parent + "/child";
+ createEmptyNode(zk[leaderId], parent);
+ createEmptyNode(zk[leaderId], child);
+
+ LOG.info("shutdown follower {}", followerA);
+ mt[followerA].shutdown();
+ QuorumPeerMainTest.waitForOne(zk[followerA], States.CONNECTING);
+
+ LOG.info("Set up ZKDatabase to catch the node serializing in DataTree");
+ addSerializeListener(leaderId, parent, child);
+
+ LOG.info("Restart follower A to trigger a SNAP sync with leader");
+ mt[followerA].start();
+ QuorumPeerMainTest.waitForOne(zk[followerA], States.CONNECTED);
+
+ LOG.info("Check and make sure the pzxid of the parent is the same " +
+ "on leader and follower A");
+ compareStat(parent, leaderId, followerA);
+ }
+
+ /**
+ * It's possible during taking fuzzy snapshot, the parent is serialized
+ * before the child get deleted in the fuzzy range.
+ *
+ * In which case, we need to make sure the pzxid get correctly updated
+ * when replaying the txns.
+ */
+ @Test
+ public void testPZxidUpdatedWhenLoadingSnapshot() throws Exception {
+
+ final String parent = "/testPZxidUpdatedDuringTakingSnapshot";
+ final String child = parent + "/child";
+ createEmptyNode(zk[followerA], parent);
+ createEmptyNode(zk[followerA], child);
+
+ LOG.info("Set up ZKDatabase to catch the node serializing in DataTree");
+ addSerializeListener(followerA, parent, child);
+
+ LOG.info("Take snapshot on follower A");
+ ZooKeeperServer zkServer = mt[followerA].main.quorumPeer.getActiveServer();
+ zkServer.takeSnapshot();
+
+ LOG.info("Restarting follower A to load snapshot");
+ mt[followerA].shutdown();
+ QuorumPeerMainTest.waitForOne(zk[followerA], States.CONNECTING);
+
+ mt[followerA].start();
+ QuorumPeerMainTest.waitForOne(zk[followerA], States.CONNECTED);
+
+ LOG.info("Check and make sure the pzxid of the parent is the same " +
+ "on leader and follower A");
+ compareStat(parent, leaderId, followerA);
+ }
+
+ private void addSerializeListener(int sid, String parent, String child) {
+ final ZooKeeper zkClient = zk[followerA];
+ CustomDataTree dt =
+ (CustomDataTree) mt[sid].main.quorumPeer.getZkDb().getDataTree();
+ dt.addListener(parent, new NodeSerializeListener() {
+ @Override
+ public void nodeSerialized(String path) {
+ try {
+ zkClient.delete(child, -1);
+ LOG.info("Deleted the child node after the parent is serialized");
+ } catch (Exception e) {
+ LOG.error("Error when deleting node {}", e);
+ }
+ }
+ });
+ }
+
+ private void compareStat(String path, int sid, int compareWithSid) throws Exception {
+ Stat stat1 = new Stat();
+ zk[sid].getData(path, null, stat1);
+
+ Stat stat2 = new Stat();
+ zk[compareWithSid].getData(path, null, stat2);
+
+ Assert.assertEquals(stat1, stat2);
+ }
+
private void createEmptyNode(ZooKeeper zk, String path) throws Exception {
zk.create(path, new byte[0], Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT);
}
@@ -174,6 +275,22 @@ public class FuzzySnapshotRelatedTest extends QuorumPeerTestBase {
static class CustomDataTree extends DataTree {
Map<String, NodeCreateListener> nodeCreateListeners =
new HashMap<String, NodeCreateListener>();
+ Map<String, NodeSerializeListener> listeners =
+ new HashMap<String, NodeSerializeListener>();
+
+ @Override
+ public void serializeNodeData(OutputArchive oa, String path,
+ DataNode node) throws IOException {
+ super.serializeNodeData(oa, path, node);
+ NodeSerializeListener listener = listeners.get(path);
+ if (listener != null) {
+ listener.nodeSerialized(path);
+ }
+ }
+
+ public void addListener(String path, NodeSerializeListener listener) {
+ listeners.put(path, listener);
+ }
@Override
public void createNode(final String path, byte data[], List<ACL> acl,
@@ -193,6 +310,10 @@ public class FuzzySnapshotRelatedTest extends QuorumPeerTestBase {
}
}
+ static interface NodeSerializeListener {
+ public void nodeSerialized(String path);
+ }
+
static class CustomizedQPMain extends TestQPMain {
@Override
protected QuorumPeer getQuorumPeer() throws SaslException {