You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@zookeeper.apache.org by rg...@apache.org on 2015/06/06 18:42:46 UTC
svn commit: r1683930 - in /zookeeper/branches/branch-3.5: CHANGES.txt
src/java/main/org/apache/zookeeper/server/DataTree.java
src/java/test/org/apache/zookeeper/server/DataTreeTest.java
Author: rgs
Date: Sat Jun 6 16:42:45 2015
New Revision: 1683930
URL: http://svn.apache.org/r1683930
Log:
ZOOKEEPER-2201: Network issues can cause cluster to hang due to near-deadlock
(Donny Nadolny via rgs)
Modified:
zookeeper/branches/branch-3.5/CHANGES.txt
zookeeper/branches/branch-3.5/src/java/main/org/apache/zookeeper/server/DataTree.java
zookeeper/branches/branch-3.5/src/java/test/org/apache/zookeeper/server/DataTreeTest.java
Modified: zookeeper/branches/branch-3.5/CHANGES.txt
URL: http://svn.apache.org/viewvc/zookeeper/branches/branch-3.5/CHANGES.txt?rev=1683930&r1=1683929&r2=1683930&view=diff
==============================================================================
--- zookeeper/branches/branch-3.5/CHANGES.txt (original)
+++ zookeeper/branches/branch-3.5/CHANGES.txt Sat Jun 6 16:42:45 2015
@@ -117,6 +117,9 @@ BUGFIXES:
ZOOKEEPER-2204: LearnerSnapshotThrottlerTest.testHighContentionWithTimeout fails occasionally
(Donny Nadolny via rgs)
+ ZOOKEEPER-2201: Network issues can cause cluster to hang due to near-deadlock
+ (Donny Nadolny via rgs)
+
IMPROVEMENTS:
ZOOKEEPER-1660 Documentation for Dynamic Reconfiguration (Reed Wanderman-Milne via shralex)
Modified: zookeeper/branches/branch-3.5/src/java/main/org/apache/zookeeper/server/DataTree.java
URL: http://svn.apache.org/viewvc/zookeeper/branches/branch-3.5/src/java/main/org/apache/zookeeper/server/DataTree.java?rev=1683930&r1=1683929&r2=1683930&view=diff
==============================================================================
--- zookeeper/branches/branch-3.5/src/java/main/org/apache/zookeeper/server/DataTree.java (original)
+++ zookeeper/branches/branch-3.5/src/java/main/org/apache/zookeeper/server/DataTree.java Sat Jun 6 16:42:45 2015
@@ -1138,14 +1138,20 @@ public class DataTree {
return;
}
String children[] = null;
+ DataNode nodeCopy;
synchronized (node) {
- oa.writeString(pathString, "path");
- oa.writeRecord(node, "node");
+ StatPersisted statCopy = new StatPersisted();
+ copyStatPersisted(node.stat, statCopy);
+ //we do not need to make a copy of node.data because the contents
+ //are never changed
+ nodeCopy = new DataNode(node.data, node.acl, statCopy);
Set<String> childs = node.getChildren();
if (childs != null) {
children = childs.toArray(new String[childs.size()]);
}
}
+ oa.writeString(pathString, "path");
+ oa.writeRecord(nodeCopy, "node");
path.append('/');
int off = path.length();
if (children != null) {
Modified: zookeeper/branches/branch-3.5/src/java/test/org/apache/zookeeper/server/DataTreeTest.java
URL: http://svn.apache.org/viewvc/zookeeper/branches/branch-3.5/src/java/test/org/apache/zookeeper/server/DataTreeTest.java?rev=1683930&r1=1683929&r2=1683930&view=diff
==============================================================================
--- zookeeper/branches/branch-3.5/src/java/test/org/apache/zookeeper/server/DataTreeTest.java (original)
+++ zookeeper/branches/branch-3.5/src/java/test/org/apache/zookeeper/server/DataTreeTest.java Sat Jun 6 16:42:45 2015
@@ -34,14 +34,19 @@ import org.junit.Test;
import org.apache.zookeeper.server.DataNode;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
+import java.io.DataOutputStream;
+import java.io.IOException;
import java.io.PrintWriter;
import java.io.StringWriter;
import org.apache.zookeeper.Quotas;
import org.apache.jute.BinaryInputArchive;
import org.apache.jute.BinaryOutputArchive;
+import org.apache.jute.Record;
import org.apache.zookeeper.common.PathTrie;
import java.lang.reflect.*;
+import java.util.concurrent.Semaphore;
+import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;
public class DataTreeTest extends ZKTestCase {
@@ -145,18 +150,18 @@ public class DataTreeTest extends ZKTest
newCversion + ", " + newPzxid + ">",
(newCversion == prevCversion + 1 && newPzxid == prevPzxid + 1));
}
-
+
@Test(timeout = 60000)
public void testPathTrieClearOnDeserialize() throws Exception {
//Create a DataTree with quota nodes so PathTrie get updated
DataTree dserTree = new DataTree();
-
+
dserTree.createNode("/bug", new byte[20], null, -1, 1, 1, 1);
dserTree.createNode(Quotas.quotaZookeeper+"/bug", null, null, -1, 1, 1, 1);
dserTree.createNode(Quotas.quotaPath("/bug"), new byte[20], null, -1, 1, 1, 1);
dserTree.createNode(Quotas.statPath("/bug"), new byte[20], null, -1, 1, 1, 1);
-
+
//deserialize a DataTree; this should clear the old /bug nodes and pathTrie
DataTree tree = new DataTree();
@@ -176,4 +181,55 @@ public class DataTreeTest extends ZKTest
//Check that the node path is removed from pTrie
Assert.assertEquals("/bug is still in pTrie", "", pTrie.findMaxPrefix("/bug"));
}
+
+ /*
+ * ZOOKEEPER-2201 - OutputArchive.writeRecord can block for long periods of
+ * time, we must call it outside of the node lock.
+ * We call tree.serialize, which calls our modified writeRecord method that
+ * blocks until it can verify that a separate thread can lock the DataNode
+ * currently being written, i.e. that DataTree.serializeNode does not hold
+ * the DataNode lock while calling OutputArchive.writeRecord.
+ */
+ @Test(timeout = 60000)
+ public void testSerializeDoesntLockDataNodeWhileWriting() throws Exception {
+ DataTree tree = new DataTree();
+ tree.createNode("/marker", new byte[] {42}, null, -1, 1, 1, 1);
+ final DataNode markerNode = tree.getNode("/marker");
+ final AtomicBoolean ranTestCase = new AtomicBoolean();
+ DataOutputStream out = new DataOutputStream(new ByteArrayOutputStream());
+ BinaryOutputArchive oa = new BinaryOutputArchive(out) {
+ @Override
+ public void writeRecord(Record r, String tag) throws IOException {
+ DataNode node = (DataNode) r;
+ if (node.data.length == 1 && node.data[0] == 42) {
+ final Semaphore semaphore = new Semaphore(0);
+ new Thread(new Runnable() {
+ @Override
+ public void run() {
+ synchronized (markerNode) {
+ //When we lock markerNode, allow writeRecord to continue
+ semaphore.release();
+ }
+ }
+ }).start();
+
+ try {
+ boolean acquired = semaphore.tryAcquire(30, TimeUnit.SECONDS);
+ //This is the real assertion - could another thread lock
+ //the DataNode we're currently writing
+ Assert.assertTrue("Couldn't acquire a lock on the DataNode while we were calling tree.serialize", acquired);
+ } catch (InterruptedException e1) {
+ throw new RuntimeException(e1);
+ }
+ ranTestCase.set(true);
+ }
+ super.writeRecord(r, tag);
+ }
+ };
+
+ tree.serialize(oa, "test");
+
+ //Let's make sure that we hit the code that ran the real assertion above
+ Assert.assertTrue("Didn't find the expected node", ranTestCase.get());
+ }
}