You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@zookeeper.apache.org by rg...@apache.org on 2015/06/06 18:42:46 UTC

svn commit: r1683930 - in /zookeeper/branches/branch-3.5: CHANGES.txt src/java/main/org/apache/zookeeper/server/DataTree.java src/java/test/org/apache/zookeeper/server/DataTreeTest.java

Author: rgs
Date: Sat Jun  6 16:42:45 2015
New Revision: 1683930

URL: http://svn.apache.org/r1683930
Log:
ZOOKEEPER-2201: Network issues can cause cluster to hang due to near-deadlock
(Donny Nadolny via rgs)

Modified:
    zookeeper/branches/branch-3.5/CHANGES.txt
    zookeeper/branches/branch-3.5/src/java/main/org/apache/zookeeper/server/DataTree.java
    zookeeper/branches/branch-3.5/src/java/test/org/apache/zookeeper/server/DataTreeTest.java

Modified: zookeeper/branches/branch-3.5/CHANGES.txt
URL: http://svn.apache.org/viewvc/zookeeper/branches/branch-3.5/CHANGES.txt?rev=1683930&r1=1683929&r2=1683930&view=diff
==============================================================================
--- zookeeper/branches/branch-3.5/CHANGES.txt (original)
+++ zookeeper/branches/branch-3.5/CHANGES.txt Sat Jun  6 16:42:45 2015
@@ -117,6 +117,9 @@ BUGFIXES:
   ZOOKEEPER-2204: LearnerSnapshotThrottlerTest.testHighContentionWithTimeout fails occasionally
   (Donny Nadolny via rgs)
 
+  ZOOKEEPER-2201: Network issues can cause cluster to hang due to near-deadlock
+  (Donny Nadolny via rgs)
+
 IMPROVEMENTS:
   ZOOKEEPER-1660 Documentation for Dynamic Reconfiguration (Reed Wanderman-Milne via shralex)
 

Modified: zookeeper/branches/branch-3.5/src/java/main/org/apache/zookeeper/server/DataTree.java
URL: http://svn.apache.org/viewvc/zookeeper/branches/branch-3.5/src/java/main/org/apache/zookeeper/server/DataTree.java?rev=1683930&r1=1683929&r2=1683930&view=diff
==============================================================================
--- zookeeper/branches/branch-3.5/src/java/main/org/apache/zookeeper/server/DataTree.java (original)
+++ zookeeper/branches/branch-3.5/src/java/main/org/apache/zookeeper/server/DataTree.java Sat Jun  6 16:42:45 2015
@@ -1138,14 +1138,20 @@ public class DataTree {
             return;
         }
         String children[] = null;
+        DataNode nodeCopy;
         synchronized (node) {
-            oa.writeString(pathString, "path");
-            oa.writeRecord(node, "node");
+            StatPersisted statCopy = new StatPersisted();
+            copyStatPersisted(node.stat, statCopy);
+            //we do not need to make a copy of node.data because the contents
+            //are never changed
+            nodeCopy = new DataNode(node.data, node.acl, statCopy);
             Set<String> childs = node.getChildren();
             if (childs != null) {
                 children = childs.toArray(new String[childs.size()]);
             }
         }
+        oa.writeString(pathString, "path");
+        oa.writeRecord(nodeCopy, "node");
         path.append('/');
         int off = path.length();
         if (children != null) {

Modified: zookeeper/branches/branch-3.5/src/java/test/org/apache/zookeeper/server/DataTreeTest.java
URL: http://svn.apache.org/viewvc/zookeeper/branches/branch-3.5/src/java/test/org/apache/zookeeper/server/DataTreeTest.java?rev=1683930&r1=1683929&r2=1683930&view=diff
==============================================================================
--- zookeeper/branches/branch-3.5/src/java/test/org/apache/zookeeper/server/DataTreeTest.java (original)
+++ zookeeper/branches/branch-3.5/src/java/test/org/apache/zookeeper/server/DataTreeTest.java Sat Jun  6 16:42:45 2015
@@ -34,14 +34,19 @@ import org.junit.Test;
 import org.apache.zookeeper.server.DataNode;
 import java.io.ByteArrayInputStream;
 import java.io.ByteArrayOutputStream;
+import java.io.DataOutputStream;
+import java.io.IOException;
 import java.io.PrintWriter;
 import java.io.StringWriter;
 
 import org.apache.zookeeper.Quotas;
 import org.apache.jute.BinaryInputArchive;
 import org.apache.jute.BinaryOutputArchive;
+import org.apache.jute.Record;
 import org.apache.zookeeper.common.PathTrie;
 import java.lang.reflect.*;
+import java.util.concurrent.Semaphore;
+import java.util.concurrent.TimeUnit;
 import java.util.concurrent.atomic.AtomicBoolean;
 
 public class DataTreeTest extends ZKTestCase {
@@ -145,18 +150,18 @@ public class DataTreeTest extends ZKTest
                 newCversion + ", " + newPzxid + ">",
                 (newCversion == prevCversion + 1 && newPzxid == prevPzxid + 1));
     }
-   
+
     @Test(timeout = 60000)
     public void testPathTrieClearOnDeserialize() throws Exception {
 
         //Create a DataTree with quota nodes so PathTrie get updated
         DataTree dserTree = new DataTree();
-        
+
         dserTree.createNode("/bug", new byte[20], null, -1, 1, 1, 1);
         dserTree.createNode(Quotas.quotaZookeeper+"/bug", null, null, -1, 1, 1, 1);
         dserTree.createNode(Quotas.quotaPath("/bug"), new byte[20], null, -1, 1, 1, 1);
         dserTree.createNode(Quotas.statPath("/bug"), new byte[20], null, -1, 1, 1, 1);
-        
+
         //deserialize a DataTree; this should clear the old /bug nodes and pathTrie
         DataTree tree = new DataTree();
 
@@ -176,4 +181,55 @@ public class DataTreeTest extends ZKTest
         //Check that the node path is removed from pTrie
         Assert.assertEquals("/bug is still in pTrie", "", pTrie.findMaxPrefix("/bug"));       
     }
+
+    /*
+     * ZOOKEEPER-2201 - OutputArchive.writeRecord can block for long periods of
+     * time, we must call it outside of the node lock.
+     * We call tree.serialize, which calls our modified writeRecord method that
+     * blocks until it can verify that a separate thread can lock the DataNode
+     * currently being written, i.e. that DataTree.serializeNode does not hold
+     * the DataNode lock while calling OutputArchive.writeRecord.
+     */
+    @Test(timeout = 60000)
+    public void testSerializeDoesntLockDataNodeWhileWriting() throws Exception {
+        DataTree tree = new DataTree();
+        tree.createNode("/marker", new byte[] {42}, null, -1, 1, 1, 1);
+        final DataNode markerNode = tree.getNode("/marker");
+        final AtomicBoolean ranTestCase = new AtomicBoolean();
+        DataOutputStream out = new DataOutputStream(new ByteArrayOutputStream());
+        BinaryOutputArchive oa = new BinaryOutputArchive(out) {
+            @Override
+            public void writeRecord(Record r, String tag) throws IOException {
+                DataNode node = (DataNode) r;
+                if (node.data.length == 1 && node.data[0] == 42) {
+                    final Semaphore semaphore = new Semaphore(0);
+                    new Thread(new Runnable() {
+                        @Override
+                        public void run() {
+                            synchronized (markerNode) {
+                                //When we lock markerNode, allow writeRecord to continue
+                                semaphore.release();
+                            }
+                        }
+                    }).start();
+
+                    try {
+                        boolean acquired = semaphore.tryAcquire(30, TimeUnit.SECONDS);
+                        //This is the real assertion - could another thread lock
+                        //the DataNode we're currently writing
+                        Assert.assertTrue("Couldn't acquire a lock on the DataNode while we were calling tree.serialize", acquired);
+                    } catch (InterruptedException e1) {
+                        throw new RuntimeException(e1);
+                    }
+                    ranTestCase.set(true);
+                }
+                super.writeRecord(r, tag);
+            }
+        };
+
+        tree.serialize(oa, "test");
+
+        //Let's make sure that we hit the code that ran the real assertion above
+        Assert.assertTrue("Didn't find the expected node", ranTestCase.get());
+    }
 }