You are viewing a plain text version of this content. The canonical link for it is here.
Posted to oak-commits@jackrabbit.apache.org by re...@apache.org on 2017/03/24 13:29:54 UTC

svn commit: r1788441 - in /jackrabbit/oak/branches/1.4: ./ oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeStore.java oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/DocumentLeaseUpdateRetryTest.java

Author: reschke
Date: Fri Mar 24 13:29:54 2017
New Revision: 1788441

URL: http://svn.apache.org/viewvc?rev=1788441&view=rev
Log:
OAK-5528: leaseUpdateThread might be blocked by leaseUpdateCheck (ported to 1.4)

- add test
- move cluster state update out of background leaseUpdateThread
- make sure that cluster state update bypasses the leaseCheckWrapper

Modified:
    jackrabbit/oak/branches/1.4/   (props changed)
    jackrabbit/oak/branches/1.4/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeStore.java
    jackrabbit/oak/branches/1.4/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/DocumentLeaseUpdateRetryTest.java

Propchange: jackrabbit/oak/branches/1.4/
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Fri Mar 24 13:29:54 2017
@@ -1,3 +1,3 @@
 /jackrabbit/oak/branches/1.0:1665962
-/jackrabbit/oak/trunk
 ,1750457,1750462,1750465,1750495,1750626,1750809,1750886-1750887,1751396,1751410,1751419,1751445-1751446,1751478,1751748,1751753,1751755,1751871,1752198,1752202,1752259,1752273-1752274,1752283,1752292,1752438,1752447-1752448,1752508,1752596,1752616,1752659,1752672,1753262,1753331-1753332,1753335-1753336,1753355,1753444,1754117,1754239,1755157,1755191,1756520,1756580,1757119,1757166,1758213,1758713,1759433,1759795,1759826,1760326,1760340,1760373,1760387,1760486,1760492,1760494,1760661-1760662,1760677,1760701,1760709,1760946,1761412,1761444,1761571,1761762,1761787,1761866,1761876,1762453,1762612,1762632,1762635,1763347,1763355-1763356,1763378,1763465,1763735,1764678,1764705,1764814,1764898,1765817,1765983,1766071,1766390,1766423,1766496,1766519,1766554,1766644,1767025,1767265,1767502,1767704,1768446,1768637,1769078,1770694,1770982,1771022,1771093,1771098,1771739,1771852,1771870,1771902,1772155,1772162,1772228,1772593,1772768,1773190,1774497,1774787,1775474,1775622,1775628,1775757,1778
 112,1778423,1778968,1779137,1779478,1780388,1780538,1780543,1781068,1781075,1781386,1781846,1781907,1782476,1783066,1783089,1783104-1783105,1783619,1783720,1783738,1783855,1784023,1784130,1784251,1784574,1784689,1785283,1787074,1787217
+/jackrabbit/oak/trunk
 ,1750457,1750462,1750465,1750495,1750626,1750809,1750886-1750887,1751396,1751410,1751419,1751445-1751446,1751478,1751748,1751753,1751755,1751871,1752198,1752202,1752259,1752273-1752274,1752283,1752292,1752438,1752447-1752448,1752508,1752596,1752616,1752659,1752672,1753262,1753331-1753332,1753335-1753336,1753355,1753444,1754117,1754239,1755157,1755191,1756520,1756580,1757119,1757166,1758213,1758713,1759433,1759795,1759826,1760326,1760340,1760373,1760387,1760486,1760492,1760494,1760661-1760662,1760677,1760701,1760709,1760946,1761412,1761444,1761571,1761762,1761787,1761866,1761876,1762453,1762612,1762632,1762635,1763347,1763355-1763356,1763378,1763465,1763735,1764678,1764705,1764814,1764898,1765817,1765983,1766071,1766390,1766423,1766496,1766519,1766554,1766644,1767025,1767265,1767502,1767704,1768446,1768637,1769078,1770694,1770982,1771022,1771093,1771098,1771739,1771852,1771870,1771902,1772155,1772162,1772228,1772593,1772768,1773190,1774497,1774787,1775474,1775622,1775628,1775757,1778
 112,1778423,1778968,1779137,1779478,1780388,1780424,1780538,1780543,1781068,1781075,1781386,1781846,1781907,1782476,1783066,1783089,1783104-1783105,1783619,1783720,1783738,1783855,1784023,1784130,1784251,1784574,1784689,1785283,1787074,1787217
 /jackrabbit/trunk:1345480

Modified: jackrabbit/oak/branches/1.4/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeStore.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/branches/1.4/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeStore.java?rev=1788441&r1=1788440&r2=1788441&view=diff
==============================================================================
--- jackrabbit/oak/branches/1.4/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeStore.java (original)
+++ jackrabbit/oak/branches/1.4/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeStore.java Fri Mar 24 13:29:54 2017
@@ -153,6 +153,11 @@ public final class DocumentNodeStore
             Boolean.getBoolean("oak.disableJournalDiff");
 
     /**
+     * The document store without the (potentially present) lease checking wrapper.
+     */
+    private final DocumentStore nonLeaseCheckingStore;
+
+    /**
      * The document store (might be used by multiple node stores).
      */
     protected final DocumentStore store;
@@ -267,6 +272,12 @@ public final class DocumentNodeStore
     private Thread leaseUpdateThread;
 
     /**
+     * Background thread performing the cluster state update.
+     */
+    @Nonnull
+    private Thread clusterUpdateThread;
+
+    /**
      * Read/Write lock for background operations. Regular commits will acquire
      * a shared lock, while a background write acquires an exclusive lock.
      */
@@ -410,6 +421,8 @@ public final class DocumentNodeStore
         // this cluster id
         cid = clusterNodeInfo.getId();
 
+        this.nonLeaseCheckingStore = s;
+
         if (builder.getLeaseCheck()) {
             s = new LeaseCheckDocumentStoreWrapper(s, clusterNodeInfo);
             clusterNodeInfo.setLeaseFailureHandler(builder.getLeaseFailureHandler());
@@ -510,7 +523,12 @@ public final class DocumentNodeStore
         // on a very busy machine - so as to prevent lease timeout.
         leaseUpdateThread.setPriority(Thread.MAX_PRIORITY);
         leaseUpdateThread.start();
-        
+
+        clusterUpdateThread = new Thread(new BackgroundClusterUpdate(this, isDisposed),
+                "DocumentNodeStore cluster update thread " + threadNamePostfix);
+        clusterUpdateThread.setDaemon(true);
+        clusterUpdateThread.start();
+
         PersistentCache pc = builder.getPersistentCache();
         if (pc != null) {
             DynamicBroadcastConfig broadcastConfig = new DocumentBroadcastConfig(this);
@@ -567,6 +585,12 @@ public final class DocumentNodeStore
         }
 
         try {
+            clusterUpdateThread.join();
+        } catch (InterruptedException e) {
+            // ignore
+        }
+
+        try {
             leaseUpdateThread.join();
         } catch (InterruptedException e) {
             // ignore
@@ -1868,7 +1892,7 @@ public final class DocumentNodeStore
     boolean updateClusterState() {
         boolean hasChanged = false;
         Set<Integer> clusterIds = Sets.newHashSet();
-        for (ClusterNodeInfoDocument doc : ClusterNodeInfoDocument.all(store)) {
+        for (ClusterNodeInfoDocument doc : ClusterNodeInfoDocument.all(nonLeaseCheckingStore)) {
             int cId = doc.getClusterId();
             clusterIds.add(cId);
             ClusterNodeInfoDocument old = clusterNodes.get(cId);
@@ -2776,7 +2800,7 @@ public final class DocumentNodeStore
 
         /** OAK-4859 : log if time between two renewClusterIdLease calls is too long **/
         private long lastRenewClusterIdLeaseCall = -1;
-        
+
         BackgroundLeaseUpdate(DocumentNodeStore nodeStore,
                               AtomicBoolean isDisposed) {
             super(nodeStore, isDisposed, Suppliers.ofInstance(1000));
@@ -2798,11 +2822,19 @@ public final class DocumentNodeStore
             }
             // first renew the clusterId lease
             nodeStore.renewClusterIdLease();
+        }
+    }
+
+    static class BackgroundClusterUpdate extends NodeStoreTask {
 
-            // then, independently if the lease had to be updated or not, check
-            // the status:
+        BackgroundClusterUpdate(DocumentNodeStore nodeStore,
+                              AtomicBoolean isDisposed) {
+            super(nodeStore, isDisposed, Suppliers.ofInstance(1000));
+        }
+
+        @Override
+        protected void execute(@Nonnull DocumentNodeStore nodeStore) {
             if (nodeStore.updateClusterState()) {
-                // then inform the discovery lite listener - if it is registered
                 nodeStore.signalClusterStateChange();
             }
         }

Modified: jackrabbit/oak/branches/1.4/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/DocumentLeaseUpdateRetryTest.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/branches/1.4/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/DocumentLeaseUpdateRetryTest.java?rev=1788441&r1=1788440&r2=1788441&view=diff
==============================================================================
--- jackrabbit/oak/branches/1.4/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/DocumentLeaseUpdateRetryTest.java (original)
+++ jackrabbit/oak/branches/1.4/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/DocumentLeaseUpdateRetryTest.java Fri Mar 24 13:29:54 2017
@@ -17,8 +17,10 @@
 package org.apache.jackrabbit.oak.plugins.document;
 
 import static org.junit.Assert.assertTrue;
-import static org.junit.Assert.fail;
 
+import java.util.List;
+
+import org.apache.jackrabbit.oak.plugins.document.memory.MemoryDocumentStore;
 import org.apache.jackrabbit.oak.stats.Clock;
 import org.apache.jackrabbit.oak.stats.Clock.Virtual;
 import org.junit.After;
@@ -31,21 +33,35 @@ public class DocumentLeaseUpdateRetryTes
 
     private DocumentNodeStore ns;
     private Virtual clock;
+    private TestStore ds;
 
     @Before
     public void setup() throws Exception {
         clock = new Clock.Virtual();
         ClusterNodeInfo.setClock(clock);
-        ns = new DocumentMK.Builder().clock(clock).getNodeStore();
+        ds = new TestStore();
+        ns = new DocumentMK.Builder().clock(clock).setDocumentStore(ds).setLeaseCheck(true).getNodeStore();
     }
 
     @After
     public void tearDown() throws Exception {
         ClusterNodeInfo.resetClockToDefault();
+        ns.dispose();
     }
 
     @Test
     public void testLeaseRetryLoop() throws Exception {
+        internalTestLeaseRetryLoop(false);
+    }
+
+    @Test
+    public void testLeaseRetryLoopWithDelay() throws Exception {
+        // see OAK-5446
+        // (simulates a very slow read access on the clusterNodes collection)
+        internalTestLeaseRetryLoop(true);
+    }
+
+    private void internalTestLeaseRetryLoop(boolean withDelay) throws Exception {
         ClusterNodeInfo clusterInfo = ns.getClusterInfo();
         long leaseTime = clusterInfo.getLeaseTime();
         long leaseEndTime1 = clusterInfo.getLeaseEndTime();
@@ -73,18 +89,45 @@ public class DocumentLeaseUpdateRetryTes
         // again assert that lease is fine -> do some dummy ns call
         ns.checkpoint(2);
 
+        if (withDelay) {
+            // mark the TestStore as delaying from now on
+            ds.setDelaying(true);
+            Thread.sleep(1200);
+        }
+
         // now forward the virtual clock by more than the lease time - which
         // should cause lease to time out
         clock.waitUntil(clock.getTime() + leaseTime + leaseUpdateInterval + 1000);
 
         // so the next call to the lease check wrapper should now run into the
         // retry loop, as the lease has timed out
-        try {
-            ns.checkpoint(3);
-        } catch (Exception e) {
-            // it should not fail however, since we should be able to do the
-            // retry
-            fail("call should not have failed: " + e);
+        ns.checkpoint(3); // should not fail
+    }
+
+    final class TestStore extends DocumentStoreWrapper {
+
+        private boolean delaying = false;
+
+        TestStore() {
+            super(new MemoryDocumentStore());
+        }
+
+        void setDelaying(boolean delaying) {
+            this.delaying = delaying;
+        }
+
+        @Override
+        public <T extends Document> List<T> query(Collection<T> collection, String fromKey, String toKey, int limit) {
+            if (delaying && collection == Collection.CLUSTER_NODES) {
+                try {
+                    // make the lookup on the clusterNodes collection *really*
+                    // slow
+                    Thread.sleep(10000);
+                } catch (InterruptedException e) {
+                    e.printStackTrace();
+                }
+            }
+            return super.query(collection, fromKey, toKey, limit);
         }
     }
 }