You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ho...@apache.org on 2016/12/14 22:15:09 UTC

[16/32] lucene-solr:jira/solr-5944: LUCENE-7570: don't run merges while holding the commitLock to prevent deadlock when merges are stalled and a tragic merge exception strikes

LUCENE-7570: don't run merges while holding the commitLock to prevent deadlock when merges are stalled and a tragic merge exception strikes


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/2b073a2f
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/2b073a2f
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/2b073a2f

Branch: refs/heads/jira/solr-5944
Commit: 2b073a2f296289617bea8256d7efec06049df739
Parents: 7cffae3
Author: Mike McCandless <mi...@apache.org>
Authored: Fri Dec 9 18:41:30 2016 -0500
Committer: Mike McCandless <mi...@apache.org>
Committed: Fri Dec 9 18:41:30 2016 -0500

----------------------------------------------------------------------
 lucene/CHANGES.txt                              |  4 ++
 .../org/apache/lucene/index/IndexWriter.java    | 26 ++++++--
 .../index/TestTragicIndexWriterDeadlock.java    | 69 +++++++++++++++++++-
 3 files changed, 91 insertions(+), 8 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/2b073a2f/lucene/CHANGES.txt
----------------------------------------------------------------------
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index da6e3d2..15b89f0 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -94,6 +94,10 @@ Bug Fixes
   in the index sort, since this would lead to corruption.  (Jim
   Ferenczi via Mike McCandless)
 
+* LUCENE-7570: IndexWriter may deadlock if a commit is running while
+  there are too many merges running and one of the merges hits a
+  tragic exception (Joey Echeverria via Mike McCandless)
+
 Improvements
 
 * LUCENE-6824: TermAutomatonQuery now rewrites to TermQuery,

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/2b073a2f/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java b/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
index 3ee87b1..4789505 100644
--- a/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
@@ -2952,11 +2952,16 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable {
   @Override
   public final long prepareCommit() throws IOException {
     ensureOpen();
-    pendingSeqNo = prepareCommitInternal(config.getMergePolicy());
+    boolean[] doMaybeMerge = new boolean[1];
+    pendingSeqNo = prepareCommitInternal(doMaybeMerge);
+    // we must do this outside of the commitLock else we can deadlock:
+    if (doMaybeMerge[0]) {
+      maybeMerge(config.getMergePolicy(), MergeTrigger.FULL_FLUSH, UNBOUNDED_MAX_MERGE_SEGMENTS);      
+    }
     return pendingSeqNo;
   }
 
-  private long prepareCommitInternal(MergePolicy mergePolicy) throws IOException {
+  private long prepareCommitInternal(boolean[] doMaybeMerge) throws IOException {
     startCommitTime = System.nanoTime();
     synchronized(commitLock) {
       ensureOpen(false);
@@ -3063,7 +3068,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable {
       boolean success = false;
       try {
         if (anySegmentsFlushed) {
-          maybeMerge(mergePolicy, MergeTrigger.FULL_FLUSH, UNBOUNDED_MAX_MERGE_SEGMENTS);
+          doMaybeMerge[0] = true;
         }
         startCommit(toCommit);
         success = true;
@@ -3184,6 +3189,10 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable {
       infoStream.message("IW", "commit: start");
     }
 
+    boolean[] doMaybeMerge = new boolean[1];
+
+    long seqNo;
+
     synchronized(commitLock) {
       ensureOpen(false);
 
@@ -3191,13 +3200,11 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable {
         infoStream.message("IW", "commit: enter lock");
       }
 
-      long seqNo;
-
       if (pendingCommit == null) {
         if (infoStream.isEnabled("IW")) {
           infoStream.message("IW", "commit: now prepare");
         }
-        seqNo = prepareCommitInternal(mergePolicy);
+        seqNo = prepareCommitInternal(doMaybeMerge);
       } else {
         if (infoStream.isEnabled("IW")) {
           infoStream.message("IW", "commit: already prepared");
@@ -3206,9 +3213,14 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable {
       }
 
       finishCommit();
+    }
 
-      return seqNo;
+    // we must do this outside of the commitLock else we can deadlock:
+    if (doMaybeMerge[0]) {
+      maybeMerge(mergePolicy, MergeTrigger.FULL_FLUSH, UNBOUNDED_MAX_MERGE_SEGMENTS);      
     }
+    
+    return seqNo;
   }
 
   private final void finishCommit() throws IOException {

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/2b073a2f/lucene/core/src/test/org/apache/lucene/index/TestTragicIndexWriterDeadlock.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/test/org/apache/lucene/index/TestTragicIndexWriterDeadlock.java b/lucene/core/src/test/org/apache/lucene/index/TestTragicIndexWriterDeadlock.java
index 3cce698..80f9392 100644
--- a/lucene/core/src/test/org/apache/lucene/index/TestTragicIndexWriterDeadlock.java
+++ b/lucene/core/src/test/org/apache/lucene/index/TestTragicIndexWriterDeadlock.java
@@ -14,13 +14,15 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package org.apache.lucene.index;
 
+package org.apache.lucene.index;
 
+import java.io.IOException;
 import java.util.concurrent.CountDownLatch;
 import java.util.concurrent.atomic.AtomicBoolean;
 
 import org.apache.lucene.document.Document;
+import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.MockDirectoryWrapper;
 import org.apache.lucene.util.LuceneTestCase;
 
@@ -92,4 +94,69 @@ public class TestTragicIndexWriterDeadlock extends LuceneTestCase {
     w.close();
     dir.close();
   }
+
+  // LUCENE-7570
+  public void testDeadlockStalledMerges() throws Exception {
+    Directory dir = newDirectory();
+    IndexWriterConfig iwc = new IndexWriterConfig();
+
+    // so we merge every 2 segments:
+    LogMergePolicy mp = new LogDocMergePolicy();
+    mp.setMergeFactor(2);
+    iwc.setMergePolicy(mp);
+    CountDownLatch done = new CountDownLatch(1);
+    ConcurrentMergeScheduler cms = new ConcurrentMergeScheduler() {
+        @Override
+        protected void doMerge(IndexWriter writer, MergePolicy.OneMerge merge) throws IOException {
+          // let merge takes forever, until commit thread is stalled
+          try {
+            done.await();
+          } catch (InterruptedException ie) {
+            Thread.currentThread().interrupt();
+            throw new RuntimeException(ie);
+          }
+          super.doMerge(writer, merge);
+        }
+
+        @Override
+        protected synchronized void doStall() {
+          done.countDown();
+          super.doStall();
+        }
+
+        @Override
+        protected void handleMergeException(Directory dir, Throwable exc) {
+        }
+      };
+
+    // so we stall once the 2nd merge wants to run:
+    cms.setMaxMergesAndThreads(1, 1);
+    iwc.setMergeScheduler(cms);
+
+    // so we write a segment every 2 indexed docs:
+    iwc.setMaxBufferedDocs(2);
+
+    final IndexWriter w = new IndexWriter(dir, iwc) {
+      @Override
+      void mergeSuccess(MergePolicy.OneMerge merge) {
+        // tragedy strikes!
+        throw new OutOfMemoryError();
+      }
+      };
+
+    w.addDocument(new Document());
+    w.addDocument(new Document());
+    // w writes first segment
+    w.addDocument(new Document());
+    w.addDocument(new Document());
+    // w writes second segment, and kicks off merge, that takes forever (done.await)
+    w.addDocument(new Document());
+    w.addDocument(new Document());
+    // w writes third segment
+    w.addDocument(new Document());
+    w.commit();
+    // w writes fourth segment, and commit flushes and kicks off merge that stalls
+    w.close();
+    dir.close();
+  }
 }