You are viewing a plain text version of this content. The canonical link for it is here.
Posted to java-commits@lucene.apache.org by mi...@apache.org on 2007/07/13 16:23:48 UTC

svn commit: r556010 - in /lucene/java/trunk: CHANGES.txt src/java/org/apache/lucene/index/IndexWriter.java src/test/org/apache/lucene/index/TestIndexWriterDelete.java src/test/org/apache/lucene/store/MockRAMDirectory.java

Author: mikemccand
Date: Fri Jul 13 07:23:47 2007
New Revision: 556010

URL: http://svn.apache.org/viewvc?view=rev&rev=556010
Log:
LUCENE-938: fixed certain cases where an unhandled exception could cause IndexWriter to lose buffered deletes

Modified:
    lucene/java/trunk/CHANGES.txt
    lucene/java/trunk/src/java/org/apache/lucene/index/IndexWriter.java
    lucene/java/trunk/src/test/org/apache/lucene/index/TestIndexWriterDelete.java
    lucene/java/trunk/src/test/org/apache/lucene/store/MockRAMDirectory.java

Modified: lucene/java/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/java/trunk/CHANGES.txt?view=diff&rev=556010&r1=556009&r2=556010
==============================================================================
--- lucene/java/trunk/CHANGES.txt (original)
+++ lucene/java/trunk/CHANGES.txt Fri Jul 13 07:23:47 2007
@@ -31,6 +31,10 @@
     that was thrown after a call of TermPositions.seek(). 
     (Rich Johnson via Michael Busch)
     
+ 4. LUCENE-938: Fixed cases where an unhandled exception in
+    IndexWriter's methods could cause deletes to be lost.
+    (Steven Parkes via Mike McCandless)
+      
 New features
 
  1. LUCENE-906: Elision filter for French.

Modified: lucene/java/trunk/src/java/org/apache/lucene/index/IndexWriter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/IndexWriter.java?view=diff&rev=556010&r1=556009&r2=556010
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/IndexWriter.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/IndexWriter.java Fri Jul 13 07:23:47 2007
@@ -1331,8 +1331,18 @@
    * must make a matched (try/finally) call to
    * commitTransaction() or rollbackTransaction() to finish
    * the transaction.
+   *
+   * Note that buffered documents and delete terms are not handled
+   * within the transactions, so they must be flushed before the
+   * transaction is started.
    */
   private void startTransaction() throws IOException {
+
+    assert numBufferedDeleteTerms == 0 :
+           "calling startTransaction with buffered delete terms not supported";
+    assert docWriter.getNumDocsInRAM() == 0 :
+           "calling startTransaction with buffered documents not supported";
+
     localRollbackSegmentInfos = (SegmentInfos) segmentInfos.clone();
     localAutoCommit = autoCommit;
     if (localAutoCommit) {
@@ -1907,6 +1917,9 @@
 
         SegmentInfos rollback = null;
 
+        HashMap saveBufferedDeleteTerms = null;
+        int saveNumBufferedDeleteTerms = 0;
+
         if (flushDeletes)
           rollback = (SegmentInfos) segmentInfos.clone();
 
@@ -1941,6 +1954,8 @@
             // buffer deletes longer and then flush them to
             // multiple flushed segments, when
             // autoCommit=false
+            saveBufferedDeleteTerms = bufferedDeleteTerms;
+            saveNumBufferedDeleteTerms = numBufferedDeleteTerms;
             applyDeletes(flushDocs);
             doAfterFlush();
           }
@@ -1955,6 +1970,12 @@
               // SegmentInfo instances:
               segmentInfos.clear();
               segmentInfos.addAll(rollback);
+
+              if (saveBufferedDeleteTerms != null) {
+                numBufferedDeleteTerms = saveNumBufferedDeleteTerms;
+                bufferedDeleteTerms = saveBufferedDeleteTerms;
+              }
+              
             } else {
               // Remove segment we added, if any:
               if (newSegment != null && 
@@ -2330,7 +2351,13 @@
       }
 
       // Clean up bufferedDeleteTerms.
-      bufferedDeleteTerms.clear();
+
+      // Rollbacks of buffered deletes are based on restoring the old
+      // map, so don't modify this one. Rare enough that the gc
+      // overhead is almost certainly lower than the alternate, which
+      // would be clone to support rollback.
+
+      bufferedDeleteTerms = new HashMap();
       numBufferedDeleteTerms = 0;
     }
   }

Modified: lucene/java/trunk/src/test/org/apache/lucene/index/TestIndexWriterDelete.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/index/TestIndexWriterDelete.java?view=diff&rev=556010&r1=556009&r2=556010
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/index/TestIndexWriterDelete.java (original)
+++ lucene/java/trunk/src/test/org/apache/lucene/index/TestIndexWriterDelete.java Fri Jul 13 07:23:47 2007
@@ -19,6 +19,7 @@
 
 import java.io.IOException;
 import java.util.Arrays;
+import java.lang.StackTraceElement;
 
 import junit.framework.TestCase;
 
@@ -531,6 +532,146 @@
         // Try again with 10 more bytes of free space:
         diskFree += 10;
       }
+    }
+  }
+
+  // This test tests that buffered deletes are not lost due to i/o
+  // errors occurring after the buffered deletes have been flushed but
+  // before the segmentInfos have been successfully written
+
+  public void testErrorAfterApplyDeletes() throws IOException {
+    
+    MockRAMDirectory.Failure failure = new MockRAMDirectory.Failure() {
+        boolean sawMaybe = false;
+        boolean failed = false;
+        public MockRAMDirectory.Failure reset() {
+          sawMaybe = false;
+          failed = false;
+          return this;
+        }
+        public void eval(MockRAMDirectory dir)  throws IOException {
+          if (sawMaybe && !failed) {
+            failed = true;
+            throw new IOException("fail after applyDeletes");
+          }
+          if (!failed) {
+            StackTraceElement[] trace = new Exception().getStackTrace();
+            for (int i = 0; i < trace.length; i++) {
+              if ("applyDeletes".equals(trace[i].getMethodName())) {
+                sawMaybe = true;
+                break;
+              }
+            }
+          }
+        }
+      };
+
+    // create a couple of files
+
+    String[] keywords = { "1", "2" };
+    String[] unindexed = { "Netherlands", "Italy" };
+    String[] unstored = { "Amsterdam has lots of bridges",
+        "Venice has lots of canals" };
+    String[] text = { "Amsterdam", "Venice" };
+
+    for(int pass=0;pass<2;pass++) {
+      boolean autoCommit = (0==pass);
+      Directory ramDir = new RAMDirectory();
+      MockRAMDirectory dir = new MockRAMDirectory(ramDir);
+      IndexWriter modifier = new IndexWriter(dir, autoCommit,
+                                             new WhitespaceAnalyzer(), true);
+      modifier.setUseCompoundFile(true);
+      modifier.setMaxBufferedDeleteTerms(2);
+
+      dir.failOn(failure.reset());
+
+      for (int i = 0; i < keywords.length; i++) {
+        Document doc = new Document();
+        doc.add(new Field("id", keywords[i], Field.Store.YES,
+                          Field.Index.UN_TOKENIZED));
+        doc.add(new Field("country", unindexed[i], Field.Store.YES,
+                          Field.Index.NO));
+        doc.add(new Field("contents", unstored[i], Field.Store.NO,
+                          Field.Index.TOKENIZED));
+        doc.add(new Field("city", text[i], Field.Store.YES,
+                          Field.Index.TOKENIZED));
+        modifier.addDocument(doc);
+      }
+      // flush (and commit if ac)
+
+      modifier.optimize();
+
+      // commit if !ac
+
+      if (!autoCommit) {
+        modifier.close();
+      }
+      // one of the two files hits
+
+      Term term = new Term("city", "Amsterdam");
+      int hitCount = getHitCount(dir, term);
+      assertEquals(1, hitCount);
+
+      // open the writer again (closed above)
+
+      if (!autoCommit) {
+        modifier = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer());
+        modifier.setUseCompoundFile(true);
+      }
+
+      // delete the doc
+      // max buf del terms is two, so this is buffered
+
+      modifier.deleteDocuments(term);
+
+      // add a doc (needed for the !ac case; see below)
+      // doc remains buffered
+
+      Document doc = new Document();
+      modifier.addDocument(doc);
+
+      // flush the changes, the buffered deletes, and the new doc
+
+      // The failure object will fail on the first write after the del
+      // file gets created when processing the buffered delete
+
+      // in the ac case, this will be when writing the new segments
+      // files so we really don't need the new doc, but it's harmless
+
+      // in the !ac case, a new segments file won't be created but in
+      // this case, creation of the cfs file happens next so we need
+      // the doc (to test that it's okay that we don't lose deletes if
+      // failing while creating the cfs file
+
+      boolean failed = false;
+      try {
+        modifier.flush();
+      } catch (IOException ioe) {
+        failed = true;
+      }
+
+      assertTrue(failed);
+
+      // The flush above failed, so we need to retry it (which will
+      // succeed, because the failure is a one-shot)
+
+      if (!autoCommit) {
+        modifier.close();
+      } else {
+        modifier.flush();
+      }
+
+      hitCount = getHitCount(dir, term);
+
+      // If we haven't lost the delete the hit count will be zero
+
+      assertEquals(0, hitCount);
+
+      if (autoCommit) {
+        modifier.close();
+      }
+
+      dir.close();
     }
   }
 

Modified: lucene/java/trunk/src/test/org/apache/lucene/store/MockRAMDirectory.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/store/MockRAMDirectory.java?view=diff&rev=556010&r1=556009&r2=556010
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/store/MockRAMDirectory.java (original)
+++ lucene/java/trunk/src/test/org/apache/lucene/store/MockRAMDirectory.java Fri Jul 13 07:23:47 2007
@@ -24,6 +24,7 @@
 import java.util.Random;
 import java.util.Map;
 import java.util.HashMap;
+import java.util.ArrayList;
 
 /**
  * This is a subclass of RAMDirectory that adds methods
@@ -116,6 +117,7 @@
   }
 
   void maybeThrowIOException() throws IOException {
+    maybeThrowDeterministicException();
     if (randomIOExceptionRate > 0.0) {
       int number = Math.abs(randomState.nextInt() % 1000);
       if (number < randomIOExceptionRate*1000) {
@@ -213,4 +215,59 @@
       }
     }
   }
+
+  /**
+   * Objects that represent fail-able conditions. Objects of a derived
+   * class are created and registered with the mock directory. After
+   * register, each object will be invoked once for each first write
+   * of a file, giving the object a chance to throw an IOException.
+   */
+  public static class Failure {
+    /**
+     * eval is called on the first write of every new file.
+     */
+    public void eval(MockRAMDirectory dir) throws IOException { }
+
+    /**
+     * reset should set the state of the failure to its default
+     * (freshly constructed) state. Reset is convenient for tests
+     * that want to create one failure object and then reuse it in
+     * multiple cases. This, combined with the fact that Failure
+     * subclasses are often anonymous classes makes reset difficult to
+     * do otherwise.
+     *
+     * A typical example of use is
+     * Failure failure = new Failure() { ... };
+     * ...
+     * mock.failOn(failure.reset())
+     */
+    public Failure reset() { return this; }
+  }
+
+  ArrayList failures;
+
+  /**
+   * add a Failure object to the list of objects to be evaluated
+   * at every potential failure point
+   */
+  public void failOn(Failure fail) {
+    if (failures == null) {
+      failures = new ArrayList();
+    }
+    failures.add(fail);
+  }
+
+  /**
+   * Itterate through the failures list, giving each object a
+   * chance to throw an IOE
+   */
+  void maybeThrowDeterministicException() throws IOException {
+    if (failures != null) {
+      for(int i = 0; i < failures.size(); i++) {
+        ((Failure)failures.get(i)).eval(this);
+      }
+    }
+  }
+
+
 }