You are viewing a plain text version of this content. The canonical link for it is here.
Posted to java-commits@lucene.apache.org by mi...@apache.org on 2007/07/13 16:23:48 UTC
svn commit: r556010 - in /lucene/java/trunk: CHANGES.txt
src/java/org/apache/lucene/index/IndexWriter.java
src/test/org/apache/lucene/index/TestIndexWriterDelete.java
src/test/org/apache/lucene/store/MockRAMDirectory.java
Author: mikemccand
Date: Fri Jul 13 07:23:47 2007
New Revision: 556010
URL: http://svn.apache.org/viewvc?view=rev&rev=556010
Log:
LUCENE-938: fixed certain cases where an unhandled exception could cause IndexWriter to lose buffered deletes
Modified:
lucene/java/trunk/CHANGES.txt
lucene/java/trunk/src/java/org/apache/lucene/index/IndexWriter.java
lucene/java/trunk/src/test/org/apache/lucene/index/TestIndexWriterDelete.java
lucene/java/trunk/src/test/org/apache/lucene/store/MockRAMDirectory.java
Modified: lucene/java/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/java/trunk/CHANGES.txt?view=diff&rev=556010&r1=556009&r2=556010
==============================================================================
--- lucene/java/trunk/CHANGES.txt (original)
+++ lucene/java/trunk/CHANGES.txt Fri Jul 13 07:23:47 2007
@@ -31,6 +31,10 @@
that was thrown after a call of TermPositions.seek().
(Rich Johnson via Michael Busch)
+ 4. LUCENE-938: Fixed cases where an unhandled exception in
+ IndexWriter's methods could cause deletes to be lost.
+ (Steven Parkes via Mike McCandless)
+
New features
1. LUCENE-906: Elision filter for French.
Modified: lucene/java/trunk/src/java/org/apache/lucene/index/IndexWriter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/IndexWriter.java?view=diff&rev=556010&r1=556009&r2=556010
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/IndexWriter.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/IndexWriter.java Fri Jul 13 07:23:47 2007
@@ -1331,8 +1331,18 @@
* must make a matched (try/finally) call to
* commitTransaction() or rollbackTransaction() to finish
* the transaction.
+ *
+ * Note that buffered documents and delete terms are not handled
+ * within the transactions, so they must be flushed before the
+ * transaction is started.
*/
private void startTransaction() throws IOException {
+
+ assert numBufferedDeleteTerms == 0 :
+ "calling startTransaction with buffered delete terms not supported";
+ assert docWriter.getNumDocsInRAM() == 0 :
+ "calling startTransaction with buffered documents not supported";
+
localRollbackSegmentInfos = (SegmentInfos) segmentInfos.clone();
localAutoCommit = autoCommit;
if (localAutoCommit) {
@@ -1907,6 +1917,9 @@
SegmentInfos rollback = null;
+ HashMap saveBufferedDeleteTerms = null;
+ int saveNumBufferedDeleteTerms = 0;
+
if (flushDeletes)
rollback = (SegmentInfos) segmentInfos.clone();
@@ -1941,6 +1954,8 @@
// buffer deletes longer and then flush them to
// multiple flushed segments, when
// autoCommit=false
+ saveBufferedDeleteTerms = bufferedDeleteTerms;
+ saveNumBufferedDeleteTerms = numBufferedDeleteTerms;
applyDeletes(flushDocs);
doAfterFlush();
}
@@ -1955,6 +1970,12 @@
// SegmentInfo instances:
segmentInfos.clear();
segmentInfos.addAll(rollback);
+
+ if (saveBufferedDeleteTerms != null) {
+ numBufferedDeleteTerms = saveNumBufferedDeleteTerms;
+ bufferedDeleteTerms = saveBufferedDeleteTerms;
+ }
+
} else {
// Remove segment we added, if any:
if (newSegment != null &&
@@ -2330,7 +2351,13 @@
}
// Clean up bufferedDeleteTerms.
- bufferedDeleteTerms.clear();
+
+ // Rollbacks of buffered deletes are based on restoring the old
+ // map, so don't modify this one. Rare enough that the gc
+ // overhead is almost certainly lower than the alternate, which
+ // would be clone to support rollback.
+
+ bufferedDeleteTerms = new HashMap();
numBufferedDeleteTerms = 0;
}
}
Modified: lucene/java/trunk/src/test/org/apache/lucene/index/TestIndexWriterDelete.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/index/TestIndexWriterDelete.java?view=diff&rev=556010&r1=556009&r2=556010
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/index/TestIndexWriterDelete.java (original)
+++ lucene/java/trunk/src/test/org/apache/lucene/index/TestIndexWriterDelete.java Fri Jul 13 07:23:47 2007
@@ -19,6 +19,7 @@
import java.io.IOException;
import java.util.Arrays;
+import java.lang.StackTraceElement;
import junit.framework.TestCase;
@@ -531,6 +532,146 @@
// Try again with 10 more bytes of free space:
diskFree += 10;
}
+ }
+ }
+
+ // This test tests that buffered deletes are not lost due to i/o
+ // errors occurring after the buffered deletes have been flushed but
+ // before the segmentInfos have been successfully written
+
+ public void testErrorAfterApplyDeletes() throws IOException {
+
+ MockRAMDirectory.Failure failure = new MockRAMDirectory.Failure() {
+ boolean sawMaybe = false;
+ boolean failed = false;
+ public MockRAMDirectory.Failure reset() {
+ sawMaybe = false;
+ failed = false;
+ return this;
+ }
+ public void eval(MockRAMDirectory dir) throws IOException {
+ if (sawMaybe && !failed) {
+ failed = true;
+ throw new IOException("fail after applyDeletes");
+ }
+ if (!failed) {
+ StackTraceElement[] trace = new Exception().getStackTrace();
+ for (int i = 0; i < trace.length; i++) {
+ if ("applyDeletes".equals(trace[i].getMethodName())) {
+ sawMaybe = true;
+ break;
+ }
+ }
+ }
+ }
+ };
+
+ // create a couple of files
+
+ String[] keywords = { "1", "2" };
+ String[] unindexed = { "Netherlands", "Italy" };
+ String[] unstored = { "Amsterdam has lots of bridges",
+ "Venice has lots of canals" };
+ String[] text = { "Amsterdam", "Venice" };
+
+ for(int pass=0;pass<2;pass++) {
+ boolean autoCommit = (0==pass);
+ Directory ramDir = new RAMDirectory();
+ MockRAMDirectory dir = new MockRAMDirectory(ramDir);
+ IndexWriter modifier = new IndexWriter(dir, autoCommit,
+ new WhitespaceAnalyzer(), true);
+ modifier.setUseCompoundFile(true);
+ modifier.setMaxBufferedDeleteTerms(2);
+
+ dir.failOn(failure.reset());
+
+ for (int i = 0; i < keywords.length; i++) {
+ Document doc = new Document();
+ doc.add(new Field("id", keywords[i], Field.Store.YES,
+ Field.Index.UN_TOKENIZED));
+ doc.add(new Field("country", unindexed[i], Field.Store.YES,
+ Field.Index.NO));
+ doc.add(new Field("contents", unstored[i], Field.Store.NO,
+ Field.Index.TOKENIZED));
+ doc.add(new Field("city", text[i], Field.Store.YES,
+ Field.Index.TOKENIZED));
+ modifier.addDocument(doc);
+ }
+ // flush (and commit if ac)
+
+ modifier.optimize();
+
+ // commit if !ac
+
+ if (!autoCommit) {
+ modifier.close();
+ }
+ // one of the two files hits
+
+ Term term = new Term("city", "Amsterdam");
+ int hitCount = getHitCount(dir, term);
+ assertEquals(1, hitCount);
+
+ // open the writer again (closed above)
+
+ if (!autoCommit) {
+ modifier = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer());
+ modifier.setUseCompoundFile(true);
+ }
+
+ // delete the doc
+ // max buf del terms is two, so this is buffered
+
+ modifier.deleteDocuments(term);
+
+ // add a doc (needed for the !ac case; see below)
+ // doc remains buffered
+
+ Document doc = new Document();
+ modifier.addDocument(doc);
+
+ // flush the changes, the buffered deletes, and the new doc
+
+ // The failure object will fail on the first write after the del
+ // file gets created when processing the buffered delete
+
+ // in the ac case, this will be when writing the new segments
+ // files so we really don't need the new doc, but it's harmless
+
+ // in the !ac case, a new segments file won't be created but in
+ // this case, creation of the cfs file happens next so we need
+ // the doc (to test that it's okay that we don't lose deletes if
+ // failing while creating the cfs file
+
+ boolean failed = false;
+ try {
+ modifier.flush();
+ } catch (IOException ioe) {
+ failed = true;
+ }
+
+ assertTrue(failed);
+
+ // The flush above failed, so we need to retry it (which will
+ // succeed, because the failure is a one-shot)
+
+ if (!autoCommit) {
+ modifier.close();
+ } else {
+ modifier.flush();
+ }
+
+ hitCount = getHitCount(dir, term);
+
+ // If we haven't lost the delete the hit count will be zero
+
+ assertEquals(0, hitCount);
+
+ if (autoCommit) {
+ modifier.close();
+ }
+
+ dir.close();
}
}
Modified: lucene/java/trunk/src/test/org/apache/lucene/store/MockRAMDirectory.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/store/MockRAMDirectory.java?view=diff&rev=556010&r1=556009&r2=556010
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/store/MockRAMDirectory.java (original)
+++ lucene/java/trunk/src/test/org/apache/lucene/store/MockRAMDirectory.java Fri Jul 13 07:23:47 2007
@@ -24,6 +24,7 @@
import java.util.Random;
import java.util.Map;
import java.util.HashMap;
+import java.util.ArrayList;
/**
* This is a subclass of RAMDirectory that adds methods
@@ -116,6 +117,7 @@
}
void maybeThrowIOException() throws IOException {
+ maybeThrowDeterministicException();
if (randomIOExceptionRate > 0.0) {
int number = Math.abs(randomState.nextInt() % 1000);
if (number < randomIOExceptionRate*1000) {
@@ -213,4 +215,59 @@
}
}
}
+
+ /**
+ * Objects that represent fail-able conditions. Objects of a derived
+ * class are created and registered with the mock directory. After
+ * register, each object will be invoked once for each first write
+ * of a file, giving the object a chance to throw an IOException.
+ */
+ public static class Failure {
+ /**
+ * eval is called on the first write of every new file.
+ */
+ public void eval(MockRAMDirectory dir) throws IOException { }
+
+ /**
+ * reset should set the state of the failure to its default
+ * (freshly constructed) state. Reset is convenient for tests
+ * that want to create one failure object and then reuse it in
+ * multiple cases. This, combined with the fact that Failure
+ * subclasses are often anonymous classes makes reset difficult to
+ * do otherwise.
+ *
+ * A typical example of use is
+ * Failure failure = new Failure() { ... };
+ * ...
+ * mock.failOn(failure.reset())
+ */
+ public Failure reset() { return this; }
+ }
+
+ ArrayList failures;
+
+ /**
+ * add a Failure object to the list of objects to be evaluated
+ * at every potential failure point
+ */
+ public void failOn(Failure fail) {
+ if (failures == null) {
+ failures = new ArrayList();
+ }
+ failures.add(fail);
+ }
+
+ /**
+ * Itterate through the failures list, giving each object a
+ * chance to throw an IOE
+ */
+ void maybeThrowDeterministicException() throws IOException {
+ if (failures != null) {
+ for(int i = 0; i < failures.size(); i++) {
+ ((Failure)failures.get(i)).eval(this);
+ }
+ }
+ }
+
+
}