You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2011/01/25 16:40:52 UTC
svn commit: r1063323 - in /lucene/dev/trunk/lucene: ./
src/java/org/apache/lucene/index/ src/test/org/apache/lucene/index/
src/test/org/apache/lucene/search/
Author: mikemccand
Date: Tue Jan 25 15:40:51 2011
New Revision: 1063323
URL: http://svn.apache.org/viewvc?rev=1063323&view=rev
Log:
LUCENE-2010: drop segments that are 100% deleted docs in IW/IR commit
Modified:
lucene/dev/trunk/lucene/CHANGES.txt
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/BufferedDeletes.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/DirectoryReader.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/IndexReader.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/IndexWriter.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/SegmentInfos.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestAddIndexes.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestIndexReader.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestIndexReaderReopen.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestIndexWriter.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestIndexWriterExceptions.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/QueryUtils.java
Modified: lucene/dev/trunk/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/CHANGES.txt?rev=1063323&r1=1063322&r2=1063323&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/CHANGES.txt (original)
+++ lucene/dev/trunk/lucene/CHANGES.txt Tue Jan 25 15:40:51 2011
@@ -486,6 +486,9 @@ Changes in runtime behavior
* LUCENE-2829: Improve the performance of "primary key" lookup use
case (running a TermQuery that matches one document) on a
multi-segment index. (Robert Muir, Mike McCandless)
+
+* LUCENE-2010: Segments with 100% deleted documents are now removed on
+ IndexReader or IndexWriter commit. (Uwe Schindler, Mike McCandless)
API Changes
@@ -905,6 +908,9 @@ Optimizations
* LUCENE-2824: Optimize BufferedIndexInput to do less bounds checks.
(Robert Muir)
+* LUCENE-2010: Segments with 100% deleted documents are now removed on
+ IndexReader or IndexWriter commit. (Uwe Schindler, Mike McCandless)
+
Build
* LUCENE-2124: Moved the JDK-based collation support from contrib/collation
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/BufferedDeletes.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/BufferedDeletes.java?rev=1063323&r1=1063322&r2=1063323&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/BufferedDeletes.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/BufferedDeletes.java Tue Jan 25 15:40:51 2011
@@ -270,9 +270,9 @@ class BufferedDeletes {
}
private synchronized long applyDeletes(IndexWriter.ReaderPool readerPool,
- SegmentInfo info,
- SegmentDeletes coalescedDeletes,
- SegmentDeletes segmentDeletes) throws IOException {
+ SegmentInfo info,
+ SegmentDeletes coalescedDeletes,
+ SegmentDeletes segmentDeletes) throws IOException {
assert readerPool.infoIsLive(info);
assert coalescedDeletes == null || coalescedDeletes.docIDs.size() == 0;
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/DirectoryReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/DirectoryReader.java?rev=1063323&r1=1063322&r2=1063323&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/DirectoryReader.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/DirectoryReader.java Tue Jan 25 15:40:51 2011
@@ -710,6 +710,9 @@ class DirectoryReader extends IndexReade
for (int i = 0; i < subReaders.length; i++)
subReaders[i].commit();
+ // Remove segments that contain only 100% deleted docs:
+ segmentInfos.pruneDeletedSegments();
+
// Sync all files we just wrote
directory.sync(segmentInfos.files(directory, false));
segmentInfos.commit(directory);
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/IndexReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/IndexReader.java?rev=1063323&r1=1063322&r2=1063323&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/IndexReader.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/IndexReader.java Tue Jan 25 15:40:51 2011
@@ -1163,7 +1163,14 @@ public abstract class IndexReader implem
return n;
}
- /** Undeletes all documents currently marked as deleted in this index.
+ /** Undeletes all documents currently marked as deleted in
+ * this index.
+ *
+ * <p>NOTE: this is only a best-effort process. For
+ * example, if all documents in a given segment were
+ * deleted, Lucene now drops that segment from the index,
+ * which means its documents will not be recovered by this
+ * method.
*
* @throws StaleReaderException if the index has changed
* since this reader was opened
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/IndexWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/IndexWriter.java?rev=1063323&r1=1063322&r2=1063323&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/IndexWriter.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/IndexWriter.java Tue Jan 25 15:40:51 2011
@@ -3276,6 +3276,15 @@ public class IndexWriter implements Clos
}
}
+ private boolean keepFullyDeletedSegments;
+
+ /** Only for testing.
+ *
+ * @lucene.internal */
+ void keepFullyDeletedSegments() {
+ keepFullyDeletedSegments = true;
+ }
+
// called only from assert
private boolean filesExist(SegmentInfos toSync) throws IOException {
Collection<String> files = toSync.files(directory, false);
@@ -3334,6 +3343,10 @@ public class IndexWriter implements Clos
readerPool.commit();
toSync = (SegmentInfos) segmentInfos.clone();
+ if (!keepFullyDeletedSegments) {
+ toSync.pruneDeletedSegments();
+ }
+
assert filesExist(toSync);
if (commitUserData != null)
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/SegmentInfos.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/SegmentInfos.java?rev=1063323&r1=1063322&r2=1063323&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/SegmentInfos.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/SegmentInfos.java Tue Jan 25 15:40:51 2011
@@ -308,6 +308,19 @@ public final class SegmentInfos extends
}
}
+ /** Prunes any segment whose docs are all deleted. */
+ public void pruneDeletedSegments() {
+ int segIdx = 0;
+ while(segIdx < size()) {
+ final SegmentInfo info = info(segIdx);
+ if (info.getDelCount() == info.docCount) {
+ remove(segIdx);
+ } else {
+ segIdx++;
+ }
+ }
+ }
+
/**
* Returns a copy of this instance, also copying each
* SegmentInfo.
Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestAddIndexes.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestAddIndexes.java?rev=1063323&r1=1063322&r2=1063323&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestAddIndexes.java (original)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestAddIndexes.java Tue Jan 25 15:40:51 2011
@@ -428,7 +428,7 @@ public class TestAddIndexes extends Luce
);
writer.addIndexes(aux, new MockDirectoryWrapper(random, new RAMDirectory(aux)));
- assertEquals(1060, writer.maxDoc());
+ assertEquals(1020, writer.maxDoc());
assertEquals(1000, writer.getDocCount(0));
writer.close();
dir.close();
@@ -480,7 +480,7 @@ public class TestAddIndexes extends Luce
);
writer.addIndexes(aux, aux2);
- assertEquals(1060, writer.maxDoc());
+ assertEquals(1040, writer.maxDoc());
assertEquals(1000, writer.getDocCount(0));
writer.close();
dir.close();
Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestIndexReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestIndexReader.java?rev=1063323&r1=1063322&r2=1063323&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestIndexReader.java (original)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestIndexReader.java Tue Jan 25 15:40:51 2011
@@ -360,7 +360,7 @@ public class TestIndexReader extends Luc
// CREATE A NEW READER and re-test
reader = IndexReader.open(dir, false);
- assertEquals("deleted docFreq", 100, reader.docFreq(searchTerm));
+ assertEquals("deleted docFreq", 0, reader.docFreq(searchTerm));
assertTermDocsCount("deleted termDocs", reader, searchTerm, 0);
reader.close();
reader2.close();
@@ -697,7 +697,6 @@ public class TestIndexReader extends Luc
// CREATE A NEW READER and re-test
reader = IndexReader.open(dir, false);
- assertEquals("deleted docFreq", 100, reader.docFreq(searchTerm));
assertEquals("deleted docFreq", 100, reader.docFreq(searchTerm2));
assertTermDocsCount("deleted termDocs", reader, searchTerm, 0);
assertTermDocsCount("deleted termDocs", reader, searchTerm2, 100);
@@ -838,7 +837,6 @@ public class TestIndexReader extends Luc
writer.close();
IndexReader reader = IndexReader.open(dir, false);
reader.deleteDocument(0);
- reader.deleteDocument(1);
reader.close();
reader = IndexReader.open(dir, false);
reader.undeleteAll();
@@ -855,7 +853,6 @@ public class TestIndexReader extends Luc
writer.close();
IndexReader reader = IndexReader.open(dir, false);
reader.deleteDocument(0);
- reader.deleteDocument(1);
reader.close();
reader = IndexReader.open(dir, false);
reader.undeleteAll();
@@ -1290,9 +1287,6 @@ public class TestIndexReader extends Luc
// Open another reader to confirm that everything is deleted
reader2 = IndexReader.open(dir, false);
- assertEquals("reopened 2", 100, reader2.docFreq(searchTerm1));
- assertEquals("reopened 2", 100, reader2.docFreq(searchTerm2));
- assertEquals("reopened 2", 100, reader2.docFreq(searchTerm3));
assertTermDocsCount("reopened 2", reader2, searchTerm1, 0);
assertTermDocsCount("reopened 2", reader2, searchTerm2, 0);
assertTermDocsCount("reopened 2", reader2, searchTerm3, 100);
Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestIndexReaderReopen.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestIndexReaderReopen.java?rev=1063323&r1=1063322&r2=1063323&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestIndexReaderReopen.java (original)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestIndexReaderReopen.java Tue Jan 25 15:40:51 2011
@@ -1211,7 +1211,6 @@ public class TestIndexReaderReopen exten
IndexReader r = IndexReader.open(dir, false);
assertEquals(0, r.numDocs());
- assertEquals(4, r.maxDoc());
Collection<IndexCommit> commits = IndexReader.listCommits(dir);
for (final IndexCommit commit : commits) {
Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestIndexWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestIndexWriter.java?rev=1063323&r1=1063322&r2=1063323&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestIndexWriter.java (original)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestIndexWriter.java Tue Jan 25 15:40:51 2011
@@ -101,19 +101,12 @@ public class TestIndexWriter extends Luc
}
reader.close();
- // test doc count before segments are merged/index is optimized
- writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer()));
- assertEquals(100, writer.maxDoc());
- writer.close();
-
reader = IndexReader.open(dir, true);
- assertEquals(100, reader.maxDoc());
assertEquals(60, reader.numDocs());
reader.close();
// optimize the index and check that the new doc count is correct
writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer()));
- assertEquals(100, writer.maxDoc());
assertEquals(60, writer.numDocs());
writer.optimize();
assertEquals(60, writer.maxDoc());
@@ -1431,7 +1424,6 @@ public class TestIndexWriter extends Luc
w.close();
IndexReader ir = IndexReader.open(dir, true);
- assertEquals(1, ir.maxDoc());
assertEquals(0, ir.numDocs());
ir.close();
Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestIndexWriterExceptions.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestIndexWriterExceptions.java?rev=1063323&r1=1063322&r2=1063323&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestIndexWriterExceptions.java (original)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestIndexWriterExceptions.java Tue Jan 25 15:40:51 2011
@@ -567,24 +567,25 @@ public class TestIndexWriterExceptions e
System.out.println("TEST: open reader");
}
IndexReader reader = IndexReader.open(dir, true);
- int expected = 3+(1-i)*2;
- assertEquals(expected, reader.docFreq(new Term("contents", "here")));
- assertEquals(expected, reader.maxDoc());
- int numDel = 0;
- final Bits delDocs = MultiFields.getDeletedDocs(reader);
- assertNotNull(delDocs);
- for(int j=0;j<reader.maxDoc();j++) {
- if (delDocs.get(j))
- numDel++;
- else {
- reader.document(j);
- reader.getTermFreqVectors(j);
+ if (i == 0) {
+ int expected = 5;
+ assertEquals(expected, reader.docFreq(new Term("contents", "here")));
+ assertEquals(expected, reader.maxDoc());
+ int numDel = 0;
+ final Bits delDocs = MultiFields.getDeletedDocs(reader);
+ assertNotNull(delDocs);
+ for(int j=0;j<reader.maxDoc();j++) {
+ if (delDocs.get(j))
+ numDel++;
+ else {
+ reader.document(j);
+ reader.getTermFreqVectors(j);
+ }
}
+ assertEquals(1, numDel);
}
reader.close();
- assertEquals(1, numDel);
-
writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT,
analyzer).setMaxBufferedDocs(10));
doc = new Document();
@@ -596,10 +597,10 @@ public class TestIndexWriterExceptions e
writer.close();
reader = IndexReader.open(dir, true);
- expected = 19+(1-i)*2;
+ int expected = 19+(1-i)*2;
assertEquals(expected, reader.docFreq(new Term("contents", "here")));
assertEquals(expected, reader.maxDoc());
- numDel = 0;
+ int numDel = 0;
assertNull(MultiFields.getDeletedDocs(reader));
for(int j=0;j<reader.maxDoc();j++) {
reader.document(j);
Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/QueryUtils.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/QueryUtils.java?rev=1063323&r1=1063322&r2=1063323&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/QueryUtils.java (original)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/QueryUtils.java Tue Jan 25 15:40:51 2011
@@ -6,6 +6,7 @@ import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.util.Random;
+import java.lang.reflect.Method;
import junit.framework.Assert;
@@ -172,6 +173,16 @@ public class QueryUtils {
}
w.commit();
w.deleteDocuments( new MatchAllDocsQuery() );
+ try {
+ // Carefully invoke what is a package-private (test
+ // only, internal) method on IndexWriter:
+ Method m = IndexWriter.class.getDeclaredMethod("keepFullyDeletedSegments");
+ m.setAccessible(true);
+ m.invoke(w);
+ } catch (Exception e) {
+ // Should not happen?
+ throw new RuntimeException(e);
+ }
w.commit();
if (0 < numDeletedDocs)