You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2013/08/14 18:43:23 UTC

svn commit: r1513955 - in /lucene/dev/trunk/lucene: ./ core/src/java/org/apache/lucene/index/ core/src/test/org/apache/lucene/index/ test-framework/src/java/org/apache/lucene/index/ test-framework/src/java/org/apache/lucene/util/

Author: rmuir
Date: Wed Aug 14 16:43:22 2013
New Revision: 1513955

URL: http://svn.apache.org/r1513955
Log:
LUCENE-5173: add checkindex piece of LUCENE-5116, prevent 0 document segments completely

Modified:
    lucene/dev/trunk/lucene/CHANGES.txt
    lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java
    lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
    lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/SegmentMerger.java
    lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java
    lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/index/RandomIndexWriter.java
    lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/util/_TestUtil.java

Modified: lucene/dev/trunk/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/CHANGES.txt?rev=1513955&r1=1513954&r2=1513955&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/CHANGES.txt (original)
+++ lucene/dev/trunk/lucene/CHANGES.txt Wed Aug 14 16:43:22 2013
@@ -172,6 +172,9 @@ API Changes
   Analyzer. Analyzer adds a getter to retrieve the strategy for this use-case.
   (Uwe Schindler, Robert Muir, Shay Banon)
 
+* LUCENE-5173: Lucene never writes segments with 0 documents anymore.
+  (Shai Erera, Uwe Schindler, Robert Muir)
+
 Optimizations
 
 * LUCENE-5088: Added TermFilter to filter docs by a specific term.

Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java?rev=1513955&r1=1513954&r2=1513955&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java Wed Aug 14 16:43:22 2013
@@ -496,6 +496,11 @@ public class CheckIndex {
       msg(infoStream, "  " + (1+i) + " of " + numSegments + ": name=" + info.info.name + " docCount=" + info.info.getDocCount());
       segInfoStat.name = info.info.name;
       segInfoStat.docCount = info.info.getDocCount();
+      
+      final String version = info.info.getVersion();
+      if (info.info.getDocCount() <= 0 && version != null && versionComparator.compare(version, "4.5") >= 0) {
+        throw new RuntimeException("illegal number of documents: maxDoc=" + info.info.getDocCount());
+      }
 
       int toLoseDocCount = info.info.getDocCount();
 

Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java?rev=1513955&r1=1513954&r2=1513955&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java Wed Aug 14 16:43:22 2013
@@ -2462,20 +2462,12 @@ public class IndexWriter implements Clos
       String mergedName = newSegmentName();
       final List<AtomicReader> mergeReaders = new ArrayList<AtomicReader>();
       for (IndexReader indexReader : readers) {
-        if (indexReader.numDocs() > 0) {
-          numDocs += indexReader.numDocs();
-          for (AtomicReaderContext ctx : indexReader.leaves()) {
-            if (ctx.reader().numDocs() > 0) { // drop empty (or all deleted) segments
-              mergeReaders.add(ctx.reader());
-            }
-          }
+        numDocs += indexReader.numDocs();
+        for (AtomicReaderContext ctx : indexReader.leaves()) {
+          mergeReaders.add(ctx.reader());
         }
       }
       
-      if (mergeReaders.isEmpty()) { // no segments with documents to add
-        return;
-      }
-      
       final IOContext context = new IOContext(new MergeInfo(numDocs, -1, true, -1));
 
       // TODO: somehow we should fix this merge so it's
@@ -2487,6 +2479,10 @@ public class IndexWriter implements Clos
 
       SegmentMerger merger = new SegmentMerger(mergeReaders, info, infoStream, trackingDir,
                                                MergeState.CheckAbort.NONE, globalFieldNumberMap, context);
+      
+      if (!merger.shouldMerge()) {
+        return;
+      }
 
       MergeState mergeState;
       boolean success = false;
@@ -3733,7 +3729,12 @@ public class IndexWriter implements Clos
       MergeState mergeState;
       boolean success3 = false;
       try {
-        mergeState = merger.merge();
+        if (!merger.shouldMerge()) {
+          // would result in a 0 document segment: nothing to merge!
+          mergeState = new MergeState(new ArrayList<AtomicReader>(), merge.info.info, infoStream, checkAbort);
+        } else {
+          mergeState = merger.merge();
+        }
         success3 = true;
       } finally {
         if (!success3) {
@@ -3748,12 +3749,16 @@ public class IndexWriter implements Clos
       // Record which codec was used to write the segment
 
       if (infoStream.isEnabled("IW")) {
-        infoStream.message("IW", "merge codec=" + codec + " docCount=" + merge.info.info.getDocCount() + "; merged segment has " +
+        if (merge.info.info.getDocCount() == 0) {
+          infoStream.message("IW", "merge away fully deleted segments");
+        } else {
+          infoStream.message("IW", "merge codec=" + codec + " docCount=" + merge.info.info.getDocCount() + "; merged segment has " +
                            (mergeState.fieldInfos.hasVectors() ? "vectors" : "no vectors") + "; " +
                            (mergeState.fieldInfos.hasNorms() ? "norms" : "no norms") + "; " + 
                            (mergeState.fieldInfos.hasDocValues() ? "docValues" : "no docValues") + "; " + 
                            (mergeState.fieldInfos.hasProx() ? "prox" : "no prox") + "; " + 
                            (mergeState.fieldInfos.hasProx() ? "freqs" : "no freqs"));
+        }
       }
 
       // Very important to do this before opening the reader
@@ -3958,8 +3963,8 @@ public class IndexWriter implements Clos
   /** Only for testing.
    *
    * @lucene.internal */
-  void keepFullyDeletedSegments() {
-    keepFullyDeletedSegments = true;
+  void setKeepFullyDeletedSegments(boolean v) {
+    keepFullyDeletedSegments = v;
   }
 
   boolean getKeepFullyDeletedSegments() {

Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/SegmentMerger.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/SegmentMerger.java?rev=1513955&r1=1513954&r2=1513955&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/SegmentMerger.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/SegmentMerger.java Wed Aug 14 16:43:22 2013
@@ -52,12 +52,18 @@ final class SegmentMerger {
 
   // note, just like in codec apis Directory 'dir' is NOT the same as segmentInfo.dir!!
   SegmentMerger(List<AtomicReader> readers, SegmentInfo segmentInfo, InfoStream infoStream, Directory dir,
-                MergeState.CheckAbort checkAbort, FieldInfos.FieldNumbers fieldNumbers, IOContext context) {
+                MergeState.CheckAbort checkAbort, FieldInfos.FieldNumbers fieldNumbers, IOContext context) throws IOException {
     mergeState = new MergeState(readers, segmentInfo, infoStream, checkAbort);
     directory = dir;
     this.codec = segmentInfo.getCodec();
     this.context = context;
     this.fieldInfosBuilder = new FieldInfos.Builder(fieldNumbers);
+    mergeState.segmentInfo.setDocCount(setDocMaps());
+  }
+  
+  /** True if any merging should happen */
+  boolean shouldMerge() {
+    return mergeState.segmentInfo.getDocCount() > 0;
   }
 
   /**
@@ -67,14 +73,15 @@ final class SegmentMerger {
    * @throws IOException if there is a low-level IO error
    */
   MergeState merge() throws IOException {
+    if (!shouldMerge()) {
+      throw new IllegalStateException("Merge would result in 0 document segment");
+    }
     // NOTE: it's important to add calls to
     // checkAbort.work(...) if you make any changes to this
     // method that will spend alot of time.  The frequency
     // of this check impacts how long
     // IndexWriter.close(false) takes to actually stop the
     // threads.
-    
-    mergeState.segmentInfo.setDocCount(setDocMaps());
     mergeFieldInfos();
     setMatchingSegmentReaders();
     long t0 = 0;

Modified: lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java?rev=1513955&r1=1513954&r2=1513955&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java (original)
+++ lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java Wed Aug 14 16:43:22 2013
@@ -49,6 +49,7 @@ import org.apache.lucene.index.IndexWrit
 import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.search.FieldCache;
 import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.MatchAllDocsQuery;
 import org.apache.lucene.search.ScoreDoc;
 import org.apache.lucene.search.TermQuery;
 import org.apache.lucene.store.AlreadyClosedException;
@@ -68,6 +69,7 @@ import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util.SetOnce;
 import org.apache.lucene.util.ThreadInterruptedException;
 import org.apache.lucene.util._TestUtil;
 import org.apache.lucene.util.packed.PackedInts;
@@ -2188,4 +2190,32 @@ public class TestIndexWriter extends Luc
     writer.close();
     dir.close();
   }
+  
+  public void testMergeAllDeleted() throws IOException {
+    Directory dir = newDirectory();
+    IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
+    final SetOnce<IndexWriter> iwRef = new SetOnce<IndexWriter>();
+    iwc.setInfoStream(new RandomIndexWriter.TestPointInfoStream(iwc.getInfoStream(), new RandomIndexWriter.TestPoint() {
+      @Override
+      public void apply(String message) {
+        if ("startCommitMerge".equals(message)) {
+          iwRef.get().setKeepFullyDeletedSegments(false);
+        } else if ("startMergeInit".equals(message)) {
+          iwRef.get().setKeepFullyDeletedSegments(true);
+        }
+      }
+    }));
+    IndexWriter evilWriter = new IndexWriter(dir, iwc);
+    iwRef.set(evilWriter);
+    for (int i = 0; i < 1000; i++) {
+      addDoc(evilWriter);
+      if (random().nextInt(17) == 0) {
+        evilWriter.commit();
+      }
+    }
+    evilWriter.deleteDocuments(new MatchAllDocsQuery());
+    evilWriter.forceMerge(1);
+    evilWriter.close();
+    dir.close();
+  }
 }

Modified: lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/index/RandomIndexWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/index/RandomIndexWriter.java?rev=1513955&r1=1513954&r2=1513955&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/index/RandomIndexWriter.java (original)
+++ lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/index/RandomIndexWriter.java Wed Aug 14 16:43:22 2013
@@ -340,7 +340,7 @@ public class RandomIndexWriter implement
     w.forceMerge(maxSegmentCount);
   }
   
-  private static final class TestPointInfoStream extends InfoStream {
+  static final class TestPointInfoStream extends InfoStream {
     private final InfoStream delegate;
     private final TestPoint testPoint;
     

Modified: lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/util/_TestUtil.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/util/_TestUtil.java?rev=1513955&r1=1513954&r2=1513955&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/util/_TestUtil.java (original)
+++ lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/util/_TestUtil.java Wed Aug 14 16:43:22 2013
@@ -793,9 +793,9 @@ public class _TestUtil {
     try {
       // Carefully invoke what is a package-private (test
       // only, internal) method on IndexWriter:
-      Method m = IndexWriter.class.getDeclaredMethod("keepFullyDeletedSegments");
+      Method m = IndexWriter.class.getDeclaredMethod("setKeepFullyDeletedSegments", boolean.class);
       m.setAccessible(true);
-      m.invoke(w);
+      m.invoke(w, Boolean.TRUE);
     } catch (Exception e) {
       // Should not happen?
       throw new RuntimeException(e);