You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by jp...@apache.org on 2013/02/22 00:34:34 UTC

svn commit: r1448853 - in /lucene/dev/trunk/lucene: CHANGES.txt core/src/java/org/apache/lucene/index/MergeState.java core/src/test/org/apache/lucene/index/TestSegmentMerger.java

Author: jpountz
Date: Thu Feb 21 23:34:34 2013
New Revision: 1448853

URL: http://svn.apache.org/r1448853
Log:
LUCENE-4792: Reduction of the memory required to build the doc ID maps used when merging segments.

Modified:
    lucene/dev/trunk/lucene/CHANGES.txt
    lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/MergeState.java
    lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/TestSegmentMerger.java

Modified: lucene/dev/trunk/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/CHANGES.txt?rev=1448853&r1=1448852&r2=1448853&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/CHANGES.txt (original)
+++ lucene/dev/trunk/lucene/CHANGES.txt Thu Feb 21 23:34:34 2013
@@ -112,6 +112,9 @@ Optimizations
   table-compression, etc), and memory addresses use MonotonicBlockPackedWriter.
   (Simon Willnauer, Adrien Grand, Mike McCandless, Robert Muir)
 
+* LUCENE-4792: Reduction of the memory required to build the doc ID maps used
+  when merging segments. (Adrien Grand)
+
 New Features
 
 * LUCENE-4686: New specialized DGapVInt8IntEncoder for facets (now the 

Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/MergeState.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/MergeState.java?rev=1448853&r1=1448852&r2=1448853&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/MergeState.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/MergeState.java Thu Feb 21 23:34:34 2013
@@ -22,7 +22,7 @@ import java.util.List;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.InfoStream;
-import org.apache.lucene.util.packed.PackedInts;
+import org.apache.lucene.util.packed.MonotonicAppendingLongBuffer;
 
 /** Holds common state used during segment merging.
  *
@@ -33,86 +33,73 @@ public class MergeState {
    * Remaps docids around deletes during merge
    */
   public static abstract class DocMap {
-    private final Bits liveDocs;
 
-    /** Sole constructor. (For invocation by subclass 
-     *  constructors, typically implicit.) */
-    protected DocMap(Bits liveDocs) {
-      this.liveDocs = liveDocs;
+    DocMap() {}
+
+    /** Returns the mapped docID corresponding to the provided one. */
+    public abstract int get(int docID);
+
+    /** Returns the total number of documents, ignoring
+     *  deletions. */
+    public abstract int maxDoc();
+
+    /** Returns the number of not-deleted documents. */
+    public final int numDocs() {
+      return maxDoc() - numDeletedDocs();
+    }
+
+    /** Returns the number of deleted documents. */
+    public abstract int numDeletedDocs();
+
+    /** Returns true if there are any deletions. */
+    public boolean hasDeletions() {
+      return numDeletedDocs() > 0;
     }
 
     /** Creates a {@link DocMap} instance appropriate for
      *  this reader. */
     public static DocMap build(AtomicReader reader) {
       final int maxDoc = reader.maxDoc();
-      final int numDeletes = reader.numDeletedDocs();
-      final int numDocs = maxDoc - numDeletes;
-      assert reader.getLiveDocs() != null || numDeletes == 0;
-      if (numDeletes == 0) {
+      if (!reader.hasDeletions()) {
         return new NoDelDocMap(maxDoc);
-      } else if (numDeletes < numDocs) {
-        return buildDelCountDocmap(maxDoc, numDeletes, reader.getLiveDocs(), PackedInts.COMPACT);
-      } else {
-        return buildDirectDocMap(maxDoc, numDocs, reader.getLiveDocs(), PackedInts.COMPACT);
       }
+      final Bits liveDocs = reader.getLiveDocs();
+      return build(maxDoc, liveDocs);
     }
 
-    static DocMap buildDelCountDocmap(int maxDoc, int numDeletes, Bits liveDocs, float acceptableOverheadRatio) {
-      PackedInts.Mutable numDeletesSoFar = PackedInts.getMutable(maxDoc,
-          PackedInts.bitsRequired(numDeletes), acceptableOverheadRatio);
+    static DocMap build(final int maxDoc, final Bits liveDocs) {
+      assert liveDocs != null;
+      final MonotonicAppendingLongBuffer docMap = new MonotonicAppendingLongBuffer();
       int del = 0;
       for (int i = 0; i < maxDoc; ++i) {
+        docMap.add(i - del);
         if (!liveDocs.get(i)) {
           ++del;
         }
-        numDeletesSoFar.set(i, del);
       }
-      assert del == numDeletes : "del=" + del + ", numdeletes=" + numDeletes;
-      return new DelCountDocMap(liveDocs, numDeletesSoFar);
-    }
-
-    static DocMap buildDirectDocMap(int maxDoc, int numDocs, Bits liveDocs, float acceptableOverheadRatio) {
-      PackedInts.Mutable docIds = PackedInts.getMutable(maxDoc,
-          PackedInts.bitsRequired(Math.max(0, numDocs - 1)), acceptableOverheadRatio);
-      int del = 0;
-      for (int i = 0; i < maxDoc; ++i) {
-        if (liveDocs.get(i)) {
-          docIds.set(i, i - del);
-        } else {
-          ++del;
+      final int numDeletedDocs = del;
+      assert docMap.size() == maxDoc;
+      return new DocMap() {
+
+        @Override
+        public int get(int docID) {
+          if (!liveDocs.get(docID)) {
+            return -1;
+          }
+          return (int) docMap.get(docID);
         }
-      }
-      assert numDocs + del == maxDoc : "maxDoc=" + maxDoc + ", del=" + del + ", numDocs=" + numDocs;
-      return new DirectDocMap(liveDocs, docIds, del);
-    }
-
-    /** Returns the mapped docID corresponding to the provided one. */
-    public int get(int docId) {
-      if (liveDocs == null || liveDocs.get(docId)) {
-        return remap(docId);
-      } else {
-        return -1;
-      }
-    }
-
-    /** Returns the mapped docID corresponding to the provided one. */
-    public abstract int remap(int docId);
 
-    /** Returns the total number of documents, ignoring
-     *  deletions. */
-    public abstract int maxDoc();
-
-    /** Returns the number of not-deleted documents. */
-    public final int numDocs() {
-      return maxDoc() - numDeletedDocs();
-    }
+        @Override
+        public int maxDoc() {
+          return maxDoc;
+        }
 
-    /** Returns the number of deleted documents. */
-    public abstract int numDeletedDocs();
+        @Override
+        public int numDeletedDocs() {
+          return numDeletedDocs;
+        }
 
-    /** Returns true if there are any deletions. */
-    public boolean hasDeletions() {
-      return numDeletedDocs() > 0;
+      };
     }
 
   }
@@ -122,13 +109,12 @@ public class MergeState {
     private final int maxDoc;
 
     private NoDelDocMap(int maxDoc) {
-      super(null);
       this.maxDoc = maxDoc;
     }
 
     @Override
-    public int remap(int docId) {
-      return docId;
+    public int get(int docID) {
+      return docID;
     }
 
     @Override
@@ -142,59 +128,6 @@ public class MergeState {
     }
   }
 
-  private static class DirectDocMap extends DocMap {
-
-    private final PackedInts.Mutable docIds;
-    private final int numDeletedDocs;
-
-    private DirectDocMap(Bits liveDocs, PackedInts.Mutable docIds, int numDeletedDocs) {
-      super(liveDocs);
-      this.docIds = docIds;
-      this.numDeletedDocs = numDeletedDocs;
-    }
-
-    @Override
-    public int remap(int docId) {
-      return (int) docIds.get(docId);
-    }
-
-    @Override
-    public int maxDoc() {
-      return docIds.size();
-    }
-
-    @Override
-    public int numDeletedDocs() {
-      return numDeletedDocs;
-    }
-  }
-
-  private static class DelCountDocMap extends DocMap {
-
-    private final PackedInts.Mutable numDeletesSoFar;
-
-    private DelCountDocMap(Bits liveDocs, PackedInts.Mutable numDeletesSoFar) {
-      super(liveDocs);
-      this.numDeletesSoFar = numDeletesSoFar;
-    }
-
-    @Override
-    public int remap(int docId) {
-      return docId - (int) numDeletesSoFar.get(docId);
-    }
-
-    @Override
-    public int maxDoc() {
-      return numDeletesSoFar.size();
-    }
-
-    @Override
-    public int numDeletedDocs() {
-      final int maxDoc = maxDoc();
-      return (int) numDeletesSoFar.get(maxDoc - 1);
-    }
-  }
-
   /** {@link SegmentInfo} of the newly merged segment. */
   public SegmentInfo segmentInfo;
 
@@ -213,13 +146,13 @@ public class MergeState {
   /** Holds the CheckAbort instance, which is invoked
    *  periodically to see if the merge has been aborted. */
   public CheckAbort checkAbort;
-  
+
   /** InfoStream for debugging messages. */
   public InfoStream infoStream;
 
   // TODO: get rid of this? it tells you which segments are 'aligned' (e.g. for bulk merging)
   // but is this really so expensive to compute again in different components, versus once in SM?
-  
+
   /** {@link SegmentReader}s that have identical field
    * name/number mapping, so their stored fields and term
    * vectors may be bulk merged. */
@@ -231,7 +164,7 @@ public class MergeState {
   /** Sole constructor. */
   MergeState() {
   }
-  
+
   /**
    * Class for recording units of work when merging segments.
    */
@@ -261,7 +194,7 @@ public class MergeState {
         workCount = 0;
       }
     }
-    
+
     /** If you use this: IW.close(false) cannot abort your merge!
      * @lucene.internal */
     static final MergeState.CheckAbort NONE = new MergeState.CheckAbort(null, null) {

Modified: lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/TestSegmentMerger.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/TestSegmentMerger.java?rev=1448853&r1=1448852&r2=1448853&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/TestSegmentMerger.java (original)
+++ lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/TestSegmentMerger.java Thu Feb 21 23:34:34 2013
@@ -154,27 +154,33 @@ public class TestSegmentMerger extends L
   }
 
   public void testBuildDocMap() {
-    final int maxDoc = 128;
+    final int maxDoc = _TestUtil.nextInt(random(), 1, 128);
+    final int numDocs = _TestUtil.nextInt(random(), 0, maxDoc);
+    final int numDeletedDocs = maxDoc - numDocs;
     final FixedBitSet liveDocs = new FixedBitSet(maxDoc);
-
-    MergeState.DocMap docMap1 = MergeState.DocMap.buildDelCountDocmap(maxDoc, maxDoc, liveDocs, PackedInts.COMPACT);
-    MergeState.DocMap docMap2 = MergeState.DocMap.buildDirectDocMap(maxDoc, 0, liveDocs, PackedInts.COMPACT);
-    assertTrue(equals(docMap1, docMap2));
-    
-    liveDocs.set(1);
-    for (int i = 7; i < 79; ++i) {
-      liveDocs.set(i);
+    for (int i = 0; i < numDocs; ++i) {
+      while (true) {
+        final int docID = random().nextInt(maxDoc);
+        if (!liveDocs.get(docID)) {
+          liveDocs.set(docID);
+          break;
+        }
+      }
     }
-    liveDocs.set(80);
-    liveDocs.set(88);
-    int numDocs = liveDocs.cardinality();
-    docMap1 = MergeState.DocMap.buildDelCountDocmap(maxDoc, maxDoc - numDocs, liveDocs, PackedInts.COMPACT);
-    docMap2 = MergeState.DocMap.buildDirectDocMap(maxDoc, numDocs, liveDocs, PackedInts.COMPACT);
-    assertTrue(equals(docMap1, docMap2));
 
-    liveDocs.set(0, maxDoc);
-    docMap1 = MergeState.DocMap.buildDelCountDocmap(maxDoc, 0, liveDocs, PackedInts.COMPACT);
-    docMap2 = MergeState.DocMap.buildDirectDocMap(maxDoc, maxDoc, liveDocs, PackedInts.COMPACT);
-    assertTrue(equals(docMap1, docMap2));
+    final MergeState.DocMap docMap = MergeState.DocMap.build(maxDoc, liveDocs);
+
+    assertEquals(maxDoc, docMap.maxDoc());
+    assertEquals(numDocs, docMap.numDocs());
+    assertEquals(numDeletedDocs, docMap.numDeletedDocs());
+    // assert the mapping is compact
+    for (int i = 0, del = 0; i < maxDoc; ++i) {
+      if (!liveDocs.get(i)) {
+        assertEquals(-1, docMap.get(i));
+        ++del;
+      } else {
+        assertEquals(i - del, docMap.get(i));
+      }
+    }
   }
 }