You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2015/07/09 23:21:57 UTC

svn commit: r1690176 - in /lucene/dev/branches/branch_5x: ./ lucene/ lucene/core/ lucene/core/src/java/org/apache/lucene/util/BitSet.java lucene/core/src/java/org/apache/lucene/util/DocIdSetBuilder.java

Author: mikemccand
Date: Thu Jul  9 21:21:57 2015
New Revision: 1690176

URL: http://svn.apache.org/r1690176
Log:
LUCENE-6645: optimize DocIdSetBuilder a bit more

Modified:
    lucene/dev/branches/branch_5x/   (props changed)
    lucene/dev/branches/branch_5x/lucene/   (props changed)
    lucene/dev/branches/branch_5x/lucene/core/   (props changed)
    lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/util/BitSet.java
    lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/util/DocIdSetBuilder.java

Modified: lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/util/BitSet.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/util/BitSet.java?rev=1690176&r1=1690175&r2=1690176&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/util/BitSet.java (original)
+++ lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/util/BitSet.java Thu Jul  9 21:21:57 2015
@@ -30,7 +30,7 @@ import org.apache.lucene.search.DocIdSet
 public abstract class BitSet implements MutableBits, Accountable {
 
   /** Build a {@link BitSet} from the content of the provided {@link DocIdSetIterator}.
-   *  NOTE: this will consume the {@link BitSet}. */
+   *  NOTE: this will fully consume the {@link DocIdSetIterator}. */
   public static BitSet of(DocIdSetIterator it, int maxDoc) throws IOException {
     final long cost = it.cost();
     final int threshold = maxDoc >>> 7;

Modified: lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/util/DocIdSetBuilder.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/util/DocIdSetBuilder.java?rev=1690176&r1=1690175&r2=1690176&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/util/DocIdSetBuilder.java (original)
+++ lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/util/DocIdSetBuilder.java Thu Jul  9 21:21:57 2015
@@ -23,7 +23,9 @@ import org.apache.lucene.search.DocIdSet
 import org.apache.lucene.search.DocIdSetIterator;
 
 /**
- * A builder of {@link DocIdSet}s.
+ * A builder of {@link DocIdSet}s.  At first it uses a sparse structure to gather
+ * documents, and then upgrades to a non-sparse bit set once enough hits match.
+ *
  * @lucene.internal
  */
 public final class DocIdSetBuilder {
@@ -62,6 +64,17 @@ public final class DocIdSetBuilder {
     this.bufferSize = 0;
   }
 
+  /** Grows the buffer to at least minSize, but never larger than threshold. */
+  private void growBuffer(int minSize) {
+    assert minSize < threshold;
+    if (buffer.length < minSize) {
+      int nextSize = Math.min(threshold, ArrayUtil.oversize(minSize, RamUsageEstimator.NUM_BYTES_INT));
+      int[] newBuffer = new int[nextSize];
+      System.arraycopy(buffer, 0, newBuffer, 0, buffer.length);
+      buffer = newBuffer;
+    }
+  }
+
   /**
    * Add the content of the provided {@link DocIdSetIterator} to this builder.
    * NOTE: if you need to build a {@link DocIdSet} out of a single
@@ -74,7 +87,8 @@ public final class DocIdSetBuilder {
       bitSet.or(iter);
     } else {
       while (true) {  
-        final int end = Math.min(threshold, buffer.length);
+        assert buffer.length <= threshold;
+        final int end = buffer.length;
         for (int i = bufferSize; i < end; ++i) {
           final int doc = iter.nextDoc();
           if (doc == DocIdSetIterator.NO_MORE_DOCS) {
@@ -89,7 +103,7 @@ public final class DocIdSetBuilder {
           break;
         }
 
-        buffer = ArrayUtil.grow(buffer, bufferSize + 1);
+        growBuffer(bufferSize+1);
       }
 
       upgradeToBitSet();
@@ -105,8 +119,8 @@ public final class DocIdSetBuilder {
   public void grow(int numDocs) {
     if (bitSet == null) {
       final long newLength = bufferSize + numDocs;
-      if (newLength <= threshold) {
-        buffer = ArrayUtil.grow(buffer, (int) newLength);
+      if (newLength < threshold) {
+        growBuffer((int) newLength);
       } else {
         upgradeToBitSet();
       }
@@ -123,13 +137,13 @@ public final class DocIdSetBuilder {
     if (bitSet != null) {
       bitSet.set(doc);
     } else {
-      if (bufferSize + 1 >= threshold) {
-        upgradeToBitSet();
-        bitSet.set(doc);
-        return;
-      }
       if (bufferSize + 1 > buffer.length) {
-        buffer = ArrayUtil.grow(buffer, bufferSize + 1);
+        if (bufferSize + 1 >= threshold) {
+          upgradeToBitSet();
+          bitSet.set(doc);
+          return;
+        }
+        growBuffer(bufferSize+1);
       }
       buffer[bufferSize++] = doc;
     }
@@ -175,6 +189,7 @@ public final class DocIdSetBuilder {
         LSBRadixSorter sorter = new LSBRadixSorter();
         sorter.sort(buffer, 0, bufferSize);
         final int l = dedup(buffer, bufferSize);
+        assert l <= bufferSize;
         buffer = ArrayUtil.grow(buffer, l + 1);
         buffer[l] = DocIdSetIterator.NO_MORE_DOCS;
         return new IntArrayDocIdSet(buffer, l);