You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2015/07/09 23:21:57 UTC
svn commit: r1690176 - in /lucene/dev/branches/branch_5x: ./ lucene/
lucene/core/ lucene/core/src/java/org/apache/lucene/util/BitSet.java
lucene/core/src/java/org/apache/lucene/util/DocIdSetBuilder.java
Author: mikemccand
Date: Thu Jul 9 21:21:57 2015
New Revision: 1690176
URL: http://svn.apache.org/r1690176
Log:
LUCENE-6645: optimize DocIdSetBuilder a bit more
Modified:
lucene/dev/branches/branch_5x/ (props changed)
lucene/dev/branches/branch_5x/lucene/ (props changed)
lucene/dev/branches/branch_5x/lucene/core/ (props changed)
lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/util/BitSet.java
lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/util/DocIdSetBuilder.java
Modified: lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/util/BitSet.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/util/BitSet.java?rev=1690176&r1=1690175&r2=1690176&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/util/BitSet.java (original)
+++ lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/util/BitSet.java Thu Jul 9 21:21:57 2015
@@ -30,7 +30,7 @@ import org.apache.lucene.search.DocIdSet
public abstract class BitSet implements MutableBits, Accountable {
/** Build a {@link BitSet} from the content of the provided {@link DocIdSetIterator}.
- * NOTE: this will consume the {@link BitSet}. */
+ * NOTE: this will fully consume the {@link DocIdSetIterator}. */
public static BitSet of(DocIdSetIterator it, int maxDoc) throws IOException {
final long cost = it.cost();
final int threshold = maxDoc >>> 7;
Modified: lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/util/DocIdSetBuilder.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/util/DocIdSetBuilder.java?rev=1690176&r1=1690175&r2=1690176&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/util/DocIdSetBuilder.java (original)
+++ lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/util/DocIdSetBuilder.java Thu Jul 9 21:21:57 2015
@@ -23,7 +23,9 @@ import org.apache.lucene.search.DocIdSet
import org.apache.lucene.search.DocIdSetIterator;
/**
- * A builder of {@link DocIdSet}s.
+ * A builder of {@link DocIdSet}s. At first it uses a sparse structure to gather
+ * documents, and then upgrades to a non-sparse bit set once enough hits match.
+ *
* @lucene.internal
*/
public final class DocIdSetBuilder {
@@ -62,6 +64,17 @@ public final class DocIdSetBuilder {
this.bufferSize = 0;
}
+ /** Grows the buffer to at least minSize, but never larger than threshold. */
+ private void growBuffer(int minSize) {
+ assert minSize < threshold;
+ if (buffer.length < minSize) {
+ int nextSize = Math.min(threshold, ArrayUtil.oversize(minSize, RamUsageEstimator.NUM_BYTES_INT));
+ int[] newBuffer = new int[nextSize];
+ System.arraycopy(buffer, 0, newBuffer, 0, buffer.length);
+ buffer = newBuffer;
+ }
+ }
+
/**
* Add the content of the provided {@link DocIdSetIterator} to this builder.
* NOTE: if you need to build a {@link DocIdSet} out of a single
@@ -74,7 +87,8 @@ public final class DocIdSetBuilder {
bitSet.or(iter);
} else {
while (true) {
- final int end = Math.min(threshold, buffer.length);
+ assert buffer.length <= threshold;
+ final int end = buffer.length;
for (int i = bufferSize; i < end; ++i) {
final int doc = iter.nextDoc();
if (doc == DocIdSetIterator.NO_MORE_DOCS) {
@@ -89,7 +103,7 @@ public final class DocIdSetBuilder {
break;
}
- buffer = ArrayUtil.grow(buffer, bufferSize + 1);
+ growBuffer(bufferSize+1);
}
upgradeToBitSet();
@@ -105,8 +119,8 @@ public final class DocIdSetBuilder {
public void grow(int numDocs) {
if (bitSet == null) {
final long newLength = bufferSize + numDocs;
- if (newLength <= threshold) {
- buffer = ArrayUtil.grow(buffer, (int) newLength);
+ if (newLength < threshold) {
+ growBuffer((int) newLength);
} else {
upgradeToBitSet();
}
@@ -123,13 +137,13 @@ public final class DocIdSetBuilder {
if (bitSet != null) {
bitSet.set(doc);
} else {
- if (bufferSize + 1 >= threshold) {
- upgradeToBitSet();
- bitSet.set(doc);
- return;
- }
if (bufferSize + 1 > buffer.length) {
- buffer = ArrayUtil.grow(buffer, bufferSize + 1);
+ if (bufferSize + 1 >= threshold) {
+ upgradeToBitSet();
+ bitSet.set(doc);
+ return;
+ }
+ growBuffer(bufferSize+1);
}
buffer[bufferSize++] = doc;
}
@@ -175,6 +189,7 @@ public final class DocIdSetBuilder {
LSBRadixSorter sorter = new LSBRadixSorter();
sorter.sort(buffer, 0, bufferSize);
final int l = dedup(buffer, bufferSize);
+ assert l <= bufferSize;
buffer = ArrayUtil.grow(buffer, l + 1);
buffer[l] = DocIdSetIterator.NO_MORE_DOCS;
return new IntArrayDocIdSet(buffer, l);