You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by jp...@apache.org on 2014/10/27 09:57:05 UTC
svn commit: r1634479 - in /lucene/dev/branches/branch_5x: ./ lucene/
lucene/core/ lucene/core/src/java/org/apache/lucene/util/
lucene/core/src/test/org/apache/lucene/util/ lucene/test-framework/
lucene/test-framework/src/java/org/apache/lucene/util/
Author: jpountz
Date: Mon Oct 27 08:57:05 2014
New Revision: 1634479
URL: http://svn.apache.org/r1634479
Log:
LUCENE-6024: Speed-up BitSet.or/and/andNot.
Modified:
lucene/dev/branches/branch_5x/ (props changed)
lucene/dev/branches/branch_5x/lucene/ (props changed)
lucene/dev/branches/branch_5x/lucene/core/ (props changed)
lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/util/BitSet.java
lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/util/BitSetIterator.java
lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/util/FixedBitSet.java
lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/util/SparseFixedBitSet.java
lucene/dev/branches/branch_5x/lucene/core/src/test/org/apache/lucene/util/TestSparseFixedBitSet.java
lucene/dev/branches/branch_5x/lucene/test-framework/ (props changed)
lucene/dev/branches/branch_5x/lucene/test-framework/src/java/org/apache/lucene/util/BaseBitSetTestCase.java
Modified: lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/util/BitSet.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/util/BitSet.java?rev=1634479&r1=1634478&r2=1634479&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/util/BitSet.java (original)
+++ lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/util/BitSet.java Mon Oct 27 08:57:05 2014
@@ -18,6 +18,7 @@ package org.apache.lucene.util;
*/
import java.io.IOException;
+import java.util.Collections;
import org.apache.lucene.search.DocIdSetIterator;
@@ -58,37 +59,90 @@ public abstract class BitSet implements
*/
public abstract int nextSetBit(int i);
- /** Does in-place OR of the bits provided by the
- * iterator. */
+ /** Assert that the current doc is -1. */
+ protected final void assertUnpositioned(DocIdSetIterator iter) {
+ if (iter.docID() != -1) {
+ throw new IllegalStateException("This operation only works with an unpositioned iterator, got current position = " + iter.docID());
+ }
+ }
+
+ /** Does in-place OR of the bits provided by the iterator. The state of the
+ * iterator after this operation terminates is undefined. */
public void or(DocIdSetIterator iter) throws IOException {
+ assertUnpositioned(iter);
for (int doc = iter.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = iter.nextDoc()) {
set(doc);
}
}
- /** Does in-place AND of the bits provided by the
- * iterator. */
- public void and(DocIdSetIterator iter) throws IOException {
+ private static abstract class LeapFrogCallBack {
+ abstract void onMatch(int doc);
+ void finish() {}
+ }
+
+ /** Performs a leap frog between this and the provided iterator in order to find common documents. */
+ private void leapFrog(DocIdSetIterator iter, LeapFrogCallBack callback) throws IOException {
final int length = length();
- if (length == 0) {
- return;
- }
- int disiDoc, bitSetDoc = nextSetBit(0);
- while (bitSetDoc != DocIdSetIterator.NO_MORE_DOCS && (disiDoc = iter.advance(bitSetDoc)) < length) {
- clear(bitSetDoc, disiDoc);
- disiDoc++;
- bitSetDoc = (disiDoc < length) ? nextSetBit(disiDoc) : DocIdSetIterator.NO_MORE_DOCS;
- }
- if (bitSetDoc != DocIdSetIterator.NO_MORE_DOCS) {
- clear(bitSetDoc, length);
+ int bitSetDoc = -1;
+ int disiDoc = iter.nextDoc();
+ while (true) {
+ // invariant: bitSetDoc <= disiDoc
+ assert bitSetDoc <= disiDoc;
+ if (disiDoc >= length) {
+ callback.finish();
+ return;
+ }
+ if (bitSetDoc < disiDoc) {
+ bitSetDoc = nextSetBit(disiDoc);
+ }
+ if (bitSetDoc == disiDoc) {
+ callback.onMatch(bitSetDoc);
+ disiDoc = iter.nextDoc();
+ } else {
+ disiDoc = iter.advance(bitSetDoc);
+ }
}
}
- /** this = this AND NOT other */
+ /** Does in-place AND of the bits provided by the iterator. The state of the
+ * iterator after this operation terminates is undefined. */
+ public void and(DocIdSetIterator iter) throws IOException {
+ assertUnpositioned(iter);
+ leapFrog(iter, new LeapFrogCallBack() {
+ int previous = -1;
+
+ @Override
+ public void onMatch(int doc) {
+ clear(previous + 1, doc);
+ previous = doc;
+ }
+
+ @Override
+ public void finish() {
+ if (previous + 1 < length()) {
+ clear(previous + 1, length());
+ }
+ }
+
+ });
+ }
+
+ /** this = this AND NOT other. The state of the iterator after this operation
+ * terminates is undefined. */
public void andNot(DocIdSetIterator iter) throws IOException {
- for (int doc = iter.nextDoc(), len = length(); doc < len; doc = iter.nextDoc()) {
- clear(doc);
- }
+ assertUnpositioned(iter);
+ leapFrog(iter, new LeapFrogCallBack() {
+
+ @Override
+ public void onMatch(int doc) {
+ clear(doc);
+ }
+
+ });
}
+ @Override
+ public Iterable<? extends Accountable> getChildResources() {
+ return Collections.emptyList();
+ }
}
Modified: lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/util/BitSetIterator.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/util/BitSetIterator.java?rev=1634479&r1=1634478&r2=1634479&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/util/BitSetIterator.java (original)
+++ lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/util/BitSetIterator.java Mon Oct 27 08:57:05 2014
@@ -42,6 +42,11 @@ public class BitSetIterator extends DocI
return getBitSet(iterator, FixedBitSet.class);
}
+ /** If the provided iterator wraps a {@link SparseFixedBitSet}, returns it, otherwise returns null. */
+ public static SparseFixedBitSet getSparseFixedBitSetOrNull(DocIdSetIterator iterator) {
+ return getBitSet(iterator, SparseFixedBitSet.class);
+ }
+
private final BitSet bits;
private final int length;
private final long cost;
Modified: lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/util/FixedBitSet.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/util/FixedBitSet.java?rev=1634479&r1=1634478&r2=1634479&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/util/FixedBitSet.java (original)
+++ lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/util/FixedBitSet.java Mon Oct 27 08:57:05 2014
@@ -19,7 +19,6 @@ package org.apache.lucene.util;
import java.io.IOException;
import java.util.Arrays;
-import java.util.Collections;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator;
@@ -131,11 +130,6 @@ public final class FixedBitSet extends B
return BASE_RAM_BYTES_USED + RamUsageEstimator.sizeOf(bits);
}
- @Override
- public Iterable<? extends Accountable> getChildResources() {
- return Collections.emptyList();
- }
-
/** Expert. */
public long[] getBits() {
return bits;
@@ -234,12 +228,10 @@ public final class FixedBitSet extends B
@Override
public void or(DocIdSetIterator iter) throws IOException {
- if (BitSetIterator.getFixedBitSetOrNull(iter) != null && iter.docID() == -1) {
+ if (BitSetIterator.getFixedBitSetOrNull(iter) != null) {
+ assertUnpositioned(iter);
final FixedBitSet bits = BitSetIterator.getFixedBitSetOrNull(iter);
or(bits);
- // advance after last doc that would be accepted if standard
- // iteration is used (to exhaust it):
- iter.advance(numBits);
} else {
super.or(iter);
}
@@ -266,12 +258,10 @@ public final class FixedBitSet extends B
/** Does in-place XOR of the bits provided by the iterator. */
public void xor(DocIdSetIterator iter) throws IOException {
- if (BitSetIterator.getFixedBitSetOrNull(iter) != null && iter.docID() == -1) {
+ assertUnpositioned(iter);
+ if (BitSetIterator.getFixedBitSetOrNull(iter) != null) {
final FixedBitSet bits = BitSetIterator.getFixedBitSetOrNull(iter);
xor(bits);
- // advance after last doc that would be accepted if standard
- // iteration is used (to exhaust it):
- iter.advance(numBits);
} else {
int doc;
while ((doc = iter.nextDoc()) < numBits) {
@@ -291,12 +281,10 @@ public final class FixedBitSet extends B
@Override
public void and(DocIdSetIterator iter) throws IOException {
- if (BitSetIterator.getFixedBitSetOrNull(iter) != null && iter.docID() == -1) {
+ if (BitSetIterator.getFixedBitSetOrNull(iter) != null) {
+ assertUnpositioned(iter);
final FixedBitSet bits = BitSetIterator.getFixedBitSetOrNull(iter);
and(bits);
- // advance after last doc that would be accepted if standard
- // iteration is used (to exhaust it):
- iter.advance(numBits);
} else {
super.and(iter);
}
@@ -329,12 +317,10 @@ public final class FixedBitSet extends B
@Override
public void andNot(DocIdSetIterator iter) throws IOException {
- if (BitSetIterator.getFixedBitSetOrNull(iter) != null && iter.docID() == -1) {
+ if (BitSetIterator.getFixedBitSetOrNull(iter) != null) {
+ assertUnpositioned(iter);
final FixedBitSet bits = BitSetIterator.getFixedBitSetOrNull(iter);
andNot(bits);
- // advance after last doc that would be accepted if standard
- // iteration is used (to exhaust it):
- iter.advance(numBits);
} else {
super.andNot(iter);
}
Modified: lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/util/SparseFixedBitSet.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/util/SparseFixedBitSet.java?rev=1634479&r1=1634478&r2=1634479&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/util/SparseFixedBitSet.java (original)
+++ lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/util/SparseFixedBitSet.java Mon Oct 27 08:57:05 2014
@@ -18,7 +18,7 @@ package org.apache.lucene.util;
*/
import java.io.IOException;
-import java.util.Collections;
+import java.util.Arrays;
import org.apache.lucene.search.DocIdSetIterator;
@@ -156,7 +156,7 @@ public class SparseFixedBitSet extends B
insertLong(i4096, i64, i, index);
}
}
-
+
private void insertBlock(int i4096, int i64, int i) {
indices[i4096] = 1L << i64; // shifts are mod 64 in java
assert bits[i4096] == null;
@@ -196,10 +196,10 @@ public class SparseFixedBitSet extends B
assert consistent(i);
final int i4096 = i >>> 12;
final int i64 = i >>> 6;
- clearWithinLong(i4096, i64, ~(1L << i));
+ and(i4096, i64, ~(1L << i));
}
- private void clearWithinLong(int i4096, int i64, long mask) {
+ private void and(int i4096, int i64, long mask) {
final long index = indices[i4096];
if ((index & (1L << i64)) != 0) {
// offset of the long bits we are interested in in the array
@@ -225,6 +225,7 @@ public class SparseFixedBitSet extends B
System.arraycopy(bitArray, o + 1, bitArray, o, length - o);
bitArray[length] = 0L;
}
+ nonZeroLongCount -= 1;
}
@Override
@@ -241,6 +242,7 @@ public class SparseFixedBitSet extends B
} else {
clearWithinBlock(firstBlock, from & MASK_4096, MASK_4096);
for (int i = firstBlock + 1; i < lastBlock; ++i) {
+ nonZeroLongCount -= Long.bitCount(indices[i]);
indices[i] = 0;
bits[i] = null;
}
@@ -258,14 +260,14 @@ public class SparseFixedBitSet extends B
int lastLong = to >>> 6;
if (firstLong == lastLong) {
- clearWithinLong(i4096, firstLong, ~mask(from, to));
+ and(i4096, firstLong, ~mask(from, to));
} else {
assert firstLong < lastLong;
- clearWithinLong(i4096, lastLong, ~mask(0, to));
+ and(i4096, lastLong, ~mask(0, to));
for (int i = lastLong - 1; i >= firstLong + 1; --i) {
- clearWithinLong(i4096, i, 0L);
+ and(i4096, i, 0L);
}
- clearWithinLong(i4096, firstLong, ~mask(from, 63));
+ and(i4096, firstLong, ~mask(from, 63));
}
}
@@ -343,20 +345,163 @@ public class SparseFixedBitSet extends B
}
}
+ /** Return the long bits at the given <code>i64</code> index. */
+ private long longBits(long index, long[] bits, int i64) {
+ if ((index & (1L << i64)) == 0) {
+ return 0L;
+ } else {
+ return bits[Long.bitCount(index & ((1L << i64) - 1))];
+ }
+ }
+
+ private void or(final int i4096, final long index, long[] bits, int nonZeroLongCount) {
+ assert Long.bitCount(index) == nonZeroLongCount;
+ final long currentIndex = indices[i4096];
+ if (currentIndex == 0) {
+ // fast path: if we currently have nothing in the block, just copy the data
+ // this especially happens all the time if you call OR on an empty set
+ indices[i4096] = index;
+ this.bits[i4096] = Arrays.copyOf(bits, nonZeroLongCount);
+ this.nonZeroLongCount += nonZeroLongCount;
+ return;
+ }
+ final long[] currentBits = this.bits[i4096];
+ final long[] newBits;
+ final long newIndex = currentIndex | index;
+ final int requiredCapacity = Long.bitCount(newIndex);
+ if (currentBits.length >= requiredCapacity) {
+ newBits = currentBits;
+ } else {
+ newBits = new long[oversize(requiredCapacity)];
+ }
+ // we iterate backwards in order to not override data we might need on the next iteration if the
+ // array is reused
+ for (int i = Long.numberOfLeadingZeros(newIndex), newO = Long.bitCount(newIndex) - 1;
+ i < 64;
+ i += 1 + Long.numberOfLeadingZeros(newIndex << (i + 1)), newO -= 1) {
+ // bitIndex is the index of a bit which is set in newIndex and newO is the number of 1 bits on its right
+ final int bitIndex = 63 - i;
+ assert newO == Long.bitCount(newIndex & ((1L << bitIndex) - 1));
+ newBits[newO] = longBits(currentIndex, currentBits, bitIndex) | longBits(index, bits, bitIndex);
+ }
+ indices[i4096] = newIndex;
+ this.bits[i4096] = newBits;
+ this.nonZeroLongCount += nonZeroLongCount - Long.bitCount(currentIndex & index);
+ }
+
+ private void or(SparseFixedBitSet other) {
+ for (int i = 0; i < other.indices.length; ++i) {
+ final long index = other.indices[i];
+ if (index != 0) {
+ or(i, index, other.bits[i], Long.bitCount(index));
+ }
+ }
+ }
+
+ /**
+ * {@link #or(DocIdSetIterator)} impl that works best when <code>it</code> is dense
+ */
+ private void orDense(DocIdSetIterator it) throws IOException {
+ assertUnpositioned(it);
+ // The goal here is to try to take advantage of the ordering of documents
+ // to build the data-structure more efficiently
+ // NOTE: this heavily relies on the fact that shifts are mod 64
+ final int firstDoc = it.nextDoc();
+ if (firstDoc == DocIdSetIterator.NO_MORE_DOCS) {
+ return;
+ }
+ int i4096 = firstDoc >>> 12;
+ int i64 = firstDoc >>> 6;
+ long index = 1L << i64;
+ long currentLong = 1L << firstDoc;
+ // we store at most 64 longs per block so preallocate in order never to have to resize
+ long[] longs = new long[64];
+ int numLongs = 0;
+
+ for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) {
+ final int doc64 = doc >>> 6;
+ if (doc64 == i64) {
+ // still in the same long, just set the bit
+ currentLong |= 1L << doc;
+ } else {
+ longs[numLongs++] = currentLong;
+
+ final int doc4096 = doc >>> 12;
+ if (doc4096 == i4096) {
+ index |= 1L << doc64;
+ } else {
+ // we are on a new block, flush what we buffered
+ or(i4096, index, longs, numLongs);
+ // and reset state for the new block
+ i4096 = doc4096;
+ index = 1L << doc64;
+ numLongs = 0;
+ }
+
+ // we are on a new long, reset state
+ i64 = doc64;
+ currentLong = 1L << doc;
+ }
+ }
+
+ // flush
+ longs[numLongs++] = currentLong;
+ or(i4096, index, longs, numLongs);
+ }
+
@Override
public void or(DocIdSetIterator it) throws IOException {
- for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) {
- set(doc);
+ {
+ // specialize union with another SparseFixedBitSet
+ final SparseFixedBitSet other = BitSetIterator.getSparseFixedBitSetOrNull(it);
+ if (other != null) {
+ assertUnpositioned(it);
+ or(other);
+ return;
+ }
+ }
+
+ // We do not specialize the union with a FixedBitSet since FixedBitSets are
+ // supposed to be used for dense data and sparse fixed bit sets for sparse
+ // data, so a sparse set would likely get upgraded by DocIdSetBuilder before
+ // being or'ed with a FixedBitSet
+
+ if (it.cost() < indices.length) {
+ // the default impl is good for sparse iterators
+ super.or(it);
+ } else {
+ orDense(it);
}
}
+ // AND and AND_NOT do not need much specialization here since this sparse set
+ // is supposed to be used on sparse data and the default AND/AND_NOT impl
+ // (leap frog) is efficient when at least one of the sets contains sparse data
+
@Override
- public long ramBytesUsed() {
- return ramBytesUsed;
+ public void and(DocIdSetIterator it) throws IOException {
+ final SparseFixedBitSet other = BitSetIterator.getSparseFixedBitSetOrNull(it);
+ if (other != null) {
+ // if we are merging with another SparseFixedBitSet, a quick win is
+ // to clear up some blocks by only looking at their index. Then the set
+ // is sparser and the leap-frog approach of the parent class is more
+ // efficient. Since SparseFixedBitSet is supposed to be used for sparse
+ // sets, the intersection of two SparseFixedBitSet is likely very sparse
+ final int numCommonBlocks = Math.min(indices.length, other.indices.length);
+ for (int i = 0; i < numCommonBlocks; ++i) {
+ if ((indices[i] & other.indices[i]) == 0) {
+ this.nonZeroLongCount -= Long.bitCount(this.indices[i]);
+ this.indices[i] = 0;
+ this.bits[i] = null;
+ }
+ }
+ }
+ super.and(it);
}
@Override
- public Iterable<? extends Accountable> getChildResources() {
- return Collections.emptyList();
+ public long ramBytesUsed() {
+ return ramBytesUsed;
}
+
}
Modified: lucene/dev/branches/branch_5x/lucene/core/src/test/org/apache/lucene/util/TestSparseFixedBitSet.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/core/src/test/org/apache/lucene/util/TestSparseFixedBitSet.java?rev=1634479&r1=1634478&r2=1634479&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/core/src/test/org/apache/lucene/util/TestSparseFixedBitSet.java (original)
+++ lucene/dev/branches/branch_5x/lucene/core/src/test/org/apache/lucene/util/TestSparseFixedBitSet.java Mon Oct 27 08:57:05 2014
@@ -32,6 +32,23 @@ public class TestSparseFixedBitSet exten
return set;
}
+ @Override
+ protected void assertEquals(BitSet set1, SparseFixedBitSet set2, int maxDoc) {
+ super.assertEquals(set1, set2, maxDoc);
+ // check invariants of the sparse set
+ int nonZeroLongCount = 0;
+ for (int i = 0; i < set2.indices.length; ++i) {
+ final int n = Long.bitCount(set2.indices[i]);
+ if (n != 0) {
+ nonZeroLongCount += n;
+ for (int j = n; j < set2.bits[i].length; ++j) {
+ assertEquals(0, set2.bits[i][j]);
+ }
+ }
+ }
+ assertEquals(nonZeroLongCount, set2.nonZeroLongCount);
+ }
+
public void testApproximateCardinality() {
final SparseFixedBitSet set = new SparseFixedBitSet(10000);
final int first = random().nextInt(1000);
Modified: lucene/dev/branches/branch_5x/lucene/test-framework/src/java/org/apache/lucene/util/BaseBitSetTestCase.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/test-framework/src/java/org/apache/lucene/util/BaseBitSetTestCase.java?rev=1634479&r1=1634478&r2=1634479&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/test-framework/src/java/org/apache/lucene/util/BaseBitSetTestCase.java (original)
+++ lucene/dev/branches/branch_5x/lucene/test-framework/src/java/org/apache/lucene/util/BaseBitSetTestCase.java Mon Oct 27 08:57:05 2014
@@ -58,7 +58,7 @@ public abstract class BaseBitSetTestCase
return randomSet(numBits, (int) (percentSet * numBits));
}
- private void assertEquals(BitSet set1, BitSet set2, int maxDoc) {
+ protected void assertEquals(BitSet set1, T set2, int maxDoc) {
for (int i = 0; i < maxDoc; ++i) {
assertEquals("Different at " + i, set1.get(i), set2.get(i));
}
@@ -77,11 +77,15 @@ public abstract class BaseBitSetTestCase
/** Test the {@link BitSet#set} method. */
public void testSet() throws IOException {
final int numBits = 1 + random().nextInt(100000);
- for (float percentSet : new float[] {0, 0.01f, 0.1f, 0.5f, 0.9f, 0.99f, 1f}) {
- BitSet set1 = new JavaUtilBitSet(randomSet(numBits, percentSet), numBits);
- T set2 = copyOf(set1, numBits);
- assertEquals(set1, set2, numBits);
+ BitSet set1 = new JavaUtilBitSet(randomSet(numBits, 0), numBits);
+ T set2 = copyOf(set1, numBits);
+ final int iters = 10000 + random().nextInt(10000);
+ for (int i = 0; i < iters; ++i) {
+ final int index = random().nextInt(numBits);
+ set1.set(index);
+ set2.set(index);
}
+ assertEquals(set1, set2, numBits);
}
/** Test the {@link BitSet#clear(int)} method. */
@@ -118,16 +122,28 @@ public abstract class BaseBitSetTestCase
}
private DocIdSet randomCopy(BitSet set, int numBits) throws IOException {
- if (random().nextBoolean()) {
- return new BitDocIdSet(copyOf(set, numBits), set.cardinality());
- } else if (random().nextBoolean()) {
- final RoaringDocIdSet.Builder builder = new RoaringDocIdSet.Builder(numBits);
- for (int i = set.nextSetBit(0); i != DocIdSetIterator.NO_MORE_DOCS; i = i + 1 >= numBits ? DocIdSetIterator.NO_MORE_DOCS : set.nextSetBit(i + 1)) {
- builder.add(i);
- }
- return builder.build();
- } else {
- return new BitDocIdSet(set, set.cardinality());
+ switch (random().nextInt(5)) {
+ case 0:
+ return new BitDocIdSet(set, set.cardinality());
+ case 1:
+ return new BitDocIdSet(copyOf(set, numBits), set.cardinality());
+ case 2:
+ final RoaringDocIdSet.Builder builder = new RoaringDocIdSet.Builder(numBits);
+ for (int i = set.nextSetBit(0); i != DocIdSetIterator.NO_MORE_DOCS; i = i + 1 >= numBits ? DocIdSetIterator.NO_MORE_DOCS : set.nextSetBit(i + 1)) {
+ builder.add(i);
+ }
+ return builder.build();
+ case 3:
+ FixedBitSet fbs = new FixedBitSet(numBits);
+ fbs.or(new BitSetIterator(set, 0));
+ return new BitDocIdSet(fbs);
+ case 4:
+ SparseFixedBitSet sfbs = new SparseFixedBitSet(numBits);
+ sfbs.or(new BitSetIterator(set, 0));
+ return new BitDocIdSet(sfbs);
+ default:
+ fail();
+ return null;
}
}
@@ -136,10 +152,10 @@ public abstract class BaseBitSetTestCase
final int numBits = 1 + random().nextInt(100000);
BitSet set1 = new JavaUtilBitSet(randomSet(numBits, 0), numBits);
T set2 = copyOf(set1, numBits);
- final int iters = 10 + random().nextInt(100);
+ final int iters = 50 + random().nextInt(50);
for (int i = 0; i < iters; ++i) {
// make extreme percents more likely
- float percentSet2 = (float) Math.pow(random().nextDouble(), 2);
+ float percentSet2 = rarely() ? 0 : (float) Math.pow(random().nextDouble(), 2);
if (random().nextBoolean()) {
percentSet2 = 1 - percentSet2;
}
@@ -150,18 +166,20 @@ public abstract class BaseBitSetTestCase
if (bulkSetCopy.iterator() == null) {
continue;
}
+ DocIdSetIterator it1 = bulkSetCopy.iterator();
+ DocIdSetIterator it2 = bulkSetCopy.iterator();
switch (random().nextInt(3)) {
case 0:
- set1.or(bulkSetCopy.iterator());
- set2.or(bulkSetCopy.iterator());
+ set1.or(it1);
+ set2.or(it2);
break;
case 1:
- set1.and(bulkSetCopy.iterator());
- set2.and(bulkSetCopy.iterator());
+ set1.and(it1);
+ set2.and(it2);
break;
default:
- set1.andNot(bulkSetCopy.iterator());
- set2.andNot(bulkSetCopy.iterator());
+ set1.andNot(it1);
+ set2.andNot(it2);
break;
}
assertEquals(set1, set2, numBits);