You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by jp...@apache.org on 2014/10/27 09:57:05 UTC

svn commit: r1634479 - in /lucene/dev/branches/branch_5x: ./ lucene/ lucene/core/ lucene/core/src/java/org/apache/lucene/util/ lucene/core/src/test/org/apache/lucene/util/ lucene/test-framework/ lucene/test-framework/src/java/org/apache/lucene/util/

Author: jpountz
Date: Mon Oct 27 08:57:05 2014
New Revision: 1634479

URL: http://svn.apache.org/r1634479
Log:
LUCENE-6024: Speed-up BitSet.or/and/andNot.

Modified:
    lucene/dev/branches/branch_5x/   (props changed)
    lucene/dev/branches/branch_5x/lucene/   (props changed)
    lucene/dev/branches/branch_5x/lucene/core/   (props changed)
    lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/util/BitSet.java
    lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/util/BitSetIterator.java
    lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/util/FixedBitSet.java
    lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/util/SparseFixedBitSet.java
    lucene/dev/branches/branch_5x/lucene/core/src/test/org/apache/lucene/util/TestSparseFixedBitSet.java
    lucene/dev/branches/branch_5x/lucene/test-framework/   (props changed)
    lucene/dev/branches/branch_5x/lucene/test-framework/src/java/org/apache/lucene/util/BaseBitSetTestCase.java

Modified: lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/util/BitSet.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/util/BitSet.java?rev=1634479&r1=1634478&r2=1634479&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/util/BitSet.java (original)
+++ lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/util/BitSet.java Mon Oct 27 08:57:05 2014
@@ -18,6 +18,7 @@ package org.apache.lucene.util;
  */
 
 import java.io.IOException;
+import java.util.Collections;
 
 import org.apache.lucene.search.DocIdSetIterator;
 
@@ -58,37 +59,90 @@ public abstract class BitSet implements 
    */
   public abstract int nextSetBit(int i);
 
-  /** Does in-place OR of the bits provided by the
-   *  iterator. */
+  /** Assert that the current doc is -1. */
+  protected final void assertUnpositioned(DocIdSetIterator iter) {
+    if (iter.docID() != -1) {
+      throw new IllegalStateException("This operation only works with an unpositioned iterator, got current position = " + iter.docID());
+    }
+  }
+
+  /** Does in-place OR of the bits provided by the iterator. The state of the
+   *  iterator after this operation terminates is undefined. */
   public void or(DocIdSetIterator iter) throws IOException {
+    assertUnpositioned(iter);
     for (int doc = iter.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = iter.nextDoc()) {
       set(doc);
     }
   }
 
-  /** Does in-place AND of the bits provided by the
-   *  iterator. */
-  public void and(DocIdSetIterator iter) throws IOException {
+  private static abstract class LeapFrogCallBack {
+    abstract void onMatch(int doc);
+    void finish() {}
+  }
+
+  /** Performs a leap frog between this and the provided iterator in order to find common documents. */
+  private void leapFrog(DocIdSetIterator iter, LeapFrogCallBack callback) throws IOException {
     final int length = length();
-    if (length == 0) {
-      return;
-    }
-    int disiDoc, bitSetDoc = nextSetBit(0);
-    while (bitSetDoc != DocIdSetIterator.NO_MORE_DOCS && (disiDoc = iter.advance(bitSetDoc)) < length) {
-      clear(bitSetDoc, disiDoc);
-      disiDoc++;
-      bitSetDoc = (disiDoc < length) ? nextSetBit(disiDoc) : DocIdSetIterator.NO_MORE_DOCS;
-    }
-    if (bitSetDoc != DocIdSetIterator.NO_MORE_DOCS) {
-      clear(bitSetDoc, length);
+    int bitSetDoc = -1;
+    int disiDoc = iter.nextDoc();
+    while (true) {
+      // invariant: bitSetDoc <= disiDoc
+      assert bitSetDoc <= disiDoc;
+      if (disiDoc >= length) {
+        callback.finish();
+        return;
+      }
+      if (bitSetDoc < disiDoc) {
+        bitSetDoc = nextSetBit(disiDoc);
+      }
+      if (bitSetDoc == disiDoc) {
+        callback.onMatch(bitSetDoc);
+        disiDoc = iter.nextDoc();
+      } else {
+        disiDoc = iter.advance(bitSetDoc);
+      }
     }
   }
 
-  /** this = this AND NOT other */
+  /** Does in-place AND of the bits provided by the iterator. The state of the
+   *  iterator after this operation terminates is undefined. */
+  public void and(DocIdSetIterator iter) throws IOException {
+    assertUnpositioned(iter);
+    leapFrog(iter, new LeapFrogCallBack() {
+      int previous = -1;
+
+      @Override
+      public void onMatch(int doc) {
+        clear(previous + 1, doc);
+        previous = doc;
+      }
+
+      @Override
+      public void finish() {
+        if (previous + 1 < length()) {
+          clear(previous + 1, length());
+        }
+      }
+
+    });
+  }
+
+  /** this = this AND NOT other. The state of the iterator after this operation
+   *  terminates is undefined. */
   public void andNot(DocIdSetIterator iter) throws IOException {
-    for (int doc = iter.nextDoc(), len = length(); doc < len; doc = iter.nextDoc()) {
-      clear(doc);
-    }
+    assertUnpositioned(iter);
+    leapFrog(iter, new LeapFrogCallBack() {
+
+      @Override
+      public void onMatch(int doc) {
+        clear(doc);
+      }
+
+    });
   }
 
+  @Override
+  public Iterable<? extends Accountable> getChildResources() {
+    return Collections.emptyList();
+  }
 }

Modified: lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/util/BitSetIterator.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/util/BitSetIterator.java?rev=1634479&r1=1634478&r2=1634479&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/util/BitSetIterator.java (original)
+++ lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/util/BitSetIterator.java Mon Oct 27 08:57:05 2014
@@ -42,6 +42,11 @@ public class BitSetIterator extends DocI
     return getBitSet(iterator, FixedBitSet.class);
   }
 
+  /** If the provided iterator wraps a {@link SparseFixedBitSet}, returns it, otherwise returns null. */
+  public static SparseFixedBitSet getSparseFixedBitSetOrNull(DocIdSetIterator iterator) {
+    return getBitSet(iterator, SparseFixedBitSet.class);
+  }
+
   private final BitSet bits;
   private final int length;
   private final long cost;

Modified: lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/util/FixedBitSet.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/util/FixedBitSet.java?rev=1634479&r1=1634478&r2=1634479&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/util/FixedBitSet.java (original)
+++ lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/util/FixedBitSet.java Mon Oct 27 08:57:05 2014
@@ -19,7 +19,6 @@ package org.apache.lucene.util;
 
 import java.io.IOException;
 import java.util.Arrays;
-import java.util.Collections;
 
 import org.apache.lucene.search.DocIdSet;
 import org.apache.lucene.search.DocIdSetIterator;
@@ -131,11 +130,6 @@ public final class FixedBitSet extends B
     return BASE_RAM_BYTES_USED + RamUsageEstimator.sizeOf(bits);
   }
 
-  @Override
-  public Iterable<? extends Accountable> getChildResources() {
-    return Collections.emptyList();
-  }
-
   /** Expert. */
   public long[] getBits() {
     return bits;
@@ -234,12 +228,10 @@ public final class FixedBitSet extends B
 
   @Override
   public void or(DocIdSetIterator iter) throws IOException {
-    if (BitSetIterator.getFixedBitSetOrNull(iter) != null && iter.docID() == -1) {
+    if (BitSetIterator.getFixedBitSetOrNull(iter) != null) {
+      assertUnpositioned(iter);
       final FixedBitSet bits = BitSetIterator.getFixedBitSetOrNull(iter); 
       or(bits);
-      // advance after last doc that would be accepted if standard
-      // iteration is used (to exhaust it):
-      iter.advance(numBits);
     } else {
       super.or(iter);
     }
@@ -266,12 +258,10 @@ public final class FixedBitSet extends B
   
   /** Does in-place XOR of the bits provided by the iterator. */
   public void xor(DocIdSetIterator iter) throws IOException {
-    if (BitSetIterator.getFixedBitSetOrNull(iter) != null && iter.docID() == -1) {
+    assertUnpositioned(iter);
+    if (BitSetIterator.getFixedBitSetOrNull(iter) != null) {
       final FixedBitSet bits = BitSetIterator.getFixedBitSetOrNull(iter); 
       xor(bits);
-      // advance after last doc that would be accepted if standard
-      // iteration is used (to exhaust it):
-      iter.advance(numBits);
     } else {
       int doc;
       while ((doc = iter.nextDoc()) < numBits) {
@@ -291,12 +281,10 @@ public final class FixedBitSet extends B
 
   @Override
   public void and(DocIdSetIterator iter) throws IOException {
-    if (BitSetIterator.getFixedBitSetOrNull(iter) != null && iter.docID() == -1) {
+    if (BitSetIterator.getFixedBitSetOrNull(iter) != null) {
+      assertUnpositioned(iter);
       final FixedBitSet bits = BitSetIterator.getFixedBitSetOrNull(iter); 
       and(bits);
-      // advance after last doc that would be accepted if standard
-      // iteration is used (to exhaust it):
-      iter.advance(numBits);
     } else {
       super.and(iter);
     }
@@ -329,12 +317,10 @@ public final class FixedBitSet extends B
 
   @Override
   public void andNot(DocIdSetIterator iter) throws IOException {
-    if (BitSetIterator.getFixedBitSetOrNull(iter) != null && iter.docID() == -1) {
+    if (BitSetIterator.getFixedBitSetOrNull(iter) != null) {
+      assertUnpositioned(iter);
       final FixedBitSet bits = BitSetIterator.getFixedBitSetOrNull(iter); 
       andNot(bits);
-      // advance after last doc that would be accepted if standard
-      // iteration is used (to exhaust it):
-      iter.advance(numBits);
     } else {
       super.andNot(iter);
     }

Modified: lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/util/SparseFixedBitSet.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/util/SparseFixedBitSet.java?rev=1634479&r1=1634478&r2=1634479&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/util/SparseFixedBitSet.java (original)
+++ lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/util/SparseFixedBitSet.java Mon Oct 27 08:57:05 2014
@@ -18,7 +18,7 @@ package org.apache.lucene.util;
  */
 
 import java.io.IOException;
-import java.util.Collections;
+import java.util.Arrays;
 
 import org.apache.lucene.search.DocIdSetIterator;
 
@@ -156,7 +156,7 @@ public class SparseFixedBitSet extends B
       insertLong(i4096, i64, i, index);
     }
   }
-  
+
   private void insertBlock(int i4096, int i64, int i) {
     indices[i4096] = 1L << i64; // shifts are mod 64 in java
     assert bits[i4096] == null;
@@ -196,10 +196,10 @@ public class SparseFixedBitSet extends B
     assert consistent(i);
     final int i4096 = i >>> 12;
     final int i64 = i >>> 6;
-    clearWithinLong(i4096, i64, ~(1L << i));
+    and(i4096, i64, ~(1L << i));
   }
 
-  private void clearWithinLong(int i4096, int i64, long mask) {
+  private void and(int i4096, int i64, long mask) {
     final long index = indices[i4096];
     if ((index & (1L << i64)) != 0) {
       // offset of the long bits we are interested in in the array
@@ -225,6 +225,7 @@ public class SparseFixedBitSet extends B
       System.arraycopy(bitArray, o + 1, bitArray, o, length - o);
       bitArray[length] = 0L;
     }
+    nonZeroLongCount -= 1;
   }
 
   @Override
@@ -241,6 +242,7 @@ public class SparseFixedBitSet extends B
     } else {
       clearWithinBlock(firstBlock, from & MASK_4096, MASK_4096);
       for (int i = firstBlock + 1; i < lastBlock; ++i) {
+        nonZeroLongCount -= Long.bitCount(indices[i]);
         indices[i] = 0;
         bits[i] = null;
       }
@@ -258,14 +260,14 @@ public class SparseFixedBitSet extends B
     int lastLong = to >>> 6;
 
     if (firstLong == lastLong) {
-      clearWithinLong(i4096, firstLong, ~mask(from, to));
+      and(i4096, firstLong, ~mask(from, to));
     } else {
       assert firstLong < lastLong;
-      clearWithinLong(i4096, lastLong, ~mask(0, to));
+      and(i4096, lastLong, ~mask(0, to));
       for (int i = lastLong - 1; i >= firstLong + 1; --i) {
-        clearWithinLong(i4096, i, 0L);
+        and(i4096, i, 0L);
       }
-      clearWithinLong(i4096, firstLong, ~mask(from, 63));
+      and(i4096, firstLong, ~mask(from, 63));
     }
   }
 
@@ -343,20 +345,163 @@ public class SparseFixedBitSet extends B
     }
   }
 
+  /** Return the long bits at the given <code>i64</code> index. */
+  private long longBits(long index, long[] bits, int i64) {
+    if ((index & (1L << i64)) == 0) {
+      return 0L;
+    } else {
+      return bits[Long.bitCount(index & ((1L << i64) - 1))];
+    }
+  }
+
+  private void or(final int i4096, final long index, long[] bits, int nonZeroLongCount) {
+    assert Long.bitCount(index) == nonZeroLongCount;
+    final long currentIndex = indices[i4096];
+    if (currentIndex == 0) {
+      // fast path: if we currently have nothing in the block, just copy the data
+      // this especially happens all the time if you call OR on an empty set
+      indices[i4096] = index;
+      this.bits[i4096] = Arrays.copyOf(bits, nonZeroLongCount);
+      this.nonZeroLongCount += nonZeroLongCount;
+      return;
+    }
+    final long[] currentBits = this.bits[i4096];
+    final long[] newBits;
+    final long newIndex = currentIndex | index;
+    final int requiredCapacity = Long.bitCount(newIndex);
+    if (currentBits.length >= requiredCapacity) {
+      newBits = currentBits;
+    } else {
+      newBits = new long[oversize(requiredCapacity)];
+    }
+    // we iterate backwards in order to not override data we might need on the next iteration if the
+    // array is reused
+    for (int i = Long.numberOfLeadingZeros(newIndex), newO = Long.bitCount(newIndex) - 1;
+        i < 64;
+        i += 1 + Long.numberOfLeadingZeros(newIndex << (i + 1)), newO -= 1) {
+      // bitIndex is the index of a bit which is set in newIndex and newO is the number of 1 bits on its right
+      final int bitIndex = 63 - i;
+      assert newO == Long.bitCount(newIndex & ((1L << bitIndex) - 1));
+      newBits[newO] = longBits(currentIndex, currentBits, bitIndex) | longBits(index, bits, bitIndex);
+    }
+    indices[i4096] = newIndex;
+    this.bits[i4096] = newBits;
+    this.nonZeroLongCount += nonZeroLongCount - Long.bitCount(currentIndex & index);
+  }
+
+  private void or(SparseFixedBitSet other) {
+    for (int i = 0; i < other.indices.length; ++i) {
+      final long index = other.indices[i];
+      if (index != 0) {
+        or(i, index, other.bits[i], Long.bitCount(index));
+      }
+    }
+  }
+
+  /**
+   * {@link #or(DocIdSetIterator)} impl that works best when <code>it</code> is dense
+   */
+  private void orDense(DocIdSetIterator it) throws IOException {
+    assertUnpositioned(it);
+    // The goal here is to try to take advantage of the ordering of documents
+    // to build the data-structure more efficiently
+    // NOTE: this heavily relies on the fact that shifts are mod 64
+    final int firstDoc = it.nextDoc();
+    if (firstDoc == DocIdSetIterator.NO_MORE_DOCS) {
+      return;
+    }
+    int i4096 = firstDoc >>> 12;
+    int i64 = firstDoc >>> 6;
+    long index = 1L << i64;
+    long currentLong = 1L << firstDoc;
+    // we store at most 64 longs per block so preallocate in order never to have to resize
+    long[] longs = new long[64];
+    int numLongs = 0;
+
+    for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) {
+      final int doc64 = doc >>> 6;
+      if (doc64 == i64) {
+        // still in the same long, just set the bit
+        currentLong |= 1L << doc;
+      } else {
+        longs[numLongs++] = currentLong;
+
+        final int doc4096 = doc >>> 12;
+        if (doc4096 == i4096) {
+          index |= 1L << doc64;
+        } else {
+          // we are on a new block, flush what we buffered
+          or(i4096, index, longs, numLongs);
+          // and reset state for the new block
+          i4096 = doc4096;
+          index = 1L << doc64;
+          numLongs = 0;
+        }
+
+        // we are on a new long, reset state
+        i64 = doc64;
+        currentLong = 1L << doc;
+      }
+    }
+
+    // flush
+    longs[numLongs++] = currentLong;
+    or(i4096, index, longs, numLongs);
+  }
+
   @Override
   public void or(DocIdSetIterator it) throws IOException {
-    for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) {
-      set(doc);
+    {
+      // specialize union with another SparseFixedBitSet
+      final SparseFixedBitSet other = BitSetIterator.getSparseFixedBitSetOrNull(it);
+      if (other != null) {
+        assertUnpositioned(it);
+        or(other);
+        return;
+      }
+    }
+
+    // We do not specialize the union with a FixedBitSet since FixedBitSets are
+    // supposed to be used for dense data and sparse fixed bit sets for sparse
+    // data, so a sparse set would likely get upgraded by DocIdSetBuilder before
+    // being or'ed with a FixedBitSet
+
+    if (it.cost() < indices.length) {
+      // the default impl is good for sparse iterators
+      super.or(it);
+    } else {
+      orDense(it);
     }
   }
 
+  // AND and AND_NOT do not need much specialization here since this sparse set
+  // is supposed to be used on sparse data and the default AND/AND_NOT impl
+  // (leap frog) is efficient when at least one of the sets contains sparse data
+
   @Override
-  public long ramBytesUsed() {
-    return ramBytesUsed;
+  public void and(DocIdSetIterator it) throws IOException {
+    final SparseFixedBitSet other = BitSetIterator.getSparseFixedBitSetOrNull(it);
+    if (other != null) {
+      // if we are merging with another SparseFixedBitSet, a quick win is
+      // to clear up some blocks by only looking at their index. Then the set
+      // is sparser and the leap-frog approach of the parent class is more
+      // efficient. Since SparseFixedBitSet is supposed to be used for sparse
+      // sets, the intersection of two SparseFixedBitSet is likely very sparse
+      final int numCommonBlocks = Math.min(indices.length, other.indices.length);
+      for (int i = 0; i < numCommonBlocks; ++i) {
+        if ((indices[i] & other.indices[i]) == 0) {
+          this.nonZeroLongCount -= Long.bitCount(this.indices[i]);
+          this.indices[i] = 0;
+          this.bits[i] = null;
+        }
+      }
+    }
+    super.and(it);
   }
 
   @Override
-  public Iterable<? extends Accountable> getChildResources() {
-    return Collections.emptyList();
+  public long ramBytesUsed() {
+    return ramBytesUsed;
   }
+
 }

Modified: lucene/dev/branches/branch_5x/lucene/core/src/test/org/apache/lucene/util/TestSparseFixedBitSet.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/core/src/test/org/apache/lucene/util/TestSparseFixedBitSet.java?rev=1634479&r1=1634478&r2=1634479&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/core/src/test/org/apache/lucene/util/TestSparseFixedBitSet.java (original)
+++ lucene/dev/branches/branch_5x/lucene/core/src/test/org/apache/lucene/util/TestSparseFixedBitSet.java Mon Oct 27 08:57:05 2014
@@ -32,6 +32,23 @@ public class TestSparseFixedBitSet exten
     return set;
   }
 
+  @Override
+  protected void assertEquals(BitSet set1, SparseFixedBitSet set2, int maxDoc) {
+    super.assertEquals(set1, set2, maxDoc);
+    // check invariants of the sparse set
+    int nonZeroLongCount = 0;
+    for (int i = 0; i < set2.indices.length; ++i) {
+      final int n = Long.bitCount(set2.indices[i]);
+      if (n != 0) {
+        nonZeroLongCount += n;
+        for (int j = n; j < set2.bits[i].length; ++j) {
+          assertEquals(0, set2.bits[i][j]);
+        }
+      }
+    }
+    assertEquals(nonZeroLongCount, set2.nonZeroLongCount);
+  }
+
   public void testApproximateCardinality() {
     final SparseFixedBitSet set = new SparseFixedBitSet(10000);
     final int first = random().nextInt(1000);

Modified: lucene/dev/branches/branch_5x/lucene/test-framework/src/java/org/apache/lucene/util/BaseBitSetTestCase.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/test-framework/src/java/org/apache/lucene/util/BaseBitSetTestCase.java?rev=1634479&r1=1634478&r2=1634479&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/test-framework/src/java/org/apache/lucene/util/BaseBitSetTestCase.java (original)
+++ lucene/dev/branches/branch_5x/lucene/test-framework/src/java/org/apache/lucene/util/BaseBitSetTestCase.java Mon Oct 27 08:57:05 2014
@@ -58,7 +58,7 @@ public abstract class BaseBitSetTestCase
     return randomSet(numBits, (int) (percentSet * numBits));
   }
 
-  private void assertEquals(BitSet set1, BitSet set2, int maxDoc) {
+  protected void assertEquals(BitSet set1, T set2, int maxDoc) {
     for (int i = 0; i < maxDoc; ++i) {
       assertEquals("Different at " + i, set1.get(i), set2.get(i));
     }
@@ -77,11 +77,15 @@ public abstract class BaseBitSetTestCase
   /** Test the {@link BitSet#set} method. */
   public void testSet() throws IOException {
     final int numBits = 1 + random().nextInt(100000);
-    for (float percentSet : new float[] {0, 0.01f, 0.1f, 0.5f, 0.9f, 0.99f, 1f}) {
-      BitSet set1 = new JavaUtilBitSet(randomSet(numBits, percentSet), numBits);
-      T set2 = copyOf(set1, numBits);
-      assertEquals(set1, set2, numBits);
+    BitSet set1 = new JavaUtilBitSet(randomSet(numBits, 0), numBits);
+    T set2 = copyOf(set1, numBits);
+    final int iters = 10000 + random().nextInt(10000);
+    for (int i = 0; i < iters; ++i) {
+      final int index = random().nextInt(numBits);
+      set1.set(index);
+      set2.set(index);
     }
+    assertEquals(set1, set2, numBits);
   }
 
   /** Test the {@link BitSet#clear(int)} method. */
@@ -118,16 +122,28 @@ public abstract class BaseBitSetTestCase
   }
 
   private DocIdSet randomCopy(BitSet set, int numBits) throws IOException {
-    if (random().nextBoolean()) {
-      return new BitDocIdSet(copyOf(set, numBits), set.cardinality());
-    } else if (random().nextBoolean()) {
-      final RoaringDocIdSet.Builder builder = new RoaringDocIdSet.Builder(numBits);
-      for (int i = set.nextSetBit(0); i != DocIdSetIterator.NO_MORE_DOCS; i = i + 1 >= numBits ? DocIdSetIterator.NO_MORE_DOCS : set.nextSetBit(i + 1)) {
-        builder.add(i);
-      }
-      return builder.build();
-    } else {
-      return new BitDocIdSet(set, set.cardinality());
+    switch (random().nextInt(5)) {
+      case 0:
+        return new BitDocIdSet(set, set.cardinality());
+      case 1:
+        return new BitDocIdSet(copyOf(set, numBits), set.cardinality());
+      case 2:
+        final RoaringDocIdSet.Builder builder = new RoaringDocIdSet.Builder(numBits);
+        for (int i = set.nextSetBit(0); i != DocIdSetIterator.NO_MORE_DOCS; i = i + 1 >= numBits ? DocIdSetIterator.NO_MORE_DOCS : set.nextSetBit(i + 1)) {
+          builder.add(i);
+        }
+        return builder.build();
+      case 3:
+        FixedBitSet fbs = new FixedBitSet(numBits);
+        fbs.or(new BitSetIterator(set, 0));
+        return new BitDocIdSet(fbs);
+      case 4:
+        SparseFixedBitSet sfbs = new SparseFixedBitSet(numBits);
+        sfbs.or(new BitSetIterator(set, 0));
+        return new BitDocIdSet(sfbs);
+      default:
+        fail();
+        return null;
     }
   }
 
@@ -136,10 +152,10 @@ public abstract class BaseBitSetTestCase
     final int numBits = 1 + random().nextInt(100000);
     BitSet set1 = new JavaUtilBitSet(randomSet(numBits, 0), numBits);
     T set2 = copyOf(set1, numBits);
-    final int iters = 10 + random().nextInt(100);
+    final int iters = 50 + random().nextInt(50);
     for (int i = 0; i < iters; ++i) {
       // make extreme percents more likely
-      float percentSet2 = (float) Math.pow(random().nextDouble(), 2);
+      float percentSet2 = rarely() ? 0 : (float) Math.pow(random().nextDouble(), 2);
       if (random().nextBoolean()) {
         percentSet2 = 1 - percentSet2;
       }
@@ -150,18 +166,20 @@ public abstract class BaseBitSetTestCase
       if (bulkSetCopy.iterator() == null) {
         continue;
       }
+      DocIdSetIterator it1 = bulkSetCopy.iterator();
+      DocIdSetIterator it2 = bulkSetCopy.iterator();
       switch (random().nextInt(3)) {
         case 0:
-          set1.or(bulkSetCopy.iterator());
-          set2.or(bulkSetCopy.iterator());
+          set1.or(it1);
+          set2.or(it2);
           break;
         case 1:
-          set1.and(bulkSetCopy.iterator());
-          set2.and(bulkSetCopy.iterator());
+          set1.and(it1);
+          set2.and(it2);
           break;
         default:
-          set1.andNot(bulkSetCopy.iterator());
-          set2.andNot(bulkSetCopy.iterator());
+          set1.andNot(it1);
+          set2.andNot(it2);
           break;
       }
       assertEquals(set1, set2, numBits);