You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by jp...@apache.org on 2015/07/09 11:48:35 UTC

svn commit: r1690041 - in /lucene/dev/branches/branch_5x: ./ lucene/ lucene/core/ lucene/core/src/java/org/apache/lucene/search/ lucene/core/src/java/org/apache/lucene/util/ lucene/core/src/test/org/apache/lucene/util/ lucene/join/ lucene/join/src/java...

Author: jpountz
Date: Thu Jul  9 09:48:34 2015
New Revision: 1690041

URL: http://svn.apache.org/r1690041
Log:
LUCENE-6645: Optimized DocIdSet building for the "many small postings lists" case.

Added:
    lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/util/DocIdSetBuilder.java
      - copied unchanged from r1690026, lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/DocIdSetBuilder.java
    lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/util/IntArrayDocIdSet.java
      - copied unchanged from r1690026, lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/IntArrayDocIdSet.java
    lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/util/LSBRadixSorter.java
      - copied unchanged from r1690026, lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/LSBRadixSorter.java
    lucene/dev/branches/branch_5x/lucene/core/src/test/org/apache/lucene/util/TestIntArrayDocIdSet.java
      - copied unchanged from r1690026, lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/util/TestIntArrayDocIdSet.java
    lucene/dev/branches/branch_5x/lucene/core/src/test/org/apache/lucene/util/TestLSBRadixSorter.java
      - copied unchanged from r1690026, lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/util/TestLSBRadixSorter.java
Modified:
    lucene/dev/branches/branch_5x/   (props changed)
    lucene/dev/branches/branch_5x/lucene/   (props changed)
    lucene/dev/branches/branch_5x/lucene/CHANGES.txt   (contents, props changed)
    lucene/dev/branches/branch_5x/lucene/core/   (props changed)
    lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/MultiTermQueryConstantScoreWrapper.java
    lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/util/BitDocIdSet.java
    lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/util/BitSet.java
    lucene/dev/branches/branch_5x/lucene/core/src/test/org/apache/lucene/util/TestDocIdSetBuilder.java
    lucene/dev/branches/branch_5x/lucene/join/   (props changed)
    lucene/dev/branches/branch_5x/lucene/join/src/java/org/apache/lucene/search/join/QueryBitSetProducer.java
    lucene/dev/branches/branch_5x/lucene/misc/   (props changed)
    lucene/dev/branches/branch_5x/lucene/misc/src/test/org/apache/lucene/index/TestBlockJoinSorter.java
    lucene/dev/branches/branch_5x/lucene/queries/   (props changed)
    lucene/dev/branches/branch_5x/lucene/queries/src/java/org/apache/lucene/queries/TermsQuery.java
    lucene/dev/branches/branch_5x/lucene/sandbox/   (props changed)
    lucene/dev/branches/branch_5x/lucene/sandbox/src/java/org/apache/lucene/bkdtree/BKDTreeReader.java
    lucene/dev/branches/branch_5x/lucene/spatial/   (props changed)
    lucene/dev/branches/branch_5x/lucene/test-framework/   (props changed)
    lucene/dev/branches/branch_5x/lucene/test-framework/src/java/org/apache/lucene/search/SearchEquivalenceTestBase.java

Modified: lucene/dev/branches/branch_5x/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/CHANGES.txt?rev=1690041&r1=1690040&r2=1690041&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/CHANGES.txt (original)
+++ lucene/dev/branches/branch_5x/lucene/CHANGES.txt Thu Jul  9 09:48:34 2015
@@ -280,6 +280,10 @@ Optimizations
   where an expired segments_N references non-existing files (Robert
   Muir, Mike McCandless)
 
+* LUCENE-6645: Optimized the way we merge postings lists in multi-term queries
+  and TermsQuery. This should especially help when there are lots of small
+  postings lists. (Adrien Grand, Mike McCandless)
+
 Build
 
 * LUCENE-6518: Don't report false thread leaks from IBM J9

Modified: lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/MultiTermQueryConstantScoreWrapper.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/MultiTermQueryConstantScoreWrapper.java?rev=1690041&r1=1690040&r2=1690041&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/MultiTermQueryConstantScoreWrapper.java (original)
+++ lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/MultiTermQueryConstantScoreWrapper.java Thu Jul  9 09:48:34 2015
@@ -30,8 +30,8 @@ import org.apache.lucene.index.TermState
 import org.apache.lucene.index.Terms;
 import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.search.BooleanClause.Occur;
-import org.apache.lucene.util.BitDocIdSet;
 import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.DocIdSetBuilder;
 
 /**
  * This class also provides the functionality behind
@@ -60,17 +60,17 @@ final class MultiTermQueryConstantScoreW
     }
   }
 
-  private static class WeightOrBitSet {
+  private static class WeightOrDocIdSet {
     final Weight weight;
-    final BitDocIdSet bitset;
+    final DocIdSet set;
 
-    WeightOrBitSet(Weight weight) {
+    WeightOrDocIdSet(Weight weight) {
       this.weight = Objects.requireNonNull(weight);
-      this.bitset = null;
+      this.set = null;
     }
 
-    WeightOrBitSet(BitDocIdSet bitset) {
-      this.bitset = bitset;
+    WeightOrDocIdSet(DocIdSet bitset) {
+      this.set = bitset;
       this.weight = null;
     }
   }
@@ -135,11 +135,11 @@ final class MultiTermQueryConstantScoreW
        * On the given leaf context, try to either rewrite to a disjunction if
        * there are few terms, or build a bitset containing matching docs.
        */
-      private WeightOrBitSet rewrite(LeafReaderContext context) throws IOException {
+      private WeightOrDocIdSet rewrite(LeafReaderContext context) throws IOException {
         final Terms terms = context.reader().terms(query.field);
         if (terms == null) {
           // field does not exist
-          return new WeightOrBitSet((BitDocIdSet) null);
+          return new WeightOrDocIdSet((DocIdSet) null);
         }
 
         final TermsEnum termsEnum = query.getTermsEnum(terms);
@@ -158,30 +158,30 @@ final class MultiTermQueryConstantScoreW
           }
           Query q = new ConstantScoreQuery(bq);
           q.setBoost(score());
-          return new WeightOrBitSet(searcher.rewrite(q).createWeight(searcher, needsScores));
+          return new WeightOrDocIdSet(searcher.rewrite(q).createWeight(searcher, needsScores));
         }
 
         // Too many terms: go back to the terms we already collected and start building the bit set
-        BitDocIdSet.Builder builder = new BitDocIdSet.Builder(context.reader().maxDoc());
+        DocIdSetBuilder builder = new DocIdSetBuilder(context.reader().maxDoc());
         if (collectedTerms.isEmpty() == false) {
           TermsEnum termsEnum2 = terms.iterator();
           for (TermAndState t : collectedTerms) {
             termsEnum2.seekExact(t.term, t.state);
             docs = termsEnum2.postings(docs, PostingsEnum.NONE);
-            builder.or(docs);
+            builder.add(docs);
           }
         }
 
         // Then keep filling the bit set with remaining terms
         do {
           docs = termsEnum.postings(docs, PostingsEnum.NONE);
-          builder.or(docs);
+          builder.add(docs);
         } while (termsEnum.next() != null);
 
-        return new WeightOrBitSet(builder.build());
+        return new WeightOrDocIdSet(builder.build());
       }
 
-      private Scorer scorer(BitDocIdSet set) {
+      private Scorer scorer(DocIdSet set) throws IOException {
         if (set == null) {
           return null;
         }
@@ -194,11 +194,11 @@ final class MultiTermQueryConstantScoreW
 
       @Override
       public BulkScorer bulkScorer(LeafReaderContext context) throws IOException {
-        final WeightOrBitSet weightOrBitSet = rewrite(context);
+        final WeightOrDocIdSet weightOrBitSet = rewrite(context);
         if (weightOrBitSet.weight != null) {
           return weightOrBitSet.weight.bulkScorer(context);
         } else {
-          final Scorer scorer = scorer(weightOrBitSet.bitset);
+          final Scorer scorer = scorer(weightOrBitSet.set);
           if (scorer == null) {
             return null;
           }
@@ -208,11 +208,11 @@ final class MultiTermQueryConstantScoreW
 
       @Override
       public Scorer scorer(LeafReaderContext context) throws IOException {
-        final WeightOrBitSet weightOrBitSet = rewrite(context);
+        final WeightOrDocIdSet weightOrBitSet = rewrite(context);
         if (weightOrBitSet.weight != null) {
           return weightOrBitSet.weight.scorer(context);
         } else {
-          return scorer(weightOrBitSet.bitset);
+          return scorer(weightOrBitSet.set);
         }
       }
     };

Modified: lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/util/BitDocIdSet.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/util/BitDocIdSet.java?rev=1690041&r1=1690040&r2=1690041&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/util/BitDocIdSet.java (original)
+++ lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/util/BitDocIdSet.java Thu Jul  9 09:48:34 2015
@@ -77,7 +77,8 @@ public class BitDocIdSet extends DocIdSe
   }
 
   /**
-   * A builder of {@link DocIdSet}s that supports random access.
+   * A builder of {@link DocIdSet}s that supports random access. If you don't
+   * need random access, you should rather use {@link DocIdSetBuilder}.
    * @lucene.internal
    */
   public static final class Builder {

Modified: lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/util/BitSet.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/util/BitSet.java?rev=1690041&r1=1690040&r2=1690041&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/util/BitSet.java (original)
+++ lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/util/BitSet.java Thu Jul  9 09:48:34 2015
@@ -29,6 +29,21 @@ import org.apache.lucene.search.DocIdSet
  */
 public abstract class BitSet implements MutableBits, Accountable {
 
+  /** Build a {@link BitSet} from the content of the provided {@link DocIdSetIterator}.
+   *  NOTE: this will consume the {@link BitSet}. */
+  public static BitSet of(DocIdSetIterator it, int maxDoc) throws IOException {
+    final long cost = it.cost();
+    final int threshold = maxDoc >>> 7;
+    BitSet set;
+    if (cost < threshold) {
+      set = new SparseFixedBitSet(maxDoc);
+    } else {
+      set = new FixedBitSet(maxDoc);
+    }
+    set.or(it);
+    return set;
+  }
+
   /** Set the bit at <code>i</code>. */
   public abstract void set(int i);
 

Modified: lucene/dev/branches/branch_5x/lucene/core/src/test/org/apache/lucene/util/TestDocIdSetBuilder.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/core/src/test/org/apache/lucene/util/TestDocIdSetBuilder.java?rev=1690041&r1=1690040&r2=1690041&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/core/src/test/org/apache/lucene/util/TestDocIdSetBuilder.java (original)
+++ lucene/dev/branches/branch_5x/lucene/core/src/test/org/apache/lucene/util/TestDocIdSetBuilder.java Thu Jul  9 09:48:34 2015
@@ -22,10 +22,11 @@ import java.io.IOException;
 import org.apache.lucene.search.DocIdSet;
 import org.apache.lucene.search.DocIdSetIterator;
 
+
 public class TestDocIdSetBuilder extends LuceneTestCase {
 
   public void testEmpty() throws IOException {
-    assertEquals(null, new BitDocIdSet.Builder(1 + random().nextInt(1000)).build());
+    assertEquals(null, new DocIdSetBuilder(1 + random().nextInt(1000)).build());
   }
 
   private void assertEquals(DocIdSet d1, DocIdSet d2) throws IOException {
@@ -45,19 +46,9 @@ public class TestDocIdSetBuilder extends
     }
   }
 
-  public void testFull() throws IOException {
-    final int maxDoc = 1 + random().nextInt(1000);
-    BitDocIdSet.Builder builder = new BitDocIdSet.Builder(maxDoc, true);
-    DocIdSet set = builder.build();
-    DocIdSetIterator it = set.iterator();
-    for (int i = 0; i < maxDoc; ++i) {
-      assertEquals(i, it.nextDoc());
-    }
-  }
-
   public void testSparse() throws IOException {
     final int maxDoc = 1000000 + random().nextInt(1000000);
-    BitDocIdSet.Builder builder = new BitDocIdSet.Builder(maxDoc);
+    DocIdSetBuilder builder = new DocIdSetBuilder(maxDoc);
     final int numIterators = 1 + random().nextInt(10);
     final FixedBitSet ref = new FixedBitSet(maxDoc);
     for (int i = 0; i < numIterators; ++i) {
@@ -67,35 +58,101 @@ public class TestDocIdSetBuilder extends
         b.add(doc);
         ref.set(doc);
       }
-      builder.or(b.build().iterator());
+      builder.add(b.build().iterator());
     }
     DocIdSet result = builder.build();
-    assertTrue(result instanceof BitDocIdSet);
+    assertTrue(result instanceof IntArrayDocIdSet);
     assertEquals(new BitDocIdSet(ref), result);
   }
 
   public void testDense() throws IOException {
     final int maxDoc = 1000000 + random().nextInt(1000000);
-    BitDocIdSet.Builder builder = new BitDocIdSet.Builder(maxDoc);
+    DocIdSetBuilder builder = new DocIdSetBuilder(maxDoc);
     final int numIterators = 1 + random().nextInt(10);
     final FixedBitSet ref = new FixedBitSet(maxDoc);
-    if (random().nextBoolean()) {
-      // try upgrades
-      final int doc = random().nextInt(maxDoc);
-      ref.set(doc);
-      builder.or(new RoaringDocIdSet.Builder(maxDoc).add(doc).build().iterator());
-    }
     for (int i = 0; i < numIterators; ++i) {
       RoaringDocIdSet.Builder b = new RoaringDocIdSet.Builder(maxDoc);
-      for (int doc = random().nextInt(1000); doc < maxDoc; doc += 1 + random().nextInt(1000)) {
+      for (int doc = random().nextInt(1000); doc < maxDoc; doc += 1 + random().nextInt(100)) {
         b.add(doc);
         ref.set(doc);
       }
-      builder.or(b.build().iterator());
+      builder.add(b.build().iterator());
     }
     DocIdSet result = builder.build();
     assertTrue(result instanceof BitDocIdSet);
     assertEquals(new BitDocIdSet(ref), result);
   }
 
+  public void testRandom() throws IOException {
+    final int maxDoc = TestUtil.nextInt(random(), 1, 10000000);
+    for (int i = 1 ; i < maxDoc / 2; i <<=1) {
+      final int numDocs = TestUtil.nextInt(random(), 1, i);
+      final FixedBitSet docs = new FixedBitSet(maxDoc);
+      int c = 0;
+      while (c < numDocs) {
+        final int d = random().nextInt(maxDoc);
+        if (docs.get(d) == false) {
+          docs.set(d);
+          c += 1;
+        }
+      }
+
+      final int[] array = new int[numDocs + random().nextInt(100)];
+      DocIdSetIterator it = new BitSetIterator(docs, 0L);
+      int j = 0;
+      for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) {
+        array[j++] = doc;
+      }
+      assertEquals(numDocs, j);
+
+      // add some duplicates
+      while (j < array.length) {
+        array[j++] = array[random().nextInt(numDocs)];
+      }
+
+      // shuffle
+      for (j = array.length - 1; j >= 1; --j) {
+        final int k = random().nextInt(j);
+        int tmp = array[j];
+        array[j] = array[k];
+        array[k] = tmp;
+      }
+
+      // add docs out of order
+      DocIdSetBuilder builder = new DocIdSetBuilder(maxDoc);
+      for (j = 0; j < array.length; ) {
+        final int l = TestUtil.nextInt(random(), 1, array.length - j);
+        if (rarely()) {
+          builder.grow(l);
+        }
+        for (int k = 0; k < l; ++k) {
+          builder.add(array[j++]);
+        }
+      }
+
+      final DocIdSet expected = new BitDocIdSet(docs);
+      final DocIdSet actual = builder.build();
+      assertEquals(expected, actual);
+    }
+  }
+
+  public void testMisleadingDISICost() throws IOException {
+    final int maxDoc = TestUtil.nextInt(random(), 1000, 10000);
+    DocIdSetBuilder builder = new DocIdSetBuilder(maxDoc);
+    FixedBitSet expected = new FixedBitSet(maxDoc);
+
+    for (int i = 0; i < 10; ++i) {
+      final FixedBitSet docs = new FixedBitSet(maxDoc);
+      final int numDocs = random().nextInt(maxDoc / 1000);
+      for (int j = 0; j < numDocs; ++j) {
+        docs.set(random().nextInt(maxDoc));
+      }
+      expected.or(docs);
+      // We provide a cost of 0 here to make sure the builder can deal with wrong costs
+      builder.add(new BitSetIterator(docs, 0L));
+    }
+
+    assertEquals(new BitDocIdSet(expected), builder.build());
+  }
+
 }

Modified: lucene/dev/branches/branch_5x/lucene/join/src/java/org/apache/lucene/search/join/QueryBitSetProducer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/join/src/java/org/apache/lucene/search/join/QueryBitSetProducer.java?rev=1690041&r1=1690040&r2=1690041&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/join/src/java/org/apache/lucene/search/join/QueryBitSetProducer.java (original)
+++ lucene/dev/branches/branch_5x/lucene/join/src/java/org/apache/lucene/search/join/QueryBitSetProducer.java Thu Jul  9 09:48:34 2015
@@ -70,14 +70,10 @@ public class QueryBitSetProducer impleme
       final Weight weight = searcher.createNormalizedWeight(query, false);
       final DocIdSetIterator it = weight.scorer(context);
 
-      BitDocIdSet.Builder builder = new BitDocIdSet.Builder(context.reader().maxDoc());
-      if (it != null) {
-        builder.or(it);
-      }
-      docIdSet = builder.build();
-      if (docIdSet == null) {
-        // We use EMPTY as a sentinel for the empty set, which is cacheable
+      if (it == null) {
         docIdSet = DocIdSet.EMPTY;
+      } else {
+        docIdSet = new BitDocIdSet(BitSet.of(it, context.reader().maxDoc()));
       }
       cache.put(key, docIdSet);
     }

Modified: lucene/dev/branches/branch_5x/lucene/misc/src/test/org/apache/lucene/index/TestBlockJoinSorter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/misc/src/test/org/apache/lucene/index/TestBlockJoinSorter.java?rev=1690041&r1=1690040&r2=1690041&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/misc/src/test/org/apache/lucene/index/TestBlockJoinSorter.java (original)
+++ lucene/dev/branches/branch_5x/lucene/misc/src/test/org/apache/lucene/index/TestBlockJoinSorter.java Thu Jul  9 09:48:34 2015
@@ -48,6 +48,7 @@ import org.apache.lucene.search.SortFiel
 import org.apache.lucene.search.TermQuery;
 import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.BitDocIdSet;
+import org.apache.lucene.util.BitSet;
 import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.FixedBitSet;
 import org.apache.lucene.util.LuceneTestCase;
@@ -74,9 +75,7 @@ public class TestBlockJoinSorter extends
         final DocIdSet uncached = filter.getDocIdSet(context, null);
         final DocIdSetIterator it = uncached == null ? null : uncached.iterator();
         if (it != null) {
-          BitDocIdSet.Builder builder = new BitDocIdSet.Builder(context.reader().maxDoc());
-          builder.or(it);
-          docIdSet = builder.build();
+          docIdSet = new BitDocIdSet(BitSet.of(it, context.reader().maxDoc()));
         }
         if (docIdSet == null) {
           docIdSet = new BitDocIdSet(new SparseFixedBitSet(context.reader().maxDoc()));

Modified: lucene/dev/branches/branch_5x/lucene/queries/src/java/org/apache/lucene/queries/TermsQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/queries/src/java/org/apache/lucene/queries/TermsQuery.java?rev=1690041&r1=1690040&r2=1690041&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/queries/src/java/org/apache/lucene/queries/TermsQuery.java (original)
+++ lucene/dev/branches/branch_5x/lucene/queries/src/java/org/apache/lucene/queries/TermsQuery.java Thu Jul  9 09:48:34 2015
@@ -44,6 +44,7 @@ import org.apache.lucene.search.BulkScor
 import org.apache.lucene.search.ConstantScoreQuery;
 import org.apache.lucene.search.ConstantScoreScorer;
 import org.apache.lucene.search.ConstantScoreWeight;
+import org.apache.lucene.search.DocIdSet;
 import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.Query;
@@ -52,9 +53,8 @@ import org.apache.lucene.search.TermQuer
 import org.apache.lucene.search.Weight;
 import org.apache.lucene.util.Accountable;
 import org.apache.lucene.util.ArrayUtil;
-import org.apache.lucene.util.BitDocIdSet;
-import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.DocIdSetBuilder;
 import org.apache.lucene.util.RamUsageEstimator;
 import org.apache.lucene.util.ToStringUtils;
 
@@ -222,17 +222,17 @@ public class TermsQuery extends Query im
     }
   }
 
-  private static class WeightOrBitSet {
+  private static class WeightOrDocIdSet {
     final Weight weight;
-    final BitDocIdSet bitset;
+    final DocIdSet set;
 
-    WeightOrBitSet(Weight weight) {
+    WeightOrDocIdSet(Weight weight) {
       this.weight = Objects.requireNonNull(weight);
-      this.bitset = null;
+      this.set = null;
     }
 
-    WeightOrBitSet(BitDocIdSet bitset) {
-      this.bitset = bitset;
+    WeightOrDocIdSet(DocIdSet bitset) {
+      this.set = bitset;
       this.weight = null;
     }
   }
@@ -253,7 +253,7 @@ public class TermsQuery extends Query im
        * On the given leaf context, try to either rewrite to a disjunction if
        * there are few matching terms, or build a bitset containing matching docs.
        */
-      private WeightOrBitSet rewrite(LeafReaderContext context) throws IOException {
+      private WeightOrDocIdSet rewrite(LeafReaderContext context) throws IOException {
         final LeafReader reader = context.reader();
 
         // We will first try to collect up to 'threshold' terms into 'matchingTerms'
@@ -261,7 +261,7 @@ public class TermsQuery extends Query im
         final int threshold = Math.min(BOOLEAN_REWRITE_TERM_COUNT_THRESHOLD, BooleanQuery.getMaxClauseCount());
         assert termData.size() > threshold : "Query should have been rewritten";
         List<TermAndState> matchingTerms = new ArrayList<>(threshold);
-        BitDocIdSet.Builder builder = null;
+        DocIdSetBuilder builder = null;
 
         final Fields fields = reader.fields();
         String lastField = null;
@@ -284,18 +284,18 @@ public class TermsQuery extends Query im
           if (termsEnum != null && termsEnum.seekExact(term)) {
             if (matchingTerms == null) {
               docs = termsEnum.postings(docs, PostingsEnum.NONE);
-              builder.or(docs);
+              builder.add(docs);
             } else if (matchingTerms.size() < threshold) {
               matchingTerms.add(new TermAndState(field, termsEnum));
             } else {
               assert matchingTerms.size() == threshold;
-              builder = new BitDocIdSet.Builder(reader.maxDoc());
+              builder = new DocIdSetBuilder(reader.maxDoc());
               docs = termsEnum.postings(docs, PostingsEnum.NONE);
-              builder.or(docs);
+              builder.add(docs);
               for (TermAndState t : matchingTerms) {
                 t.termsEnum.seekExact(t.term, t.state);
                 docs = t.termsEnum.postings(docs, PostingsEnum.NONE);
-                builder.or(docs);
+                builder.add(docs);
               }
               matchingTerms = null;
             }
@@ -311,14 +311,14 @@ public class TermsQuery extends Query im
           }
           Query q = new ConstantScoreQuery(bq);
           q.setBoost(score());
-          return new WeightOrBitSet(searcher.rewrite(q).createWeight(searcher, needsScores));
+          return new WeightOrDocIdSet(searcher.rewrite(q).createWeight(searcher, needsScores));
         } else {
           assert builder != null;
-          return new WeightOrBitSet(builder.build());
+          return new WeightOrDocIdSet(builder.build());
         }
       }
 
-      private Scorer scorer(BitDocIdSet set) {
+      private Scorer scorer(DocIdSet set) throws IOException {
         if (set == null) {
           return null;
         }
@@ -331,11 +331,11 @@ public class TermsQuery extends Query im
 
       @Override
       public BulkScorer bulkScorer(LeafReaderContext context) throws IOException {
-        final WeightOrBitSet weightOrBitSet = rewrite(context);
+        final WeightOrDocIdSet weightOrBitSet = rewrite(context);
         if (weightOrBitSet.weight != null) {
           return weightOrBitSet.weight.bulkScorer(context);
         } else {
-          final Scorer scorer = scorer(weightOrBitSet.bitset);
+          final Scorer scorer = scorer(weightOrBitSet.set);
           if (scorer == null) {
             return null;
           }
@@ -345,11 +345,11 @@ public class TermsQuery extends Query im
 
       @Override
       public Scorer scorer(LeafReaderContext context) throws IOException {
-        final WeightOrBitSet weightOrBitSet = rewrite(context);
+        final WeightOrDocIdSet weightOrBitSet = rewrite(context);
         if (weightOrBitSet.weight != null) {
           return weightOrBitSet.weight.scorer(context);
         } else {
-          return scorer(weightOrBitSet.bitset);
+          return scorer(weightOrBitSet.set);
         }
       }
     };

Modified: lucene/dev/branches/branch_5x/lucene/sandbox/src/java/org/apache/lucene/bkdtree/BKDTreeReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/sandbox/src/java/org/apache/lucene/bkdtree/BKDTreeReader.java?rev=1690041&r1=1690040&r2=1690041&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/sandbox/src/java/org/apache/lucene/bkdtree/BKDTreeReader.java (original)
+++ lucene/dev/branches/branch_5x/lucene/sandbox/src/java/org/apache/lucene/bkdtree/BKDTreeReader.java Thu Jul  9 09:48:34 2015
@@ -22,9 +22,7 @@ import org.apache.lucene.search.DocIdSet
 import org.apache.lucene.store.ByteArrayDataInput;
 import org.apache.lucene.store.IndexInput;
 import org.apache.lucene.util.Accountable;
-import org.apache.lucene.util.BitDocIdSet;
-import org.apache.lucene.util.Bits;
-import org.apache.lucene.util.FixedBitSet;
+import org.apache.lucene.util.DocIdSetBuilder;
 import org.apache.lucene.util.RamUsageEstimator;
 
 import java.io.IOException;
@@ -73,7 +71,7 @@ final class BKDTreeReader implements Acc
     final IndexInput in;
     byte[] scratch = new byte[16];
     final ByteArrayDataInput scratchReader = new ByteArrayDataInput(scratch);
-    final FixedBitSet bits;
+    final DocIdSetBuilder docs;
     final int latMinEnc;
     final int latMaxEnc;
     final int lonMinEnc;
@@ -87,7 +85,7 @@ final class BKDTreeReader implements Acc
                       LatLonFilter latLonFilter,
                       SortedNumericDocValues sndv) {
       this.in = in;
-      this.bits = new FixedBitSet(maxDoc);
+      this.docs = new DocIdSetBuilder(maxDoc);
       this.latMinEnc = latMinEnc;
       this.latMaxEnc = latMaxEnc;
       this.lonMinEnc = lonMinEnc;
@@ -137,7 +135,7 @@ final class BKDTreeReader implements Acc
                              BKDTreeWriter.encodeLon(Math.nextAfter(180.0, Double.POSITIVE_INFINITY)));
 
     // NOTE: hitCount is an over-estimate in the multi-valued case:
-    return new BitDocIdSet(state.bits, hitCount);
+    return state.docs.build(hitCount);
   }
 
   /** Fast path: this is called when the query rect fully encompasses all cells under this node. */
@@ -169,9 +167,10 @@ final class BKDTreeReader implements Acc
       //System.out.println("    seek to leafFP=" + fp);
       // How many points are stored in this leaf cell:
       int count = state.in.readVInt();
+      state.docs.grow(count);
       for(int i=0;i<count;i++) {
         int docID = state.in.readInt();
-        state.bits.set(docID);
+        state.docs.add(docID);
       }
 
       //bits.or(allLeafDISI);
@@ -264,6 +263,7 @@ final class BKDTreeReader implements Acc
       // How many points are stored in this leaf cell:
       int count = state.in.readVInt();
 
+      state.docs.grow(count);
       for(int i=0;i<count;i++) {
         int docID = state.in.readInt();
         state.sndv.setDocument(docID);
@@ -281,7 +281,7 @@ final class BKDTreeReader implements Acc
               lonEnc < state.lonMaxEnc &&
               (state.latLonFilter == null ||
                state.latLonFilter.accept(BKDTreeWriter.decodeLat(latEnc), BKDTreeWriter.decodeLon(lonEnc)))) {
-            state.bits.set(docID);
+            state.docs.add(docID);
             hitCount++;
 
             // Stop processing values for this doc:

Modified: lucene/dev/branches/branch_5x/lucene/test-framework/src/java/org/apache/lucene/search/SearchEquivalenceTestBase.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/test-framework/src/java/org/apache/lucene/search/SearchEquivalenceTestBase.java?rev=1690041&r1=1690040&r2=1690041&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/test-framework/src/java/org/apache/lucene/search/SearchEquivalenceTestBase.java (original)
+++ lucene/dev/branches/branch_5x/lucene/test-framework/src/java/org/apache/lucene/search/SearchEquivalenceTestBase.java Thu Jul  9 09:48:34 2015
@@ -17,7 +17,6 @@ package org.apache.lucene.search;
  * limitations under the License.
  */
 
-import java.io.IOException;
 import java.util.BitSet;
 import java.util.Random;
 
@@ -29,13 +28,10 @@ import org.apache.lucene.document.Field;
 import org.apache.lucene.document.StringField;
 import org.apache.lucene.document.TextField;
 import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.LeafReaderContext;
 import org.apache.lucene.index.RandomIndexWriter;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.search.BooleanClause.Occur;
 import org.apache.lucene.store.Directory;
-import org.apache.lucene.util.BitDocIdSet;
-import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
 import org.apache.lucene.util.TestUtil;
@@ -145,7 +141,7 @@ public abstract class SearchEquivalenceT
   /**
    * Returns a random filter over the document set
    */
-  protected Filter randomFilter() {
+  protected Query randomFilter() {
     final Query query;
     if (random().nextBoolean()) {
       query = TermRangeQuery.newStringRange("field", "a", "" + randomChar(), true, true);
@@ -157,89 +153,7 @@ public abstract class SearchEquivalenceT
       phrase.setSlop(100);
       query = phrase;
     }
-    
-    // now wrap the query as a filter. QWF has its own codepath
-    if (random().nextBoolean()) {
-      return new QueryWrapperFilter(query);
-    } else {
-      return new SlowWrapperFilter(query, random().nextBoolean());
-    }
-  }
-  
-  static class SlowWrapperFilter extends Filter {
-    final Query query;
-    final boolean useBits;
-    
-    SlowWrapperFilter(Query query, boolean useBits) {
-      this.query = query;
-      this.useBits = useBits;
-    }
-    
-    @Override
-    public Query rewrite(IndexReader reader) throws IOException {
-      Query q = query.rewrite(reader);
-      if (q != query) {
-        return new SlowWrapperFilter(q, useBits);
-      } else {
-        return super.rewrite(reader);
-      }
-    }
-
-    @Override
-    public DocIdSet getDocIdSet(final LeafReaderContext context, final Bits acceptDocs) throws IOException {
-      // get a private context that is used to rewrite, createWeight and score eventually
-      final LeafReaderContext privateContext = context.reader().getContext();
-      final Weight weight = new IndexSearcher(privateContext).createNormalizedWeight(query, false);
-      return new DocIdSet() {
-        @Override
-        public DocIdSetIterator iterator() throws IOException {
-          return weight.scorer(privateContext);
-        }
-
-        @Override
-        public long ramBytesUsed() {
-          return 0L;
-        }
-
-        @Override
-        public Bits bits() throws IOException {
-          if (useBits) {
-            BitDocIdSet.Builder builder = new BitDocIdSet.Builder(context.reader().maxDoc());
-            DocIdSetIterator disi = iterator();
-            if (disi != null) {
-              builder.or(disi);
-            }
-            BitDocIdSet bitset = builder.build();
-            if (bitset == null) {
-              return new Bits.MatchNoBits(context.reader().maxDoc());
-            } else {
-              return bitset.bits();
-            }
-          } else {
-            return null;
-          }
-        }
-      };
-    }
-
-    @Override
-    public String toString(String field) {
-      return "SlowQWF(" + query + ")";
-    }
-
-    @Override
-    public boolean equals(Object obj) {
-      if (super.equals(obj) == false) {
-        return false;
-      }
-      return query.equals(((SlowWrapperFilter) obj).query);
-    }
-
-    @Override
-    public int hashCode() {
-      return 31 * super.hashCode() + query.hashCode();
-    }
-
+    return query;
   }
 
   /**
@@ -262,13 +176,10 @@ public abstract class SearchEquivalenceT
     // test with some filters (this will sometimes cause advance'ing enough to test it)
     int numFilters = TEST_NIGHTLY ? atLeast(10) : atLeast(3);
     for (int i = 0; i < numFilters; i++) {
-      Filter filter = randomFilter();
+      Query filter = randomFilter();
       // incorporate the filter in different ways.
       assertSubsetOf(q1, q2, filter);
       assertSubsetOf(filteredQuery(q1, filter), filteredQuery(q2, filter), null);
-      assertSubsetOf(filteredQuery(q1, filter), filteredBooleanQuery(q2, filter), null);
-      assertSubsetOf(filteredBooleanQuery(q1, filter), filteredBooleanQuery(q2, filter), null);
-      assertSubsetOf(filteredBooleanQuery(q1, filter), filteredQuery(q2, filter), null);
     }
   }
   
@@ -278,13 +189,13 @@ public abstract class SearchEquivalenceT
    * 
    * Both queries will be filtered by <code>filter</code>
    */
-  protected void assertSubsetOf(Query q1, Query q2, Filter filter) throws Exception {
+  protected void assertSubsetOf(Query q1, Query q2, Query filter) throws Exception {
     QueryUtils.check(q1);
     QueryUtils.check(q2);
 
     if (filter != null) {
-      q1 = new FilteredQuery(q1, filter);
-      q2 = new FilteredQuery(q2, filter);
+      q1 = filteredQuery(q1, filter);
+      q2 = filteredQuery(q2, filter);
     }
     // we test both INDEXORDER and RELEVANCE because we want to test needsScores=true/false
     for (Sort sort : new Sort[] { Sort.INDEXORDER, Sort.RELEVANCE }) {
@@ -316,21 +227,18 @@ public abstract class SearchEquivalenceT
     // also test with some filters to test advancing
     int numFilters = TEST_NIGHTLY ? atLeast(10) : atLeast(3);
     for (int i = 0; i < numFilters; i++) {
-      Filter filter = randomFilter();
+      Query filter = randomFilter();
       // incorporate the filter in different ways.
       assertSameScores(q1, q2, filter);
       assertSameScores(filteredQuery(q1, filter), filteredQuery(q2, filter), null);
-      assertSameScores(filteredQuery(q1, filter), filteredBooleanQuery(q2, filter), null);
-      assertSameScores(filteredBooleanQuery(q1, filter), filteredBooleanQuery(q2, filter), null);
-      assertSameScores(filteredBooleanQuery(q1, filter), filteredQuery(q2, filter), null);
     }
   }
 
-  protected void assertSameScores(Query q1, Query q2, Filter filter) throws Exception {
+  protected void assertSameScores(Query q1, Query q2, Query filter) throws Exception {
     // not efficient, but simple!
     if (filter != null) {
-      q1 = new FilteredQuery(q1, filter);
-      q2 = new FilteredQuery(q2, filter);
+      q1 = filteredQuery(q1, filter);
+      q2 = filteredQuery(q2, filter);
     }
     TopDocs td1 = s1.search(q1, reader.maxDoc());
     TopDocs td2 = s2.search(q2, reader.maxDoc());
@@ -341,11 +249,7 @@ public abstract class SearchEquivalenceT
     }
   }
   
-  protected Query filteredQuery(Query query, Filter filter) {
-    return new FilteredQuery(query, filter, TestUtil.randomFilterStrategy(random()));
-  }
-  
-  protected Query filteredBooleanQuery(Query query, Filter filter) {
+  protected Query filteredQuery(Query query, Query filter) {
     BooleanQuery bq = new BooleanQuery();
     bq.add(query, Occur.MUST);
     bq.add(filter, Occur.FILTER);