You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by jp...@apache.org on 2015/07/09 11:48:35 UTC
svn commit: r1690041 - in /lucene/dev/branches/branch_5x: ./ lucene/
lucene/core/ lucene/core/src/java/org/apache/lucene/search/
lucene/core/src/java/org/apache/lucene/util/
lucene/core/src/test/org/apache/lucene/util/ lucene/join/
lucene/join/src/java...
Author: jpountz
Date: Thu Jul 9 09:48:34 2015
New Revision: 1690041
URL: http://svn.apache.org/r1690041
Log:
LUCENE-6645: Optimized DocIdSet building for the "many small postings lists" case.
Added:
lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/util/DocIdSetBuilder.java
- copied unchanged from r1690026, lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/DocIdSetBuilder.java
lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/util/IntArrayDocIdSet.java
- copied unchanged from r1690026, lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/IntArrayDocIdSet.java
lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/util/LSBRadixSorter.java
- copied unchanged from r1690026, lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/LSBRadixSorter.java
lucene/dev/branches/branch_5x/lucene/core/src/test/org/apache/lucene/util/TestIntArrayDocIdSet.java
- copied unchanged from r1690026, lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/util/TestIntArrayDocIdSet.java
lucene/dev/branches/branch_5x/lucene/core/src/test/org/apache/lucene/util/TestLSBRadixSorter.java
- copied unchanged from r1690026, lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/util/TestLSBRadixSorter.java
Modified:
lucene/dev/branches/branch_5x/ (props changed)
lucene/dev/branches/branch_5x/lucene/ (props changed)
lucene/dev/branches/branch_5x/lucene/CHANGES.txt (contents, props changed)
lucene/dev/branches/branch_5x/lucene/core/ (props changed)
lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/MultiTermQueryConstantScoreWrapper.java
lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/util/BitDocIdSet.java
lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/util/BitSet.java
lucene/dev/branches/branch_5x/lucene/core/src/test/org/apache/lucene/util/TestDocIdSetBuilder.java
lucene/dev/branches/branch_5x/lucene/join/ (props changed)
lucene/dev/branches/branch_5x/lucene/join/src/java/org/apache/lucene/search/join/QueryBitSetProducer.java
lucene/dev/branches/branch_5x/lucene/misc/ (props changed)
lucene/dev/branches/branch_5x/lucene/misc/src/test/org/apache/lucene/index/TestBlockJoinSorter.java
lucene/dev/branches/branch_5x/lucene/queries/ (props changed)
lucene/dev/branches/branch_5x/lucene/queries/src/java/org/apache/lucene/queries/TermsQuery.java
lucene/dev/branches/branch_5x/lucene/sandbox/ (props changed)
lucene/dev/branches/branch_5x/lucene/sandbox/src/java/org/apache/lucene/bkdtree/BKDTreeReader.java
lucene/dev/branches/branch_5x/lucene/spatial/ (props changed)
lucene/dev/branches/branch_5x/lucene/test-framework/ (props changed)
lucene/dev/branches/branch_5x/lucene/test-framework/src/java/org/apache/lucene/search/SearchEquivalenceTestBase.java
Modified: lucene/dev/branches/branch_5x/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/CHANGES.txt?rev=1690041&r1=1690040&r2=1690041&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/CHANGES.txt (original)
+++ lucene/dev/branches/branch_5x/lucene/CHANGES.txt Thu Jul 9 09:48:34 2015
@@ -280,6 +280,10 @@ Optimizations
where an expired segments_N references non-existing files (Robert
Muir, Mike McCandless)
+* LUCENE-6645: Optimized the way we merge postings lists in multi-term queries
+ and TermsQuery. This should especially help when there are lots of small
+ postings lists. (Adrien Grand, Mike McCandless)
+
Build
* LUCENE-6518: Don't report false thread leaks from IBM J9
Modified: lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/MultiTermQueryConstantScoreWrapper.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/MultiTermQueryConstantScoreWrapper.java?rev=1690041&r1=1690040&r2=1690041&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/MultiTermQueryConstantScoreWrapper.java (original)
+++ lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/MultiTermQueryConstantScoreWrapper.java Thu Jul 9 09:48:34 2015
@@ -30,8 +30,8 @@ import org.apache.lucene.index.TermState
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.BooleanClause.Occur;
-import org.apache.lucene.util.BitDocIdSet;
import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.DocIdSetBuilder;
/**
* This class also provides the functionality behind
@@ -60,17 +60,17 @@ final class MultiTermQueryConstantScoreW
}
}
- private static class WeightOrBitSet {
+ private static class WeightOrDocIdSet {
final Weight weight;
- final BitDocIdSet bitset;
+ final DocIdSet set;
- WeightOrBitSet(Weight weight) {
+ WeightOrDocIdSet(Weight weight) {
this.weight = Objects.requireNonNull(weight);
- this.bitset = null;
+ this.set = null;
}
- WeightOrBitSet(BitDocIdSet bitset) {
- this.bitset = bitset;
+ WeightOrDocIdSet(DocIdSet bitset) {
+ this.set = bitset;
this.weight = null;
}
}
@@ -135,11 +135,11 @@ final class MultiTermQueryConstantScoreW
* On the given leaf context, try to either rewrite to a disjunction if
* there are few terms, or build a bitset containing matching docs.
*/
- private WeightOrBitSet rewrite(LeafReaderContext context) throws IOException {
+ private WeightOrDocIdSet rewrite(LeafReaderContext context) throws IOException {
final Terms terms = context.reader().terms(query.field);
if (terms == null) {
// field does not exist
- return new WeightOrBitSet((BitDocIdSet) null);
+ return new WeightOrDocIdSet((DocIdSet) null);
}
final TermsEnum termsEnum = query.getTermsEnum(terms);
@@ -158,30 +158,30 @@ final class MultiTermQueryConstantScoreW
}
Query q = new ConstantScoreQuery(bq);
q.setBoost(score());
- return new WeightOrBitSet(searcher.rewrite(q).createWeight(searcher, needsScores));
+ return new WeightOrDocIdSet(searcher.rewrite(q).createWeight(searcher, needsScores));
}
// Too many terms: go back to the terms we already collected and start building the bit set
- BitDocIdSet.Builder builder = new BitDocIdSet.Builder(context.reader().maxDoc());
+ DocIdSetBuilder builder = new DocIdSetBuilder(context.reader().maxDoc());
if (collectedTerms.isEmpty() == false) {
TermsEnum termsEnum2 = terms.iterator();
for (TermAndState t : collectedTerms) {
termsEnum2.seekExact(t.term, t.state);
docs = termsEnum2.postings(docs, PostingsEnum.NONE);
- builder.or(docs);
+ builder.add(docs);
}
}
// Then keep filling the bit set with remaining terms
do {
docs = termsEnum.postings(docs, PostingsEnum.NONE);
- builder.or(docs);
+ builder.add(docs);
} while (termsEnum.next() != null);
- return new WeightOrBitSet(builder.build());
+ return new WeightOrDocIdSet(builder.build());
}
- private Scorer scorer(BitDocIdSet set) {
+ private Scorer scorer(DocIdSet set) throws IOException {
if (set == null) {
return null;
}
@@ -194,11 +194,11 @@ final class MultiTermQueryConstantScoreW
@Override
public BulkScorer bulkScorer(LeafReaderContext context) throws IOException {
- final WeightOrBitSet weightOrBitSet = rewrite(context);
+ final WeightOrDocIdSet weightOrBitSet = rewrite(context);
if (weightOrBitSet.weight != null) {
return weightOrBitSet.weight.bulkScorer(context);
} else {
- final Scorer scorer = scorer(weightOrBitSet.bitset);
+ final Scorer scorer = scorer(weightOrBitSet.set);
if (scorer == null) {
return null;
}
@@ -208,11 +208,11 @@ final class MultiTermQueryConstantScoreW
@Override
public Scorer scorer(LeafReaderContext context) throws IOException {
- final WeightOrBitSet weightOrBitSet = rewrite(context);
+ final WeightOrDocIdSet weightOrBitSet = rewrite(context);
if (weightOrBitSet.weight != null) {
return weightOrBitSet.weight.scorer(context);
} else {
- return scorer(weightOrBitSet.bitset);
+ return scorer(weightOrBitSet.set);
}
}
};
Modified: lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/util/BitDocIdSet.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/util/BitDocIdSet.java?rev=1690041&r1=1690040&r2=1690041&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/util/BitDocIdSet.java (original)
+++ lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/util/BitDocIdSet.java Thu Jul 9 09:48:34 2015
@@ -77,7 +77,8 @@ public class BitDocIdSet extends DocIdSe
}
/**
- * A builder of {@link DocIdSet}s that supports random access.
+ * A builder of {@link DocIdSet}s that supports random access. If you don't
+ * need random access, you should rather use {@link DocIdSetBuilder}.
* @lucene.internal
*/
public static final class Builder {
Modified: lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/util/BitSet.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/util/BitSet.java?rev=1690041&r1=1690040&r2=1690041&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/util/BitSet.java (original)
+++ lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/util/BitSet.java Thu Jul 9 09:48:34 2015
@@ -29,6 +29,21 @@ import org.apache.lucene.search.DocIdSet
*/
public abstract class BitSet implements MutableBits, Accountable {
+ /** Build a {@link BitSet} from the content of the provided {@link DocIdSetIterator}.
+ * NOTE: this will consume the {@link BitSet}. */
+ public static BitSet of(DocIdSetIterator it, int maxDoc) throws IOException {
+ final long cost = it.cost();
+ final int threshold = maxDoc >>> 7;
+ BitSet set;
+ if (cost < threshold) {
+ set = new SparseFixedBitSet(maxDoc);
+ } else {
+ set = new FixedBitSet(maxDoc);
+ }
+ set.or(it);
+ return set;
+ }
+
/** Set the bit at <code>i</code>. */
public abstract void set(int i);
Modified: lucene/dev/branches/branch_5x/lucene/core/src/test/org/apache/lucene/util/TestDocIdSetBuilder.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/core/src/test/org/apache/lucene/util/TestDocIdSetBuilder.java?rev=1690041&r1=1690040&r2=1690041&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/core/src/test/org/apache/lucene/util/TestDocIdSetBuilder.java (original)
+++ lucene/dev/branches/branch_5x/lucene/core/src/test/org/apache/lucene/util/TestDocIdSetBuilder.java Thu Jul 9 09:48:34 2015
@@ -22,10 +22,11 @@ import java.io.IOException;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator;
+
public class TestDocIdSetBuilder extends LuceneTestCase {
public void testEmpty() throws IOException {
- assertEquals(null, new BitDocIdSet.Builder(1 + random().nextInt(1000)).build());
+ assertEquals(null, new DocIdSetBuilder(1 + random().nextInt(1000)).build());
}
private void assertEquals(DocIdSet d1, DocIdSet d2) throws IOException {
@@ -45,19 +46,9 @@ public class TestDocIdSetBuilder extends
}
}
- public void testFull() throws IOException {
- final int maxDoc = 1 + random().nextInt(1000);
- BitDocIdSet.Builder builder = new BitDocIdSet.Builder(maxDoc, true);
- DocIdSet set = builder.build();
- DocIdSetIterator it = set.iterator();
- for (int i = 0; i < maxDoc; ++i) {
- assertEquals(i, it.nextDoc());
- }
- }
-
public void testSparse() throws IOException {
final int maxDoc = 1000000 + random().nextInt(1000000);
- BitDocIdSet.Builder builder = new BitDocIdSet.Builder(maxDoc);
+ DocIdSetBuilder builder = new DocIdSetBuilder(maxDoc);
final int numIterators = 1 + random().nextInt(10);
final FixedBitSet ref = new FixedBitSet(maxDoc);
for (int i = 0; i < numIterators; ++i) {
@@ -67,35 +58,101 @@ public class TestDocIdSetBuilder extends
b.add(doc);
ref.set(doc);
}
- builder.or(b.build().iterator());
+ builder.add(b.build().iterator());
}
DocIdSet result = builder.build();
- assertTrue(result instanceof BitDocIdSet);
+ assertTrue(result instanceof IntArrayDocIdSet);
assertEquals(new BitDocIdSet(ref), result);
}
public void testDense() throws IOException {
final int maxDoc = 1000000 + random().nextInt(1000000);
- BitDocIdSet.Builder builder = new BitDocIdSet.Builder(maxDoc);
+ DocIdSetBuilder builder = new DocIdSetBuilder(maxDoc);
final int numIterators = 1 + random().nextInt(10);
final FixedBitSet ref = new FixedBitSet(maxDoc);
- if (random().nextBoolean()) {
- // try upgrades
- final int doc = random().nextInt(maxDoc);
- ref.set(doc);
- builder.or(new RoaringDocIdSet.Builder(maxDoc).add(doc).build().iterator());
- }
for (int i = 0; i < numIterators; ++i) {
RoaringDocIdSet.Builder b = new RoaringDocIdSet.Builder(maxDoc);
- for (int doc = random().nextInt(1000); doc < maxDoc; doc += 1 + random().nextInt(1000)) {
+ for (int doc = random().nextInt(1000); doc < maxDoc; doc += 1 + random().nextInt(100)) {
b.add(doc);
ref.set(doc);
}
- builder.or(b.build().iterator());
+ builder.add(b.build().iterator());
}
DocIdSet result = builder.build();
assertTrue(result instanceof BitDocIdSet);
assertEquals(new BitDocIdSet(ref), result);
}
+ public void testRandom() throws IOException {
+ final int maxDoc = TestUtil.nextInt(random(), 1, 10000000);
+ for (int i = 1 ; i < maxDoc / 2; i <<=1) {
+ final int numDocs = TestUtil.nextInt(random(), 1, i);
+ final FixedBitSet docs = new FixedBitSet(maxDoc);
+ int c = 0;
+ while (c < numDocs) {
+ final int d = random().nextInt(maxDoc);
+ if (docs.get(d) == false) {
+ docs.set(d);
+ c += 1;
+ }
+ }
+
+ final int[] array = new int[numDocs + random().nextInt(100)];
+ DocIdSetIterator it = new BitSetIterator(docs, 0L);
+ int j = 0;
+ for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) {
+ array[j++] = doc;
+ }
+ assertEquals(numDocs, j);
+
+ // add some duplicates
+ while (j < array.length) {
+ array[j++] = array[random().nextInt(numDocs)];
+ }
+
+ // shuffle
+ for (j = array.length - 1; j >= 1; --j) {
+ final int k = random().nextInt(j);
+ int tmp = array[j];
+ array[j] = array[k];
+ array[k] = tmp;
+ }
+
+ // add docs out of order
+ DocIdSetBuilder builder = new DocIdSetBuilder(maxDoc);
+ for (j = 0; j < array.length; ) {
+ final int l = TestUtil.nextInt(random(), 1, array.length - j);
+ if (rarely()) {
+ builder.grow(l);
+ }
+ for (int k = 0; k < l; ++k) {
+ builder.add(array[j++]);
+ }
+ }
+
+ final DocIdSet expected = new BitDocIdSet(docs);
+ final DocIdSet actual = builder.build();
+ assertEquals(expected, actual);
+ }
+ }
+
+ public void testMisleadingDISICost() throws IOException {
+ final int maxDoc = TestUtil.nextInt(random(), 1000, 10000);
+ DocIdSetBuilder builder = new DocIdSetBuilder(maxDoc);
+ FixedBitSet expected = new FixedBitSet(maxDoc);
+
+ for (int i = 0; i < 10; ++i) {
+ final FixedBitSet docs = new FixedBitSet(maxDoc);
+ final int numDocs = random().nextInt(maxDoc / 1000);
+ for (int j = 0; j < numDocs; ++j) {
+ docs.set(random().nextInt(maxDoc));
+ }
+ expected.or(docs);
+ // We provide a cost of 0 here to make sure the builder can deal with wrong costs
+ builder.add(new BitSetIterator(docs, 0L));
+ }
+
+ assertEquals(new BitDocIdSet(expected), builder.build());
+ }
+
}
Modified: lucene/dev/branches/branch_5x/lucene/join/src/java/org/apache/lucene/search/join/QueryBitSetProducer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/join/src/java/org/apache/lucene/search/join/QueryBitSetProducer.java?rev=1690041&r1=1690040&r2=1690041&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/join/src/java/org/apache/lucene/search/join/QueryBitSetProducer.java (original)
+++ lucene/dev/branches/branch_5x/lucene/join/src/java/org/apache/lucene/search/join/QueryBitSetProducer.java Thu Jul 9 09:48:34 2015
@@ -70,14 +70,10 @@ public class QueryBitSetProducer impleme
final Weight weight = searcher.createNormalizedWeight(query, false);
final DocIdSetIterator it = weight.scorer(context);
- BitDocIdSet.Builder builder = new BitDocIdSet.Builder(context.reader().maxDoc());
- if (it != null) {
- builder.or(it);
- }
- docIdSet = builder.build();
- if (docIdSet == null) {
- // We use EMPTY as a sentinel for the empty set, which is cacheable
+ if (it == null) {
docIdSet = DocIdSet.EMPTY;
+ } else {
+ docIdSet = new BitDocIdSet(BitSet.of(it, context.reader().maxDoc()));
}
cache.put(key, docIdSet);
}
Modified: lucene/dev/branches/branch_5x/lucene/misc/src/test/org/apache/lucene/index/TestBlockJoinSorter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/misc/src/test/org/apache/lucene/index/TestBlockJoinSorter.java?rev=1690041&r1=1690040&r2=1690041&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/misc/src/test/org/apache/lucene/index/TestBlockJoinSorter.java (original)
+++ lucene/dev/branches/branch_5x/lucene/misc/src/test/org/apache/lucene/index/TestBlockJoinSorter.java Thu Jul 9 09:48:34 2015
@@ -48,6 +48,7 @@ import org.apache.lucene.search.SortFiel
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BitDocIdSet;
+import org.apache.lucene.util.BitSet;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.LuceneTestCase;
@@ -74,9 +75,7 @@ public class TestBlockJoinSorter extends
final DocIdSet uncached = filter.getDocIdSet(context, null);
final DocIdSetIterator it = uncached == null ? null : uncached.iterator();
if (it != null) {
- BitDocIdSet.Builder builder = new BitDocIdSet.Builder(context.reader().maxDoc());
- builder.or(it);
- docIdSet = builder.build();
+ docIdSet = new BitDocIdSet(BitSet.of(it, context.reader().maxDoc()));
}
if (docIdSet == null) {
docIdSet = new BitDocIdSet(new SparseFixedBitSet(context.reader().maxDoc()));
Modified: lucene/dev/branches/branch_5x/lucene/queries/src/java/org/apache/lucene/queries/TermsQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/queries/src/java/org/apache/lucene/queries/TermsQuery.java?rev=1690041&r1=1690040&r2=1690041&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/queries/src/java/org/apache/lucene/queries/TermsQuery.java (original)
+++ lucene/dev/branches/branch_5x/lucene/queries/src/java/org/apache/lucene/queries/TermsQuery.java Thu Jul 9 09:48:34 2015
@@ -44,6 +44,7 @@ import org.apache.lucene.search.BulkScor
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.ConstantScoreScorer;
import org.apache.lucene.search.ConstantScoreWeight;
+import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
@@ -52,9 +53,8 @@ import org.apache.lucene.search.TermQuer
import org.apache.lucene.search.Weight;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.ArrayUtil;
-import org.apache.lucene.util.BitDocIdSet;
-import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.DocIdSetBuilder;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.ToStringUtils;
@@ -222,17 +222,17 @@ public class TermsQuery extends Query im
}
}
- private static class WeightOrBitSet {
+ private static class WeightOrDocIdSet {
final Weight weight;
- final BitDocIdSet bitset;
+ final DocIdSet set;
- WeightOrBitSet(Weight weight) {
+ WeightOrDocIdSet(Weight weight) {
this.weight = Objects.requireNonNull(weight);
- this.bitset = null;
+ this.set = null;
}
- WeightOrBitSet(BitDocIdSet bitset) {
- this.bitset = bitset;
+ WeightOrDocIdSet(DocIdSet bitset) {
+ this.set = bitset;
this.weight = null;
}
}
@@ -253,7 +253,7 @@ public class TermsQuery extends Query im
* On the given leaf context, try to either rewrite to a disjunction if
* there are few matching terms, or build a bitset containing matching docs.
*/
- private WeightOrBitSet rewrite(LeafReaderContext context) throws IOException {
+ private WeightOrDocIdSet rewrite(LeafReaderContext context) throws IOException {
final LeafReader reader = context.reader();
// We will first try to collect up to 'threshold' terms into 'matchingTerms'
@@ -261,7 +261,7 @@ public class TermsQuery extends Query im
final int threshold = Math.min(BOOLEAN_REWRITE_TERM_COUNT_THRESHOLD, BooleanQuery.getMaxClauseCount());
assert termData.size() > threshold : "Query should have been rewritten";
List<TermAndState> matchingTerms = new ArrayList<>(threshold);
- BitDocIdSet.Builder builder = null;
+ DocIdSetBuilder builder = null;
final Fields fields = reader.fields();
String lastField = null;
@@ -284,18 +284,18 @@ public class TermsQuery extends Query im
if (termsEnum != null && termsEnum.seekExact(term)) {
if (matchingTerms == null) {
docs = termsEnum.postings(docs, PostingsEnum.NONE);
- builder.or(docs);
+ builder.add(docs);
} else if (matchingTerms.size() < threshold) {
matchingTerms.add(new TermAndState(field, termsEnum));
} else {
assert matchingTerms.size() == threshold;
- builder = new BitDocIdSet.Builder(reader.maxDoc());
+ builder = new DocIdSetBuilder(reader.maxDoc());
docs = termsEnum.postings(docs, PostingsEnum.NONE);
- builder.or(docs);
+ builder.add(docs);
for (TermAndState t : matchingTerms) {
t.termsEnum.seekExact(t.term, t.state);
docs = t.termsEnum.postings(docs, PostingsEnum.NONE);
- builder.or(docs);
+ builder.add(docs);
}
matchingTerms = null;
}
@@ -311,14 +311,14 @@ public class TermsQuery extends Query im
}
Query q = new ConstantScoreQuery(bq);
q.setBoost(score());
- return new WeightOrBitSet(searcher.rewrite(q).createWeight(searcher, needsScores));
+ return new WeightOrDocIdSet(searcher.rewrite(q).createWeight(searcher, needsScores));
} else {
assert builder != null;
- return new WeightOrBitSet(builder.build());
+ return new WeightOrDocIdSet(builder.build());
}
}
- private Scorer scorer(BitDocIdSet set) {
+ private Scorer scorer(DocIdSet set) throws IOException {
if (set == null) {
return null;
}
@@ -331,11 +331,11 @@ public class TermsQuery extends Query im
@Override
public BulkScorer bulkScorer(LeafReaderContext context) throws IOException {
- final WeightOrBitSet weightOrBitSet = rewrite(context);
+ final WeightOrDocIdSet weightOrBitSet = rewrite(context);
if (weightOrBitSet.weight != null) {
return weightOrBitSet.weight.bulkScorer(context);
} else {
- final Scorer scorer = scorer(weightOrBitSet.bitset);
+ final Scorer scorer = scorer(weightOrBitSet.set);
if (scorer == null) {
return null;
}
@@ -345,11 +345,11 @@ public class TermsQuery extends Query im
@Override
public Scorer scorer(LeafReaderContext context) throws IOException {
- final WeightOrBitSet weightOrBitSet = rewrite(context);
+ final WeightOrDocIdSet weightOrBitSet = rewrite(context);
if (weightOrBitSet.weight != null) {
return weightOrBitSet.weight.scorer(context);
} else {
- return scorer(weightOrBitSet.bitset);
+ return scorer(weightOrBitSet.set);
}
}
};
Modified: lucene/dev/branches/branch_5x/lucene/sandbox/src/java/org/apache/lucene/bkdtree/BKDTreeReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/sandbox/src/java/org/apache/lucene/bkdtree/BKDTreeReader.java?rev=1690041&r1=1690040&r2=1690041&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/sandbox/src/java/org/apache/lucene/bkdtree/BKDTreeReader.java (original)
+++ lucene/dev/branches/branch_5x/lucene/sandbox/src/java/org/apache/lucene/bkdtree/BKDTreeReader.java Thu Jul 9 09:48:34 2015
@@ -22,9 +22,7 @@ import org.apache.lucene.search.DocIdSet
import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.Accountable;
-import org.apache.lucene.util.BitDocIdSet;
-import org.apache.lucene.util.Bits;
-import org.apache.lucene.util.FixedBitSet;
+import org.apache.lucene.util.DocIdSetBuilder;
import org.apache.lucene.util.RamUsageEstimator;
import java.io.IOException;
@@ -73,7 +71,7 @@ final class BKDTreeReader implements Acc
final IndexInput in;
byte[] scratch = new byte[16];
final ByteArrayDataInput scratchReader = new ByteArrayDataInput(scratch);
- final FixedBitSet bits;
+ final DocIdSetBuilder docs;
final int latMinEnc;
final int latMaxEnc;
final int lonMinEnc;
@@ -87,7 +85,7 @@ final class BKDTreeReader implements Acc
LatLonFilter latLonFilter,
SortedNumericDocValues sndv) {
this.in = in;
- this.bits = new FixedBitSet(maxDoc);
+ this.docs = new DocIdSetBuilder(maxDoc);
this.latMinEnc = latMinEnc;
this.latMaxEnc = latMaxEnc;
this.lonMinEnc = lonMinEnc;
@@ -137,7 +135,7 @@ final class BKDTreeReader implements Acc
BKDTreeWriter.encodeLon(Math.nextAfter(180.0, Double.POSITIVE_INFINITY)));
// NOTE: hitCount is an over-estimate in the multi-valued case:
- return new BitDocIdSet(state.bits, hitCount);
+ return state.docs.build(hitCount);
}
/** Fast path: this is called when the query rect fully encompasses all cells under this node. */
@@ -169,9 +167,10 @@ final class BKDTreeReader implements Acc
//System.out.println(" seek to leafFP=" + fp);
// How many points are stored in this leaf cell:
int count = state.in.readVInt();
+ state.docs.grow(count);
for(int i=0;i<count;i++) {
int docID = state.in.readInt();
- state.bits.set(docID);
+ state.docs.add(docID);
}
//bits.or(allLeafDISI);
@@ -264,6 +263,7 @@ final class BKDTreeReader implements Acc
// How many points are stored in this leaf cell:
int count = state.in.readVInt();
+ state.docs.grow(count);
for(int i=0;i<count;i++) {
int docID = state.in.readInt();
state.sndv.setDocument(docID);
@@ -281,7 +281,7 @@ final class BKDTreeReader implements Acc
lonEnc < state.lonMaxEnc &&
(state.latLonFilter == null ||
state.latLonFilter.accept(BKDTreeWriter.decodeLat(latEnc), BKDTreeWriter.decodeLon(lonEnc)))) {
- state.bits.set(docID);
+ state.docs.add(docID);
hitCount++;
// Stop processing values for this doc:
Modified: lucene/dev/branches/branch_5x/lucene/test-framework/src/java/org/apache/lucene/search/SearchEquivalenceTestBase.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/test-framework/src/java/org/apache/lucene/search/SearchEquivalenceTestBase.java?rev=1690041&r1=1690040&r2=1690041&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/test-framework/src/java/org/apache/lucene/search/SearchEquivalenceTestBase.java (original)
+++ lucene/dev/branches/branch_5x/lucene/test-framework/src/java/org/apache/lucene/search/SearchEquivalenceTestBase.java Thu Jul 9 09:48:34 2015
@@ -17,7 +17,6 @@ package org.apache.lucene.search;
* limitations under the License.
*/
-import java.io.IOException;
import java.util.BitSet;
import java.util.Random;
@@ -29,13 +28,10 @@ import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.store.Directory;
-import org.apache.lucene.util.BitDocIdSet;
-import org.apache.lucene.util.Bits;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
import org.apache.lucene.util.TestUtil;
@@ -145,7 +141,7 @@ public abstract class SearchEquivalenceT
/**
* Returns a random filter over the document set
*/
- protected Filter randomFilter() {
+ protected Query randomFilter() {
final Query query;
if (random().nextBoolean()) {
query = TermRangeQuery.newStringRange("field", "a", "" + randomChar(), true, true);
@@ -157,89 +153,7 @@ public abstract class SearchEquivalenceT
phrase.setSlop(100);
query = phrase;
}
-
- // now wrap the query as a filter. QWF has its own codepath
- if (random().nextBoolean()) {
- return new QueryWrapperFilter(query);
- } else {
- return new SlowWrapperFilter(query, random().nextBoolean());
- }
- }
-
- static class SlowWrapperFilter extends Filter {
- final Query query;
- final boolean useBits;
-
- SlowWrapperFilter(Query query, boolean useBits) {
- this.query = query;
- this.useBits = useBits;
- }
-
- @Override
- public Query rewrite(IndexReader reader) throws IOException {
- Query q = query.rewrite(reader);
- if (q != query) {
- return new SlowWrapperFilter(q, useBits);
- } else {
- return super.rewrite(reader);
- }
- }
-
- @Override
- public DocIdSet getDocIdSet(final LeafReaderContext context, final Bits acceptDocs) throws IOException {
- // get a private context that is used to rewrite, createWeight and score eventually
- final LeafReaderContext privateContext = context.reader().getContext();
- final Weight weight = new IndexSearcher(privateContext).createNormalizedWeight(query, false);
- return new DocIdSet() {
- @Override
- public DocIdSetIterator iterator() throws IOException {
- return weight.scorer(privateContext);
- }
-
- @Override
- public long ramBytesUsed() {
- return 0L;
- }
-
- @Override
- public Bits bits() throws IOException {
- if (useBits) {
- BitDocIdSet.Builder builder = new BitDocIdSet.Builder(context.reader().maxDoc());
- DocIdSetIterator disi = iterator();
- if (disi != null) {
- builder.or(disi);
- }
- BitDocIdSet bitset = builder.build();
- if (bitset == null) {
- return new Bits.MatchNoBits(context.reader().maxDoc());
- } else {
- return bitset.bits();
- }
- } else {
- return null;
- }
- }
- };
- }
-
- @Override
- public String toString(String field) {
- return "SlowQWF(" + query + ")";
- }
-
- @Override
- public boolean equals(Object obj) {
- if (super.equals(obj) == false) {
- return false;
- }
- return query.equals(((SlowWrapperFilter) obj).query);
- }
-
- @Override
- public int hashCode() {
- return 31 * super.hashCode() + query.hashCode();
- }
-
+ return query;
}
/**
@@ -262,13 +176,10 @@ public abstract class SearchEquivalenceT
// test with some filters (this will sometimes cause advance'ing enough to test it)
int numFilters = TEST_NIGHTLY ? atLeast(10) : atLeast(3);
for (int i = 0; i < numFilters; i++) {
- Filter filter = randomFilter();
+ Query filter = randomFilter();
// incorporate the filter in different ways.
assertSubsetOf(q1, q2, filter);
assertSubsetOf(filteredQuery(q1, filter), filteredQuery(q2, filter), null);
- assertSubsetOf(filteredQuery(q1, filter), filteredBooleanQuery(q2, filter), null);
- assertSubsetOf(filteredBooleanQuery(q1, filter), filteredBooleanQuery(q2, filter), null);
- assertSubsetOf(filteredBooleanQuery(q1, filter), filteredQuery(q2, filter), null);
}
}
@@ -278,13 +189,13 @@ public abstract class SearchEquivalenceT
*
* Both queries will be filtered by <code>filter</code>
*/
- protected void assertSubsetOf(Query q1, Query q2, Filter filter) throws Exception {
+ protected void assertSubsetOf(Query q1, Query q2, Query filter) throws Exception {
QueryUtils.check(q1);
QueryUtils.check(q2);
if (filter != null) {
- q1 = new FilteredQuery(q1, filter);
- q2 = new FilteredQuery(q2, filter);
+ q1 = filteredQuery(q1, filter);
+ q2 = filteredQuery(q2, filter);
}
// we test both INDEXORDER and RELEVANCE because we want to test needsScores=true/false
for (Sort sort : new Sort[] { Sort.INDEXORDER, Sort.RELEVANCE }) {
@@ -316,21 +227,18 @@ public abstract class SearchEquivalenceT
// also test with some filters to test advancing
int numFilters = TEST_NIGHTLY ? atLeast(10) : atLeast(3);
for (int i = 0; i < numFilters; i++) {
- Filter filter = randomFilter();
+ Query filter = randomFilter();
// incorporate the filter in different ways.
assertSameScores(q1, q2, filter);
assertSameScores(filteredQuery(q1, filter), filteredQuery(q2, filter), null);
- assertSameScores(filteredQuery(q1, filter), filteredBooleanQuery(q2, filter), null);
- assertSameScores(filteredBooleanQuery(q1, filter), filteredBooleanQuery(q2, filter), null);
- assertSameScores(filteredBooleanQuery(q1, filter), filteredQuery(q2, filter), null);
}
}
- protected void assertSameScores(Query q1, Query q2, Filter filter) throws Exception {
+ protected void assertSameScores(Query q1, Query q2, Query filter) throws Exception {
// not efficient, but simple!
if (filter != null) {
- q1 = new FilteredQuery(q1, filter);
- q2 = new FilteredQuery(q2, filter);
+ q1 = filteredQuery(q1, filter);
+ q2 = filteredQuery(q2, filter);
}
TopDocs td1 = s1.search(q1, reader.maxDoc());
TopDocs td2 = s2.search(q2, reader.maxDoc());
@@ -341,11 +249,7 @@ public abstract class SearchEquivalenceT
}
}
- protected Query filteredQuery(Query query, Filter filter) {
- return new FilteredQuery(query, filter, TestUtil.randomFilterStrategy(random()));
- }
-
- protected Query filteredBooleanQuery(Query query, Filter filter) {
+ protected Query filteredQuery(Query query, Query filter) {
BooleanQuery bq = new BooleanQuery();
bq.add(query, Occur.MUST);
bq.add(filter, Occur.FILTER);