You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by jp...@apache.org on 2015/05/21 22:16:45 UTC
svn commit: r1680948 - in /lucene/dev/branches/branch_5x: ./ lucene/
lucene/core/ lucene/core/src/java/org/apache/lucene/index/
lucene/core/src/java/org/apache/lucene/search/
lucene/core/src/test/org/apache/lucene/index/ lucene/queries/
lucene/queries/...
Author: jpountz
Date: Thu May 21 20:16:44 2015
New Revision: 1680948
URL: http://svn.apache.org/r1680948
Log:
LUCENE-6360: Make TermsQuery rewrite as a disjunction when there are few terms.
Modified:
lucene/dev/branches/branch_5x/ (props changed)
lucene/dev/branches/branch_5x/lucene/ (props changed)
lucene/dev/branches/branch_5x/lucene/CHANGES.txt (contents, props changed)
lucene/dev/branches/branch_5x/lucene/core/ (props changed)
lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/index/CoalescedUpdates.java
lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/index/FrozenBufferedUpdates.java
lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/index/PrefixCodedTerms.java
lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/Weight.java
lucene/dev/branches/branch_5x/lucene/core/src/test/org/apache/lucene/index/TestDocumentsWriterDeleteQueue.java
lucene/dev/branches/branch_5x/lucene/core/src/test/org/apache/lucene/index/TestPrefixCodedTerms.java
lucene/dev/branches/branch_5x/lucene/queries/ (props changed)
lucene/dev/branches/branch_5x/lucene/queries/src/java/org/apache/lucene/queries/TermsQuery.java
lucene/dev/branches/branch_5x/lucene/queries/src/test/org/apache/lucene/queries/TermsQueryTest.java
Modified: lucene/dev/branches/branch_5x/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/CHANGES.txt?rev=1680948&r1=1680947&r2=1680948&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/CHANGES.txt (original)
+++ lucene/dev/branches/branch_5x/lucene/CHANGES.txt Thu May 21 20:16:44 2015
@@ -105,6 +105,9 @@ Optimizations
* LUCENE-6458: Multi-term queries matching few terms per segment now execute
like a disjunction. (Adrien Grand)
+* LUCENE-6360: TermsQuery rewrites to a disjunction when there are 16 matching
+ terms or fewer. (Adrien Grand)
+
Bug Fixes
* LUCENE-329: Fix FuzzyQuery defaults to rank exact matches highest.
(Mark Harwood, Adrien Grand)
Modified: lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/index/CoalescedUpdates.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/index/CoalescedUpdates.java?rev=1680948&r1=1680947&r2=1680948&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/index/CoalescedUpdates.java (original)
+++ lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/index/CoalescedUpdates.java Thu May 21 20:16:44 2015
@@ -34,7 +34,7 @@ class CoalescedUpdates {
final List<PrefixCodedTerms> terms = new ArrayList<>();
final List<NumericDocValuesUpdate> numericDVUpdates = new ArrayList<>();
final List<BinaryDocValuesUpdate> binaryDVUpdates = new ArrayList<>();
- int totalTermCount;
+ long totalTermCount;
@Override
public String toString() {
@@ -46,7 +46,7 @@ class CoalescedUpdates {
}
void update(FrozenBufferedUpdates in) {
- totalTermCount += in.termCount;
+ totalTermCount += in.terms.size();
terms.add(in.terms);
for (int queryIdx = 0; queryIdx < in.queries.length; queryIdx++) {
Modified: lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/index/FrozenBufferedUpdates.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/index/FrozenBufferedUpdates.java?rev=1680948&r1=1680947&r2=1680948&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/index/FrozenBufferedUpdates.java (original)
+++ lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/index/FrozenBufferedUpdates.java Thu May 21 20:16:44 2015
@@ -44,7 +44,6 @@ class FrozenBufferedUpdates {
// Terms, in sorted order:
final PrefixCodedTerms terms;
- int termCount; // just for debugging
// Parallel array of deleted query, and the docIDUpto for each
final Query[] queries;
@@ -68,7 +67,6 @@ class FrozenBufferedUpdates {
this.isSegmentPrivate = isSegmentPrivate;
assert !isSegmentPrivate || deletes.terms.size() == 0 : "segment private package should only have del queries";
Term termsArray[] = deletes.terms.keySet().toArray(new Term[deletes.terms.size()]);
- termCount = termsArray.length;
ArrayUtil.timSort(termsArray);
PrefixCodedTerms.Builder builder = new PrefixCodedTerms.Builder();
for (Term term : termsArray) {
@@ -167,7 +165,7 @@ class FrozenBufferedUpdates {
public String toString() {
String s = "";
if (numTermDeletes != 0) {
- s += " " + numTermDeletes + " deleted terms (unique count=" + termCount + ")";
+ s += " " + numTermDeletes + " deleted terms (unique count=" + terms.size() + ")";
}
if (queries.length != 0) {
s += " " + queries.length + " deleted queries";
@@ -180,6 +178,6 @@ class FrozenBufferedUpdates {
}
boolean any() {
- return termCount > 0 || queries.length > 0 || numericDVUpdates.length > 0 || binaryDVUpdates.length > 0;
+ return terms.size() > 0 || queries.length > 0 || numericDVUpdates.length > 0 || binaryDVUpdates.length > 0;
}
}
Modified: lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/index/PrefixCodedTerms.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/index/PrefixCodedTerms.java?rev=1680948&r1=1680947&r2=1680948&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/index/PrefixCodedTerms.java (original)
+++ lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/index/PrefixCodedTerms.java Thu May 21 20:16:44 2015
@@ -29,6 +29,7 @@ import org.apache.lucene.store.RAMOutput
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
+import org.apache.lucene.util.RamUsageEstimator;
/**
* Prefix codes term instances (prefixes are shared)
@@ -36,15 +37,17 @@ import org.apache.lucene.util.BytesRefBu
*/
public class PrefixCodedTerms implements Accountable {
final RAMFile buffer;
+ private final long size;
private long delGen;
- private PrefixCodedTerms(RAMFile buffer) {
+ private PrefixCodedTerms(RAMFile buffer, long size) {
this.buffer = Objects.requireNonNull(buffer);
+ this.size = size;
}
@Override
public long ramBytesUsed() {
- return buffer.ramBytesUsed();
+ return buffer.ramBytesUsed() + 2 * RamUsageEstimator.NUM_BYTES_LONG;
}
@Override
@@ -63,6 +66,7 @@ public class PrefixCodedTerms implements
private RAMOutputStream output = new RAMOutputStream(buffer, false);
private Term lastTerm = new Term("");
private BytesRefBuilder lastTermBytes = new BytesRefBuilder();
+ private long size;
/** Sole constructor. */
public Builder() {}
@@ -85,6 +89,7 @@ public class PrefixCodedTerms implements
lastTermBytes.copyBytes(term.bytes);
lastTerm.bytes = lastTermBytes.get();
lastTerm.field = term.field;
+ size += 1;
} catch (IOException e) {
throw new RuntimeException(e);
}
@@ -94,7 +99,7 @@ public class PrefixCodedTerms implements
public PrefixCodedTerms finish() {
try {
output.close();
- return new PrefixCodedTerms(buffer);
+ return new PrefixCodedTerms(buffer, size);
} catch (IOException e) {
throw new RuntimeException(e);
}
@@ -179,6 +184,11 @@ public class PrefixCodedTerms implements
return new TermIterator(delGen, buffer);
}
+ /** Return the number of terms stored in this {@link PrefixCodedTerms}. */
+ public long size() {
+ return size;
+ }
+
@Override
public int hashCode() {
int h = buffer.hashCode();
Modified: lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/Weight.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/Weight.java?rev=1680948&r1=1680947&r2=1680948&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/Weight.java (original)
+++ lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/Weight.java Thu May 21 20:16:44 2015
@@ -145,10 +145,12 @@ public abstract class Weight {
return new DefaultBulkScorer(scorer);
}
- /** Just wraps a Scorer and performs top scoring using it. */
- static class DefaultBulkScorer extends BulkScorer {
+ /** Just wraps a Scorer and performs top scoring using it.
+ * @lucene.internal */
+ protected static class DefaultBulkScorer extends BulkScorer {
private final Scorer scorer;
+ /** Sole constructor. */
public DefaultBulkScorer(Scorer scorer) {
if (scorer == null) {
throw new NullPointerException();
Modified: lucene/dev/branches/branch_5x/lucene/core/src/test/org/apache/lucene/index/TestDocumentsWriterDeleteQueue.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/core/src/test/org/apache/lucene/index/TestDocumentsWriterDeleteQueue.java?rev=1680948&r1=1680947&r2=1680948&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/core/src/test/org/apache/lucene/index/TestDocumentsWriterDeleteQueue.java (original)
+++ lucene/dev/branches/branch_5x/lucene/core/src/test/org/apache/lucene/index/TestDocumentsWriterDeleteQueue.java Thu May 21 20:16:44 2015
@@ -136,7 +136,7 @@ public class TestDocumentsWriterDeleteQu
assertTrue(queue.anyChanges());
if (random().nextInt(5) == 0) {
FrozenBufferedUpdates freezeGlobalBuffer = queue.freezeGlobalBuffer(null);
- assertEquals(termsSinceFreeze, freezeGlobalBuffer.termCount);
+ assertEquals(termsSinceFreeze, freezeGlobalBuffer.terms.size());
assertEquals(queriesSinceFreeze, freezeGlobalBuffer.queries.length);
queriesSinceFreeze = 0;
termsSinceFreeze = 0;
@@ -168,7 +168,7 @@ public class TestDocumentsWriterDeleteQu
assertTrue("changes in global buffer", queue.anyChanges());
FrozenBufferedUpdates freezeGlobalBuffer = queue.freezeGlobalBuffer(null);
assertTrue(freezeGlobalBuffer.any());
- assertEquals(1, freezeGlobalBuffer.termCount);
+ assertEquals(1, freezeGlobalBuffer.terms.size());
assertFalse("all changes applied", queue.anyChanges());
}
Modified: lucene/dev/branches/branch_5x/lucene/core/src/test/org/apache/lucene/index/TestPrefixCodedTerms.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/core/src/test/org/apache/lucene/index/TestPrefixCodedTerms.java?rev=1680948&r1=1680947&r2=1680948&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/core/src/test/org/apache/lucene/index/TestPrefixCodedTerms.java (original)
+++ lucene/dev/branches/branch_5x/lucene/core/src/test/org/apache/lucene/index/TestPrefixCodedTerms.java Thu May 21 20:16:44 2015
@@ -64,6 +64,7 @@ public class TestPrefixCodedTerms extend
TermIterator iter = pb.iterator();
Iterator<Term> expected = terms.iterator();
+ assertEquals(terms.size(), pb.size());
//System.out.println("TEST: now iter");
while (iter.next() != null) {
assertTrue(expected.hasNext());
Modified: lucene/dev/branches/branch_5x/lucene/queries/src/java/org/apache/lucene/queries/TermsQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/queries/src/java/org/apache/lucene/queries/TermsQuery.java?rev=1680948&r1=1680947&r2=1680948&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/queries/src/java/org/apache/lucene/queries/TermsQuery.java (original)
+++ lucene/dev/branches/branch_5x/lucene/queries/src/java/org/apache/lucene/queries/TermsQuery.java Thu May 21 20:16:44 2015
@@ -18,22 +18,29 @@ package org.apache.lucene.queries;
*/
import java.io.IOException;
+import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
+import java.util.Objects;
import java.util.Set;
import org.apache.lucene.index.Fields;
+import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.PrefixCodedTerms;
import org.apache.lucene.index.PrefixCodedTerms.TermIterator;
import org.apache.lucene.index.Term;
+import org.apache.lucene.index.TermContext;
+import org.apache.lucene.index.TermState;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
+import org.apache.lucene.search.BulkScorer;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.ConstantScoreScorer;
import org.apache.lucene.search.ConstantScoreWeight;
@@ -41,6 +48,7 @@ import org.apache.lucene.search.DocIdSet
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Scorer;
+import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.Weight;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.ArrayUtil;
@@ -58,21 +66,23 @@ import org.apache.lucene.util.ToStringUt
* would yield the same scores:
* <pre class="prettyprint">
* Query q1 = new TermsQuery(new Term("field", "foo"), new Term("field", "bar"));
- *
+ *
* BooleanQuery bq = new BooleanQuery();
* bq.add(new TermQuery(new Term("field", "foo")), Occur.SHOULD);
* bq.add(new TermQuery(new Term("field", "bar")), Occur.SHOULD);
* Query q2 = new ConstantScoreQuery(bq);
* </pre>
- * <p>This query creates a bit set and sets bits that match any of the
- * wrapped terms. While this might help performance when there are many terms,
- * it would be slower than a {@link BooleanQuery} when there are few terms to
- * match.
+ * <p>When there are few terms, this query executes like a regular disjunction.
+ * However, when there are many terms, instead of merging iterators on the fly,
+ * it will populate a bit set with matching docs and return a {@link Scorer}
+ * over this bit set.
* <p>NOTE: This query produces scores that are equal to its boost
*/
public class TermsQuery extends Query implements Accountable {
private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(TermsQuery.class);
+ // Same threshold as MultiTermQueryConstantScoreWrapper
+ static final int BOOLEAN_REWRITE_TERM_COUNT_THRESHOLD = 16;
private final PrefixCodedTerms termData;
private final int termDataHashCode; // cached hashcode of termData
@@ -131,6 +141,23 @@ public class TermsQuery extends Query im
}
@Override
+ public Query rewrite(IndexReader reader) throws IOException {
+ final int threshold = Math.min(BOOLEAN_REWRITE_TERM_COUNT_THRESHOLD, BooleanQuery.getMaxClauseCount());
+ if (termData.size() <= threshold) {
+ BooleanQuery bq = new BooleanQuery();
+ TermIterator iterator = termData.iterator();
+ for (BytesRef term = iterator.next(); term != null; term = iterator.next()) {
+ bq.add(new TermQuery(new Term(iterator.field(), BytesRef.deepCopyOf(term))), Occur.SHOULD);
+ }
+ assert bq.clauses().size() == termData.size();
+ ConstantScoreQuery csq = new ConstantScoreQuery(bq);
+ csq.setBoost(getBoost());
+ return csq;
+ }
+ return super.rewrite(reader);
+ }
+
+ @Override
public boolean equals(Object obj) {
if (this == obj) {
return true;
@@ -177,8 +204,41 @@ public class TermsQuery extends Query im
return Collections.emptyList();
}
+ private static class TermAndState {
+ final String field;
+ final TermsEnum termsEnum;
+ final BytesRef term;
+ final TermState state;
+ final int docFreq;
+ final long totalTermFreq;
+
+ TermAndState(String field, TermsEnum termsEnum) throws IOException {
+ this.field = field;
+ this.termsEnum = termsEnum;
+ this.term = BytesRef.deepCopyOf(termsEnum.term());
+ this.state = termsEnum.termState();
+ this.docFreq = termsEnum.docFreq();
+ this.totalTermFreq = termsEnum.totalTermFreq();
+ }
+ }
+
+ private static class WeightOrBitSet {
+ final Weight weight;
+ final BitDocIdSet bitset;
+
+ WeightOrBitSet(Weight weight) {
+ this.weight = Objects.requireNonNull(weight);
+ this.bitset = null;
+ }
+
+ WeightOrBitSet(BitDocIdSet bitset) {
+ this.bitset = bitset;
+ this.weight = null;
+ }
+ }
+
@Override
- public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
+ public Weight createWeight(final IndexSearcher searcher, final boolean needsScores) throws IOException {
return new ConstantScoreWeight(this) {
@Override
@@ -189,10 +249,20 @@ public class TermsQuery extends Query im
// order to protect highlighters
}
- @Override
- public Scorer scorer(LeafReaderContext context, Bits acceptDocs) throws IOException {
+ /**
+ * On the given leaf context, try to either rewrite to a disjunction if
+ * there are few matching terms, or build a bitset containing matching docs.
+ */
+ private WeightOrBitSet rewrite(LeafReaderContext context, Bits acceptDocs) throws IOException {
final LeafReader reader = context.reader();
- BitDocIdSet.Builder builder = new BitDocIdSet.Builder(reader.maxDoc());
+
+ // We will first try to collect up to 'threshold' terms into 'matchingTerms'
+ // if there are too many terms, we will fall back to building the 'builder'
+ final int threshold = Math.min(BOOLEAN_REWRITE_TERM_COUNT_THRESHOLD, BooleanQuery.getMaxClauseCount());
+ assert termData.size() > threshold : "Query should have been rewritten";
+ List<TermAndState> matchingTerms = new ArrayList<>(threshold);
+ BitDocIdSet.Builder builder = null;
+
final Fields fields = reader.fields();
String lastField = null;
Terms terms = null;
@@ -209,24 +279,79 @@ public class TermsQuery extends Query im
} else {
termsEnum = terms.iterator();
}
+ lastField = field;
}
if (termsEnum != null && termsEnum.seekExact(term)) {
- docs = termsEnum.postings(acceptDocs, docs, PostingsEnum.NONE);
- builder.or(docs);
+ if (matchingTerms == null) {
+ docs = termsEnum.postings(acceptDocs, docs, PostingsEnum.NONE);
+ builder.or(docs);
+ } else if (matchingTerms.size() < threshold) {
+ matchingTerms.add(new TermAndState(field, termsEnum));
+ } else {
+ assert matchingTerms.size() == threshold;
+ builder = new BitDocIdSet.Builder(reader.maxDoc());
+ docs = termsEnum.postings(acceptDocs, docs, PostingsEnum.NONE);
+ builder.or(docs);
+ for (TermAndState t : matchingTerms) {
+ t.termsEnum.seekExact(t.term, t.state);
+ docs = t.termsEnum.postings(acceptDocs, docs, PostingsEnum.NONE);
+ builder.or(docs);
+ }
+ matchingTerms = null;
+ }
}
}
- BitDocIdSet result = builder.build();
- if (result == null) {
- return null;
+ if (matchingTerms != null) {
+ assert builder == null;
+ BooleanQuery bq = new BooleanQuery();
+ for (TermAndState t : matchingTerms) {
+ final TermContext termContext = new TermContext(searcher.getTopReaderContext());
+ termContext.register(t.state, context.ord, t.docFreq, t.totalTermFreq);
+ bq.add(new TermQuery(new Term(t.field, t.term), termContext), Occur.SHOULD);
+ }
+ Query q = new ConstantScoreQuery(bq);
+ q.setBoost(score());
+ return new WeightOrBitSet(searcher.rewrite(q).createWeight(searcher, needsScores));
+ } else {
+ assert builder != null;
+ return new WeightOrBitSet(builder.build());
}
+ }
- final DocIdSetIterator disi = result.iterator();
+ private Scorer scorer(BitDocIdSet set) {
+ if (set == null) {
+ return null;
+ }
+ final DocIdSetIterator disi = set.iterator();
if (disi == null) {
return null;
}
-
return new ConstantScoreScorer(this, score(), disi);
}
+
+ @Override
+ public BulkScorer bulkScorer(LeafReaderContext context, Bits acceptDocs) throws IOException {
+ final WeightOrBitSet weightOrBitSet = rewrite(context, acceptDocs);
+ if (weightOrBitSet.weight != null) {
+ return weightOrBitSet.weight.bulkScorer(context, acceptDocs);
+ } else {
+ final Scorer scorer = scorer(weightOrBitSet.bitset);
+ if (scorer == null) {
+ return null;
+ }
+ return new DefaultBulkScorer(scorer);
+ }
+ }
+
+ @Override
+ public Scorer scorer(LeafReaderContext context, Bits acceptDocs) throws IOException {
+ final WeightOrBitSet weightOrBitSet = rewrite(context, acceptDocs);
+ if (weightOrBitSet.weight != null) {
+ return weightOrBitSet.weight.scorer(context, acceptDocs);
+ } else {
+ return scorer(weightOrBitSet.bitset);
+ }
+ }
};
}
}
Modified: lucene/dev/branches/branch_5x/lucene/queries/src/test/org/apache/lucene/queries/TermsQueryTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/queries/src/test/org/apache/lucene/queries/TermsQueryTest.java?rev=1680948&r1=1680947&r2=1680948&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/queries/src/test/org/apache/lucene/queries/TermsQueryTest.java (original)
+++ lucene/dev/branches/branch_5x/lucene/queries/src/test/org/apache/lucene/queries/TermsQueryTest.java Thu May 21 20:16:44 2015
@@ -25,13 +25,21 @@ import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
+import java.util.concurrent.atomic.AtomicInteger;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.StringField;
+import org.apache.lucene.index.DirectoryReader;
+import org.apache.lucene.index.Fields;
+import org.apache.lucene.index.FilterDirectoryReader;
+import org.apache.lucene.index.FilterLeafReader;
import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
+import org.apache.lucene.index.Terms;
+import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.ConstantScoreQuery;
@@ -48,6 +56,7 @@ import org.apache.lucene.util.LuceneTest
import org.apache.lucene.util.RamUsageTester;
import org.apache.lucene.util.TestUtil;
+import com.carrotsearch.randomizedtesting.generators.RandomPicks;
import com.carrotsearch.randomizedtesting.generators.RandomStrings;
public class TermsQueryTest extends LuceneTestCase {
@@ -223,4 +232,94 @@ public class TermsQueryTest extends Luce
// error margin within 5%
assertEquals(actualRamBytesUsed, expectedRamBytesUsed, actualRamBytesUsed / 20);
}
+
+ private static class TermsCountingDirectoryReaderWrapper extends FilterDirectoryReader {
+
+ private final AtomicInteger counter;
+
+ public TermsCountingDirectoryReaderWrapper(DirectoryReader in, AtomicInteger counter) throws IOException {
+ super(in, new TermsCountingSubReaderWrapper(counter));
+ this.counter = counter;
+ }
+
+ private static class TermsCountingSubReaderWrapper extends SubReaderWrapper {
+ private final AtomicInteger counter;
+
+ public TermsCountingSubReaderWrapper(AtomicInteger counter) {
+ this.counter = counter;
+ }
+
+ @Override
+ public LeafReader wrap(LeafReader reader) {
+ return new TermsCountingLeafReaderWrapper(reader, counter);
+ }
+ }
+
+ private static class TermsCountingLeafReaderWrapper extends FilterLeafReader {
+
+ private final AtomicInteger counter;
+
+ public TermsCountingLeafReaderWrapper(LeafReader in, AtomicInteger counter) {
+ super(in);
+ this.counter = counter;
+ }
+
+ @Override
+ public Fields fields() throws IOException {
+ return new FilterFields(in.fields()) {
+ @Override
+ public Terms terms(String field) throws IOException {
+ final Terms in = this.in.terms(field);
+ if (in == null) {
+ return null;
+ }
+ return new FilterTerms(in) {
+ @Override
+ public TermsEnum iterator() throws IOException {
+ counter.incrementAndGet();
+ return super.iterator();
+ }
+ };
+ }
+ };
+ }
+
+ }
+
+ @Override
+ protected DirectoryReader doWrapDirectoryReader(DirectoryReader in) throws IOException {
+ return new TermsCountingDirectoryReaderWrapper(in, counter);
+ }
+
+ }
+
+ public void testPullOneTermsEnumPerField() throws Exception {
+ Directory dir = newDirectory();
+ RandomIndexWriter w = new RandomIndexWriter(random(), dir);
+ Document doc = new Document();
+ doc.add(new StringField("foo", "1", Store.NO));
+ doc.add(new StringField("bar", "2", Store.NO));
+ doc.add(new StringField("baz", "3", Store.NO));
+ w.addDocument(doc);
+ DirectoryReader reader = w.getReader();
+ w.close();
+ final AtomicInteger counter = new AtomicInteger();
+ DirectoryReader wrapped = new TermsCountingDirectoryReaderWrapper(reader, counter);
+
+ final List<Term> terms = new ArrayList<>();
+ final Set<String> fields = new HashSet<>();
+ // enough terms to avoid the rewrite
+ final int numTerms = TestUtil.nextInt(random(), TermsQuery.BOOLEAN_REWRITE_TERM_COUNT_THRESHOLD + 1, 100);
+ for (int i = 0; i < numTerms; ++i) {
+ final String field = RandomPicks.randomFrom(random(), new String[] {"foo", "bar", "baz"});
+ final BytesRef term = new BytesRef(RandomStrings.randomUnicodeOfCodepointLength(random(), 10));
+ fields.add(field);
+ terms.add(new Term(field, term));
+ }
+
+ new IndexSearcher(wrapped).count(new TermsQuery(terms));
+ assertEquals(fields.size(), counter.get());
+ wrapped.close();
+ dir.close();
+ }
}