You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by jp...@apache.org on 2015/05/21 14:30:03 UTC
svn commit: r1680856 - in /lucene/dev/trunk/lucene:
codecs/src/test/org/apache/lucene/codecs/autoprefix/
core/src/java/org/apache/lucene/codecs/
core/src/java/org/apache/lucene/index/
core/src/java/org/apache/lucene/search/ core/src/test/org/apache/lucene/search/
Author: jpountz
Date: Thu May 21 12:30:02 2015
New Revision: 1680856
URL: http://svn.apache.org/r1680856
Log:
LUCENE-6491: Forbid term queries on fake terms for now.
Modified:
lucene/dev/trunk/lucene/codecs/src/test/org/apache/lucene/codecs/autoprefix/TestAutoPrefixTerms.java
lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/BlockTermState.java
lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/TermContext.java
lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/TermState.java
lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/MultiTermQueryConstantScoreWrapper.java
lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/TermQuery.java
lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/TestPrefixQuery.java
lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/TestTermRangeQuery.java
Modified: lucene/dev/trunk/lucene/codecs/src/test/org/apache/lucene/codecs/autoprefix/TestAutoPrefixTerms.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/codecs/src/test/org/apache/lucene/codecs/autoprefix/TestAutoPrefixTerms.java?rev=1680856&r1=1680855&r2=1680856&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/codecs/src/test/org/apache/lucene/codecs/autoprefix/TestAutoPrefixTerms.java (original)
+++ lucene/dev/trunk/lucene/codecs/src/test/org/apache/lucene/codecs/autoprefix/TestAutoPrefixTerms.java Thu May 21 12:30:02 2015
@@ -17,7 +17,9 @@ package org.apache.lucene.codecs.autopre
* limitations under the License.
*/
+import java.io.IOException;
import java.util.ArrayList;
+import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
@@ -46,14 +48,19 @@ import org.apache.lucene.index.MultiDocV
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.PostingsEnum;
+import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.SerialMergeScheduler;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.search.BooleanQuery;
+import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MultiTermQuery;
import org.apache.lucene.search.PrefixQuery;
+import org.apache.lucene.search.TermRangeQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.AttributeImpl;
+import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.LuceneTestCase;
@@ -494,7 +501,6 @@ public class TestAutoPrefixTerms extends
// 1 document has exactly "a", and 30 documents had "a?"
verifier.finish(31, maxTermsAutoPrefix);
PrefixQuery q = new PrefixQuery(new Term("field", "a"));
- q.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_BOOLEAN_REWRITE);
assertEquals(31, newSearcher(r).search(q, 1).totalHits);
r.close();
w.close();
@@ -746,4 +752,247 @@ public class TestAutoPrefixTerms extends
w.close();
dir.close();
}
+
+ /** Make sure auto prefix terms are used with TermRangeQuery */
+ public void testTermRange() throws Exception {
+
+ List<String> prefixes = new ArrayList<>();
+ for(int i=1;i<5;i++) {
+ char[] chars = new char[i];
+ Arrays.fill(chars, 'a');
+ prefixes.add(new String(chars));
+ }
+
+ Set<String> randomTerms = new HashSet<>();
+ int numTerms = atLeast(10000);
+ while (randomTerms.size() < numTerms) {
+ for(String prefix : prefixes) {
+ randomTerms.add(prefix + TestUtil.randomSimpleString(random()));
+ }
+ }
+
+ // We make term range aa<start> - aa<end>
+ char start;
+ char end;
+
+ int actualCount;
+ boolean startInclusive = random().nextBoolean();
+ boolean endInclusive = random().nextBoolean();
+ String startTerm;
+ String endTerm;
+
+ while (true) {
+ start = (char) TestUtil.nextInt(random(), 'a', 'm');
+ end = (char) TestUtil.nextInt(random(), start+1, 'z');
+
+ actualCount = 0;
+
+ startTerm = "aa" + start;
+ endTerm = "aa" + end;
+
+ for(String term : randomTerms) {
+ int cmpStart = startTerm.compareTo(term);
+ int cmpEnd = endTerm.compareTo(term);
+ if ((cmpStart < 0 || (startInclusive && cmpStart == 0)) &&
+ (cmpEnd > 0 || (endInclusive && cmpEnd == 0))) {
+ actualCount++;
+ }
+ }
+
+ if (actualCount > 2000) {
+ break;
+ }
+ }
+
+ if (VERBOSE) {
+ System.out.println("start " + startTerm + " inclusive? " + startInclusive);
+ System.out.println("end " + endTerm + " inclusive? " + endInclusive);
+ System.out.println("actual count " + actualCount);
+ }
+
+ Directory dir = newDirectory();
+ IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
+ int minTermsInBlock = TestUtil.nextInt(random(), 2, 100);
+ int maxTermsInBlock = Math.max(2, (minTermsInBlock-1)*2 + random().nextInt(100));
+
+ int minTermsAutoPrefix = TestUtil.nextInt(random(), 2, 100);
+ int maxTermsAutoPrefix = random().nextBoolean() ? Math.max(2, (minTermsAutoPrefix-1)*2 + random().nextInt(100)) : Integer.MAX_VALUE;
+
+ if (VERBOSE) {
+ System.out.println("minTermsAutoPrefix " + minTermsAutoPrefix);
+ System.out.println("maxTermsAutoPrefix " + maxTermsAutoPrefix);
+ }
+
+ iwc.setCodec(TestUtil.alwaysPostingsFormat(new AutoPrefixPostingsFormat(minTermsInBlock, maxTermsInBlock,
+ minTermsAutoPrefix, maxTermsAutoPrefix)));
+ RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
+
+ if (VERBOSE) {
+ System.out.println("TEST: index terms");
+ }
+ for (String term : randomTerms) {
+ Document doc = new Document();
+ doc.add(new StringField("field", term, Field.Store.NO));
+ w.addDocument(doc);
+ if (VERBOSE) {
+ System.out.println(" " + term);
+ }
+ }
+
+ if (VERBOSE) {
+ System.out.println("TEST: now force merge");
+ }
+
+ w.forceMerge(1);
+ IndexReader r = w.getReader();
+ final Terms terms = MultiFields.getTerms(r, "field");
+ IndexSearcher s = new IndexSearcher(r);
+ final int finalActualCount = actualCount;
+ if (VERBOSE) {
+ System.out.println("start=" + startTerm + " end=" + endTerm + " startIncl=" + startInclusive + " endIncl=" + endInclusive);
+ }
+ TermRangeQuery q = new TermRangeQuery("field", new BytesRef(startTerm), new BytesRef(endTerm), startInclusive, endInclusive) {
+ public TermRangeQuery checkTerms() throws IOException {
+ TermsEnum termsEnum = getTermsEnum(terms, new AttributeSource());
+ int count = 0;
+ while (termsEnum.next() != null) {
+ if (VERBOSE) {
+ System.out.println("got term: " + termsEnum.term().utf8ToString());
+ }
+ count++;
+ }
+ if (VERBOSE) {
+ System.out.println("count " + count + " vs finalActualCount=" + finalActualCount);
+ }
+
+ // Auto-prefix term(s) should have kicked in, so we should have visited fewer than the total number of aa* terms:
+ assertTrue(count < finalActualCount);
+
+ return this;
+ }
+ }.checkTerms();
+
+ int maxClauseCount = BooleanQuery.getMaxClauseCount();
+
+ try {
+
+ // TODO test with boolean rewrite as well once we can create term
+ // queries on fake terms
+ /*if (random().nextBoolean()) {
+ q.setRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_REWRITE);
+ BooleanQuery.setMaxClauseCount(actualCount);
+ } else if (random().nextBoolean()) {
+ q.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_BOOLEAN_REWRITE);
+ BooleanQuery.setMaxClauseCount(actualCount);
+ }*/
+
+ if (VERBOSE) {
+ System.out.println("TEST: use rewrite method " + q.getRewriteMethod());
+ }
+ assertEquals(actualCount, s.search(q, 1).totalHits);
+ } finally {
+ BooleanQuery.setMaxClauseCount(maxClauseCount);
+ }
+
+ // Test when min == max:
+ List<String> randomTermsList = new ArrayList<>(randomTerms);
+ for(int iter=0;iter<100*RANDOM_MULTIPLIER;iter++) {
+ String term = randomTermsList.get(random().nextInt(randomTermsList.size()));
+ q = new TermRangeQuery("field", new BytesRef(term), new BytesRef(term), true, true);
+ assertEquals(1, s.search(q, 1).totalHits);
+ }
+
+ r.close();
+ w.close();
+ dir.close();
+ }
+
+
+ /** Make sure auto prefix terms are used with PrefixQuery. */
+ public void testPrefixQuery() throws Exception {
+
+ List<String> prefixes = new ArrayList<>();
+ for(int i=1;i<5;i++) {
+ char[] chars = new char[i];
+ Arrays.fill(chars, 'a');
+ prefixes.add(new String(chars));
+ }
+
+ Set<String> randomTerms = new HashSet<>();
+ int numTerms = atLeast(10000);
+ while (randomTerms.size() < numTerms) {
+ for(String prefix : prefixes) {
+ randomTerms.add(prefix + TestUtil.randomRealisticUnicodeString(random()));
+ }
+ }
+
+ int actualCount = 0;
+ for(String term : randomTerms) {
+ if (term.startsWith("aa")) {
+ actualCount++;
+ }
+ }
+
+ //System.out.println("actual count " + actualCount);
+
+ Directory dir = newDirectory();
+ IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
+ int minTermsInBlock = TestUtil.nextInt(random(), 2, 100);
+ int maxTermsInBlock = Math.max(2, (minTermsInBlock-1)*2 + random().nextInt(100));
+
+ // As long as this is never > actualCount, aa should always see at least one auto-prefix term:
+ int minTermsAutoPrefix = TestUtil.nextInt(random(), 2, actualCount);
+ int maxTermsAutoPrefix = random().nextBoolean() ? Math.max(2, (minTermsAutoPrefix-1)*2 + random().nextInt(100)) : Integer.MAX_VALUE;
+
+ iwc.setCodec(TestUtil.alwaysPostingsFormat(new AutoPrefixPostingsFormat(minTermsInBlock, maxTermsInBlock,
+ minTermsAutoPrefix, maxTermsAutoPrefix)));
+ RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
+
+ for (String term : randomTerms) {
+ Document doc = new Document();
+ doc.add(new StringField("field", term, Field.Store.NO));
+ w.addDocument(doc);
+ }
+
+ w.forceMerge(1);
+ IndexReader r = w.getReader();
+ final Terms terms = MultiFields.getTerms(r, "field");
+ IndexSearcher s = new IndexSearcher(r);
+ final int finalActualCount = actualCount;
+ PrefixQuery q = new PrefixQuery(new Term("field", "aa")) {
+ public PrefixQuery checkTerms() throws IOException {
+ TermsEnum termsEnum = getTermsEnum(terms, new AttributeSource());
+ int count = 0;
+ while (termsEnum.next() != null) {
+ //System.out.println("got term: " + termsEnum.term().utf8ToString());
+ count++;
+ }
+
+ // Auto-prefix term(s) should have kicked in, so we should have visited fewer than the total number of aa* terms:
+ assertTrue(count < finalActualCount);
+
+ return this;
+ }
+ }.checkTerms();
+
+ int x = BooleanQuery.getMaxClauseCount();
+ try {
+ // TODO test with boolean rewrite as well once we can create term
+ // queries on fake terms
+ /*BooleanQuery.setMaxClauseCount(randomTerms.size());
+ if (random().nextBoolean()) {
+ q.setRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_REWRITE);
+ } else if (random().nextBoolean()) {
+ q.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_BOOLEAN_REWRITE);
+ }*/
+
+ assertEquals(actualCount, s.search(q, 1).totalHits);
+ } finally {
+ BooleanQuery.setMaxClauseCount(x);
+ }
+
+ r.close();
+ w.close();
+ dir.close();
+ }
}
Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/BlockTermState.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/BlockTermState.java?rev=1680856&r1=1680855&r2=1680856&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/BlockTermState.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/BlockTermState.java Thu May 21 12:30:02 2015
@@ -42,7 +42,7 @@ public class BlockTermState extends OrdT
/** True if this term is "real" (e.g., not an auto-prefix term or
* some other "secret" term; currently only {@link BlockTreeTermsReader}
* sets this). */
- public boolean isRealTerm;
+ public boolean isRealTerm = true;
/** Sole constructor. (For invocation by subclass
* constructors, typically implicit.) */
@@ -62,6 +62,11 @@ public class BlockTermState extends OrdT
}
@Override
+ public boolean isRealTerm() {
+ return isRealTerm;
+ }
+
+ @Override
public String toString() {
return "docFreq=" + docFreq + " totalTermFreq=" + totalTermFreq + " termBlockOrd=" + termBlockOrd + " blockFP=" + blockFilePointer + " isRealTerm=" + isRealTerm;
}
Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/TermContext.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/TermContext.java?rev=1680856&r1=1680855&r2=1680856&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/TermContext.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/TermContext.java Thu May 21 12:30:02 2015
@@ -182,12 +182,11 @@ public final class TermContext {
*
* @lucene.internal */
public boolean hasOnlyRealTerms() {
- for(TermState termState : states) {
- if (termState instanceof BlockTermState && ((BlockTermState) termState).isRealTerm == false) {
+ for (TermState termState : states) {
+ if (termState != null && termState.isRealTerm() == false) {
return false;
}
}
-
return true;
}
Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/TermState.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/TermState.java?rev=1680856&r1=1680855&r2=1680856&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/TermState.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/TermState.java Thu May 21 12:30:02 2015
@@ -50,6 +50,12 @@ public abstract class TermState implemen
}
}
+ /** Returns true if this term is real (e.g., not an auto-prefix term).
+ * @lucene.internal */
+ public boolean isRealTerm() {
+ return true;
+ }
+
@Override
public String toString() {
return "TermState";
Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/MultiTermQueryConstantScoreWrapper.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/MultiTermQueryConstantScoreWrapper.java?rev=1680856&r1=1680855&r2=1680856&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/MultiTermQueryConstantScoreWrapper.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/MultiTermQueryConstantScoreWrapper.java Thu May 21 12:30:02 2015
@@ -106,7 +106,12 @@ final class MultiTermQueryConstantScoreW
if (term == null) {
return true;
}
- terms.add(new TermAndState(BytesRef.deepCopyOf(term), termsEnum.termState(), termsEnum.docFreq(), termsEnum.totalTermFreq()));
+ TermState state = termsEnum.termState();
+ if (state.isRealTerm() == false) {
+ // TermQuery does not accept fake terms for now
+ return false;
+ }
+ terms.add(new TermAndState(BytesRef.deepCopyOf(term), state, termsEnum.docFreq(), termsEnum.totalTermFreq()));
}
return termsEnum.next() == null;
}
Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/TermQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/TermQuery.java?rev=1680856&r1=1680855&r2=1680856&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/TermQuery.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/TermQuery.java Thu May 21 12:30:02 2015
@@ -54,6 +54,8 @@ public class TermQuery extends Query {
super(TermQuery.this);
this.needsScores = needsScores;
assert termStates != null : "TermContext must not be null";
+ // checked with a real exception in TermQuery constructor
+ assert termStates.hasOnlyRealTerms();
this.termStates = termStates;
this.similarity = searcher.getSimilarity();
@@ -164,6 +166,12 @@ public class TermQuery extends Query {
public TermQuery(Term t, TermContext states) {
assert states != null;
term = Objects.requireNonNull(t);
+ if (states.hasOnlyRealTerms() == false) {
+ // The reason for this is that fake terms might have the same bytes as
+ // real terms, and this confuses query caching because they don't match
+ // the same documents
+ throw new IllegalArgumentException("Term queries must be created on real terms");
+ }
perReaderTermState = Objects.requireNonNull(states);
}
Modified: lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/TestPrefixQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/TestPrefixQuery.java?rev=1680856&r1=1680855&r2=1680856&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/TestPrefixQuery.java (original)
+++ lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/TestPrefixQuery.java Thu May 21 12:30:02 2015
@@ -17,32 +17,23 @@ package org.apache.lucene.search;
* limitations under the License.
*/
-import java.io.IOException;
import java.util.ArrayList;
-import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
-import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
-import org.apache.lucene.codecs.autoprefix.AutoPrefixPostingsFormat;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.IndexWriterConfig;
-import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
-import org.apache.lucene.index.Terms;
-import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.AttributeImpl;
-import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.StringHelper;
@@ -84,92 +75,6 @@ public class TestPrefixQuery extends Luc
directory.close();
}
- /** Make sure auto prefix terms are used with PrefixQuery. */
- public void testAutoPrefixTermsKickIn() throws Exception {
-
- List<String> prefixes = new ArrayList<>();
- for(int i=1;i<5;i++) {
- char[] chars = new char[i];
- Arrays.fill(chars, 'a');
- prefixes.add(new String(chars));
- }
-
- Set<String> randomTerms = new HashSet<>();
- int numTerms = atLeast(10000);
- while (randomTerms.size() < numTerms) {
- for(String prefix : prefixes) {
- randomTerms.add(prefix + TestUtil.randomRealisticUnicodeString(random()));
- }
- }
-
- int actualCount = 0;
- for(String term : randomTerms) {
- if (term.startsWith("aa")) {
- actualCount++;
- }
- }
-
- //System.out.println("actual count " + actualCount);
-
- Directory dir = newDirectory();
- IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
- int minTermsInBlock = TestUtil.nextInt(random(), 2, 100);
- int maxTermsInBlock = Math.max(2, (minTermsInBlock-1)*2 + random().nextInt(100));
-
- // As long as this is never > actualCount, aa should always see at least one auto-prefix term:
- int minTermsAutoPrefix = TestUtil.nextInt(random(), 2, actualCount);
- int maxTermsAutoPrefix = random().nextBoolean() ? Math.max(2, (minTermsAutoPrefix-1)*2 + random().nextInt(100)) : Integer.MAX_VALUE;
-
- iwc.setCodec(TestUtil.alwaysPostingsFormat(new AutoPrefixPostingsFormat(minTermsInBlock, maxTermsInBlock,
- minTermsAutoPrefix, maxTermsAutoPrefix)));
- RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
-
- for (String term : randomTerms) {
- Document doc = new Document();
- doc.add(new StringField("field", term, Field.Store.NO));
- w.addDocument(doc);
- }
-
- w.forceMerge(1);
- IndexReader r = w.getReader();
- final Terms terms = MultiFields.getTerms(r, "field");
- IndexSearcher s = new IndexSearcher(r);
- final int finalActualCount = actualCount;
- PrefixQuery q = new PrefixQuery(new Term("field", "aa")) {
- public PrefixQuery checkTerms() throws IOException {
- TermsEnum termsEnum = getTermsEnum(terms, new AttributeSource());
- int count = 0;
- while (termsEnum.next() != null) {
- //System.out.println("got term: " + termsEnum.term().utf8ToString());
- count++;
- }
-
- // Auto-prefix term(s) should have kicked in, so we should have visited fewer than the total number of aa* terms:
- assertTrue(count < finalActualCount);
-
- return this;
- }
- }.checkTerms();
-
- int x = BooleanQuery.getMaxClauseCount();
- try {
- BooleanQuery.setMaxClauseCount(randomTerms.size());
- if (random().nextBoolean()) {
- q.setRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_REWRITE);
- } else if (random().nextBoolean()) {
- q.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_BOOLEAN_REWRITE);
- }
-
- assertEquals(actualCount, s.search(q, 1).totalHits);
- } finally {
- BooleanQuery.setMaxClauseCount(x);
- }
-
- r.close();
- w.close();
- dir.close();
- }
-
public void testMatchAll() throws Exception {
Directory directory = newDirectory();
Modified: lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/TestTermRangeQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/TestTermRangeQuery.java?rev=1680856&r1=1680855&r2=1680856&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/TestTermRangeQuery.java (original)
+++ lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/TestTermRangeQuery.java Thu May 21 12:30:02 2015
@@ -18,32 +18,21 @@ package org.apache.lucene.search;
*/
import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.HashSet;
-import java.util.List;
import java.util.Set;
-import org.apache.lucene.analysis.*;
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.analysis.MockTokenizer;
+import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-import org.apache.lucene.codecs.autoprefix.AutoPrefixPostingsFormat;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
-import org.apache.lucene.document.StringField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
-import org.apache.lucene.index.IndexWriterConfig;
-import org.apache.lucene.index.MultiFields;
-import org.apache.lucene.index.RandomIndexWriter;
-import org.apache.lucene.index.Terms;
-import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.store.Directory;
-import org.apache.lucene.util.AttributeSource;
-import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;
-import org.apache.lucene.util.TestUtil;
public class TestTermRangeQuery extends LuceneTestCase {
@@ -348,155 +337,4 @@ public class TestTermRangeQuery extends
reader.close();
}
- /** Make sure auto prefix terms are used with TermRangeQuery */
- public void testAutoPrefixTermsKickIn() throws Exception {
-
- List<String> prefixes = new ArrayList<>();
- for(int i=1;i<5;i++) {
- char[] chars = new char[i];
- Arrays.fill(chars, 'a');
- prefixes.add(new String(chars));
- }
-
- Set<String> randomTerms = new HashSet<>();
- int numTerms = atLeast(10000);
- while (randomTerms.size() < numTerms) {
- for(String prefix : prefixes) {
- randomTerms.add(prefix + TestUtil.randomSimpleString(random()));
- }
- }
-
- // We make term range aa<start> - aa<end>
- char start;
- char end;
-
- int actualCount;
- boolean startInclusive = random().nextBoolean();
- boolean endInclusive = random().nextBoolean();
- String startTerm;
- String endTerm;
-
- while (true) {
- start = (char) TestUtil.nextInt(random(), 'a', 'm');
- end = (char) TestUtil.nextInt(random(), start+1, 'z');
-
- actualCount = 0;
-
- startTerm = "aa" + start;
- endTerm = "aa" + end;
-
- for(String term : randomTerms) {
- int cmpStart = startTerm.compareTo(term);
- int cmpEnd = endTerm.compareTo(term);
- if ((cmpStart < 0 || (startInclusive && cmpStart == 0)) &&
- (cmpEnd > 0 || (endInclusive && cmpEnd == 0))) {
- actualCount++;
- }
- }
-
- if (actualCount > 2000) {
- break;
- }
- }
-
- if (VERBOSE) {
- System.out.println("start " + startTerm + " inclusive? " + startInclusive);
- System.out.println("end " + endTerm + " inclusive? " + endInclusive);
- System.out.println("actual count " + actualCount);
- }
-
- Directory dir = newDirectory();
- IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
- int minTermsInBlock = TestUtil.nextInt(random(), 2, 100);
- int maxTermsInBlock = Math.max(2, (minTermsInBlock-1)*2 + random().nextInt(100));
-
- int minTermsAutoPrefix = TestUtil.nextInt(random(), 2, 100);
- int maxTermsAutoPrefix = random().nextBoolean() ? Math.max(2, (minTermsAutoPrefix-1)*2 + random().nextInt(100)) : Integer.MAX_VALUE;
-
- if (VERBOSE) {
- System.out.println("minTermsAutoPrefix " + minTermsAutoPrefix);
- System.out.println("maxTermsAutoPrefix " + maxTermsAutoPrefix);
- }
-
- iwc.setCodec(TestUtil.alwaysPostingsFormat(new AutoPrefixPostingsFormat(minTermsInBlock, maxTermsInBlock,
- minTermsAutoPrefix, maxTermsAutoPrefix)));
- RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
-
- if (VERBOSE) {
- System.out.println("TEST: index terms");
- }
- for (String term : randomTerms) {
- Document doc = new Document();
- doc.add(new StringField("field", term, Field.Store.NO));
- w.addDocument(doc);
- if (VERBOSE) {
- System.out.println(" " + term);
- }
- }
-
- if (VERBOSE) {
- System.out.println("TEST: now force merge");
- }
-
- w.forceMerge(1);
- IndexReader r = w.getReader();
- final Terms terms = MultiFields.getTerms(r, "field");
- IndexSearcher s = new IndexSearcher(r);
- final int finalActualCount = actualCount;
- if (VERBOSE) {
- System.out.println("start=" + startTerm + " end=" + endTerm + " startIncl=" + startInclusive + " endIncl=" + endInclusive);
- }
- TermRangeQuery q = new TermRangeQuery("field", new BytesRef(startTerm), new BytesRef(endTerm), startInclusive, endInclusive) {
- public TermRangeQuery checkTerms() throws IOException {
- TermsEnum termsEnum = getTermsEnum(terms, new AttributeSource());
- int count = 0;
- while (termsEnum.next() != null) {
- if (VERBOSE) {
- System.out.println("got term: " + termsEnum.term().utf8ToString());
- }
- count++;
- }
- if (VERBOSE) {
- System.out.println("count " + count + " vs finalActualCount=" + finalActualCount);
- }
-
- // Auto-prefix term(s) should have kicked in, so we should have visited fewer than the total number of aa* terms:
- assertTrue(count < finalActualCount);
-
- return this;
- }
- }.checkTerms();
-
- int maxClauseCount = BooleanQuery.getMaxClauseCount();
-
- try {
-
- if (random().nextBoolean()) {
- q.setRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_REWRITE);
- BooleanQuery.setMaxClauseCount(actualCount);
- } else if (random().nextBoolean()) {
- q.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_BOOLEAN_REWRITE);
- BooleanQuery.setMaxClauseCount(actualCount);
- }
-
- if (VERBOSE) {
- System.out.println("TEST: use rewrite method " + q.getRewriteMethod());
- }
- assertEquals(actualCount, s.search(q, 1).totalHits);
- } finally {
- BooleanQuery.setMaxClauseCount(maxClauseCount);
- }
-
- // Test when min == max:
- List<String> randomTermsList = new ArrayList<>(randomTerms);
- for(int iter=0;iter<100*RANDOM_MULTIPLIER;iter++) {
- String term = randomTermsList.get(random().nextInt(randomTermsList.size()));
- q = new TermRangeQuery("field", new BytesRef(term), new BytesRef(term), true, true);
- assertEquals(1, s.search(q, 1).totalHits);
- }
-
- r.close();
- w.close();
- dir.close();
- }
}