You are viewing a plain text version of this content. The canonical link for it is here.
Posted to java-commits@lucene.apache.org by do...@apache.org on 2007/12/30 22:19:18 UTC
svn commit: r607591 - in /lucene/java/trunk: ./
src/java/org/apache/lucene/analysis/ src/java/org/apache/lucene/queryParser/
src/test/org/apache/lucene/analysis/
src/test/org/apache/lucene/queryParser/ src/test/org/apache/lucene/search/
Author: doronc
Date: Sun Dec 30 13:19:17 2007
New Revision: 607591
URL: http://svn.apache.org/viewvc?rev=607591&view=rev
Log:
LUCENE-1095: option added to StopFilter and QueryParser to consider position increments.
Modified:
lucene/java/trunk/CHANGES.txt
lucene/java/trunk/src/java/org/apache/lucene/analysis/StopFilter.java
lucene/java/trunk/src/java/org/apache/lucene/queryParser/QueryParser.java
lucene/java/trunk/src/java/org/apache/lucene/queryParser/QueryParser.jj
lucene/java/trunk/src/test/org/apache/lucene/analysis/TestStopAnalyzer.java
lucene/java/trunk/src/test/org/apache/lucene/analysis/TestStopFilter.java
lucene/java/trunk/src/test/org/apache/lucene/queryParser/TestQueryParser.java
lucene/java/trunk/src/test/org/apache/lucene/search/TestPositionIncrement.java
Modified: lucene/java/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/java/trunk/CHANGES.txt?rev=607591&r1=607590&r2=607591&view=diff
==============================================================================
--- lucene/java/trunk/CHANGES.txt (original)
+++ lucene/java/trunk/CHANGES.txt Sun Dec 30 13:19:17 2007
@@ -266,6 +266,14 @@
11. LUCENE-1019: CustomScoreQuery enhanced to support multiple
ValueSource queries. (Kyle Maxwell via Doron Cohen)
+
+12. LUCENE-1095: Added an option to StopFilter to increase
+ positionIncrement of the token succeeding a stopped token.
+ Disabled by default. Similar option added to QueryParser
+ to consider token positions when creating PhraseQuery
+ and MultiPhraseQuery. Disabled by default (so by default
+ the query parser ignores position increments).
+ (Doron Cohen)
Optimizations
Modified: lucene/java/trunk/src/java/org/apache/lucene/analysis/StopFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/analysis/StopFilter.java?rev=607591&r1=607590&r2=607591&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/analysis/StopFilter.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/analysis/StopFilter.java Sun Dec 30 13:19:17 2007
@@ -27,7 +27,10 @@
public final class StopFilter extends TokenFilter {
+ private static boolean ENABLE_POSITION_INCREMENTS_DEFAULT = false;
+
private final CharArraySet stopWords;
+ private boolean enablePositionIncrements = ENABLE_POSITION_INCREMENTS_DEFAULT;
/**
* Construct a token stream filtering the given input.
@@ -111,11 +114,58 @@
*/
public final Token next(Token result) throws IOException {
// return the first non-stop word found
+ int skippedPositions = 0;
while((result = input.next(result)) != null) {
- if (!stopWords.contains(result.termBuffer(), 0, result.termLength))
+ if (!stopWords.contains(result.termBuffer(), 0, result.termLength)) {
+ if (enablePositionIncrements) {
+ result.setPositionIncrement(result.getPositionIncrement() + skippedPositions);
+ }
return result;
+ }
+ skippedPositions += result.getPositionIncrement();
}
// reached EOS -- return null
return null;
+ }
+
+ /**
+ * @see #setEnablePositionIncrementsDefault(boolean)
+ */
+ public static boolean getEnablePositionIncrementsDefault() {
+ return ENABLE_POSITION_INCREMENTS_DEFAULT;
+ }
+
+ /**
+ * Set the default position increments behavior of every StopFilter created from now on.
+ * <p>
+ * Note: behavior of a single StopFilter instance can be modified
+ * with {@link #setEnablePositionIncrements(boolean)}.
+ * This static method allows control over behavior of classes using StopFilters internally,
+ * for example {@link org.apache.lucene.analysis.standard.StandardAnalyzer StandardAnalyzer}.
+ * <p>
+ * Default : false.
+ * @see #setEnablePositionIncrements(boolean)
+ */
+ public static void setEnablePositionIncrementsDefault(boolean defaultValue) {
+ ENABLE_POSITION_INCREMENTS_DEFAULT = defaultValue;
+ }
+
+ /**
+ * @see #setEnablePositionIncrements(boolean)
+ */
+ public boolean getEnablePositionIncrements() {
+ return enablePositionIncrements;
+ }
+
+ /**
+ * Set to <code>true</code> to make <b>this</b> StopFilter apply position increments to the tokens it returns.
+ * <p>
+ * When enabled, the position increment of each stopped (omitted) token is added
+ * to the position increment of the next token that is returned.
+ * <p>
+ * Default: see {@link #setEnablePositionIncrementsDefault(boolean)}.
+ */
+ public void setEnablePositionIncrements(boolean enable) {
+ this.enablePositionIncrements = enable;
}
}
Modified: lucene/java/trunk/src/java/org/apache/lucene/queryParser/QueryParser.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/queryParser/QueryParser.java?rev=607591&r1=607590&r2=607591&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/queryParser/QueryParser.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/queryParser/QueryParser.java Sun Dec 30 13:19:17 2007
@@ -100,6 +100,7 @@
boolean lowercaseExpandedTerms = true;
boolean useOldRangeQuery= false;
boolean allowLeadingWildcard = false;
+ boolean enablePositionIncrements = false;
Analyzer analyzer;
String field;
@@ -234,13 +235,34 @@
}
/**
- * @see #setAllowLeadingWildcard
+ * @see #setAllowLeadingWildcard(boolean)
*/
public boolean getAllowLeadingWildcard() {
return allowLeadingWildcard;
}
/**
+ * Set to <code>true</code> to enable position increments in result query.
+ * <p>
+ * When set, result phrase and multi-phrase queries will
+ * be aware of position increments.
+ * Useful when e.g. a StopFilter increases the position increment of
+ * the token that follows an omitted token.
+ * <p>
+ * Default: false.
+ */
+ public void setEnablePositionIncrements(boolean enable) {
+ this.enablePositionIncrements = enable;
+ }
+
+ /**
+ * @see #setEnablePositionIncrements(boolean)
+ */
+ public boolean getEnablePositionIncrements() {
+ return enablePositionIncrements;
+ }
+
+ /**
* Sets the boolean operator of the QueryParser.
* In default mode (<code>OR_OPERATOR</code>) terms without any modifiers
* are considered optional: for example <code>capital of Hungary</code> is equal to
@@ -478,27 +500,42 @@
MultiPhraseQuery mpq = new MultiPhraseQuery();
mpq.setSlop(phraseSlop);
List multiTerms = new ArrayList();
+ int position = -1;
for (int i = 0; i < v.size(); i++) {
t = (org.apache.lucene.analysis.Token) v.elementAt(i);
- if (t.getPositionIncrement() == 1 && multiTerms.size() > 0) {
- mpq.add((Term[])multiTerms.toArray(new Term[0]));
+ if (t.getPositionIncrement() > 0 && multiTerms.size() > 0) {
+ if (enablePositionIncrements) {
+ mpq.add((Term[])multiTerms.toArray(new Term[0]),position);
+ } else {
+ mpq.add((Term[])multiTerms.toArray(new Term[0]));
+ }
multiTerms.clear();
}
+ position += t.getPositionIncrement();
multiTerms.add(new Term(field, t.termText()));
}
- mpq.add((Term[])multiTerms.toArray(new Term[0]));
+ if (enablePositionIncrements) {
+ mpq.add((Term[])multiTerms.toArray(new Term[0]),position);
+ } else {
+ mpq.add((Term[])multiTerms.toArray(new Term[0]));
+ }
return mpq;
}
}
else {
- PhraseQuery q = new PhraseQuery();
- q.setSlop(phraseSlop);
+ PhraseQuery pq = new PhraseQuery();
+ pq.setSlop(phraseSlop);
+ int position = -1;
for (int i = 0; i < v.size(); i++) {
- q.add(new Term(field, ((org.apache.lucene.analysis.Token)
- v.elementAt(i)).termText()));
-
+ t = (org.apache.lucene.analysis.Token) v.elementAt(i);
+ if (enablePositionIncrements) {
+ position += t.getPositionIncrement();
+ pq.add(new Term(field, t.termText()),position);
+ } else {
+ pq.add(new Term(field, t.termText()));
+ }
}
- return q;
+ return pq;
}
}
}
@@ -1262,12 +1299,6 @@
finally { jj_save(0, xla); }
}
- final private boolean jj_3R_3() {
- if (jj_scan_token(STAR)) return true;
- if (jj_scan_token(COLON)) return true;
- return false;
- }
-
final private boolean jj_3R_2() {
if (jj_scan_token(TERM)) return true;
if (jj_scan_token(COLON)) return true;
@@ -1281,6 +1312,12 @@
jj_scanpos = xsp;
if (jj_3R_3()) return true;
}
+ return false;
+ }
+
+ final private boolean jj_3R_3() {
+ if (jj_scan_token(STAR)) return true;
+ if (jj_scan_token(COLON)) return true;
return false;
}
Modified: lucene/java/trunk/src/java/org/apache/lucene/queryParser/QueryParser.jj
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/queryParser/QueryParser.jj?rev=607591&r1=607590&r2=607591&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/queryParser/QueryParser.jj (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/queryParser/QueryParser.jj Sun Dec 30 13:19:17 2007
@@ -124,6 +124,7 @@
boolean lowercaseExpandedTerms = true;
boolean useOldRangeQuery= false;
boolean allowLeadingWildcard = false;
+ boolean enablePositionIncrements = false;
Analyzer analyzer;
String field;
@@ -258,13 +259,34 @@
}
/**
- * @see #setAllowLeadingWildcard
+ * @see #setAllowLeadingWildcard(boolean)
*/
public boolean getAllowLeadingWildcard() {
return allowLeadingWildcard;
}
/**
+ * Set to <code>true</code> to enable position increments in result query.
+ * <p>
+ * When set, result phrase and multi-phrase queries will
+ * be aware of position increments.
+ * Useful when e.g. a StopFilter increases the position increment of
+ * the token that follows an omitted token.
+ * <p>
+ * Default: false.
+ */
+ public void setEnablePositionIncrements(boolean enable) {
+ this.enablePositionIncrements = enable;
+ }
+
+ /**
+ * @see #setEnablePositionIncrements(boolean)
+ */
+ public boolean getEnablePositionIncrements() {
+ return enablePositionIncrements;
+ }
+
+ /**
* Sets the boolean operator of the QueryParser.
* In default mode (<code>OR_OPERATOR</code>) terms without any modifiers
* are considered optional: for example <code>capital of Hungary</code> is equal to
@@ -502,27 +524,42 @@
MultiPhraseQuery mpq = new MultiPhraseQuery();
mpq.setSlop(phraseSlop);
List multiTerms = new ArrayList();
+ int position = -1;
for (int i = 0; i < v.size(); i++) {
t = (org.apache.lucene.analysis.Token) v.elementAt(i);
- if (t.getPositionIncrement() == 1 && multiTerms.size() > 0) {
- mpq.add((Term[])multiTerms.toArray(new Term[0]));
+ if (t.getPositionIncrement() > 0 && multiTerms.size() > 0) {
+ if (enablePositionIncrements) {
+ mpq.add((Term[])multiTerms.toArray(new Term[0]),position);
+ } else {
+ mpq.add((Term[])multiTerms.toArray(new Term[0]));
+ }
multiTerms.clear();
}
+ position += t.getPositionIncrement();
multiTerms.add(new Term(field, t.termText()));
}
- mpq.add((Term[])multiTerms.toArray(new Term[0]));
+ if (enablePositionIncrements) {
+ mpq.add((Term[])multiTerms.toArray(new Term[0]),position);
+ } else {
+ mpq.add((Term[])multiTerms.toArray(new Term[0]));
+ }
return mpq;
}
}
else {
- PhraseQuery q = new PhraseQuery();
- q.setSlop(phraseSlop);
+ PhraseQuery pq = new PhraseQuery();
+ pq.setSlop(phraseSlop);
+ int position = -1;
for (int i = 0; i < v.size(); i++) {
- q.add(new Term(field, ((org.apache.lucene.analysis.Token)
- v.elementAt(i)).termText()));
-
+ t = (org.apache.lucene.analysis.Token) v.elementAt(i);
+ if (enablePositionIncrements) {
+ position += t.getPositionIncrement();
+ pq.add(new Term(field, t.termText()),position);
+ } else {
+ pq.add(new Term(field, t.termText()));
+ }
}
- return q;
+ return pq;
}
}
}
Modified: lucene/java/trunk/src/test/org/apache/lucene/analysis/TestStopAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/analysis/TestStopAnalyzer.java?rev=607591&r1=607590&r2=607591&view=diff
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/analysis/TestStopAnalyzer.java (original)
+++ lucene/java/trunk/src/test/org/apache/lucene/analysis/TestStopAnalyzer.java Sun Dec 30 13:19:17 2007
@@ -64,7 +64,33 @@
while ((token = stream.next()) != null) {
String text = token.termText();
assertFalse(stopWordsSet.contains(text));
+ assertEquals(1,token.getPositionIncrement()); // by default stop tokenizer does not apply increments.
}
}
-
+
+ public void testStopListPositions() throws IOException {
+ boolean defaultEnable = StopFilter.getEnablePositionIncrementsDefault();
+ StopFilter.setEnablePositionIncrementsDefault(true);
+ try {
+ Set stopWordsSet = new HashSet();
+ stopWordsSet.add("good");
+ stopWordsSet.add("test");
+ stopWordsSet.add("analyzer");
+ StopAnalyzer newStop = new StopAnalyzer((String[])stopWordsSet.toArray(new String[3]));
+ StringReader reader = new StringReader("This is a good test of the english stop analyzer with positions");
+ int expectedIncr[] = { 1, 1, 1, 3, 1, 1, 1, 2, 1};
+ TokenStream stream = newStop.tokenStream("test", reader);
+ assertNotNull(stream);
+ Token token = null;
+ int i = 0;
+ while ((token = stream.next()) != null) {
+ String text = token.termText();
+ assertFalse(stopWordsSet.contains(text));
+ assertEquals(expectedIncr[i++],token.getPositionIncrement());
+ }
+ } finally {
+ StopFilter.setEnablePositionIncrementsDefault(defaultEnable);
+ }
+ }
+
}
Modified: lucene/java/trunk/src/test/org/apache/lucene/analysis/TestStopFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/analysis/TestStopFilter.java?rev=607591&r1=607590&r2=607591&view=diff
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/analysis/TestStopFilter.java (original)
+++ lucene/java/trunk/src/test/org/apache/lucene/analysis/TestStopFilter.java Sun Dec 30 13:19:17 2007
@@ -16,10 +16,12 @@
* limitations under the License.
*/
+import org.apache.lucene.util.English;
import org.apache.lucene.util.LuceneTestCase;
import java.io.IOException;
import java.io.StringReader;
+import java.util.ArrayList;
import java.util.Set;
/**
@@ -27,6 +29,8 @@
*/
public class TestStopFilter extends LuceneTestCase {
+ private final static boolean VERBOSE = false;
+
// other StopFilter functionality is already tested by TestStopAnalyzer
public void testExactCase() throws IOException {
@@ -56,4 +60,69 @@
assertEquals(null, stream.next());
}
+ /**
+ * Test Position increments applied by StopFilter with and without enabling this option.
+ */
+ public void testStopPositons() throws IOException {
+ StringBuffer sb = new StringBuffer();
+ ArrayList a = new ArrayList();
+ for (int i=0; i<20; i++) {
+ String w = English.intToEnglish(i).trim();
+ sb.append(w).append(" ");
+ if (i%3 != 0) a.add(w);
+ }
+ log(sb.toString());
+ String stopWords[] = (String[]) a.toArray(new String[0]);
+ for (int i=0; i<a.size(); i++) log("Stop: "+stopWords[i]);
+ Set stopSet = StopFilter.makeStopSet(stopWords);
+ // with increments
+ StringReader reader = new StringReader(sb.toString());
+ StopFilter stpf = new StopFilter(new WhitespaceTokenizer(reader), stopSet);
+ doTestStopPositons(stpf,true);
+ // without increments
+ reader = new StringReader(sb.toString());
+ stpf = new StopFilter(new WhitespaceTokenizer(reader), stopSet);
+ doTestStopPositons(stpf,false);
+ // with increments, concatenating two stop filters
+ ArrayList a0 = new ArrayList();
+ ArrayList a1 = new ArrayList();
+ for (int i=0; i<a.size(); i++) {
+ if (i%2==0) {
+ a0.add(a.get(i));
+ } else {
+ a1.add(a.get(i));
+ }
+ }
+ String stopWords0[] = (String[]) a0.toArray(new String[0]);
+ for (int i=0; i<a0.size(); i++) log("Stop0: "+stopWords0[i]);
+ String stopWords1[] = (String[]) a1.toArray(new String[0]);
+ for (int i=0; i<a1.size(); i++) log("Stop1: "+stopWords1[i]);
+ Set stopSet0 = StopFilter.makeStopSet(stopWords0);
+ Set stopSet1 = StopFilter.makeStopSet(stopWords1);
+ reader = new StringReader(sb.toString());
+ StopFilter stpf0 = new StopFilter(new WhitespaceTokenizer(reader), stopSet0); // first part of the set
+ stpf0.setEnablePositionIncrements(true);
+ StopFilter stpf01 = new StopFilter(stpf0, stopSet1); // two stop filters concatenated!
+ doTestStopPositons(stpf01,true);
+ }
+
+ private void doTestStopPositons(StopFilter stpf, boolean enableIcrements) throws IOException {
+ log("---> test with enable-increments-"+(enableIcrements?"enabled":"disabled"));
+ stpf.setEnablePositionIncrements(enableIcrements);
+ for (int i=0; i<20; i+=3) {
+ Token t = stpf.next();
+ log("Token "+i+": "+t);
+ String w = English.intToEnglish(i).trim();
+ assertEquals("expecting token "+i+" to be "+w,w,t.termText());
+ assertEquals("all but first token must have position increment of 3",enableIcrements?(i==0?1:3):1,t.getPositionIncrement());
+ }
+ assertNull(stpf.next());
+ }
+
+ // print debug info depending on VERBOSE
+ private static void log(String s) {
+ if (VERBOSE) {
+ System.out.println(s);
+ }
+ }
}
Modified: lucene/java/trunk/src/test/org/apache/lucene/queryParser/TestQueryParser.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/queryParser/TestQueryParser.java?rev=607591&r1=607590&r2=607591&view=diff
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/queryParser/TestQueryParser.java (original)
+++ lucene/java/trunk/src/test/org/apache/lucene/queryParser/TestQueryParser.java Sun Dec 30 13:19:17 2007
@@ -838,17 +838,40 @@
public void testStopwords() throws Exception {
QueryParser qp = new QueryParser("a", new StopAnalyzer(new String[]{"the", "foo"}));
Query result = qp.parse("a:the OR a:foo");
- assertTrue("result is null and it shouldn't be", result != null);
+ assertNotNull("result is null and it shouldn't be", result);
assertTrue("result is not a BooleanQuery", result instanceof BooleanQuery);
assertTrue(((BooleanQuery) result).clauses().size() + " does not equal: " + 0, ((BooleanQuery) result).clauses().size() == 0);
result = qp.parse("a:woo OR a:the");
- assertTrue("result is null and it shouldn't be", result != null);
+ assertNotNull("result is null and it shouldn't be", result);
assertTrue("result is not a TermQuery", result instanceof TermQuery);
result = qp.parse("(fieldX:xxxxx OR fieldy:xxxxxxxx)^2 AND (fieldx:the OR fieldy:foo)");
- assertTrue("result is null and it shouldn't be", result != null);
+ assertNotNull("result is null and it shouldn't be", result);
assertTrue("result is not a BooleanQuery", result instanceof BooleanQuery);
System.out.println("Result: " + result);
assertTrue(((BooleanQuery) result).clauses().size() + " does not equal: " + 2, ((BooleanQuery) result).clauses().size() == 2);
+ }
+
+ public void testPositionIncrement() throws Exception {
+ boolean dflt = StopFilter.getEnablePositionIncrementsDefault();
+ StopFilter.setEnablePositionIncrementsDefault(true);
+ try {
+ QueryParser qp = new QueryParser("a", new StopAnalyzer(new String[]{"the", "in", "are", "this"}));
+ qp.setEnablePositionIncrements(true);
+ String qtxt = "\"the words in poisitions pos02578 are stopped in this phrasequery\"";
+ // 0 2 5 7 8
+ int expectedPositions[] = {1,3,4,6,9};
+ PhraseQuery pq = (PhraseQuery) qp.parse(qtxt);
+ //System.out.println("Query text: "+qtxt);
+ //System.out.println("Result: "+pq);
+ Term t[] = pq.getTerms();
+ int pos[] = pq.getPositions();
+ for (int i = 0; i < t.length; i++) {
+ //System.out.println(i+". "+t[i]+" pos: "+pos[i]);
+ assertEquals("term "+i+" = "+t[i]+" has wrong term-position!",expectedPositions[i],pos[i]);
+ }
+ } finally {
+ StopFilter.setEnablePositionIncrementsDefault(dflt);
+ }
}
public void testMatchAllDocs() throws Exception {
Modified: lucene/java/trunk/src/test/org/apache/lucene/search/TestPositionIncrement.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/search/TestPositionIncrement.java?rev=607591&r1=607590&r2=607591&view=diff
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/search/TestPositionIncrement.java (original)
+++ lucene/java/trunk/src/test/org/apache/lucene/search/TestPositionIncrement.java Sun Dec 30 13:19:17 2007
@@ -19,11 +19,14 @@
import org.apache.lucene.index.Term;
import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.StopAnalyzer;
+import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.WhitespaceAnalyzer;
@@ -80,6 +83,20 @@
hits = searcher.search(q);
assertEquals(0, hits.length());
+ // same as previous, just specify positions explicitly.
+ q = new PhraseQuery();
+ q.add(new Term("field", "1"),0);
+ q.add(new Term("field", "2"),1);
+ hits = searcher.search(q);
+ assertEquals(0, hits.length());
+
+ // specifying correct positions should find the phrase.
+ q = new PhraseQuery();
+ q.add(new Term("field", "1"),0);
+ q.add(new Term("field", "2"),2);
+ hits = searcher.search(q);
+ assertEquals(1, hits.length());
+
q = new PhraseQuery();
q.add(new Term("field", "2"));
q.add(new Term("field", "3"));
@@ -92,6 +109,28 @@
hits = searcher.search(q);
assertEquals(0, hits.length());
+ // phrase query would find it when correct positions are specified.
+ q = new PhraseQuery();
+ q.add(new Term("field", "3"),0);
+ q.add(new Term("field", "4"),0);
+ hits = searcher.search(q);
+ assertEquals(1, hits.length());
+
+ // phrase query should fail for a non-existing searched term
+ // even if another searched term exists at the same searched position.
+ q = new PhraseQuery();
+ q.add(new Term("field", "3"),0);
+ q.add(new Term("field", "9"),0);
+ hits = searcher.search(q);
+ assertEquals(0, hits.length());
+
+ // multi-phrase query should succeed despite a non-existing searched term
+ // because another searched term exists at the same searched position.
+ MultiPhraseQuery mq = new MultiPhraseQuery();
+ mq.add(new Term[]{new Term("field", "3"),new Term("field", "9")},0);
+ hits = searcher.search(mq);
+ assertEquals(1, hits.length());
+
q = new PhraseQuery();
q.add(new Term("field", "2"));
q.add(new Term("field", "4"));
@@ -115,6 +154,50 @@
q.add(new Term("field", "5"));
hits = searcher.search(q);
assertEquals(0, hits.length());
+
+ // analyzer to introduce stopwords and increment gaps
+ Analyzer stpa = new Analyzer() {
+ final WhitespaceAnalyzer a = new WhitespaceAnalyzer();
+ public TokenStream tokenStream(String fieldName, Reader reader) {
+ TokenStream ts = a.tokenStream(fieldName,reader);
+ return new StopFilter(ts,new String[]{"stop"});
+ }
+ };
+
+ // should not find "1 2" because there is a gap of 1 in the index
+ QueryParser qp = new QueryParser("field",stpa);
+ q = (PhraseQuery) qp.parse("\"1 2\"");
+ hits = searcher.search(q);
+ assertEquals(0, hits.length());
+
+ // omitted stop word cannot help because stop filter swallows the increments.
+ q = (PhraseQuery) qp.parse("\"1 stop 2\"");
+ hits = searcher.search(q);
+ assertEquals(0, hits.length());
+
+ // query parser alone won't help, because stop filter swallows the increments.
+ qp.setEnablePositionIncrements(true);
+ q = (PhraseQuery) qp.parse("\"1 stop 2\"");
+ hits = searcher.search(q);
+ assertEquals(0, hits.length());
+
+ boolean dflt = StopFilter.getEnablePositionIncrementsDefault();
+ try {
+ // stop filter alone won't help, because query parser swallows the increments.
+ qp.setEnablePositionIncrements(false);
+ StopFilter.setEnablePositionIncrementsDefault(true);
+ q = (PhraseQuery) qp.parse("\"1 stop 2\"");
+ hits = searcher.search(q);
+ assertEquals(0, hits.length());
+
+ // when both qp and stopFilter propagate increments, we should find the doc.
+ qp.setEnablePositionIncrements(true);
+ q = (PhraseQuery) qp.parse("\"1 stop 2\"");
+ hits = searcher.search(q);
+ assertEquals(1, hits.length());
+ } finally {
+ StopFilter.setEnablePositionIncrementsDefault(dflt);
+ }
}
/**