You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by sa...@apache.org on 2016/07/05 22:06:04 UTC
[4/4] lucene-solr:branch_6x: LUCENE-2605: Add classic QueryParser
option setSplitOnWhitespace() to control whether to split on whitespace prior
to text analysis. Default behavior remains unchanged:
split-on-whitespace=true.
LUCENE-2605: Add classic QueryParser option setSplitOnWhitespace() to control whether to split on whitespace prior to text analysis. Default behavior remains unchanged: split-on-whitespace=true.
Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/7d092fac
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/7d092fac
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/7d092fac
Branch: refs/heads/branch_6x
Commit: 7d092fac4eabab42006e8e2b5c8a149cb266350c
Parents: 5abdcb8
Author: Steve Rowe <sa...@apache.org>
Authored: Tue Jul 5 18:03:42 2016 -0400
Committer: Steve Rowe <sa...@apache.org>
Committed: Tue Jul 5 18:04:09 2016 -0400
----------------------------------------------------------------------
lucene/CHANGES.txt | 4 +
.../core/TestAllAnalyzersHaveFactories.java | 2 +
.../apache/lucene/analysis/TestStopFilter.java | 37 --
.../apache/lucene/util/TestQueryBuilder.java | 43 +-
.../lucene/queryparser/classic/CharStream.java | 2 +-
.../classic/MultiFieldQueryParser.java | 51 ++-
.../queryparser/classic/ParseException.java | 2 +-
.../lucene/queryparser/classic/QueryParser.java | 399 ++++++++++++++-----
.../lucene/queryparser/classic/QueryParser.jj | 265 +++++++-----
.../queryparser/classic/QueryParserBase.java | 39 ++
.../classic/QueryParserTokenManager.java | 27 +-
.../lucene/queryparser/classic/Token.java | 2 +-
.../queryparser/classic/TokenMgrError.java | 2 +-
.../queryparser/classic/TestQueryParser.java | 247 +++++++++++-
.../ext/TestExtendableQueryParser.java | 1 +
.../flexible/standard/TestStandardQP.java | 11 +
.../queryparser/util/QueryParserTestBase.java | 49 +--
.../lucene/analysis/MockSynonymAnalyzer.java | 28 ++
.../lucene/analysis/MockSynonymFilter.java | 97 +++++
.../lucene/analysis/TestMockSynonymFilter.java | 151 +++++++
20 files changed, 1117 insertions(+), 342 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/7d092fac/lucene/CHANGES.txt
----------------------------------------------------------------------
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 3d29039..dcf5a94 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -62,6 +62,10 @@ Improvements
ScandinavianNormalizationFilterFactory now implement MultiTermAwareComponent.
(Adrien Grand)
+* LUCENE-2605: Add classic QueryParser option setSplitOnWhitespace() to
+ control whether to split on whitespace prior to text analysis. Default
+ behavior remains unchanged: split-on-whitespace=true. (Steve Rowe)
+
Optimizations
* LUCENE-7330, LUCENE-7339: Speed up conjunction queries. (Adrien Grand)
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/7d092fac/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestAllAnalyzersHaveFactories.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestAllAnalyzersHaveFactories.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestAllAnalyzersHaveFactories.java
index f8874eb..d826a60 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestAllAnalyzersHaveFactories.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestAllAnalyzersHaveFactories.java
@@ -36,6 +36,7 @@ import org.apache.lucene.analysis.MockFixedLengthPayloadFilter;
import org.apache.lucene.analysis.MockGraphTokenFilter;
import org.apache.lucene.analysis.MockHoleInjectingTokenFilter;
import org.apache.lucene.analysis.MockRandomLookaheadTokenFilter;
+import org.apache.lucene.analysis.MockSynonymFilter;
import org.apache.lucene.analysis.MockTokenFilter;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.MockVariableLengthPayloadFilter;
@@ -75,6 +76,7 @@ public class TestAllAnalyzersHaveFactories extends LuceneTestCase {
MockGraphTokenFilter.class,
MockHoleInjectingTokenFilter.class,
MockRandomLookaheadTokenFilter.class,
+ MockSynonymFilter.class,
MockTokenFilter.class,
MockVariableLengthPayloadFilter.class,
ValidatingTokenFilter.class,
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/7d092fac/lucene/core/src/test/org/apache/lucene/analysis/TestStopFilter.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/test/org/apache/lucene/analysis/TestStopFilter.java b/lucene/core/src/test/org/apache/lucene/analysis/TestStopFilter.java
index c224682..3e26965 100644
--- a/lucene/core/src/test/org/apache/lucene/analysis/TestStopFilter.java
+++ b/lucene/core/src/test/org/apache/lucene/analysis/TestStopFilter.java
@@ -22,7 +22,6 @@ import java.util.ArrayList;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.MockTokenizer;
-import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
@@ -137,40 +136,4 @@ public class TestStopFilter extends BaseTokenStreamTestCase {
System.out.println(s);
}
}
-
- // stupid filter that inserts synonym of 'hte' for 'the'
- private class MockSynonymFilter extends TokenFilter {
- State bufferedState;
- CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
- PositionIncrementAttribute posIncAtt = addAttribute(PositionIncrementAttribute.class);
-
- MockSynonymFilter(TokenStream input) {
- super(input);
- }
-
- @Override
- public boolean incrementToken() throws IOException {
- if (bufferedState != null) {
- restoreState(bufferedState);
- posIncAtt.setPositionIncrement(0);
- termAtt.setEmpty().append("hte");
- bufferedState = null;
- return true;
- } else if (input.incrementToken()) {
- if (termAtt.toString().equals("the")) {
- bufferedState = captureState();
- }
- return true;
- } else {
- return false;
- }
- }
-
- @Override
- public void reset() throws IOException {
- super.reset();
- bufferedState = null;
- }
- }
-
}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/7d092fac/lucene/core/src/test/org/apache/lucene/util/TestQueryBuilder.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/test/org/apache/lucene/util/TestQueryBuilder.java b/lucene/core/src/test/org/apache/lucene/util/TestQueryBuilder.java
index 205fbab..d3019e3 100644
--- a/lucene/core/src/test/org/apache/lucene/util/TestQueryBuilder.java
+++ b/lucene/core/src/test/org/apache/lucene/util/TestQueryBuilder.java
@@ -21,6 +21,7 @@ import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.analysis.MockSynonymFilter;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
@@ -121,7 +122,7 @@ public class TestQueryBuilder extends LuceneTestCase {
assertNull(builder.createBooleanQuery("field", ""));
}
- /** adds synonym of "dog" for "dogs". */
+ /** adds synonym of "dog" for "dogs", and synonym of "cavy" for "guinea pig". */
static class MockSynonymAnalyzer extends Analyzer {
@Override
protected TokenStreamComponents createComponents(String fieldName) {
@@ -130,37 +131,6 @@ public class TestQueryBuilder extends LuceneTestCase {
}
}
- /**
- * adds synonym of "dog" for "dogs".
- */
- protected static class MockSynonymFilter extends TokenFilter {
- CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
- PositionIncrementAttribute posIncAtt = addAttribute(PositionIncrementAttribute.class);
- boolean addSynonym = false;
-
- public MockSynonymFilter(TokenStream input) {
- super(input);
- }
-
- @Override
- public final boolean incrementToken() throws IOException {
- if (addSynonym) { // inject our synonym
- clearAttributes();
- termAtt.setEmpty().append("dog");
- posIncAtt.setPositionIncrement(0);
- addSynonym = false;
- return true;
- }
-
- if (input.incrementToken()) {
- addSynonym = termAtt.toString().equals("dogs");
- return true;
- } else {
- return false;
- }
- }
- }
-
/** simple synonyms test */
public void testSynonyms() throws Exception {
SynonymQuery expected = new SynonymQuery(new Term("field", "dogs"), new Term("field", "dog"));
@@ -180,6 +150,15 @@ public class TestQueryBuilder extends LuceneTestCase {
assertEquals(expectedBuilder.build(), builder.createPhraseQuery("field", "old dogs"));
}
+ /** forms multiphrase query */
+ public void testMultiWordSynonymsPhrase() throws Exception {
+ MultiPhraseQuery.Builder expectedBuilder = new MultiPhraseQuery.Builder();
+ expectedBuilder.add(new Term[] { new Term("field", "guinea"), new Term("field", "cavy") });
+ expectedBuilder.add(new Term("field", "pig"));
+ QueryBuilder queryBuilder = new QueryBuilder(new MockSynonymAnalyzer());
+ assertEquals(expectedBuilder.build(), queryBuilder.createPhraseQuery("field", "guinea pig"));
+ }
+
protected static class SimpleCJKTokenizer extends Tokenizer {
private CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/7d092fac/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/CharStream.java
----------------------------------------------------------------------
diff --git a/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/CharStream.java b/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/CharStream.java
index 85b1461..2c5fcba 100644
--- a/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/CharStream.java
+++ b/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/CharStream.java
@@ -112,4 +112,4 @@ interface CharStream {
void Done();
}
-/* JavaCC - OriginalChecksum=c847dd1920bf7901125a7244125682ad (do not edit this line) */
+/* JavaCC - OriginalChecksum=30b94cad7b10d0d81e3a59a1083939d0 (do not edit this line) */
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/7d092fac/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/MultiFieldQueryParser.java
----------------------------------------------------------------------
diff --git a/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/MultiFieldQueryParser.java b/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/MultiFieldQueryParser.java
index b9963ec..69a7559 100644
--- a/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/MultiFieldQueryParser.java
+++ b/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/MultiFieldQueryParser.java
@@ -27,6 +27,7 @@ import org.apache.lucene.search.BoostQuery;
import org.apache.lucene.search.MultiPhraseQuery;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.Query;
+import org.apache.lucene.search.TermQuery;
/**
* A QueryParser which constructs queries to search multiple fields.
@@ -148,18 +149,54 @@ public class MultiFieldQueryParser extends QueryParser
protected Query getFieldQuery(String field, String queryText, boolean quoted) throws ParseException {
if (field == null) {
List<Query> clauses = new ArrayList<>();
+ Query[] fieldQueries = new Query[fields.length];
+ int maxTerms = 0;
for (int i = 0; i < fields.length; i++) {
Query q = super.getFieldQuery(fields[i], queryText, quoted);
if (q != null) {
- //If the user passes a map of boosts
- if (boosts != null) {
- //Get the boost from the map and apply them
- Float boost = boosts.get(fields[i]);
- if (boost != null) {
- q = new BoostQuery(q, boost.floatValue());
+ if (q instanceof TermQuery) {
+ maxTerms = Math.max(1, maxTerms);
+ } else if (q instanceof BooleanQuery) {
+ maxTerms = Math.max(maxTerms, ((BooleanQuery)q).clauses().size());
+ }
+ fieldQueries[i] = q;
+ }
+ }
+ for (int termNum = 0; termNum < maxTerms; termNum++) {
+ List<Query> termClauses = new ArrayList<>();
+ for (int i = 0; i < fields.length; i++) {
+ if (fieldQueries[i] != null) {
+ Query q = null;
+ if (fieldQueries[i] instanceof BooleanQuery) {
+ List<BooleanClause> nestedClauses = ((BooleanQuery)fieldQueries[i]).clauses();
+ if (termNum < nestedClauses.size()) {
+ q = nestedClauses.get(termNum).getQuery();
+ }
+ } else if (termNum == 0) { // e.g. TermQuery-s
+ q = fieldQueries[i];
+ }
+ if (q != null) {
+ if (boosts != null) {
+ //Get the boost from the map and apply them
+ Float boost = boosts.get(fields[i]);
+ if (boost != null) {
+ q = new BoostQuery(q, boost);
+ }
+ }
+ termClauses.add(q);
}
}
- clauses.add(q);
+ }
+ if (maxTerms > 1) {
+ if (termClauses.size() > 0) {
+ BooleanQuery.Builder builder = newBooleanQuery();
+ for (Query termClause : termClauses) {
+ builder.add(termClause, BooleanClause.Occur.SHOULD);
+ }
+ clauses.add(builder.build());
+ }
+ } else {
+ clauses.addAll(termClauses);
}
}
if (clauses.size() == 0) // happens for stopwords
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/7d092fac/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/ParseException.java
----------------------------------------------------------------------
diff --git a/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/ParseException.java b/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/ParseException.java
index a0ddab2..3c02be3 100644
--- a/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/ParseException.java
+++ b/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/ParseException.java
@@ -184,4 +184,4 @@ public class ParseException extends Exception {
}
}
-/* JavaCC - OriginalChecksum=61602edcb3a15810cbc58f5593eba40d (do not edit this line) */
+/* JavaCC - OriginalChecksum=b187d97d5bb75c3fc63d642c1c26ac6e (do not edit this line) */
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/7d092fac/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParser.java
----------------------------------------------------------------------
diff --git a/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParser.java b/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParser.java
index 08a477e..c137d30 100644
--- a/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParser.java
+++ b/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParser.java
@@ -3,8 +3,11 @@ package org.apache.lucene.queryparser.classic;
import java.io.StringReader;
import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashSet;
import java.util.List;
import java.util.Locale;
+import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.DateTools;
@@ -81,6 +84,9 @@ public class QueryParser extends QueryParserBase implements QueryParserConstants
*/
static public enum Operator { OR, AND }
+ /** default split on whitespace behavior */
+ public static final boolean DEFAULT_SPLIT_ON_WHITESPACE = true;
+
/** Create a query parser.
* @param f the default field for query terms.
* @param a used to find terms in the query text.
@@ -90,6 +96,28 @@ public class QueryParser extends QueryParserBase implements QueryParserConstants
init(f, a);
}
+ /**
+ * @see #setSplitOnWhitespace(boolean)
+ */
+ public boolean getSplitOnWhitespace() {
+ return splitOnWhitespace;
+ }
+
+ /**
+ * Whether query text should be split on whitespace prior to analysis.
+ * Default is <code>{@value #DEFAULT_SPLIT_ON_WHITESPACE}</code>.
+ */
+ public void setSplitOnWhitespace(boolean splitOnWhitespace) {
+ this.splitOnWhitespace = splitOnWhitespace;
+ }
+
+ private boolean splitOnWhitespace = DEFAULT_SPLIT_ON_WHITESPACE;
+ private static Set<Integer> disallowedPostMultiTerm
+ = new HashSet<Integer>(Arrays.asList(COLON, STAR, FUZZY_SLOP, CARAT, AND, OR));
+ private static boolean allowedPostMultiTerm(int tokenKind) {
+ return disallowedPostMultiTerm.contains(tokenKind) == false;
+ }
+
// * Query ::= ( Clause )*
// * Clause ::= ["+", "-"] [<TERM> ":"] ( <TERM> | "(" Query ")" )
final public int Conjunction() throws ParseException {
@@ -129,15 +157,15 @@ public class QueryParser extends QueryParserBase implements QueryParserConstants
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
case PLUS:
jj_consume_token(PLUS);
- ret = MOD_REQ;
+ ret = MOD_REQ;
break;
case MINUS:
jj_consume_token(MINUS);
- ret = MOD_NOT;
+ ret = MOD_NOT;
break;
case NOT:
jj_consume_token(NOT);
- ret = MOD_NOT;
+ ret = MOD_NOT;
break;
default:
jj_la1[2] = jj_gen;
@@ -166,11 +194,37 @@ public class QueryParser extends QueryParserBase implements QueryParserConstants
List<BooleanClause> clauses = new ArrayList<BooleanClause>();
Query q, firstQuery=null;
int conj, mods;
- mods = Modifiers();
- q = Clause(field);
- addClause(clauses, CONJ_NONE, mods, q);
- if (mods == MOD_NONE)
- firstQuery=q;
+ if (jj_2_1(2)) {
+ firstQuery = MultiTerm(field, clauses);
+ } else {
+ switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
+ case NOT:
+ case PLUS:
+ case MINUS:
+ case BAREOPER:
+ case LPAREN:
+ case STAR:
+ case QUOTED:
+ case TERM:
+ case PREFIXTERM:
+ case WILDTERM:
+ case REGEXPTERM:
+ case RANGEIN_START:
+ case RANGEEX_START:
+ case NUMBER:
+ mods = Modifiers();
+ q = Clause(field);
+ addClause(clauses, CONJ_NONE, mods, q);
+ if (mods == MOD_NONE) {
+ firstQuery = q;
+ }
+ break;
+ default:
+ jj_la1[4] = jj_gen;
+ jj_consume_token(-1);
+ throw new ParseException();
+ }
+ }
label_1:
while (true) {
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
@@ -193,39 +247,66 @@ public class QueryParser extends QueryParserBase implements QueryParserConstants
;
break;
default:
- jj_la1[4] = jj_gen;
+ jj_la1[5] = jj_gen;
break label_1;
}
- conj = Conjunction();
- mods = Modifiers();
- q = Clause(field);
- addClause(clauses, conj, mods, q);
- }
- if (clauses.size() == 1 && firstQuery != null)
- {if (true) return firstQuery;}
- else {
- {if (true) return getBooleanQuery(clauses);}
+ if (jj_2_2(2)) {
+ MultiTerm(field, clauses);
+ } else {
+ switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
+ case AND:
+ case OR:
+ case NOT:
+ case PLUS:
+ case MINUS:
+ case BAREOPER:
+ case LPAREN:
+ case STAR:
+ case QUOTED:
+ case TERM:
+ case PREFIXTERM:
+ case WILDTERM:
+ case REGEXPTERM:
+ case RANGEIN_START:
+ case RANGEEX_START:
+ case NUMBER:
+ conj = Conjunction();
+ mods = Modifiers();
+ q = Clause(field);
+ addClause(clauses, conj, mods, q);
+ break;
+ default:
+ jj_la1[6] = jj_gen;
+ jj_consume_token(-1);
+ throw new ParseException();
+ }
}
+ }
+ if (clauses.size() == 1 && firstQuery != null) {
+ {if (true) return firstQuery;}
+ } else {
+ {if (true) return getBooleanQuery(clauses);}
+ }
throw new Error("Missing return statement in function");
}
final public Query Clause(String field) throws ParseException {
Query q;
Token fieldToken=null, boost=null;
- if (jj_2_1(2)) {
+ if (jj_2_3(2)) {
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
case TERM:
fieldToken = jj_consume_token(TERM);
jj_consume_token(COLON);
- field=discardEscapeChar(fieldToken.image);
+ field=discardEscapeChar(fieldToken.image);
break;
case STAR:
jj_consume_token(STAR);
jj_consume_token(COLON);
- field="*";
+ field="*";
break;
default:
- jj_la1[5] = jj_gen;
+ jj_la1[7] = jj_gen;
jj_consume_token(-1);
throw new ParseException();
}
@@ -255,16 +336,16 @@ public class QueryParser extends QueryParserBase implements QueryParserConstants
boost = jj_consume_token(NUMBER);
break;
default:
- jj_la1[6] = jj_gen;
+ jj_la1[8] = jj_gen;
;
}
break;
default:
- jj_la1[7] = jj_gen;
+ jj_la1[9] = jj_gen;
jj_consume_token(-1);
throw new ParseException();
}
- {if (true) return handleBoost(q, boost);}
+ {if (true) return handleBoost(q, boost);}
throw new Error("Missing return statement in function");
}
@@ -291,73 +372,86 @@ public class QueryParser extends QueryParserBase implements QueryParserConstants
break;
case STAR:
term = jj_consume_token(STAR);
- wildcard=true;
+ wildcard=true;
break;
case PREFIXTERM:
term = jj_consume_token(PREFIXTERM);
- prefix=true;
+ prefix=true;
break;
case WILDTERM:
term = jj_consume_token(WILDTERM);
- wildcard=true;
+ wildcard=true;
break;
case REGEXPTERM:
term = jj_consume_token(REGEXPTERM);
- regexp=true;
+ regexp=true;
break;
case NUMBER:
term = jj_consume_token(NUMBER);
break;
case BAREOPER:
term = jj_consume_token(BAREOPER);
- term.image = term.image.substring(0,1);
+ term.image = term.image.substring(0,1);
break;
default:
- jj_la1[8] = jj_gen;
+ jj_la1[10] = jj_gen;
jj_consume_token(-1);
throw new ParseException();
}
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
- case FUZZY_SLOP:
- fuzzySlop = jj_consume_token(FUZZY_SLOP);
- fuzzy=true;
- break;
- default:
- jj_la1[9] = jj_gen;
- ;
- }
- switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
case CARAT:
- jj_consume_token(CARAT);
- boost = jj_consume_token(NUMBER);
+ case FUZZY_SLOP:
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
+ case CARAT:
+ jj_consume_token(CARAT);
+ boost = jj_consume_token(NUMBER);
+ switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
+ case FUZZY_SLOP:
+ fuzzySlop = jj_consume_token(FUZZY_SLOP);
+ fuzzy=true;
+ break;
+ default:
+ jj_la1[11] = jj_gen;
+ ;
+ }
+ break;
case FUZZY_SLOP:
fuzzySlop = jj_consume_token(FUZZY_SLOP);
- fuzzy=true;
+ fuzzy=true;
+ switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
+ case CARAT:
+ jj_consume_token(CARAT);
+ boost = jj_consume_token(NUMBER);
+ break;
+ default:
+ jj_la1[12] = jj_gen;
+ ;
+ }
break;
default:
- jj_la1[10] = jj_gen;
- ;
+ jj_la1[13] = jj_gen;
+ jj_consume_token(-1);
+ throw new ParseException();
}
break;
default:
- jj_la1[11] = jj_gen;
+ jj_la1[14] = jj_gen;
;
}
- q = handleBareTokenQuery(field, term, fuzzySlop, prefix, wildcard, fuzzy, regexp);
+ q = handleBareTokenQuery(field, term, fuzzySlop, prefix, wildcard, fuzzy, regexp);
break;
case RANGEIN_START:
case RANGEEX_START:
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
case RANGEIN_START:
jj_consume_token(RANGEIN_START);
- startInc=true;
+ startInc = true;
break;
case RANGEEX_START:
jj_consume_token(RANGEEX_START);
break;
default:
- jj_la1[12] = jj_gen;
+ jj_la1[15] = jj_gen;
jj_consume_token(-1);
throw new ParseException();
}
@@ -369,7 +463,7 @@ public class QueryParser extends QueryParserBase implements QueryParserConstants
goop1 = jj_consume_token(RANGE_QUOTED);
break;
default:
- jj_la1[13] = jj_gen;
+ jj_la1[16] = jj_gen;
jj_consume_token(-1);
throw new ParseException();
}
@@ -378,7 +472,7 @@ public class QueryParser extends QueryParserBase implements QueryParserConstants
jj_consume_token(RANGE_TO);
break;
default:
- jj_la1[14] = jj_gen;
+ jj_la1[17] = jj_gen;
;
}
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
@@ -389,20 +483,20 @@ public class QueryParser extends QueryParserBase implements QueryParserConstants
goop2 = jj_consume_token(RANGE_QUOTED);
break;
default:
- jj_la1[15] = jj_gen;
+ jj_la1[18] = jj_gen;
jj_consume_token(-1);
throw new ParseException();
}
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
case RANGEIN_END:
jj_consume_token(RANGEIN_END);
- endInc=true;
+ endInc = true;
break;
case RANGEEX_END:
jj_consume_token(RANGEEX_END);
break;
default:
- jj_la1[16] = jj_gen;
+ jj_la1[19] = jj_gen;
jj_consume_token(-1);
throw new ParseException();
}
@@ -412,46 +506,69 @@ public class QueryParser extends QueryParserBase implements QueryParserConstants
boost = jj_consume_token(NUMBER);
break;
default:
- jj_la1[17] = jj_gen;
+ jj_la1[20] = jj_gen;
;
}
- boolean startOpen=false;
- boolean endOpen=false;
- if (goop1.kind == RANGE_QUOTED) {
- goop1.image = goop1.image.substring(1, goop1.image.length()-1);
- } else if ("*".equals(goop1.image)) {
- startOpen=true;
- }
- if (goop2.kind == RANGE_QUOTED) {
- goop2.image = goop2.image.substring(1, goop2.image.length()-1);
- } else if ("*".equals(goop2.image)) {
- endOpen=true;
- }
- q = getRangeQuery(field, startOpen ? null : discardEscapeChar(goop1.image), endOpen ? null : discardEscapeChar(goop2.image), startInc, endInc);
+ boolean startOpen=false;
+ boolean endOpen=false;
+ if (goop1.kind == RANGE_QUOTED) {
+ goop1.image = goop1.image.substring(1, goop1.image.length()-1);
+ } else if ("*".equals(goop1.image)) {
+ startOpen=true;
+ }
+ if (goop2.kind == RANGE_QUOTED) {
+ goop2.image = goop2.image.substring(1, goop2.image.length()-1);
+ } else if ("*".equals(goop2.image)) {
+ endOpen=true;
+ }
+ q = getRangeQuery(field, startOpen ? null : discardEscapeChar(goop1.image), endOpen ? null : discardEscapeChar(goop2.image), startInc, endInc);
break;
case QUOTED:
term = jj_consume_token(QUOTED);
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
- case FUZZY_SLOP:
- fuzzySlop = jj_consume_token(FUZZY_SLOP);
- break;
- default:
- jj_la1[18] = jj_gen;
- ;
- }
- switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
case CARAT:
- jj_consume_token(CARAT);
- boost = jj_consume_token(NUMBER);
+ case FUZZY_SLOP:
+ switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
+ case CARAT:
+ jj_consume_token(CARAT);
+ boost = jj_consume_token(NUMBER);
+ switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
+ case FUZZY_SLOP:
+ fuzzySlop = jj_consume_token(FUZZY_SLOP);
+ fuzzy=true;
+ break;
+ default:
+ jj_la1[21] = jj_gen;
+ ;
+ }
+ break;
+ case FUZZY_SLOP:
+ fuzzySlop = jj_consume_token(FUZZY_SLOP);
+ fuzzy=true;
+ switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
+ case CARAT:
+ jj_consume_token(CARAT);
+ boost = jj_consume_token(NUMBER);
+ break;
+ default:
+ jj_la1[22] = jj_gen;
+ ;
+ }
+ break;
+ default:
+ jj_la1[23] = jj_gen;
+ jj_consume_token(-1);
+ throw new ParseException();
+ }
break;
default:
- jj_la1[19] = jj_gen;
+ jj_la1[24] = jj_gen;
;
}
- q = handleQuotedTerm(field, term, fuzzySlop);
+ q = handleQuotedTerm(field, term, fuzzySlop);
break;
default:
- jj_la1[20] = jj_gen;
+ jj_la1[25] = jj_gen;
jj_consume_token(-1);
throw new ParseException();
}
@@ -459,6 +576,44 @@ public class QueryParser extends QueryParserBase implements QueryParserConstants
throw new Error("Missing return statement in function");
}
+/** Returns the first query if splitOnWhitespace=true or otherwise the entire produced query */
+ final public Query MultiTerm(String field, List<BooleanClause> clauses) throws ParseException {
+ Token text, whitespace, followingText;
+ Query firstQuery = null;
+ text = jj_consume_token(TERM);
+ if (splitOnWhitespace) {
+ firstQuery = getFieldQuery(field, discardEscapeChar(text.image), false);
+ addClause(clauses, CONJ_NONE, MOD_NONE, firstQuery);
+ }
+ if (getToken(1).kind == TERM && allowedPostMultiTerm(getToken(2).kind)) {
+
+ } else {
+ jj_consume_token(-1);
+ throw new ParseException();
+ }
+ label_2:
+ while (true) {
+ followingText = jj_consume_token(TERM);
+ if (splitOnWhitespace) {
+ Query q = getFieldQuery(field, discardEscapeChar(followingText.image), false);
+ addClause(clauses, CONJ_NONE, MOD_NONE, q);
+ } else { // build up the text to send to analysis
+ text.image += " " + followingText.image;
+ }
+ if (getToken(1).kind == TERM && allowedPostMultiTerm(getToken(2).kind)) {
+ ;
+ } else {
+ break label_2;
+ }
+ }
+ if (splitOnWhitespace == false) {
+ firstQuery = getFieldQuery(field, discardEscapeChar(text.image), false);
+ addMultiTermClauses(clauses, firstQuery);
+ }
+ {if (true) return firstQuery;}
+ throw new Error("Missing return statement in function");
+ }
+
private boolean jj_2_1(int xla) {
jj_la = xla; jj_lastpos = jj_scanpos = token;
try { return !jj_3_1(); }
@@ -466,23 +621,71 @@ public class QueryParser extends QueryParserBase implements QueryParserConstants
finally { jj_save(0, xla); }
}
- private boolean jj_3R_2() {
+ private boolean jj_2_2(int xla) {
+ jj_la = xla; jj_lastpos = jj_scanpos = token;
+ try { return !jj_3_2(); }
+ catch(LookaheadSuccess ls) { return true; }
+ finally { jj_save(1, xla); }
+ }
+
+ private boolean jj_2_3(int xla) {
+ jj_la = xla; jj_lastpos = jj_scanpos = token;
+ try { return !jj_3_3(); }
+ catch(LookaheadSuccess ls) { return true; }
+ finally { jj_save(2, xla); }
+ }
+
+ private boolean jj_3R_4() {
if (jj_scan_token(TERM)) return true;
if (jj_scan_token(COLON)) return true;
return false;
}
+ private boolean jj_3_2() {
+ if (jj_3R_3()) return true;
+ return false;
+ }
+
private boolean jj_3_1() {
+ if (jj_3R_3()) return true;
+ return false;
+ }
+
+ private boolean jj_3R_7() {
+ if (jj_scan_token(TERM)) return true;
+ return false;
+ }
+
+ private boolean jj_3_3() {
Token xsp;
xsp = jj_scanpos;
- if (jj_3R_2()) {
+ if (jj_3R_4()) {
jj_scanpos = xsp;
- if (jj_3R_3()) return true;
+ if (jj_3R_5()) return true;
}
return false;
}
private boolean jj_3R_3() {
+ if (jj_scan_token(TERM)) return true;
+ jj_lookingAhead = true;
+ jj_semLA = getToken(1).kind == TERM && allowedPostMultiTerm(getToken(2).kind);
+ jj_lookingAhead = false;
+ if (!jj_semLA || jj_3R_6()) return true;
+ Token xsp;
+ if (jj_3R_7()) return true;
+ while (true) {
+ xsp = jj_scanpos;
+ if (jj_3R_7()) { jj_scanpos = xsp; break; }
+ }
+ return false;
+ }
+
+ private boolean jj_3R_6() {
+ return false;
+ }
+
+ private boolean jj_3R_5() {
if (jj_scan_token(STAR)) return true;
if (jj_scan_token(COLON)) return true;
return false;
@@ -497,8 +700,11 @@ public class QueryParser extends QueryParserBase implements QueryParserConstants
private int jj_ntk;
private Token jj_scanpos, jj_lastpos;
private int jj_la;
+ /** Whether we are looking ahead. */
+ private boolean jj_lookingAhead = false;
+ private boolean jj_semLA;
private int jj_gen;
- final private int[] jj_la1 = new int[21];
+ final private int[] jj_la1 = new int[26];
static private int[] jj_la1_0;
static private int[] jj_la1_1;
static {
@@ -506,12 +712,12 @@ public class QueryParser extends QueryParserBase implements QueryParserConstants
jj_la1_init_1();
}
private static void jj_la1_init_0() {
- jj_la1_0 = new int[] {0x300,0x300,0x1c00,0x1c00,0xfda7f00,0x120000,0x40000,0xfda6000,0x9d22000,0x200000,0x200000,0x40000,0x6000000,0x80000000,0x10000000,0x80000000,0x60000000,0x40000,0x200000,0x40000,0xfda2000,};
+ jj_la1_0 = new int[] {0x300,0x300,0x1c00,0x1c00,0xfda7c00,0xfda7f00,0xfda7f00,0x120000,0x40000,0xfda6000,0x9d22000,0x200000,0x40000,0x240000,0x240000,0x6000000,0x80000000,0x10000000,0x80000000,0x60000000,0x40000,0x200000,0x40000,0x240000,0x240000,0xfda2000,};
}
private static void jj_la1_init_1() {
- jj_la1_1 = new int[] {0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x0,0x1,0x0,0x0,0x0,0x0,0x0,};
+ jj_la1_1 = new int[] {0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x0,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x0,};
}
- final private JJCalls[] jj_2_rtns = new JJCalls[1];
+ final private JJCalls[] jj_2_rtns = new JJCalls[3];
private boolean jj_rescan = false;
private int jj_gc = 0;
@@ -521,7 +727,7 @@ public class QueryParser extends QueryParserBase implements QueryParserConstants
token = new Token();
jj_ntk = -1;
jj_gen = 0;
- for (int i = 0; i < 21; i++) jj_la1[i] = -1;
+ for (int i = 0; i < 26; i++) jj_la1[i] = -1;
for (int i = 0; i < jj_2_rtns.length; i++) jj_2_rtns[i] = new JJCalls();
}
@@ -530,8 +736,9 @@ public class QueryParser extends QueryParserBase implements QueryParserConstants
token_source.ReInit(stream);
token = new Token();
jj_ntk = -1;
+ jj_lookingAhead = false;
jj_gen = 0;
- for (int i = 0; i < 21; i++) jj_la1[i] = -1;
+ for (int i = 0; i < 26; i++) jj_la1[i] = -1;
for (int i = 0; i < jj_2_rtns.length; i++) jj_2_rtns[i] = new JJCalls();
}
@@ -541,7 +748,7 @@ public class QueryParser extends QueryParserBase implements QueryParserConstants
token = new Token();
jj_ntk = -1;
jj_gen = 0;
- for (int i = 0; i < 21; i++) jj_la1[i] = -1;
+ for (int i = 0; i < 26; i++) jj_la1[i] = -1;
for (int i = 0; i < jj_2_rtns.length; i++) jj_2_rtns[i] = new JJCalls();
}
@@ -551,7 +758,7 @@ public class QueryParser extends QueryParserBase implements QueryParserConstants
token = new Token();
jj_ntk = -1;
jj_gen = 0;
- for (int i = 0; i < 21; i++) jj_la1[i] = -1;
+ for (int i = 0; i < 26; i++) jj_la1[i] = -1;
for (int i = 0; i < jj_2_rtns.length; i++) jj_2_rtns[i] = new JJCalls();
}
@@ -614,7 +821,7 @@ public class QueryParser extends QueryParserBase implements QueryParserConstants
/** Get the specific Token. */
final public Token getToken(int index) {
- Token t = token;
+ Token t = jj_lookingAhead ? jj_scanpos : token;
for (int i = 0; i < index; i++) {
if (t.next != null) t = t.next;
else t = t.next = token_source.getNextToken();
@@ -668,7 +875,7 @@ public class QueryParser extends QueryParserBase implements QueryParserConstants
la1tokens[jj_kind] = true;
jj_kind = -1;
}
- for (int i = 0; i < 21; i++) {
+ for (int i = 0; i < 26; i++) {
if (jj_la1[i] == jj_gen) {
for (int j = 0; j < 32; j++) {
if ((jj_la1_0[i] & (1<<j)) != 0) {
@@ -707,7 +914,7 @@ public class QueryParser extends QueryParserBase implements QueryParserConstants
private void jj_rescan_token() {
jj_rescan = true;
- for (int i = 0; i < 1; i++) {
+ for (int i = 0; i < 3; i++) {
try {
JJCalls p = jj_2_rtns[i];
do {
@@ -715,6 +922,8 @@ public class QueryParser extends QueryParserBase implements QueryParserConstants
jj_la = p.arg; jj_lastpos = jj_scanpos = p.first;
switch (i) {
case 0: jj_3_1(); break;
+ case 1: jj_3_2(); break;
+ case 2: jj_3_3(); break;
}
}
p = p.next;
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/7d092fac/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParser.jj
----------------------------------------------------------------------
diff --git a/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParser.jj b/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParser.jj
index 9bf154d..71a5c39 100644
--- a/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParser.jj
+++ b/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParser.jj
@@ -27,15 +27,17 @@ package org.apache.lucene.queryparser.classic;
import java.io.StringReader;
import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashSet;
import java.util.List;
import java.util.Locale;
+import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.DateTools;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermRangeQuery;
-import org.apache.lucene.search.TermQuery;
/**
* This class is generated by JavaCC. The most important method is
@@ -106,6 +108,9 @@ public class QueryParser extends QueryParserBase {
*/
static public enum Operator { OR, AND }
+ /** default split on whitespace behavior */
+ public static final boolean DEFAULT_SPLIT_ON_WHITESPACE = true;
+
/** Create a query parser.
* @param f the default field for query terms.
* @param a used to find terms in the query text.
@@ -114,6 +119,28 @@ public class QueryParser extends QueryParserBase {
this(new FastCharStream(new StringReader("")));
init(f, a);
}
+
+ /**
+ * @see #setSplitOnWhitespace(boolean)
+ */
+ public boolean getSplitOnWhitespace() {
+ return splitOnWhitespace;
+ }
+
+ /**
+ * Whether query text should be split on whitespace prior to analysis.
+ * Default is <code>{@value #DEFAULT_SPLIT_ON_WHITESPACE}</code>.
+ */
+ public void setSplitOnWhitespace(boolean splitOnWhitespace) {
+ this.splitOnWhitespace = splitOnWhitespace;
+ }
+
+ private boolean splitOnWhitespace = DEFAULT_SPLIT_ON_WHITESPACE;
+ private static Set<Integer> disallowedPostMultiTerm
+ = new HashSet<Integer>(Arrays.asList(COLON, STAR, FUZZY_SLOP, CARAT, AND, OR));
+ private static boolean allowedPostMultiTerm(int tokenKind) {
+ return disallowedPostMultiTerm.contains(tokenKind) == false;
+ }
}
PARSER_END(QueryParser)
@@ -123,15 +150,14 @@ PARSER_END(QueryParser)
/* ***************** */
<*> TOKEN : {
- <#_NUM_CHAR: ["0"-"9"] >
-// every character that follows a backslash is considered as an escaped character
-| <#_ESCAPED_CHAR: "\\" ~[] >
+ <#_NUM_CHAR: ["0"-"9"] >
+| <#_ESCAPED_CHAR: "\\" ~[] > // every character that follows a backslash is considered as an escaped character
| <#_TERM_START_CHAR: ( ~[ " ", "\t", "\n", "\r", "\u3000", "+", "-", "!", "(", ")", ":", "^",
"[", "]", "\"", "{", "}", "~", "*", "?", "\\", "/" ]
- | <_ESCAPED_CHAR> ) >
-| <#_TERM_CHAR: ( <_TERM_START_CHAR> | <_ESCAPED_CHAR> | "-" | "+" ) >
-| <#_WHITESPACE: ( " " | "\t" | "\n" | "\r" | "\u3000") >
-| <#_QUOTED_CHAR: ( ~[ "\"", "\\" ] | <_ESCAPED_CHAR> ) >
+ | <_ESCAPED_CHAR> ) >
+| <#_TERM_CHAR: ( <_TERM_START_CHAR> | "-" | "+" ) >
+| <#_WHITESPACE: ( " " | "\t" | "\n" | "\r" | "\u3000") >
+| <#_QUOTED_CHAR: ( ~[ "\"", "\\" ] | <_ESCAPED_CHAR> ) >
}
<DEFAULT, Range> SKIP : {
@@ -139,37 +165,37 @@ PARSER_END(QueryParser)
}
<DEFAULT> TOKEN : {
- <AND: ("AND" | "&&") >
-| <OR: ("OR" | "||") >
-| <NOT: ("NOT" | "!") >
-| <PLUS: "+" >
-| <MINUS: "-" >
-| <BAREOPER: ("+"|"-"|"!") <_WHITESPACE> >
-| <LPAREN: "(" >
-| <RPAREN: ")" >
-| <COLON: ":" >
-| <STAR: "*" >
-| <CARAT: "^" > : Boost
-| <QUOTED: "\"" (<_QUOTED_CHAR>)* "\"">
-| <TERM: <_TERM_START_CHAR> (<_TERM_CHAR>)* >
-| <FUZZY_SLOP: "~" ((<_NUM_CHAR>)+ (( "." (<_NUM_CHAR>)+ )? (<_TERM_CHAR>)*) | (<_TERM_CHAR>)*) >
-| <PREFIXTERM: ("*") | ( <_TERM_START_CHAR> (<_TERM_CHAR>)* "*" ) >
-| <WILDTERM: (<_TERM_START_CHAR> | [ "*", "?" ]) (<_TERM_CHAR> | ( [ "*", "?" ] ))* >
-| <REGEXPTERM: "/" (~[ "/" ] | "\\/" )* "/" >
+ <AND: ("AND" | "&&") >
+| <OR: ("OR" | "||") >
+| <NOT: ("NOT" | "!") >
+| <PLUS: "+" >
+| <MINUS: "-" >
+| <BAREOPER: ("+"|"-"|"!") <_WHITESPACE> >
+| <LPAREN: "(" >
+| <RPAREN: ")" >
+| <COLON: ":" >
+| <STAR: "*" >
+| <CARAT: "^" > : Boost
+| <QUOTED: "\"" (<_QUOTED_CHAR>)* "\"">
+| <TERM: <_TERM_START_CHAR> (<_TERM_CHAR>)* >
+| <FUZZY_SLOP: "~" ((<_NUM_CHAR>)+ (( "." (<_NUM_CHAR>)+ )? (<_TERM_CHAR>)*) | (<_TERM_CHAR>)*) >
+| <PREFIXTERM: ("*") | ( <_TERM_START_CHAR> (<_TERM_CHAR>)* "*" ) >
+| <WILDTERM: (<_TERM_START_CHAR> | [ "*", "?" ]) (<_TERM_CHAR> | ( [ "*", "?" ] ))* >
+| <REGEXPTERM: "/" (~[ "/" ] | "\\/" )* "/" >
| <RANGEIN_START: "[" > : Range
| <RANGEEX_START: "{" > : Range
}
<Boost> TOKEN : {
-<NUMBER: (<_NUM_CHAR>)+ ( "." (<_NUM_CHAR>)+ )? > : DEFAULT
+ <NUMBER: (<_NUM_CHAR>)+ ( "." (<_NUM_CHAR>)+ )? > : DEFAULT
}
<Range> TOKEN : {
-<RANGE_TO: "TO">
-| <RANGEIN_END: "]"> : DEFAULT
-| <RANGEEX_END: "}"> : DEFAULT
+ <RANGE_TO: "TO">
+| <RANGEIN_END: "]"> : DEFAULT
+| <RANGEEX_END: "}"> : DEFAULT
| <RANGE_QUOTED: "\"" (~["\""] | "\\\"")+ "\"">
-| <RANGE_GOOP: (~[ " ", "]", "}" ])+ >
+| <RANGE_GOOP: (~[ " ", "]", "}" ])+ >
}
// * Query ::= ( Clause )*
@@ -191,23 +217,20 @@ int Modifiers() : {
}
{
[
- <PLUS> { ret = MOD_REQ; }
- | <MINUS> { ret = MOD_NOT; }
- | <NOT> { ret = MOD_NOT; }
+ <PLUS> { ret = MOD_REQ; }
+ | <MINUS> { ret = MOD_NOT; }
+ | <NOT> { ret = MOD_NOT; }
]
{ return ret; }
}
// This makes sure that there is no garbage after the query string
-Query TopLevelQuery(String field) :
-{
+Query TopLevelQuery(String field) : {
Query q;
}
{
q=Query(field) <EOF>
- {
- return q;
- }
+ { return q; }
}
Query Query(String field) :
@@ -217,23 +240,30 @@ Query Query(String field) :
int conj, mods;
}
{
- mods=Modifiers() q=Clause(field)
- {
- addClause(clauses, CONJ_NONE, mods, q);
- if (mods == MOD_NONE)
- firstQuery=q;
- }
(
- conj=Conjunction() mods=Modifiers() q=Clause(field)
- { addClause(clauses, conj, mods, q); }
- )*
- {
- if (clauses.size() == 1 && firstQuery != null)
- return firstQuery;
- else {
- return getBooleanQuery(clauses);
+ LOOKAHEAD(2)
+ firstQuery=MultiTerm(field, clauses)
+ | mods=Modifiers() q=Clause(field)
+ {
+ addClause(clauses, CONJ_NONE, mods, q);
+ if (mods == MOD_NONE) {
+ firstQuery = q;
+ }
}
+ )
+ (
+ LOOKAHEAD(2)
+ MultiTerm(field, clauses)
+ | conj=Conjunction() mods=Modifiers() q=Clause(field)
+ { addClause(clauses, conj, mods, q); }
+ )*
+ {
+ if (clauses.size() == 1 && firstQuery != null) {
+ return firstQuery;
+ } else {
+ return getBooleanQuery(clauses);
}
+ }
}
Query Clause(String field) : {
@@ -244,20 +274,17 @@ Query Clause(String field) : {
[
LOOKAHEAD(2)
(
- fieldToken=<TERM> <COLON> {field=discardEscapeChar(fieldToken.image);}
- | <STAR> <COLON> {field="*";}
+ fieldToken=<TERM> <COLON> {field=discardEscapeChar(fieldToken.image);}
+ | <STAR> <COLON> {field="*";}
)
]
-
(
- q=Term(field)
- | <LPAREN> q=Query(field) <RPAREN> (<CARAT> boost=<NUMBER>)?
-
+ q=Term(field)
+ | <LPAREN> q=Query(field) <RPAREN> [ <CARAT> boost=<NUMBER> ]
)
- { return handleBoost(q, boost); }
+ { return handleBoost(q, boost); }
}
-
Query Term(String field) : {
Token term, boost=null, fuzzySlop=null, goop1, goop2;
boolean prefix = false;
@@ -270,45 +297,85 @@ Query Term(String field) : {
}
{
(
- (
- term=<TERM>
- | term=<STAR> { wildcard=true; }
- | term=<PREFIXTERM> { prefix=true; }
- | term=<WILDTERM> { wildcard=true; }
- | term=<REGEXPTERM> { regexp=true; }
- | term=<NUMBER>
- | term=<BAREOPER> { term.image = term.image.substring(0,1); }
- )
- [ fuzzySlop=<FUZZY_SLOP> { fuzzy=true; } ]
- [ <CARAT> boost=<NUMBER> [ fuzzySlop=<FUZZY_SLOP> { fuzzy=true; } ] ]
- {
- q = handleBareTokenQuery(field, term, fuzzySlop, prefix, wildcard, fuzzy, regexp);
- }
- | ( ( <RANGEIN_START> {startInc=true;} | <RANGEEX_START> )
- ( goop1=<RANGE_GOOP>|goop1=<RANGE_QUOTED> )
- [ <RANGE_TO> ]
- ( goop2=<RANGE_GOOP>|goop2=<RANGE_QUOTED> )
- ( <RANGEIN_END> {endInc=true;} | <RANGEEX_END>))
- [ <CARAT> boost=<NUMBER> ]
- {
- boolean startOpen=false;
- boolean endOpen=false;
- if (goop1.kind == RANGE_QUOTED) {
- goop1.image = goop1.image.substring(1, goop1.image.length()-1);
- } else if ("*".equals(goop1.image)) {
- startOpen=true;
- }
- if (goop2.kind == RANGE_QUOTED) {
- goop2.image = goop2.image.substring(1, goop2.image.length()-1);
- } else if ("*".equals(goop2.image)) {
- endOpen=true;
- }
- q = getRangeQuery(field, startOpen ? null : discardEscapeChar(goop1.image), endOpen ? null : discardEscapeChar(goop2.image), startInc, endInc);
- }
- | term=<QUOTED>
- [ fuzzySlop=<FUZZY_SLOP> ]
- [ <CARAT> boost=<NUMBER> ]
- { q = handleQuotedTerm(field, term, fuzzySlop); }
+ (
+ term=<TERM>
+ | term=<STAR> { wildcard=true; }
+ | term=<PREFIXTERM> { prefix=true; }
+ | term=<WILDTERM> { wildcard=true; }
+ | term=<REGEXPTERM> { regexp=true; }
+ | term=<NUMBER>
+ | term=<BAREOPER> { term.image = term.image.substring(0,1); }
+ )
+ [
+ <CARAT> boost=<NUMBER> [ fuzzySlop=<FUZZY_SLOP> { fuzzy=true; } ]
+ | fuzzySlop=<FUZZY_SLOP> { fuzzy=true; } [ <CARAT> boost=<NUMBER> ]
+ ]
+ { q = handleBareTokenQuery(field, term, fuzzySlop, prefix, wildcard, fuzzy, regexp); }
+
+ | ( <RANGEIN_START> { startInc = true; } | <RANGEEX_START> )
+ ( goop1=<RANGE_GOOP> | goop1=<RANGE_QUOTED> )
+ [ <RANGE_TO> ]
+ ( goop2=<RANGE_GOOP> | goop2=<RANGE_QUOTED> )
+ ( <RANGEIN_END> { endInc = true; } | <RANGEEX_END> )
+ [ <CARAT> boost=<NUMBER> ]
+ {
+ boolean startOpen=false;
+ boolean endOpen=false;
+ if (goop1.kind == RANGE_QUOTED) {
+ goop1.image = goop1.image.substring(1, goop1.image.length()-1);
+ } else if ("*".equals(goop1.image)) {
+ startOpen=true;
+ }
+ if (goop2.kind == RANGE_QUOTED) {
+ goop2.image = goop2.image.substring(1, goop2.image.length()-1);
+ } else if ("*".equals(goop2.image)) {
+ endOpen=true;
+ }
+ q = getRangeQuery(field, startOpen ? null : discardEscapeChar(goop1.image), endOpen ? null : discardEscapeChar(goop2.image), startInc, endInc);
+ }
+
+ | term=<QUOTED>
+ [
+ <CARAT> boost=<NUMBER> [ fuzzySlop=<FUZZY_SLOP> { fuzzy=true; } ]
+ | fuzzySlop=<FUZZY_SLOP> { fuzzy=true; } [ <CARAT> boost=<NUMBER> ]
+ ]
+ { q = handleQuotedTerm(field, term, fuzzySlop); }
)
{ return handleBoost(q, boost); }
}
+
+/** Returns the first query if splitOnWhitespace=true, otherwise the entire produced query */
+Query MultiTerm(String field, List<BooleanClause> clauses) : {
+ Token text, whitespace, followingText;
+ Query firstQuery = null;
+}
+{
+ text=<TERM>
+ {
+ if (splitOnWhitespace) {
+ firstQuery = getFieldQuery(field, discardEscapeChar(text.image), false);
+ addClause(clauses, CONJ_NONE, MOD_NONE, firstQuery);
+ }
+ }
+ // Both lookaheads are required; the first lookahead vets the first following term and the second lookahead vets the rest
+ LOOKAHEAD({ getToken(1).kind == TERM && allowedPostMultiTerm(getToken(2).kind) })
+ (
+ LOOKAHEAD({ getToken(1).kind == TERM && allowedPostMultiTerm(getToken(2).kind) })
+ followingText=<TERM>
+ {
+ if (splitOnWhitespace) {
+ Query q = getFieldQuery(field, discardEscapeChar(followingText.image), false);
+ addClause(clauses, CONJ_NONE, MOD_NONE, q);
+ } else { // build up the text to send to analysis
+ text.image += " " + followingText.image;
+ }
+ }
+ )+
+ {
+ if (splitOnWhitespace == false) {
+ firstQuery = getFieldQuery(field, discardEscapeChar(text.image), false);
+ addMultiTermClauses(clauses, firstQuery);
+ }
+ return firstQuery;
+ }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/7d092fac/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParserBase.java
----------------------------------------------------------------------
diff --git a/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParserBase.java b/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParserBase.java
index c00d88e..cdfa477 100644
--- a/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParserBase.java
+++ b/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParserBase.java
@@ -465,6 +465,45 @@ public abstract class QueryParserBase extends QueryBuilder implements CommonQuer
}
/**
+ * Adds clauses generated from analysis over text containing whitespace.
+ * There are no operators, so the query's clauses can either be MUST (if the
+ * default operator is AND) or SHOULD (default OR).
+ *
+ * If all of the clauses in the given Query are TermQuery-s, this method flattens the result
+ * by adding the TermQuery-s individually to the output clause list; otherwise, the given Query
+ * is added as a single clause including its nested clauses.
+ */
+ protected void addMultiTermClauses(List<BooleanClause> clauses, Query q) {
+ // We might have been passed a null query; the term might have been
+ // filtered away by the analyzer.
+ if (q == null) {
+ return;
+ }
+ boolean allNestedTermQueries = false;
+ if (q instanceof BooleanQuery) {
+ allNestedTermQueries = true;
+ for (BooleanClause clause : ((BooleanQuery)q).clauses()) {
+ if ( ! (clause.getQuery() instanceof TermQuery)) {
+ allNestedTermQueries = false;
+ break;
+ }
+ }
+ }
+ if (allNestedTermQueries) {
+ clauses.addAll(((BooleanQuery)q).clauses());
+ } else {
+ BooleanClause.Occur occur = operator == OR_OPERATOR ? BooleanClause.Occur.SHOULD : BooleanClause.Occur.MUST;
+ if (q instanceof BooleanQuery) {
+ for (BooleanClause clause : ((BooleanQuery)q).clauses()) {
+ clauses.add(newBooleanClause(clause.getQuery(), occur));
+ }
+ } else {
+ clauses.add(newBooleanClause(q, occur));
+ }
+ }
+ }
+
+ /**
* @exception org.apache.lucene.queryparser.classic.ParseException throw in overridden method to disallow
*/
protected Query getFieldQuery(String field, String queryText, boolean quoted) throws ParseException {
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/7d092fac/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParserTokenManager.java
----------------------------------------------------------------------
diff --git a/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParserTokenManager.java b/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParserTokenManager.java
index 8c8951e..065ff8b 100644
--- a/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParserTokenManager.java
+++ b/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParserTokenManager.java
@@ -285,7 +285,7 @@ private int jjMoveNfa_2(int startState, int curPos)
jjCheckNAddTwoStates(33, 34);
}
else if (curChar == 92)
- jjCheckNAddTwoStates(35, 35);
+ jjCheckNAdd(35);
break;
case 0:
if ((0x97ffffff87ffffffL & l) != 0L)
@@ -384,7 +384,7 @@ private int jjMoveNfa_2(int startState, int curPos)
break;
case 26:
if (curChar == 92)
- jjAddStates(27, 28);
+ jjstateSet[jjnewStateCnt++] = 27;
break;
case 27:
if (kind > 21)
@@ -400,7 +400,7 @@ private int jjMoveNfa_2(int startState, int curPos)
break;
case 29:
if (curChar == 92)
- jjAddStates(29, 30);
+ jjstateSet[jjnewStateCnt++] = 30;
break;
case 30:
if (kind > 21)
@@ -423,7 +423,7 @@ private int jjMoveNfa_2(int startState, int curPos)
break;
case 34:
if (curChar == 92)
- jjCheckNAddTwoStates(35, 35);
+ jjCheckNAdd(35);
break;
case 35:
if (kind > 23)
@@ -453,7 +453,7 @@ private int jjMoveNfa_2(int startState, int curPos)
break;
case 43:
if (curChar == 92)
- jjCheckNAddTwoStates(44, 44);
+ jjCheckNAdd(44);
break;
case 44:
if (kind > 20)
@@ -466,7 +466,7 @@ private int jjMoveNfa_2(int startState, int curPos)
break;
case 46:
if (curChar == 92)
- jjCheckNAddTwoStates(47, 47);
+ jjCheckNAdd(47);
break;
case 47:
jjCheckNAddStates(18, 20);
@@ -645,7 +645,7 @@ private int jjMoveNfa_0(int startState, int curPos)
break;
if (kind > 27)
kind = 27;
- jjAddStates(31, 32);
+ jjAddStates(27, 28);
break;
case 1:
if (curChar == 46)
@@ -799,11 +799,11 @@ private int jjMoveNfa_1(int startState, int curPos)
break;
case 2:
if ((0xfffffffbffffffffL & l) != 0L)
- jjCheckNAddStates(33, 35);
+ jjCheckNAddStates(29, 31);
break;
case 3:
if (curChar == 34)
- jjCheckNAddStates(33, 35);
+ jjCheckNAddStates(29, 31);
break;
case 5:
if (curChar == 34 && kind > 31)
@@ -836,7 +836,7 @@ private int jjMoveNfa_1(int startState, int curPos)
jjCheckNAdd(6);
break;
case 2:
- jjAddStates(33, 35);
+ jjAddStates(29, 31);
break;
case 4:
if (curChar == 92)
@@ -872,7 +872,7 @@ private int jjMoveNfa_1(int startState, int curPos)
break;
case 2:
if (jjCanMove_1(hiByte, i1, i2, l1, l2))
- jjAddStates(33, 35);
+ jjAddStates(29, 31);
break;
case 6:
if (!jjCanMove_1(hiByte, i1, i2, l1, l2))
@@ -899,9 +899,8 @@ private int jjMoveNfa_1(int startState, int curPos)
}
}
static final int[] jjnextStates = {
- 37, 39, 40, 17, 18, 20, 42, 45, 31, 46, 43, 22, 23, 25, 26, 24,
- 25, 26, 45, 31, 46, 44, 47, 35, 22, 28, 29, 27, 27, 30, 30, 0,
- 1, 2, 4, 5,
+ 37, 39, 40, 17, 18, 20, 42, 43, 45, 46, 31, 22, 23, 25, 26, 24,
+ 25, 26, 45, 46, 31, 44, 47, 35, 22, 28, 29, 0, 1, 2, 4, 5,
};
private static final boolean jjCanMove_0(int hiByte, int i1, int i2, long l1, long l2)
{
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/7d092fac/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/Token.java
----------------------------------------------------------------------
diff --git a/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/Token.java b/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/Token.java
index aa57487..0e52ec2 100644
--- a/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/Token.java
+++ b/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/Token.java
@@ -128,4 +128,4 @@ public class Token implements java.io.Serializable {
}
}
-/* JavaCC - OriginalChecksum=c1e1418b35aa9e47ef8dc98b87423d70 (do not edit this line) */
+/* JavaCC - OriginalChecksum=405bb5d2fcd84e94ac1c8f0b12c1f914 (do not edit this line) */
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/7d092fac/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/TokenMgrError.java
----------------------------------------------------------------------
diff --git a/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/TokenMgrError.java b/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/TokenMgrError.java
index 7101f09..ad111d0 100644
--- a/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/TokenMgrError.java
+++ b/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/TokenMgrError.java
@@ -144,4 +144,4 @@ public class TokenMgrError extends Error
this(LexicalError(EOFSeen, lexState, errorLine, errorColumn, errorAfter, curChar), reason);
}
}
-/* JavaCC - OriginalChecksum=0c275864a1972d9a01601ab81426872d (do not edit this line) */
+/* JavaCC - OriginalChecksum=f433e1a52b8eadbf12f3fbbbf87fd140 (do not edit this line) */
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/7d092fac/lucene/queryparser/src/test/org/apache/lucene/queryparser/classic/TestQueryParser.java
----------------------------------------------------------------------
diff --git a/lucene/queryparser/src/test/org/apache/lucene/queryparser/classic/TestQueryParser.java b/lucene/queryparser/src/test/org/apache/lucene/queryparser/classic/TestQueryParser.java
index 5b4eba8..c3d7b37 100644
--- a/lucene/queryparser/src/test/org/apache/lucene/queryparser/classic/TestQueryParser.java
+++ b/lucene/queryparser/src/test/org/apache/lucene/queryparser/classic/TestQueryParser.java
@@ -18,6 +18,7 @@ package org.apache.lucene.queryparser.classic;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.analysis.MockSynonymAnalyzer;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
@@ -33,6 +34,7 @@ import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.BoostQuery;
import org.apache.lucene.search.MultiPhraseQuery;
+import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.SynonymQuery;
import org.apache.lucene.search.TermQuery;
@@ -44,7 +46,9 @@ import java.io.IOException;
* Tests QueryParser.
*/
public class TestQueryParser extends QueryParserTestBase {
-
+
+ protected boolean splitOnWhitespace = QueryParser.DEFAULT_SPLIT_ON_WHITESPACE;
+
public static class QPTestParser extends QueryParser {
public QPTestParser(String f, Analyzer a) {
super(f, a);
@@ -67,6 +71,7 @@ public class TestQueryParser extends QueryParserTestBase {
if (a == null) a = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true);
QueryParser qp = new QueryParser(getDefaultField(), a);
qp.setDefaultOperator(QueryParserBase.OR_OPERATOR);
+ qp.setSplitOnWhitespace(splitOnWhitespace);
return qp;
}
@@ -310,18 +315,7 @@ public class TestQueryParser extends QueryParserTestBase {
Query unexpanded = new TermQuery(new Term("field", "dogs"));
assertEquals(unexpanded, smart.parse("\"dogs\""));
}
-
- // TODO: fold these into QueryParserTestBase
-
- /** adds synonym of "dog" for "dogs". */
- static class MockSynonymAnalyzer extends Analyzer {
- @Override
- protected TokenStreamComponents createComponents(String fieldName) {
- MockTokenizer tokenizer = new MockTokenizer();
- return new TokenStreamComponents(tokenizer, new MockSynonymFilter(tokenizer));
- }
- }
-
+
/** simple synonyms test */
public void testSynonyms() throws Exception {
Query expected = new SynonymQuery(new Term("field", "dogs"), new Term("field", "dog"));
@@ -483,4 +477,229 @@ public class TestQueryParser extends QueryParserTestBase {
qp.parse("a*aaaaaaa");
});
}
-}
+
+ // TODO: Remove this specialization once the flexible standard parser gets multi-word synonym support
+ @Override
+ public void testQPA() throws Exception {
+ boolean oldSplitOnWhitespace = splitOnWhitespace;
+ splitOnWhitespace = false;
+
+ assertQueryEquals("term phrase term", qpAnalyzer, "term phrase1 phrase2 term");
+
+ CommonQueryParserConfiguration cqpc = getParserConfig(qpAnalyzer);
+ setDefaultOperatorAND(cqpc);
+ assertQueryEquals(cqpc, "field", "term phrase term", "+term +phrase1 +phrase2 +term");
+
+ splitOnWhitespace = oldSplitOnWhitespace;
+ }
+
+ // TODO: Move to QueryParserTestBase once standard flexible parser gets this capability
+ public void testMultiWordSynonyms() throws Exception {
+ QueryParser dumb = new QueryParser("field", new Analyzer1());
+ dumb.setSplitOnWhitespace(false);
+
+ // A multi-word synonym source will form a synonym query for the same-starting-position tokens
+ BooleanQuery.Builder multiWordExpandedBqBuilder = new BooleanQuery.Builder();
+ Query multiWordSynonymQuery = new SynonymQuery(new Term("field", "guinea"), new Term("field", "cavy"));
+ multiWordExpandedBqBuilder.add(multiWordSynonymQuery, BooleanClause.Occur.SHOULD);
+ multiWordExpandedBqBuilder.add(new TermQuery(new Term("field", "pig")), BooleanClause.Occur.SHOULD);
+ Query multiWordExpandedBq = multiWordExpandedBqBuilder.build();
+ assertEquals(multiWordExpandedBq, dumb.parse("guinea pig"));
+
+ // With the phrase operator, a multi-word synonym source will form a multiphrase query.
+ // When the number of expanded term(s) is different from that of the original term(s), this is not good.
+ MultiPhraseQuery.Builder multiWordExpandedMpqBuilder = new MultiPhraseQuery.Builder();
+ multiWordExpandedMpqBuilder.add(new Term[]{new Term("field", "guinea"), new Term("field", "cavy")});
+ multiWordExpandedMpqBuilder.add(new Term("field", "pig"));
+ Query multiWordExpandedMPQ = multiWordExpandedMpqBuilder.build();
+ assertEquals(multiWordExpandedMPQ, dumb.parse("\"guinea pig\""));
+
+ // custom behavior, the synonyms are expanded, unless you use quote operator
+ QueryParser smart = new SmartQueryParser();
+ smart.setSplitOnWhitespace(false);
+ assertEquals(multiWordExpandedBq, smart.parse("guinea pig"));
+
+ PhraseQuery.Builder multiWordUnexpandedPqBuilder = new PhraseQuery.Builder();
+ multiWordUnexpandedPqBuilder.add(new Term("field", "guinea"));
+ multiWordUnexpandedPqBuilder.add(new Term("field", "pig"));
+ Query multiWordUnexpandedPq = multiWordUnexpandedPqBuilder.build();
+ assertEquals(multiWordUnexpandedPq, smart.parse("\"guinea pig\""));
+ }
+
+ // TODO: Move to QueryParserTestBase once standard flexible parser gets this capability
+ public void testOperatorsAndMultiWordSynonyms() throws Exception {
+ Analyzer a = new MockSynonymAnalyzer();
+
+ boolean oldSplitOnWhitespace = splitOnWhitespace;
+ splitOnWhitespace = false;
+
+ // Operators should interrupt multiword analysis of adjacent words if they associate
+ assertQueryEquals("+guinea pig", a, "+guinea pig");
+ assertQueryEquals("-guinea pig", a, "-guinea pig");
+ assertQueryEquals("!guinea pig", a, "-guinea pig");
+ assertQueryEquals("guinea* pig", a, "guinea* pig");
+ assertQueryEquals("guinea? pig", a, "guinea? pig");
+ assertQueryEquals("guinea~2 pig", a, "guinea~2 pig");
+ assertQueryEquals("guinea^2 pig", a, "(guinea)^2.0 pig");
+
+ assertQueryEquals("guinea +pig", a, "guinea +pig");
+ assertQueryEquals("guinea -pig", a, "guinea -pig");
+ assertQueryEquals("guinea !pig", a, "guinea -pig");
+ assertQueryEquals("guinea pig*", a, "guinea pig*");
+ assertQueryEquals("guinea pig?", a, "guinea pig?");
+ assertQueryEquals("guinea pig~2", a, "guinea pig~2");
+ assertQueryEquals("guinea pig^2", a, "guinea (pig)^2.0");
+
+ assertQueryEquals("field:guinea pig", a, "guinea pig");
+ assertQueryEquals("guinea field:pig", a, "guinea pig");
+
+ assertQueryEquals("NOT guinea pig", a, "-guinea pig");
+ assertQueryEquals("guinea NOT pig", a, "guinea -pig");
+
+ assertQueryEquals("guinea pig AND dogs", a, "guinea +pig +Synonym(dog dogs)");
+ assertQueryEquals("dogs AND guinea pig", a, "+Synonym(dog dogs) +guinea pig");
+ assertQueryEquals("guinea pig && dogs", a, "guinea +pig +Synonym(dog dogs)");
+ assertQueryEquals("dogs && guinea pig", a, "+Synonym(dog dogs) +guinea pig");
+
+ assertQueryEquals("guinea pig OR dogs", a, "guinea pig Synonym(dog dogs)");
+ assertQueryEquals("dogs OR guinea pig", a, "Synonym(dog dogs) guinea pig");
+ assertQueryEquals("guinea pig || dogs", a, "guinea pig Synonym(dog dogs)");
+ assertQueryEquals("dogs || guinea pig", a, "Synonym(dog dogs) guinea pig");
+
+ assertQueryEquals("\"guinea\" pig", a, "guinea pig");
+ assertQueryEquals("guinea \"pig\"", a, "guinea pig");
+
+ assertQueryEquals("(guinea) pig", a, "guinea pig");
+ assertQueryEquals("guinea (pig)", a, "guinea pig");
+
+ assertQueryEquals("/guinea/ pig", a, "/guinea/ pig");
+ assertQueryEquals("guinea /pig/", a, "guinea /pig/");
+
+ // Operators should not interrupt multiword analysis if they don't associate
+ assertQueryEquals("(guinea pig)", a, "Synonym(cavy guinea) pig");
+ assertQueryEquals("+(guinea pig)", a, "+(Synonym(cavy guinea) pig)");
+ assertQueryEquals("-(guinea pig)", a, "-(Synonym(cavy guinea) pig)");
+ assertQueryEquals("!(guinea pig)", a, "-(Synonym(cavy guinea) pig)");
+ assertQueryEquals("NOT (guinea pig)", a, "-(Synonym(cavy guinea) pig)");
+ assertQueryEquals("(guinea pig)^2", a, "(Synonym(cavy guinea) pig)^2.0");
+
+ assertQueryEquals("field:(guinea pig)", a, "Synonym(cavy guinea) pig");
+
+ assertQueryEquals("+small guinea pig", a, "+small Synonym(cavy guinea) pig");
+ assertQueryEquals("-small guinea pig", a, "-small Synonym(cavy guinea) pig");
+ assertQueryEquals("!small guinea pig", a, "-small Synonym(cavy guinea) pig");
+ assertQueryEquals("NOT small guinea pig", a, "-small Synonym(cavy guinea) pig");
+ assertQueryEquals("small* guinea pig", a, "small* Synonym(cavy guinea) pig");
+ assertQueryEquals("small? guinea pig", a, "small? Synonym(cavy guinea) pig");
+ assertQueryEquals("\"small\" guinea pig", a, "small Synonym(cavy guinea) pig");
+
+ assertQueryEquals("guinea pig +running", a, "Synonym(cavy guinea) pig +running");
+ assertQueryEquals("guinea pig -running", a, "Synonym(cavy guinea) pig -running");
+ assertQueryEquals("guinea pig !running", a, "Synonym(cavy guinea) pig -running");
+ assertQueryEquals("guinea pig NOT running", a, "Synonym(cavy guinea) pig -running");
+ assertQueryEquals("guinea pig running*", a, "Synonym(cavy guinea) pig running*");
+ assertQueryEquals("guinea pig running?", a, "Synonym(cavy guinea) pig running?");
+ assertQueryEquals("guinea pig \"running\"", a, "Synonym(cavy guinea) pig running");
+
+ assertQueryEquals("\"guinea pig\"~2", a, "\"(guinea cavy) pig\"~2");
+
+ assertQueryEquals("field:\"guinea pig\"", a, "\"(guinea cavy) pig\"");
+
+ splitOnWhitespace = oldSplitOnWhitespace;
+ }
+
+ public void testOperatorsAndMultiWordSynonymsSplitOnWhitespace() throws Exception {
+ Analyzer a = new MockSynonymAnalyzer();
+
+ boolean oldSplitOnWhitespace = splitOnWhitespace;
+ splitOnWhitespace = true;
+
+ assertQueryEquals("+guinea pig", a, "+guinea pig");
+ assertQueryEquals("-guinea pig", a, "-guinea pig");
+ assertQueryEquals("!guinea pig", a, "-guinea pig");
+ assertQueryEquals("guinea* pig", a, "guinea* pig");
+ assertQueryEquals("guinea? pig", a, "guinea? pig");
+ assertQueryEquals("guinea~2 pig", a, "guinea~2 pig");
+ assertQueryEquals("guinea^2 pig", a, "(guinea)^2.0 pig");
+
+ assertQueryEquals("guinea +pig", a, "guinea +pig");
+ assertQueryEquals("guinea -pig", a, "guinea -pig");
+ assertQueryEquals("guinea !pig", a, "guinea -pig");
+ assertQueryEquals("guinea pig*", a, "guinea pig*");
+ assertQueryEquals("guinea pig?", a, "guinea pig?");
+ assertQueryEquals("guinea pig~2", a, "guinea pig~2");
+ assertQueryEquals("guinea pig^2", a, "guinea (pig)^2.0");
+
+ assertQueryEquals("field:guinea pig", a, "guinea pig");
+ assertQueryEquals("guinea field:pig", a, "guinea pig");
+
+ assertQueryEquals("NOT guinea pig", a, "-guinea pig");
+ assertQueryEquals("guinea NOT pig", a, "guinea -pig");
+
+ assertQueryEquals("guinea pig AND dogs", a, "guinea +pig +Synonym(dog dogs)");
+ assertQueryEquals("dogs AND guinea pig", a, "+Synonym(dog dogs) +guinea pig");
+ assertQueryEquals("guinea pig && dogs", a, "guinea +pig +Synonym(dog dogs)");
+ assertQueryEquals("dogs && guinea pig", a, "+Synonym(dog dogs) +guinea pig");
+
+ assertQueryEquals("guinea pig OR dogs", a, "guinea pig Synonym(dog dogs)");
+ assertQueryEquals("dogs OR guinea pig", a, "Synonym(dog dogs) guinea pig");
+ assertQueryEquals("guinea pig || dogs", a, "guinea pig Synonym(dog dogs)");
+ assertQueryEquals("dogs || guinea pig", a, "Synonym(dog dogs) guinea pig");
+
+ assertQueryEquals("\"guinea\" pig", a, "guinea pig");
+ assertQueryEquals("guinea \"pig\"", a, "guinea pig");
+
+ assertQueryEquals("(guinea) pig", a, "guinea pig");
+ assertQueryEquals("guinea (pig)", a, "guinea pig");
+
+ assertQueryEquals("/guinea/ pig", a, "/guinea/ pig");
+ assertQueryEquals("guinea /pig/", a, "guinea /pig/");
+
+ assertQueryEquals("(guinea pig)", a, "guinea pig");
+ assertQueryEquals("+(guinea pig)", a, "+(guinea pig)");
+ assertQueryEquals("-(guinea pig)", a, "-(guinea pig)");
+ assertQueryEquals("!(guinea pig)", a, "-(guinea pig)");
+ assertQueryEquals("NOT (guinea pig)", a, "-(guinea pig)");
+ assertQueryEquals("(guinea pig)^2", a, "(guinea pig)^2.0");
+
+ assertQueryEquals("field:(guinea pig)", a, "guinea pig");
+
+ assertQueryEquals("+small guinea pig", a, "+small guinea pig");
+ assertQueryEquals("-small guinea pig", a, "-small guinea pig");
+ assertQueryEquals("!small guinea pig", a, "-small guinea pig");
+ assertQueryEquals("NOT small guinea pig", a, "-small guinea pig");
+ assertQueryEquals("small* guinea pig", a, "small* guinea pig");
+ assertQueryEquals("small? guinea pig", a, "small? guinea pig");
+ assertQueryEquals("\"small\" guinea pig", a, "small guinea pig");
+
+ assertQueryEquals("guinea pig +running", a, "guinea pig +running");
+ assertQueryEquals("guinea pig -running", a, "guinea pig -running");
+ assertQueryEquals("guinea pig !running", a, "guinea pig -running");
+ assertQueryEquals("guinea pig NOT running", a, "guinea pig -running");
+ assertQueryEquals("guinea pig running*", a, "guinea pig running*");
+ assertQueryEquals("guinea pig running?", a, "guinea pig running?");
+ assertQueryEquals("guinea pig \"running\"", a, "guinea pig running");
+
+ assertQueryEquals("\"guinea pig\"~2", a, "\"(guinea cavy) pig\"~2");
+
+ assertQueryEquals("field:\"guinea pig\"", a, "\"(guinea cavy) pig\"");
+
+ splitOnWhitespace = oldSplitOnWhitespace;
+ }
+
+ public void testDefaultSplitOnWhitespace() throws Exception {
+ QueryParser parser = new QueryParser("field", new Analyzer1());
+
+ assertTrue(parser.getSplitOnWhitespace()); // default is true
+
+ BooleanQuery.Builder bqBuilder = new BooleanQuery.Builder();
+ bqBuilder.add(new TermQuery(new Term("field", "guinea")), BooleanClause.Occur.SHOULD);
+ bqBuilder.add(new TermQuery(new Term("field", "pig")), BooleanClause.Occur.SHOULD);
+ assertEquals(bqBuilder.build(), parser.parse("guinea pig"));
+
+ boolean oldSplitOnWhitespace = splitOnWhitespace;
+ splitOnWhitespace = QueryParser.DEFAULT_SPLIT_ON_WHITESPACE;
+ assertQueryEquals("guinea pig", new MockSynonymAnalyzer(), "guinea pig");
+ splitOnWhitespace = oldSplitOnWhitespace;
+ }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/7d092fac/lucene/queryparser/src/test/org/apache/lucene/queryparser/ext/TestExtendableQueryParser.java
----------------------------------------------------------------------
diff --git a/lucene/queryparser/src/test/org/apache/lucene/queryparser/ext/TestExtendableQueryParser.java b/lucene/queryparser/src/test/org/apache/lucene/queryparser/ext/TestExtendableQueryParser.java
index 785dd1c..934a4da 100644
--- a/lucene/queryparser/src/test/org/apache/lucene/queryparser/ext/TestExtendableQueryParser.java
+++ b/lucene/queryparser/src/test/org/apache/lucene/queryparser/ext/TestExtendableQueryParser.java
@@ -50,6 +50,7 @@ public class TestExtendableQueryParser extends TestQueryParser {
getDefaultField(), a) : new ExtendableQueryParser(
getDefaultField(), a, extensions);
qp.setDefaultOperator(QueryParserBase.OR_OPERATOR);
+ qp.setSplitOnWhitespace(splitOnWhitespace);
return qp;
}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/7d092fac/lucene/queryparser/src/test/org/apache/lucene/queryparser/flexible/standard/TestStandardQP.java
----------------------------------------------------------------------
diff --git a/lucene/queryparser/src/test/org/apache/lucene/queryparser/flexible/standard/TestStandardQP.java b/lucene/queryparser/src/test/org/apache/lucene/queryparser/flexible/standard/TestStandardQP.java
index 25c737f..78d2bfd 100644
--- a/lucene/queryparser/src/test/org/apache/lucene/queryparser/flexible/standard/TestStandardQP.java
+++ b/lucene/queryparser/src/test/org/apache/lucene/queryparser/flexible/standard/TestStandardQP.java
@@ -203,4 +203,15 @@ public class TestStandardQP extends QueryParserTestBase {
//TODO test something like "SmartQueryParser()"
}
+ // TODO: Remove this specialization once the flexible standard parser gets multi-word synonym support
+ @Override
+ public void testQPA() throws Exception {
+ super.testQPA();
+
+ assertQueryEquals("term phrase term", qpAnalyzer, "term (phrase1 phrase2) term"); // pre-multi-word-synonym expectation, kept here for the flexible parser
+
+ CommonQueryParserConfiguration cqpc = getParserConfig(qpAnalyzer);
+ setDefaultOperatorAND(cqpc);
+ assertQueryEquals(cqpc, "field", "term phrase term", "+term +(+phrase1 +phrase2) +term"); // ditto, with AND as default operator
+ }
}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/7d092fac/lucene/queryparser/src/test/org/apache/lucene/queryparser/util/QueryParserTestBase.java
----------------------------------------------------------------------
diff --git a/lucene/queryparser/src/test/org/apache/lucene/queryparser/util/QueryParserTestBase.java b/lucene/queryparser/src/test/org/apache/lucene/queryparser/util/QueryParserTestBase.java
index 70dc15a..f1eccf4 100644
--- a/lucene/queryparser/src/test/org/apache/lucene/queryparser/util/QueryParserTestBase.java
+++ b/lucene/queryparser/src/test/org/apache/lucene/queryparser/util/QueryParserTestBase.java
@@ -27,7 +27,6 @@ import java.util.TimeZone;
import org.apache.lucene.analysis.*;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
-import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.document.DateTools;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
@@ -535,8 +534,10 @@ public abstract class QueryParserTestBase extends LuceneTestCase {
assertQueryEquals("term -(stop) term", qpAnalyzer, "term term");
assertQueryEquals("drop AND stop AND roll", qpAnalyzer, "+drop +roll");
- assertQueryEquals("term phrase term", qpAnalyzer,
- "term (phrase1 phrase2) term");
+
+// TODO: Re-enable once flexible standard parser gets multi-word synonym support
+// assertQueryEquals("term phrase term", qpAnalyzer,
+// "term phrase1 phrase2 term");
assertQueryEquals("term AND NOT phrase term", qpAnalyzer,
"+term -(phrase1 phrase2) term");
assertQueryEquals("stop^3", qpAnalyzer, "");
@@ -552,8 +553,9 @@ public abstract class QueryParserTestBase extends LuceneTestCase {
CommonQueryParserConfiguration cqpc = getParserConfig(qpAnalyzer);
setDefaultOperatorAND(cqpc);
- assertQueryEquals(cqpc, "field", "term phrase term",
- "+term +(+phrase1 +phrase2) +term");
+// TODO: Re-enable once flexible standard parser gets multi-word synonym support
+// assertQueryEquals(cqpc, "field", "term phrase term",
+// "+term +phrase1 +phrase2 +term");
assertQueryEquals(cqpc, "field", "phrase",
"+phrase1 +phrase2");
}
@@ -1101,37 +1103,6 @@ public abstract class QueryParserTestBase extends LuceneTestCase {
dir.close();
}
- /**
- * adds synonym of "dog" for "dogs".
- */
- protected static class MockSynonymFilter extends TokenFilter {
- CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
- PositionIncrementAttribute posIncAtt = addAttribute(PositionIncrementAttribute.class);
- boolean addSynonym = false;
-
- public MockSynonymFilter(TokenStream input) {
- super(input);
- }
-
- @Override
- public final boolean incrementToken() throws IOException {
- if (addSynonym) { // inject our synonym
- clearAttributes();
- termAtt.setEmpty().append("dog");
- posIncAtt.setPositionIncrement(0);
- addSynonym = false;
- return true;
- }
-
- if (input.incrementToken()) {
- addSynonym = termAtt.toString().equals("dogs");
- return true;
- } else {
- return false;
- }
- }
- }
-
/** whitespace+lowercase analyzer with synonyms */
protected class Analyzer1 extends Analyzer {
public Analyzer1(){
@@ -1251,10 +1222,8 @@ public abstract class QueryParserTestBase extends LuceneTestCase {
CharacterRunAutomaton stopStopList =
new CharacterRunAutomaton(new RegExp("[sS][tT][oO][pP]").toAutomaton());
- CommonQueryParserConfiguration qp = getParserConfig(new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false, stopStopList));
-
- qp = getParserConfig(
- new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false, stopStopList));
+ CommonQueryParserConfiguration qp
+ = getParserConfig(new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false, stopStopList));
qp.setEnablePositionIncrements(true);
PhraseQuery.Builder phraseQuery = new PhraseQuery.Builder();
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/7d092fac/lucene/test-framework/src/java/org/apache/lucene/analysis/MockSynonymAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/test-framework/src/java/org/apache/lucene/analysis/MockSynonymAnalyzer.java b/lucene/test-framework/src/java/org/apache/lucene/analysis/MockSynonymAnalyzer.java
new file mode 100644
index 0000000..a2ce33e
--- /dev/null
+++ b/lucene/test-framework/src/java/org/apache/lucene/analysis/MockSynonymAnalyzer.java
@@ -0,0 +1,28 @@
+package org.apache.lucene.analysis;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/** Analyzer whose token stream adds "dog" as a synonym of "dogs", and "cavy" as a synonym of "guinea pig". */
+public class MockSynonymAnalyzer extends Analyzer {
+ @Override
+ protected TokenStreamComponents createComponents(String fieldName) {
+ MockTokenizer source = new MockTokenizer();
+ return new TokenStreamComponents(source, new MockSynonymFilter(source));
+ }
+}
+
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/7d092fac/lucene/test-framework/src/java/org/apache/lucene/analysis/MockSynonymFilter.java
----------------------------------------------------------------------
diff --git a/lucene/test-framework/src/java/org/apache/lucene/analysis/MockSynonymFilter.java b/lucene/test-framework/src/java/org/apache/lucene/analysis/MockSynonymFilter.java
new file mode 100644
index 0000000..b50be07
--- /dev/null
+++ b/lucene/test-framework/src/java/org/apache/lucene/analysis/MockSynonymFilter.java
@@ -0,0 +1,97 @@
+package org.apache.lucene.analysis;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
+import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute;
+import org.apache.lucene.util.AttributeSource;
+
+/** adds synonym of "dog" for "dogs", and synonym of "cavy" for "guinea pig". */
+public class MockSynonymFilter extends TokenFilter {
+ CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+ PositionIncrementAttribute posIncAtt = addAttribute(PositionIncrementAttribute.class);
+ OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
+ PositionLengthAttribute posLenAtt = addAttribute(PositionLengthAttribute.class);
+ List<AttributeSource> tokenQueue = new ArrayList<>(); // tokens buffered for emission on later incrementToken() calls
+ boolean endOfInput = false; // set once the wrapped stream is exhausted (possibly during lookahead)
+
+ public MockSynonymFilter(TokenStream input) {
+ super(input);
+ }
+
+ @Override
+ public void reset() throws IOException {
+ super.reset();
+ tokenQueue.clear(); // drop tokens buffered during a previous pass
+ endOfInput = false;
+ }
+
+ @Override
+ public final boolean incrementToken() throws IOException {
+ if (tokenQueue.size() > 0) { // drain buffered tokens before pulling from the input
+ tokenQueue.remove(0).copyTo(this);
+ return true;
+ }
+ if (endOfInput == false && input.incrementToken()) {
+ if (termAtt.toString().equals("dogs")) {
+ addSynonymAndRestoreOrigToken("dog", 1, offsetAtt.endOffset());
+ } else if (termAtt.toString().equals("guinea")) {
+ AttributeSource firstSavedToken = cloneAttributes();
+ if (input.incrementToken()) { // look ahead one token for "pig"
+ if (termAtt.toString().equals("pig")) {
+ AttributeSource secondSavedToken = cloneAttributes();
+ int secondEndOffset = offsetAtt.endOffset();
+ firstSavedToken.copyTo(this);
+ addSynonym("cavy", 2, secondEndOffset); // posLen=2: synonym spans both original tokens
+ tokenQueue.add(secondSavedToken);
+ } else { // buffer the looked-ahead token so it is not silently dropped (bug fix)
+ tokenQueue.add(cloneAttributes());
+ if (termAtt.toString().equals("dogs")) addSynonym("dog", 1, offsetAtt.endOffset());
+ }
+ } else {
+ endOfInput = true;
+ }
+ firstSavedToken.copyTo(this); // emit "guinea" now; buffered tokens follow on later calls
+ }
+ return true;
+ } else {
+ endOfInput = true;
+ return false;
+ }
+ }
+ private void addSynonym(String synonymText, int posLen, int endOffset) { // mutates current attrs to the synonym and queues a clone; caller must restore the original
+ termAtt.setEmpty().append(synonymText);
+ posIncAtt.setPositionIncrement(0); // synonym stacks on the current token's position
+ posLenAtt.setPositionLength(posLen); // >1 when the synonym spans multiple original tokens
+ offsetAtt.setOffset(offsetAtt.startOffset(), endOffset);
+ tokenQueue.add(cloneAttributes());
+ }
+ private void addSynonymAndRestoreOrigToken(String synonymText, int posLen, int endOffset) {
+ AttributeSource origToken = cloneAttributes();
+ addSynonym(synonymText, posLen, endOffset);
+ origToken.copyTo(this); // original token is emitted first; synonym sits in the queue behind it
+ }
+}
+
+