You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@lucene.apache.org by go...@apache.org on 2004/10/01 11:59:23 UTC
cvs commit: jakarta-lucene/src/test/org/apache/lucene/queryParser TestQueryParser.java
goller 2004/10/01 02:59:23
Modified: src/java/org/apache/lucene/queryParser Tag: lucene_1_4_2_dev
QueryParser.java QueryParser.jj
QueryParserConstants.java
QueryParserTokenManager.java
src/test/org/apache/lucene/queryParser Tag: lucene_1_4_2_dev
TestQueryParser.java
Log:
Fix for ArrayIndexOutOfBoundsException in QueryParser
(patch #9110), some unused method
parameters removed, minimum similarity for FuzzyQuery.
(Backport)
Revision Changes Path
No revision
No revision
1.11.2.1 +84 -45 jakarta-lucene/src/java/org/apache/lucene/queryParser/QueryParser.java
Index: QueryParser.java
===================================================================
RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/queryParser/QueryParser.java,v
retrieving revision 1.11
retrieving revision 1.11.2.1
diff -u -r1.11 -r1.11.2.1
--- QueryParser.java 22 May 2004 17:34:31 -0000 1.11
+++ QueryParser.java 1 Oct 2004 09:59:23 -0000 1.11.2.1
@@ -73,6 +73,7 @@
Analyzer analyzer;
String field;
int phraseSlop = 0;
+ float fuzzyMinSim = FuzzyQuery.defaultMinSimilarity;
Locale locale = Locale.getDefault();
/** Parses a query string, returning a {@link org.apache.lucene.search.Query}.
@@ -115,6 +116,33 @@
}
}
+ /**
+ * @return Returns the analyzer.
+ */
+ public Analyzer getAnalyzer() {
+ return analyzer;
+ }
+
+ /**
+ * @return Returns the field.
+ */
+ public String getField() {
+ return field;
+ }
+
+ /**
+ * Get the default minimal similarity for fuzzy queries.
+ */
+ public float getFuzzyMinSim() {
+ return fuzzyMinSim;
+ }
+ /**
+ *Set the default minimum similarity for fuzzy queries.
+ */
+ public void setFuzzyMinSim(float fuzzyMinSim) {
+ this.fuzzyMinSim = fuzzyMinSim;
+ }
+
/**
* Sets the default slop for phrases. If zero, then exact phrase matches
* are required. Default value is zero.
@@ -172,18 +200,18 @@
return locale;
}
- protected void addClause(Vector clauses, int conj, int mods, Query q) {
+ protected void addClause(Vector clauses, int conj, int mods, Query q) {
boolean required, prohibited;
// If this term is introduced by AND, make the preceding term required,
// unless it's already prohibited
- if (conj == CONJ_AND) {
+ if (clauses.size() > 0 && conj == CONJ_AND) {
BooleanClause c = (BooleanClause) clauses.elementAt(clauses.size()-1);
if (!c.prohibited)
c.required = true;
}
- if (operator == DEFAULT_OPERATOR_AND && conj == CONJ_OR) {
+ if (clauses.size() > 0 && operator == DEFAULT_OPERATOR_AND && conj == CONJ_OR) {
// If this term is introduced by OR, make the preceding term optional,
// unless it's prohibited (that means we leave -a OR b but +a OR b-->a OR b)
// notice if the input is a OR b, first term is parsed as required; without
@@ -218,9 +246,7 @@
/**
* @exception ParseException throw in overridden method to disallow
*/
- protected Query getFieldQuery(String field,
- Analyzer analyzer,
- String queryText) throws ParseException {
+ protected Query getFieldQuery(String field, String queryText) throws ParseException {
// Use the analyzer to get all the tokens, and then build a TermQuery,
// PhraseQuery, or nothing based on the term count
@@ -262,17 +288,15 @@
}
/**
- * Base implementation delegates to {@link #getFieldQuery(String,Analyzer,String)}.
+ * Base implementation delegates to {@link #getFieldQuery(String,String)}.
* This method may be overridden, for example, to return
* a SpanNearQuery instead of a PhraseQuery.
*
* @exception ParseException throw in overridden method to disallow
*/
- protected Query getFieldQuery(String field,
- Analyzer analyzer,
- String queryText,
- int slop) throws ParseException {
- Query query = getFieldQuery(field, analyzer, queryText);
+ protected Query getFieldQuery(String field, String queryText, int slop)
+ throws ParseException {
+ Query query = getFieldQuery(field, queryText);
if (query instanceof PhraseQuery) {
((PhraseQuery) query).setSlop(slop);
@@ -285,7 +309,6 @@
* @exception ParseException throw in overridden method to disallow
*/
protected Query getRangeQuery(String field,
- Analyzer analyzer,
String part1,
String part2,
boolean inclusive) throws ParseException
@@ -400,10 +423,10 @@
* @return Resulting {@link Query} built for the term
* @exception ParseException throw in overridden method to disallow
*/
- protected Query getFuzzyQuery(String field, String termStr) throws ParseException
+ protected Query getFuzzyQuery(String field, String termStr, float minSimilarity) throws ParseException
{
Term t = new Term(field, termStr);
- return new FuzzyQuery(t);
+ return new FuzzyQuery(t, minSimilarity);
}
/**
@@ -422,6 +445,25 @@
return new String(caDest, 0, j);
}
+ /**
+ * Returns a String where those characters that QueryParser
+ * expects to be escaped are escaped, i.e. preceded by a <code>\</code>.
+ */
+ public static String escape(String s) {
+ StringBuffer sb = new StringBuffer();
+ for (int i = 0; i < s.length(); i++) {
+ char c = s.charAt(i);
+ // NOTE: keep this in sync with _ESCAPED_CHAR below!
+ if (c == '\\' || c == '+' || c == '-' || c == '!' || c == '(' || c == ')' || c == ':'
+ || c == '^' || c == '[' || c == ']' || c == '\"' || c == '{' || c == '}' || c == '~'
+ || c == '*' || c == '?') {
+ sb.append('\\');
+ }
+ sb.append(c);
+ }
+ return sb.toString();
+ }
+
public static void main(String[] args) throws Exception {
QueryParser qp = new QueryParser("field",
new org.apache.lucene.analysis.SimpleAnalyzer());
@@ -587,7 +629,7 @@
}
final public Query Term(String field) throws ParseException {
- Token term, boost=null, slop=null, goop1, goop2;
+ Token term, boost=null, fuzzySlop=null, goop1, goop2;
boolean prefix = false;
boolean wildcard = false;
boolean fuzzy = false;
@@ -619,9 +661,9 @@
throw new ParseException();
}
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
- case FUZZY:
- jj_consume_token(FUZZY);
- fuzzy=true;
+ case FUZZY_SLOP:
+ fuzzySlop = jj_consume_token(FUZZY_SLOP);
+ fuzzy=true;
break;
default:
jj_la1[8] = jj_gen;
@@ -632,9 +674,9 @@
jj_consume_token(CARAT);
boost = jj_consume_token(NUMBER);
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
- case FUZZY:
- jj_consume_token(FUZZY);
- fuzzy=true;
+ case FUZZY_SLOP:
+ fuzzySlop = jj_consume_token(FUZZY_SLOP);
+ fuzzy=true;
break;
default:
jj_la1[9] = jj_gen;
@@ -653,9 +695,16 @@
discardEscapeChar(term.image.substring
(0, term.image.length()-1)));
} else if (fuzzy) {
- q = getFuzzyQuery(field, termImage);
+ float fms = fuzzyMinSim;
+ try {
+ fms = Float.valueOf(fuzzySlop.image.substring(1)).floatValue();
+ } catch (Exception ignored) { }
+ if(fms < 0.0f || fms > 1.0f){
+ {if (true) throw new ParseException("Minimum similarity for a FuzzyQuery has to be between 0.0f and 1.0f !");}
+ }
+ q = getFuzzyQuery(field, termImage, fms);
} else {
- q = getFieldQuery(field, analyzer, termImage);
+ q = getFieldQuery(field, termImage);
}
break;
case RANGEIN_START:
@@ -712,7 +761,7 @@
} else {
goop2.image = discardEscapeChar(goop2.image);
}
- q = getRangeQuery(field, analyzer, goop1.image, goop2.image, true);
+ q = getRangeQuery(field, goop1.image, goop2.image, true);
break;
case RANGEEX_START:
jj_consume_token(RANGEEX_START);
@@ -769,13 +818,13 @@
goop2.image = discardEscapeChar(goop2.image);
}
- q = getRangeQuery(field, analyzer, goop1.image, goop2.image, false);
+ q = getRangeQuery(field, goop1.image, goop2.image, false);
break;
case QUOTED:
term = jj_consume_token(QUOTED);
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
- case SLOP:
- slop = jj_consume_token(SLOP);
+ case FUZZY_SLOP:
+ fuzzySlop = jj_consume_token(FUZZY_SLOP);
break;
default:
jj_la1[19] = jj_gen;
@@ -792,15 +841,13 @@
}
int s = phraseSlop;
- if (slop != null) {
+ if (fuzzySlop != null) {
try {
- s = Float.valueOf(slop.image.substring(1)).intValue();
+ s = Float.valueOf(fuzzySlop.image.substring(1)).intValue();
}
catch (Exception ignored) { }
}
- q = getFieldQuery(field, analyzer,
- term.image.substring(1, term.image.length()-1),
- s);
+ q = getFieldQuery(field, term.image.substring(1, term.image.length()-1), s);
break;
default:
jj_la1[21] = jj_gen;
@@ -850,16 +897,11 @@
private int jj_gen;
final private int[] jj_la1 = new int[22];
static private int[] jj_la1_0;
- static private int[] jj_la1_1;
static {
jj_la1_0();
- jj_la1_1();
}
private static void jj_la1_0() {
- jj_la1_0 = new int[] {0x180,0x180,0xe00,0xe00,0x1f31f80,0x8000,0x1f31000,0x1320000,0x40000,0x40000,0x8000,0x18000000,0x2000000,0x18000000,0x8000,0x80000000,0x20000000,0x80000000,0x8000,0x80000,0x8000,0x1f30000,};
- }
- private static void jj_la1_1() {
- jj_la1_1 = new int[] {0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x0,0x1,0x0,0x0,0x0,0x0,};
+ jj_la1_0 = new int[] {0x180,0x180,0xe00,0xe00,0xfb1f80,0x8000,0xfb1000,0x9a0000,0x40000,0x40000,0x8000,0xc000000,0x1000000,0xc000000,0x8000,0xc0000000,0x10000000,0xc0000000,0x8000,0x40000,0x8000,0xfb0000,};
}
final private JJCalls[] jj_2_rtns = new JJCalls[1];
private boolean jj_rescan = false;
@@ -1008,8 +1050,8 @@
public ParseException generateParseException() {
jj_expentries.removeAllElements();
- boolean[] la1tokens = new boolean[33];
- for (int i = 0; i < 33; i++) {
+ boolean[] la1tokens = new boolean[32];
+ for (int i = 0; i < 32; i++) {
la1tokens[i] = false;
}
if (jj_kind >= 0) {
@@ -1022,13 +1064,10 @@
if ((jj_la1_0[i] & (1<<j)) != 0) {
la1tokens[j] = true;
}
- if ((jj_la1_1[i] & (1<<j)) != 0) {
- la1tokens[32+j] = true;
- }
}
}
}
- for (int i = 0; i < 33; i++) {
+ for (int i = 0; i < 32; i++) {
if (la1tokens[i]) {
jj_expentry = new int[1];
jj_expentry[0] = i;
1.43.2.1 +78 -31 jakarta-lucene/src/java/org/apache/lucene/queryParser/QueryParser.jj
Index: QueryParser.jj
===================================================================
RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/queryParser/QueryParser.jj,v
retrieving revision 1.43
retrieving revision 1.43.2.1
diff -u -r1.43 -r1.43.2.1
--- QueryParser.jj 22 May 2004 17:34:31 -0000 1.43
+++ QueryParser.jj 1 Oct 2004 09:59:23 -0000 1.43.2.1
@@ -96,6 +96,7 @@
Analyzer analyzer;
String field;
int phraseSlop = 0;
+ float fuzzyMinSim = FuzzyQuery.defaultMinSimilarity;
Locale locale = Locale.getDefault();
/** Parses a query string, returning a {@link org.apache.lucene.search.Query}.
@@ -137,6 +138,33 @@
throw new ParseException("Too many boolean clauses");
}
}
+
+ /**
+ * @return Returns the analyzer.
+ */
+ public Analyzer getAnalyzer() {
+ return analyzer;
+ }
+
+ /**
+ * @return Returns the field.
+ */
+ public String getField() {
+ return field;
+ }
+
+ /**
+ * Get the default minimal similarity for fuzzy queries.
+ */
+ public float getFuzzyMinSim() {
+ return fuzzyMinSim;
+ }
+ /**
+ *Set the default minimum similarity for fuzzy queries.
+ */
+ public void setFuzzyMinSim(float fuzzyMinSim) {
+ this.fuzzyMinSim = fuzzyMinSim;
+ }
/**
* Sets the default slop for phrases. If zero, then exact phrase matches
@@ -194,19 +222,19 @@
public Locale getLocale() {
return locale;
}
-
- protected void addClause(Vector clauses, int conj, int mods, Query q) {
+
+ protected void addClause(Vector clauses, int conj, int mods, Query q) {
boolean required, prohibited;
// If this term is introduced by AND, make the preceding term required,
// unless it's already prohibited
- if (conj == CONJ_AND) {
+ if (clauses.size() > 0 && conj == CONJ_AND) {
BooleanClause c = (BooleanClause) clauses.elementAt(clauses.size()-1);
if (!c.prohibited)
c.required = true;
}
- if (operator == DEFAULT_OPERATOR_AND && conj == CONJ_OR) {
+ if (clauses.size() > 0 && operator == DEFAULT_OPERATOR_AND && conj == CONJ_OR) {
// If this term is introduced by OR, make the preceding term optional,
// unless it's prohibited (that means we leave -a OR b but +a OR b-->a OR b)
// notice if the input is a OR b, first term is parsed as required; without
@@ -241,9 +269,7 @@
/**
* @exception ParseException throw in overridden method to disallow
*/
- protected Query getFieldQuery(String field,
- Analyzer analyzer,
- String queryText) throws ParseException {
+ protected Query getFieldQuery(String field, String queryText) throws ParseException {
// Use the analyzer to get all the tokens, and then build a TermQuery,
// PhraseQuery, or nothing based on the term count
@@ -285,17 +311,15 @@
}
/**
- * Base implementation delegates to {@link #getFieldQuery(String,Analyzer,String)}.
+ * Base implementation delegates to {@link #getFieldQuery(String,String)}.
* This method may be overridden, for example, to return
* a SpanNearQuery instead of a PhraseQuery.
*
* @exception ParseException throw in overridden method to disallow
*/
- protected Query getFieldQuery(String field,
- Analyzer analyzer,
- String queryText,
- int slop) throws ParseException {
- Query query = getFieldQuery(field, analyzer, queryText);
+ protected Query getFieldQuery(String field, String queryText, int slop)
+ throws ParseException {
+ Query query = getFieldQuery(field, queryText);
if (query instanceof PhraseQuery) {
((PhraseQuery) query).setSlop(slop);
@@ -308,7 +332,6 @@
* @exception ParseException throw in overridden method to disallow
*/
protected Query getRangeQuery(String field,
- Analyzer analyzer,
String part1,
String part2,
boolean inclusive) throws ParseException
@@ -423,10 +446,10 @@
* @return Resulting {@link Query} built for the term
* @exception ParseException throw in overridden method to disallow
*/
- protected Query getFuzzyQuery(String field, String termStr) throws ParseException
+ protected Query getFuzzyQuery(String field, String termStr, float minSimilarity) throws ParseException
{
Term t = new Term(field, termStr);
- return new FuzzyQuery(t);
+ return new FuzzyQuery(t, minSimilarity);
}
/**
@@ -445,6 +468,25 @@
return new String(caDest, 0, j);
}
+ /**
+ * Returns a String where those characters that QueryParser
+ * expects to be escaped are escaped, i.e. preceded by a <code>\</code>.
+ */
+ public static String escape(String s) {
+ StringBuffer sb = new StringBuffer();
+ for (int i = 0; i < s.length(); i++) {
+ char c = s.charAt(i);
+ // NOTE: keep this in sync with _ESCAPED_CHAR below!
+ if (c == '\\' || c == '+' || c == '-' || c == '!' || c == '(' || c == ')' || c == ':'
+ || c == '^' || c == '[' || c == ']' || c == '\"' || c == '{' || c == '}' || c == '~'
+ || c == '*' || c == '?') {
+ sb.append('\\');
+ }
+ sb.append(c);
+ }
+ return sb.toString();
+ }
+
public static void main(String[] args) throws Exception {
QueryParser qp = new QueryParser("field",
new org.apache.lucene.analysis.SimpleAnalyzer());
@@ -461,6 +503,7 @@
<*> TOKEN : {
<#_NUM_CHAR: ["0"-"9"] >
+// NOTE: keep this in sync with escape(String) above!
| <#_ESCAPED_CHAR: "\\" [ "\\", "+", "-", "!", "(", ")", ":", "^",
"[", "]", "\"", "{", "}", "~", "*", "?" ] >
| <#_TERM_START_CHAR: ( ~[ " ", "\t", "\n", "\r", "+", "-", "!", "(", ")", ":", "^",
@@ -495,8 +538,7 @@
| <CARAT: "^" > : Boost
| <QUOTED: "\"" (~["\""])+ "\"">
| <TERM: <_TERM_START_CHAR> (<_TERM_CHAR>)* >
-| <FUZZY: "~" >
-| <SLOP: "~" (<_NUM_CHAR>)+ >
+| <FUZZY_SLOP: "~" ( (<_NUM_CHAR>)+ ( "." (<_NUM_CHAR>)+ )? )? >
| <PREFIXTERM: <_TERM_START_CHAR> (<_TERM_CHAR>)* "*" >
| <WILDTERM: <_TERM_START_CHAR>
(<_TERM_CHAR> | ( [ "*", "?" ] ))* >
@@ -605,7 +647,7 @@
Query Term(String field) : {
- Token term, boost=null, slop=null, goop1, goop2;
+ Token term, boost=null, fuzzySlop=null, goop1, goop2;
boolean prefix = false;
boolean wildcard = false;
boolean fuzzy = false;
@@ -620,8 +662,8 @@
| term=<WILDTERM> { wildcard=true; }
| term=<NUMBER>
)
- [ <FUZZY> { fuzzy=true; } ]
- [ <CARAT> boost=<NUMBER> [ <FUZZY> { fuzzy=true; } ] ]
+ [ fuzzySlop=<FUZZY_SLOP> { fuzzy=true; } ]
+ [ <CARAT> boost=<NUMBER> [ fuzzySlop=<FUZZY_SLOP> { fuzzy=true; } ] ]
{
String termImage=discardEscapeChar(term.image);
if (wildcard) {
@@ -631,9 +673,16 @@
discardEscapeChar(term.image.substring
(0, term.image.length()-1)));
} else if (fuzzy) {
- q = getFuzzyQuery(field, termImage);
+ float fms = fuzzyMinSim;
+ try {
+ fms = Float.valueOf(fuzzySlop.image.substring(1)).floatValue();
+ } catch (Exception ignored) { }
+ if(fms < 0.0f || fms > 1.0f){
+ throw new ParseException("Minimum similarity for a FuzzyQuery has to be between 0.0f and 1.0f !");
+ }
+ q = getFuzzyQuery(field, termImage, fms);
} else {
- q = getFieldQuery(field, analyzer, termImage);
+ q = getFieldQuery(field, termImage);
}
}
| ( <RANGEIN_START> ( goop1=<RANGEIN_GOOP>|goop1=<RANGEIN_QUOTED> )
@@ -651,7 +700,7 @@
} else {
goop2.image = discardEscapeChar(goop2.image);
}
- q = getRangeQuery(field, analyzer, goop1.image, goop2.image, true);
+ q = getRangeQuery(field, goop1.image, goop2.image, true);
}
| ( <RANGEEX_START> ( goop1=<RANGEEX_GOOP>|goop1=<RANGEEX_QUOTED> )
[ <RANGEEX_TO> ] ( goop2=<RANGEEX_GOOP>|goop2=<RANGEEX_QUOTED> )
@@ -669,23 +718,21 @@
goop2.image = discardEscapeChar(goop2.image);
}
- q = getRangeQuery(field, analyzer, goop1.image, goop2.image, false);
+ q = getRangeQuery(field, goop1.image, goop2.image, false);
}
| term=<QUOTED>
- [ slop=<SLOP> ]
+ [ fuzzySlop=<FUZZY_SLOP> ]
[ <CARAT> boost=<NUMBER> ]
{
int s = phraseSlop;
- if (slop != null) {
+ if (fuzzySlop != null) {
try {
- s = Float.valueOf(slop.image.substring(1)).intValue();
+ s = Float.valueOf(fuzzySlop.image.substring(1)).intValue();
}
catch (Exception ignored) { }
}
- q = getFieldQuery(field, analyzer,
- term.image.substring(1, term.image.length()-1),
- s);
+ q = getFieldQuery(field, term.image.substring(1, term.image.length()-1), s);
}
)
{
1.1.2.1 +15 -17 jakarta-lucene/src/java/org/apache/lucene/queryParser/QueryParserConstants.java
Index: QueryParserConstants.java
===================================================================
RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/queryParser/QueryParserConstants.java,v
retrieving revision 1.1
retrieving revision 1.1.2.1
diff -u -r1.1 -r1.1.2.1
--- QueryParserConstants.java 11 Sep 2003 01:51:33 -0000 1.1
+++ QueryParserConstants.java 1 Oct 2004 09:59:23 -0000 1.1.2.1
@@ -20,21 +20,20 @@
int CARAT = 15;
int QUOTED = 16;
int TERM = 17;
- int FUZZY = 18;
- int SLOP = 19;
- int PREFIXTERM = 20;
- int WILDTERM = 21;
- int RANGEIN_START = 22;
- int RANGEEX_START = 23;
- int NUMBER = 24;
- int RANGEIN_TO = 25;
- int RANGEIN_END = 26;
- int RANGEIN_QUOTED = 27;
- int RANGEIN_GOOP = 28;
- int RANGEEX_TO = 29;
- int RANGEEX_END = 30;
- int RANGEEX_QUOTED = 31;
- int RANGEEX_GOOP = 32;
+ int FUZZY_SLOP = 18;
+ int PREFIXTERM = 19;
+ int WILDTERM = 20;
+ int RANGEIN_START = 21;
+ int RANGEEX_START = 22;
+ int NUMBER = 23;
+ int RANGEIN_TO = 24;
+ int RANGEIN_END = 25;
+ int RANGEIN_QUOTED = 26;
+ int RANGEIN_GOOP = 27;
+ int RANGEEX_TO = 28;
+ int RANGEEX_END = 29;
+ int RANGEEX_QUOTED = 30;
+ int RANGEEX_GOOP = 31;
int Boost = 0;
int RangeEx = 1;
@@ -60,8 +59,7 @@
"\"^\"",
"<QUOTED>",
"<TERM>",
- "\"~\"",
- "<SLOP>",
+ "<FUZZY_SLOP>",
"<PREFIXTERM>",
"<WILDTERM>",
"\"[\"",
1.3.2.1 +133 -117 jakarta-lucene/src/java/org/apache/lucene/queryParser/QueryParserTokenManager.java
Index: QueryParserTokenManager.java
===================================================================
RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/queryParser/QueryParserTokenManager.java,v
retrieving revision 1.3
retrieving revision 1.3.2.1
diff -u -r1.3 -r1.3.2.1
--- QueryParserTokenManager.java 24 Mar 2004 10:12:27 -0000 1.3
+++ QueryParserTokenManager.java 1 Oct 2004 09:59:23 -0000 1.3.2.1
@@ -54,13 +54,11 @@
case 58:
return jjStopAtPos(0, 14);
case 91:
- return jjStopAtPos(0, 22);
+ return jjStopAtPos(0, 21);
case 94:
return jjStopAtPos(0, 15);
case 123:
- return jjStopAtPos(0, 23);
- case 126:
- return jjStartNfaWithStates_3(0, 18, 18);
+ return jjStopAtPos(0, 22);
default :
return jjMoveNfa_3(0, 0);
}
@@ -105,7 +103,7 @@
{
int[] nextStates;
int startsAt = 0;
- jjnewStateCnt = 31;
+ jjnewStateCnt = 33;
int i = 1;
jjstateSet[0] = startState;
int j, kind = 0x7fffffff;
@@ -169,56 +167,67 @@
case 18:
if ((0x3ff000000000000L & l) == 0L)
break;
- if (kind > 19)
- kind = 19;
- jjstateSet[jjnewStateCnt++] = 18;
+ if (kind > 18)
+ kind = 18;
+ jjAddStates(7, 8);
break;
case 19:
+ if (curChar == 46)
+ jjCheckNAdd(20);
+ break;
+ case 20:
+ if ((0x3ff000000000000L & l) == 0L)
+ break;
+ if (kind > 18)
+ kind = 18;
+ jjCheckNAdd(20);
+ break;
+ case 21:
if ((0x7bffd0f8ffffd9ffL & l) == 0L)
break;
if (kind > 17)
kind = 17;
jjCheckNAddStates(0, 6);
break;
- case 20:
+ case 22:
if ((0x7bfff8f8ffffd9ffL & l) == 0L)
break;
if (kind > 17)
kind = 17;
- jjCheckNAddTwoStates(20, 21);
+ jjCheckNAddTwoStates(22, 23);
break;
- case 22:
+ case 24:
if ((0x84002f0600000000L & l) == 0L)
break;
if (kind > 17)
kind = 17;
- jjCheckNAddTwoStates(20, 21);
+ jjCheckNAddTwoStates(22, 23);
break;
- case 23:
+ case 25:
if ((0x7bfff8f8ffffd9ffL & l) != 0L)
- jjCheckNAddStates(7, 9);
- break;
- case 24:
- if (curChar == 42 && kind > 20)
- kind = 20;
+ jjCheckNAddStates(9, 11);
break;
case 26:
+ if (curChar == 42 && kind > 19)
+ kind = 19;
+ break;
+ case 28:
if ((0x84002f0600000000L & l) != 0L)
- jjCheckNAddStates(7, 9);
+ jjCheckNAddStates(9, 11);
break;
- case 27:
+ case 29:
if ((0xfbfffcf8ffffd9ffL & l) == 0L)
break;
- if (kind > 21)
- kind = 21;
- jjCheckNAddTwoStates(27, 28);
+ if (kind > 20)
+ kind = 20;
+ jjCheckNAddTwoStates(29, 30);
break;
- case 29:
+ case 31:
if ((0x84002f0600000000L & l) == 0L)
break;
- if (kind > 21)
- kind = 21;
- jjCheckNAddTwoStates(27, 28);
+ if (kind > 20)
+ kind = 20;
+ jjCheckNAddTwoStates(29, 30);
break;
default : break;
}
@@ -239,9 +248,13 @@
jjCheckNAddStates(0, 6);
}
else if (curChar == 126)
+ {
+ if (kind > 18)
+ kind = 18;
jjstateSet[jjnewStateCnt++] = 18;
+ }
if (curChar == 92)
- jjCheckNAddStates(10, 12);
+ jjCheckNAddStates(12, 14);
else if (curChar == 78)
jjstateSet[jjnewStateCnt++] = 11;
else if (curChar == 124)
@@ -292,70 +305,73 @@
jjstateSet[jjnewStateCnt++] = 11;
break;
case 15:
- jjAddStates(13, 14);
+ jjAddStates(15, 16);
break;
case 17:
- if (curChar == 126)
- jjstateSet[jjnewStateCnt++] = 18;
+ if (curChar != 126)
+ break;
+ if (kind > 18)
+ kind = 18;
+ jjstateSet[jjnewStateCnt++] = 18;
break;
- case 19:
+ case 21:
if ((0x97ffffff97ffffffL & l) == 0L)
break;
if (kind > 17)
kind = 17;
jjCheckNAddStates(0, 6);
break;
- case 20:
+ case 22:
if ((0x97ffffff97ffffffL & l) == 0L)
break;
if (kind > 17)
kind = 17;
- jjCheckNAddTwoStates(20, 21);
+ jjCheckNAddTwoStates(22, 23);
break;
- case 21:
+ case 23:
if (curChar == 92)
- jjCheckNAddTwoStates(22, 22);
+ jjCheckNAddTwoStates(24, 24);
break;
- case 22:
+ case 24:
if ((0x6800000078000000L & l) == 0L)
break;
if (kind > 17)
kind = 17;
- jjCheckNAddTwoStates(20, 21);
+ jjCheckNAddTwoStates(22, 23);
break;
- case 23:
+ case 25:
if ((0x97ffffff97ffffffL & l) != 0L)
- jjCheckNAddStates(7, 9);
+ jjCheckNAddStates(9, 11);
break;
- case 25:
+ case 27:
if (curChar == 92)
- jjCheckNAddTwoStates(26, 26);
+ jjCheckNAddTwoStates(28, 28);
break;
- case 26:
+ case 28:
if ((0x6800000078000000L & l) != 0L)
- jjCheckNAddStates(7, 9);
+ jjCheckNAddStates(9, 11);
break;
- case 27:
+ case 29:
if ((0x97ffffff97ffffffL & l) == 0L)
break;
- if (kind > 21)
- kind = 21;
- jjCheckNAddTwoStates(27, 28);
+ if (kind > 20)
+ kind = 20;
+ jjCheckNAddTwoStates(29, 30);
break;
- case 28:
+ case 30:
if (curChar == 92)
- jjCheckNAddTwoStates(29, 29);
+ jjCheckNAddTwoStates(31, 31);
break;
- case 29:
+ case 31:
if ((0x6800000078000000L & l) == 0L)
break;
- if (kind > 21)
- kind = 21;
- jjCheckNAddTwoStates(27, 28);
+ if (kind > 20)
+ kind = 20;
+ jjCheckNAddTwoStates(29, 30);
break;
- case 30:
+ case 32:
if (curChar == 92)
- jjCheckNAddStates(10, 12);
+ jjCheckNAddStates(12, 14);
break;
default : break;
}
@@ -381,25 +397,25 @@
break;
case 15:
if (jjCanMove_0(hiByte, i1, i2, l1, l2))
- jjAddStates(13, 14);
+ jjAddStates(15, 16);
break;
- case 20:
+ case 22:
if (!jjCanMove_0(hiByte, i1, i2, l1, l2))
break;
if (kind > 17)
kind = 17;
- jjCheckNAddTwoStates(20, 21);
+ jjCheckNAddTwoStates(22, 23);
break;
- case 23:
+ case 25:
if (jjCanMove_0(hiByte, i1, i2, l1, l2))
- jjCheckNAddStates(7, 9);
+ jjCheckNAddStates(9, 11);
break;
- case 27:
+ case 29:
if (!jjCanMove_0(hiByte, i1, i2, l1, l2))
break;
- if (kind > 21)
- kind = 21;
- jjCheckNAddTwoStates(27, 28);
+ if (kind > 20)
+ kind = 20;
+ jjCheckNAddTwoStates(29, 30);
break;
default : break;
}
@@ -412,7 +428,7 @@
kind = 0x7fffffff;
}
++curPos;
- if ((i = jjnewStateCnt) == (startsAt = 31 - (jjnewStateCnt = startsAt)))
+ if ((i = jjnewStateCnt) == (startsAt = 33 - (jjnewStateCnt = startsAt)))
return curPos;
try { curChar = input_stream.readChar(); }
catch(java.io.IOException e) { return curPos; }
@@ -423,9 +439,9 @@
switch (pos)
{
case 0:
- if ((active0 & 0x20000000L) != 0L)
+ if ((active0 & 0x10000000L) != 0L)
{
- jjmatchedKind = 32;
+ jjmatchedKind = 31;
return 4;
}
return -1;
@@ -450,9 +466,9 @@
switch(curChar)
{
case 84:
- return jjMoveStringLiteralDfa1_1(0x20000000L);
+ return jjMoveStringLiteralDfa1_1(0x10000000L);
case 125:
- return jjStopAtPos(0, 30);
+ return jjStopAtPos(0, 29);
default :
return jjMoveNfa_1(0, 0);
}
@@ -467,8 +483,8 @@
switch(curChar)
{
case 79:
- if ((active0 & 0x20000000L) != 0L)
- return jjStartNfaWithStates_1(1, 29, 4);
+ if ((active0 & 0x10000000L) != 0L)
+ return jjStartNfaWithStates_1(1, 28, 4);
break;
default :
break;
@@ -497,8 +513,8 @@
case 0:
if ((0xfffffffeffffffffL & l) != 0L)
{
- if (kind > 32)
- kind = 32;
+ if (kind > 31)
+ kind = 31;
jjCheckNAdd(4);
}
if ((0x100002600L & l) != 0L)
@@ -518,14 +534,14 @@
jjCheckNAddTwoStates(2, 3);
break;
case 3:
- if (curChar == 34 && kind > 31)
- kind = 31;
+ if (curChar == 34 && kind > 30)
+ kind = 30;
break;
case 4:
if ((0xfffffffeffffffffL & l) == 0L)
break;
- if (kind > 32)
- kind = 32;
+ if (kind > 31)
+ kind = 31;
jjCheckNAdd(4);
break;
default : break;
@@ -543,12 +559,12 @@
case 4:
if ((0xdfffffffffffffffL & l) == 0L)
break;
- if (kind > 32)
- kind = 32;
+ if (kind > 31)
+ kind = 31;
jjCheckNAdd(4);
break;
case 2:
- jjAddStates(15, 16);
+ jjAddStates(17, 18);
break;
default : break;
}
@@ -569,13 +585,13 @@
case 4:
if (!jjCanMove_0(hiByte, i1, i2, l1, l2))
break;
- if (kind > 32)
- kind = 32;
+ if (kind > 31)
+ kind = 31;
jjCheckNAdd(4);
break;
case 2:
if (jjCanMove_0(hiByte, i1, i2, l1, l2))
- jjAddStates(15, 16);
+ jjAddStates(17, 18);
break;
default : break;
}
@@ -620,9 +636,9 @@
case 0:
if ((0x3ff000000000000L & l) == 0L)
break;
- if (kind > 24)
- kind = 24;
- jjAddStates(17, 18);
+ if (kind > 23)
+ kind = 23;
+ jjAddStates(19, 20);
break;
case 1:
if (curChar == 46)
@@ -631,8 +647,8 @@
case 2:
if ((0x3ff000000000000L & l) == 0L)
break;
- if (kind > 24)
- kind = 24;
+ if (kind > 23)
+ kind = 23;
jjCheckNAdd(2);
break;
default : break;
@@ -683,9 +699,9 @@
switch (pos)
{
case 0:
- if ((active0 & 0x2000000L) != 0L)
+ if ((active0 & 0x1000000L) != 0L)
{
- jjmatchedKind = 28;
+ jjmatchedKind = 27;
return 4;
}
return -1;
@@ -710,9 +726,9 @@
switch(curChar)
{
case 84:
- return jjMoveStringLiteralDfa1_2(0x2000000L);
+ return jjMoveStringLiteralDfa1_2(0x1000000L);
case 93:
- return jjStopAtPos(0, 26);
+ return jjStopAtPos(0, 25);
default :
return jjMoveNfa_2(0, 0);
}
@@ -727,8 +743,8 @@
switch(curChar)
{
case 79:
- if ((active0 & 0x2000000L) != 0L)
- return jjStartNfaWithStates_2(1, 25, 4);
+ if ((active0 & 0x1000000L) != 0L)
+ return jjStartNfaWithStates_2(1, 24, 4);
break;
default :
break;
@@ -757,8 +773,8 @@
case 0:
if ((0xfffffffeffffffffL & l) != 0L)
{
- if (kind > 28)
- kind = 28;
+ if (kind > 27)
+ kind = 27;
jjCheckNAdd(4);
}
if ((0x100002600L & l) != 0L)
@@ -778,14 +794,14 @@
jjCheckNAddTwoStates(2, 3);
break;
case 3:
- if (curChar == 34 && kind > 27)
- kind = 27;
+ if (curChar == 34 && kind > 26)
+ kind = 26;
break;
case 4:
if ((0xfffffffeffffffffL & l) == 0L)
break;
- if (kind > 28)
- kind = 28;
+ if (kind > 27)
+ kind = 27;
jjCheckNAdd(4);
break;
default : break;
@@ -803,12 +819,12 @@
case 4:
if ((0xffffffffdfffffffL & l) == 0L)
break;
- if (kind > 28)
- kind = 28;
+ if (kind > 27)
+ kind = 27;
jjCheckNAdd(4);
break;
case 2:
- jjAddStates(15, 16);
+ jjAddStates(17, 18);
break;
default : break;
}
@@ -829,13 +845,13 @@
case 4:
if (!jjCanMove_0(hiByte, i1, i2, l1, l2))
break;
- if (kind > 28)
- kind = 28;
+ if (kind > 27)
+ kind = 27;
jjCheckNAdd(4);
break;
case 2:
if (jjCanMove_0(hiByte, i1, i2, l1, l2))
- jjAddStates(15, 16);
+ jjAddStates(17, 18);
break;
default : break;
}
@@ -855,8 +871,8 @@
}
}
static final int[] jjnextStates = {
- 20, 23, 24, 27, 28, 25, 21, 23, 24, 25, 22, 26, 29, 15, 16, 2,
- 3, 0, 1,
+ 22, 25, 26, 29, 30, 27, 23, 18, 19, 25, 26, 27, 24, 28, 31, 15,
+ 16, 2, 3, 0, 1,
};
private static final boolean jjCanMove_0(int hiByte, int i1, int i2, long l1, long l2)
{
@@ -872,8 +888,8 @@
}
public static final String[] jjstrLiteralImages = {
"", null, null, null, null, null, null, null, null, null, "\53", "\55", "\50",
-"\51", "\72", "\136", null, null, "\176", null, null, null, "\133", "\173", null,
-"\124\117", "\135", null, null, "\124\117", "\175", null, null, };
+"\51", "\72", "\136", null, null, null, null, null, "\133", "\173", null, "\124\117",
+"\135", null, null, "\124\117", "\175", null, null, };
public static final String[] lexStateNames = {
"Boost",
"RangeEx",
@@ -881,18 +897,18 @@
"DEFAULT",
};
public static final int[] jjnewLexState = {
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, -1, -1, -1, -1, -1, -1, 2, 1, 3,
- -1, 3, -1, -1, -1, 3, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, -1, -1, -1, -1, -1, 2, 1, 3, -1,
+ 3, -1, -1, -1, 3, -1, -1,
};
static final long[] jjtoToken = {
- 0x1ffffff81L,
+ 0xffffff81L,
};
static final long[] jjtoSkip = {
0x40L,
};
protected CharStream input_stream;
-private final int[] jjrounds = new int[31];
-private final int[] jjstateSet = new int[62];
+private final int[] jjrounds = new int[33];
+private final int[] jjstateSet = new int[66];
protected char curChar;
public QueryParserTokenManager(CharStream stream)
{
@@ -914,7 +930,7 @@
{
int i;
jjround = 0x80000001;
- for (i = 31; i-- > 0;)
+ for (i = 33; i-- > 0;)
jjrounds[i] = 0x80000000;
}
public void ReInit(CharStream stream, int lexState)
No revision
No revision
1.26.2.1 +54 -37 jakarta-lucene/src/test/org/apache/lucene/queryParser/TestQueryParser.java
Index: TestQueryParser.java
===================================================================
RCS file: /home/cvs/jakarta-lucene/src/test/org/apache/lucene/queryParser/TestQueryParser.java,v
retrieving revision 1.26
retrieving revision 1.26.2.1
diff -u -r1.26 -r1.26.2.1
--- TestQueryParser.java 30 May 2004 20:24:20 -0000 1.26
+++ TestQueryParser.java 1 Oct 2004 09:59:23 -0000 1.26.2.1
@@ -89,7 +89,7 @@
super(f, a);
}
- protected Query getFuzzyQuery(String field, String termStr) throws ParseException {
+ protected Query getFuzzyQuery(String field, String termStr, float minSimilarity) throws ParseException {
throw new ParseException("Fuzzy queries not allowed");
}
@@ -235,15 +235,29 @@
public void testWildcard() throws Exception {
assertQueryEquals("term*", null, "term*");
assertQueryEquals("term*^2", null, "term*^2.0");
- assertQueryEquals("term~", null, "term~");
- assertQueryEquals("term~^2", null, "term^2.0~");
- assertQueryEquals("term^2~", null, "term^2.0~");
+ assertQueryEquals("term~", null, "term~0.5");
+ assertQueryEquals("term~0.7", null, "term~0.7");
+ assertQueryEquals("term~^2", null, "term^2.0~0.5");
+ assertQueryEquals("term^2~", null, "term^2.0~0.5");
assertQueryEquals("term*germ", null, "term*germ");
assertQueryEquals("term*germ^3", null, "term*germ^3.0");
assertTrue(getQuery("term*", null) instanceof PrefixQuery);
assertTrue(getQuery("term*^2", null) instanceof PrefixQuery);
assertTrue(getQuery("term~", null) instanceof FuzzyQuery);
+ assertTrue(getQuery("term~0.7", null) instanceof FuzzyQuery);
+ FuzzyQuery fq = (FuzzyQuery)getQuery("term~0.7", null);
+ assertEquals(0.7f, fq.getMinSimilarity(), 0.1f);
+ assertEquals(0, fq.getPrefixLength());
+ fq = (FuzzyQuery)getQuery("term~", null);
+ assertEquals(0.5f, fq.getMinSimilarity(), 0.1f);
+ assertEquals(0, fq.getPrefixLength());
+ try {
+ getQuery("term~1.1", null); // value > 1, throws exception
+ fail();
+ } catch(ParseException pe) {
+ // expected exception
+ }
assertTrue(getQuery("term*germ", null) instanceof WildcardQuery);
/* Tests to see that wild card terms are (or are not) properly
@@ -317,7 +331,8 @@
public void testEscaped() throws Exception {
Analyzer a = new WhitespaceAnalyzer();
- /* assertQueryEquals("\\[brackets", a, "\\[brackets");
+
+ /*assertQueryEquals("\\[brackets", a, "\\[brackets");
assertQueryEquals("\\[brackets", null, "brackets");
assertQueryEquals("\\\\", a, "\\\\");
assertQueryEquals("\\+blah", a, "\\+blah");
@@ -337,38 +352,40 @@
assertQueryEquals("\\~blah", a, "\\~blah");
assertQueryEquals("\\*blah", a, "\\*blah");
assertQueryEquals("\\?blah", a, "\\?blah");
- assertQueryEquals("foo \\&& bar", a, "foo \\&& bar");
- assertQueryEquals("foo \\|| bar", a, "foo \\|| bar");
- assertQueryEquals("foo \\AND bar", a, "foo \\AND bar"); */
-
- assertQueryEquals("a\\-b:c",a,"a-b:c");
- assertQueryEquals("a\\+b:c",a,"a+b:c");
- assertQueryEquals("a\\:b:c",a,"a:b:c");
- assertQueryEquals("a\\\\b:c",a,"a\\b:c");
-
- assertQueryEquals("a:b\\-c",a,"a:b-c");
- assertQueryEquals("a:b\\+c",a,"a:b+c");
- assertQueryEquals("a:b\\:c",a,"a:b:c");
- assertQueryEquals("a:b\\\\c",a,"a:b\\c");
-
- assertQueryEquals("a:b\\-c*",a,"a:b-c*");
- assertQueryEquals("a:b\\+c*",a,"a:b+c*");
- assertQueryEquals("a:b\\:c*",a,"a:b:c*");
- assertQueryEquals("a:b\\\\c*",a,"a:b\\c*");
-
- assertQueryEquals("a:b\\-?c",a,"a:b-?c");
- assertQueryEquals("a:b\\+?c",a,"a:b+?c");
- assertQueryEquals("a:b\\:?c",a,"a:b:?c");
- assertQueryEquals("a:b\\\\?c",a,"a:b\\?c");
-
- assertQueryEquals("a:b\\-c~",a,"a:b-c~");
- assertQueryEquals("a:b\\+c~",a,"a:b+c~");
- assertQueryEquals("a:b\\:c~",a,"a:b:c~");
- assertQueryEquals("a:b\\\\c~",a,"a:b\\c~");
-
- assertQueryEquals("[ a\\- TO a\\+ ]", null, "[a- TO a+]");
- assertQueryEquals("[ a\\: TO a\\~ ]", null, "[a: TO a~]");
- assertQueryEquals("[ a\\\\ TO a\\* ]", null, "[a\\ TO a*]");
+ //assertQueryEquals("foo \\&\\& bar", a, "foo \\&\\& bar");
+ //assertQueryEquals("foo \\|| bar", a, "foo \\|| bar");
+ //assertQueryEquals("foo \\AND bar", a, "foo \\AND bar");*/
+
+ assertQueryEquals("a\\-b:c", a, "a-b:c");
+ assertQueryEquals("a\\+b:c", a, "a+b:c");
+ assertQueryEquals("a\\:b:c", a, "a:b:c");
+ assertQueryEquals("a\\\\b:c", a, "a\\b:c");
+
+ assertQueryEquals("a:b\\-c", a, "a:b-c");
+ assertQueryEquals("a:b\\+c", a, "a:b+c");
+ assertQueryEquals("a:b\\:c", a, "a:b:c");
+ assertQueryEquals("a:b\\\\c", a, "a:b\\c");
+
+ assertQueryEquals("a:b\\-c*", a, "a:b-c*");
+ assertQueryEquals("a:b\\+c*", a, "a:b+c*");
+ assertQueryEquals("a:b\\:c*", a, "a:b:c*");
+
+ assertQueryEquals("a:b\\\\c*", a, "a:b\\c*");
+
+ assertQueryEquals("a:b\\-?c", a, "a:b-?c");
+ assertQueryEquals("a:b\\+?c", a, "a:b+?c");
+ assertQueryEquals("a:b\\:?c", a, "a:b:?c");
+
+ assertQueryEquals("a:b\\\\?c", a, "a:b\\?c");
+
+ assertQueryEquals("a:b\\-c~", a, "a:b-c~0.5");
+ assertQueryEquals("a:b\\+c~", a, "a:b+c~0.5");
+ assertQueryEquals("a:b\\:c~", a, "a:b:c~0.5");
+ assertQueryEquals("a:b\\\\c~", a, "a:b\\c~0.5");
+
+ assertQueryEquals("[ a\\- TO a\\+ ]", null, "[a- TO a+]");
+ assertQueryEquals("[ a\\: TO a\\~ ]", null, "[a: TO a~]");
+ assertQueryEquals("[ a\\\\ TO a\\* ]", null, "[a\\ TO a*]");
}
public void testTabNewlineCarriageReturn()
---------------------------------------------------------------------
To unsubscribe, e-mail: lucene-dev-unsubscribe@jakarta.apache.org
For additional commands, e-mail: lucene-dev-help@jakarta.apache.org