You are viewing a plain text version of this content. The canonical link for it is here.
Posted to java-commits@lucene.apache.org by bu...@apache.org on 2007/03/08 04:37:13 UTC
svn commit: r515908 - in /lucene/java/trunk: ./ src/java/org/apache/lucene/queryParser/ src/test/org/apache/lucene/queryParser/ src/test/org/apache/lucene/search/
Author: buschmi
Date: Wed Mar 7 19:37:12 2007
New Revision: 515908
URL: http://svn.apache.org/viewvc?view=rev&rev=515908
Log:
LUCENE-800: removed backslash from the TERM_CHAR list in the QueryParser to support escaping of backslashes
Modified:
lucene/java/trunk/CHANGES.txt
lucene/java/trunk/src/java/org/apache/lucene/queryParser/QueryParser.java
lucene/java/trunk/src/java/org/apache/lucene/queryParser/QueryParser.jj
lucene/java/trunk/src/java/org/apache/lucene/queryParser/QueryParserTokenManager.java
lucene/java/trunk/src/test/org/apache/lucene/queryParser/TestQueryParser.java
lucene/java/trunk/src/test/org/apache/lucene/search/TestWildcard.java
Modified: lucene/java/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/java/trunk/CHANGES.txt?view=diff&rev=515908&r1=515907&r2=515908
==============================================================================
--- lucene/java/trunk/CHANGES.txt (original)
+++ lucene/java/trunk/CHANGES.txt Wed Mar 7 19:37:12 2007
@@ -1,4 +1,4 @@
-Lucene Change Log
+Lucene Change Log
$Id$
@@ -47,6 +47,11 @@
FSDirectory.getDirectory() but before IndexReader.open you now get
a FileNotFoundException like Lucene pre-2.1 (before this fix you
got an NPE). (Mike McCandless)
+
+ 7. LUCENE-800: Removed backslash from the TERM_CHAR list in the queryparser,
+ because the backslash is the escape character. Also changed the ESCAPED_CHAR
+ list to contain all possible characters, because every character that
+ follows a backslash should be considered as escaped. (Michael Busch)
New features
Modified: lucene/java/trunk/src/java/org/apache/lucene/queryParser/QueryParser.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/queryParser/QueryParser.java?view=diff&rev=515908&r1=515907&r2=515908
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/queryParser/QueryParser.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/queryParser/QueryParser.java Wed Mar 7 19:37:12 2007
@@ -794,7 +794,7 @@
StringBuffer sb = new StringBuffer();
for (int i = 0; i < s.length(); i++) {
char c = s.charAt(i);
- // NOTE: keep this in sync with _ESCAPED_CHAR below!
+ // These characters are part of the query syntax and must be escaped
if (c == '\\' || c == '+' || c == '-' || c == '!' || c == '(' || c == ')' || c == ':'
|| c == '^' || c == '[' || c == ']' || c == '\"' || c == '{' || c == '}' || c == '~'
|| c == '*' || c == '?') {
Modified: lucene/java/trunk/src/java/org/apache/lucene/queryParser/QueryParser.jj
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/queryParser/QueryParser.jj?view=diff&rev=515908&r1=515907&r2=515908
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/queryParser/QueryParser.jj (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/queryParser/QueryParser.jj Wed Mar 7 19:37:12 2007
@@ -818,7 +818,7 @@
StringBuffer sb = new StringBuffer();
for (int i = 0; i < s.length(); i++) {
char c = s.charAt(i);
- // NOTE: keep this in sync with _ESCAPED_CHAR below!
+ // These characters are part of the query syntax and must be escaped
if (c == '\\' || c == '+' || c == '-' || c == '!' || c == '(' || c == ')' || c == ':'
|| c == '^' || c == '[' || c == ']' || c == '\"' || c == '{' || c == '}' || c == '~'
|| c == '*' || c == '?') {
@@ -854,11 +854,10 @@
<*> TOKEN : {
<#_NUM_CHAR: ["0"-"9"] >
-// NOTE: keep this in sync with escape(String) above!
-| <#_ESCAPED_CHAR: "\\" [ "\\", "+", "-", "!", "(", ")", ":", "^",
- "[", "]", "\"", "{", "}", "~", "*", "?" ] >
+// every character that follows a backslash is considered as an escaped character
+| <#_ESCAPED_CHAR: "\\" ~[] >
| <#_TERM_START_CHAR: ( ~[ " ", "\t", "\n", "\r", "+", "-", "!", "(", ")", ":", "^",
- "[", "]", "\"", "{", "}", "~", "*", "?" ]
+ "[", "]", "\"", "{", "}", "~", "*", "?", "\\" ]
| <_ESCAPED_CHAR> ) >
| <#_TERM_CHAR: ( <_TERM_START_CHAR> | <_ESCAPED_CHAR> | "-" | "+" ) >
| <#_WHITESPACE: ( " " | "\t" | "\n" | "\r") >
Modified: lucene/java/trunk/src/java/org/apache/lucene/queryParser/QueryParserTokenManager.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/queryParser/QueryParserTokenManager.java?view=diff&rev=515908&r1=515907&r2=515908
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/queryParser/QueryParserTokenManager.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/queryParser/QueryParserTokenManager.java Wed Mar 7 19:37:12 2007
@@ -220,8 +220,6 @@
jjCheckNAddTwoStates(25, 26);
break;
case 27:
- if ((0x84002f0600000000L & l) == 0L)
- break;
if (kind > 21)
kind = 21;
jjCheckNAddTwoStates(25, 26);
@@ -241,8 +239,6 @@
jjCheckNAddTwoStates(29, 30);
break;
case 31:
- if ((0x84002f0600000000L & l) == 0L)
- break;
if (kind > 18)
kind = 18;
jjCheckNAddTwoStates(29, 30);
@@ -252,8 +248,7 @@
jjCheckNAddStates(10, 12);
break;
case 34:
- if ((0x84002f0600000000L & l) != 0L)
- jjCheckNAddStates(10, 12);
+ jjCheckNAddStates(10, 12);
break;
default : break;
}
@@ -267,37 +262,37 @@
switch(jjstateSet[--i])
{
case 36:
- if ((0x97ffffff97ffffffL & l) != 0L)
+ if ((0x97ffffff87ffffffL & l) != 0L)
{
if (kind > 21)
kind = 21;
jjCheckNAddTwoStates(25, 26);
}
- if (curChar == 92)
+ else if (curChar == 92)
jjCheckNAddTwoStates(27, 27);
break;
case 0:
- if ((0x97ffffff97ffffffL & l) != 0L)
+ if ((0x97ffffff87ffffffL & l) != 0L)
{
if (kind > 18)
kind = 18;
jjCheckNAddStates(0, 4);
}
+ else if (curChar == 92)
+ jjCheckNAddStates(13, 15);
else if (curChar == 126)
{
if (kind > 19)
kind = 19;
jjstateSet[jjnewStateCnt++] = 20;
}
- if ((0x97ffffff97ffffffL & l) != 0L)
+ if ((0x97ffffff87ffffffL & l) != 0L)
{
if (kind > 21)
kind = 21;
jjCheckNAddTwoStates(25, 26);
}
- if (curChar == 92)
- jjCheckNAddStates(13, 15);
- else if (curChar == 78)
+ if (curChar == 78)
jjstateSet[jjnewStateCnt++] = 11;
else if (curChar == 124)
jjstateSet[jjnewStateCnt++] = 8;
@@ -361,14 +356,14 @@
jjstateSet[jjnewStateCnt++] = 20;
break;
case 24:
- if ((0x97ffffff97ffffffL & l) == 0L)
+ if ((0x97ffffff87ffffffL & l) == 0L)
break;
if (kind > 21)
kind = 21;
jjCheckNAddTwoStates(25, 26);
break;
case 25:
- if ((0x97ffffff97ffffffL & l) == 0L)
+ if ((0x97ffffff87ffffffL & l) == 0L)
break;
if (kind > 21)
kind = 21;
@@ -379,21 +374,19 @@
jjCheckNAddTwoStates(27, 27);
break;
case 27:
- if ((0x6800000078000000L & l) == 0L)
- break;
if (kind > 21)
kind = 21;
jjCheckNAddTwoStates(25, 26);
break;
case 28:
- if ((0x97ffffff97ffffffL & l) == 0L)
+ if ((0x97ffffff87ffffffL & l) == 0L)
break;
if (kind > 18)
kind = 18;
jjCheckNAddStates(0, 4);
break;
case 29:
- if ((0x97ffffff97ffffffL & l) == 0L)
+ if ((0x97ffffff87ffffffL & l) == 0L)
break;
if (kind > 18)
kind = 18;
@@ -404,14 +397,12 @@
jjCheckNAddTwoStates(31, 31);
break;
case 31:
- if ((0x6800000078000000L & l) == 0L)
- break;
if (kind > 18)
kind = 18;
jjCheckNAddTwoStates(29, 30);
break;
case 32:
- if ((0x97ffffff97ffffffL & l) != 0L)
+ if ((0x97ffffff87ffffffL & l) != 0L)
jjCheckNAddStates(10, 12);
break;
case 33:
@@ -419,8 +410,7 @@
jjCheckNAddTwoStates(34, 34);
break;
case 34:
- if ((0x6800000078000000L & l) != 0L)
- jjCheckNAddStates(10, 12);
+ jjCheckNAddStates(10, 12);
break;
case 35:
if (curChar == 92)
@@ -443,6 +433,7 @@
{
case 36:
case 25:
+ case 27:
if (!jjCanMove_0(hiByte, i1, i2, l1, l2))
break;
if (kind > 21)
@@ -482,6 +473,7 @@
jjCheckNAddStates(0, 4);
break;
case 29:
+ case 31:
if (!jjCanMove_0(hiByte, i1, i2, l1, l2))
break;
if (kind > 18)
@@ -489,6 +481,7 @@
jjCheckNAddTwoStates(29, 30);
break;
case 32:
+ case 34:
if (jjCanMove_0(hiByte, i1, i2, l1, l2))
jjCheckNAddStates(10, 12);
break;
Modified: lucene/java/trunk/src/test/org/apache/lucene/queryParser/TestQueryParser.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/queryParser/TestQueryParser.java?view=diff&rev=515908&r1=515907&r2=515908
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/queryParser/TestQueryParser.java (original)
+++ lucene/java/trunk/src/test/org/apache/lucene/queryParser/TestQueryParser.java Wed Mar 7 19:37:12 2007
@@ -522,6 +522,8 @@
//assertQueryEquals("foo \\|| bar", a, "foo \\|| bar");
//assertQueryEquals("foo \\AND bar", a, "foo \\AND bar");*/
+ assertQueryEquals("\\a", a, "a");
+
assertQueryEquals("a\\-b:c", a, "a-b:c");
assertQueryEquals("a\\+b:c", a, "a+b:c");
assertQueryEquals("a\\:b:c", a, "a:b:c");
@@ -584,6 +586,15 @@
try {
assertQueryEquals("XY\\u005", a, "XYZ");
fail("ParseException expected, not thrown");
+ } catch (ParseException expected) {}
+
+ // Tests bug LUCENE-800
+ assertQueryEquals("(item:\\\\ item:ABCD\\\\)", a, "item:\\ item:ABCD\\");
+ assertQueryEquals("\\*", a, "*");
+ assertQueryEquals("\\\\", a, "\\"); // escaped backslash
+ try {
+ assertQueryEquals("\\", a, "\\");
+ fail("ParseException expected not thrown (backslash must be escaped)");
} catch (ParseException expected) {}
}
Modified: lucene/java/trunk/src/test/org/apache/lucene/search/TestWildcard.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/search/TestWildcard.java?view=diff&rev=515908&r1=515907&r2=515908
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/search/TestWildcard.java (original)
+++ lucene/java/trunk/src/test/org/apache/lucene/search/TestWildcard.java Wed Mar 7 19:37:12 2007
@@ -170,13 +170,13 @@
QueryParser qp = new QueryParser(field, new WhitespaceAnalyzer());
qp.setAllowLeadingWildcard(true);
String docs[] = {
- "abcdefg1",
- "hijklmn1",
- "opqrstu1",
+ "\\ abcdefg1",
+ "\\79 hijklmn1",
+ "\\\\ opqrstu1",
};
// queries that should find all docs
String matchAll[] = {
- "*", "*1", "**1", "*?", "*?1", "?*1", "**", "***",
+ "*", "*1", "**1", "*?", "*?1", "?*1", "**", "***", "\\\\*"
};
// queries that should find no docs
String matchNone[] = {
@@ -184,9 +184,9 @@
};
// queries that should be parsed to prefix queries
String matchOneDocPrefix[][] = {
- {"a*", "ab*", "abc*"}, // these should find only doc 0
- {"h*", "hi*", "hij*"}, // these should find only doc 1
- {"o*", "op*", "opq*"}, // these should find only doc 2
+ {"a*", "ab*", "abc*", }, // these should find only doc 0
+ {"h*", "hi*", "hij*", "\\\\7*"}, // these should find only doc 1
+ {"o*", "op*", "opq*", "\\\\\\\\*"}, // these should find only doc 2
};
// queries that should be parsed to wildcard queries
String matchOneDocWild[][] = {
@@ -200,7 +200,7 @@
IndexWriter iw = new IndexWriter(dir, new WhitespaceAnalyzer());
for (int i = 0; i < docs.length; i++) {
Document doc = new Document();
- doc.add(new Field(field,docs[i],Store.NO,Index.UN_TOKENIZED));
+ doc.add(new Field(field,docs[i],Store.NO,Index.TOKENIZED));
iw.addDocument(doc);
}
iw.close();