You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ja...@apache.org on 2012/06/28 10:57:00 UTC
svn commit: r1354865 - in /lucene/dev/trunk/solr: CHANGES.txt
core/src/java/org/apache/solr/search/ExtendedDismaxQParserPlugin.java
core/src/test/org/apache/solr/search/TestExtendedDismaxParser.java
Author: janhoy
Date: Thu Jun 28 08:56:59 2012
New Revision: 1354865
URL: http://svn.apache.org/viewvc?rev=1354865&view=rev
Log:
SOLR-3467: ExtendedDismax escaping is missing several reserved characters
Modified:
lucene/dev/trunk/solr/CHANGES.txt
lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/ExtendedDismaxQParserPlugin.java
lucene/dev/trunk/solr/core/src/test/org/apache/solr/search/TestExtendedDismaxParser.java
Modified: lucene/dev/trunk/solr/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/CHANGES.txt?rev=1354865&r1=1354864&r2=1354865&view=diff
==============================================================================
--- lucene/dev/trunk/solr/CHANGES.txt (original)
+++ lucene/dev/trunk/solr/CHANGES.txt Thu Jun 28 08:56:59 2012
@@ -517,6 +517,9 @@ Bug Fixes
* SOLR-3522: fixed parsing of the 'literal()' function (hossman)
+* SOLR-3467: ExtendedDismax escaping is missing several reserved characters
+ (Michael Dodsworth via janhoy)
+
* SOLR-3548: Fixed a bug in the cachability of queries using the {!join}
parser or the strdist() function, as well as some minor improvements to
the hashCode implementation of {!bbox} and {!geofilt} queries.
Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/ExtendedDismaxQParserPlugin.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/ExtendedDismaxQParserPlugin.java?rev=1354865&r1=1354864&r2=1354865&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/ExtendedDismaxQParserPlugin.java (original)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/ExtendedDismaxQParserPlugin.java Thu Jun 28 08:56:59 2012
@@ -541,81 +541,81 @@ class ExtendedDismaxQParser extends QPar
}
-
- public static CharSequence partialEscape(CharSequence s) {
- StringBuilder sb = new StringBuilder();
-
- int len = s.length();
- for (int i = 0; i < len; i++) {
- char c = s.charAt(i);
- if (c == ':') {
- // look forward to make sure it's something that won't
- // cause a parse exception (something that won't be escaped... like
- // +,-,:, whitespace
- if (i+1<len && i>0) {
- char ch = s.charAt(i+1);
- if (!(Character.isWhitespace(ch) || ch=='+' || ch=='-' || ch==':')) {
- // OK, at this point the chars after the ':' will be fine.
- // now look back and try to determine if this is a fieldname
- // [+,-]? [letter,_] [letter digit,_,-,.]*
- // This won't cover *all* possible lucene fieldnames, but we should
- // only pick nice names to begin with
- int start, pos;
- for (start=i-1; start>=0; start--) {
- ch = s.charAt(start);
- if (Character.isWhitespace(ch)) break;
- }
-
- // skip whitespace
- pos = start+1;
-
- // skip leading + or -
- ch = s.charAt(pos);
- if (ch=='+' || ch=='-') {
- pos++;
- }
-
- // we don't need to explicitly check for end of string
- // since ':' will act as our sentinal
-
- // first char can't be '-' or '.'
- ch = s.charAt(pos++);
- if (Character.isJavaIdentifierPart(ch)) {
-
- for(;;) {
- ch = s.charAt(pos++);
- if (!(Character.isJavaIdentifierPart(ch) || ch=='-' || ch=='.')) {
- break;
- }
- }
-
- if (pos<=i) {
- // OK, we got to the ':' and everything looked like a valid fieldname, so
- // don't escape the ':'
- sb.append(':');
- continue; // jump back to start of outer-most loop
- }
-
- }
-
-
- }
- }
-
- // we fell through to here, so we should escape this like other reserved chars.
- sb.append('\\');
- }
- else if (c == '\\' || c == '!' || c == '(' || c == ')' ||
- c == '^' || c == '[' || c == ']' ||
- c == '{' || c == '}' || c == '~' || c == '*' || c == '?'
- )
- {
- sb.append('\\');
- }
- sb.append(c);
- }
- return sb;
- }
+// FIXME: Not in use
+// public static CharSequence partialEscape(CharSequence s) {
+// StringBuilder sb = new StringBuilder();
+//
+// int len = s.length();
+// for (int i = 0; i < len; i++) {
+// char c = s.charAt(i);
+// if (c == ':') {
+// // look forward to make sure it's something that won't
+// // cause a parse exception (something that won't be escaped... like
+// // +,-,:, whitespace
+// if (i+1<len && i>0) {
+// char ch = s.charAt(i+1);
+// if (!(Character.isWhitespace(ch) || ch=='+' || ch=='-' || ch==':')) {
+// // OK, at this point the chars after the ':' will be fine.
+// // now look back and try to determine if this is a fieldname
+// // [+,-]? [letter,_] [letter digit,_,-,.]*
+// // This won't cover *all* possible lucene fieldnames, but we should
+// // only pick nice names to begin with
+// int start, pos;
+// for (start=i-1; start>=0; start--) {
+// ch = s.charAt(start);
+// if (Character.isWhitespace(ch)) break;
+// }
+//
+// // skip whitespace
+// pos = start+1;
+//
+// // skip leading + or -
+// ch = s.charAt(pos);
+// if (ch=='+' || ch=='-') {
+// pos++;
+// }
+//
+// // we don't need to explicitly check for end of string
+// // since ':' will act as our sentinal
+//
+// // first char can't be '-' or '.'
+// ch = s.charAt(pos++);
+// if (Character.isJavaIdentifierPart(ch)) {
+//
+// for(;;) {
+// ch = s.charAt(pos++);
+// if (!(Character.isJavaIdentifierPart(ch) || ch=='-' || ch=='.')) {
+// break;
+// }
+// }
+//
+// if (pos<=i) {
+// // OK, we got to the ':' and everything looked like a valid fieldname, so
+// // don't escape the ':'
+// sb.append(':');
+// continue; // jump back to start of outer-most loop
+// }
+//
+// }
+//
+//
+// }
+// }
+//
+// // we fell through to here, so we should escape this like other reserved chars.
+// sb.append('\\');
+// }
+// else if (c == '\\' || c == '!' || c == '(' || c == ')' ||
+// c == '^' || c == '[' || c == ']' ||
+// c == '{' || c == '}' || c == '~' || c == '*' || c == '?'
+// )
+// {
+// sb.append('\\');
+// }
+// sb.append(c);
+// }
+// return sb;
+// }
static class Clause {
@@ -726,6 +726,10 @@ class ExtendedDismaxQParser extends QPar
case '"':
case '+':
case '-':
+ case '\\':
+ case '|':
+ case '&':
+ case '/':
clause.hasSpecialSyntax = true;
sb.append('\\');
}
Modified: lucene/dev/trunk/solr/core/src/test/org/apache/solr/search/TestExtendedDismaxParser.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/test/org/apache/solr/search/TestExtendedDismaxParser.java?rev=1354865&r1=1354864&r2=1354865&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/test/org/apache/solr/search/TestExtendedDismaxParser.java (original)
+++ lucene/dev/trunk/solr/core/src/test/org/apache/solr/search/TestExtendedDismaxParser.java Thu Jun 28 08:56:59 2012
@@ -21,6 +21,7 @@ import java.io.IOException;
import org.apache.solr.common.SolrException;
import org.apache.solr.util.AbstractSolrTestCase;
+import org.junit.Test;
public class TestExtendedDismaxParser extends AbstractSolrTestCase {
@Override
@@ -701,4 +702,43 @@ public class TestExtendedDismaxParser ex
);
}
+
+ /**
+ * verify that all reserved characters are properly escaped when being set in
+ * {@link org.apache.solr.search.ExtendedDismaxQParser.Clause#val}.
+ *
+ * @see ExtendedDismaxQParser#splitIntoClauses(String, boolean)
+ */
+ @Test
+ public void testEscapingOfReservedCharacters() throws Exception {
+ // create a document that contains all reserved characters
+ String allReservedCharacters = "!():^[]{}~*?\"+-\\|&/";
+
+ assertU(adoc("id", "reservedChars",
+ "name", allReservedCharacters,
+ "cat_s", "foo/"));
+ assertU(commit());
+
+ // the backslash needs to be manually escaped (the query parser sees the raw backslash as an escape the subsequent
+ // character)
+ String query = allReservedCharacters.replace("\\", "\\\\");
+
+ // query for all those reserved characters. This will fail to parse in the initial parse, meaning that the escaped
+ // query will then be used
+ assertQ("Escaping reserved characters",
+ req("q", query,
+ "qf", "name",
+ "mm", "100%",
+ "defType", "edismax")
+ , "*[count(//doc)=1]");
+
+ // Query string field 'cat_s' for special char / - causes ParseException without patch SOLR-3467
+ assertQ("Escaping string with reserved / character",
+ req("q", "foo/",
+ "qf", "cat_s",
+ "mm", "100%",
+ "defType", "edismax")
+ , "*[count(//doc)=1]");
+
+ }
}