You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ja...@apache.org on 2012/06/28 11:06:04 UTC
svn commit: r1354872 - in /lucene/dev/branches/branch_4x: ./ dev-tools/
lucene/ lucene/analysis/ lucene/analysis/common/
lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/std31/
lucene/analysis/common/src/java/org/apache/lucene/analys...
Author: janhoy
Date: Thu Jun 28 09:05:59 2012
New Revision: 1354872
URL: http://svn.apache.org/viewvc?rev=1354872&view=rev
Log:
SOLR-3467: ExtendedDismax escaping is missing several reserved characters (merge from trunk)
Modified:
lucene/dev/branches/branch_4x/ (props changed)
lucene/dev/branches/branch_4x/dev-tools/ (props changed)
lucene/dev/branches/branch_4x/lucene/ (props changed)
lucene/dev/branches/branch_4x/lucene/BUILD.txt (props changed)
lucene/dev/branches/branch_4x/lucene/CHANGES.txt (props changed)
lucene/dev/branches/branch_4x/lucene/JRE_VERSION_MIGRATION.txt (props changed)
lucene/dev/branches/branch_4x/lucene/LICENSE.txt (props changed)
lucene/dev/branches/branch_4x/lucene/MIGRATE.txt (props changed)
lucene/dev/branches/branch_4x/lucene/NOTICE.txt (props changed)
lucene/dev/branches/branch_4x/lucene/README.txt (props changed)
lucene/dev/branches/branch_4x/lucene/analysis/ (props changed)
lucene/dev/branches/branch_4x/lucene/analysis/common/ (props changed)
lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/std31/package.html (props changed)
lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/std34/package.html (props changed)
lucene/dev/branches/branch_4x/lucene/backwards/ (props changed)
lucene/dev/branches/branch_4x/lucene/benchmark/ (props changed)
lucene/dev/branches/branch_4x/lucene/build.xml (props changed)
lucene/dev/branches/branch_4x/lucene/common-build.xml (props changed)
lucene/dev/branches/branch_4x/lucene/core/ (props changed)
lucene/dev/branches/branch_4x/lucene/demo/ (props changed)
lucene/dev/branches/branch_4x/lucene/facet/ (props changed)
lucene/dev/branches/branch_4x/lucene/grouping/ (props changed)
lucene/dev/branches/branch_4x/lucene/highlighter/ (props changed)
lucene/dev/branches/branch_4x/lucene/ivy-settings.xml (props changed)
lucene/dev/branches/branch_4x/lucene/join/ (props changed)
lucene/dev/branches/branch_4x/lucene/memory/ (props changed)
lucene/dev/branches/branch_4x/lucene/misc/ (props changed)
lucene/dev/branches/branch_4x/lucene/module-build.xml (props changed)
lucene/dev/branches/branch_4x/lucene/queries/ (props changed)
lucene/dev/branches/branch_4x/lucene/queryparser/ (props changed)
lucene/dev/branches/branch_4x/lucene/sandbox/ (props changed)
lucene/dev/branches/branch_4x/lucene/site/ (props changed)
lucene/dev/branches/branch_4x/lucene/spatial/ (props changed)
lucene/dev/branches/branch_4x/lucene/suggest/ (props changed)
lucene/dev/branches/branch_4x/lucene/test-framework/ (props changed)
lucene/dev/branches/branch_4x/lucene/tools/ (props changed)
lucene/dev/branches/branch_4x/solr/ (props changed)
lucene/dev/branches/branch_4x/solr/CHANGES.txt (contents, props changed)
lucene/dev/branches/branch_4x/solr/LICENSE.txt (props changed)
lucene/dev/branches/branch_4x/solr/NOTICE.txt (props changed)
lucene/dev/branches/branch_4x/solr/README.txt (props changed)
lucene/dev/branches/branch_4x/solr/build.xml (props changed)
lucene/dev/branches/branch_4x/solr/cloud-dev/ (props changed)
lucene/dev/branches/branch_4x/solr/common-build.xml (props changed)
lucene/dev/branches/branch_4x/solr/contrib/ (props changed)
lucene/dev/branches/branch_4x/solr/core/ (props changed)
lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/search/ExtendedDismaxQParserPlugin.java
lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/search/TestExtendedDismaxParser.java
lucene/dev/branches/branch_4x/solr/dev-tools/ (props changed)
lucene/dev/branches/branch_4x/solr/example/ (props changed)
lucene/dev/branches/branch_4x/solr/lib/ (props changed)
lucene/dev/branches/branch_4x/solr/lib/httpclient-LICENSE-ASL.txt (props changed)
lucene/dev/branches/branch_4x/solr/lib/httpclient-NOTICE.txt (props changed)
lucene/dev/branches/branch_4x/solr/lib/httpcore-LICENSE-ASL.txt (props changed)
lucene/dev/branches/branch_4x/solr/lib/httpcore-NOTICE.txt (props changed)
lucene/dev/branches/branch_4x/solr/lib/httpmime-LICENSE-ASL.txt (props changed)
lucene/dev/branches/branch_4x/solr/lib/httpmime-NOTICE.txt (props changed)
lucene/dev/branches/branch_4x/solr/scripts/ (props changed)
lucene/dev/branches/branch_4x/solr/solrj/ (props changed)
lucene/dev/branches/branch_4x/solr/test-framework/ (props changed)
lucene/dev/branches/branch_4x/solr/testlogging.properties (props changed)
lucene/dev/branches/branch_4x/solr/webapp/ (props changed)
Modified: lucene/dev/branches/branch_4x/solr/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/solr/CHANGES.txt?rev=1354872&r1=1354871&r2=1354872&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/solr/CHANGES.txt (original)
+++ lucene/dev/branches/branch_4x/solr/CHANGES.txt Thu Jun 28 09:05:59 2012
@@ -513,6 +513,9 @@ Bug Fixes
* SOLR-3522: fixed parsing of the 'literal()' function (hossman)
+* SOLR-3467: ExtendedDismax escaping is missing several reserved characters
+ (Michael Dodsworth via janhoy)
+
* SOLR-3548: Fixed a bug in the cachability of queries using the {!join}
parser or the strdist() function, as well as some minor improvements to
the hashCode implementation of {!bbox} and {!geofilt} queries.
Modified: lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/search/ExtendedDismaxQParserPlugin.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/search/ExtendedDismaxQParserPlugin.java?rev=1354872&r1=1354871&r2=1354872&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/search/ExtendedDismaxQParserPlugin.java (original)
+++ lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/search/ExtendedDismaxQParserPlugin.java Thu Jun 28 09:05:59 2012
@@ -541,81 +541,81 @@ class ExtendedDismaxQParser extends QPar
}
-
- public static CharSequence partialEscape(CharSequence s) {
- StringBuilder sb = new StringBuilder();
-
- int len = s.length();
- for (int i = 0; i < len; i++) {
- char c = s.charAt(i);
- if (c == ':') {
- // look forward to make sure it's something that won't
- // cause a parse exception (something that won't be escaped... like
- // +,-,:, whitespace
- if (i+1<len && i>0) {
- char ch = s.charAt(i+1);
- if (!(Character.isWhitespace(ch) || ch=='+' || ch=='-' || ch==':')) {
- // OK, at this point the chars after the ':' will be fine.
- // now look back and try to determine if this is a fieldname
- // [+,-]? [letter,_] [letter digit,_,-,.]*
- // This won't cover *all* possible lucene fieldnames, but we should
- // only pick nice names to begin with
- int start, pos;
- for (start=i-1; start>=0; start--) {
- ch = s.charAt(start);
- if (Character.isWhitespace(ch)) break;
- }
-
- // skip whitespace
- pos = start+1;
-
- // skip leading + or -
- ch = s.charAt(pos);
- if (ch=='+' || ch=='-') {
- pos++;
- }
-
- // we don't need to explicitly check for end of string
- // since ':' will act as our sentinal
-
- // first char can't be '-' or '.'
- ch = s.charAt(pos++);
- if (Character.isJavaIdentifierPart(ch)) {
-
- for(;;) {
- ch = s.charAt(pos++);
- if (!(Character.isJavaIdentifierPart(ch) || ch=='-' || ch=='.')) {
- break;
- }
- }
-
- if (pos<=i) {
- // OK, we got to the ':' and everything looked like a valid fieldname, so
- // don't escape the ':'
- sb.append(':');
- continue; // jump back to start of outer-most loop
- }
-
- }
-
-
- }
- }
-
- // we fell through to here, so we should escape this like other reserved chars.
- sb.append('\\');
- }
- else if (c == '\\' || c == '!' || c == '(' || c == ')' ||
- c == '^' || c == '[' || c == ']' ||
- c == '{' || c == '}' || c == '~' || c == '*' || c == '?'
- )
- {
- sb.append('\\');
- }
- sb.append(c);
- }
- return sb;
- }
+// FIXME: Not in use
+// public static CharSequence partialEscape(CharSequence s) {
+// StringBuilder sb = new StringBuilder();
+//
+// int len = s.length();
+// for (int i = 0; i < len; i++) {
+// char c = s.charAt(i);
+// if (c == ':') {
+// // look forward to make sure it's something that won't
+// // cause a parse exception (something that won't be escaped... like
+// // +,-,:, whitespace
+// if (i+1<len && i>0) {
+// char ch = s.charAt(i+1);
+// if (!(Character.isWhitespace(ch) || ch=='+' || ch=='-' || ch==':')) {
+// // OK, at this point the chars after the ':' will be fine.
+// // now look back and try to determine if this is a fieldname
+// // [+,-]? [letter,_] [letter digit,_,-,.]*
+// // This won't cover *all* possible lucene fieldnames, but we should
+// // only pick nice names to begin with
+// int start, pos;
+// for (start=i-1; start>=0; start--) {
+// ch = s.charAt(start);
+// if (Character.isWhitespace(ch)) break;
+// }
+//
+// // skip whitespace
+// pos = start+1;
+//
+// // skip leading + or -
+// ch = s.charAt(pos);
+// if (ch=='+' || ch=='-') {
+// pos++;
+// }
+//
+// // we don't need to explicitly check for end of string
+// // since ':' will act as our sentinal
+//
+// // first char can't be '-' or '.'
+// ch = s.charAt(pos++);
+// if (Character.isJavaIdentifierPart(ch)) {
+//
+// for(;;) {
+// ch = s.charAt(pos++);
+// if (!(Character.isJavaIdentifierPart(ch) || ch=='-' || ch=='.')) {
+// break;
+// }
+// }
+//
+// if (pos<=i) {
+// // OK, we got to the ':' and everything looked like a valid fieldname, so
+// // don't escape the ':'
+// sb.append(':');
+// continue; // jump back to start of outer-most loop
+// }
+//
+// }
+//
+//
+// }
+// }
+//
+// // we fell through to here, so we should escape this like other reserved chars.
+// sb.append('\\');
+// }
+// else if (c == '\\' || c == '!' || c == '(' || c == ')' ||
+// c == '^' || c == '[' || c == ']' ||
+// c == '{' || c == '}' || c == '~' || c == '*' || c == '?'
+// )
+// {
+// sb.append('\\');
+// }
+// sb.append(c);
+// }
+// return sb;
+// }
static class Clause {
@@ -726,6 +726,10 @@ class ExtendedDismaxQParser extends QPar
case '"':
case '+':
case '-':
+ case '\\':
+ case '|':
+ case '&':
+ case '/':
clause.hasSpecialSyntax = true;
sb.append('\\');
}
Modified: lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/search/TestExtendedDismaxParser.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/search/TestExtendedDismaxParser.java?rev=1354872&r1=1354871&r2=1354872&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/search/TestExtendedDismaxParser.java (original)
+++ lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/search/TestExtendedDismaxParser.java Thu Jun 28 09:05:59 2012
@@ -21,6 +21,7 @@ import java.io.IOException;
import org.apache.solr.common.SolrException;
import org.apache.solr.util.AbstractSolrTestCase;
+import org.junit.Test;
public class TestExtendedDismaxParser extends AbstractSolrTestCase {
@Override
@@ -701,4 +702,43 @@ public class TestExtendedDismaxParser ex
);
}
+
+ /**
+ * verify that all reserved characters are properly escaped when being set in
+ * {@link org.apache.solr.search.ExtendedDismaxQParser.Clause#val}.
+ *
+ * @see ExtendedDismaxQParser#splitIntoClauses(String, boolean)
+ */
+ @Test
+ public void testEscapingOfReservedCharacters() throws Exception {
+ // create a document that contains all reserved characters
+ String allReservedCharacters = "!():^[]{}~*?\"+-\\|&/";
+
+ assertU(adoc("id", "reservedChars",
+ "name", allReservedCharacters,
+ "cat_s", "foo/"));
+ assertU(commit());
+
+ // the backslash needs to be manually escaped (the query parser sees the raw backslash as an escape the subsequent
+ // character)
+ String query = allReservedCharacters.replace("\\", "\\\\");
+
+ // query for all those reserved characters. This will fail to parse in the initial parse, meaning that the escaped
+ // query will then be used
+ assertQ("Escaping reserved characters",
+ req("q", query,
+ "qf", "name",
+ "mm", "100%",
+ "defType", "edismax")
+ , "*[count(//doc)=1]");
+
+ // Query string field 'cat_s' for special char / - causes ParseException without patch SOLR-3467
+ assertQ("Escaping string with reserved / character",
+ req("q", "foo/",
+ "qf", "cat_s",
+ "mm", "100%",
+ "defType", "edismax")
+ , "*[count(//doc)=1]");
+
+ }
}