You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ja...@apache.org on 2012/06/28 11:06:04 UTC

svn commit: r1354872 - in /lucene/dev/branches/branch_4x: ./ dev-tools/ lucene/ lucene/analysis/ lucene/analysis/common/ lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/std31/ lucene/analysis/common/src/java/org/apache/lucene/analys...

Author: janhoy
Date: Thu Jun 28 09:05:59 2012
New Revision: 1354872

URL: http://svn.apache.org/viewvc?rev=1354872&view=rev
Log:
SOLR-3467: ExtendedDismax escaping is missing several reserved characters (merge from trunk)

Modified:
    lucene/dev/branches/branch_4x/   (props changed)
    lucene/dev/branches/branch_4x/dev-tools/   (props changed)
    lucene/dev/branches/branch_4x/lucene/   (props changed)
    lucene/dev/branches/branch_4x/lucene/BUILD.txt   (props changed)
    lucene/dev/branches/branch_4x/lucene/CHANGES.txt   (props changed)
    lucene/dev/branches/branch_4x/lucene/JRE_VERSION_MIGRATION.txt   (props changed)
    lucene/dev/branches/branch_4x/lucene/LICENSE.txt   (props changed)
    lucene/dev/branches/branch_4x/lucene/MIGRATE.txt   (props changed)
    lucene/dev/branches/branch_4x/lucene/NOTICE.txt   (props changed)
    lucene/dev/branches/branch_4x/lucene/README.txt   (props changed)
    lucene/dev/branches/branch_4x/lucene/analysis/   (props changed)
    lucene/dev/branches/branch_4x/lucene/analysis/common/   (props changed)
    lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/std31/package.html   (props changed)
    lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/std34/package.html   (props changed)
    lucene/dev/branches/branch_4x/lucene/backwards/   (props changed)
    lucene/dev/branches/branch_4x/lucene/benchmark/   (props changed)
    lucene/dev/branches/branch_4x/lucene/build.xml   (props changed)
    lucene/dev/branches/branch_4x/lucene/common-build.xml   (props changed)
    lucene/dev/branches/branch_4x/lucene/core/   (props changed)
    lucene/dev/branches/branch_4x/lucene/demo/   (props changed)
    lucene/dev/branches/branch_4x/lucene/facet/   (props changed)
    lucene/dev/branches/branch_4x/lucene/grouping/   (props changed)
    lucene/dev/branches/branch_4x/lucene/highlighter/   (props changed)
    lucene/dev/branches/branch_4x/lucene/ivy-settings.xml   (props changed)
    lucene/dev/branches/branch_4x/lucene/join/   (props changed)
    lucene/dev/branches/branch_4x/lucene/memory/   (props changed)
    lucene/dev/branches/branch_4x/lucene/misc/   (props changed)
    lucene/dev/branches/branch_4x/lucene/module-build.xml   (props changed)
    lucene/dev/branches/branch_4x/lucene/queries/   (props changed)
    lucene/dev/branches/branch_4x/lucene/queryparser/   (props changed)
    lucene/dev/branches/branch_4x/lucene/sandbox/   (props changed)
    lucene/dev/branches/branch_4x/lucene/site/   (props changed)
    lucene/dev/branches/branch_4x/lucene/spatial/   (props changed)
    lucene/dev/branches/branch_4x/lucene/suggest/   (props changed)
    lucene/dev/branches/branch_4x/lucene/test-framework/   (props changed)
    lucene/dev/branches/branch_4x/lucene/tools/   (props changed)
    lucene/dev/branches/branch_4x/solr/   (props changed)
    lucene/dev/branches/branch_4x/solr/CHANGES.txt   (contents, props changed)
    lucene/dev/branches/branch_4x/solr/LICENSE.txt   (props changed)
    lucene/dev/branches/branch_4x/solr/NOTICE.txt   (props changed)
    lucene/dev/branches/branch_4x/solr/README.txt   (props changed)
    lucene/dev/branches/branch_4x/solr/build.xml   (props changed)
    lucene/dev/branches/branch_4x/solr/cloud-dev/   (props changed)
    lucene/dev/branches/branch_4x/solr/common-build.xml   (props changed)
    lucene/dev/branches/branch_4x/solr/contrib/   (props changed)
    lucene/dev/branches/branch_4x/solr/core/   (props changed)
    lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/search/ExtendedDismaxQParserPlugin.java
    lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/search/TestExtendedDismaxParser.java
    lucene/dev/branches/branch_4x/solr/dev-tools/   (props changed)
    lucene/dev/branches/branch_4x/solr/example/   (props changed)
    lucene/dev/branches/branch_4x/solr/lib/   (props changed)
    lucene/dev/branches/branch_4x/solr/lib/httpclient-LICENSE-ASL.txt   (props changed)
    lucene/dev/branches/branch_4x/solr/lib/httpclient-NOTICE.txt   (props changed)
    lucene/dev/branches/branch_4x/solr/lib/httpcore-LICENSE-ASL.txt   (props changed)
    lucene/dev/branches/branch_4x/solr/lib/httpcore-NOTICE.txt   (props changed)
    lucene/dev/branches/branch_4x/solr/lib/httpmime-LICENSE-ASL.txt   (props changed)
    lucene/dev/branches/branch_4x/solr/lib/httpmime-NOTICE.txt   (props changed)
    lucene/dev/branches/branch_4x/solr/scripts/   (props changed)
    lucene/dev/branches/branch_4x/solr/solrj/   (props changed)
    lucene/dev/branches/branch_4x/solr/test-framework/   (props changed)
    lucene/dev/branches/branch_4x/solr/testlogging.properties   (props changed)
    lucene/dev/branches/branch_4x/solr/webapp/   (props changed)

Modified: lucene/dev/branches/branch_4x/solr/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/solr/CHANGES.txt?rev=1354872&r1=1354871&r2=1354872&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/solr/CHANGES.txt (original)
+++ lucene/dev/branches/branch_4x/solr/CHANGES.txt Thu Jun 28 09:05:59 2012
@@ -513,6 +513,9 @@ Bug Fixes
 
 * SOLR-3522: fixed parsing of the 'literal()' function (hossman)
 
+* SOLR-3467: ExtendedDismax escaping is missing several reserved characters
+  (Michael Dodsworth via janhoy)
+
 * SOLR-3548: Fixed a bug in the cachability of queries using the {!join} 
   parser or the strdist() function, as well as some minor improvements to 
   the hashCode implementation of {!bbox} and {!geofilt} queries.

Modified: lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/search/ExtendedDismaxQParserPlugin.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/search/ExtendedDismaxQParserPlugin.java?rev=1354872&r1=1354871&r2=1354872&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/search/ExtendedDismaxQParserPlugin.java (original)
+++ lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/search/ExtendedDismaxQParserPlugin.java Thu Jun 28 09:05:59 2012
@@ -541,81 +541,81 @@ class ExtendedDismaxQParser extends QPar
   }
 
 
-  
-  public static CharSequence partialEscape(CharSequence s) {
-    StringBuilder sb = new StringBuilder();
-
-    int len = s.length();
-    for (int i = 0; i < len; i++) {
-      char c = s.charAt(i);
-      if (c == ':') {
-        // look forward to make sure it's something that won't
-        // cause a parse exception (something that won't be escaped... like
-        // +,-,:, whitespace
-        if (i+1<len && i>0) {
-          char ch = s.charAt(i+1);
-          if (!(Character.isWhitespace(ch) || ch=='+' || ch=='-' || ch==':')) {
-            // OK, at this point the chars after the ':' will be fine.
-            // now look back and try to determine if this is a fieldname
-            // [+,-]? [letter,_] [letter digit,_,-,.]*
-            // This won't cover *all* possible lucene fieldnames, but we should
-            // only pick nice names to begin with
-            int start, pos;
-            for (start=i-1; start>=0; start--) {
-              ch = s.charAt(start);
-              if (Character.isWhitespace(ch)) break;
-            }
-
-            // skip whitespace
-            pos = start+1;
-
-            // skip leading + or -
-            ch = s.charAt(pos);
-            if (ch=='+' || ch=='-') {
-              pos++;
-            }
-
-            // we don't need to explicitly check for end of string
-            // since ':' will act as our sentinal
-
-              // first char can't be '-' or '.'
-              ch = s.charAt(pos++);
-              if (Character.isJavaIdentifierPart(ch)) {
-
-                for(;;) {
-                  ch = s.charAt(pos++);
-                  if (!(Character.isJavaIdentifierPart(ch) || ch=='-' || ch=='.')) {
-                    break;
-                  }
-                }
-
-                if (pos<=i) {
-                  // OK, we got to the ':' and everything looked like a valid fieldname, so
-                  // don't escape the ':'
-                  sb.append(':');
-                  continue;  // jump back to start of outer-most loop
-                }
-
-              }
-
-
-          }
-        }
-
-        // we fell through to here, so we should escape this like other reserved chars.
-        sb.append('\\');
-      }
-      else if (c == '\\' || c == '!' || c == '(' || c == ')' ||
-          c == '^' || c == '[' || c == ']' ||
-          c == '{'  || c == '}' || c == '~' || c == '*' || c == '?'
-          )
-      {
-        sb.append('\\');
-      }
-      sb.append(c);
-    }
-    return sb;
-  }
+// FIXME: Not in use
+//  public static CharSequence partialEscape(CharSequence s) {
+//    StringBuilder sb = new StringBuilder();
+//
+//    int len = s.length();
+//    for (int i = 0; i < len; i++) {
+//      char c = s.charAt(i);
+//      if (c == ':') {
+//        // look forward to make sure it's something that won't
+//        // cause a parse exception (something that won't be escaped... like
+//        // +,-,:, whitespace
+//        if (i+1<len && i>0) {
+//          char ch = s.charAt(i+1);
+//          if (!(Character.isWhitespace(ch) || ch=='+' || ch=='-' || ch==':')) {
+//            // OK, at this point the chars after the ':' will be fine.
+//            // now look back and try to determine if this is a fieldname
+//            // [+,-]? [letter,_] [letter digit,_,-,.]*
+//            // This won't cover *all* possible lucene fieldnames, but we should
+//            // only pick nice names to begin with
+//            int start, pos;
+//            for (start=i-1; start>=0; start--) {
+//              ch = s.charAt(start);
+//              if (Character.isWhitespace(ch)) break;
+//            }
+//
+//            // skip whitespace
+//            pos = start+1;
+//
+//            // skip leading + or -
+//            ch = s.charAt(pos);
+//            if (ch=='+' || ch=='-') {
+//              pos++;
+//            }
+//
+//            // we don't need to explicitly check for end of string
+//            // since ':' will act as our sentinal
+//
+//              // first char can't be '-' or '.'
+//              ch = s.charAt(pos++);
+//              if (Character.isJavaIdentifierPart(ch)) {
+//
+//                for(;;) {
+//                  ch = s.charAt(pos++);
+//                  if (!(Character.isJavaIdentifierPart(ch) || ch=='-' || ch=='.')) {
+//                    break;
+//                  }
+//                }
+//
+//                if (pos<=i) {
+//                  // OK, we got to the ':' and everything looked like a valid fieldname, so
+//                  // don't escape the ':'
+//                  sb.append(':');
+//                  continue;  // jump back to start of outer-most loop
+//                }
+//
+//              }
+//
+//
+//          }
+//        }
+//
+//        // we fell through to here, so we should escape this like other reserved chars.
+//        sb.append('\\');
+//      }
+//      else if (c == '\\' || c == '!' || c == '(' || c == ')' ||
+//          c == '^' || c == '[' || c == ']' ||
+//          c == '{'  || c == '}' || c == '~' || c == '*' || c == '?'
+//          )
+//      {
+//        sb.append('\\');
+//      }
+//      sb.append(c);
+//    }
+//    return sb;
+//  }
 
 
   static class Clause {
@@ -726,6 +726,10 @@ class ExtendedDismaxQParser extends QPar
             case '"':
             case '+':
             case '-':
+            case '\\':
+            case '|':
+            case '&':
+            case '/':
               clause.hasSpecialSyntax = true;
               sb.append('\\');
           }

Modified: lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/search/TestExtendedDismaxParser.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/search/TestExtendedDismaxParser.java?rev=1354872&r1=1354871&r2=1354872&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/search/TestExtendedDismaxParser.java (original)
+++ lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/search/TestExtendedDismaxParser.java Thu Jun 28 09:05:59 2012
@@ -21,6 +21,7 @@ import java.io.IOException;
 
 import org.apache.solr.common.SolrException;
 import org.apache.solr.util.AbstractSolrTestCase;
+import org.junit.Test;
 
 public class TestExtendedDismaxParser extends AbstractSolrTestCase {
   @Override
@@ -701,4 +702,43 @@ public class TestExtendedDismaxParser ex
      );
 
   }
+
+  /**
+   * verify that all reserved characters are properly escaped when being set in
+   * {@link org.apache.solr.search.ExtendedDismaxQParser.Clause#val}.
+   *
+   * @see ExtendedDismaxQParser#splitIntoClauses(String, boolean)
+   */
+  @Test
+  public void testEscapingOfReservedCharacters() throws Exception {
+    // create a document that contains all reserved characters
+    String allReservedCharacters = "!():^[]{}~*?\"+-\\|&/";
+
+    assertU(adoc("id", "reservedChars",
+                 "name", allReservedCharacters,
+                 "cat_s", "foo/"));
+    assertU(commit());
+
+    // the backslash needs to be manually escaped (the query parser sees the raw backslash as an escape the subsequent
+    // character)
+    String query = allReservedCharacters.replace("\\", "\\\\");
+
+    // query for all those reserved characters. This will fail to parse in the initial parse, meaning that the escaped
+    // query will then be used
+    assertQ("Escaping reserved characters",
+        req("q", query,
+            "qf", "name",
+            "mm", "100%",
+            "defType", "edismax")
+        , "*[count(//doc)=1]");
+    
+    // Query string field 'cat_s' for special char / - causes ParseException without patch SOLR-3467
+    assertQ("Escaping string with reserved / character",
+        req("q", "foo/",
+            "qf", "cat_s",
+            "mm", "100%",
+            "defType", "edismax")
+        , "*[count(//doc)=1]");
+    
+  }
 }