You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ge...@apache.org on 2020/08/14 20:33:57 UTC

[lucene-solr] branch branch_8x updated: SOLR-14703 Edismax parser replaces whitespace characters with spaces (#1713)

This is an automated email from the ASF dual-hosted git repository.

gerlowskija pushed a commit to branch branch_8x
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git


The following commit(s) were added to refs/heads/branch_8x by this push:
     new dad6cdb  SOLR-14703 Edismax parser replaces whitespace characters with spaces (#1713)
dad6cdb is described below

commit dad6cdb048bb4ac22b50afd38f4d3283b642c74d
Author: Yuriy Koval <yu...@users.noreply.github.com>
AuthorDate: Fri Aug 14 11:12:53 2020 -0700

    SOLR-14703 Edismax parser replaces whitespace characters with spaces (#1713)
---
 solr/CHANGES.txt                                   |   2 +
 .../apache/solr/search/ExtendedDismaxQParser.java  |   6 +
 .../solr/search/TestExtendedDismaxParser.java      | 146 ++++++++++++++++++++-
 3 files changed, 152 insertions(+), 2 deletions(-)

diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index ec89f64c..47186bf 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -74,6 +74,8 @@ Bug Fixes
 
 * SOLR-14677: Improve DIH termination logic to close all DataSources, EntityProcessors (Jason Gerlowski)
 
+* SOLR-14703: Fix edismax replacement of all whitespace characters with spaces (Yuriy Koval via Jason Gerlowski)
+
 Other Changes
 ---------------------
 
diff --git a/solr/core/src/java/org/apache/solr/search/ExtendedDismaxQParser.java b/solr/core/src/java/org/apache/solr/search/ExtendedDismaxQParser.java
index 1097d97..bbaadd4 100644
--- a/solr/core/src/java/org/apache/solr/search/ExtendedDismaxQParser.java
+++ b/solr/core/src/java/org/apache/solr/search/ExtendedDismaxQParser.java
@@ -800,6 +800,12 @@ public class ExtendedDismaxQParser extends QParser {
         }
         
         if (inString == 0) {
+          if (!ignoreQuote && ch == '"') {
+            // end of the token if we aren't in a string, backing
+            // up the position.
+            pos--;
+            break;
+          }
           switch (ch) {
             case '!':
             case '(':
diff --git a/solr/core/src/test/org/apache/solr/search/TestExtendedDismaxParser.java b/solr/core/src/test/org/apache/solr/search/TestExtendedDismaxParser.java
index 53c9a97..f7e4bb0 100644
--- a/solr/core/src/test/org/apache/solr/search/TestExtendedDismaxParser.java
+++ b/solr/core/src/test/org/apache/solr/search/TestExtendedDismaxParser.java
@@ -18,6 +18,7 @@ package org.apache.solr.search;
 
 import java.util.Arrays;
 import java.util.HashSet;
+import java.util.List;
 import java.util.Map;
 import java.util.Random;
 import java.util.Set;
@@ -39,6 +40,7 @@ import org.apache.solr.common.params.SolrParams;
 import org.apache.solr.common.util.Utils;
 import org.apache.solr.request.SolrQueryRequest;
 import org.apache.solr.util.SolrPluginUtils;
+import org.junit.Assert;
 import org.junit.BeforeClass;
 import org.junit.Test;
 
@@ -964,6 +966,62 @@ public class TestExtendedDismaxParser extends SolrTestCaseJ4 {
     );
   }
 
+    @Test
+    public void testWhitespaceCharacters() throws Exception {
+        assertU(adoc("id", "whitespaceChars",
+                "cat_s", "foo\nfoo"));
+        assertU(commit());
+
+        assertQ(req("q", "(\"foo\nfoo\")",
+                        "qf", "cat_s",
+                        "defType", "edismax")
+                , "*[count(//doc)=1]");
+
+        assertQ(req("q", "cat_s:[\"foo\nfoo\" TO \"foo\nfoo\"]",
+                        "qf", "name",
+                        "defType", "edismax")
+                , "*[count(//doc)=1]");
+
+        assertQ(req("q", "cat_s:[ \"foo\nfoo\" TO \"foo\nfoo\"]",
+                        "qf", "name",
+                        "defType", "edismax")
+                , "*[count(//doc)=1]");
+
+        assertQ(req("q", "{!edismax qf=cat_s v='[\"foo\nfoo\" TO \"foo\nfoo\"]'}")
+                , "*[count(//doc)=1]");
+
+        assertQ(req("q", "{!edismax qf=cat_s v='[ \"foo\nfoo\" TO \"foo\nfoo\"]'}")
+                , "*[count(//doc)=1]");
+
+    }
+
+    @Test
+    public void testDoubleQuoteCharacters() throws Exception {
+        assertU(adoc("id", "doubleQuote",
+                "cat_s", "foo\"foo"));
+        assertU(commit());
+
+        assertQ(req("q", "cat_s:[\"foo\\\"foo\" TO \"foo\\\"foo\"]",
+                        "qf", "name",
+                        "defType", "edismax")
+                , "*[count(//doc)=1]");
+
+        assertQ(req("q", "cat_s:\"foo\\\"foo\"",
+                        "qf", "name",
+                        "defType", "edismax")
+                , "*[count(//doc)=1]");
+
+        assertQ(req("q", "cat_s:foo\\\"foo",
+                        "qf", "name",
+                        "defType", "edismax")
+                , "*[count(//doc)=1]");
+
+        assertQ(req("q", "cat_s:foo\"foo",
+                        "qf", "name",
+                        "defType", "edismax")
+                , "*[count(//doc)=1]");
+    }
+
   /**
    * verify that all reserved characters are properly escaped when being set in
    * {@link org.apache.solr.search.ExtendedDismaxQParser.Clause#val}.
@@ -1011,8 +1069,92 @@ public class TestExtendedDismaxParser extends SolrTestCaseJ4 {
         "*[count(//doc)=3]");    
     
   }
-  
-  /**
+
+
+    /**
+     * Repeating some of test cases as direct calls to splitIntoClauses
+     */
+    @Test
+    public void testSplitIntoClauses() throws Exception {
+        String query = "(\"foo\nfoo\")";
+        SolrQueryRequest request = req("q", query,
+                "qf", "cat_s",
+                "defType", "edismax");
+        ExtendedDismaxQParser parser = new ExtendedDismaxQParser(query, null, request.getParams(), request);
+        List<ExtendedDismaxQParser.Clause> clauses = parser.splitIntoClauses(query, false);
+        Assert.assertEquals(3, clauses.size());
+        assertClause(clauses.get(0), "\\(", false, true);
+        assertClause(clauses.get(1), "foo\nfoo", true, false);
+        assertClause(clauses.get(2), "\\)", false, true);
+
+        query = "cat_s:[\"foo\nfoo\" TO \"foo\nfoo\"]";
+        request = req("q", query,
+                "qf", "cat_s",
+                "defType", "edismax");
+        parser = new ExtendedDismaxQParser(query, null, request.getParams(), request);
+        clauses = parser.splitIntoClauses(query, false);
+        Assert.assertEquals(5, clauses.size());
+        assertClause(clauses.get(0), "\\[", false, true, "cat_s");
+        assertClause(clauses.get(1), "foo\nfoo", true, false);
+        assertClause(clauses.get(2), "TO", true, false);
+        assertClause(clauses.get(3), "foo\nfoo", true, false);
+        assertClause(clauses.get(4), "\\]", false, true);
+
+        query = "cat_s:[ \"foo\nfoo\" TO \"foo\nfoo\"]";
+        request = req("q", query,
+                "qf", "cat_s",
+                "defType", "edismax");
+        parser = new ExtendedDismaxQParser(query, null, request.getParams(), request);
+        clauses = parser.splitIntoClauses(query, false);
+        Assert.assertEquals(5, clauses.size());
+        assertClause(clauses.get(0), "\\[", true, true, "cat_s");
+        assertClause(clauses.get(1), "foo\nfoo", true, false);
+        assertClause(clauses.get(2), "TO", true, false);
+        assertClause(clauses.get(3), "foo\nfoo", true, false);
+        assertClause(clauses.get(4), "\\]", false, true);
+
+        String allReservedCharacters = "!():^[]{}~*?\"+-\\|&/";
+        // the backslash needs to be manually escaped (the query parser sees the raw backslash as an escape the subsequent
+        // character)
+        query = allReservedCharacters.replace("\\", "\\\\");
+
+        request = req("q", query,
+                "qf", "name",
+                "mm", "100%",
+                "defType", "edismax");
+
+        parser = new ExtendedDismaxQParser(query, null, request.getParams(), request);
+        clauses = parser.splitIntoClauses(query, false);
+        Assert.assertEquals(1, clauses.size());
+        assertClause(clauses.get(0), "\\!\\(\\)\\:\\^\\[\\]\\{\\}\\~\\*\\?\\\"\\+\\-\\\\\\|\\&\\/", false, true);
+
+        query = "foo/";
+        request = req("q", query,
+                "qf", "name",
+                "mm", "100%",
+                "defType", "edismax");
+
+        parser = new ExtendedDismaxQParser(query, null, request.getParams(), request);
+        clauses = parser.splitIntoClauses(query, false);
+        Assert.assertEquals(1, clauses.size());
+        assertClause(clauses.get(0), "foo\\/", false, true);
+    }
+
+    private static void assertClause(ExtendedDismaxQParser.Clause clause, String value, boolean hasWhitespace,
+                                     boolean hasSpecialSyntax, String field) {
+        Assert.assertEquals(value, clause.val);
+        Assert.assertEquals(hasWhitespace, clause.hasWhitespace);
+        Assert.assertEquals(hasSpecialSyntax, clause.hasSpecialSyntax);
+        Assert.assertEquals(field, clause.field);
+    }
+
+    private static void assertClause(ExtendedDismaxQParser.Clause clause, String value, boolean hasWhitespace,
+                                     boolean hasSpecialSyntax) {
+        assertClause(clause, value, hasWhitespace, hasSpecialSyntax, null);
+
+    }
+
+    /**
    * SOLR-3589: Edismax parser does not honor mm parameter if analyzer splits a token
    */
   public void testCJK() throws Exception {