You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ge...@apache.org on 2020/08/14 18:13:04 UTC
[lucene-solr] branch master updated: SOLR-14703 Edismax parser
replaces whitespace characters with spaces (#1713)
This is an automated email from the ASF dual-hosted git repository.
gerlowskija pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git
The following commit(s) were added to refs/heads/master by this push:
new 44c4e6e SOLR-14703 Edismax parser replaces whitespace characters with spaces (#1713)
44c4e6e is described below
commit 44c4e6ef311ea0dffe72d33413ede260de0fb6e9
Author: Yuriy Koval <yu...@users.noreply.github.com>
AuthorDate: Fri Aug 14 11:12:53 2020 -0700
SOLR-14703 Edismax parser replaces whitespace characters with spaces (#1713)
---
solr/CHANGES.txt | 2 +
.../apache/solr/search/ExtendedDismaxQParser.java | 6 +
.../solr/search/TestExtendedDismaxParser.java | 146 ++++++++++++++++++++-
3 files changed, 152 insertions(+), 2 deletions(-)
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index 9945ecb..5b27dcd 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -184,6 +184,8 @@ Bug Fixes
* SOLR-14677: Improve DIH termination logic to close all DataSources, EntityProcessors (Jason Gerlowski)
+* SOLR-14703: Fix edismax replacement of all whitespace characters with spaces (Yuriy Koval via Jason Gerlowski)
+
Other Changes
---------------------
diff --git a/solr/core/src/java/org/apache/solr/search/ExtendedDismaxQParser.java b/solr/core/src/java/org/apache/solr/search/ExtendedDismaxQParser.java
index fce9416..8f234f0 100644
--- a/solr/core/src/java/org/apache/solr/search/ExtendedDismaxQParser.java
+++ b/solr/core/src/java/org/apache/solr/search/ExtendedDismaxQParser.java
@@ -799,6 +799,12 @@ public class ExtendedDismaxQParser extends QParser {
}
if (inString == 0) {
+ if (!ignoreQuote && ch == '"') {
+ // end of the token if we aren't in a string, backing
+ // up the position.
+ pos--;
+ break;
+ }
switch (ch) {
case '!':
case '(':
diff --git a/solr/core/src/test/org/apache/solr/search/TestExtendedDismaxParser.java b/solr/core/src/test/org/apache/solr/search/TestExtendedDismaxParser.java
index 8b93ae7..f94f421 100644
--- a/solr/core/src/test/org/apache/solr/search/TestExtendedDismaxParser.java
+++ b/solr/core/src/test/org/apache/solr/search/TestExtendedDismaxParser.java
@@ -18,6 +18,7 @@ package org.apache.solr.search;
import java.util.Arrays;
import java.util.HashSet;
+import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.Set;
@@ -39,6 +40,7 @@ import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.Utils;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.util.SolrPluginUtils;
+import org.junit.Assert;
import org.junit.BeforeClass;
import org.junit.Test;
@@ -964,6 +966,62 @@ public class TestExtendedDismaxParser extends SolrTestCaseJ4 {
);
}
+ @Test
+ public void testWhitespaceCharacters() throws Exception {
+ assertU(adoc("id", "whitespaceChars",
+ "cat_s", "foo\nfoo"));
+ assertU(commit());
+
+ assertQ(req("q", "(\"foo\nfoo\")",
+ "qf", "cat_s",
+ "defType", "edismax")
+ , "*[count(//doc)=1]");
+
+ assertQ(req("q", "cat_s:[\"foo\nfoo\" TO \"foo\nfoo\"]",
+ "qf", "name",
+ "defType", "edismax")
+ , "*[count(//doc)=1]");
+
+ assertQ(req("q", "cat_s:[ \"foo\nfoo\" TO \"foo\nfoo\"]",
+ "qf", "name",
+ "defType", "edismax")
+ , "*[count(//doc)=1]");
+
+ assertQ(req("q", "{!edismax qf=cat_s v='[\"foo\nfoo\" TO \"foo\nfoo\"]'}")
+ , "*[count(//doc)=1]");
+
+ assertQ(req("q", "{!edismax qf=cat_s v='[ \"foo\nfoo\" TO \"foo\nfoo\"]'}")
+ , "*[count(//doc)=1]");
+
+ }
+
+ @Test
+ public void testDoubleQuoteCharacters() throws Exception {
+ assertU(adoc("id", "doubleQuote",
+ "cat_s", "foo\"foo"));
+ assertU(commit());
+
+ assertQ(req("q", "cat_s:[\"foo\\\"foo\" TO \"foo\\\"foo\"]",
+ "qf", "name",
+ "defType", "edismax")
+ , "*[count(//doc)=1]");
+
+ assertQ(req("q", "cat_s:\"foo\\\"foo\"",
+ "qf", "name",
+ "defType", "edismax")
+ , "*[count(//doc)=1]");
+
+ assertQ(req("q", "cat_s:foo\\\"foo",
+ "qf", "name",
+ "defType", "edismax")
+ , "*[count(//doc)=1]");
+
+ assertQ(req("q", "cat_s:foo\"foo",
+ "qf", "name",
+ "defType", "edismax")
+ , "*[count(//doc)=1]");
+ }
+
/**
* verify that all reserved characters are properly escaped when being set in
* {@link org.apache.solr.search.ExtendedDismaxQParser.Clause#val}.
@@ -1011,8 +1069,92 @@ public class TestExtendedDismaxParser extends SolrTestCaseJ4 {
"*[count(//doc)=3]");
}
-
- /**
+
+
+ /**
+ * Repeating some of test cases as direct calls to splitIntoClauses
+ */
+ @Test
+ public void testSplitIntoClauses() throws Exception {
+ String query = "(\"foo\nfoo\")";
+ SolrQueryRequest request = req("q", query,
+ "qf", "cat_s",
+ "defType", "edismax");
+ ExtendedDismaxQParser parser = new ExtendedDismaxQParser(query, null, request.getParams(), request);
+ List<ExtendedDismaxQParser.Clause> clauses = parser.splitIntoClauses(query, false);
+ Assert.assertEquals(3, clauses.size());
+ assertClause(clauses.get(0), "\\(", false, true);
+ assertClause(clauses.get(1), "foo\nfoo", true, false);
+ assertClause(clauses.get(2), "\\)", false, true);
+
+ query = "cat_s:[\"foo\nfoo\" TO \"foo\nfoo\"]";
+ request = req("q", query,
+ "qf", "cat_s",
+ "defType", "edismax");
+ parser = new ExtendedDismaxQParser(query, null, request.getParams(), request);
+ clauses = parser.splitIntoClauses(query, false);
+ Assert.assertEquals(5, clauses.size());
+ assertClause(clauses.get(0), "\\[", false, true, "cat_s");
+ assertClause(clauses.get(1), "foo\nfoo", true, false);
+ assertClause(clauses.get(2), "TO", true, false);
+ assertClause(clauses.get(3), "foo\nfoo", true, false);
+ assertClause(clauses.get(4), "\\]", false, true);
+
+ query = "cat_s:[ \"foo\nfoo\" TO \"foo\nfoo\"]";
+ request = req("q", query,
+ "qf", "cat_s",
+ "defType", "edismax");
+ parser = new ExtendedDismaxQParser(query, null, request.getParams(), request);
+ clauses = parser.splitIntoClauses(query, false);
+ Assert.assertEquals(5, clauses.size());
+ assertClause(clauses.get(0), "\\[", true, true, "cat_s");
+ assertClause(clauses.get(1), "foo\nfoo", true, false);
+ assertClause(clauses.get(2), "TO", true, false);
+ assertClause(clauses.get(3), "foo\nfoo", true, false);
+ assertClause(clauses.get(4), "\\]", false, true);
+
+ String allReservedCharacters = "!():^[]{}~*?\"+-\\|&/";
+ // the backslash needs to be manually escaped (the query parser sees the raw backslash as an escape the subsequent
+ // character)
+ query = allReservedCharacters.replace("\\", "\\\\");
+
+ request = req("q", query,
+ "qf", "name",
+ "mm", "100%",
+ "defType", "edismax");
+
+ parser = new ExtendedDismaxQParser(query, null, request.getParams(), request);
+ clauses = parser.splitIntoClauses(query, false);
+ Assert.assertEquals(1, clauses.size());
+ assertClause(clauses.get(0), "\\!\\(\\)\\:\\^\\[\\]\\{\\}\\~\\*\\?\\\"\\+\\-\\\\\\|\\&\\/", false, true);
+
+ query = "foo/";
+ request = req("q", query,
+ "qf", "name",
+ "mm", "100%",
+ "defType", "edismax");
+
+ parser = new ExtendedDismaxQParser(query, null, request.getParams(), request);
+ clauses = parser.splitIntoClauses(query, false);
+ Assert.assertEquals(1, clauses.size());
+ assertClause(clauses.get(0), "foo\\/", false, true);
+ }
+
+ private static void assertClause(ExtendedDismaxQParser.Clause clause, String value, boolean hasWhitespace,
+ boolean hasSpecialSyntax, String field) {
+ Assert.assertEquals(value, clause.val);
+ Assert.assertEquals(hasWhitespace, clause.hasWhitespace);
+ Assert.assertEquals(hasSpecialSyntax, clause.hasSpecialSyntax);
+ Assert.assertEquals(field, clause.field);
+ }
+
+ private static void assertClause(ExtendedDismaxQParser.Clause clause, String value, boolean hasWhitespace,
+ boolean hasSpecialSyntax) {
+ assertClause(clause, value, hasWhitespace, hasSpecialSyntax, null);
+
+ }
+
+ /**
* SOLR-3589: Edismax parser does not honor mm parameter if analyzer splits a token
*/
public void testCJK() throws Exception {