You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@solr.apache.org by th...@apache.org on 2022/05/19 19:50:52 UTC
[solr] branch branch_9x updated: SOLR-16199: Improve handling of LIKE queries with wildcard (#870) (#871)
This is an automated email from the ASF dual-hosted git repository.
thelabdude pushed a commit to branch branch_9x
in repository https://gitbox.apache.org/repos/asf/solr.git
The following commit(s) were added to refs/heads/branch_9x by this push:
new 21ed7bf6510 SOLR-16199: Improve handling of LIKE queries with wildcard (#870) (#871)
21ed7bf6510 is described below
commit 21ed7bf65108ef7f9d0f5ddda24f718295500f29
Author: Timothy Potter <th...@gmail.com>
AuthorDate: Thu May 19 13:50:47 2022 -0600
SOLR-16199: Improve handling of LIKE queries with wildcard (#870) (#871)
* SOLR-16199 Improve handling of LIKE queries with wildcard
* rewrite escape logic
* Fix spotless issue
Co-authored-by: kiran chitturi <kc...@apple.com>
---
solr/CHANGES.txt | 3 +
.../org/apache/solr/handler/sql/SolrFilter.java | 107 ++++++++++++++++----
.../apache/solr/handler/sql/TestSQLHandler.java | 109 +++++++++++++++++++--
3 files changed, 191 insertions(+), 28 deletions(-)
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index bb5e2b4a126..be91c9286f2 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -792,6 +792,9 @@ Bug Fixes
* SOLR-16022: Enforce special character requirements on passwords with length less than 15 (Timothy Potter)
+* SOLR-16199: Improve query syntax construction for SQL LIKE clause with phrases and wildcards
+ (Kiran Chitturi, Aroop Ganguly, Amrit Sarkar via Timothy Potter)
+
================== 8.11.1 ==================
Bug Fixes
diff --git a/solr/modules/sql/src/java/org/apache/solr/handler/sql/SolrFilter.java b/solr/modules/sql/src/java/org/apache/solr/handler/sql/SolrFilter.java
index 216ed4913a3..8c913755c38 100644
--- a/solr/modules/sql/src/java/org/apache/solr/handler/sql/SolrFilter.java
+++ b/solr/modules/sql/src/java/org/apache/solr/handler/sql/SolrFilter.java
@@ -346,21 +346,54 @@ class SolrFilter extends Filter implements SolrRel {
}
protected String translateLike(RexNode like) {
- Pair<String, RexLiteral> pair = getFieldValuePair(like);
+ Pair<Pair<String, RexLiteral>, Character> pairWithEscapeCharacter =
+ getFieldValuePairWithEscapeCharacter(like);
+ Pair<String, RexLiteral> pair = pairWithEscapeCharacter.getKey();
+ Character escapeChar = pairWithEscapeCharacter.getValue();
+
String terms = pair.getValue().toString().trim();
- terms = terms.replace("'", "").replace('%', '*').replace('_', '?');
- boolean wrappedQuotes = false;
+ terms = translateLikeTermToSolrSyntax(terms, escapeChar);
+
if (!terms.startsWith("(") && !terms.startsWith("[") && !terms.startsWith("{")) {
- // restore the * and ? after escaping
- terms =
- "\""
- + ClientUtils.escapeQueryChars(terms).replace("\\*", "*").replace("\\?", "?")
- + "\"";
- wrappedQuotes = true;
- }
+ terms = escapeWithWildcard(terms);
+
+ // if terms contains multiple words and one or more wildcard chars, then we need to employ
+ // the complexphrase parser
+ // but that expects the terms wrapped in double-quotes, not parens
+ boolean hasMultipleTerms = terms.split("\\s+").length > 1;
+ if (hasMultipleTerms && (terms.contains("*") || terms.contains("?"))) {
+ String quotedTerms = "\"" + terms.substring(1, terms.length() - 1) + "\"";
+ return "{!complexphrase}" + pair.getKey() + ":" + quotedTerms;
+ }
+ } // else treat as an embedded Solr query and pass-through
+
+ return pair.getKey() + ":" + terms;
+ }
- String query = pair.getKey() + ":" + terms;
- return wrappedQuotes ? "{!complexphrase}" + query : query;
+ private String translateLikeTermToSolrSyntax(String term, Character escapeChar) {
+ boolean isEscaped = false;
+ StringBuilder sb = new StringBuilder();
+ // Special character % and _ are escaped with escape character and single quote is escaped
+ // with another single quote
+ // If single quote is escaped with escape character, calcite parser fails
+ for (int i = 0; i < term.length(); i++) {
+ char c = term.charAt(i);
+ if (!isEscaped && escapeChar != null && escapeChar == c) {
+ isEscaped = true;
+ } else if (c == '%' && !isEscaped) {
+ sb.append('*');
+ } else if (c == '_' && !isEscaped) {
+ sb.append("?");
+ } else if (c == '\'') {
+ if (i > 0 && term.charAt(i - 1) == '\'') {
+ sb.append(c);
+ }
+ } else {
+ sb.append(c);
+ if (isEscaped) isEscaped = false;
+ }
+ }
+ return sb.toString();
}
protected String translateComparison(RexNode node) {
@@ -414,18 +447,30 @@ class SolrFilter extends Filter implements SolrRel {
String terms = toSolrLiteral(key, value).trim();
- boolean wrappedQuotes = false;
if (!terms.startsWith("(") && !terms.startsWith("[") && !terms.startsWith("{")) {
- terms = "\"" + ClientUtils.escapeQueryChars(terms) + "\"";
- wrappedQuotes = true;
+ if (terms.contains("*") || terms.contains("?")) {
+ terms = escapeWithWildcard(terms);
+ } else {
+ terms = "\"" + ClientUtils.escapeQueryChars(terms) + "\"";
+ }
}
- String clause = key + ":" + terms;
- if (terms.contains("*") && wrappedQuotes) {
- clause = "{!complexphrase}" + clause;
- }
+ return key + ":" + terms;
+ }
- return clause;
+ // Wrap filter criteria containing wildcard with parens and unescape the wildcards after
+ // escaping protected query chars
+ private String escapeWithWildcard(String terms) {
+ String escaped =
+ ClientUtils.escapeQueryChars(terms)
+ .replace("\\*", "*")
+ .replace("\\?", "?")
+ .replace("\\ ", " ");
+ // if multiple terms, then wrap with parens
+ if (escaped.split("\\s+").length > 1) {
+ escaped = "(" + escaped + ")";
+ }
+ return escaped;
}
// translate to a literal string value for Solr queries, such as translating a
@@ -480,6 +525,28 @@ class SolrFilter extends Filter implements SolrRel {
return timestamp;
}
+ protected Pair<Pair<String, RexLiteral>, Character> getFieldValuePairWithEscapeCharacter(
+ RexNode node) {
+ if (!(node instanceof RexCall)) {
+ throw new AssertionError("expected RexCall for predicate but found: " + node);
+ }
+ RexCall call = (RexCall) node;
+ if (call.getOperands().size() == 3) {
+ RexNode escapeNode = call.getOperands().get(2);
+ Character escapeChar = null;
+ if (escapeNode.getKind() == SqlKind.LITERAL) {
+ RexLiteral literal = (RexLiteral) escapeNode;
+ if (literal.getTypeName() == SqlTypeName.CHAR) {
+ escapeChar = literal.getValueAs(Character.class);
+ }
+ }
+ return Pair.of(
+ translateBinary2(call.getOperands().get(0), call.getOperands().get(1)), escapeChar);
+ } else {
+ return Pair.of(getFieldValuePair(node), null);
+ }
+ }
+
protected Pair<String, RexLiteral> getFieldValuePair(RexNode node) {
if (!(node instanceof RexCall)) {
throw new AssertionError("expected RexCall for predicate but found: " + node);
diff --git a/solr/modules/sql/src/test/org/apache/solr/handler/sql/TestSQLHandler.java b/solr/modules/sql/src/test/org/apache/solr/handler/sql/TestSQLHandler.java
index fde59889b31..eefc20f0309 100644
--- a/solr/modules/sql/src/test/org/apache/solr/handler/sql/TestSQLHandler.java
+++ b/solr/modules/sql/src/test/org/apache/solr/handler/sql/TestSQLHandler.java
@@ -2434,18 +2434,95 @@ public class TestSQLHandler extends SolrCloudTestCase {
@Test
public void testLike() throws Exception {
new UpdateRequest()
- .add("id", "1", "a_s", "hello-1", "b_s", "foo")
- .add("id", "2", "a_s", "world-2", "b_s", "foo")
- .add("id", "3", "a_s", "hello-3", "b_s", "foo")
- .add("id", "4", "a_s", "world-4", "b_s", "foo")
- .add("id", "5", "a_s", "hello-5", "b_s", "foo")
- .add("id", "6", "a_s", "world-6", "b_s", "bar")
+ .add(
+ "id",
+ "1",
+ "a_s",
+ "hello-1",
+ "b_s",
+ "foo",
+ "c_t",
+ "the quick brown fox jumped over the lazy dog")
+ .add(
+ "id",
+ "2",
+ "a_s",
+ "world-2",
+ "b_s",
+ "foo",
+ "c_t",
+ "the sly black dog jumped over the sleeping pig")
+ .add(
+ "id",
+ "3",
+ "a_s",
+ "hello-3",
+ "b_s",
+ "foo",
+ "c_t",
+ "the quick brown fox jumped over the lazy dog")
+ .add(
+ "id",
+ "4",
+ "a_s",
+ "world-4",
+ "b_s",
+ "foo",
+ "c_t",
+ "the sly black dog jumped over the sleepy pig")
+ .add(
+ "id",
+ "5",
+ "a_s",
+ "hello-5",
+ "b_s",
+ "foo",
+ "c_t",
+ "the quick brown fox jumped over the lazy dog")
+ .add(
+ "id",
+ "6",
+ "a_s",
+ "w_orld-6",
+ "b_s",
+ "bar",
+ "c_t",
+ "the sly black dog jumped over the sleepin piglet")
+ .add(
+ "id",
+ "7",
+ "a_s",
+ "world%_7",
+ "b_s",
+ "zaz",
+ "c_t",
+ "the lazy dog jumped over the quick brown fox")
+ .add(
+ "id",
+ "8",
+ "a_s",
+ "world'\\9",
+ "b_s",
+ "zaz",
+ "c_t",
+ "the lazy dog ducked over the quick brown fox")
.commit(cluster.getSolrClient(), COLLECTIONORALIAS);
expectResults("SELECT a_s FROM $ALIAS WHERE a_s LIKE 'h_llo-%'", 3);
+ Throwable exception =
+ expectThrows(
+ IOException.class,
+ () ->
+ expectResults("SELECT a_s FROM $ALIAS WHERE a_s LIKE 'world\\'%' ESCAPE '\\'", 1));
+ assertTrue(exception.getMessage().contains("parse failed: Lexical error"));
+ expectResults("SELECT a_s FROM $ALIAS WHERE a_s LIKE 'world''%'", 1);
+ expectResults("SELECT a_s FROM $ALIAS WHERE a_s LIKE 'world''\\\\%' ESCAPE '\\'", 1);
+ expectResults("SELECT a_s FROM $ALIAS WHERE a_s LIKE 'w\\_o_ld%' ESCAPE '\\'", 1);
+ expectResults("SELECT a_s FROM $ALIAS WHERE a_s LIKE 'world\\%\\__' ESCAPE '\\'", 1);
+
// not technically valid SQL but we support it for legacy purposes, see: SOLR-15463
- expectResults("SELECT a_s FROM $ALIAS WHERE a_s='world-*'", 3);
+ expectResults("SELECT a_s FROM $ALIAS WHERE a_s='world-*'", 2);
// no results
expectResults("SELECT a_s FROM $ALIAS WHERE a_s LIKE '%MATCHNONE%'", 0);
@@ -2454,7 +2531,7 @@ public class TestSQLHandler extends SolrCloudTestCase {
expectResults("SELECT b_s FROM $ALIAS WHERE b_s LIKE 'foo'", 5);
// NOT LIKE
- expectResults("SELECT b_s FROM $ALIAS WHERE b_s NOT LIKE 'f%'", 1);
+ expectResults("SELECT b_s FROM $ALIAS WHERE b_s NOT LIKE 'f%'", 3);
// leading wildcard
expectResults("SELECT b_s FROM $ALIAS WHERE b_s LIKE '%oo'", 5);
@@ -2463,6 +2540,22 @@ public class TestSQLHandler extends SolrCloudTestCase {
expectResults("SELECT b_s FROM $ALIAS WHERE b_s LIKE '(fo%)'", 5);
expectResults("SELECT b_s FROM $ALIAS WHERE b_s LIKE '(ba*)'", 1);
+
+ expectResults("SELECT b_s FROM $ALIAS WHERE c_t LIKE 'fox'", 5);
+ expectResults("SELECT b_s FROM $ALIAS WHERE c_t LIKE 'sleep% pig%'", 3);
+ expectResults("SELECT b_s FROM $ALIAS WHERE c_t LIKE 'sleep% pigle%'", 1);
+ expectResults("SELECT b_s FROM $ALIAS WHERE c_t LIKE 'sleep% piglet'", 1);
+ expectResults("SELECT b_s FROM $ALIAS WHERE c_t LIKE 'sleep% pigle%'", 1);
+ expectResults("SELECT b_s FROM $ALIAS WHERE c_t LIKE 'sleep% piglet'", 1);
+
+ expectResults("SELECT b_s FROM $ALIAS WHERE c_t LIKE 'jump%'", 7);
+ expectResults("SELECT b_s FROM $ALIAS WHERE c_t LIKE '%ump%'", 7);
+
+ expectResults("SELECT b_s FROM $ALIAS WHERE c_t LIKE '(\"dog pig\"~5)'", 2);
+ expectResults("SELECT b_s FROM $ALIAS WHERE c_t LIKE 'jumped over'", 8);
+ expectResults("SELECT b_s FROM $ALIAS WHERE c_t LIKE 'quick brown fox'", 5);
+ expectResults("SELECT b_s FROM $ALIAS WHERE b_s LIKE 'foo*'", 5);
+ expectResults("SELECT b_s FROM $ALIAS WHERE c_t LIKE '*og'", 8);
}
@Test