You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by th...@apache.org on 2022/05/19 21:38:18 UTC
[lucene-solr] branch branch_8_11 updated: SOLR-16199: Fix query syntax for LIKE queries with wildcard (#2659)
This is an automated email from the ASF dual-hosted git repository.
thelabdude pushed a commit to branch branch_8_11
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git
The following commit(s) were added to refs/heads/branch_8_11 by this push:
new 600c0284ddf SOLR-16199: Fix query syntax for LIKE queries with wildcard (#2659)
600c0284ddf is described below
commit 600c0284ddff347bd1d08065187382ee02e44a7e
Author: Kiran Chitturi <ki...@lucidworks.com>
AuthorDate: Thu May 19 14:38:07 2022 -0700
SOLR-16199: Fix query syntax for LIKE queries with wildcard (#2659)
---
solr/CHANGES.txt | 2 +
.../org/apache/solr/handler/sql/SolrFilter.java | 102 +++++++++++++++++----
.../org/apache/solr/handler/TestSQLHandler.java | 40 ++++++--
3 files changed, 118 insertions(+), 26 deletions(-)
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index bf5869fa471..11911c62629 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -52,6 +52,8 @@ Bug Fixes
* SOLR-16143: SolrConfig can miss updates from ZooKeeper when deleting and recreating file items (Mike Drob)
+* SOLR-16199: Improve query syntax construction for SQL LIKE clause with phrases and wildcards (Kiran Chitturi, Aroop Ganguly, Amrit Sarkar via Timothy Potter)
+
* SOLR-16191: Validate that installed ps utility supports -p flag, so that we do not inadvertantly stop the wrong process. (Mike Drob, Michael Gibney)
================== 8.11.1 ==================
diff --git a/solr/core/src/java/org/apache/solr/handler/sql/SolrFilter.java b/solr/core/src/java/org/apache/solr/handler/sql/SolrFilter.java
index 90cfd92c76f..f87f72966e8 100644
--- a/solr/core/src/java/org/apache/solr/handler/sql/SolrFilter.java
+++ b/solr/core/src/java/org/apache/solr/handler/sql/SolrFilter.java
@@ -348,21 +348,55 @@ class SolrFilter extends Filter implements SolrRel {
}
protected String translateLike(RexNode like) {
- Pair<String, RexLiteral> pair = getFieldValuePair(like);
+ Pair<Pair<String, RexLiteral>, Character> pairWithEscapeCharacter = getFieldValuePairWithEscapeCharacter(like);
+ Pair<String, RexLiteral> pair = pairWithEscapeCharacter.getKey();
+ Character escapeChar = pairWithEscapeCharacter.getValue();
+
String terms = pair.getValue().toString().trim();
- terms = terms.replace("'", "").replace('%', '*').replace('_', '?');
- boolean wrappedQuotes = false;
+ terms = translateLikeTermToSolrSyntax(terms, escapeChar);
+
if (!terms.startsWith("(") && !terms.startsWith("[") && !terms.startsWith("{")) {
- // restore the * and ? after escaping
- terms = "\"" + ClientUtils.escapeQueryChars(terms).replace("\\*", "*").replace("\\?", "?") + "\"";
- wrappedQuotes = true;
- }
+ terms = escapeWithWildcard(terms);
+
+ // if terms contains multiple words and one or more wildcard chars, then we need to employ the complexphrase parser
+ // but that expects the terms wrapped in double-quotes, not parens
+ boolean hasMultipleTerms = terms.split("\\s+").length > 1;
+ if (hasMultipleTerms && (terms.contains("*") || terms.contains("?"))) {
+ String quotedTerms = "\"" + terms.substring(1, terms.length() - 1) + "\"";
+ return "{!complexphrase}" + pair.getKey() + ":" + quotedTerms;
+ }
+ } // else treat as an embedded Solr query and pass-through
- String query = pair.getKey() + ":" + terms;
- return wrappedQuotes ? "{!complexphrase}" + query : query;
+ return pair.getKey() + ":" + terms;
}
- protected String translateComparison(RexNode node) {
+ private String translateLikeTermToSolrSyntax(String term, Character escapeChar) {
+ boolean isEscaped = false;
+ StringBuilder sb = new StringBuilder();
+ // Special character % and _ are escaped with escape character and single quote is escaped
+ // with another single quote
+ // If single quote is escaped with escape character, calcite parser fails
+ for (int i = 0; i < term.length(); i++) {
+ char c = term.charAt(i);
+ if (!isEscaped && escapeChar != null && escapeChar == c) {
+ isEscaped = true;
+ } else if (c == '%' && !isEscaped) {
+ sb.append('*');
+ } else if (c == '_' && !isEscaped) {
+ sb.append("?");
+ } else if (c == '\'') {
+ if (i > 0 && term.charAt(i - 1) == '\'') {
+ sb.append(c);
+ }
+ } else {
+ sb.append(c);
+ if (isEscaped) isEscaped = false;
+ }
+ }
+ return sb.toString();
+ }
+
+ protected String translateComparison(RexNode node) {
final SqlKind kind = node.getKind();
if (kind == SqlKind.NOT) {
RexNode negated = ((RexCall) node).getOperands().get(0);
@@ -413,18 +447,30 @@ class SolrFilter extends Filter implements SolrRel {
String terms = toSolrLiteral(key, value).trim();
- boolean wrappedQuotes = false;
if (!terms.startsWith("(") && !terms.startsWith("[") && !terms.startsWith("{")) {
- terms = "\"" + ClientUtils.escapeQueryChars(terms) + "\"";
- wrappedQuotes = true;
+ if (terms.contains("*") || terms.contains("?")) {
+ terms = escapeWithWildcard(terms);
+ } else {
+ terms = "\"" + ClientUtils.escapeQueryChars(terms) + "\"";
+ }
}
- String clause = key + ":" + terms;
- if (terms.contains("*") && wrappedQuotes) {
- clause = "{!complexphrase}" + clause;
- }
+ return key + ":" + terms;
+ }
- return clause;
+ // Wrap filter criteria containing wildcard with parens and unescape the wildcards after
+ // escaping protected query chars
+ private String escapeWithWildcard(String terms) {
+ String escaped =
+ ClientUtils.escapeQueryChars(terms)
+ .replace("\\*", "*")
+ .replace("\\?", "?")
+ .replace("\\ ", " ");
+ // if multiple terms, then wrap with parens
+ if (escaped.split("\\s+").length > 1) {
+ escaped = "(" + escaped + ")";
+ }
+ return escaped;
}
// translate to a literal string value for Solr queries, such as translating a
@@ -479,6 +525,26 @@ class SolrFilter extends Filter implements SolrRel {
return timestamp;
}
+ protected Pair<Pair<String, RexLiteral>, Character> getFieldValuePairWithEscapeCharacter(RexNode node) {
+ if (!(node instanceof RexCall)) {
+ throw new AssertionError("expected RexCall for predicate but found: " + node);
+ }
+ RexCall call = (RexCall) node;
+ if (call.getOperands().size() == 3) {
+ RexNode escapeNode = call.getOperands().get(2);
+ Character escapeChar = null;
+ if (escapeNode.getKind() == SqlKind.LITERAL) {
+ RexLiteral literal = (RexLiteral) escapeNode;
+ if (literal.getTypeName() == SqlTypeName.CHAR) {
+ escapeChar = literal.getValueAs(Character.class);
+ }
+ }
+ return Pair.of(translateBinary2(call.getOperands().get(0), call.getOperands().get(1)), escapeChar);
+ } else {
+ return Pair.of(getFieldValuePair(node), null);
+ }
+ }
+
protected Pair<String, RexLiteral> getFieldValuePair(RexNode node) {
if (!(node instanceof RexCall)) {
throw new AssertionError("expected RexCall for predicate but found: " + node);
diff --git a/solr/core/src/test/org/apache/solr/handler/TestSQLHandler.java b/solr/core/src/test/org/apache/solr/handler/TestSQLHandler.java
index 587c04575e1..1eba3e72387 100644
--- a/solr/core/src/test/org/apache/solr/handler/TestSQLHandler.java
+++ b/solr/core/src/test/org/apache/solr/handler/TestSQLHandler.java
@@ -2016,18 +2016,26 @@ public class TestSQLHandler extends SolrCloudTestCase {
@Test
public void testLike() throws Exception {
new UpdateRequest()
- .add("id", "1", "a_s", "hello-1", "b_s", "foo")
- .add("id", "2", "a_s", "world-2", "b_s", "foo")
- .add("id", "3", "a_s", "hello-3", "b_s", "foo")
- .add("id", "4", "a_s", "world-4", "b_s", "foo")
- .add("id", "5", "a_s", "hello-5", "b_s", "foo")
- .add("id", "6", "a_s", "world-6", "b_s", "bar")
+ .add("id", "1", "a_s", "hello-1", "b_s", "foo", "c_t", "the quick brown fox jumped over the lazy dog")
+ .add("id", "2", "a_s", "world-2", "b_s", "foo", "c_t", "the sly black dog jumped over the sleeping pig")
+ .add("id", "3", "a_s", "hello-3", "b_s", "foo", "c_t", "the quick brown fox jumped over the lazy dog")
+ .add("id", "4", "a_s", "world-4", "b_s", "foo", "c_t", "the sly black dog jumped over the sleepy pig")
+ .add("id", "5", "a_s", "hello-5", "b_s", "foo", "c_t", "the quick brown fox jumped over the lazy dog")
+ .add("id", "6", "a_s", "w_orld-6", "b_s", "bar", "c_t", "the sly black dog jumped over the sleepin piglet")
+ .add("id", "7", "a_s", "world%_7", "b_s", "zaz", "c_t", "the lazy dog jumped over the quick brown fox")
+ .add("id", "8", "a_s", "world'\\9", "b_s", "zaz", "c_t", "the lazy dog ducked over the quick brown fox")
.commit(cluster.getSolrClient(), COLLECTIONORALIAS);
expectResults("SELECT a_s FROM $ALIAS WHERE a_s LIKE 'h_llo-%'", 3);
+ Throwable exception = expectThrows(IOException.class, () -> expectResults("SELECT a_s FROM $ALIAS WHERE a_s LIKE 'world\\'%' ESCAPE '\\'", 1));
+ assertTrue(exception.getMessage().contains("parse failed: Lexical error"));
+ expectResults("SELECT a_s FROM $ALIAS WHERE a_s LIKE 'world''%'", 1);
+ expectResults("SELECT a_s FROM $ALIAS WHERE a_s LIKE 'world''\\\\%' ESCAPE '\\'", 1);
+ expectResults("SELECT a_s FROM $ALIAS WHERE a_s LIKE 'w\\_o_ld%' ESCAPE '\\'", 1);
+ expectResults("SELECT a_s FROM $ALIAS WHERE a_s LIKE 'world\\%\\__' ESCAPE '\\'", 1);
// not technically valid SQL but we support it for legacy purposes, see: SOLR-15463
- expectResults("SELECT a_s FROM $ALIAS WHERE a_s='world-*'", 3);
+ expectResults("SELECT a_s FROM $ALIAS WHERE a_s='world-*'", 2);
// no results
expectResults("SELECT a_s FROM $ALIAS WHERE a_s LIKE '%MATCHNONE%'", 0);
@@ -2036,7 +2044,7 @@ public class TestSQLHandler extends SolrCloudTestCase {
expectResults("SELECT b_s FROM $ALIAS WHERE b_s LIKE 'foo'", 5);
// NOT LIKE
- expectResults("SELECT b_s FROM $ALIAS WHERE b_s NOT LIKE 'f%'", 1);
+ expectResults("SELECT b_s FROM $ALIAS WHERE b_s NOT LIKE 'f%'", 3);
// leading wildcard
expectResults("SELECT b_s FROM $ALIAS WHERE b_s LIKE '%oo'", 5);
@@ -2045,6 +2053,22 @@ public class TestSQLHandler extends SolrCloudTestCase {
expectResults("SELECT b_s FROM $ALIAS WHERE b_s LIKE '(fo%)'", 5);
expectResults("SELECT b_s FROM $ALIAS WHERE b_s LIKE '(ba*)'", 1);
+
+ expectResults("SELECT b_s FROM $ALIAS WHERE c_t LIKE 'fox'", 5);
+ expectResults("SELECT b_s FROM $ALIAS WHERE c_t LIKE 'sleep% pig%'", 3);
+ expectResults("SELECT b_s FROM $ALIAS WHERE c_t LIKE 'sleep% pigle%'", 1);
+ expectResults("SELECT b_s FROM $ALIAS WHERE c_t LIKE 'sleep% piglet'", 1);
+ expectResults("SELECT b_s FROM $ALIAS WHERE c_t LIKE 'sleep% pigle%'", 1);
+ expectResults("SELECT b_s FROM $ALIAS WHERE c_t LIKE 'sleep% piglet'", 1);
+
+ expectResults("SELECT b_s FROM $ALIAS WHERE c_t LIKE 'jump%'", 7);
+ expectResults("SELECT b_s FROM $ALIAS WHERE c_t LIKE '%ump%'", 7);
+
+ expectResults("SELECT b_s FROM $ALIAS WHERE c_t LIKE '(\"dog pig\"~5)'", 2);
+ expectResults("SELECT b_s FROM $ALIAS WHERE c_t LIKE 'jumped over'", 8);
+ expectResults("SELECT b_s FROM $ALIAS WHERE c_t LIKE 'quick brown fox'", 5);
+ expectResults("SELECT b_s FROM $ALIAS WHERE b_s LIKE 'foo*'", 5);
+ expectResults("SELECT b_s FROM $ALIAS WHERE c_t LIKE '*og'", 8);
}
@Test