You are viewing a plain text version of this content. The canonical link for it was not preserved in this plain text version.
Posted to commits@solr.apache.org by th...@apache.org on 2022/05/19 13:59:10 UTC

[solr] branch main updated: Revert "SOLR-16199: Improve handling of LIKE queries with wildcard (#865)" (#869)

This is an automated email from the ASF dual-hosted git repository.

thelabdude pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/solr.git


The following commit(s) were added to refs/heads/main by this push:
     new ce736defbd9 Revert "SOLR-16199: Improve handling of LIKE queries with wildcard (#865)" (#869)
ce736defbd9 is described below

commit ce736defbd9b9aaa0fe2e03daf1a1f8f2c8cf6bc
Author: Timothy Potter <th...@gmail.com>
AuthorDate: Thu May 19 07:59:04 2022 -0600

    Revert "SOLR-16199: Improve handling of LIKE queries with wildcard (#865)" (#869)
    
    This reverts commit c64e5fda3b9e1e7208556fb01ac1c49f2f5a28a8.
---
 solr/CHANGES.txt                                   |   3 -
 .../org/apache/solr/handler/sql/SolrFilter.java    | 105 ++++-----------------
 .../apache/solr/handler/sql/TestSQLHandler.java    |  89 ++---------------
 3 files changed, 28 insertions(+), 169 deletions(-)

diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index 0663c3c77ba..828cee49e7c 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -814,9 +814,6 @@ Bug Fixes
 
 * SOLR-16022: Enforce special character requirements on passwords with length less than 15 (Timothy Potter)
 
-* SOLR-16199: Improve query syntax construction for SQL LIKE clause with phrases and wildcards
-  (Kiran Chitturi, Aroop Ganguly, Amrit Sarkar via Timothy Potter)
-
 ==================  8.11.1 ==================
 
 Bug Fixes
diff --git a/solr/modules/sql/src/java/org/apache/solr/handler/sql/SolrFilter.java b/solr/modules/sql/src/java/org/apache/solr/handler/sql/SolrFilter.java
index 09629d3df96..216ed4913a3 100644
--- a/solr/modules/sql/src/java/org/apache/solr/handler/sql/SolrFilter.java
+++ b/solr/modules/sql/src/java/org/apache/solr/handler/sql/SolrFilter.java
@@ -346,52 +346,21 @@ class SolrFilter extends Filter implements SolrRel {
     }
 
     protected String translateLike(RexNode like) {
-      Pair<Pair<String, RexLiteral>, Character> pairWithEscapeCharacter =
-          getFieldValuePairWithEscapeCharacter(like);
-      Pair<String, RexLiteral> pair = pairWithEscapeCharacter.getKey();
-      Character escapeChar = pairWithEscapeCharacter.getValue();
-
+      Pair<String, RexLiteral> pair = getFieldValuePair(like);
       String terms = pair.getValue().toString().trim();
-      terms = translateLikeTermToSolrSyntax(terms, escapeChar);
-
+      terms = terms.replace("'", "").replace('%', '*').replace('_', '?');
+      boolean wrappedQuotes = false;
       if (!terms.startsWith("(") && !terms.startsWith("[") && !terms.startsWith("{")) {
-        terms = escapeWithWildcard(terms);
-
-        // if terms contains multiple words and one or more wildcard chars, then we need to employ
-        // the complexphrase parser
-        // but that expects the terms wrapped in double-quotes, not parens
-        boolean hasMultipleTerms = terms.split("\\s+").length > 1;
-        if (hasMultipleTerms && (terms.contains("*") || terms.contains("?"))) {
-          String quotedTerms = "\"" + terms.substring(1, terms.length() - 1) + "\"";
-          return "{!complexphrase}" + pair.getKey() + ":" + quotedTerms;
-        }
-      } // else treat as an embedded Solr query and pass-through
-
-      return pair.getKey() + ":" + terms;
-    }
-
-    private String translateLikeTermToSolrSyntax(String term, Character escapeChar) {
-      boolean isEscaped = false;
-      StringBuilder sb = new StringBuilder();
-      for (int i = 0; i < term.length(); i++) {
-        char c = term.charAt(i);
-        // Only replace special characters if they are not escaped
-        if (escapeChar != null && c == escapeChar) {
-          isEscaped = true;
-        }
-        if (c == '%' && !isEscaped) {
-          sb.append('*');
-        } else if (c == '_' && !isEscaped) {
-          sb.append('?');
-        } else if (c == '\'' && isEscaped) {
-          sb.append('\'');
-          isEscaped = false;
-        } else if ((escapeChar == null || escapeChar != c) && c != '\'') {
-          sb.append(c);
-          isEscaped = false;
-        }
+        // restore the * and ? after escaping
+        terms =
+            "\""
+                + ClientUtils.escapeQueryChars(terms).replace("\\*", "*").replace("\\?", "?")
+                + "\"";
+        wrappedQuotes = true;
       }
-      return sb.toString();
+
+      String query = pair.getKey() + ":" + terms;
+      return wrappedQuotes ? "{!complexphrase}" + query : query;
     }
 
     protected String translateComparison(RexNode node) {
@@ -445,30 +414,18 @@ class SolrFilter extends Filter implements SolrRel {
 
       String terms = toSolrLiteral(key, value).trim();
 
+      boolean wrappedQuotes = false;
       if (!terms.startsWith("(") && !terms.startsWith("[") && !terms.startsWith("{")) {
-        if (terms.contains("*") || terms.contains("?")) {
-          terms = escapeWithWildcard(terms);
-        } else {
-          terms = "\"" + ClientUtils.escapeQueryChars(terms) + "\"";
-        }
+        terms = "\"" + ClientUtils.escapeQueryChars(terms) + "\"";
+        wrappedQuotes = true;
       }
 
-      return key + ":" + terms;
-    }
+      String clause = key + ":" + terms;
+      if (terms.contains("*") && wrappedQuotes) {
+        clause = "{!complexphrase}" + clause;
+      }
 
-    // Wrap filter criteria containing wildcard with parens and unescape the wildcards after
-    // escaping protected query chars
-    private String escapeWithWildcard(String terms) {
-      String escaped =
-          ClientUtils.escapeQueryChars(terms)
-              .replace("\\*", "*")
-              .replace("\\?", "?")
-              .replace("\\ ", " ");
-      // if multiple terms, then wrap with parens
-      if (escaped.split("\\s+").length > 1) {
-        escaped = "(" + escaped + ")";
-      }
-      return escaped;
+      return clause;
     }
 
     // translate to a literal string value for Solr queries, such as translating a
@@ -523,28 +480,6 @@ class SolrFilter extends Filter implements SolrRel {
       return timestamp;
     }
 
-    protected Pair<Pair<String, RexLiteral>, Character> getFieldValuePairWithEscapeCharacter(
-        RexNode node) {
-      if (!(node instanceof RexCall)) {
-        throw new AssertionError("expected RexCall for predicate but found: " + node);
-      }
-      RexCall call = (RexCall) node;
-      if (call.getOperands().size() == 3) {
-        RexNode escapeNode = call.getOperands().get(2);
-        Character escapeChar = null;
-        if (escapeNode.getKind() == SqlKind.LITERAL) {
-          RexLiteral literal = (RexLiteral) escapeNode;
-          if (literal.getTypeName() == SqlTypeName.CHAR) {
-            escapeChar = literal.getValueAs(Character.class);
-          }
-        }
-        return Pair.of(
-            translateBinary2(call.getOperands().get(0), call.getOperands().get(1)), escapeChar);
-      } else {
-        return Pair.of(getFieldValuePair(node), null);
-      }
-    }
-
     protected Pair<String, RexLiteral> getFieldValuePair(RexNode node) {
       if (!(node instanceof RexCall)) {
         throw new AssertionError("expected RexCall for predicate but found: " + node);
diff --git a/solr/modules/sql/src/test/org/apache/solr/handler/sql/TestSQLHandler.java b/solr/modules/sql/src/test/org/apache/solr/handler/sql/TestSQLHandler.java
index 431cc4a0f5f..fde59889b31 100644
--- a/solr/modules/sql/src/test/org/apache/solr/handler/sql/TestSQLHandler.java
+++ b/solr/modules/sql/src/test/org/apache/solr/handler/sql/TestSQLHandler.java
@@ -2434,77 +2434,18 @@ public class TestSQLHandler extends SolrCloudTestCase {
   @Test
   public void testLike() throws Exception {
     new UpdateRequest()
-        .add(
-            "id",
-            "1",
-            "a_s",
-            "hello-1",
-            "b_s",
-            "foo",
-            "c_t",
-            "the quick brown fox jumped over the lazy dog")
-        .add(
-            "id",
-            "2",
-            "a_s",
-            "world-2",
-            "b_s",
-            "foo",
-            "c_t",
-            "the sly black dog jumped over the sleeping pig")
-        .add(
-            "id",
-            "3",
-            "a_s",
-            "hello-3",
-            "b_s",
-            "foo",
-            "c_t",
-            "the quick brown fox jumped over the lazy dog")
-        .add(
-            "id",
-            "4",
-            "a_s",
-            "world-4",
-            "b_s",
-            "foo",
-            "c_t",
-            "the sly black dog jumped over the sleepy pig")
-        .add(
-            "id",
-            "5",
-            "a_s",
-            "hello-5",
-            "b_s",
-            "foo",
-            "c_t",
-            "the quick brown fox jumped over the lazy dog")
-        .add(
-            "id",
-            "6",
-            "a_s",
-            "w_orld-6",
-            "b_s",
-            "bar",
-            "c_t",
-            "the sly black dog jumped over the sleepin piglet")
-        .add(
-            "id",
-            "7",
-            "a_s",
-            "world%_7",
-            "b_s",
-            "zaz",
-            "c_t",
-            "the lazy dog jumped over the quick brown fox")
+        .add("id", "1", "a_s", "hello-1", "b_s", "foo")
+        .add("id", "2", "a_s", "world-2", "b_s", "foo")
+        .add("id", "3", "a_s", "hello-3", "b_s", "foo")
+        .add("id", "4", "a_s", "world-4", "b_s", "foo")
+        .add("id", "5", "a_s", "hello-5", "b_s", "foo")
+        .add("id", "6", "a_s", "world-6", "b_s", "bar")
         .commit(cluster.getSolrClient(), COLLECTIONORALIAS);
 
     expectResults("SELECT a_s FROM $ALIAS WHERE a_s LIKE 'h_llo-%'", 3);
-    expectResults("SELECT a_s FROM $ALIAS WHERE a_s LIKE 'w\\_o_ld%' ESCAPE '\\'", 1);
-    expectResults("SELECT a_s FROM $ALIAS WHERE a_s LIKE 'world\\%\\__' ESCAPE '\\'", 1);
 
     // not technically valid SQL but we support it for legacy purposes, see: SOLR-15463
-    expectResults("SELECT a_s FROM $ALIAS WHERE a_s='world-*'", 2);
+    expectResults("SELECT a_s FROM $ALIAS WHERE a_s='world-*'", 3);
 
     // no results
     expectResults("SELECT a_s FROM $ALIAS WHERE a_s LIKE '%MATCHNONE%'", 0);
@@ -2513,7 +2454,7 @@ public class TestSQLHandler extends SolrCloudTestCase {
     expectResults("SELECT b_s FROM $ALIAS WHERE b_s LIKE 'foo'", 5);
 
     // NOT LIKE
-    expectResults("SELECT b_s FROM $ALIAS WHERE b_s NOT LIKE 'f%'", 2);
+    expectResults("SELECT b_s FROM $ALIAS WHERE b_s NOT LIKE 'f%'", 1);
 
     // leading wildcard
     expectResults("SELECT b_s FROM $ALIAS WHERE b_s LIKE '%oo'", 5);
@@ -2522,20 +2463,6 @@ public class TestSQLHandler extends SolrCloudTestCase {
     expectResults("SELECT b_s FROM $ALIAS WHERE b_s LIKE '(fo%)'", 5);
 
     expectResults("SELECT b_s FROM $ALIAS WHERE b_s LIKE '(ba*)'", 1);
-
-    expectResults("SELECT b_s FROM $ALIAS WHERE c_t LIKE 'fox'", 4);
-    expectResults("SELECT b_s FROM $ALIAS WHERE c_t LIKE 'sleep% pig%'", 3);
-    expectResults("SELECT b_s FROM $ALIAS WHERE c_t LIKE 'sleep% pigle%'", 1);
-    expectResults("SELECT b_s FROM $ALIAS WHERE c_t LIKE 'sleep% piglet'", 1);
-
-    expectResults("SELECT b_s FROM $ALIAS WHERE c_t LIKE 'jump%'", 7);
-    expectResults("SELECT b_s FROM $ALIAS WHERE c_t LIKE '%ump%'", 7);
-
-    expectResults("SELECT b_s FROM $ALIAS WHERE c_t LIKE '(\"dog pig\"~5)'", 2);
-    expectResults("SELECT b_s FROM $ALIAS WHERE c_t LIKE 'jumped over'", 7);
-    expectResults("SELECT b_s FROM $ALIAS WHERE c_t LIKE 'quick brown fox'", 4);
-    expectResults("SELECT b_s FROM $ALIAS WHERE b_s LIKE 'foo*'", 5);
-    expectResults("SELECT b_s FROM $ALIAS WHERE c_t LIKE '*og'", 7);
   }
 
   @Test