You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@solr.apache.org by GitBox <gi...@apache.org> on 2022/05/17 19:16:20 UTC

[GitHub] [solr] madrob commented on a diff in pull request #865: SOLR-16199: Improve handling of LIKE queries with wildcard

madrob commented on code in PR #865:
URL: https://github.com/apache/solr/pull/865#discussion_r875176213


##########
solr/modules/sql/src/java/org/apache/solr/handler/sql/SolrFilter.java:
##########
@@ -346,21 +346,52 @@ protected String translateAnd(RexNode node0) {
     }
 
     protected String translateLike(RexNode like) {
-      Pair<String, RexLiteral> pair = getFieldValuePair(like);
+      Pair<Pair<String, RexLiteral>, Character> pairWithEscapeCharacter =
+          getFieldValuePairWithEscapeCharacter(like);
+      Pair<String, RexLiteral> pair = pairWithEscapeCharacter.getKey();
+      Character escapeChar = pairWithEscapeCharacter.getValue();
+
       String terms = pair.getValue().toString().trim();
-      terms = terms.replace("'", "").replace('%', '*').replace('_', '?');
-      boolean wrappedQuotes = false;
+      terms = translateLikeTermToSolrSyntax(terms, escapeChar);
+
       if (!terms.startsWith("(") && !terms.startsWith("[") && !terms.startsWith("{")) {
-        // restore the * and ? after escaping
-        terms =
-            "\""
-                + ClientUtils.escapeQueryChars(terms).replace("\\*", "*").replace("\\?", "?")
-                + "\"";
-        wrappedQuotes = true;
-      }
+        terms = escapeWithWildcard(terms);
+
+        // if terms contains multiple words and one or more wildcard chars, then we need to employ
+        // the complexphrase parser
+        // but that expects the terms wrapped in double-quotes, not parens
+        boolean hasMultipleTerms = terms.split("\\s+").length > 1;
+        if (hasMultipleTerms && (terms.contains("*") || terms.contains("?"))) {
+          String quotedTerms = "\"" + terms.substring(1, terms.length() - 1) + "\"";
+          return "{!complexphrase}" + pair.getKey() + ":" + quotedTerms;
+        }
+      } // else treat as an embedded Solr query and pass-through
+
+      return pair.getKey() + ":" + terms;
+    }
 
-      String query = pair.getKey() + ":" + terms;
-      return wrappedQuotes ? "{!complexphrase}" + query : query;
+    private String translateLikeTermToSolrSyntax(String term, Character escapeChar) {
+      boolean isEscaped = false;
+      StringBuilder sb = new StringBuilder();
+      for (int i = 0; i < term.length() - 1; i++) {

Review Comment:
   why is this `- 1`?



##########
solr/modules/sql/src/java/org/apache/solr/handler/sql/SolrFilter.java:
##########
@@ -346,21 +346,52 @@ protected String translateAnd(RexNode node0) {
     }
 
     protected String translateLike(RexNode like) {
-      Pair<String, RexLiteral> pair = getFieldValuePair(like);
+      Pair<Pair<String, RexLiteral>, Character> pairWithEscapeCharacter =
+          getFieldValuePairWithEscapeCharacter(like);
+      Pair<String, RexLiteral> pair = pairWithEscapeCharacter.getKey();
+      Character escapeChar = pairWithEscapeCharacter.getValue();
+
       String terms = pair.getValue().toString().trim();
-      terms = terms.replace("'", "").replace('%', '*').replace('_', '?');
-      boolean wrappedQuotes = false;
+      terms = translateLikeTermToSolrSyntax(terms, escapeChar);
+
       if (!terms.startsWith("(") && !terms.startsWith("[") && !terms.startsWith("{")) {
-        // restore the * and ? after escaping
-        terms =
-            "\""
-                + ClientUtils.escapeQueryChars(terms).replace("\\*", "*").replace("\\?", "?")
-                + "\"";
-        wrappedQuotes = true;
-      }
+        terms = escapeWithWildcard(terms);
+
+        // if terms contains multiple words and one or more wildcard chars, then we need to employ
+        // the complexphrase parser
+        // but that expects the terms wrapped in double-quotes, not parens
+        boolean hasMultipleTerms = terms.split("\\s+").length > 1;
+        if (hasMultipleTerms && (terms.contains("*") || terms.contains("?"))) {
+          String quotedTerms = "\"" + terms.substring(1, terms.length() - 1) + "\"";
+          return "{!complexphrase}" + pair.getKey() + ":" + quotedTerms;
+        }
+      } // else treat as an embedded Solr query and pass-through
+
+      return pair.getKey() + ":" + terms;
+    }
 
-      String query = pair.getKey() + ":" + terms;
-      return wrappedQuotes ? "{!complexphrase}" + query : query;
+    private String translateLikeTermToSolrSyntax(String term, Character escapeChar) {
+      boolean isEscaped = false;
+      StringBuilder sb = new StringBuilder();
+      for (int i = 0; i < term.length() - 1; i++) {
+        char c = term.charAt(i);
+        // Only replace special characters if they are not escaped
+        if (escapeChar != null && c == escapeChar) {
+          isEscaped = true;
+        }
+        if (c == '%' && !isEscaped) {
+          sb.append('*');
+        } else if (c == '_' && !isEscaped) {
+          sb.append('?');
+        } else if (c == '\'' && isEscaped) {
+          sb.append('\'');
+          isEscaped = false;
+        } else if ((escapeChar == null || escapeChar != c) && c != '\'') {
+          sb.append(c);
+          isEscaped = false;
+        }

Review Comment:
   The last two branches do the same thing?



##########
solr/modules/sql/src/test/org/apache/solr/handler/sql/TestSQLHandler.java:
##########
@@ -2434,15 +2434,73 @@ public void testColIsNull() throws Exception {
   @Test
   public void testLike() throws Exception {
     new UpdateRequest()
-        .add("id", "1", "a_s", "hello-1", "b_s", "foo")
-        .add("id", "2", "a_s", "world-2", "b_s", "foo")
-        .add("id", "3", "a_s", "hello-3", "b_s", "foo")
-        .add("id", "4", "a_s", "world-4", "b_s", "foo")
-        .add("id", "5", "a_s", "hello-5", "b_s", "foo")
-        .add("id", "6", "a_s", "world-6", "b_s", "bar")
+        .add(
+            "id",
+            "1",
+            "a_s",
+            "hello-1",
+            "b_s",
+            "foo",
+            "c_t",
+            "the quick brown fox jumped over the lazy dog")
+        .add(
+            "id",
+            "2",
+            "a_s",
+            "world-2",
+            "b_s",
+            "foo",
+            "c_t",
+            "the sly black dog jumped over the sleeping pig")
+        .add(
+            "id",
+            "3",
+            "a_s",
+            "hello-3",
+            "b_s",
+            "foo",
+            "c_t",
+            "the quick brown fox jumped over the lazy dog")
+        .add(
+            "id",
+            "4",
+            "a_s",
+            "world-4",
+            "b_s",
+            "foo",
+            "c_t",
+            "the sly black dog jumped over the sleepy pig")
+        .add(
+            "id",
+            "5",
+            "a_s",
+            "hello-5",
+            "b_s",
+            "foo",
+            "c_t",
+            "the quick brown fox jumped over the lazy dog")
+        .add(
+            "id",
+            "6",
+            "a_s",
+            "world-6",
+            "b_s",
+            "bar",
+            "c_t",
+            "the sly black dog jumped over the sleepin piglet")
+        .add(
+            "id",
+            "7",
+            "a_s",
+            "world%_7",
+            "b_s",
+            "zaz",
+            "c_t",
+            "the lazy dog jumped over the quick brown fox")
         .commit(cluster.getSolrClient(), COLLECTIONORALIAS);
 
     expectResults("SELECT a_s FROM $ALIAS WHERE a_s LIKE 'h_llo-%'", 3);
+    expectResults("SELECT a_s FROM $ALIAS WHERE a_s LIKE 'world\\%\\__' ESCAPE '\\'", 1);

Review Comment:
   I'm unclear if this would match `world-6` if the escapes weren't working, but I think it's important to have a negative test case.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscribe@solr.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscribe@solr.apache.org
For additional commands, e-mail: issues-help@solr.apache.org