Posted to reviews@spark.apache.org by GitBox <gi...@apache.org> on 2021/09/08 05:02:25 UTC

[GitHub] [spark] MaxGekk commented on a change in pull request #33919: [SPARK-36674][SQL] Support ILIKE - case insensitive LIKE

MaxGekk commented on a change in pull request #33919:
URL: https://github.com/apache/spark/pull/33919#discussion_r704049052



##########
File path: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala
##########
@@ -121,70 +187,73 @@ abstract class StringRegexExpression extends BinaryExpression
   since = "1.0.0",
   group = "predicate_funcs")
 // scalastyle:on line.contains.tab
-case class Like(left: Expression, right: Expression, escapeChar: Char)
-  extends StringRegexExpression {
+case class Like(left: Expression, right: Expression, escapeChar: Char) extends LikeBase {
 
   def this(left: Expression, right: Expression) = this(left, right, '\\')
 
-  override def escape(v: String): String = StringUtils.escapeLikeRegex(v, escapeChar)
+  override protected def likeName: String = "LIKE"
 
-  override def matches(regex: Pattern, str: String): Boolean = regex.matcher(str).matches()
+  override protected def withNewChildrenInternal(newLeft: Expression, newRight: Expression): Like =
+    copy(left = newLeft, right = newRight)
+}
 
-  final override val nodePatterns: Seq[TreePattern] = Seq(LIKE_FAMLIY)
+// scalastyle:off line.contains.tab
+/**
+ * Simple RegEx case-insensitive pattern matching function
+ */
+@ExpressionDescription(
+  usage = "str _FUNC_ pattern[ ESCAPE escape] - Returns true if str matches `pattern` with " +
+    "`escape` case-insensitively, null if any arguments are null, false otherwise.",
+  arguments = """
+    Arguments:
+      * str - a string expression
+      * pattern - a string expression. The pattern is a string which is matched literally and
+          case-insensitively, with exception to the following special symbols:
 
-  override def toString: String = escapeChar match {
-    case '\\' => s"$left LIKE $right"
-    case c => s"$left LIKE $right ESCAPE '$c'"
-  }
+          _ matches any one character in the input (similar to . in posix regular expressions)
 
-  override def sql: String = s"${left.sql} ${prettyName.toUpperCase(Locale.ROOT)} ${right.sql}"
+          % matches zero or more characters in the input (similar to .* in posix regular
+          expressions)
 
-  override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
-    val patternClass = classOf[Pattern].getName
-    val escapeFunc = StringUtils.getClass.getName.stripSuffix("$") + ".escapeLikeRegex"
+          Since Spark 2.0, string literals are unescaped in our SQL parser. For example, in order
+          to match "\abc", the pattern should be "\\abc".
 
-    if (right.foldable) {
-      val rVal = right.eval()
-      if (rVal != null) {
-        val regexStr =
-          StringEscapeUtils.escapeJava(escape(rVal.asInstanceOf[UTF8String].toString()))
-        val pattern = ctx.addMutableState(patternClass, "patternLike",
-          v => s"""$v = $patternClass.compile("$regexStr");""")
+          When SQL config 'spark.sql.parser.escapedStringLiterals' is enabled, it falls back
+          to Spark 1.6 behavior regarding string literal parsing. For example, if the config is
+          enabled, the pattern to match "\abc" should be "\abc".
+      * escape - a character added since Spark 3.0. The default escape character is '\'.
+          If an escape character precedes a special symbol or another escape character, the
+          following character is matched literally. It is invalid to escape any other character.
+  """,
+  examples = """
+    Examples:
+      > SELECT _FUNC_('Spark', '_Park');
+      true
+      > SET spark.sql.parser.escapedStringLiterals=true;

Review comment:
       The examples are executed in a test too. Or do you want to test something specific to case sensitivity?
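
       Below is a minimal, hypothetical Scala sketch (not part of this PR) of how the documented ILIKE semantics could be exercised end to end once SPARK-36674 lands. The `ilike` function name and the case-insensitive matching behaviour are taken from the expression description in the diff above; the SparkSession setup and the `local[1]` master are assumptions for illustration only.

           import org.apache.spark.sql.SparkSession

           object ILikeSketch {
             def main(args: Array[String]): Unit = {
               val spark = SparkSession.builder()
                 .master("local[1]")
                 .appName("ilike-sketch")
                 .getOrCreate()

               // '_' matches any single character and matching is case-insensitive,
               // so this mirrors the example in the diff and should print true.
               spark.sql("SELECT ilike('Spark', '_Park')").show()

               // A pattern without special symbols is matched literally,
               // but still case-insensitively.
               spark.sql("SELECT ilike('Spark', 'spark')").show()

               spark.stop()
             }
           }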




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org