You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by do...@apache.org on 2020/02/26 22:28:16 UTC

[spark] branch branch-2.4 updated: [SPARK-30759][SQL][3.0] Fix cache initialization in StringRegexExpression

This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch branch-2.4
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-2.4 by this push:
     new 56fa200  [SPARK-30759][SQL][3.0] Fix cache initialization in StringRegexExpression
56fa200 is described below

commit 56fa200948d69396ce60b99256efd9da114bdb6c
Author: Maxim Gekk <ma...@gmail.com>
AuthorDate: Wed Feb 26 14:25:58 2020 -0800

    [SPARK-30759][SQL][3.0] Fix cache initialization in StringRegexExpression
    
    In this PR, I propose to fix the `cache` initialization in `StringRegexExpression` by changing the expected value type in `case Literal(value: String, StringType)` from `String` to `UTF8String`.
    
    This is a backport of #27502 and #27547
    
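    The existing case never matches, because a `Literal` of `StringType` holds its value as a `UTF8String` rather than a `String`; as a result the match always falls through to `case _ => null` and the pattern is never cached. See: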
    <img width="649" alt="Screen Shot 2020-02-08 at 22 45 50" src="https://user-images.githubusercontent.com/1580697/74091681-0d4a2180-4acb-11ea-8a0d-7e8c65f4214e.png">
    
    No user-facing changes.
    
    Added a new test to `RegexpExpressionsSuite`.
    
    Closes #27713 from MaxGekk/str-regexp-foldable-pattern-backport.
    
    Authored-by: Maxim Gekk <ma...@gmail.com>
    Signed-off-by: Dongjoon Hyun <dh...@apple.com>
    (cherry picked from commit cfc48a8a3068972791410e8e36ff9cf1ba5af445)
    Signed-off-by: Dongjoon Hyun <dh...@apple.com>
---
 .../apache/spark/sql/catalyst/expressions/regexpExpressions.scala | 2 +-
 .../spark/sql/catalyst/expressions/RegexpExpressionsSuite.scala   | 8 ++++++++
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala
index 7086e4d..a271be5 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala
@@ -40,7 +40,7 @@ abstract class StringRegexExpression extends BinaryExpression
 
   // try cache the pattern for Literal
   private lazy val cache: Pattern = right match {
-    case x @ Literal(value: String, StringType) => compile(value)
+    case Literal(value: UTF8String, StringType) => compile(value.toString)
     case _ => null
   }
 
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/RegexpExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/RegexpExpressionsSuite.scala
index 4c7a037..f7a97db 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/RegexpExpressionsSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/RegexpExpressionsSuite.scala
@@ -244,4 +244,12 @@ class RegexpExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
     checkEvaluation(StringSplit(s1, s2), null, row3)
   }
 
+  test("SPARK-30759: cache initialization for literal patterns") {
+    val expr = "A" like Literal.create("a", StringType)
+    expr.eval()
+    val cache = expr.getClass.getSuperclass
+      .getDeclaredFields.filter(_.getName.endsWith("cache")).head
+    cache.setAccessible(true)
+    assert(cache.get(expr).asInstanceOf[java.util.regex.Pattern].pattern().contains("a"))
+  }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org