You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by do...@apache.org on 2020/02/26 22:28:16 UTC
[spark] branch branch-2.4 updated: [SPARK-30759][SQL][3.0] Fix
cache initialization in StringRegexExpression
This is an automated email from the ASF dual-hosted git repository.
dongjoon pushed a commit to branch branch-2.4
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-2.4 by this push:
new 56fa200 [SPARK-30759][SQL][3.0] Fix cache initialization in StringRegexExpression
56fa200 is described below
commit 56fa200948d69396ce60b99256efd9da114bdb6c
Author: Maxim Gekk <ma...@gmail.com>
AuthorDate: Wed Feb 26 14:25:58 2020 -0800
[SPARK-30759][SQL][3.0] Fix cache initialization in StringRegexExpression
In this PR, I propose to fix the `cache` initialization in `StringRegexExpression` by changing the expected value type in `case Literal(value: String, StringType)` from `String` to `UTF8String`.
This is a backport of #27502 and #27547.
The existing case never matches, because the value of a string `Literal` has type `UTF8String` rather than `String`, so the pattern is never cached; see the screenshot below:
<img width="649" alt="Screen Shot 2020-02-08 at 22 45 50" src="https://user-images.githubusercontent.com/1580697/74091681-0d4a2180-4acb-11ea-8a0d-7e8c65f4214e.png">
No user-facing changes.
Added a new test to `RegexpExpressionsSuite`.
Closes #27713 from MaxGekk/str-regexp-foldable-pattern-backport.
Authored-by: Maxim Gekk <ma...@gmail.com>
Signed-off-by: Dongjoon Hyun <dh...@apple.com>
(cherry picked from commit cfc48a8a3068972791410e8e36ff9cf1ba5af445)
Signed-off-by: Dongjoon Hyun <dh...@apple.com>
---
.../apache/spark/sql/catalyst/expressions/regexpExpressions.scala | 2 +-
.../spark/sql/catalyst/expressions/RegexpExpressionsSuite.scala | 8 ++++++++
2 files changed, 9 insertions(+), 1 deletion(-)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala
index 7086e4d..a271be5 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala
@@ -40,7 +40,7 @@ abstract class StringRegexExpression extends BinaryExpression
// try cache the pattern for Literal
private lazy val cache: Pattern = right match {
- case x @ Literal(value: String, StringType) => compile(value)
+ case Literal(value: UTF8String, StringType) => compile(value.toString)
case _ => null
}
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/RegexpExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/RegexpExpressionsSuite.scala
index 4c7a037..f7a97db 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/RegexpExpressionsSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/RegexpExpressionsSuite.scala
@@ -244,4 +244,12 @@ class RegexpExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
checkEvaluation(StringSplit(s1, s2), null, row3)
}
+ test("SPARK-30759: cache initialization for literal patterns") {
+ val expr = "A" like Literal.create("a", StringType)
+ expr.eval()
+ val cache = expr.getClass.getSuperclass
+ .getDeclaredFields.filter(_.getName.endsWith("cache")).head
+ cache.setAccessible(true)
+ assert(cache.get(expr).asInstanceOf[java.util.regex.Pattern].pattern().contains("a"))
+ }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org