You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by do...@apache.org on 2020/10/31 22:17:15 UTC
[spark] branch branch-3.0 updated: [SPARK-33306][SQL] Timezone is
needed when cast date to string
This is an automated email from the ASF dual-hosted git repository.
dongjoon pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.0 by this push:
new 49e9575 [SPARK-33306][SQL] Timezone is needed when cast date to string
49e9575 is described below
commit 49e9575674b42190629d71eb8114318634625091
Author: wangguangxin.cn <wa...@gmail.com>
AuthorDate: Sat Oct 31 15:14:46 2020 -0700
[SPARK-33306][SQL] Timezone is needed when cast date to string
### What changes were proposed in this pull request?
When `spark.sql.legacy.typeCoercion.datetimeToString.enabled` is enabled, spark will cast date to string when comparing a date with a string. In Spark3, timezone is needed when casting date to string as https://github.com/apache/spark/blob/72ad9dcd5d484a8dd64c08889de85ef9de2a6077/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala#L309.
However, the timezone may not be set because `CastBase.needsTimeZone` returns false for this kind of casting.
A simple way to reproduce this is
```
spark-shell --conf spark.sql.legacy.typeCoercion.datetimeToString.enabled=true
```
when we execute the following sql,
```
select a.d1 from
(select to_date(concat('2000-01-0', id)) as d1 from range(1, 2)) a
join
(select concat('2000-01-0', id) as d2 from range(1, 2)) b
on a.d1 = b.d2
```
it will throw
```
java.util.NoSuchElementException: None.get
at scala.None$.get(Option.scala:529)
at scala.None$.get(Option.scala:527)
at org.apache.spark.sql.catalyst.expressions.TimeZoneAwareExpression.zoneId(datetimeExpressions.scala:56)
at org.apache.spark.sql.catalyst.expressions.TimeZoneAwareExpression.zoneId$(datetimeExpressions.scala:56)
at org.apache.spark.sql.catalyst.expressions.CastBase.zoneId$lzycompute(Cast.scala:253)
at org.apache.spark.sql.catalyst.expressions.CastBase.zoneId(Cast.scala:253)
at org.apache.spark.sql.catalyst.expressions.CastBase.dateFormatter$lzycompute(Cast.scala:287)
at org.apache.spark.sql.catalyst.expressions.CastBase.dateFormatter(Cast.scala:287)
```
### Why are the changes needed?
As described above, it's a bug here.
### Does this PR introduce _any_ user-facing change?
No
### How was this patch tested?
Add more UT
Closes #30213 from WangGuangxin/SPARK-33306.
Authored-by: wangguangxin.cn <wa...@gmail.com>
Signed-off-by: Dongjoon Hyun <dh...@apple.com>
(cherry picked from commit 69c27f49acf2fe6fbc8335bde2aac4afd4188678)
Signed-off-by: Dongjoon Hyun <dh...@apple.com>
---
.../org/apache/spark/sql/catalyst/expressions/Cast.scala | 1 +
.../test/scala/org/apache/spark/sql/SQLQuerySuite.scala | 15 +++++++++++++++
2 files changed, 16 insertions(+)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
index c8985d4..388fb231 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
@@ -108,6 +108,7 @@ object Cast {
*/
def needsTimeZone(from: DataType, to: DataType): Boolean = (from, to) match {
case (StringType, TimestampType | DateType) => true
+ case (DateType, StringType) => true
case (DateType, TimestampType) => true
case (TimestampType, StringType) => true
case (TimestampType, DateType) => true
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
index 979ae88..8f278f9 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
@@ -3508,6 +3508,21 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark
checkAnswer(sql("SELECT id FROM t WHERE (SELECT true)"), Row(0L))
}
}
+
+ test("SPARK-33306: Timezone is needed when cast Date to String") {
+ withTempView("t1", "t2") {
+ spark.sql("select to_date(concat('2000-01-0', id)) as d from range(1, 2)")
+ .createOrReplaceTempView("t1")
+ spark.sql("select concat('2000-01-0', id) as d from range(1, 2)")
+ .createOrReplaceTempView("t2")
+ val result = Date.valueOf("2000-01-01")
+
+ checkAnswer(sql("select t1.d from t1 join t2 on t1.d = t2.d"), Row(result))
+ withSQLConf(SQLConf.LEGACY_CAST_DATETIME_TO_STRING.key -> "true") {
+ checkAnswer(sql("select t1.d from t1 join t2 on t1.d = t2.d"), Row(result))
+ }
+ }
+ }
}
case class Foo(bar: Option[String])
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org