Posted to commits@spark.apache.org by do...@apache.org on 2020/10/31 22:17:15 UTC

[spark] branch branch-3.0 updated: [SPARK-33306][SQL] Timezone is needed when casting date to string

This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-3.0 by this push:
     new 49e9575  [SPARK-33306][SQL] Timezone is needed when casting date to string
49e9575 is described below

commit 49e9575674b42190629d71eb8114318634625091
Author: wangguangxin.cn <wa...@gmail.com>
AuthorDate: Sat Oct 31 15:14:46 2020 -0700

    [SPARK-33306][SQL] Timezone is needed when casting date to string
    
    ### What changes were proposed in this pull request?
    When `spark.sql.legacy.typeCoercion.datetimeToString.enabled` is enabled, Spark casts the date to a string when comparing a date with a string. In Spark 3, a timezone is needed when casting a date to a string; see https://github.com/apache/spark/blob/72ad9dcd5d484a8dd64c08889de85ef9de2a6077/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala#L309.
    
    However, the timezone may not be set, because `CastBase.needsTimeZone` returns false for this kind of cast.
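    
    The pre-patch behavior of that helper can be checked directly in a small Scala sketch (illustrative only; `Cast.needsTimeZone` is the public helper behind `CastBase.needsTimeZone`):
    ```
    import org.apache.spark.sql.catalyst.expressions.Cast
    import org.apache.spark.sql.types.{DateType, StringType, TimestampType}
    
    // Before this patch, the (DateType, StringType) pair is missing from the match,
    // so no timezone gets attached to such casts by the analyzer.
    Cast.needsTimeZone(TimestampType, StringType)  // true
    Cast.needsTimeZone(DateType, StringType)       // false -- the root cause
    ```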
    
    A simple way to reproduce this is to start a shell with the legacy flag enabled:
    ```
    spark-shell --conf spark.sql.legacy.typeCoercion.datetimeToString.enabled=true
    ```
    and then execute the following SQL:
    ```
    select a.d1 from
    (select to_date(concat('2000-01-0', id)) as d1 from range(1, 2)) a
    join
    (select concat('2000-01-0', id) as d2 from range(1, 2)) b
    on a.d1 = b.d2
    ```
    It throws the following exception:
    ```
    java.util.NoSuchElementException: None.get
      at scala.None$.get(Option.scala:529)
      at scala.None$.get(Option.scala:527)
      at org.apache.spark.sql.catalyst.expressions.TimeZoneAwareExpression.zoneId(datetimeExpressions.scala:56)
      at org.apache.spark.sql.catalyst.expressions.TimeZoneAwareExpression.zoneId$(datetimeExpressions.scala:56)
      at org.apache.spark.sql.catalyst.expressions.CastBase.zoneId$lzycompute(Cast.scala:253)
      at org.apache.spark.sql.catalyst.expressions.CastBase.zoneId(Cast.scala:253)
      at org.apache.spark.sql.catalyst.expressions.CastBase.dateFormatter$lzycompute(Cast.scala:287)
      at org.apache.spark.sql.catalyst.expressions.CastBase.dateFormatter(Cast.scala:287)
    ```
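    
    The same failure can also be hit at the expression level. A minimal sketch (the literal and `eval` calls below are illustrative, not part of this patch): a `Cast` from `DateType` to `StringType` whose `timeZoneId` was never filled in fails lazily when its `dateFormatter` is first built.
    ```
    import java.time.LocalDate
    import org.apache.spark.sql.catalyst.expressions.{Cast, Literal}
    import org.apache.spark.sql.types.StringType
    
    val dateLit = Literal(LocalDate.of(2000, 1, 1)) // a DateType literal
    val cast = Cast(dateLit, StringType)            // timeZoneId defaults to None
    cast.eval()                                     // java.util.NoSuchElementException: None.get
    ```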
    
    ### Why are the changes needed?
    As described above, this fixes a bug.
    
    ### Does this PR introduce _any_ user-facing change?
    No
    
    ### How was this patch tested?
    Added a unit test.
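    
    For reference, a minimal verification sketch (assuming this fix is applied and the default session timezone; the `show()` output is illustrative):
    ```
    spark.conf.set("spark.sql.legacy.typeCoercion.datetimeToString.enabled", true)
    spark.sql("""
      select a.d1 from
      (select to_date(concat('2000-01-0', id)) as d1 from range(1, 2)) a
      join
      (select concat('2000-01-0', id) as d2 from range(1, 2)) b
      on a.d1 = b.d2
    """).show()
    // +----------+
    // |        d1|
    // +----------+
    // |2000-01-01|
    // +----------+
    ```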
    
    Closes #30213 from WangGuangxin/SPARK-33306.
    
    Authored-by: wangguangxin.cn <wa...@gmail.com>
    Signed-off-by: Dongjoon Hyun <dh...@apple.com>
    (cherry picked from commit 69c27f49acf2fe6fbc8335bde2aac4afd4188678)
    Signed-off-by: Dongjoon Hyun <dh...@apple.com>
---
 .../org/apache/spark/sql/catalyst/expressions/Cast.scala  |  1 +
 .../test/scala/org/apache/spark/sql/SQLQuerySuite.scala   | 15 +++++++++++++++
 2 files changed, 16 insertions(+)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
index c8985d4..388fb231 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
@@ -108,6 +108,7 @@ object Cast {
    */
   def needsTimeZone(from: DataType, to: DataType): Boolean = (from, to) match {
     case (StringType, TimestampType | DateType) => true
+    case (DateType, StringType) => true
     case (DateType, TimestampType) => true
     case (TimestampType, StringType) => true
     case (TimestampType, DateType) => true
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
index 979ae88..8f278f9 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
@@ -3508,6 +3508,21 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark
       checkAnswer(sql("SELECT id FROM t WHERE (SELECT true)"), Row(0L))
     }
   }
+
+  test("SPARK-33306: Timezone is needed when cast Date to String") {
+    withTempView("t1", "t2") {
+      spark.sql("select to_date(concat('2000-01-0', id)) as d from range(1, 2)")
+        .createOrReplaceTempView("t1")
+      spark.sql("select concat('2000-01-0', id) as d from range(1, 2)")
+        .createOrReplaceTempView("t2")
+      val result = Date.valueOf("2000-01-01")
+
+      checkAnswer(sql("select t1.d from t1 join t2 on t1.d = t2.d"), Row(result))
+      withSQLConf(SQLConf.LEGACY_CAST_DATETIME_TO_STRING.key -> "true") {
+        checkAnswer(sql("select t1.d from t1 join t2 on t1.d = t2.d"), Row(result))
+      }
+    }
+  }
 }
 
 case class Foo(bar: Option[String])

