Posted to commits@spark.apache.org by gu...@apache.org on 2022/03/18 07:46:55 UTC

[spark] branch branch-3.3 updated: [SPARK-38583][SQL] Restore the behavior of `to_timestamp` that allows numeric types

This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a commit to branch branch-3.3
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-3.3 by this push:
     new fa71029  [SPARK-38583][SQL] Restore the behavior of `to_timestamp` that allows numeric types
fa71029 is described below

commit fa71029c1860830d0c47fe20b3f8831da31d4820
Author: Hyukjin Kwon <gu...@apache.org>
AuthorDate: Fri Mar 18 16:44:12 2022 +0900

    [SPARK-38583][SQL] Restore the behavior of `to_timestamp` that allows numeric types
    
    This PR is a follow-up of https://github.com/apache/spark/commit/fab4ceb157baac870f6d50b942084bb9b2cd4ad2, which mistakenly removed the numeric type support in `to_timestamp(...)`. This PR restores that behavior.
    
    These changes are needed to keep the previous behavior of `to_timestamp` with numeric input.
    
    There is no change for end users, because the regression has not been released yet.
    
    A unit test was added, and the change was manually tested:
    
    ```scala
    spark.range(1).selectExpr("to_timestamp(id)").show()
    ```
    
    **Before**
    
    ```
    +----------------+
    |to_timestamp(id)|
    +----------------+
    |            null|
    +----------------+
    ```
    
    **After**
    
    ```
    +-------------------+
    |   to_timestamp(id)|
    +-------------------+
    |1970-01-01 09:00:00|
    +-------------------+
    ```
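    
    The restored behavior treats a numeric argument as seconds since the Unix epoch, rendered in the session time zone, effectively the same as an explicit cast to timestamp. A minimal sketch of that equivalence, assuming a `SparkSession` named `spark` is in scope (e.g. in `spark-shell`):
    
    ```scala
    // Sketch only; assumes a SparkSession `spark` is in scope (e.g. spark-shell).
    // With numeric input accepted again, to_timestamp interprets the value as
    // seconds since the Unix epoch, matching a plain cast to timestamp.
    spark.range(1)
      .selectExpr("to_timestamp(id)", "cast(id as timestamp)")
      .show()
    // Both columns show the epoch adjusted to the session time zone
    // (the 09:00:00 above corresponds to a +09:00 session time zone).
    ```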
    
    Closes #35887 from HyukjinKwon/SPARK-38583.
    
    Authored-by: Hyukjin Kwon <gu...@apache.org>
    Signed-off-by: Hyukjin Kwon <gu...@apache.org>
    (cherry picked from commit 681dfee7a0fa040b8928c65ef34471ee7239621c)
    Signed-off-by: Hyukjin Kwon <gu...@apache.org>
---
 .../spark/sql/catalyst/expressions/datetimeExpressions.scala  |  6 ++++--
 sql/core/src/test/resources/sql-tests/inputs/timestamp.sql    |  1 +
 .../test/resources/sql-tests/results/ansi/timestamp.sql.out   | 10 +++++++++-
 .../test/resources/sql-tests/results/datetime-legacy.sql.out  | 10 +++++++++-
 .../src/test/resources/sql-tests/results/timestamp.sql.out    | 10 +++++++++-
 .../sql-tests/results/timestampNTZ/timestamp-ansi.sql.out     | 11 ++++++++++-
 .../sql-tests/results/timestampNTZ/timestamp.sql.out          | 10 +++++++++-
 7 files changed, 51 insertions(+), 7 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala
index 013f11a..15ab3a2 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala
@@ -2080,8 +2080,10 @@ case class ParseToTimestamp(
   override def inputTypes: Seq[AbstractDataType] = {
     // Note: ideally this function should only take string input, but we allow more types here to
     // be backward compatible.
-    TypeCollection(StringType, DateType, TimestampType, TimestampNTZType) +:
-      format.map(_ => StringType).toSeq
+    val types = Seq(StringType, DateType, TimestampType, TimestampNTZType)
+    TypeCollection(
+      (if (dataType.isInstanceOf[TimestampType]) types :+ NumericType else types): _*
+    ) +: format.map(_ => StringType).toSeq
   }
 
   override protected def withNewChildrenInternal(
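
For context on the hunk above: `NumericType` is added to the accepted input types only when the expression resolves to `TimestampType`, so when the session default timestamp type is TIMESTAMP_NTZ a numeric argument is not read as epoch seconds; the timestampNTZ golden files below show NULL, or a java.time.DateTimeException under ANSI mode. A minimal sketch of the user-visible difference, assuming a `SparkSession` named `spark` (`spark.sql.timestampType` is the session config that switches the default timestamp type):

```scala
// Sketch only; assumes a SparkSession `spark` is in scope.

// Default session timestamp type (TIMESTAMP_LTZ): numeric input is accepted
// and read as seconds since the Unix epoch.
spark.sql("SELECT to_timestamp(1)").show()
// 1969-12-31 16:00:01 under the Pacific session time zone used by the golden files.

// Session timestamp type set to TIMESTAMP_NTZ: NumericType is no longer an
// accepted input type, so the value is not read as epoch seconds; the result
// is NULL, or a java.time.DateTimeException when spark.sql.ansi.enabled=true.
spark.conf.set("spark.sql.timestampType", "TIMESTAMP_NTZ")
spark.sql("SELECT to_timestamp(1)").show()
```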
diff --git a/sql/core/src/test/resources/sql-tests/inputs/timestamp.sql b/sql/core/src/test/resources/sql-tests/inputs/timestamp.sql
index 21d27e9..b0d958a 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/timestamp.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/timestamp.sql
@@ -62,6 +62,7 @@ select UNIX_MILLIS(timestamp'2020-12-01 14:30:08Z'), UNIX_MILLIS(timestamp'2020-
 select UNIX_MICROS(timestamp'2020-12-01 14:30:08Z'), UNIX_MICROS(timestamp'2020-12-01 14:30:08.999999Z'), UNIX_MICROS(null);
 
 select to_timestamp(null), to_timestamp('2016-12-31 00:12:00'), to_timestamp('2016-12-31', 'yyyy-MM-dd');
+select to_timestamp(1);
 -- variable-length second fraction tests
 select to_timestamp('2019-10-06 10:11:12.', 'yyyy-MM-dd HH:mm:ss.SSSSSS[zzz]');
 select to_timestamp('2019-10-06 10:11:12.0', 'yyyy-MM-dd HH:mm:ss.SSSSSS[zzz]');
diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/timestamp.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/timestamp.sql.out
index 2946842..1bd579e 100644
--- a/sql/core/src/test/resources/sql-tests/results/ansi/timestamp.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/ansi/timestamp.sql.out
@@ -1,5 +1,5 @@
 -- Automatically generated by SQLQueryTestSuite
--- Number of queries: 97
+-- Number of queries: 98
 
 
 -- !query
@@ -327,6 +327,14 @@ NULL	2016-12-31 00:12:00	2016-12-31 00:00:00
 
 
 -- !query
+select to_timestamp(1)
+-- !query schema
+struct<to_timestamp(1):timestamp>
+-- !query output
+1969-12-31 16:00:01
+
+
+-- !query
 select to_timestamp('2019-10-06 10:11:12.', 'yyyy-MM-dd HH:mm:ss.SSSSSS[zzz]')
 -- !query schema
 struct<>
diff --git a/sql/core/src/test/resources/sql-tests/results/datetime-legacy.sql.out b/sql/core/src/test/resources/sql-tests/results/datetime-legacy.sql.out
index ebfdf60e..9b00d38 100644
--- a/sql/core/src/test/resources/sql-tests/results/datetime-legacy.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/datetime-legacy.sql.out
@@ -1,5 +1,5 @@
 -- Automatically generated by SQLQueryTestSuite
--- Number of queries: 194
+-- Number of queries: 195
 
 
 -- !query
@@ -1139,6 +1139,14 @@ NULL	2016-12-31 00:12:00	2016-12-31 00:00:00
 
 
 -- !query
+select to_timestamp(1)
+-- !query schema
+struct<to_timestamp(1):timestamp>
+-- !query output
+1969-12-31 16:00:01
+
+
+-- !query
 select to_timestamp('2019-10-06 10:11:12.', 'yyyy-MM-dd HH:mm:ss.SSSSSS[zzz]')
 -- !query schema
 struct<to_timestamp(2019-10-06 10:11:12., yyyy-MM-dd HH:mm:ss.SSSSSS[zzz]):timestamp>
diff --git a/sql/core/src/test/resources/sql-tests/results/timestamp.sql.out b/sql/core/src/test/resources/sql-tests/results/timestamp.sql.out
index 0ebdf4c..792b0a5 100644
--- a/sql/core/src/test/resources/sql-tests/results/timestamp.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/timestamp.sql.out
@@ -1,5 +1,5 @@
 -- Automatically generated by SQLQueryTestSuite
--- Number of queries: 97
+-- Number of queries: 98
 
 
 -- !query
@@ -323,6 +323,14 @@ NULL	2016-12-31 00:12:00	2016-12-31 00:00:00
 
 
 -- !query
+select to_timestamp(1)
+-- !query schema
+struct<to_timestamp(1):timestamp>
+-- !query output
+1969-12-31 16:00:01
+
+
+-- !query
 select to_timestamp('2019-10-06 10:11:12.', 'yyyy-MM-dd HH:mm:ss.SSSSSS[zzz]')
 -- !query schema
 struct<to_timestamp(2019-10-06 10:11:12., yyyy-MM-dd HH:mm:ss.SSSSSS[zzz]):timestamp>
diff --git a/sql/core/src/test/resources/sql-tests/results/timestampNTZ/timestamp-ansi.sql.out b/sql/core/src/test/resources/sql-tests/results/timestampNTZ/timestamp-ansi.sql.out
index f7552ed..9cad5a3 100644
--- a/sql/core/src/test/resources/sql-tests/results/timestampNTZ/timestamp-ansi.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/timestampNTZ/timestamp-ansi.sql.out
@@ -1,5 +1,5 @@
 -- Automatically generated by SQLQueryTestSuite
--- Number of queries: 97
+-- Number of queries: 98
 
 
 -- !query
@@ -327,6 +327,15 @@ NULL	2016-12-31 00:12:00	2016-12-31 00:00:00
 
 
 -- !query
+select to_timestamp(1)
+-- !query schema
+struct<>
+-- !query output
+java.time.DateTimeException
+Cannot cast 1 to TimestampNTZType. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error.
+
+
+-- !query
 select to_timestamp('2019-10-06 10:11:12.', 'yyyy-MM-dd HH:mm:ss.SSSSSS[zzz]')
 -- !query schema
 struct<>
diff --git a/sql/core/src/test/resources/sql-tests/results/timestampNTZ/timestamp.sql.out b/sql/core/src/test/resources/sql-tests/results/timestampNTZ/timestamp.sql.out
index 06e255a..4ae5a8d 100644
--- a/sql/core/src/test/resources/sql-tests/results/timestampNTZ/timestamp.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/timestampNTZ/timestamp.sql.out
@@ -1,5 +1,5 @@
 -- Automatically generated by SQLQueryTestSuite
--- Number of queries: 97
+-- Number of queries: 98
 
 
 -- !query
@@ -323,6 +323,14 @@ NULL	2016-12-31 00:12:00	2016-12-31 00:00:00
 
 
 -- !query
+select to_timestamp(1)
+-- !query schema
+struct<to_timestamp(1):timestamp_ntz>
+-- !query output
+NULL
+
+
+-- !query
 select to_timestamp('2019-10-06 10:11:12.', 'yyyy-MM-dd HH:mm:ss.SSSSSS[zzz]')
 -- !query schema
 struct<to_timestamp(2019-10-06 10:11:12., yyyy-MM-dd HH:mm:ss.SSSSSS[zzz]):timestamp_ntz>

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org