Posted to commits@spark.apache.org by we...@apache.org on 2020/02/17 15:10:19 UTC

[spark] branch branch-3.0 updated: [SPARK-30793][SQL] Fix truncations of timestamps before the epoch to minutes and seconds

This is an automated email from the ASF dual-hosted git repository.

wenchen pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-3.0 by this push:
     new d9383e1  [SPARK-30793][SQL] Fix truncations of timestamps before the epoch to minutes and seconds
d9383e1 is described below

commit d9383e1a895bdfd1e318974d8f418937cd08e118
Author: Maxim Gekk <ma...@gmail.com>
AuthorDate: Mon Feb 17 22:51:56 2020 +0800

    [SPARK-30793][SQL] Fix truncations of timestamps before the epoch to minutes and seconds
    
    ### What changes were proposed in this pull request?
    In this PR, I propose replacing `%` with `Math.floorMod` in `DateTimeUtils.truncTimestamp` at the `SECOND` and `MINUTE` levels.
    
    ### Why are the changes needed?
    This fixes incorrect truncation of timestamps before the epoch `1970-01-01T00:00:00.000000Z` at the `SECOND` and `MINUTE` levels. A timestamp after the epoch is truncated correctly, by dropping its sub-second part:
    ```sql
    spark-sql> select date_trunc('SECOND', '2020-02-11 00:01:02.123');
    2020-02-11 00:01:02
    ```
    but the same truncation applied to a timestamp before the epoch increases the seconds value by 1:
    ```sql
    spark-sql> select date_trunc('SECOND', '1960-02-11 00:01:02.123');
    1960-02-11 00:01:03
    ```
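
    The off-by-one comes from the sign convention of `%`: in Java and Scala the remainder takes the sign of the dividend, so a negative millisecond value yields a negative remainder, and subtracting it moves the timestamp forward instead of truncating it down. A minimal standalone sketch (plain Scala, not Spark code; the constant name mirrors `MILLIS_PER_SECOND` in `DateTimeUtils`):
    ```scala
    val MILLIS_PER_SECOND = 1000L

    // 1969-12-31T23:59:59.103Z is 897 ms before the epoch.
    val millis = -897L

    // Buggy: -897 % 1000 == -897, so millis - (-897) == 0,
    // i.e. the result jumps ahead to 1970-01-01T00:00:00Z.
    val buggy = millis - millis % MILLIS_PER_SECOND

    // Fixed: Math.floorMod(-897, 1000) == 103, so millis - 103 == -1000,
    // i.e. 1969-12-31T23:59:59.000Z, truncated down as expected.
    val fixed = millis - Math.floorMod(millis, MILLIS_PER_SECOND)

    assert(buggy == 0L && fixed == -1000L)
    ```
    Applied to `1960-02-11 00:01:02.123`, the `%` remainder is `-877` ms, so subtracting it lands on `00:01:03.000`, one second too late.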
    
    ### Does this PR introduce any user-facing change?
    Yes. After the changes, the example above outputs the correct result:
    ```sql
    spark-sql> select date_trunc('SECOND', '1960-02-11 00:01:02.123');
    1960-02-11 00:01:02
    ```
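
    The `MINUTE` level had the same off-by-one. As a sanity check, the expected output after the fix (consistent with the new test below; not taken verbatim from the PR description):
    ```sql
    spark-sql> select date_trunc('MINUTE', '1960-02-11 00:01:02.123');
    1960-02-11 00:01:00
    ```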
    
    ### How was this patch tested?
    Added new tests to `DateFunctionsSuite`.
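    Locally, the new test can be run with sbt's test filter (assuming a standard Spark checkout): `build/sbt "sql/testOnly *DateFunctionsSuite -- -z SPARK-30793"`.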
    
    Closes #27543 from MaxGekk/fix-second-minute-truc.
    
    Authored-by: Maxim Gekk <ma...@gmail.com>
    Signed-off-by: Wenchen Fan <we...@databricks.com>
    (cherry picked from commit 06217cfded8d32962e7c54c315f8e684eb9f0999)
    Signed-off-by: Wenchen Fan <we...@databricks.com>
---
 .../org/apache/spark/sql/catalyst/util/DateTimeUtils.scala  |  4 ++--
 .../scala/org/apache/spark/sql/DateFunctionsSuite.scala     | 13 +++++++++++++
 2 files changed, 15 insertions(+), 2 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
index 768dde0..a1054a4 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
@@ -716,9 +716,9 @@ object DateTimeUtils {
         val truncated = level match {
           case TRUNC_TO_MILLISECOND => millis
           case TRUNC_TO_SECOND =>
-            millis - millis % MILLIS_PER_SECOND
+            millis - Math.floorMod(millis, MILLIS_PER_SECOND)
           case TRUNC_TO_MINUTE =>
-            millis - millis % MILLIS_PER_MINUTE
+            millis - Math.floorMod(millis, MILLIS_PER_MINUTE)
           case _ => // Try to truncate date levels
             val dDays = millisToDays(millis, zoneId)
             daysToMillis(truncDate(dDays, level), zoneId)
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DateFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DateFunctionsSuite.scala
index ba45b9f..527dfe5 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DateFunctionsSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DateFunctionsSuite.scala
@@ -869,4 +869,17 @@ class DateFunctionsSuite extends QueryTest with SharedSparkSession {
     checkTrunc("HOUR", "0010-01-01 01:00:00")
     checkTrunc("DAY", "0010-01-01 00:00:00")
   }
+
+  test("SPARK-30793: truncate timestamps before the epoch to seconds and minutes") {
+    def checkTrunc(level: String, expected: String): Unit = {
+      val df = Seq("1961-04-12 00:01:02.345")
+        .toDF()
+        .select($"value".cast("timestamp").as("ts"))
+        .select(date_trunc(level, $"ts").cast("string"))
+      checkAnswer(df, Row(expected))
+    }
+
+    checkTrunc("SECOND", "1961-04-12 00:01:02")
+    checkTrunc("MINUTE", "1961-04-12 00:01:00")
+  }
 }

