Posted to reviews@spark.apache.org by GitBox <gi...@apache.org> on 2020/11/10 07:24:37 UTC

[GitHub] [spark] MaxGekk commented on a change in pull request #30303: [SPARK-33404][SQL] Fix incorrect results in `date_trunc` expression

MaxGekk commented on a change in pull request #30303:
URL: https://github.com/apache/spark/pull/30303#discussion_r520341000



##########
File path: sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala
##########
@@ -518,18 +518,32 @@ class DateTimeUtilsSuite extends SparkFunSuite with Matchers with SQLHelper {
     assert(time == None)
   }
 
-  test("truncTimestamp") {
-    def testTrunc(
-        level: Int,
-        expected: String,
-        inputTS: Long,
-        zoneId: ZoneId = defaultZoneId): Unit = {
-      val truncated =
-        DateTimeUtils.truncTimestamp(inputTS, level, zoneId)
-      val expectedTS = toTimestamp(expected, defaultZoneId)
-      assert(truncated === expectedTS.get)
+  def testTrunc(
+      level: Int,
+      expected: String,
+      inputTS: Long,
+      zoneId: ZoneId = defaultZoneId): Unit = {
+    val truncated = DateTimeUtils.truncTimestamp(inputTS, level, zoneId)
+    val expectedTS = toTimestamp(expected, defaultZoneId)
+    assert(truncated === expectedTS.get)
+  }
+
+  test("SPARK-33404: test truncTimestamp when time zone offset from UTC has a " +
+    "granularity of seconds") {
+    for (zid <- ALL_TIMEZONES) {
+      withDefaultTimeZone(zid) {
+        val inputTS = DateTimeUtils.stringToTimestamp(
+          UTF8String.fromString("1769-10-17T17:10:02.123456"), defaultZoneId)
+        testTrunc(DateTimeUtils.TRUNC_TO_MINUTE, "1769-10-17T17:10:00", inputTS.get, zid)
+        testTrunc(DateTimeUtils.TRUNC_TO_SECOND, "1769-10-17T17:10:02", inputTS.get, zid)
+        testTrunc(DateTimeUtils.TRUNC_TO_MILLISECOND, "1769-10-17T17:10:02.123", inputTS.get, zid)
+        testTrunc(DateTimeUtils.TRUNC_TO_MICROSECOND, "1769-10-17T17:10:02.123456",

Review comment:
       The checks for millis and micros don't fail when I revert your fix. I guess that's because the zone offset has whole-second granularity, so sub-second truncation doesn't depend on it.
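
       To make that concrete, here is a standalone sketch (not part of the patch; the zone, object name, and timestamp are only illustrative) showing that arithmetic truncation to millis agrees with wall-clock truncation even for an LMT offset that has a seconds component:

       import java.time._
       import java.time.temporal.ChronoUnit

       // Illustrative only: Asia/Kolkata's pre-standard-time LMT offset
       // (+05:53:28) has a seconds component, yet millis truncation via
       // plain arithmetic still matches wall-clock truncation.
       object OffsetGranularityCheck {
         val MicrosPerMillis = 1000L

         def main(args: Array[String]): Unit = {
           val zone = ZoneId.of("Asia/Kolkata")
           val zdt = LocalDateTime.of(1769, 10, 17, 17, 10, 2, 123456000).atZone(zone)
           val instant = zdt.toInstant
           val micros = instant.getEpochSecond * 1000000L + instant.getNano / 1000L

           // Arithmetic truncation on UTC-based microseconds (the pre-patch path).
           val arith = micros - Math.floorMod(micros, MicrosPerMillis)

           // Wall-clock truncation via java.time.
           val wallInstant = zdt.truncatedTo(ChronoUnit.MILLIS).toInstant
           val wall = wallInstant.getEpochSecond * 1000000L + wallInstant.getNano / 1000L

           // Holds because any whole-second offset is a multiple of 1 ms.
           assert(arith == wall)
         }
       }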

##########
File path: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
##########
@@ -737,13 +737,10 @@ object DateTimeUtils {
    */
   def truncTimestamp(micros: Long, level: Int, zoneId: ZoneId): Long = {
     level match {
-      case TRUNC_TO_MICROSECOND => micros
-      case TRUNC_TO_MILLISECOND =>
-        micros - Math.floorMod(micros, MICROS_PER_MILLIS)
-      case TRUNC_TO_SECOND =>
-        micros - Math.floorMod(micros, MICROS_PER_SECOND)
-      case TRUNC_TO_MINUTE =>
-        micros - Math.floorMod(micros, MICROS_PER_MINUTE)
+      case TRUNC_TO_MICROSECOND => truncToUnit(micros, zoneId, ChronoUnit.MICROS)
+      case TRUNC_TO_MILLISECOND => truncToUnit(micros, zoneId, ChronoUnit.MILLIS)

Review comment:
       These changes are not necessary for the micro and milli levels - they just slow down truncation. Zone offsets always have whole-second granularity, so the old arithmetic was already correct for these levels.
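
       For comparison, a minimal standalone sketch of the two paths for the milli level (truncViaChrono is my stand-in for what the patch's truncToUnit does, not the actual Spark helper):

       import java.time._
       import java.time.temporal.ChronoUnit

       object TruncPaths {
         val MicrosPerMillis = 1000L

         // Old path: pure integer arithmetic, no allocation.
         def truncArith(micros: Long): Long =
           micros - Math.floorMod(micros, MicrosPerMillis)

         // Patched path: micros -> Instant -> ZonedDateTime -> truncate -> back.
         def truncViaChrono(micros: Long, zoneId: ZoneId): Long = {
           val instant = Instant.EPOCH.plus(micros, ChronoUnit.MICROS)
           val truncated = instant.atZone(zoneId).truncatedTo(ChronoUnit.MILLIS).toInstant
           ChronoUnit.MICROS.between(Instant.EPOCH, truncated)
         }

         def main(args: Array[String]): Unit = {
           val micros = -6319000000000000L // an arbitrary pre-1970 timestamp
           // Same result either way, but the chrono path allocates several
           // temporary objects per call - that is the slowdown in question.
           assert(truncArith(micros) == truncViaChrono(micros, ZoneId.of("Asia/Kolkata")))
         }
       }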



