You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by we...@apache.org on 2020/04/27 05:04:25 UTC

[spark] branch branch-3.0 updated: [SPARK-31557][SQL] Legacy time parser should return Gregorian days rather than Julian days

This is an automated email from the ASF dual-hosted git repository.

wenchen pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-3.0 by this push:
     new b0155e36 [SPARK-31557][SQL] Legacy time parser should return Gregorian days rather than Julian days
b0155e36 is described below

commit b0155e36c3f267743c65a259e2be16324714de4a
Author: Bruce Robbins <be...@gmail.com>
AuthorDate: Mon Apr 27 05:00:36 2020 +0000

    [SPARK-31557][SQL] Legacy time parser should return Gregorian days rather than Julian days
    
    This PR modifies LegacyDateFormatter#parse to return proleptic Gregorian days rather than hybrid Julian days.
    
    The legacy time parser currently returns epoch days in the hybrid Julian calendar. However, callers of the legacy parser (e.g., UnivocityParser, JacksonParser) expect epoch days in the proleptic Gregorian calendar. As a result, pre-Gregorian dates such as '1000-01-01' are interpreted as '1000-01-06'.
    
    Does this PR introduce any user-facing change? No.
    
    How was this patch tested? Manual testing and modified existing unit tests.
    
    Closes #28345 from bersprockets/SPARK-31557.
    
    Authored-by: Bruce Robbins <be...@gmail.com>
    Signed-off-by: Wenchen Fan <we...@databricks.com>
    (cherry picked from commit a911287244a98aa9e6464bcdd97c80e7ad732788)
    Signed-off-by: Wenchen Fan <we...@databricks.com>
---
 .../spark/sql/catalyst/util/DateFormatter.scala    |  3 +-
 .../apache/spark/sql/util/DateFormatterSuite.scala | 76 ++++++++++++----------
 2 files changed, 44 insertions(+), 35 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateFormatter.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateFormatter.scala
index d2e4e8b..0f79c1a 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateFormatter.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateFormatter.scala
@@ -66,8 +66,7 @@ trait LegacyDateFormatter extends DateFormatter {
   def formatDate(d: Date): String
 
   override def parse(s: String): Int = {
-    val milliseconds = parseToDate(s).getTime
-    DateTimeUtils.millisToDays(milliseconds)
+    fromJavaDate(new java.sql.Date(parseToDate(s).getTime))
   }
 
   override def format(days: Int): String = {
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/util/DateFormatterSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/util/DateFormatterSuite.scala
index a40dbcc..2df1d49 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/util/DateFormatterSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/util/DateFormatterSuite.scala
@@ -24,6 +24,7 @@ import org.apache.spark.sql.catalyst.plans.SQLHelper
 import org.apache.spark.sql.catalyst.util._
 import org.apache.spark.sql.catalyst.util.DateTimeUtils.{getZoneId, localDateToDays}
 import org.apache.spark.sql.internal.SQLConf
+import org.apache.spark.sql.internal.SQLConf.LegacyBehaviorPolicy
 
 class DateFormatterSuite extends SparkFunSuite with SQLHelper {
   test("parsing dates") {
@@ -47,45 +48,54 @@ class DateFormatterSuite extends SparkFunSuite with SQLHelper {
   }
 
   test("roundtrip date -> days -> date") {
-    Seq(
-      "0050-01-01",
-      "0953-02-02",
-      "1423-03-08",
-      "1969-12-31",
-      "1972-08-25",
-      "1975-09-26",
-      "2018-12-12",
-      "2038-01-01",
-      "5010-11-17").foreach { date =>
-      DateTimeTestUtils.outstandingTimezonesIds.foreach { timeZone =>
-        withSQLConf(SQLConf.SESSION_LOCAL_TIMEZONE.key -> timeZone) {
-          val formatter = DateFormatter(getZoneId(timeZone))
-          val days = formatter.parse(date)
-          val formatted = formatter.format(days)
-          assert(date === formatted)
+    LegacyBehaviorPolicy.values.foreach { parserPolicy =>
+      withSQLConf(SQLConf.LEGACY_TIME_PARSER_POLICY.key -> parserPolicy.toString) {
+        Seq(
+          "0050-01-01",
+          "0953-02-02",
+          "1423-03-08",
+          "1582-10-15",
+          "1969-12-31",
+          "1972-08-25",
+          "1975-09-26",
+          "2018-12-12",
+          "2038-01-01",
+          "5010-11-17").foreach { date =>
+          DateTimeTestUtils.outstandingTimezonesIds.foreach { timeZone =>
+            withSQLConf(SQLConf.SESSION_LOCAL_TIMEZONE.key -> timeZone) {
+              val formatter = DateFormatter(getZoneId(timeZone))
+              val days = formatter.parse(date)
+              val formatted = formatter.format(days)
+              assert(date === formatted)
+            }
+          }
         }
       }
     }
   }
 
   test("roundtrip days -> date -> days") {
-    Seq(
-      -701265,
-      -371419,
-      -199722,
-      -1,
-      0,
-      967,
-      2094,
-      17877,
-      24837,
-      1110657).foreach { days =>
-      DateTimeTestUtils.outstandingTimezonesIds.foreach { timeZone =>
-        withSQLConf(SQLConf.SESSION_LOCAL_TIMEZONE.key -> timeZone) {
-          val formatter = DateFormatter(getZoneId(timeZone))
-          val date = formatter.format(days)
-          val parsed = formatter.parse(date)
-          assert(days === parsed)
+    LegacyBehaviorPolicy.values.foreach { parserPolicy =>
+      withSQLConf(SQLConf.LEGACY_TIME_PARSER_POLICY.key -> parserPolicy.toString) {
+        Seq(
+          -701265,
+          -371419,
+          -199722,
+          -1,
+          0,
+          967,
+          2094,
+          17877,
+          24837,
+          1110657).foreach { days =>
+          DateTimeTestUtils.outstandingTimezonesIds.foreach { timeZone =>
+            withSQLConf(SQLConf.SESSION_LOCAL_TIMEZONE.key -> timeZone) {
+              val formatter = DateFormatter(getZoneId(timeZone))
+              val date = formatter.format(days)
+              val parsed = formatter.parse(date)
+              assert(days === parsed)
+            }
+          }
         }
       }
     }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org