You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by we...@apache.org on 2020/06/09 15:23:28 UTC

[spark] branch branch-3.0 updated: [SPARK-31940][SQL][DOCS] Document the default JVM time zone in to/fromJavaDate and legacy date formatters

This is an automated email from the ASF dual-hosted git repository.

wenchen pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-3.0 by this push:
     new bdddf31  [SPARK-31940][SQL][DOCS] Document the default JVM time zone in to/fromJavaDate and legacy date formatters
bdddf31 is described below

commit bdddf31d125652bd8975b3c1a9ce13f124c28698
Author: Max Gekk <ma...@gmail.com>
AuthorDate: Tue Jun 9 15:20:13 2020 +0000

    [SPARK-31940][SQL][DOCS] Document the default JVM time zone in to/fromJavaDate and legacy date formatters
    
    ### What changes were proposed in this pull request?
    Update comments for `DateTimeUtils`.`toJavaDate` and `fromJavaDate`, and for the legacy date formatters `LegacySimpleDateFormatter` and `LegacyFastDateFormatter` regarding to the default JVM time zone. The comments say that the default JVM time zone is used intentionally for backward compatibility with Spark 2.4 and earlier versions.
    
    Closes #28709
    
    ### Why are the changes needed?
    To document current behaviour of related methods in `DateTimeUtils` and the legacy date formatters. For example, correctness of `HiveResult.hiveResultString` and `toHiveString` is directly related to the same time zone used by `toJavaDate` and `LegacyFastDateFormatter`.
    
    ### Does this PR introduce _any_ user-facing change?
    No
    
    ### How was this patch tested?
    By running the Scala style checker `./dev/scalastyle`
    
    Closes #28767 from MaxGekk/doc-legacy-formatters.
    
    Authored-by: Max Gekk <ma...@gmail.com>
    Signed-off-by: Wenchen Fan <we...@databricks.com>
    (cherry picked from commit de91915a24a5cb5cd725b0e0a7fa63a73c2fc277)
    Signed-off-by: Wenchen Fan <we...@databricks.com>
---
 .../spark/sql/catalyst/util/DateFormatter.scala    | 27 ++++++++++++++
 .../spark/sql/catalyst/util/DateTimeUtils.scala    | 42 +++++++++++-----------
 2 files changed, 49 insertions(+), 20 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateFormatter.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateFormatter.scala
index ec8db46..07fa09d 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateFormatter.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateFormatter.scala
@@ -100,6 +100,17 @@ trait LegacyDateFormatter extends DateFormatter {
   }
 }
 
+/**
+ * The legacy formatter is based on Apache Commons FastDateFormat. The formatter uses the default
+ * JVM time zone intentionally for compatibility with Spark 2.4 and earlier versions.
+ *
+ * Note: Using of the default JVM time zone makes the formatter compatible with the legacy
+ *       `DateTimeUtils` methods `toJavaDate` and `fromJavaDate` that are based on the default
+ *       JVM time zone too.
+ *
+ * @param pattern `java.text.SimpleDateFormat` compatible pattern.
+ * @param locale The locale overrides the system locale and is used in parsing/formatting.
+ */
 class LegacyFastDateFormatter(pattern: String, locale: Locale) extends LegacyDateFormatter {
   @transient
   private lazy val fdf = FastDateFormat.getInstance(pattern, locale)
@@ -109,6 +120,22 @@ class LegacyFastDateFormatter(pattern: String, locale: Locale) extends LegacyDat
   override def validatePatternString(): Unit = fdf
 }
 
+// scalastyle:off line.size.limit
+/**
+ * The legacy formatter is based on `java.text.SimpleDateFormat`. The formatter uses the default
+ * JVM time zone intentionally for compatibility with Spark 2.4 and earlier versions.
+ *
+ * Note: Using of the default JVM time zone makes the formatter compatible with the legacy
+ *       `DateTimeUtils` methods `toJavaDate` and `fromJavaDate` that are based on the default
+ *       JVM time zone too.
+ *
+ * @param pattern The pattern describing the date and time format.
+ *                See <a href="https://docs.oracle.com/javase/7/docs/api/java/text/SimpleDateFormat.html">
+ *                Date and Time Patterns</a>
+ * @param locale  The locale whose date format symbols should be used. It overrides the system
+ *                locale in parsing/formatting.
+ */
+// scalastyle:on line.size.limit
 class LegacySimpleDateFormatter(pattern: String, locale: Locale) extends LegacyDateFormatter {
   @transient
   private lazy val sdf = new SimpleDateFormat(pattern, locale)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
index fb7875d..3f8417f 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
@@ -90,19 +90,20 @@ object DateTimeUtils {
   }
 
   /**
-   * Converts an instance of `java.sql.Date` to a number of days since the epoch
-   * 1970-01-01 via extracting date fields `year`, `month`, `days` from the input,
-   * creating a local date in Proleptic Gregorian calendar from the fields, and
-   * getting the number of days from the resulted local date.
+   * Converts a local date at the default JVM time zone to the number of days since 1970-01-01
+   * in the hybrid calendar (Julian + Gregorian) by discarding the time part. The resulted days are
+   * rebased from the hybrid to Proleptic Gregorian calendar. The days rebasing is performed via
+   * UTC time zone for simplicity because the difference between two calendars is the same in
+   * any given time zone and UTC time zone.
    *
-   * This approach was taken to have the same local date as the triple of `year`,
-   * `month`, `day` in the original hybrid calendar used by `java.sql.Date` and
-   * Proleptic Gregorian calendar used by Spark since version 3.0.0, see SPARK-26651.
+   * Note: The date is shifted by the offset of the default JVM time zone for backward compatibility
+   *       with Spark 2.4 and earlier versions. The goal of the shift is to get a local date derived
+   *       from the number of days that has the same date fields (year, month, day) as the original
+   *       `date` at the default JVM time zone.
    *
-   * @param date It represents a specific instant in time based on
-   *             the hybrid calendar which combines Julian and
-   *             Gregorian calendars.
-   * @return The number of days since epoch from java.sql.Date.
+   * @param date It represents a specific instant in time based on the hybrid calendar which
+   *             combines Julian and Gregorian calendars.
+   * @return The number of days since the epoch in Proleptic Gregorian calendar.
    */
   def fromJavaDate(date: Date): SQLDate = {
     val millisUtc = date.getTime
@@ -112,17 +113,18 @@ object DateTimeUtils {
   }
 
   /**
-   * The opposite to `fromJavaDate` method which converts a number of days to an
-   * instance of `java.sql.Date`. It builds a local date in Proleptic Gregorian
-   * calendar, extracts date fields `year`, `month`, `day`, and creates a local
-   * date in the hybrid calendar (Julian + Gregorian calendars) from the fields.
+   * Converts days since the epoch 1970-01-01 in Proleptic Gregorian calendar to a local date
+   * at the default JVM time zone in the hybrid calendar (Julian + Gregorian). It rebases the given
+   * days from Proleptic Gregorian to the hybrid calendar at UTC time zone for simplicity because
+   * the difference between two calendars doesn't depend on any time zone. The result is shifted
+   * by the time zone offset in wall clock to have the same date fields (year, month, day)
+   * at the default JVM time zone as the input `daysSinceEpoch` in Proleptic Gregorian calendar.
    *
-   * The purpose of the conversion is to have the same local date as the triple
-   * of `year`, `month`, `day` in the original Proleptic Gregorian calendar and
-   * in the target calender.
+   * Note: The date is shifted by the offset of the default JVM time zone for backward compatibility
+   *       with Spark 2.4 and earlier versions.
    *
-   * @param daysSinceEpoch The number of days since 1970-01-01.
-   * @return A `java.sql.Date` from number of days since epoch.
+   * @param daysSinceEpoch The number of days since 1970-01-01 in Proleptic Gregorian calendar.
+   * @return A local date in the hybrid calendar as `java.sql.Date` from number of days since epoch.
    */
   def toJavaDate(daysSinceEpoch: SQLDate): Date = {
     val rebasedDays = rebaseGregorianToJulianDays(daysSinceEpoch)


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org