You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by we...@apache.org on 2020/06/09 15:21:26 UTC
[spark] branch master updated: [SPARK-31940][SQL][DOCS] Document
the default JVM time zone in to/fromJavaDate and legacy date formatters
This is an automated email from the ASF dual-hosted git repository.
wenchen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new de91915 [SPARK-31940][SQL][DOCS] Document the default JVM time zone in to/fromJavaDate and legacy date formatters
de91915 is described below
commit de91915a24a5cb5cd725b0e0a7fa63a73c2fc277
Author: Max Gekk <ma...@gmail.com>
AuthorDate: Tue Jun 9 15:20:13 2020 +0000
[SPARK-31940][SQL][DOCS] Document the default JVM time zone in to/fromJavaDate and legacy date formatters
### What changes were proposed in this pull request?
Update comments for `DateTimeUtils`.`toJavaDate` and `fromJavaDate`, and for the legacy date formatters `LegacySimpleDateFormatter` and `LegacyFastDateFormatter` regarding the default JVM time zone. The comments say that the default JVM time zone is used intentionally for backward compatibility with Spark 2.4 and earlier versions.
Closes #28709
### Why are the changes needed?
To document current behaviour of related methods in `DateTimeUtils` and the legacy date formatters. For example, correctness of `HiveResult.hiveResultString` and `toHiveString` is directly related to the same time zone used by `toJavaDate` and `LegacyFastDateFormatter`.
### Does this PR introduce _any_ user-facing change?
No
### How was this patch tested?
By running the Scala style checker `./dev/scalastyle`
Closes #28767 from MaxGekk/doc-legacy-formatters.
Authored-by: Max Gekk <ma...@gmail.com>
Signed-off-by: Wenchen Fan <we...@databricks.com>
---
.../spark/sql/catalyst/util/DateFormatter.scala | 27 ++++++++++++++
.../spark/sql/catalyst/util/DateTimeUtils.scala | 42 +++++++++++-----------
2 files changed, 49 insertions(+), 20 deletions(-)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateFormatter.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateFormatter.scala
index 76ae3e5..da80e62 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateFormatter.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateFormatter.scala
@@ -100,6 +100,17 @@ trait LegacyDateFormatter extends DateFormatter {
}
}
+/**
+ * The legacy formatter is based on Apache Commons FastDateFormat. The formatter uses the default
+ * JVM time zone intentionally for compatibility with Spark 2.4 and earlier versions.
+ *
+ * Note: Using of the default JVM time zone makes the formatter compatible with the legacy
+ * `DateTimeUtils` methods `toJavaDate` and `fromJavaDate` that are based on the default
+ * JVM time zone too.
+ *
+ * @param pattern `java.text.SimpleDateFormat` compatible pattern.
+ * @param locale The locale overrides the system locale and is used in parsing/formatting.
+ */
class LegacyFastDateFormatter(pattern: String, locale: Locale) extends LegacyDateFormatter {
@transient
private lazy val fdf = FastDateFormat.getInstance(pattern, locale)
@@ -108,6 +119,22 @@ class LegacyFastDateFormatter(pattern: String, locale: Locale) extends LegacyDat
override def validatePatternString(): Unit = fdf
}
+// scalastyle:off line.size.limit
+/**
+ * The legacy formatter is based on `java.text.SimpleDateFormat`. The formatter uses the default
+ * JVM time zone intentionally for compatibility with Spark 2.4 and earlier versions.
+ *
+ * Note: Using of the default JVM time zone makes the formatter compatible with the legacy
+ * `DateTimeUtils` methods `toJavaDate` and `fromJavaDate` that are based on the default
+ * JVM time zone too.
+ *
+ * @param pattern The pattern describing the date and time format.
+ * See <a href="https://docs.oracle.com/javase/7/docs/api/java/text/SimpleDateFormat.html">
+ * Date and Time Patterns</a>
+ * @param locale The locale whose date format symbols should be used. It overrides the system
+ * locale in parsing/formatting.
+ */
+// scalastyle:on line.size.limit
class LegacySimpleDateFormatter(pattern: String, locale: Locale) extends LegacyDateFormatter {
@transient
private lazy val sdf = new SimpleDateFormat(pattern, locale)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
index 21a478a..41a271b 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
@@ -88,19 +88,20 @@ object DateTimeUtils {
}
/**
- * Converts an instance of `java.sql.Date` to a number of days since the epoch
- * 1970-01-01 via extracting date fields `year`, `month`, `days` from the input,
- * creating a local date in Proleptic Gregorian calendar from the fields, and
- * getting the number of days from the resulted local date.
+ * Converts a local date at the default JVM time zone to the number of days since 1970-01-01
+ * in the hybrid calendar (Julian + Gregorian) by discarding the time part. The resulted days are
+ * rebased from the hybrid to Proleptic Gregorian calendar. The days rebasing is performed via
+ * UTC time zone for simplicity because the difference between two calendars is the same in
+ * any given time zone and UTC time zone.
*
- * This approach was taken to have the same local date as the triple of `year`,
- * `month`, `day` in the original hybrid calendar used by `java.sql.Date` and
- * Proleptic Gregorian calendar used by Spark since version 3.0.0, see SPARK-26651.
+ * Note: The date is shifted by the offset of the default JVM time zone for backward compatibility
+ * with Spark 2.4 and earlier versions. The goal of the shift is to get a local date derived
+ * from the number of days that has the same date fields (year, month, day) as the original
+ * `date` at the default JVM time zone.
*
- * @param date It represents a specific instant in time based on
- * the hybrid calendar which combines Julian and
- * Gregorian calendars.
- * @return The number of days since epoch from java.sql.Date.
+ * @param date It represents a specific instant in time based on the hybrid calendar which
+ * combines Julian and Gregorian calendars.
+ * @return The number of days since the epoch in Proleptic Gregorian calendar.
*/
def fromJavaDate(date: Date): SQLDate = {
val millisUtc = date.getTime
@@ -110,17 +111,18 @@ object DateTimeUtils {
}
/**
- * The opposite to `fromJavaDate` method which converts a number of days to an
- * instance of `java.sql.Date`. It builds a local date in Proleptic Gregorian
- * calendar, extracts date fields `year`, `month`, `day`, and creates a local
- * date in the hybrid calendar (Julian + Gregorian calendars) from the fields.
+ * Converts days since the epoch 1970-01-01 in Proleptic Gregorian calendar to a local date
+ * at the default JVM time zone in the hybrid calendar (Julian + Gregorian). It rebases the given
+ * days from Proleptic Gregorian to the hybrid calendar at UTC time zone for simplicity because
+ * the difference between two calendars doesn't depend on any time zone. The result is shifted
+ * by the time zone offset in wall clock to have the same date fields (year, month, day)
+ * at the default JVM time zone as the input `daysSinceEpoch` in Proleptic Gregorian calendar.
*
- * The purpose of the conversion is to have the same local date as the triple
- * of `year`, `month`, `day` in the original Proleptic Gregorian calendar and
- * in the target calender.
+ * Note: The date is shifted by the offset of the default JVM time zone for backward compatibility
+ * with Spark 2.4 and earlier versions.
*
- * @param daysSinceEpoch The number of days since 1970-01-01.
- * @return A `java.sql.Date` from number of days since epoch.
+ * @param daysSinceEpoch The number of days since 1970-01-01 in Proleptic Gregorian calendar.
+ * @return A local date in the hybrid calendar as `java.sql.Date` from number of days since epoch.
*/
def toJavaDate(daysSinceEpoch: SQLDate): Date = {
val rebasedDays = rebaseGregorianToJulianDays(daysSinceEpoch)
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org