You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by gu...@apache.org on 2020/03/20 06:58:09 UTC
[spark] branch master updated: [SPARK-31195][SQL] Correct and reuse
days rebase functions of `DateTimeUtils` in `DaysWritable`
This is an automated email from the ASF dual-hosted git repository.
gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 6a66876 [SPARK-31195][SQL] Correct and reuse days rebase functions of `DateTimeUtils` in `DaysWritable`
6a66876 is described below
commit 6a668763b81cce4b4e50d5b34f8e79c493031f86
Author: Maxim Gekk <ma...@gmail.com>
AuthorDate: Fri Mar 20 15:57:21 2020 +0900
[SPARK-31195][SQL] Correct and reuse days rebase functions of `DateTimeUtils` in `DaysWritable`
### What changes were proposed in this pull request?
In the PR, I propose to correct and re-use functions from `DateTimeUtils` for rebasing days before the cutover day `1582-10-15` in `org.apache.spark.sql.hive.DaysWritable`.
### Why are the changes needed?
1. Existing rebasing of days in `DaysWritable` is not correct.
2. To deduplicate code in `DaysWritable`
3. To use functions that are better tested and cross checked by loading dates/timestamps from Parquet/Avro files written by Spark 2.4.5
### Does this PR introduce any user-facing change?
This PR may introduce a behavior change because the replaced code differs from the re-used code in `DateTimeUtils`.
### How was this patch tested?
By existing test suite, for instance `HiveOrcHadoopFsRelationSuite`.
Closes #27962 from MaxGekk/reuse-rebase-funcs.
Authored-by: Maxim Gekk <ma...@gmail.com>
Signed-off-by: HyukjinKwon <gu...@apache.org>
---
.../org/apache/spark/sql/hive/DaysWritable.scala | 24 +++-------------------
1 file changed, 3 insertions(+), 21 deletions(-)
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/DaysWritable.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/DaysWritable.scala
index 53a0deb..862be33 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/DaysWritable.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/DaysWritable.scala
@@ -19,13 +19,11 @@ package org.apache.spark.sql.hive
import java.io.{DataInput, DataOutput, IOException}
import java.sql.Date
-import java.time.LocalDate
-import java.util.Calendar
import org.apache.hadoop.hive.serde2.io.DateWritable
import org.apache.hadoop.io.WritableUtils
-import org.apache.spark.sql.catalyst.util.{DateTimeConstants, DateTimeUtils}
+import org.apache.spark.sql.catalyst.util.DateTimeUtils
/**
* The class accepts/returns days in Gregorian calendar and rebase them
@@ -82,17 +80,7 @@ private[hive] object DaysWritable {
// The code below converts -141714 to -141704.
def rebaseGregorianToJulianDays(daysSinceEpoch: Int): Int = {
if (daysSinceEpoch < DateTimeUtils.GREGORIAN_CUTOVER_DAY) {
- val millis = Math.multiplyExact(daysSinceEpoch, DateTimeConstants.MILLIS_PER_DAY)
- val utcCal = new Calendar.Builder()
- .setCalendarType("gregory")
- .setTimeZone(DateTimeUtils.TimeZoneUTC)
- .setInstant(millis)
- .build()
- val localDate = LocalDate.of(
- utcCal.get(Calendar.YEAR),
- utcCal.get(Calendar.MONTH) + 1,
- utcCal.get(Calendar.DAY_OF_MONTH))
- Math.toIntExact(localDate.toEpochDay)
+ DateTimeUtils.rebaseGregorianToJulianDays(daysSinceEpoch)
} else {
daysSinceEpoch
}
@@ -100,13 +88,7 @@ private[hive] object DaysWritable {
def rebaseJulianToGregorianDays(daysSinceEpoch: Int): Int = {
if (daysSinceEpoch < JULIAN_CUTOVER_DAY) {
- val localDate = LocalDate.ofEpochDay(daysSinceEpoch)
- val utcCal = new Calendar.Builder()
- .setCalendarType("gregory")
- .setTimeZone(DateTimeUtils.TimeZoneUTC)
- .setDate(localDate.getYear, localDate.getMonthValue - 1, localDate.getDayOfMonth)
- .build()
- Math.toIntExact(Math.floorDiv(utcCal.getTimeInMillis, DateTimeConstants.MILLIS_PER_DAY))
+ DateTimeUtils.rebaseJulianToGregorianDays(daysSinceEpoch)
} else {
daysSinceEpoch
}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org