You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by we...@apache.org on 2020/03/16 09:16:49 UTC

[spark] branch branch-3.0 updated: [SPARK-31076][SQL][FOLLOWUP] Incapsulate date rebasing to `DaysWritable`

This is an automated email from the ASF dual-hosted git repository.

wenchen pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-3.0 by this push:
     new 5902a23  [SPARK-31076][SQL][FOLLOWUP] Incapsulate date rebasing to `DaysWritable`
5902a23 is described below

commit 5902a232c14516f893895be0d673354933d8a5d0
Author: Maxim Gekk <ma...@gmail.com>
AuthorDate: Mon Mar 16 17:06:15 2020 +0800

    [SPARK-31076][SQL][FOLLOWUP] Incapsulate date rebasing to `DaysWritable`
    
    ### What changes were proposed in this pull request?
    Move the code that rebases days from/to the Julian calendar out of `HiveInspectors` into the new class `DaysWritable`.
    
    ### Why are the changes needed?
    To improve the maintainability of the `HiveInspectors` trait, which is already pretty complex.
    
    ### Does this PR introduce any user-facing change?
    No
    
    ### How was this patch tested?
    By `HiveOrcHadoopFsRelationSuite`.
    
    Closes #27890 from MaxGekk/replace-DateWritable-by-DaysWritable.
    
    Authored-by: Maxim Gekk <ma...@gmail.com>
    Signed-off-by: Wenchen Fan <we...@databricks.com>
    (cherry picked from commit 57854c736c2ca495eb03962f61857e1600864e95)
    Signed-off-by: Wenchen Fan <we...@databricks.com>
---
 .../org/apache/spark/sql/hive/DaysWritable.scala   | 117 +++++++++++++++++++++
 .../org/apache/spark/sql/hive/HiveInspectors.scala |  58 +---------
 2 files changed, 120 insertions(+), 55 deletions(-)

diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/DaysWritable.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/DaysWritable.scala
new file mode 100644
index 0000000..53a0deb
--- /dev/null
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/DaysWritable.scala
@@ -0,0 +1,117 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.hive
+
+import java.io.{DataInput, DataOutput, IOException}
+import java.sql.Date
+import java.time.LocalDate
+import java.util.Calendar
+
+import org.apache.hadoop.hive.serde2.io.DateWritable
+import org.apache.hadoop.io.WritableUtils
+
+import org.apache.spark.sql.catalyst.util.{DateTimeConstants, DateTimeUtils}
+
+/**
+ * A Hive `DateWritable` that carries one date as two day counts. Spark-side code passes and
+ * reads `gregorianDays`; `getDays`, `get`, `write` and `readFields` operate on `julianDays`.
+ * Dates before 1582-10-15 are rebased via conversion to a local date in the other calendar,
+ * for backward compatibility with Spark 2.4 and earlier versions.
+ * NOTE(review): the `set` methods inherited from `DateWritable` are not overridden here, so
+ * they presumably leave both fields untouched -- confirm they are never called on this class.
+ * @param gregorianDays The number of days since the epoch 1970-01-01 in Gregorian calendar.
+ * @param julianDays The number of days since the epoch 1970-01-01 in Julian calendar.
+ */
+private[hive] class DaysWritable(
+    var gregorianDays: Int,
+    var julianDays: Int)
+  extends DateWritable {
+
+  def this(gregorianDays: Int) =  // the Julian count is computed from the Gregorian one
+    this(gregorianDays, DaysWritable.rebaseGregorianToJulianDays(gregorianDays))
+  def this(dateWritable: DateWritable) = {
+    this(
+      gregorianDays = dateWritable match {
+        case daysWritable: DaysWritable => daysWritable.gregorianDays  // reuse, no rebase
+        case dateWritable: DateWritable =>  // other writables: their days are taken as Julian
+        DaysWritable.rebaseJulianToGregorianDays(dateWritable.getDays)
+      },
+      julianDays = dateWritable.getDays)  // copied as-is from the wrapped writable
+  }
+
+  override def getDays: Int = julianDays  // exposes the Julian-based count
+  override def get(): Date = new Date(DateWritable.daysToMillis(julianDays))
+
+  @throws[IOException]
+  override def write(out: DataOutput): Unit = {
+    WritableUtils.writeVInt(out, julianDays)  // only the Julian-based count is written
+  }
+
+  @throws[IOException]
+  override def readFields(in: DataInput): Unit = {
+    julianDays = WritableUtils.readVInt(in)
+    gregorianDays = DaysWritable.rebaseJulianToGregorianDays(julianDays)  // re-derived
+  }
+}
+
+private[hive] object DaysWritable {
+  // Spark 3.0 switched to Proleptic Gregorian calendar (see SPARK-26651), while Spark 2.4
+  // and earlier versions use Julian calendar for dates before 1582-10-15. So, the same
+  // local date may be mapped to a different number of days since the epoch. For example:
+  // Proleptic Gregorian calendar: 1582-01-01 -> -141714
+  // Julian calendar: 1582-01-01 -> -141704
+  // Both methods keep the local date and swap the calendar; days from the cutover on pass as is.
+  // Gregorian -> Julian (-141714 to -141704): read the days as a Proleptic Gregorian local
+  // date, then ask the hybrid "gregory" calendar on which day it shows that same date.
+  def rebaseGregorianToJulianDays(daysSinceEpoch: Int): Int = {
+    if (daysSinceEpoch < DateTimeUtils.GREGORIAN_CUTOVER_DAY) {
+      val localDate = LocalDate.ofEpochDay(daysSinceEpoch)
+      val utcCal = new Calendar.Builder()
+        .setCalendarType("gregory")
+        .setTimeZone(DateTimeUtils.TimeZoneUTC)
+        .setDate(localDate.getYear, localDate.getMonthValue - 1, localDate.getDayOfMonth)
+        .build()
+      Math.toIntExact(Math.floorDiv(utcCal.getTimeInMillis, DateTimeConstants.MILLIS_PER_DAY))
+    } else {
+      daysSinceEpoch
+    }
+  }
+
+  // Julian -> Gregorian (-141704 to -141714): let the hybrid "gregory" calendar turn the
+  // days into year/month/day, then count the days to that date in Proleptic Gregorian.
+  def rebaseJulianToGregorianDays(daysSinceEpoch: Int): Int = {
+    if (daysSinceEpoch < JULIAN_CUTOVER_DAY) {
+      val millis = Math.multiplyExact(daysSinceEpoch, DateTimeConstants.MILLIS_PER_DAY)
+      val utcCal = new Calendar.Builder()
+        .setCalendarType("gregory")
+        .setTimeZone(DateTimeUtils.TimeZoneUTC)
+        .setInstant(millis)
+        .build()
+      val localDate = LocalDate.of(
+        utcCal.get(Calendar.YEAR),
+        utcCal.get(Calendar.MONTH) + 1,  // Calendar months are 0-based
+        utcCal.get(Calendar.DAY_OF_MONTH))
+      Math.toIntExact(localDate.toEpochDay)
+    } else {
+      daysSinceEpoch
+    }
+  }
+
+  final val JULIAN_CUTOVER_DAY =
+    rebaseGregorianToJulianDays(DateTimeUtils.GREGORIAN_CUTOVER_DAY.toInt)
+}
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala
index e217c52..e3e9a31 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala
@@ -18,8 +18,6 @@
 package org.apache.spark.sql.hive
 
 import java.lang.reflect.{ParameterizedType, Type, WildcardType}
-import java.time.LocalDate
-import java.util.Calendar
 
 import scala.collection.JavaConverters._
 
@@ -182,33 +180,6 @@ import org.apache.spark.unsafe.types.UTF8String
  */
 private[hive] trait HiveInspectors {
 
-  private final val JULIAN_CUTOVER_DAY =
-    rebaseGregorianToJulianDays(DateTimeUtils.GREGORIAN_CUTOVER_DAY.toInt)
-
-  private def rebaseJulianToGregorianDays(daysSinceEpoch: Int): Int = {
-    val localDate = LocalDate.ofEpochDay(daysSinceEpoch)
-    val utcCal = new Calendar.Builder()
-      .setCalendarType("gregory")
-      .setTimeZone(DateTimeUtils.TimeZoneUTC)
-      .setDate(localDate.getYear, localDate.getMonthValue - 1, localDate.getDayOfMonth)
-      .build()
-    Math.toIntExact(Math.floorDiv(utcCal.getTimeInMillis, DateTimeConstants.MILLIS_PER_DAY))
-  }
-
-  private def rebaseGregorianToJulianDays(daysSinceEpoch: Int): Int = {
-    val millis = Math.multiplyExact(daysSinceEpoch, DateTimeConstants.MILLIS_PER_DAY)
-    val utcCal = new Calendar.Builder()
-      .setCalendarType("gregory")
-      .setTimeZone(DateTimeUtils.TimeZoneUTC)
-      .setInstant(millis)
-      .build()
-    val localDate = LocalDate.of(
-      utcCal.get(Calendar.YEAR),
-      utcCal.get(Calendar.MONTH) + 1,
-      utcCal.get(Calendar.DAY_OF_MONTH))
-    Math.toIntExact(localDate.toEpochDay)
-  }
-
   def javaTypeToDataType(clz: Type): DataType = clz match {
     // writable
     case c: Class[_] if c == classOf[hadoopIo.DoubleWritable] => DoubleType
@@ -646,14 +617,7 @@ private[hive] trait HiveInspectors {
         case x: DateObjectInspector if x.preferWritable() =>
           data: Any => {
             if (data != null) {
-              // Rebasing written days via conversion to local dates.
-              // See the comment for `getDateWritable()`.
-              val daysSinceEpoch = x.getPrimitiveWritableObject(data).getDays
-              if (daysSinceEpoch < JULIAN_CUTOVER_DAY) {
-                rebaseJulianToGregorianDays(daysSinceEpoch)
-              } else {
-                daysSinceEpoch
-              }
+              new DaysWritable(x.getPrimitiveWritableObject(data)).gregorianDays
             } else {
               null
             }
@@ -1045,27 +1009,11 @@ private[hive] trait HiveInspectors {
       new hadoopIo.BytesWritable(value.asInstanceOf[Array[Byte]])
     }
 
-  private def getDateWritable(value: Any): hiveIo.DateWritable =
+  private def getDateWritable(value: Any): DaysWritable =
     if (value == null) {
       null
     } else {
-      // Rebasing days since the epoch to store the same number of days
-      // as by Spark 2.4 and earlier versions. Spark 3.0 switched to
-      // Proleptic Gregorian calendar (see SPARK-26651), and as a consequence of that,
-      // this affects dates before 1582-10-15. Spark 2.4 and earlier versions use
-      // Julian calendar for dates before 1582-10-15. So, the same local date may
-      // be mapped to different number of days since the epoch in different calendars.
-      // For example:
-      // Proleptic Gregorian calendar: 1582-01-01 -> -141714
-      // Julian calendar: 1582-01-01 -> -141704
-      // The code below converts -141714 to -141704.
-      val daysSinceEpoch = value.asInstanceOf[Int]
-      val rebasedDays = if (daysSinceEpoch < DateTimeUtils.GREGORIAN_CUTOVER_DAY) {
-        rebaseGregorianToJulianDays(daysSinceEpoch)
-      } else {
-        daysSinceEpoch
-      }
-      new hiveIo.DateWritable(rebasedDays)
+      new DaysWritable(value.asInstanceOf[Int])
     }
 
   private def getTimestampWritable(value: Any): hiveIo.TimestampWritable =


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org