You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by gu...@apache.org on 2020/08/12 04:36:47 UTC

[spark] branch branch-3.0 updated: [SPARK-32594][SQL] Fix serialization of dates inserted to Hive tables

This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-3.0 by this push:
     new e7d45f8  [SPARK-32594][SQL] Fix serialization of dates inserted to Hive tables
e7d45f8 is described below

commit e7d45f846ab50247ac8ad01a86e0dedb0fefe8b4
Author: Max Gekk <ma...@gmail.com>
AuthorDate: Wed Aug 12 13:32:16 2020 +0900

    [SPARK-32594][SQL] Fix serialization of dates inserted to Hive tables
    
    ### What changes were proposed in this pull request?
    Fix `DaysWritable` by overriding the parent's method `def get(doesTimeMatter: Boolean): Date` from `DateWritable` instead of `Date get()`, because the latter delegates to the former. The bug occurs because `HiveOutputWriter.write()` transitively calls `def get(doesTimeMatter: Boolean): Date` with the default implementation from the parent class `DateWritable`, which doesn't respect date rebases and uses the uninitialized `daysSinceEpoch` (0, which is `1970-01-01`).
    
    ### Why are the changes needed?
    The changes fix the bug:
    ```sql
    spark-sql> CREATE TABLE table1 (d date);
    spark-sql> INSERT INTO table1 VALUES (date '2020-08-11');
    spark-sql> SELECT * FROM table1;
    1970-01-01
    ```
    The last SQL statement is expected to return **2020-08-11**, but it returns **1970-01-01**.
    
    ### Does this PR introduce _any_ user-facing change?
    Yes. After the fix, `INSERT` works correctly:
    ```sql
    spark-sql> SELECT * FROM table1;
    2020-08-11
    ```
    
    ### How was this patch tested?
    Added a new test to `HiveSerDeReadWriteSuite`.
    
    Closes #29409 from MaxGekk/insert-date-into-hive-table.
    
    Authored-by: Max Gekk <ma...@gmail.com>
    Signed-off-by: HyukjinKwon <gu...@apache.org>
    (cherry picked from commit 0477d234672d6b02f906428dcf2536f26fb4fd04)
    Signed-off-by: HyukjinKwon <gu...@apache.org>
---
 .../org/apache/spark/sql/execution/datasources/DaysWritable.scala | 4 +++-
 .../apache/spark/sql/hive/execution/HiveSerDeReadWriteSuite.scala | 8 ++++++++
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DaysWritable.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DaysWritable.scala
index 486f678..56c176e 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DaysWritable.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DaysWritable.scala
@@ -54,7 +54,9 @@ class DaysWritable(
   }
 
   override def getDays: Int = julianDays
-  override def get(): Date = new Date(DateWritable.daysToMillis(julianDays))
+  override def get(doesTimeMatter: Boolean): Date = {
+    new Date(DateWritable.daysToMillis(julianDays, doesTimeMatter))
+  }
 
   override def set(d: Int): Unit = {
     gregorianDays = d
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveSerDeReadWriteSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveSerDeReadWriteSuite.scala
index f8ba7bf..10a0cdb 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveSerDeReadWriteSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveSerDeReadWriteSuite.scala
@@ -184,4 +184,12 @@ class HiveSerDeReadWriteSuite extends QueryTest with SQLTestUtils with TestHiveS
       checkComplexTypes(fileFormat)
     }
   }
+
+  test("SPARK-32594: insert dates to a Hive table") {
+    withTable("table1") {
+      sql("CREATE TABLE table1 (d date)")
+      sql("INSERT INTO table1 VALUES (date '2020-08-11')")
+      checkAnswer(spark.table("table1"), Row(Date.valueOf("2020-08-11")))
+    }
+  }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org