You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by gu...@apache.org on 2020/08/12 04:36:47 UTC
[spark] branch branch-3.0 updated: [SPARK-32594][SQL] Fix
serialization of dates inserted to Hive tables
This is an automated email from the ASF dual-hosted git repository.
gurwls223 pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.0 by this push:
new e7d45f8 [SPARK-32594][SQL] Fix serialization of dates inserted to Hive tables
e7d45f8 is described below
commit e7d45f846ab50247ac8ad01a86e0dedb0fefe8b4
Author: Max Gekk <ma...@gmail.com>
AuthorDate: Wed Aug 12 13:32:16 2020 +0900
[SPARK-32594][SQL] Fix serialization of dates inserted to Hive tables
### What changes were proposed in this pull request?
Fix `DaysWritable` by overriding the parent's method `def get(doesTimeMatter: Boolean): Date` from `DateWritable` instead of `Date get()`, because the latter delegates to the former. The bug occurs because `HiveOutputWriter.write()` transitively calls `def get(doesTimeMatter: Boolean): Date` with the default implementation from the parent class `DateWritable`, which doesn't respect date rebasing and uses the uninitialized `daysSinceEpoch` (0, which corresponds to `1970-01-01`).
### Why are the changes needed?
The changes fix the bug:
```sql
spark-sql> CREATE TABLE table1 (d date);
spark-sql> INSERT INTO table1 VALUES (date '2020-08-11');
spark-sql> SELECT * FROM table1;
1970-01-01
```
The expected result of the last SQL statement is **2020-08-11**, but the actual result is **1970-01-01**.
### Does this PR introduce _any_ user-facing change?
Yes. After the fix, `INSERT` works correctly:
```sql
spark-sql> SELECT * FROM table1;
2020-08-11
```
### How was this patch tested?
Added a new test to `HiveSerDeReadWriteSuite`.
Closes #29409 from MaxGekk/insert-date-into-hive-table.
Authored-by: Max Gekk <ma...@gmail.com>
Signed-off-by: HyukjinKwon <gu...@apache.org>
(cherry picked from commit 0477d234672d6b02f906428dcf2536f26fb4fd04)
Signed-off-by: HyukjinKwon <gu...@apache.org>
---
.../org/apache/spark/sql/execution/datasources/DaysWritable.scala | 4 +++-
.../apache/spark/sql/hive/execution/HiveSerDeReadWriteSuite.scala | 8 ++++++++
2 files changed, 11 insertions(+), 1 deletion(-)
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DaysWritable.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DaysWritable.scala
index 486f678..56c176e 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DaysWritable.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DaysWritable.scala
@@ -54,7 +54,9 @@ class DaysWritable(
}
override def getDays: Int = julianDays
- override def get(): Date = new Date(DateWritable.daysToMillis(julianDays))
+ override def get(doesTimeMatter: Boolean): Date = {
+ new Date(DateWritable.daysToMillis(julianDays, doesTimeMatter))
+ }
override def set(d: Int): Unit = {
gregorianDays = d
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveSerDeReadWriteSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveSerDeReadWriteSuite.scala
index f8ba7bf..10a0cdb 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveSerDeReadWriteSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveSerDeReadWriteSuite.scala
@@ -184,4 +184,12 @@ class HiveSerDeReadWriteSuite extends QueryTest with SQLTestUtils with TestHiveS
checkComplexTypes(fileFormat)
}
}
+
+ test("SPARK-32594: insert dates to a Hive table") {
+ withTable("table1") {
+ sql("CREATE TABLE table1 (d date)")
+ sql("INSERT INTO table1 VALUES (date '2020-08-11')")
+ checkAnswer(spark.table("table1"), Row(Date.valueOf("2020-08-11")))
+ }
+ }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org