You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by li...@apache.org on 2018/02/13 00:00:51 UTC
spark git commit: [SPARK-23230][SQL] When hive.default.fileformat is other kinds of file types, create textfile table cause a serde error

Repository: spark
Updated Branches:
  refs/heads/master 6cb59708c -> 4104b68e9


[SPARK-23230][SQL] When hive.default.fileformat is other kinds of file types, create textfile table cause a serde error

When hive.default.fileformat is other kinds of file types, create textfile table cause a serde error.
We should take the default type of textfile and sequencefile both as org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe.

```
set hive.default.fileformat=orc;
create table tbl( i string ) stored as textfile;
desc formatted tbl;

Serde Library org.apache.hadoop.hive.ql.io.orc.OrcSerde
InputFormat  org.apache.hadoop.mapred.TextInputFormat
OutputFormat  org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
```

Author: sychen <sy...@ctrip.com>

Closes #20406 from cxzl25/default_serde.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/4104b68e
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/4104b68e
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/4104b68e

Branch: refs/heads/master
Commit: 4104b68e958cd13975567a96541dac7cccd8195c
Parents: 6cb5970
Author: sychen <sy...@ctrip.com>
Authored: Mon Feb 12 16:00:47 2018 -0800
Committer: gatorsmile <ga...@gmail.com>
Committed: Mon Feb 12 16:00:47 2018 -0800

----------------------------------------------------------------------
 .../apache/spark/sql/internal/HiveSerDe.scala    |  6 ++++--
 .../sql/hive/execution/HiveSerDeSuite.scala      | 19 +++++++++++++++++++
 2 files changed, 23 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/4104b68e/sql/core/src/main/scala/org/apache/spark/sql/internal/HiveSerDe.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/HiveSerDe.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/HiveSerDe.scala
index dac4636..eca612f 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/internal/HiveSerDe.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/HiveSerDe.scala
@@ -31,7 +31,8 @@ object HiveSerDe {
     "sequencefile" ->
       HiveSerDe(
         inputFormat = Option("org.apache.hadoop.mapred.SequenceFileInputFormat"),
-        outputFormat = Option("org.apache.hadoop.mapred.SequenceFileOutputFormat")),
+        outputFormat = Option("org.apache.hadoop.mapred.SequenceFileOutputFormat"),
+        serde = Option("org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe")),
 
     "rcfile" ->
       HiveSerDe(
@@ -54,7 +55,8 @@ object HiveSerDe {
     "textfile" ->
       HiveSerDe(
         inputFormat = Option("org.apache.hadoop.mapred.TextInputFormat"),
-        outputFormat = Option("org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat")),
+        outputFormat = Option("org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"),
+        serde = Option("org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe")),
 
     "avro" ->
       HiveSerDe(

http://git-wip-us.apache.org/repos/asf/spark/blob/4104b68e/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveSerDeSuite.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveSerDeSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveSerDeSuite.scala
index 1c9f001..d7752e9 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveSerDeSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveSerDeSuite.scala
@@ -100,6 +100,25 @@ class HiveSerDeSuite extends HiveComparisonTest with PlanTest with BeforeAndAfte
       assert(output == Some("org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat"))
       assert(serde == Some("org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe"))
     }
+
+    withSQLConf("hive.default.fileformat" -> "orc") {
+      val (desc, exists) = extractTableDesc(
+        "CREATE TABLE IF NOT EXISTS fileformat_test (id int) STORED AS textfile")
+      assert(exists)
+      assert(desc.storage.inputFormat == Some("org.apache.hadoop.mapred.TextInputFormat"))
+      assert(desc.storage.outputFormat ==
+        Some("org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"))
+      assert(desc.storage.serde == Some("org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"))
+    }
+
+    withSQLConf("hive.default.fileformat" -> "orc") {
+      val (desc, exists) = extractTableDesc(
+        "CREATE TABLE IF NOT EXISTS fileformat_test (id int) STORED AS sequencefile")
+      assert(exists)
+      assert(desc.storage.inputFormat == Some("org.apache.hadoop.mapred.SequenceFileInputFormat"))
+      assert(desc.storage.outputFormat == Some("org.apache.hadoop.mapred.SequenceFileOutputFormat"))
+      assert(desc.storage.serde == Some("org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"))
+    }
   }
 
   test("create hive serde table with new syntax - basic") {


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org