You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by li...@apache.org on 2018/02/13 00:00:51 UTC
spark git commit: [SPARK-23230][SQL] When hive.default.fileformat is
other kinds of file types, create textfile table cause a serde error
Repository: spark
Updated Branches:
refs/heads/master 6cb59708c -> 4104b68e9
[SPARK-23230][SQL] When hive.default.fileformat is other kinds of file types, create textfile table cause a serde error
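When hive.default.fileformat is set to a file format other than textfile (for example, orc), creating a table with STORED AS textfile causes a serde error: the table gets the text input/output formats but the serde of the default file format, as the example below shows.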
We should use org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe as the default serde for both textfile and sequencefile.
```
set hive.default.fileformat=orc;
create table tbl( i string ) stored as textfile;
desc formatted tbl;
Serde Library org.apache.hadoop.hive.ql.io.orc.OrcSerde
InputFormat org.apache.hadoop.mapred.TextInputFormat
OutputFormat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
```
Author: sychen <sy...@ctrip.com>
Closes #20406 from cxzl25/default_serde.
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/4104b68e
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/4104b68e
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/4104b68e
Branch: refs/heads/master
Commit: 4104b68e958cd13975567a96541dac7cccd8195c
Parents: 6cb5970
Author: sychen <sy...@ctrip.com>
Authored: Mon Feb 12 16:00:47 2018 -0800
Committer: gatorsmile <ga...@gmail.com>
Committed: Mon Feb 12 16:00:47 2018 -0800
----------------------------------------------------------------------
.../apache/spark/sql/internal/HiveSerDe.scala | 6 ++++--
.../sql/hive/execution/HiveSerDeSuite.scala | 19 +++++++++++++++++++
2 files changed, 23 insertions(+), 2 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/4104b68e/sql/core/src/main/scala/org/apache/spark/sql/internal/HiveSerDe.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/HiveSerDe.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/HiveSerDe.scala
index dac4636..eca612f 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/internal/HiveSerDe.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/HiveSerDe.scala
@@ -31,7 +31,8 @@ object HiveSerDe {
"sequencefile" ->
HiveSerDe(
inputFormat = Option("org.apache.hadoop.mapred.SequenceFileInputFormat"),
- outputFormat = Option("org.apache.hadoop.mapred.SequenceFileOutputFormat")),
+ outputFormat = Option("org.apache.hadoop.mapred.SequenceFileOutputFormat"),
+ serde = Option("org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe")),
"rcfile" ->
HiveSerDe(
@@ -54,7 +55,8 @@ object HiveSerDe {
"textfile" ->
HiveSerDe(
inputFormat = Option("org.apache.hadoop.mapred.TextInputFormat"),
- outputFormat = Option("org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat")),
+ outputFormat = Option("org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"),
+ serde = Option("org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe")),
"avro" ->
HiveSerDe(
http://git-wip-us.apache.org/repos/asf/spark/blob/4104b68e/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveSerDeSuite.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveSerDeSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveSerDeSuite.scala
index 1c9f001..d7752e9 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveSerDeSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveSerDeSuite.scala
@@ -100,6 +100,25 @@ class HiveSerDeSuite extends HiveComparisonTest with PlanTest with BeforeAndAfte
assert(output == Some("org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat"))
assert(serde == Some("org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe"))
}
+
+ withSQLConf("hive.default.fileformat" -> "orc") {
+ val (desc, exists) = extractTableDesc(
+ "CREATE TABLE IF NOT EXISTS fileformat_test (id int) STORED AS textfile")
+ assert(exists)
+ assert(desc.storage.inputFormat == Some("org.apache.hadoop.mapred.TextInputFormat"))
+ assert(desc.storage.outputFormat ==
+ Some("org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"))
+ assert(desc.storage.serde == Some("org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"))
+ }
+
+ withSQLConf("hive.default.fileformat" -> "orc") {
+ val (desc, exists) = extractTableDesc(
+ "CREATE TABLE IF NOT EXISTS fileformat_test (id int) STORED AS sequencefile")
+ assert(exists)
+ assert(desc.storage.inputFormat == Some("org.apache.hadoop.mapred.SequenceFileInputFormat"))
+ assert(desc.storage.outputFormat == Some("org.apache.hadoop.mapred.SequenceFileOutputFormat"))
+ assert(desc.storage.serde == Some("org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"))
+ }
}
test("create hive serde table with new syntax - basic") {
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org