You are viewing a plain text version of this content. The canonical link for it is here.
Posted to reviews@spark.apache.org by dongjoon-hyun <gi...@git.apache.org> on 2017/10/02 16:14:00 UTC
[GitHub] spark pull request #19382: [SPARK-22158][SQL] convertMetastoreOrc/Parquet sh...
Github user dongjoon-hyun commented on a diff in the pull request:
https://github.com/apache/spark/pull/19382#discussion_r142184730
--- Diff: sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala ---
@@ -1437,30 +1439,75 @@ class HiveDDLSuite
}
}
- test("create hive serde table with new syntax") {
+ test("create hive serde table with new syntax - orc") {
+ Seq("true", "false").foreach { value =>
+ withSQLConf(HiveUtils.CONVERT_METASTORE_ORC.key -> value) {
+ withTable("t", "t2", "t3") {
+ withTempPath { path =>
+ sql(
+ s"""
+ |CREATE TABLE t(id int) USING hive
+ |OPTIONS(fileFormat 'orc', compression 'Zlib')
+ |LOCATION '${path.toURI}'
+ """.stripMargin)
+ val table = spark.sessionState.catalog.getTableMetadata(TableIdentifier("t"))
+ assert(DDLUtils.isHiveTable(table))
+ assert(table.storage.serde == Some("org.apache.hadoop.hive.ql.io.orc.OrcSerde"))
+ assert(table.storage.properties.get("compression") == Some("Zlib"))
+ assert(spark.table("t").collect().isEmpty)
+
+ sql("INSERT INTO t SELECT 1")
+ checkAnswer(spark.table("t"), Row(1))
+ // Check that the written ORC file is compressed with ZLIB.
+ val maybeOrcFile = path.listFiles().find(_.getName.startsWith("part"))
+ assert(maybeOrcFile.isDefined)
+ val orcFilePath = maybeOrcFile.get.toPath.toString
+ val expectedCompressionKind =
+ OrcFileOperator.getFileReader(orcFilePath).get.getCompression
+ assert("ZLIB" === expectedCompressionKind.name())
+
+ sql("CREATE TABLE t2 USING HIVE AS SELECT 1 AS c1, 'a' AS c2")
+ val table2 = spark.sessionState.catalog.getTableMetadata(TableIdentifier("t2"))
+ assert(DDLUtils.isHiveTable(table2))
+ assert(
+ table2.storage.serde == Some("org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"))
+ checkAnswer(spark.table("t2"), Row(1, "a"))
+
+ sql("CREATE TABLE t3(a int, p int) USING hive PARTITIONED BY (p)")
+ sql("INSERT INTO t3 PARTITION(p=1) SELECT 0")
+ checkAnswer(spark.table("t3"), Row(0, 1))
+ }
+ }
+ }
+ }
+ }
+
+ test("create hive serde table with new syntax - parquet") {
withTable("t", "t2", "t3") {
withTempPath { path =>
sql(
s"""
- |CREATE TABLE t(id int) USING hive
- |OPTIONS(fileFormat 'orc', compression 'Zlib')
- |LOCATION '${path.toURI}'
- """.stripMargin)
+ |CREATE TABLE t(id int) USING hive
+ |OPTIONS(fileFormat 'parquet', compression 'gzip')
--- End diff --
Sure!
---
---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org