You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by yh...@apache.org on 2016/10/14 21:09:46 UTC
spark git commit: Revert "[SPARK-17620][SQL] Determine Serde by hive.default.fileformat when Creating Hive Serde Tables"
Repository: spark
Updated Branches:
refs/heads/master 7ab86244e -> 522dd0d0e
Revert "[SPARK-17620][SQL] Determine Serde by hive.default.fileformat when Creating Hive Serde Tables"
This reverts commit 7ab86244e30ca81eb4fa40ea77b4c2b8881cbab2.
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/522dd0d0
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/522dd0d0
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/522dd0d0
Branch: refs/heads/master
Commit: 522dd0d0e5af83e45a3c3526c191aa4b8bcaeeeb
Parents: 7ab8624
Author: Yin Huai <yh...@databricks.com>
Authored: Fri Oct 14 14:09:35 2016 -0700
Committer: Yin Huai <yh...@databricks.com>
Committed: Fri Oct 14 14:09:35 2016 -0700
----------------------------------------------------------------------
.../spark/sql/execution/SparkSqlParser.scala | 4 +-
.../spark/sql/hive/HiveDDLCommandSuite.scala | 26 +------------
.../sql/hive/execution/SQLQuerySuite.scala | 39 ++------------------
3 files changed, 9 insertions(+), 60 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/522dd0d0/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
index 8c68d1e..be2eddb 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
@@ -1010,7 +1010,9 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder {
.orElse(Some("org.apache.hadoop.mapred.TextInputFormat")),
outputFormat = defaultHiveSerde.flatMap(_.outputFormat)
.orElse(Some("org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat")),
- serde = defaultHiveSerde.flatMap(_.serde),
+ // Note: Keep this unspecified because we use the presence of the serde to decide
+ // whether to convert a table created by CTAS to a datasource table.
+ serde = None,
compressed = false,
properties = Map())
}
http://git-wip-us.apache.org/repos/asf/spark/blob/522dd0d0/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveDDLCommandSuite.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveDDLCommandSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveDDLCommandSuite.scala
index 8133749..9ce3338 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveDDLCommandSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveDDLCommandSuite.scala
@@ -30,12 +30,10 @@ import org.apache.spark.sql.catalyst.plans.PlanTest
import org.apache.spark.sql.catalyst.plans.logical.{Generate, ScriptTransformation}
import org.apache.spark.sql.execution.command._
import org.apache.spark.sql.execution.datasources.CreateTable
-import org.apache.spark.sql.hive.test.{TestHive, TestHiveSingleton}
-import org.apache.spark.sql.internal.SQLConf
-import org.apache.spark.sql.test.SQLTestUtils
+import org.apache.spark.sql.hive.test.TestHive
import org.apache.spark.sql.types.StructType
-class HiveDDLCommandSuite extends PlanTest with SQLTestUtils with TestHiveSingleton {
+class HiveDDLCommandSuite extends PlanTest {
val parser = TestHive.sessionState.sqlParser
private def extractTableDesc(sql: String): (CatalogTable, Boolean) = {
@@ -558,24 +556,4 @@ class HiveDDLCommandSuite extends PlanTest with SQLTestUtils with TestHiveSingle
assert(partition2.get.apply("c") == "1" && partition2.get.apply("d") == "2")
}
- test("Test the default fileformat for Hive-serde tables") {
- withSQLConf("hive.default.fileformat" -> "orc") {
- val (desc, exists) = extractTableDesc("CREATE TABLE IF NOT EXISTS fileformat_test (id int)")
- assert(exists)
- assert(desc.storage.inputFormat == Some("org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"))
- assert(desc.storage.outputFormat == Some("org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat"))
- assert(desc.storage.serde == Some("org.apache.hadoop.hive.ql.io.orc.OrcSerde"))
- }
-
- withSQLConf("hive.default.fileformat" -> "parquet") {
- val (desc, exists) = extractTableDesc("CREATE TABLE IF NOT EXISTS fileformat_test (id int)")
- assert(exists)
- val input = desc.storage.inputFormat
- val output = desc.storage.outputFormat
- val serde = desc.storage.serde
- assert(input == Some("org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat"))
- assert(output == Some("org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat"))
- assert(serde == Some("org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe"))
- }
- }
}
http://git-wip-us.apache.org/repos/asf/spark/blob/522dd0d0/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
index 5798f47..6f2a166 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
@@ -492,7 +492,7 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton {
def checkRelation(
tableName: String,
- isDataSourceTable: Boolean,
+ isDataSourceParquet: Boolean,
format: String,
userSpecifiedLocation: Option[String] = None): Unit = {
val relation = EliminateSubqueryAliases(
@@ -501,7 +501,7 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton {
sessionState.catalog.getTableMetadata(TableIdentifier(tableName))
relation match {
case LogicalRelation(r: HadoopFsRelation, _, _) =>
- if (!isDataSourceTable) {
+ if (!isDataSourceParquet) {
fail(
s"${classOf[MetastoreRelation].getCanonicalName} is expected, but found " +
s"${HadoopFsRelation.getClass.getCanonicalName}.")
@@ -514,7 +514,7 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton {
assert(catalogTable.provider.get === format)
case r: MetastoreRelation =>
- if (isDataSourceTable) {
+ if (isDataSourceParquet) {
fail(
s"${HadoopFsRelation.getClass.getCanonicalName} is expected, but found " +
s"${classOf[MetastoreRelation].getCanonicalName}.")
@@ -524,15 +524,8 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton {
assert(r.catalogTable.storage.locationUri.get === location)
case None => // OK.
}
- // Also make sure that the format and serde are as desired.
+ // Also make sure that the format is the desired format.
assert(catalogTable.storage.inputFormat.get.toLowerCase.contains(format))
- assert(catalogTable.storage.outputFormat.get.toLowerCase.contains(format))
- val serde = catalogTable.storage.serde.get
- format match {
- case "sequence" | "text" => assert(serde.contains("LazySimpleSerDe"))
- case "rcfile" => assert(serde.contains("LazyBinaryColumnarSerDe"))
- case _ => assert(serde.toLowerCase.contains(format))
- }
}
// When a user-specified location is defined, the table type needs to be EXTERNAL.
@@ -594,30 +587,6 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton {
}
}
- test("CTAS with default fileformat") {
- val table = "ctas1"
- val ctas = s"CREATE TABLE IF NOT EXISTS $table SELECT key k, value FROM src"
- withSQLConf(SQLConf.CONVERT_CTAS.key -> "true") {
- withSQLConf("hive.default.fileformat" -> "textfile") {
- withTable(table) {
- sql(ctas)
- // We should use parquet here as that is the default datasource fileformat. The default
- // datasource file format is controlled by `spark.sql.sources.default` configuration.
- // This testcase verifies that setting `hive.default.fileformat` has no impact on
- // the target table's fileformat in case of CTAS.
- assert(sessionState.conf.defaultDataSourceName === "parquet")
- checkRelation(table, isDataSourceTable = true, "parquet")
- }
- }
- withSQLConf("spark.sql.sources.default" -> "orc") {
- withTable(table) {
- sql(ctas)
- checkRelation(table, isDataSourceTable = true, "orc")
- }
- }
- }
- }
-
test("CTAS without serde with location") {
withSQLConf(SQLConf.CONVERT_CTAS.key -> "true") {
withTempDir { dir =>
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org