You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by li...@apache.org on 2017/06/20 16:15:38 UTC
spark git commit: [SPARK-21150][SQL] Persistent view stored in Hive
metastore should be case preserving
Repository: spark
Updated Branches:
refs/heads/master ef1622899 -> e862dc904
[SPARK-21150][SQL] Persistent view stored in Hive metastore should be case preserving
## What changes were proposed in this pull request?
This is a regression in Spark 2.2. In Spark 2.2, we introduced a new way to resolve persisted views: https://issues.apache.org/jira/browse/SPARK-18209 , but this makes persisted views non-case-preserving because we store the schema in the Hive metastore directly. We should follow the approach used for data source tables and store the schema in table properties.
## How was this patch tested?
Added a new regression test.
Author: Wenchen Fan <we...@databricks.com>
Closes #18360 from cloud-fan/view.
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/e862dc90
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/e862dc90
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/e862dc90
Branch: refs/heads/master
Commit: e862dc904963cf7832bafc1d3d0ea9090bbddd81
Parents: ef16228
Author: Wenchen Fan <we...@databricks.com>
Authored: Tue Jun 20 09:15:33 2017 -0700
Committer: gatorsmile <ga...@gmail.com>
Committed: Tue Jun 20 09:15:33 2017 -0700
----------------------------------------------------------------------
.../spark/sql/execution/command/views.scala | 4 +-
.../spark/sql/execution/SQLViewSuite.scala | 10 +++
.../spark/sql/hive/HiveExternalCatalog.scala | 84 ++++++++++----------
3 files changed, 56 insertions(+), 42 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/e862dc90/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala
index 1945d68..a6d56ca 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala
@@ -159,7 +159,9 @@ case class CreateViewCommand(
checkCyclicViewReference(analyzedPlan, Seq(viewIdent), viewIdent)
// Handles `CREATE OR REPLACE VIEW v0 AS SELECT ...`
- catalog.alterTable(prepareTable(sparkSession, analyzedPlan))
+ // Nothing we need to retain from the old view, so just drop and create a new one
+ catalog.dropTable(viewIdent, ignoreIfNotExists = false, purge = false)
+ catalog.createTable(prepareTable(sparkSession, analyzedPlan), ignoreIfExists = false)
} else {
// Handles `CREATE VIEW v0 AS SELECT ...`. Throws exception when the target view already
// exists.
http://git-wip-us.apache.org/repos/asf/spark/blob/e862dc90/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala
index d32716c..6761f05 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala
@@ -669,4 +669,14 @@ abstract class SQLViewSuite extends QueryTest with SQLTestUtils {
"positive."))
}
}
+
+ test("permanent view should be case-preserving") {
+ withView("v") {
+ sql("CREATE VIEW v AS SELECT 1 as aBc")
+ assert(spark.table("v").schema.head.name == "aBc")
+
+ sql("CREATE OR REPLACE VIEW v AS SELECT 2 as cBa")
+ assert(spark.table("v").schema.head.name == "cBa")
+ }
+ }
}
http://git-wip-us.apache.org/repos/asf/spark/blob/e862dc90/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala
index 1945367..6e7c475 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala
@@ -224,39 +224,36 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat
throw new TableAlreadyExistsException(db = db, table = table)
}
- if (tableDefinition.tableType == VIEW) {
- client.createTable(tableDefinition, ignoreIfExists)
+ // Ideally we should not create a managed table with location, but Hive serde table can
+ // specify location for managed table. And in [[CreateDataSourceTableAsSelectCommand]] we have
+ // to create the table directory and write out data before we create this table, to avoid
+ // exposing a partial written table.
+ val needDefaultTableLocation = tableDefinition.tableType == MANAGED &&
+ tableDefinition.storage.locationUri.isEmpty
+
+ val tableLocation = if (needDefaultTableLocation) {
+ Some(CatalogUtils.stringToURI(defaultTablePath(tableDefinition.identifier)))
} else {
- // Ideally we should not create a managed table with location, but Hive serde table can
- // specify location for managed table. And in [[CreateDataSourceTableAsSelectCommand]] we have
- // to create the table directory and write out data before we create this table, to avoid
- // exposing a partial written table.
- val needDefaultTableLocation = tableDefinition.tableType == MANAGED &&
- tableDefinition.storage.locationUri.isEmpty
-
- val tableLocation = if (needDefaultTableLocation) {
- Some(CatalogUtils.stringToURI(defaultTablePath(tableDefinition.identifier)))
- } else {
- tableDefinition.storage.locationUri
- }
+ tableDefinition.storage.locationUri
+ }
- if (DDLUtils.isHiveTable(tableDefinition)) {
- val tableWithDataSourceProps = tableDefinition.copy(
- // We can't leave `locationUri` empty and count on Hive metastore to set a default table
- // location, because Hive metastore uses hive.metastore.warehouse.dir to generate default
- // table location for tables in default database, while we expect to use the location of
- // default database.
- storage = tableDefinition.storage.copy(locationUri = tableLocation),
- // Here we follow data source tables and put table metadata like table schema, partition
- // columns etc. in table properties, so that we can work around the Hive metastore issue
- // about not case preserving and make Hive serde table support mixed-case column names.
- properties = tableDefinition.properties ++ tableMetaToTableProps(tableDefinition))
- client.createTable(tableWithDataSourceProps, ignoreIfExists)
- } else {
- createDataSourceTable(
- tableDefinition.withNewStorage(locationUri = tableLocation),
- ignoreIfExists)
- }
+ if (DDLUtils.isDatasourceTable(tableDefinition)) {
+ createDataSourceTable(
+ tableDefinition.withNewStorage(locationUri = tableLocation),
+ ignoreIfExists)
+ } else {
+ val tableWithDataSourceProps = tableDefinition.copy(
+ // We can't leave `locationUri` empty and count on Hive metastore to set a default table
+ // location, because Hive metastore uses hive.metastore.warehouse.dir to generate default
+ // table location for tables in default database, while we expect to use the location of
+ // default database.
+ storage = tableDefinition.storage.copy(locationUri = tableLocation),
+ // Here we follow data source tables and put table metadata like table schema, partition
+ // columns etc. in table properties, so that we can work around the Hive metastore issue
+ // about not case preserving and make Hive serde table and view support mixed-case column
+ // names.
+ properties = tableDefinition.properties ++ tableMetaToTableProps(tableDefinition))
+ client.createTable(tableWithDataSourceProps, ignoreIfExists)
}
}
@@ -679,16 +676,21 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat
var table = inputTable
- if (table.tableType != VIEW) {
- table.properties.get(DATASOURCE_PROVIDER) match {
- // No provider in table properties, which means this is a Hive serde table.
- case None =>
- table = restoreHiveSerdeTable(table)
-
- // This is a regular data source table.
- case Some(provider) =>
- table = restoreDataSourceTable(table, provider)
- }
+ table.properties.get(DATASOURCE_PROVIDER) match {
+ case None if table.tableType == VIEW =>
+ // If this is a view created by Spark 2.2 or higher versions, we should restore its schema
+ // from table properties.
+ if (table.properties.contains(DATASOURCE_SCHEMA_NUMPARTS)) {
+ table = table.copy(schema = getSchemaFromTableProperties(table))
+ }
+
+ // No provider in table properties, which means this is a Hive serde table.
+ case None =>
+ table = restoreHiveSerdeTable(table)
+
+ // This is a regular data source table.
+ case Some(provider) =>
+ table = restoreDataSourceTable(table, provider)
}
// Restore Spark's statistics from information in Metastore.
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org