You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by we...@apache.org on 2016/09/18 07:37:36 UTC
spark git commit: [SPARK-17518][SQL] Block Users to Specify the
Internal Data Source Provider Hive
Repository: spark
Updated Branches:
refs/heads/master 8faa5217b -> 3a3c9ffbd
[SPARK-17518][SQL] Block Users to Specify the Internal Data Source Provider Hive
### What changes were proposed in this pull request?
In Spark 2.1, we introduced a new internal provider `hive` for distinguishing Hive serde tables from data source tables. This PR blocks users from specifying this provider in `DataFrameWriter` and the SQL APIs.
### How was this patch tested?
Added test cases to `CatalogSuite` and `MetastoreDataSourcesSuite`.
Author: gatorsmile <ga...@gmail.com>
Closes #15073 from gatorsmile/formatHive.
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/3a3c9ffb
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/3a3c9ffb
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/3a3c9ffb
Branch: refs/heads/master
Commit: 3a3c9ffbd282244407e9437c2b02ae7e062dd183
Parents: 8faa521
Author: gatorsmile <ga...@gmail.com>
Authored: Sun Sep 18 15:37:15 2016 +0800
Committer: Wenchen Fan <we...@databricks.com>
Committed: Sun Sep 18 15:37:15 2016 +0800
----------------------------------------------------------------------
.../org/apache/spark/sql/DataFrameWriter.scala | 3 ++
.../spark/sql/execution/SparkSqlParser.scala | 5 +-
.../apache/spark/sql/internal/CatalogImpl.scala | 2 +-
.../spark/sql/internal/CatalogSuite.scala | 7 +++
.../apache/spark/sql/hive/HiveStrategies.scala | 2 +-
.../sql/hive/MetastoreDataSourcesSuite.scala | 51 ++++++++++++++++++++
6 files changed, 67 insertions(+), 3 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/3a3c9ffb/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala
index e137f07..64d3422 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala
@@ -357,6 +357,9 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T]) {
}
private def saveAsTable(tableIdent: TableIdentifier): Unit = {
+ if (source.toLowerCase == "hive") {
+ throw new AnalysisException("Cannot create hive serde table with saveAsTable API")
+ }
val tableExists = df.sparkSession.sessionState.catalog.tableExists(tableIdent)
http://git-wip-us.apache.org/repos/asf/spark/blob/3a3c9ffb/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
index 7ba1a9f..5359ced 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
@@ -22,7 +22,7 @@ import scala.collection.JavaConverters._
import org.antlr.v4.runtime.{ParserRuleContext, Token}
import org.antlr.v4.runtime.tree.TerminalNode
-import org.apache.spark.sql.SaveMode
+import org.apache.spark.sql.{AnalysisException, SaveMode}
import org.apache.spark.sql.catalyst.{FunctionIdentifier, TableIdentifier}
import org.apache.spark.sql.catalyst.catalog._
import org.apache.spark.sql.catalyst.parser._
@@ -316,6 +316,9 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder {
}
val options = Option(ctx.tablePropertyList).map(visitPropertyKeyValues).getOrElse(Map.empty)
val provider = ctx.tableProvider.qualifiedName.getText
+ if (provider.toLowerCase == "hive") {
+ throw new AnalysisException("Cannot create hive serde table with CREATE TABLE USING")
+ }
val schema = Option(ctx.colTypeList()).map(createStructType)
val partitionColumnNames =
Option(ctx.partitionColumnNames)
http://git-wip-us.apache.org/repos/asf/spark/blob/3a3c9ffb/sql/core/src/main/scala/org/apache/spark/sql/internal/CatalogImpl.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/CatalogImpl.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/CatalogImpl.scala
index 1f87f0e..78ad710 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/internal/CatalogImpl.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/CatalogImpl.scala
@@ -258,7 +258,7 @@ class CatalogImpl(sparkSession: SparkSession) extends Catalog {
source: String,
schema: StructType,
options: Map[String, String]): DataFrame = {
- if (source == "hive") {
+ if (source.toLowerCase == "hive") {
throw new AnalysisException("Cannot create hive serde table with createExternalTable API.")
}
http://git-wip-us.apache.org/repos/asf/spark/blob/3a3c9ffb/sql/core/src/test/scala/org/apache/spark/sql/internal/CatalogSuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/internal/CatalogSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/internal/CatalogSuite.scala
index b221eed..549fd63 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/internal/CatalogSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/internal/CatalogSuite.scala
@@ -322,6 +322,13 @@ class CatalogSuite
assert(e2.message == "Cannot create a file-based external data source table without path")
}
+ test("createExternalTable should fail if provider is hive") {
+ val e = intercept[AnalysisException] {
+ spark.catalog.createExternalTable("tbl", "HiVe", Map.empty[String, String])
+ }
+ assert(e.message.contains("Cannot create hive serde table with createExternalTable API"))
+ }
+
// TODO: add tests for the rest of them
}
http://git-wip-us.apache.org/repos/asf/spark/blob/3a3c9ffb/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala
index fb11c84..9d29309 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala
@@ -61,7 +61,7 @@ private[hive] trait HiveStrategies {
// `ErrorIfExists` mode, and `DataFrameWriter.saveAsTable` doesn't support hive serde
// tables yet.
if (mode == SaveMode.Append || mode == SaveMode.Overwrite) {
- throw new AnalysisException("" +
+ throw new AnalysisException(
"CTAS for hive serde tables does not support append or overwrite semantics.")
}
http://git-wip-us.apache.org/repos/asf/spark/blob/3a3c9ffb/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala
index 3466733..0f331ba 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala
@@ -26,6 +26,7 @@ import org.apache.hadoop.fs.Path
import org.apache.spark.sql._
import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.catalog.{CatalogStorageFormat, CatalogTable, CatalogTableType}
+import org.apache.spark.sql.catalyst.parser.ParseException
import org.apache.spark.sql.execution.datasources.{HadoopFsRelation, LogicalRelation}
import org.apache.spark.sql.hive.HiveExternalCatalog._
import org.apache.spark.sql.hive.client.HiveClient
@@ -1151,6 +1152,56 @@ class MetastoreDataSourcesSuite extends QueryTest with SQLTestUtils with TestHiv
}
}
+ test("save API - format hive") {
+ withTempDir { dir =>
+ val path = dir.getCanonicalPath
+ val e = intercept[ClassNotFoundException] {
+ spark.range(10).write.format("hive").mode(SaveMode.Ignore).save(path)
+ }.getMessage
+ assert(e.contains("Failed to find data source: hive"))
+ }
+ }
+
+ test("saveAsTable API - format hive") {
+ val tableName = "tab1"
+ withTable(tableName) {
+ val e = intercept[AnalysisException] {
+ spark.range(10).write.format("hive").mode(SaveMode.Overwrite).saveAsTable(tableName)
+ }.getMessage
+ assert(e.contains("Cannot create hive serde table with saveAsTable API"))
+ }
+ }
+
+ test("create a data source table using hive") {
+ val tableName = "tab1"
+ withTable (tableName) {
+ val e = intercept[AnalysisException] {
+ sql(
+ s"""
+ |CREATE TABLE $tableName
+ |(col1 int)
+ |USING hive
+ """.stripMargin)
+ }.getMessage
+ assert(e.contains("Cannot create hive serde table with CREATE TABLE USING"))
+ }
+ }
+
+ test("create a temp view using hive") {
+ val tableName = "tab1"
+ withTable (tableName) {
+ val e = intercept[ClassNotFoundException] {
+ sql(
+ s"""
+ |CREATE TEMPORARY VIEW $tableName
+ |(col1 int)
+ |USING hive
+ """.stripMargin)
+ }.getMessage
+ assert(e.contains("Failed to find data source: hive"))
+ }
+ }
+
test("saveAsTable - source and target are the same table") {
val tableName = "tab1"
withTable(tableName) {
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org