You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by hv...@apache.org on 2016/08/25 12:38:48 UTC
spark git commit: [SPARK-17167][2.0][SQL] Issue Exceptions when
Analyze Table on In-Memory Cataloged Tables
Repository: spark
Updated Branches:
refs/heads/branch-2.0 48ecf3d00 -> 2b32a442d
[SPARK-17167][2.0][SQL] Issue Exceptions when Analyze Table on In-Memory Cataloged Tables
### What changes were proposed in this pull request?
Currently, `Analyze Table` is only supported for Hive-serde tables, so we should issue an exception in all other cases. When the table is a data source table, we already issue an exception. However, when the table is an in-memory cataloged table, no exception is issued.
This PR issues an exception when the table is an in-memory cataloged table. For example,
```SQL
CREATE TABLE tbl(a INT, b INT) USING parquet
```
`tbl` is a `SimpleCatalogRelation` when Hive support is not enabled.
### How was this patch tested?
Added two test cases. One of them only improves test coverage for the case where the analyzed table is a data source table.
Author: gatorsmile <ga...@gmail.com>
Closes #14781 from gatorsmile/analyzeInMemoryTable2.
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/2b32a442
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/2b32a442
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/2b32a442
Branch: refs/heads/branch-2.0
Commit: 2b32a442dfbc8494c30dcb2f6869c9dc7f258ada
Parents: 48ecf3d
Author: gatorsmile <ga...@gmail.com>
Authored: Thu Aug 25 14:38:41 2016 +0200
Committer: Herman van Hovell <hv...@databricks.com>
Committed: Thu Aug 25 14:38:41 2016 +0200
----------------------------------------------------------------------
.../sql/execution/command/AnalyzeTableCommand.scala | 4 ++--
.../spark/sql/execution/command/DDLSuite.scala | 11 +++++++++++
.../spark/sql/hive/execution/HiveDDLSuite.scala | 15 +++++++++++++++
3 files changed, 28 insertions(+), 2 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/2b32a442/sql/core/src/main/scala/org/apache/spark/sql/execution/command/AnalyzeTableCommand.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/AnalyzeTableCommand.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/AnalyzeTableCommand.scala
index a469d4d..9509b66 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/AnalyzeTableCommand.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/AnalyzeTableCommand.scala
@@ -23,7 +23,7 @@ import org.apache.hadoop.fs.{FileSystem, Path}
import org.apache.spark.sql.{AnalysisException, Row, SparkSession}
import org.apache.spark.sql.catalyst.analysis.EliminateSubqueryAliases
-import org.apache.spark.sql.catalyst.catalog.{CatalogRelation, CatalogTable}
+import org.apache.spark.sql.catalyst.catalog.{CatalogRelation, CatalogTable, SimpleCatalogRelation}
/**
@@ -41,7 +41,7 @@ case class AnalyzeTableCommand(tableName: String) extends RunnableCommand {
val relation = EliminateSubqueryAliases(sessionState.catalog.lookupRelation(tableIdent))
relation match {
- case relation: CatalogRelation =>
+ case relation: CatalogRelation if !relation.isInstanceOf[SimpleCatalogRelation] =>
val catalogTable: CatalogTable = relation.catalogTable
// This method is mainly based on
// org.apache.hadoop.hive.ql.stats.StatsUtils.getFileSizeForTable(HiveConf, Table)
http://git-wip-us.apache.org/repos/asf/spark/blob/2b32a442/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala
index d70cae7..9565471 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala
@@ -395,6 +395,17 @@ class DDLSuite extends QueryTest with SharedSQLContext with BeforeAndAfterEach {
assert(catalog.getTableMetadata(tableIdent1) === expectedTable)
}
+ test("Analyze in-memory cataloged tables(SimpleCatalogRelation)") {
+ withTable("tbl") {
+ sql("CREATE TABLE tbl(a INT, b INT) USING parquet")
+ val e = intercept[AnalysisException] {
+ sql("ANALYZE TABLE tbl COMPUTE STATISTICS")
+ }.getMessage
+ assert(e.contains("ANALYZE TABLE is only supported for Hive tables, " +
+ "but 'tbl' is a SimpleCatalogRelation"))
+ }
+ }
+
test("create table using") {
val catalog = spark.sessionState.catalog
withTable("tbl") {
http://git-wip-us.apache.org/repos/asf/spark/blob/2b32a442/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
index 9228242..df6cd56 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
@@ -594,6 +594,21 @@ class HiveDDLSuite
}
}
+ test("Analyze data source tables(LogicalRelation)") {
+ withTable("t1") {
+ withTempPath { dir =>
+ val path = dir.getCanonicalPath
+ spark.range(1).write.format("parquet").save(path)
+ sql(s"CREATE TABLE t1 USING parquet OPTIONS (PATH '$path')")
+ val e = intercept[AnalysisException] {
+ sql("ANALYZE TABLE t1 COMPUTE STATISTICS")
+ }.getMessage
+ assert(e.contains("ANALYZE TABLE is only supported for Hive tables, " +
+ "but 't1' is a LogicalRelation"))
+ }
+ }
+ }
+
test("desc table for data source table") {
withTable("tab1") {
val tabName = "tab1"
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org