You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by we...@apache.org on 2021/11/08 05:34:11 UTC
[spark] branch master updated: [SPARK-37214][SQL] Fail query
analysis earlier with invalid identifiers
This is an automated email from the ASF dual-hosted git repository.
wenchen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 8ab9d63 [SPARK-37214][SQL] Fail query analysis earlier with invalid identifiers
8ab9d63 is described below
commit 8ab9d6327d7db20a4257f9fe6d0b17919576be5e
Author: Wenchen Fan <we...@databricks.com>
AuthorDate: Mon Nov 8 13:33:30 2021 +0800
[SPARK-37214][SQL] Fail query analysis earlier with invalid identifiers
### What changes were proposed in this pull request?
This is a follow-up of #31427, which introduced two issues:
1. When we look up `spark_catalog.t`, we failed earlier with `The namespace in session catalog must have exactly one name part` before that PR; now we fail very late in `CheckAnalysis` with `NoSuchTableException`
2. The error message is a bit confusing now. We report `Table t not found` even if table `t` exists.
This PR fixes these two issues.
### Why are the changes needed?
Save analysis time and improve the error message.
### Does this PR introduce _any_ user-facing change?
no
### How was this patch tested?
updated test
Closes #34490 from cloud-fan/table.
Authored-by: Wenchen Fan <we...@databricks.com>
Signed-off-by: Wenchen Fan <we...@databricks.com>
---
.../sql/connector/catalog/LookupCatalog.scala | 4 +-
.../spark/sql/errors/QueryCompilationErrors.scala | 10 +---
.../catalyst/analysis/ResolveSessionCatalog.scala | 2 +-
.../datasources/v2/V2SessionCatalog.scala | 4 +-
.../spark/sql/connector/DataSourceV2SQLSuite.scala | 66 +++++-----------------
.../sql/execution/command/DDLParserSuite.scala | 3 +
.../spark/sql/execution/command/DDLSuite.scala | 3 +-
7 files changed, 27 insertions(+), 65 deletions(-)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/LookupCatalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/LookupCatalog.scala
index 0635859..0362caf 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/LookupCatalog.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/LookupCatalog.scala
@@ -191,8 +191,8 @@ private[sql] trait LookupCatalog extends Logging {
} else {
ident.namespace match {
case Array(db) => FunctionIdentifier(ident.name, Some(db))
- case _ =>
- throw QueryCompilationErrors.unsupportedFunctionNameError(ident.toString)
+ case other =>
+ throw QueryCompilationErrors.requiresSinglePartNamespaceError(other)
}
}
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala
index 527a2b9..b7f4cce 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala
@@ -530,10 +530,6 @@ object QueryCompilationErrors {
"SHOW VIEWS, only SessionCatalog supports this command.")
}
- def unsupportedFunctionNameError(quoted: String): Throwable = {
- new AnalysisException(s"Unsupported function name '$quoted'")
- }
-
def sqlOnlySupportedWithV1TablesError(sql: String): Throwable = {
new AnalysisException(s"$sql is only supported with v1 tables.")
}
@@ -861,9 +857,9 @@ object QueryCompilationErrors {
new TableAlreadyExistsException(ident)
}
- def requiresSinglePartNamespaceError(ident: Identifier): Throwable = {
- new NoSuchTableException(
- s"V2 session catalog requires a single-part namespace: ${ident.quoted}")
+ def requiresSinglePartNamespaceError(ns: Seq[String]): Throwable = {
+ new AnalysisException(CatalogManager.SESSION_CATALOG_NAME +
+ " requires a single-part namespace, but got " + ns.mkString("[", ", ", "]"))
}
def namespaceAlreadyExistsError(namespace: Array[String]): Throwable = {
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala
index f211054..e5be7f4 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala
@@ -430,7 +430,7 @@ class ResolveSessionCatalog(val catalogManager: CatalogManager)
className, resources, ignoreIfExists, replace) =>
if (isSessionCatalog(catalog)) {
val database = if (nameParts.length > 2) {
- throw QueryCompilationErrors.unsupportedFunctionNameError(nameParts.quoted)
+ throw QueryCompilationErrors.requiresSinglePartNamespaceError(nameParts)
} else if (nameParts.length == 2) {
Some(nameParts.head)
} else {
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2SessionCatalog.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2SessionCatalog.scala
index 1202498..5bfeac9 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2SessionCatalog.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2SessionCatalog.scala
@@ -188,8 +188,8 @@ class V2SessionCatalog(catalog: SessionCatalog)
ident.namespace match {
case Array(db) =>
TableIdentifier(ident.name, Some(db))
- case _ =>
- throw QueryCompilationErrors.requiresSinglePartNamespaceError(ident)
+ case other =>
+ throw QueryCompilationErrors.requiresSinglePartNamespaceError(other)
}
}
}
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala
index d2d46d6..f03792f 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala
@@ -2184,7 +2184,7 @@ class DataSourceV2SQLSuite
val e1 = intercept[AnalysisException] {
sql("DESCRIBE FUNCTION default.ns1.ns2.fun")
}
- assert(e1.message.contains("Unsupported function name 'default.ns1.ns2.fun'"))
+ assert(e1.message.contains("requires a single-part namespace"))
}
test("SHOW FUNCTIONS not valid v1 namespace") {
@@ -2205,7 +2205,7 @@ class DataSourceV2SQLSuite
val e1 = intercept[AnalysisException] {
sql("DROP FUNCTION default.ns1.ns2.fun")
}
- assert(e1.message.contains("Unsupported function name 'default.ns1.ns2.fun'"))
+ assert(e1.message.contains("requires a single-part namespace"))
}
test("CREATE FUNCTION: only support session catalog") {
@@ -2217,7 +2217,7 @@ class DataSourceV2SQLSuite
val e1 = intercept[AnalysisException] {
sql("CREATE FUNCTION default.ns1.ns2.fun as 'f'")
}
- assert(e1.message.contains("Unsupported function name 'default.ns1.ns2.fun'"))
+ assert(e1.message.contains("requires a single-part namespace"))
}
test("REFRESH FUNCTION: only support session catalog") {
@@ -2229,8 +2229,7 @@ class DataSourceV2SQLSuite
val e1 = intercept[AnalysisException] {
sql("REFRESH FUNCTION default.ns1.ns2.fun")
}
- assert(e1.message.contains(
- "Unsupported function name 'default.ns1.ns2.fun'"))
+ assert(e1.message.contains("requires a single-part namespace"))
}
test("global temp view should not be masked by v2 catalog") {
@@ -2300,26 +2299,7 @@ class DataSourceV2SQLSuite
def verify(sql: String): Unit = {
val e = intercept[AnalysisException](spark.sql(sql))
- assert(e.message.contains(s"Table or view not found: $t"),
- s"Error message did not contain expected text while evaluting $sql")
- }
-
- def verifyView(sql: String): Unit = {
- val e = intercept[AnalysisException](spark.sql(sql))
- assert(e.message.contains(s"View not found: $t"),
- s"Error message did not contain expected text while evaluting $sql")
- }
-
- def verifyTable(sql: String): Unit = {
- val e = intercept[AnalysisException](spark.sql(sql))
- assert(e.message.contains(s"Table not found: $t"),
- s"Error message did not contain expected text while evaluting $sql")
- }
-
- def verifyGeneric(sql: String): Unit = {
- val e = intercept[AnalysisException](spark.sql(sql))
- assert(e.message.contains(s"not found: $t"),
- s"Error message did not contain expected text while evaluting $sql")
+ assert(e.getMessage.contains("requires a single-part namespace"))
}
verify(s"select * from $t")
@@ -2327,16 +2307,16 @@ class DataSourceV2SQLSuite
verify(s"REFRESH TABLE $t")
verify(s"DESCRIBE $t i")
verify(s"DROP TABLE $t")
- verifyView(s"DROP VIEW $t")
- verifyGeneric(s"ANALYZE TABLE $t COMPUTE STATISTICS")
- verifyGeneric(s"ANALYZE TABLE $t COMPUTE STATISTICS FOR ALL COLUMNS")
- verifyTable(s"MSCK REPAIR TABLE $t")
- verifyTable(s"LOAD DATA INPATH 'filepath' INTO TABLE $t")
- verifyGeneric(s"SHOW CREATE TABLE $t")
- verifyGeneric(s"SHOW CREATE TABLE $t AS SERDE")
- verifyGeneric(s"CACHE TABLE $t")
- verifyGeneric(s"UNCACHE TABLE $t")
- verifyGeneric(s"TRUNCATE TABLE $t")
+ verify(s"DROP VIEW $t")
+ verify(s"ANALYZE TABLE $t COMPUTE STATISTICS")
+ verify(s"ANALYZE TABLE $t COMPUTE STATISTICS FOR ALL COLUMNS")
+ verify(s"MSCK REPAIR TABLE $t")
+ verify(s"LOAD DATA INPATH 'filepath' INTO TABLE $t")
+ verify(s"SHOW CREATE TABLE $t")
+ verify(s"SHOW CREATE TABLE $t AS SERDE")
+ verify(s"CACHE TABLE $t")
+ verify(s"UNCACHE TABLE $t")
+ verify(s"TRUNCATE TABLE $t")
verify(s"SHOW COLUMNS FROM $t")
}
}
@@ -2476,22 +2456,6 @@ class DataSourceV2SQLSuite
.head().getString(1) === expectedComment)
}
- test("SPARK-30799: temp view name can't contain catalog name") {
- val sessionCatalogName = CatalogManager.SESSION_CATALOG_NAME
- withTempView("v") {
- spark.range(10).createTempView("v")
- val e1 = intercept[AnalysisException](
- sql(s"CACHE TABLE $sessionCatalogName.v")
- )
- assert(e1.message.contains(
- "Table or view not found: spark_catalog.v"))
- }
- val e2 = intercept[AnalysisException] {
- sql(s"CREATE TEMP VIEW $sessionCatalogName.v AS SELECT 1")
- }
- assert(e2.message.contains("It is not allowed to add database prefix"))
- }
-
test("SPARK-31015: star expression should work for qualified column names for v2 tables") {
val t = "testcat.ns1.ns2.tbl"
withTable(t) {
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLParserSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLParserSuite.scala
index f99a5ab..ca6a30b 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLParserSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLParserSuite.scala
@@ -359,6 +359,9 @@ class DDLParserSuite extends AnalysisTest with SharedSparkSession {
false,
LocalTempView)
comparePlans(parsed2, expected2)
+
+ val v3 = "CREATE TEMPORARY VIEW a.b AS SELECT 1"
+ intercept(v3, "It is not allowed to add database prefix")
}
test("create temp view - full") {
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala
index d01cf4e..b86250f 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala
@@ -1718,8 +1718,7 @@ abstract class DDLSuite extends QueryTest with SQLTestUtils {
val message = intercept[AnalysisException] {
sql("SHOW COLUMNS IN tbl FROM a.b.c")
}.getMessage
- assert(message.contains(
- "Table or view not found: a.b.c.tbl"))
+ assert(message.contains("requires a single-part namespace"))
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org