You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by we...@apache.org on 2021/11/08 05:52:58 UTC
[spark] branch branch-3.2 updated: [SPARK-37214][SQL] Fail query
analysis earlier with invalid identifiers
This is an automated email from the ASF dual-hosted git repository.
wenchen pushed a commit to branch branch-3.2
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.2 by this push:
new e55bab5 [SPARK-37214][SQL] Fail query analysis earlier with invalid identifiers
e55bab5 is described below
commit e55bab5267b066fb78921ef6828924c32adbc637
Author: Wenchen Fan <we...@databricks.com>
AuthorDate: Mon Nov 8 13:33:30 2021 +0800
[SPARK-37214][SQL] Fail query analysis earlier with invalid identifiers
This is a follow-up of #31427, which introduced two issues:
1. When we look up `spark_catalog.t`, we used to fail earlier with `The namespace in session catalog must have exactly one name part` before that PR; now we fail very late, in `CheckAnalysis`, with `NoSuchTableException`.
2. The error message is a bit confusing now. We report `Table t not found` even if table `t` exists.
This PR fixes these two issues.
### Why are the changes needed?
Save analysis time and improve the error message.

### Does this PR introduce any user-facing change?
No.

### How was this patch tested?
Updated tests.
Closes #34490 from cloud-fan/table.
Authored-by: Wenchen Fan <we...@databricks.com>
Signed-off-by: Wenchen Fan <we...@databricks.com>
(cherry picked from commit 8ab9d6327d7db20a4257f9fe6d0b17919576be5e)
Signed-off-by: Wenchen Fan <we...@databricks.com>
---
.../sql/connector/catalog/LookupCatalog.scala | 4 +-
.../spark/sql/errors/QueryCompilationErrors.scala | 10 +---
.../spark/sql/catalyst/parser/DDLParserSuite.scala | 3 +
.../catalyst/analysis/ResolveSessionCatalog.scala | 2 +-
.../datasources/v2/V2SessionCatalog.scala | 4 +-
.../spark/sql/connector/DataSourceV2SQLSuite.scala | 66 +++++-----------------
.../spark/sql/execution/command/DDLSuite.scala | 3 +-
7 files changed, 27 insertions(+), 65 deletions(-)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/LookupCatalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/LookupCatalog.scala
index 0635859..0362caf 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/LookupCatalog.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/LookupCatalog.scala
@@ -191,8 +191,8 @@ private[sql] trait LookupCatalog extends Logging {
} else {
ident.namespace match {
case Array(db) => FunctionIdentifier(ident.name, Some(db))
- case _ =>
- throw QueryCompilationErrors.unsupportedFunctionNameError(ident.toString)
+ case other =>
+ throw QueryCompilationErrors.requiresSinglePartNamespaceError(other)
}
}
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala
index e7af006..7c2780a 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala
@@ -519,10 +519,6 @@ object QueryCompilationErrors {
"SHOW VIEWS, only SessionCatalog supports this command.")
}
- def unsupportedFunctionNameError(quoted: String): Throwable = {
- new AnalysisException(s"Unsupported function name '$quoted'")
- }
-
def sqlOnlySupportedWithV1TablesError(sql: String): Throwable = {
new AnalysisException(s"$sql is only supported with v1 tables.")
}
@@ -850,9 +846,9 @@ object QueryCompilationErrors {
new TableAlreadyExistsException(ident)
}
- def requiresSinglePartNamespaceError(ident: Identifier): Throwable = {
- new NoSuchTableException(
- s"V2 session catalog requires a single-part namespace: ${ident.quoted}")
+ def requiresSinglePartNamespaceError(ns: Seq[String]): Throwable = {
+ new AnalysisException(CatalogManager.SESSION_CATALOG_NAME +
+ " requires a single-part namespace, but got " + ns.mkString("[", ", ", "]"))
}
def namespaceAlreadyExistsError(namespace: Array[String]): Throwable = {
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala
index a1d9f89..886c9a6 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala
@@ -2237,6 +2237,9 @@ class DDLParserSuite extends AnalysisTest {
false,
LocalTempView)
comparePlans(parsed2, expected2)
+
+ val v3 = "CREATE TEMPORARY VIEW a.b AS SELECT 1"
+ intercept(v3, "It is not allowed to add database prefix")
}
test("create view - full") {
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala
index 80063cd..b73ccbb 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala
@@ -456,7 +456,7 @@ class ResolveSessionCatalog(val catalogManager: CatalogManager)
if (isTemp) {
// temp func doesn't belong to any catalog and we shouldn't resolve catalog in the name.
val database = if (nameParts.length > 2) {
- throw QueryCompilationErrors.unsupportedFunctionNameError(nameParts.quoted)
+ throw QueryCompilationErrors.requiresSinglePartNamespaceError(nameParts)
} else if (nameParts.length == 2) {
Some(nameParts.head)
} else {
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2SessionCatalog.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2SessionCatalog.scala
index 33b8f22..f0db368 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2SessionCatalog.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2SessionCatalog.scala
@@ -188,8 +188,8 @@ class V2SessionCatalog(catalog: SessionCatalog)
ident.namespace match {
case Array(db) =>
TableIdentifier(ident.name, Some(db))
- case _ =>
- throw QueryCompilationErrors.requiresSinglePartNamespaceError(ident)
+ case other =>
+ throw QueryCompilationErrors.requiresSinglePartNamespaceError(other)
}
}
}
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala
index a326b82..d5c3631 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala
@@ -2185,7 +2185,7 @@ class DataSourceV2SQLSuite
val e1 = intercept[AnalysisException] {
sql("DESCRIBE FUNCTION default.ns1.ns2.fun")
}
- assert(e1.message.contains("Unsupported function name 'default.ns1.ns2.fun'"))
+ assert(e1.message.contains("requires a single-part namespace"))
}
test("SHOW FUNCTIONS not valid v1 namespace") {
@@ -2206,7 +2206,7 @@ class DataSourceV2SQLSuite
val e1 = intercept[AnalysisException] {
sql("DROP FUNCTION default.ns1.ns2.fun")
}
- assert(e1.message.contains("Unsupported function name 'default.ns1.ns2.fun'"))
+ assert(e1.message.contains("requires a single-part namespace"))
}
test("CREATE FUNCTION: only support session catalog") {
@@ -2218,7 +2218,7 @@ class DataSourceV2SQLSuite
val e1 = intercept[AnalysisException] {
sql("CREATE FUNCTION default.ns1.ns2.fun as 'f'")
}
- assert(e1.message.contains("Unsupported function name 'default.ns1.ns2.fun'"))
+ assert(e1.message.contains("requires a single-part namespace"))
}
test("REFRESH FUNCTION: only support session catalog") {
@@ -2230,8 +2230,7 @@ class DataSourceV2SQLSuite
val e1 = intercept[AnalysisException] {
sql("REFRESH FUNCTION default.ns1.ns2.fun")
}
- assert(e1.message.contains(
- "Unsupported function name 'default.ns1.ns2.fun'"))
+ assert(e1.message.contains("requires a single-part namespace"))
}
test("global temp view should not be masked by v2 catalog") {
@@ -2301,26 +2300,7 @@ class DataSourceV2SQLSuite
def verify(sql: String): Unit = {
val e = intercept[AnalysisException](spark.sql(sql))
- assert(e.message.contains(s"Table or view not found: $t"),
- s"Error message did not contain expected text while evaluting $sql")
- }
-
- def verifyView(sql: String): Unit = {
- val e = intercept[AnalysisException](spark.sql(sql))
- assert(e.message.contains(s"View not found: $t"),
- s"Error message did not contain expected text while evaluting $sql")
- }
-
- def verifyTable(sql: String): Unit = {
- val e = intercept[AnalysisException](spark.sql(sql))
- assert(e.message.contains(s"Table not found: $t"),
- s"Error message did not contain expected text while evaluting $sql")
- }
-
- def verifyGeneric(sql: String): Unit = {
- val e = intercept[AnalysisException](spark.sql(sql))
- assert(e.message.contains(s"not found: $t"),
- s"Error message did not contain expected text while evaluting $sql")
+ assert(e.getMessage.contains("requires a single-part namespace"))
}
verify(s"select * from $t")
@@ -2328,16 +2308,16 @@ class DataSourceV2SQLSuite
verify(s"REFRESH TABLE $t")
verify(s"DESCRIBE $t i")
verify(s"DROP TABLE $t")
- verifyView(s"DROP VIEW $t")
- verifyGeneric(s"ANALYZE TABLE $t COMPUTE STATISTICS")
- verifyGeneric(s"ANALYZE TABLE $t COMPUTE STATISTICS FOR ALL COLUMNS")
- verifyTable(s"MSCK REPAIR TABLE $t")
- verifyTable(s"LOAD DATA INPATH 'filepath' INTO TABLE $t")
- verifyGeneric(s"SHOW CREATE TABLE $t")
- verifyGeneric(s"SHOW CREATE TABLE $t AS SERDE")
- verifyGeneric(s"CACHE TABLE $t")
- verifyGeneric(s"UNCACHE TABLE $t")
- verifyGeneric(s"TRUNCATE TABLE $t")
+ verify(s"DROP VIEW $t")
+ verify(s"ANALYZE TABLE $t COMPUTE STATISTICS")
+ verify(s"ANALYZE TABLE $t COMPUTE STATISTICS FOR ALL COLUMNS")
+ verify(s"MSCK REPAIR TABLE $t")
+ verify(s"LOAD DATA INPATH 'filepath' INTO TABLE $t")
+ verify(s"SHOW CREATE TABLE $t")
+ verify(s"SHOW CREATE TABLE $t AS SERDE")
+ verify(s"CACHE TABLE $t")
+ verify(s"UNCACHE TABLE $t")
+ verify(s"TRUNCATE TABLE $t")
verify(s"SHOW COLUMNS FROM $t")
}
}
@@ -2477,22 +2457,6 @@ class DataSourceV2SQLSuite
.head().getString(1) === expectedComment)
}
- test("SPARK-30799: temp view name can't contain catalog name") {
- val sessionCatalogName = CatalogManager.SESSION_CATALOG_NAME
- withTempView("v") {
- spark.range(10).createTempView("v")
- val e1 = intercept[AnalysisException](
- sql(s"CACHE TABLE $sessionCatalogName.v")
- )
- assert(e1.message.contains(
- "Table or view not found: spark_catalog.v"))
- }
- val e2 = intercept[AnalysisException] {
- sql(s"CREATE TEMP VIEW $sessionCatalogName.v AS SELECT 1")
- }
- assert(e2.message.contains("It is not allowed to add database prefix"))
- }
-
test("SPARK-31015: star expression should work for qualified column names for v2 tables") {
val t = "testcat.ns1.ns2.tbl"
withTable(t) {
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala
index ba87883..2d949e3 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala
@@ -1733,8 +1733,7 @@ abstract class DDLSuite extends QueryTest with SQLTestUtils {
val message = intercept[AnalysisException] {
sql("SHOW COLUMNS IN tbl FROM a.b.c")
}.getMessage
- assert(message.contains(
- "Table or view not found: a.b.c.tbl"))
+ assert(message.contains("requires a single-part namespace"))
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org