You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by we...@apache.org on 2021/11/08 05:52:58 UTC

[spark] branch branch-3.2 updated: [SPARK-37214][SQL] Fail query analysis earlier with invalid identifiers

This is an automated email from the ASF dual-hosted git repository.

wenchen pushed a commit to branch branch-3.2
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-3.2 by this push:
     new e55bab5  [SPARK-37214][SQL] Fail query analysis earlier with invalid identifiers
e55bab5 is described below

commit e55bab5267b066fb78921ef6828924c32adbc637
Author: Wenchen Fan <we...@databricks.com>
AuthorDate: Mon Nov 8 13:33:30 2021 +0800

    [SPARK-37214][SQL] Fail query analysis earlier with invalid identifiers
    
    This is a followup of #31427 , which introduced two issues:
    1. When we lookup `spark_catalog.t`, we failed earlier with `The namespace in session catalog must have exactly one name part` before that PR, now we fail very late in `CheckAnalysis` with `NoSuchTableException`
    2. The error message is a bit confusing now. We report `Table t not found` even if table `t` exists.
    
    This PR fixes the 2 issues.
    
    save analysis time and improve error message
    
    no
    
    updated test
    
    Closes #34490 from cloud-fan/table.
    
    Authored-by: Wenchen Fan <we...@databricks.com>
    Signed-off-by: Wenchen Fan <we...@databricks.com>
    (cherry picked from commit 8ab9d6327d7db20a4257f9fe6d0b17919576be5e)
    Signed-off-by: Wenchen Fan <we...@databricks.com>
---
 .../sql/connector/catalog/LookupCatalog.scala      |  4 +-
 .../spark/sql/errors/QueryCompilationErrors.scala  | 10 +---
 .../spark/sql/catalyst/parser/DDLParserSuite.scala |  3 +
 .../catalyst/analysis/ResolveSessionCatalog.scala  |  2 +-
 .../datasources/v2/V2SessionCatalog.scala          |  4 +-
 .../spark/sql/connector/DataSourceV2SQLSuite.scala | 66 +++++-----------------
 .../spark/sql/execution/command/DDLSuite.scala     |  3 +-
 7 files changed, 27 insertions(+), 65 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/LookupCatalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/LookupCatalog.scala
index 0635859..0362caf 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/LookupCatalog.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/LookupCatalog.scala
@@ -191,8 +191,8 @@ private[sql] trait LookupCatalog extends Logging {
         } else {
           ident.namespace match {
             case Array(db) => FunctionIdentifier(ident.name, Some(db))
-            case _ =>
-              throw QueryCompilationErrors.unsupportedFunctionNameError(ident.toString)
+            case other =>
+              throw QueryCompilationErrors.requiresSinglePartNamespaceError(other)
           }
         }
 
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala
index e7af006..7c2780a 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala
@@ -519,10 +519,6 @@ object QueryCompilationErrors {
       "SHOW VIEWS, only SessionCatalog supports this command.")
   }
 
-  def unsupportedFunctionNameError(quoted: String): Throwable = {
-    new AnalysisException(s"Unsupported function name '$quoted'")
-  }
-
   def sqlOnlySupportedWithV1TablesError(sql: String): Throwable = {
     new AnalysisException(s"$sql is only supported with v1 tables.")
   }
@@ -850,9 +846,9 @@ object QueryCompilationErrors {
     new TableAlreadyExistsException(ident)
   }
 
-  def requiresSinglePartNamespaceError(ident: Identifier): Throwable = {
-    new NoSuchTableException(
-      s"V2 session catalog requires a single-part namespace: ${ident.quoted}")
+  def requiresSinglePartNamespaceError(ns: Seq[String]): Throwable = {
+    new AnalysisException(CatalogManager.SESSION_CATALOG_NAME +
+      " requires a single-part namespace, but got " + ns.mkString("[", ", ", "]"))
   }
 
   def namespaceAlreadyExistsError(namespace: Array[String]): Throwable = {
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala
index a1d9f89..886c9a6 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala
@@ -2237,6 +2237,9 @@ class DDLParserSuite extends AnalysisTest {
       false,
       LocalTempView)
     comparePlans(parsed2, expected2)
+
+    val v3 = "CREATE TEMPORARY VIEW a.b AS SELECT 1"
+    intercept(v3, "It is not allowed to add database prefix")
   }
 
   test("create view - full") {
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala
index 80063cd..b73ccbb 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala
@@ -456,7 +456,7 @@ class ResolveSessionCatalog(val catalogManager: CatalogManager)
       if (isTemp) {
         // temp func doesn't belong to any catalog and we shouldn't resolve catalog in the name.
         val database = if (nameParts.length > 2) {
-          throw QueryCompilationErrors.unsupportedFunctionNameError(nameParts.quoted)
+          throw QueryCompilationErrors.requiresSinglePartNamespaceError(nameParts)
         } else if (nameParts.length == 2) {
           Some(nameParts.head)
         } else {
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2SessionCatalog.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2SessionCatalog.scala
index 33b8f22..f0db368 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2SessionCatalog.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2SessionCatalog.scala
@@ -188,8 +188,8 @@ class V2SessionCatalog(catalog: SessionCatalog)
       ident.namespace match {
         case Array(db) =>
           TableIdentifier(ident.name, Some(db))
-        case _ =>
-          throw QueryCompilationErrors.requiresSinglePartNamespaceError(ident)
+        case other =>
+          throw QueryCompilationErrors.requiresSinglePartNamespaceError(other)
       }
     }
   }
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala
index a326b82..d5c3631 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala
@@ -2185,7 +2185,7 @@ class DataSourceV2SQLSuite
     val e1 = intercept[AnalysisException] {
       sql("DESCRIBE FUNCTION default.ns1.ns2.fun")
     }
-    assert(e1.message.contains("Unsupported function name 'default.ns1.ns2.fun'"))
+    assert(e1.message.contains("requires a single-part namespace"))
   }
 
   test("SHOW FUNCTIONS not valid v1 namespace") {
@@ -2206,7 +2206,7 @@ class DataSourceV2SQLSuite
     val e1 = intercept[AnalysisException] {
       sql("DROP FUNCTION default.ns1.ns2.fun")
     }
-    assert(e1.message.contains("Unsupported function name 'default.ns1.ns2.fun'"))
+    assert(e1.message.contains("requires a single-part namespace"))
   }
 
   test("CREATE FUNCTION: only support session catalog") {
@@ -2218,7 +2218,7 @@ class DataSourceV2SQLSuite
     val e1 = intercept[AnalysisException] {
       sql("CREATE FUNCTION default.ns1.ns2.fun as 'f'")
     }
-    assert(e1.message.contains("Unsupported function name 'default.ns1.ns2.fun'"))
+    assert(e1.message.contains("requires a single-part namespace"))
   }
 
   test("REFRESH FUNCTION: only support session catalog") {
@@ -2230,8 +2230,7 @@ class DataSourceV2SQLSuite
     val e1 = intercept[AnalysisException] {
       sql("REFRESH FUNCTION default.ns1.ns2.fun")
     }
-    assert(e1.message.contains(
-      "Unsupported function name 'default.ns1.ns2.fun'"))
+    assert(e1.message.contains("requires a single-part namespace"))
   }
 
   test("global temp view should not be masked by v2 catalog") {
@@ -2301,26 +2300,7 @@ class DataSourceV2SQLSuite
 
         def verify(sql: String): Unit = {
           val e = intercept[AnalysisException](spark.sql(sql))
-          assert(e.message.contains(s"Table or view not found: $t"),
-            s"Error message did not contain expected text while evaluting $sql")
-        }
-
-        def verifyView(sql: String): Unit = {
-          val e = intercept[AnalysisException](spark.sql(sql))
-          assert(e.message.contains(s"View not found: $t"),
-            s"Error message did not contain expected text while evaluting $sql")
-        }
-
-        def verifyTable(sql: String): Unit = {
-          val e = intercept[AnalysisException](spark.sql(sql))
-          assert(e.message.contains(s"Table not found: $t"),
-            s"Error message did not contain expected text while evaluting $sql")
-        }
-
-        def verifyGeneric(sql: String): Unit = {
-          val e = intercept[AnalysisException](spark.sql(sql))
-          assert(e.message.contains(s"not found: $t"),
-            s"Error message did not contain expected text while evaluting $sql")
+          assert(e.getMessage.contains("requires a single-part namespace"))
         }
 
         verify(s"select * from $t")
@@ -2328,16 +2308,16 @@ class DataSourceV2SQLSuite
         verify(s"REFRESH TABLE $t")
         verify(s"DESCRIBE $t i")
         verify(s"DROP TABLE $t")
-        verifyView(s"DROP VIEW $t")
-        verifyGeneric(s"ANALYZE TABLE $t COMPUTE STATISTICS")
-        verifyGeneric(s"ANALYZE TABLE $t COMPUTE STATISTICS FOR ALL COLUMNS")
-        verifyTable(s"MSCK REPAIR TABLE $t")
-        verifyTable(s"LOAD DATA INPATH 'filepath' INTO TABLE $t")
-        verifyGeneric(s"SHOW CREATE TABLE $t")
-        verifyGeneric(s"SHOW CREATE TABLE $t AS SERDE")
-        verifyGeneric(s"CACHE TABLE $t")
-        verifyGeneric(s"UNCACHE TABLE $t")
-        verifyGeneric(s"TRUNCATE TABLE $t")
+        verify(s"DROP VIEW $t")
+        verify(s"ANALYZE TABLE $t COMPUTE STATISTICS")
+        verify(s"ANALYZE TABLE $t COMPUTE STATISTICS FOR ALL COLUMNS")
+        verify(s"MSCK REPAIR TABLE $t")
+        verify(s"LOAD DATA INPATH 'filepath' INTO TABLE $t")
+        verify(s"SHOW CREATE TABLE $t")
+        verify(s"SHOW CREATE TABLE $t AS SERDE")
+        verify(s"CACHE TABLE $t")
+        verify(s"UNCACHE TABLE $t")
+        verify(s"TRUNCATE TABLE $t")
         verify(s"SHOW COLUMNS FROM $t")
       }
     }
@@ -2477,22 +2457,6 @@ class DataSourceV2SQLSuite
       .head().getString(1) === expectedComment)
   }
 
-  test("SPARK-30799: temp view name can't contain catalog name") {
-    val sessionCatalogName = CatalogManager.SESSION_CATALOG_NAME
-    withTempView("v") {
-      spark.range(10).createTempView("v")
-      val e1 = intercept[AnalysisException](
-        sql(s"CACHE TABLE $sessionCatalogName.v")
-      )
-      assert(e1.message.contains(
-        "Table or view not found: spark_catalog.v"))
-    }
-    val e2 = intercept[AnalysisException] {
-      sql(s"CREATE TEMP VIEW $sessionCatalogName.v AS SELECT 1")
-    }
-    assert(e2.message.contains("It is not allowed to add database prefix"))
-  }
-
   test("SPARK-31015: star expression should work for qualified column names for v2 tables") {
     val t = "testcat.ns1.ns2.tbl"
     withTable(t) {
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala
index ba87883..2d949e3 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala
@@ -1733,8 +1733,7 @@ abstract class DDLSuite extends QueryTest with SQLTestUtils {
       val message = intercept[AnalysisException] {
         sql("SHOW COLUMNS IN tbl FROM a.b.c")
       }.getMessage
-      assert(message.contains(
-        "Table or view not found: a.b.c.tbl"))
+      assert(message.contains("requires a single-part namespace"))
     }
   }
 

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org