You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by ge...@apache.org on 2022/06/20 17:36:00 UTC

[spark] branch master updated: [SPARK-39497][SQL] Improve the analysis exception of missing map key column

This is an automated email from the ASF dual-hosted git repository.

gengliang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 1f1d7964902 [SPARK-39497][SQL] Improve the analysis exception of missing map key column
1f1d7964902 is described below

commit 1f1d79649027a4c03e48dea2bcef280dca53767a
Author: Gengliang Wang <ge...@apache.org>
AuthorDate: Mon Jun 20 10:35:22 2022 -0700

    [SPARK-39497][SQL] Improve the analysis exception of missing map key column
    
    ### What changes were proposed in this pull request?
    
    Sometimes users forget to put single quotes around a map key string literal, for example `map_col[a]`. In such a case, the Analyzer throws an exception:
    ```
    [MISSING_COLUMN] Column 'struct.a' does not exist. Did you mean one of the following? ...
    ```
    We can improve this message by telling the user to add single quotes around the map key if it is a string literal.
    
    ```
    [UNRESOLVED_MAP_KEY] Cannot resolve column 'a' as a map key. If the key is a string literal, please add single quotes around it. Otherwise, did you mean one of the following column(s)? ...
    ```
    
    ### Why are the changes needed?
    
    Error message improvement
    
    ### Does this PR introduce _any_ user-facing change?
    
    Yes, but it is trivial: the error message for an unresolved map key column is improved.
    
    ### How was this patch tested?
    
    New unit test in `QueryCompilationErrorsSuite`.
    
    Closes #36896 from gengliangwang/unreslovedMapKey.
    
    Authored-by: Gengliang Wang <ge...@apache.org>
    Signed-off-by: Gengliang Wang <ge...@apache.org>
---
 core/src/main/resources/error/error-classes.json   |  8 ++++-
 .../spark/sql/catalyst/analysis/Analyzer.scala     |  4 +--
 .../sql/catalyst/analysis/CheckAnalysis.scala      | 39 +++++++++++++++++-----
 .../expressions/complexTypeExtractors.scala        |  2 +-
 .../spark/sql/errors/QueryCompilationErrors.scala  |  9 +++--
 .../sql/errors/QueryCompilationErrorsSuite.scala   | 12 +++++++
 6 files changed, 58 insertions(+), 16 deletions(-)

diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json
index d4c0910c5ad..f9257b6c21b 100644
--- a/core/src/main/resources/error/error-classes.json
+++ b/core/src/main/resources/error/error-classes.json
@@ -352,6 +352,12 @@
     ],
     "sqlState" : "42000"
   },
+  "UNRESOLVED_MAP_KEY" : {
+    "message" : [
+      "Cannot resolve column <columnName> as a map key. If the key is a string literal, please add single quotes around it. Otherwise, did you mean one of the following column(s)? [<proposal>]"
+    ],
+    "sqlState" : "42000"
+  },
   "UNSUPPORTED_DATATYPE" : {
     "message" : [
       "Unsupported data type <typeName>"
@@ -556,4 +562,4 @@
     ],
     "sqlState" : "40000"
   }
-}
\ No newline at end of file
+}
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
index 931a0fcf77f..4d2dd175260 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
@@ -3420,8 +3420,8 @@ class Analyzer(override val catalogManager: CatalogManager)
 
       i.userSpecifiedCols.map { col =>
         i.table.resolve(Seq(col), resolver).getOrElse(
-          throw QueryCompilationErrors.unresolvedColumnError(
-            col, i.table.output.map(_.name), i.origin))
+          throw QueryCompilationErrors.unresolvedAttributeError(
+            "UNRESOLVED_COLUMN", col, i.table.output.map(_.name), i.origin))
       }
     }
 
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala
index f9f8b590a31..759683b8c00 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala
@@ -91,6 +91,26 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog {
     }
   }
 
+  private def isMapWithStringKey(e: Expression): Boolean = if (e.resolved) {
+    e.dataType match {
+      case m: MapType => m.keyType.isInstanceOf[StringType]
+      case _ => false
+    }
+  } else {
+    false
+  }
+
+  private def failUnresolvedAttribute(
+      operator: LogicalPlan,
+      a: Attribute,
+      errorClass: String): Nothing = {
+    val missingCol = a.sql
+    val candidates = operator.inputSet.toSeq.map(_.qualifiedName)
+    val orderedCandidates = StringUtils.orderStringsBySimilarity(missingCol, candidates)
+    throw QueryCompilationErrors.unresolvedAttributeError(
+      errorClass, missingCol, orderedCandidates, a.origin)
+  }
+
   def checkAnalysis(plan: LogicalPlan): Unit = {
     // We transform up and order the rules so as to catch the first possible failure instead
     // of the result of cascading resolution failures. Inline all CTEs in the plan to help check
@@ -160,11 +180,11 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog {
         throw QueryCompilationErrors.commandUnsupportedInV2TableError("SHOW TABLE EXTENDED")
 
       case operator: LogicalPlan =>
-        // Check argument data types of higher-order functions downwards first.
-        // If the arguments of the higher-order functions are resolved but the type check fails,
-        // the argument functions will not get resolved, but we should report the argument type
-        // check failure instead of claiming the argument functions are unresolved.
         operator transformExpressionsDown {
+          // Check argument data types of higher-order functions downwards first.
+          // If the arguments of the higher-order functions are resolved but the type check fails,
+          // the argument functions will not get resolved, but we should report the argument type
+          // check failure instead of claiming the argument functions are unresolved.
           case hof: HigherOrderFunction
               if hof.argumentsResolved && hof.checkArgumentDataTypes().isFailure =>
             hof.checkArgumentDataTypes() match {
@@ -172,15 +192,16 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog {
                 hof.failAnalysis(
                   s"cannot resolve '${hof.sql}' due to argument data type mismatch: $message")
             }
+
+          // If an attribute can't be resolved as a map key of string type, either the key should be
+          // surrounded with single quotes, or there is a typo in the attribute name.
+          case GetMapValue(map, key: Attribute, _) if isMapWithStringKey(map) && !key.resolved =>
+            failUnresolvedAttribute(operator, key, "UNRESOLVED_MAP_KEY")
         }
 
         getAllExpressions(operator).foreach(_.foreachUp {
           case a: Attribute if !a.resolved =>
-            val missingCol = a.sql
-            val candidates = operator.inputSet.toSeq.map(_.qualifiedName)
-            val orderedCandidates = StringUtils.orderStringsBySimilarity(missingCol, candidates)
-            throw QueryCompilationErrors.unresolvedColumnError(
-              missingCol, orderedCandidates, a.origin)
+            failUnresolvedAttribute(operator, a, "UNRESOLVED_COLUMN")
 
           case s: Star =>
             withPosition(s) {
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeExtractors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeExtractors.scala
index b2db00cd2b4..198fd0cd1f2 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeExtractors.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeExtractors.scala
@@ -461,7 +461,7 @@ case class GetMapValue(
   @transient private lazy val ordering: Ordering[Any] =
     TypeUtils.getInterpretedOrdering(keyType)
 
-  private def keyType = child.dataType.asInstanceOf[MapType].keyType
+  private[catalyst] def keyType = child.dataType.asInstanceOf[MapType].keyType
 
   override def checkInputDataTypes(): TypeCheckResult = {
     super.checkInputDataTypes() match {
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala
index 4ee53c56f69..7ed5c785771 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala
@@ -144,11 +144,14 @@ private[sql] object QueryCompilationErrors extends QueryErrorsBase {
         s"side of the join. The $side-side columns: [${plan.output.map(_.name).mkString(", ")}]")
   }
 
-  def unresolvedColumnError(
-      colName: String, candidates: Seq[String], origin: Origin): Throwable = {
+  def unresolvedAttributeError(
+      errorClass: String,
+      colName: String,
+      candidates: Seq[String],
+      origin: Origin): Throwable = {
     val candidateIds = candidates.map(candidate => toSQLId(candidate))
     new AnalysisException(
-      errorClass = "UNRESOLVED_COLUMN",
+      errorClass = errorClass,
       messageParameters = Array(toSQLId(colName), candidateIds.mkString(", ")),
       origin = origin)
   }
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala
index 06e6bec3fd1..bab5a106828 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala
@@ -401,6 +401,18 @@ class QueryCompilationErrorsSuite
     )
   }
 
+  test("UNRESOLVED_MAP_KEY: string type literal should be quoted") {
+    checkAnswer(sql("select m['a'] from (select map('a', 'b') as m, 'aa' as aa)"), Row("b"))
+    checkError(
+      exception = intercept[AnalysisException] {
+        sql("select m[a] from (select map('a', 'b') as m, 'aa' as aa)")
+      },
+      errorClass = "UNRESOLVED_MAP_KEY",
+      parameters = Map("columnName" -> "`a`",
+        "proposal" ->
+          "`__auto_generated_subquery_name`.`m`, `__auto_generated_subquery_name`.`aa`"))
+  }
+
   test("UNRESOLVED_COLUMN: SELECT distinct does not work correctly " +
     "if order by missing attribute") {
     checkAnswer(


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org