You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by ge...@apache.org on 2022/06/20 17:36:00 UTC
[spark] branch master updated: [SPARK-39497][SQL] Improve the analysis exception of missing map key column
This is an automated email from the ASF dual-hosted git repository.
gengliang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 1f1d7964902 [SPARK-39497][SQL] Improve the analysis exception of missing map key column
1f1d7964902 is described below
commit 1f1d79649027a4c03e48dea2bcef280dca53767a
Author: Gengliang Wang <ge...@apache.org>
AuthorDate: Mon Jun 20 10:35:22 2022 -0700
[SPARK-39497][SQL] Improve the analysis exception of missing map key column
### What changes were proposed in this pull request?
Sometimes users forgot to add single quotes on the map key string literal, for example `map_col[a]`. In such a case, the Analyzer will throw an exception:
```
[MISSING_COLUMN] Column 'struct.a' does not exist. Did you mean one of the following? ...
```
We can improve this message by saying that the user should append single quotes if the map key is a string literal.
```
[UNRESOLVED_MAP_KEY] Cannot resolve column 'a' as a map key. If the key is a string literal, please add single quotes around it. Otherwise, did you mean one of the following column(s)? ...
```
### Why are the changes needed?
Error message improvement
### Does this PR introduce _any_ user-facing change?
Yes but trivial, an improvement on the error message of unresolved map key column
### How was this patch tested?
New UT
Closes #36896 from gengliangwang/unreslovedMapKey.
Authored-by: Gengliang Wang <ge...@apache.org>
Signed-off-by: Gengliang Wang <ge...@apache.org>
---
core/src/main/resources/error/error-classes.json | 8 ++++-
.../spark/sql/catalyst/analysis/Analyzer.scala | 4 +--
.../sql/catalyst/analysis/CheckAnalysis.scala | 39 +++++++++++++++++-----
.../expressions/complexTypeExtractors.scala | 2 +-
.../spark/sql/errors/QueryCompilationErrors.scala | 9 +++--
.../sql/errors/QueryCompilationErrorsSuite.scala | 12 +++++++
6 files changed, 58 insertions(+), 16 deletions(-)
diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json
index d4c0910c5ad..f9257b6c21b 100644
--- a/core/src/main/resources/error/error-classes.json
+++ b/core/src/main/resources/error/error-classes.json
@@ -352,6 +352,12 @@
],
"sqlState" : "42000"
},
+ "UNRESOLVED_MAP_KEY" : {
+ "message" : [
+ "Cannot resolve column <columnName> as a map key. If the key is a string literal, please add single quotes around it. Otherwise, did you mean one of the following column(s)? [<proposal>]"
+ ],
+ "sqlState" : "42000"
+ },
"UNSUPPORTED_DATATYPE" : {
"message" : [
"Unsupported data type <typeName>"
@@ -556,4 +562,4 @@
],
"sqlState" : "40000"
}
-}
\ No newline at end of file
+}
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
index 931a0fcf77f..4d2dd175260 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
@@ -3420,8 +3420,8 @@ class Analyzer(override val catalogManager: CatalogManager)
i.userSpecifiedCols.map { col =>
i.table.resolve(Seq(col), resolver).getOrElse(
- throw QueryCompilationErrors.unresolvedColumnError(
- col, i.table.output.map(_.name), i.origin))
+ throw QueryCompilationErrors.unresolvedAttributeError(
+ "UNRESOLVED_COLUMN", col, i.table.output.map(_.name), i.origin))
}
}
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala
index f9f8b590a31..759683b8c00 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala
@@ -91,6 +91,26 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog {
}
}
+ private def isMapWithStringKey(e: Expression): Boolean = if (e.resolved) {
+ e.dataType match {
+ case m: MapType => m.keyType.isInstanceOf[StringType]
+ case _ => false
+ }
+ } else {
+ false
+ }
+
+ private def failUnresolvedAttribute(
+ operator: LogicalPlan,
+ a: Attribute,
+ errorClass: String): Nothing = {
+ val missingCol = a.sql
+ val candidates = operator.inputSet.toSeq.map(_.qualifiedName)
+ val orderedCandidates = StringUtils.orderStringsBySimilarity(missingCol, candidates)
+ throw QueryCompilationErrors.unresolvedAttributeError(
+ errorClass, missingCol, orderedCandidates, a.origin)
+ }
+
def checkAnalysis(plan: LogicalPlan): Unit = {
// We transform up and order the rules so as to catch the first possible failure instead
// of the result of cascading resolution failures. Inline all CTEs in the plan to help check
@@ -160,11 +180,11 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog {
throw QueryCompilationErrors.commandUnsupportedInV2TableError("SHOW TABLE EXTENDED")
case operator: LogicalPlan =>
- // Check argument data types of higher-order functions downwards first.
- // If the arguments of the higher-order functions are resolved but the type check fails,
- // the argument functions will not get resolved, but we should report the argument type
- // check failure instead of claiming the argument functions are unresolved.
operator transformExpressionsDown {
+ // Check argument data types of higher-order functions downwards first.
+ // If the arguments of the higher-order functions are resolved but the type check fails,
+ // the argument functions will not get resolved, but we should report the argument type
+ // check failure instead of claiming the argument functions are unresolved.
case hof: HigherOrderFunction
if hof.argumentsResolved && hof.checkArgumentDataTypes().isFailure =>
hof.checkArgumentDataTypes() match {
@@ -172,15 +192,16 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog {
hof.failAnalysis(
s"cannot resolve '${hof.sql}' due to argument data type mismatch: $message")
}
+
+ // If an attribute can't be resolved as a map key of string type, either the key should be
+ // surrounded with single quotes, or there is a typo in the attribute name.
+ case GetMapValue(map, key: Attribute, _) if isMapWithStringKey(map) && !key.resolved =>
+ failUnresolvedAttribute(operator, key, "UNRESOLVED_MAP_KEY")
}
getAllExpressions(operator).foreach(_.foreachUp {
case a: Attribute if !a.resolved =>
- val missingCol = a.sql
- val candidates = operator.inputSet.toSeq.map(_.qualifiedName)
- val orderedCandidates = StringUtils.orderStringsBySimilarity(missingCol, candidates)
- throw QueryCompilationErrors.unresolvedColumnError(
- missingCol, orderedCandidates, a.origin)
+ failUnresolvedAttribute(operator, a, "UNRESOLVED_COLUMN")
case s: Star =>
withPosition(s) {
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeExtractors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeExtractors.scala
index b2db00cd2b4..198fd0cd1f2 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeExtractors.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeExtractors.scala
@@ -461,7 +461,7 @@ case class GetMapValue(
@transient private lazy val ordering: Ordering[Any] =
TypeUtils.getInterpretedOrdering(keyType)
- private def keyType = child.dataType.asInstanceOf[MapType].keyType
+ private[catalyst] def keyType = child.dataType.asInstanceOf[MapType].keyType
override def checkInputDataTypes(): TypeCheckResult = {
super.checkInputDataTypes() match {
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala
index 4ee53c56f69..7ed5c785771 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala
@@ -144,11 +144,14 @@ private[sql] object QueryCompilationErrors extends QueryErrorsBase {
s"side of the join. The $side-side columns: [${plan.output.map(_.name).mkString(", ")}]")
}
- def unresolvedColumnError(
- colName: String, candidates: Seq[String], origin: Origin): Throwable = {
+ def unresolvedAttributeError(
+ errorClass: String,
+ colName: String,
+ candidates: Seq[String],
+ origin: Origin): Throwable = {
val candidateIds = candidates.map(candidate => toSQLId(candidate))
new AnalysisException(
- errorClass = "UNRESOLVED_COLUMN",
+ errorClass = errorClass,
messageParameters = Array(toSQLId(colName), candidateIds.mkString(", ")),
origin = origin)
}
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala
index 06e6bec3fd1..bab5a106828 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala
@@ -401,6 +401,18 @@ class QueryCompilationErrorsSuite
)
}
+ test("UNRESOLVED_MAP_KEY: string type literal should be quoted") {
+ checkAnswer(sql("select m['a'] from (select map('a', 'b') as m, 'aa' as aa)"), Row("b"))
+ checkError(
+ exception = intercept[AnalysisException] {
+ sql("select m[a] from (select map('a', 'b') as m, 'aa' as aa)")
+ },
+ errorClass = "UNRESOLVED_MAP_KEY",
+ parameters = Map("columnName" -> "`a`",
+ "proposal" ->
+ "`__auto_generated_subquery_name`.`m`, `__auto_generated_subquery_name`.`aa`"))
+ }
+
test("UNRESOLVED_COLUMN: SELECT distinct does not work correctly " +
"if order by missing attribute") {
checkAnswer(
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org