You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by ma...@apache.org on 2023/06/06 07:29:04 UTC
[spark] branch master updated: [SPARK-43913][SQL] Assign names to the error class _LEGACY_ERROR_TEMP_[2426-2432]
This is an automated email from the ASF dual-hosted git repository.
maxgekk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 0cd5ca5a7b3 [SPARK-43913][SQL] Assign names to the error class _LEGACY_ERROR_TEMP_[2426-2432]
0cd5ca5a7b3 is described below
commit 0cd5ca5a7b31f65a005c8ee2e90a6b4a29623ba7
Author: Jiaan Geng <be...@163.com>
AuthorDate: Tue Jun 6 10:28:48 2023 +0300
[SPARK-43913][SQL] Assign names to the error class _LEGACY_ERROR_TEMP_[2426-2432]
### What changes were proposed in this pull request?
This PR aims to assign names to the error classes `_LEGACY_ERROR_TEMP_[2426-2432]`.
### Why are the changes needed?
Improve the error framework.
### Does this PR introduce _any_ user-facing change?
'No'.
### How was this patch tested?
Existing test cases.
Closes #41424 from beliefer/SPARK-43913.
Authored-by: Jiaan Geng <be...@163.com>
Signed-off-by: Max Gekk <ma...@gmail.com>
---
core/src/main/resources/error/error-classes.json | 58 ++++++++++++----------
.../sql/catalyst/analysis/CheckAnalysis.scala | 51 +++++++++++--------
.../sql/catalyst/analysis/AnalysisErrorSuite.scala | 20 ++++----
.../CreateTablePartitioningValidationSuite.scala | 22 ++++----
.../negative-cases/invalid-correlation.sql.out | 6 ++-
.../negative-cases/invalid-correlation.sql.out | 6 ++-
6 files changed, 93 insertions(+), 70 deletions(-)
diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json
index de80415d85b..8c3c076ce74 100644
--- a/core/src/main/resources/error/error-classes.json
+++ b/core/src/main/resources/error/error-classes.json
@@ -660,6 +660,11 @@
"The event time <eventName> has the invalid type <eventType>, but expected \"TIMESTAMP\"."
]
},
+ "EXPRESSION_TYPE_IS_NOT_ORDERABLE" : {
+ "message" : [
+ "Column expression <expr> cannot be sorted because its type <exprType> is not orderable."
+ ]
+ },
"FAILED_EXECUTE_UDF" : {
"message" : [
"Failed to execute user defined function (<functionName>: (<signature>) => <result>)."
@@ -1541,6 +1546,24 @@
],
"sqlState" : "42803"
},
+ "MISSING_ATTRIBUTES" : {
+ "message" : [
+ "Resolved attribute(s) <missingAttributes> missing from <input> in operator <operator>."
+ ],
+ "subClass" : {
+ "RESOLVED_ATTRIBUTE_APPEAR_IN_OPERATION" : {
+ "message" : [
+ "Attribute(s) with the same name appear in the operation: <operation>.",
+ "Please check if the right attribute(s) are used."
+ ]
+ },
+ "RESOLVED_ATTRIBUTE_MISSING_FROM_INPUT" : {
+ "message" : [
+ ""
+ ]
+ }
+ }
+ },
"MISSING_GROUP_BY" : {
"message" : [
"The query does not include a GROUP BY clause. Add GROUP BY or turn it into the window functions using OVER clauses."
@@ -1945,6 +1968,11 @@
"Query [id = <id>, runId = <runId>] terminated with exception: <message>"
]
},
+ "SUM_OF_LIMIT_AND_OFFSET_EXCEEDS_MAX_INT" : {
+ "message" : [
+ "The sum of the LIMIT clause and the OFFSET clause must not be greater than the maximum 32-bit integer value (2,147,483,647) but found limit = <limit>, offset = <offset>."
+ ]
+ },
"TABLE_OR_VIEW_ALREADY_EXISTS" : {
"message" : [
"Cannot create table or view <relationName> because it already exists.",
@@ -2310,6 +2338,11 @@
"Parameter markers in unexpected statement: <statement>. Parameter markers must only be used in a query, or DML statement."
]
},
+ "PARTITION_WITH_NESTED_COLUMN_IS_UNSUPPORTED" : {
+ "message" : [
+ "Invalid partitioning: <cols> is missing or is in a map or array."
+ ]
+ },
"PIVOT_AFTER_GROUP_BY" : {
"message" : [
"PIVOT clause following a GROUP BY clause. Consider pushing the GROUP BY into a subquery."
@@ -5525,31 +5558,6 @@
"failed to evaluate expression <sqlExpr>: <msg>"
]
},
- "_LEGACY_ERROR_TEMP_2426" : {
- "message" : [
- "nondeterministic expression <sqlExpr> should not appear in grouping expression."
- ]
- },
- "_LEGACY_ERROR_TEMP_2427" : {
- "message" : [
- "sorting is not supported for columns of type <type>."
- ]
- },
- "_LEGACY_ERROR_TEMP_2428" : {
- "message" : [
- "The sum of the LIMIT clause and the OFFSET clause must not be greater than the maximum 32-bit integer value (2,147,483,647) but found limit = <limit>, offset = <offset>."
- ]
- },
- "_LEGACY_ERROR_TEMP_2431" : {
- "message" : [
- "Invalid partitioning: <cols> is missing or is in a map or array."
- ]
- },
- "_LEGACY_ERROR_TEMP_2432" : {
- "message" : [
- "<msg>"
- ]
- },
"_LEGACY_ERROR_TEMP_2433" : {
"message" : [
"Only a single table generating function is allowed in a SELECT clause, found:",
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala
index 594c0b666e8..9124890d4af 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala
@@ -474,9 +474,11 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog with QueryErrorsB
// This is just a sanity check, our analysis rule PullOutNondeterministic should
// already pull out those nondeterministic expressions and evaluate them in
// a Project node.
- expr.failAnalysis(
- errorClass = "_LEGACY_ERROR_TEMP_2426",
- messageParameters = Map("sqlExpr" -> expr.sql))
+ throw SparkException.internalError(
+ msg = s"Non-deterministic expression '${toSQLExpr(expr)}' should not appear in " +
+ "grouping expression.",
+ context = expr.origin.getQueryContext,
+ summary = expr.origin.context.summary)
}
}
@@ -545,8 +547,8 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog with QueryErrorsB
orders.foreach { order =>
if (!RowOrdering.isOrderable(order.dataType)) {
order.failAnalysis(
- errorClass = "_LEGACY_ERROR_TEMP_2427",
- messageParameters = Map("type" -> order.dataType.catalogString))
+ errorClass = "EXPRESSION_TYPE_IS_NOT_ORDERABLE",
+ messageParameters = Map("exprType" -> toSQLType(order.dataType)))
}
}
@@ -560,7 +562,7 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog with QueryErrorsB
val offset = offsetExpr.eval().asInstanceOf[Int]
if (Int.MaxValue - limit < offset) {
child.failAnalysis(
- errorClass = "_LEGACY_ERROR_TEMP_2428",
+ errorClass = "SUM_OF_LIMIT_AND_OFFSET_EXCEEDS_MAX_INT",
messageParameters = Map(
"limit" -> limit.toString,
"offset" -> offset.toString))
@@ -624,8 +626,9 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog with QueryErrorsB
if (badReferences.nonEmpty) {
create.failAnalysis(
- errorClass = "_LEGACY_ERROR_TEMP_2431",
- messageParameters = Map("cols" -> badReferences.mkString(", ")))
+ errorClass = "UNSUPPORTED_FEATURE.PARTITION_WITH_NESTED_COLUMN_IS_UNSUPPORTED",
+ messageParameters = Map(
+ "cols" -> badReferences.map(r => toSQLId(r)).mkString(", ")))
}
create.tableSchema.foreach(f => TypeUtils.failWithIntervalType(f.dataType))
@@ -641,28 +644,34 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog with QueryErrorsB
operator match {
case o if o.children.nonEmpty && o.missingInput.nonEmpty =>
- val missingAttributes = o.missingInput.mkString(",")
- val input = o.inputSet.mkString(",")
- val msgForMissingAttributes = s"Resolved attribute(s) $missingAttributes missing " +
- s"from $input in operator ${operator.simpleString(SQLConf.get.maxToStringFields)}."
+ val missingAttributes = o.missingInput.map(attr => toSQLExpr(attr)).mkString(", ")
+ val input = o.inputSet.map(attr => toSQLExpr(attr)).mkString(", ")
val resolver = plan.conf.resolver
val attrsWithSameName = o.missingInput.filter { missing =>
o.inputSet.exists(input => resolver(missing.name, input.name))
}
- val msg = if (attrsWithSameName.nonEmpty) {
- val sameNames = attrsWithSameName.map(_.name).mkString(",")
- s"$msgForMissingAttributes Attribute(s) with the same name appear in the " +
- s"operation: $sameNames. Please check if the right attribute(s) are used."
+ if (attrsWithSameName.nonEmpty) {
+ val sameNames = attrsWithSameName.map(attr => toSQLExpr(attr)).mkString(", ")
+ o.failAnalysis(
+ errorClass = "MISSING_ATTRIBUTES.RESOLVED_ATTRIBUTE_APPEAR_IN_OPERATION",
+ messageParameters = Map(
+ "missingAttributes" -> missingAttributes,
+ "input" -> input,
+ "operator" -> operator.simpleString(SQLConf.get.maxToStringFields),
+ "operation" -> sameNames
+ ))
} else {
- msgForMissingAttributes
+ o.failAnalysis(
+ errorClass = "MISSING_ATTRIBUTES.RESOLVED_ATTRIBUTE_MISSING_FROM_INPUT",
+ messageParameters = Map(
+ "missingAttributes" -> missingAttributes,
+ "input" -> input,
+ "operator" -> operator.simpleString(SQLConf.get.maxToStringFields)
+ ))
}
- o.failAnalysis(
- errorClass = "_LEGACY_ERROR_TEMP_2432",
- messageParameters = Map("msg" -> msg))
-
case p @ Project(exprs, _) if containsMultipleGenerators(exprs) =>
p.failAnalysis(
errorClass = "_LEGACY_ERROR_TEMP_2433",
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala
index 5f6256881a4..b657dd55eb7 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala
@@ -721,7 +721,7 @@ class AnalysisErrorSuite extends AnalysisTest {
errorClassTest(
"the sum of num_rows in limit clause and num_rows in offset clause less than Int.MaxValue",
testRelation.offset(Literal(2000000000, IntegerType)).limit(Literal(1000000000, IntegerType)),
- "_LEGACY_ERROR_TEMP_2428",
+ "SUM_OF_LIMIT_AND_OFFSET_EXCEEDS_MAX_INT",
Map("limit" -> "1000000000", "offset" -> "2000000000"))
errorTest(
@@ -770,14 +770,16 @@ class AnalysisErrorSuite extends AnalysisTest {
assert(plan.resolved)
- val resolved = s"${attrA.toString},${attrC.toString}"
-
- val errorMsg = s"Resolved attribute(s) $resolved missing from ${otherA.toString} " +
- s"in operator !Aggregate [${aliases.mkString(", ")}]. " +
- s"Attribute(s) with the same name appear in the operation: a. " +
- "Please check if the right attribute(s) are used."
-
- assertAnalysisError(plan, errorMsg :: Nil)
+ assertAnalysisErrorClass(
+ inputPlan = plan,
+ expectedErrorClass = "MISSING_ATTRIBUTES.RESOLVED_ATTRIBUTE_APPEAR_IN_OPERATION",
+ expectedMessageParameters = Map(
+ "missingAttributes" -> "\"a\", \"c\"",
+ "input" -> "\"a\"",
+ "operator" -> s"!Aggregate [${aliases.mkString(", ")}]",
+ "operation" -> "\"a\""
+ )
+ )
}
test("error test for self-join") {
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/CreateTablePartitioningValidationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/CreateTablePartitioningValidationSuite.scala
index 67441e18b0f..ba312ddbc49 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/CreateTablePartitioningValidationSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/CreateTablePartitioningValidationSuite.scala
@@ -40,9 +40,9 @@ class CreateTablePartitioningValidationSuite extends AnalysisTest {
ignoreIfExists = false)
assert(!plan.resolved)
- assertAnalysisError(plan, Seq(
- "Invalid partitioning",
- "does_not_exist is missing or is in a map or array"))
+ assertAnalysisErrorClass(plan,
+ expectedErrorClass = "UNSUPPORTED_FEATURE.PARTITION_WITH_NESTED_COLUMN_IS_UNSUPPORTED",
+ expectedMessageParameters = Map("cols" -> "`does_not_exist`"))
}
test("CreateTableAsSelect: fail missing top-level column nested reference") {
@@ -57,9 +57,9 @@ class CreateTablePartitioningValidationSuite extends AnalysisTest {
ignoreIfExists = false)
assert(!plan.resolved)
- assertAnalysisError(plan, Seq(
- "Invalid partitioning",
- "does_not_exist.z is missing or is in a map or array"))
+ assertAnalysisErrorClass(plan,
+ expectedErrorClass = "UNSUPPORTED_FEATURE.PARTITION_WITH_NESTED_COLUMN_IS_UNSUPPORTED",
+ expectedMessageParameters = Map("cols" -> "`does_not_exist`.`z`"))
}
test("CreateTableAsSelect: fail missing nested column") {
@@ -74,9 +74,9 @@ class CreateTablePartitioningValidationSuite extends AnalysisTest {
ignoreIfExists = false)
assert(!plan.resolved)
- assertAnalysisError(plan, Seq(
- "Invalid partitioning",
- "point.z is missing or is in a map or array"))
+ assertAnalysisErrorClass(plan,
+ expectedErrorClass = "UNSUPPORTED_FEATURE.PARTITION_WITH_NESTED_COLUMN_IS_UNSUPPORTED",
+ expectedMessageParameters = Map("cols" -> "`point`.`z`"))
}
test("CreateTableAsSelect: fail with multiple errors") {
@@ -92,8 +92,8 @@ class CreateTablePartitioningValidationSuite extends AnalysisTest {
assert(!plan.resolved)
assertAnalysisErrorClass(plan,
- expectedErrorClass = "_LEGACY_ERROR_TEMP_2431",
- expectedMessageParameters = Map("cols" -> "does_not_exist, point.z"))
+ expectedErrorClass = "UNSUPPORTED_FEATURE.PARTITION_WITH_NESTED_COLUMN_IS_UNSUPPORTED",
+ expectedMessageParameters = Map("cols" -> "`does_not_exist`, `point`.`z`"))
}
test("CreateTableAsSelect: success with top-level column") {
diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/negative-cases/invalid-correlation.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/negative-cases/invalid-correlation.sql.out
index 08ddc2cfcd2..2992bc6c9a1 100644
--- a/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/negative-cases/invalid-correlation.sql.out
+++ b/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/negative-cases/invalid-correlation.sql.out
@@ -76,9 +76,11 @@ WHERE t1a IN (SELECT min(t2a)
-- !query analysis
org.apache.spark.sql.AnalysisException
{
- "errorClass" : "_LEGACY_ERROR_TEMP_2432",
+ "errorClass" : "MISSING_ATTRIBUTES.RESOLVED_ATTRIBUTE_MISSING_FROM_INPUT",
"messageParameters" : {
- "msg" : "Resolved attribute(s) t2b#x missing from min(t2a)#x,t2c#x in operator !Filter t2c#x IN (list#x [t2b#x])."
+ "input" : "\"min(t2a)\", \"t2c\"",
+ "missingAttributes" : "\"t2b\"",
+ "operator" : "!Filter t2c#x IN (list#x [t2b#x])"
},
"queryContext" : [ {
"objectType" : "",
diff --git a/sql/core/src/test/resources/sql-tests/results/subquery/negative-cases/invalid-correlation.sql.out b/sql/core/src/test/resources/sql-tests/results/subquery/negative-cases/invalid-correlation.sql.out
index 39b4f87bb1b..d1eb86a2d33 100644
--- a/sql/core/src/test/resources/sql-tests/results/subquery/negative-cases/invalid-correlation.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/subquery/negative-cases/invalid-correlation.sql.out
@@ -71,9 +71,11 @@ struct<>
-- !query output
org.apache.spark.sql.AnalysisException
{
- "errorClass" : "_LEGACY_ERROR_TEMP_2432",
+ "errorClass" : "MISSING_ATTRIBUTES.RESOLVED_ATTRIBUTE_MISSING_FROM_INPUT",
"messageParameters" : {
- "msg" : "Resolved attribute(s) t2b#x missing from min(t2a)#x,t2c#x in operator !Filter t2c#x IN (list#x [t2b#x])."
+ "input" : "\"min(t2a)\", \"t2c\"",
+ "missingAttributes" : "\"t2b\"",
+ "operator" : "!Filter t2c#x IN (list#x [t2b#x])"
},
"queryContext" : [ {
"objectType" : "",
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org