You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by ma...@apache.org on 2022/08/30 17:43:43 UTC
[spark] branch master updated: [SPARK-40260][SQL] Use error classes in the compilation errors of GROUP BY a position
This is an automated email from the ASF dual-hosted git repository.
maxgekk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 296fe49ec85 [SPARK-40260][SQL] Use error classes in the compilation errors of GROUP BY a position
296fe49ec85 is described below
commit 296fe49ec855ac8c15c080e7bab6d519fe504bd3
Author: Max Gekk <ma...@gmail.com>
AuthorDate: Tue Aug 30 20:43:19 2022 +0300
[SPARK-40260][SQL] Use error classes in the compilation errors of GROUP BY a position
### What changes were proposed in this pull request?
In the PR, I propose to add the following new error classes:
- GROUP_BY_POS_OUT_OF_RANGE
- GROUP_BY_POS_REFERS_AGG_EXPR
and migrate 2 compilation exceptions related to GROUP BY a position onto them.
### Why are the changes needed?
The migration onto error classes makes the errors searchable in docs, and allows editing the error text messages without modifying the source code.
### Does this PR introduce _any_ user-facing change?
Yes, in some sense because it modifies user-facing error messages.
### How was this patch tested?
By running the affected test suites:
```
$ build/sbt "sql/testOnly org.apache.spark.sql.SQLQueryTestSuite"
$ build/sbt "core/testOnly *SparkThrowableSuite"
```
Closes #37712 from MaxGekk/group-ref-agg-error.
Lead-authored-by: Max Gekk <ma...@gmail.com>
Co-authored-by: Maxim Gekk <ma...@gmail.com>
Signed-off-by: Max Gekk <ma...@gmail.com>
---
core/src/main/resources/error/error-classes.json | 12 ++++
.../org/apache/spark/sql/AnalysisException.scala | 2 +-
.../spark/sql/errors/QueryCompilationErrors.scala | 11 +--
.../sql-tests/results/group-by-ordinal.sql.out | 81 +++++++++++++++++++---
.../results/postgreSQL/select_implicit.sql.out | 9 ++-
.../udf/postgreSQL/udf-select_implicit.sql.out | 9 ++-
6 files changed, 107 insertions(+), 17 deletions(-)
diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json
index 816df79e508..df0f887a63c 100644
--- a/core/src/main/resources/error/error-classes.json
+++ b/core/src/main/resources/error/error-classes.json
@@ -136,6 +136,18 @@
"Grouping sets size cannot be greater than <maxSize>"
]
},
+ "GROUP_BY_POS_OUT_OF_RANGE" : {
+ "message" : [
+ "GROUP BY position <index> is not in select list (valid range is [1, <size>])."
+ ],
+ "sqlState" : "42000"
+ },
+ "GROUP_BY_POS_REFERS_AGG_EXPR" : {
+ "message" : [
+ "GROUP BY <index> refers to an expression <aggExpr> that contains an aggregate function. Aggregate functions are not allowed in GROUP BY."
+ ],
+ "sqlState" : "42000"
+ },
"INCOMPARABLE_PIVOT_COLUMN" : {
"message" : [
"Invalid pivot column <columnName>. Pivot columns must be comparable."
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/AnalysisException.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/AnalysisException.scala
index 9ab0b223e11..48e1f91990b 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/AnalysisException.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/AnalysisException.scala
@@ -100,7 +100,7 @@ class AnalysisException protected[sql] (
line = origin.line,
startPosition = origin.startPosition,
errorClass = Some(errorClass),
- errorSubClass = Some(errorSubClass),
+ errorSubClass = Option(errorSubClass),
messageParameters = messageParameters)
def copy(
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala
index 20c3c81b250..7458e201be2 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala
@@ -366,14 +366,15 @@ private[sql] object QueryCompilationErrors extends QueryErrorsBase {
def groupByPositionRefersToAggregateFunctionError(
index: Int,
expr: Expression): Throwable = {
- new AnalysisException(s"GROUP BY $index refers to an expression that is or contains " +
- "an aggregate function. Aggregate functions are not allowed in GROUP BY, " +
- s"but got ${expr.sql}")
+ new AnalysisException(
+ errorClass = "GROUP_BY_POS_REFERS_AGG_EXPR",
+ messageParameters = Array(index.toString, expr.sql))
}
def groupByPositionRangeError(index: Int, size: Int): Throwable = {
- new AnalysisException(s"GROUP BY position $index is not in select list " +
- s"(valid range is [1, $size])")
+ new AnalysisException(
+ errorClass = "GROUP_BY_POS_OUT_OF_RANGE",
+ messageParameters = Array(index.toString, size.toString))
}
def generatorNotExpectedError(name: FunctionIdentifier, classCanonicalName: String): Throwable = {
diff --git a/sql/core/src/test/resources/sql-tests/results/group-by-ordinal.sql.out b/sql/core/src/test/resources/sql-tests/results/group-by-ordinal.sql.out
index 0e4ec436b3b..10b244c1c43 100644
--- a/sql/core/src/test/resources/sql-tests/results/group-by-ordinal.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/group-by-ordinal.sql.out
@@ -92,7 +92,14 @@ select a, b from data group by -1
struct<>
-- !query output
org.apache.spark.sql.AnalysisException
-GROUP BY position -1 is not in select list (valid range is [1, 2]); line 1 pos 31
+{
+ "errorClass" : "GROUP_BY_POS_OUT_OF_RANGE",
+ "sqlState" : "42000",
+ "messageParameters" : {
+ "index" : "-1",
+ "size" : "2"
+ }
+}
-- !query
@@ -101,7 +108,14 @@ select a, b from data group by 0
struct<>
-- !query output
org.apache.spark.sql.AnalysisException
-GROUP BY position 0 is not in select list (valid range is [1, 2]); line 1 pos 31
+{
+ "errorClass" : "GROUP_BY_POS_OUT_OF_RANGE",
+ "sqlState" : "42000",
+ "messageParameters" : {
+ "index" : "0",
+ "size" : "2"
+ }
+}
-- !query
@@ -110,7 +124,14 @@ select a, b from data group by 3
struct<>
-- !query output
org.apache.spark.sql.AnalysisException
-GROUP BY position 3 is not in select list (valid range is [1, 2]); line 1 pos 31
+{
+ "errorClass" : "GROUP_BY_POS_OUT_OF_RANGE",
+ "sqlState" : "42000",
+ "messageParameters" : {
+ "index" : "3",
+ "size" : "2"
+ }
+}
-- !query
@@ -119,7 +140,14 @@ select a, b, sum(b) from data group by 3
struct<>
-- !query output
org.apache.spark.sql.AnalysisException
-GROUP BY 3 refers to an expression that is or contains an aggregate function. Aggregate functions are not allowed in GROUP BY, but got sum(data.b) AS `sum(b)`; line 1 pos 39
+{
+ "errorClass" : "GROUP_BY_POS_REFERS_AGG_EXPR",
+ "sqlState" : "42000",
+ "messageParameters" : {
+ "index" : "3",
+ "aggExpr" : "sum(data.b) AS `sum(b)`"
+ }
+}
-- !query
@@ -128,7 +156,14 @@ select a, b, sum(b) + 2 from data group by 3
struct<>
-- !query output
org.apache.spark.sql.AnalysisException
-GROUP BY 3 refers to an expression that is or contains an aggregate function. Aggregate functions are not allowed in GROUP BY, but got (sum(data.b) + CAST(2 AS BIGINT)) AS `(sum(b) + 2)`; line 1 pos 43
+{
+ "errorClass" : "GROUP_BY_POS_REFERS_AGG_EXPR",
+ "sqlState" : "42000",
+ "messageParameters" : {
+ "index" : "3",
+ "aggExpr" : "(sum(data.b) + CAST(2 AS BIGINT)) AS `(sum(b) + 2)`"
+ }
+}
-- !query
@@ -349,7 +384,14 @@ select a, b, count(1) from data group by a, -1
struct<>
-- !query output
org.apache.spark.sql.AnalysisException
-GROUP BY position -1 is not in select list (valid range is [1, 3]); line 1 pos 44
+{
+ "errorClass" : "GROUP_BY_POS_OUT_OF_RANGE",
+ "sqlState" : "42000",
+ "messageParameters" : {
+ "index" : "-1",
+ "size" : "3"
+ }
+}
-- !query
@@ -358,7 +400,14 @@ select a, b, count(1) from data group by a, 3
struct<>
-- !query output
org.apache.spark.sql.AnalysisException
-GROUP BY 3 refers to an expression that is or contains an aggregate function. Aggregate functions are not allowed in GROUP BY, but got count(1) AS `count(1)`; line 1 pos 44
+{
+ "errorClass" : "GROUP_BY_POS_REFERS_AGG_EXPR",
+ "sqlState" : "42000",
+ "messageParameters" : {
+ "index" : "3",
+ "aggExpr" : "count(1) AS `count(1)`"
+ }
+}
-- !query
@@ -367,7 +416,14 @@ select a, b, count(1) from data group by cube(-1, 2)
struct<>
-- !query output
org.apache.spark.sql.AnalysisException
-GROUP BY position -1 is not in select list (valid range is [1, 3]); line 1 pos 46
+{
+ "errorClass" : "GROUP_BY_POS_OUT_OF_RANGE",
+ "sqlState" : "42000",
+ "messageParameters" : {
+ "index" : "-1",
+ "size" : "3"
+ }
+}
-- !query
@@ -376,7 +432,14 @@ select a, b, count(1) from data group by cube(1, 3)
struct<>
-- !query output
org.apache.spark.sql.AnalysisException
-GROUP BY 3 refers to an expression that is or contains an aggregate function. Aggregate functions are not allowed in GROUP BY, but got count(1) AS `count(1)`; line 1 pos 49
+{
+ "errorClass" : "GROUP_BY_POS_REFERS_AGG_EXPR",
+ "sqlState" : "42000",
+ "messageParameters" : {
+ "index" : "3",
+ "aggExpr" : "count(1) AS `count(1)`"
+ }
+}
-- !query
diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/select_implicit.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/select_implicit.sql.out
index d39f6101ac7..cd5bc39d7c6 100755
--- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/select_implicit.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/select_implicit.sql.out
@@ -207,7 +207,14 @@ SELECT c, count(*) FROM test_missing_target GROUP BY 3
struct<>
-- !query output
org.apache.spark.sql.AnalysisException
-GROUP BY position 3 is not in select list (valid range is [1, 2]); line 1 pos 53
+{
+ "errorClass" : "GROUP_BY_POS_OUT_OF_RANGE",
+ "sqlState" : "42000",
+ "messageParameters" : {
+ "index" : "3",
+ "size" : "2"
+ }
+}
-- !query
diff --git a/sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-select_implicit.sql.out b/sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-select_implicit.sql.out
index 76280074876..db2a855bf0f 100755
--- a/sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-select_implicit.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-select_implicit.sql.out
@@ -210,7 +210,14 @@ SELECT udf(c), udf(count(*)) FROM test_missing_target GROUP BY 3
struct<>
-- !query output
org.apache.spark.sql.AnalysisException
-GROUP BY position 3 is not in select list (valid range is [1, 2]); line 1 pos 63
+{
+ "errorClass" : "GROUP_BY_POS_OUT_OF_RANGE",
+ "sqlState" : "42000",
+ "messageParameters" : {
+ "index" : "3",
+ "size" : "2"
+ }
+}
-- !query
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org