You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by ma...@apache.org on 2022/08/30 17:43:43 UTC
[spark] branch master updated: [SPARK-40260][SQL] Use error classes in the compilation errors of GROUP BY a position

This is an automated email from the ASF dual-hosted git repository.

maxgekk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 296fe49ec85 [SPARK-40260][SQL] Use error classes in the compilation errors of GROUP BY a position
296fe49ec85 is described below

commit 296fe49ec855ac8c15c080e7bab6d519fe504bd3
Author: Max Gekk <ma...@gmail.com>
AuthorDate: Tue Aug 30 20:43:19 2022 +0300

    [SPARK-40260][SQL] Use error classes in the compilation errors of GROUP BY a position
    
    ### What changes were proposed in this pull request?
    In the PR, I propose to add the following new error classes:
    - GROUP_BY_POS_OUT_OF_RANGE
    - GROUP_BY_POS_REFERS_AGG_EXPR
    
    and migrate 2 compilation exceptions related to GROUP BY a position onto them.
    
    ### Why are the changes needed?
    The migration onto error classes makes the errors searchable in docs, and allows editing the error text messages without modifying the source code.
    
    ### Does this PR introduce _any_ user-facing change?
    Yes, in some sense because it modifies user-facing error messages.
    
    ### How was this patch tested?
    By running the affected test suites:
    ```
    $ build/sbt "sql/testOnly org.apache.spark.sql.SQLQueryTestSuite"
    $ build/sbt "core/testOnly *SparkThrowableSuite"
    ```
    
    Closes #37712 from MaxGekk/group-ref-agg-error.
    
    Lead-authored-by: Max Gekk <ma...@gmail.com>
    Co-authored-by: Maxim Gekk <ma...@gmail.com>
    Signed-off-by: Max Gekk <ma...@gmail.com>
---
 core/src/main/resources/error/error-classes.json   | 12 ++++
 .../org/apache/spark/sql/AnalysisException.scala   |  2 +-
 .../spark/sql/errors/QueryCompilationErrors.scala  | 11 +--
 .../sql-tests/results/group-by-ordinal.sql.out     | 81 +++++++++++++++++++---
 .../results/postgreSQL/select_implicit.sql.out     |  9 ++-
 .../udf/postgreSQL/udf-select_implicit.sql.out     |  9 ++-
 6 files changed, 107 insertions(+), 17 deletions(-)

diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json
index 816df79e508..df0f887a63c 100644
--- a/core/src/main/resources/error/error-classes.json
+++ b/core/src/main/resources/error/error-classes.json
@@ -136,6 +136,18 @@
       "Grouping sets size cannot be greater than <maxSize>"
     ]
   },
+  "GROUP_BY_POS_OUT_OF_RANGE" : {
+    "message" : [
+      "GROUP BY position <index> is not in select list (valid range is [1, <size>])."
+    ],
+    "sqlState" : "42000"
+  },
+  "GROUP_BY_POS_REFERS_AGG_EXPR" : {
+    "message" : [
+      "GROUP BY <index> refers to an expression <aggExpr> that contains an aggregate function. Aggregate functions are not allowed in GROUP BY."
+    ],
+    "sqlState" : "42000"
+  },
   "INCOMPARABLE_PIVOT_COLUMN" : {
     "message" : [
       "Invalid pivot column <columnName>. Pivot columns must be comparable."
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/AnalysisException.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/AnalysisException.scala
index 9ab0b223e11..48e1f91990b 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/AnalysisException.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/AnalysisException.scala
@@ -100,7 +100,7 @@ class AnalysisException protected[sql] (
       line = origin.line,
       startPosition = origin.startPosition,
       errorClass = Some(errorClass),
-      errorSubClass = Some(errorSubClass),
+      errorSubClass = Option(errorSubClass),
       messageParameters = messageParameters)
 
   def copy(
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala
index 20c3c81b250..7458e201be2 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala
@@ -366,14 +366,15 @@ private[sql] object QueryCompilationErrors extends QueryErrorsBase {
   def groupByPositionRefersToAggregateFunctionError(
       index: Int,
       expr: Expression): Throwable = {
-    new AnalysisException(s"GROUP BY $index refers to an expression that is or contains " +
-      "an aggregate function. Aggregate functions are not allowed in GROUP BY, " +
-      s"but got ${expr.sql}")
+    new AnalysisException(
+      errorClass = "GROUP_BY_POS_REFERS_AGG_EXPR",
+      messageParameters = Array(index.toString, expr.sql))
   }
 
   def groupByPositionRangeError(index: Int, size: Int): Throwable = {
-    new AnalysisException(s"GROUP BY position $index is not in select list " +
-      s"(valid range is [1, $size])")
+    new AnalysisException(
+      errorClass = "GROUP_BY_POS_OUT_OF_RANGE",
+      messageParameters = Array(index.toString, size.toString))
   }
 
   def generatorNotExpectedError(name: FunctionIdentifier, classCanonicalName: String): Throwable = {
diff --git a/sql/core/src/test/resources/sql-tests/results/group-by-ordinal.sql.out b/sql/core/src/test/resources/sql-tests/results/group-by-ordinal.sql.out
index 0e4ec436b3b..10b244c1c43 100644
--- a/sql/core/src/test/resources/sql-tests/results/group-by-ordinal.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/group-by-ordinal.sql.out
@@ -92,7 +92,14 @@ select a, b from data group by -1
 struct<>
 -- !query output
 org.apache.spark.sql.AnalysisException
-GROUP BY position -1 is not in select list (valid range is [1, 2]); line 1 pos 31
+{
+  "errorClass" : "GROUP_BY_POS_OUT_OF_RANGE",
+  "sqlState" : "42000",
+  "messageParameters" : {
+    "index" : "-1",
+    "size" : "2"
+  }
+}
 
 
 -- !query
@@ -101,7 +108,14 @@ select a, b from data group by 0
 struct<>
 -- !query output
 org.apache.spark.sql.AnalysisException
-GROUP BY position 0 is not in select list (valid range is [1, 2]); line 1 pos 31
+{
+  "errorClass" : "GROUP_BY_POS_OUT_OF_RANGE",
+  "sqlState" : "42000",
+  "messageParameters" : {
+    "index" : "0",
+    "size" : "2"
+  }
+}
 
 
 -- !query
@@ -110,7 +124,14 @@ select a, b from data group by 3
 struct<>
 -- !query output
 org.apache.spark.sql.AnalysisException
-GROUP BY position 3 is not in select list (valid range is [1, 2]); line 1 pos 31
+{
+  "errorClass" : "GROUP_BY_POS_OUT_OF_RANGE",
+  "sqlState" : "42000",
+  "messageParameters" : {
+    "index" : "3",
+    "size" : "2"
+  }
+}
 
 
 -- !query
@@ -119,7 +140,14 @@ select a, b, sum(b) from data group by 3
 struct<>
 -- !query output
 org.apache.spark.sql.AnalysisException
-GROUP BY 3 refers to an expression that is or contains an aggregate function. Aggregate functions are not allowed in GROUP BY, but got sum(data.b) AS `sum(b)`; line 1 pos 39
+{
+  "errorClass" : "GROUP_BY_POS_REFERS_AGG_EXPR",
+  "sqlState" : "42000",
+  "messageParameters" : {
+    "index" : "3",
+    "aggExpr" : "sum(data.b) AS `sum(b)`"
+  }
+}
 
 
 -- !query
@@ -128,7 +156,14 @@ select a, b, sum(b) + 2 from data group by 3
 struct<>
 -- !query output
 org.apache.spark.sql.AnalysisException
-GROUP BY 3 refers to an expression that is or contains an aggregate function. Aggregate functions are not allowed in GROUP BY, but got (sum(data.b) + CAST(2 AS BIGINT)) AS `(sum(b) + 2)`; line 1 pos 43
+{
+  "errorClass" : "GROUP_BY_POS_REFERS_AGG_EXPR",
+  "sqlState" : "42000",
+  "messageParameters" : {
+    "index" : "3",
+    "aggExpr" : "(sum(data.b) + CAST(2 AS BIGINT)) AS `(sum(b) + 2)`"
+  }
+}
 
 
 -- !query
@@ -349,7 +384,14 @@ select a, b, count(1) from data group by a, -1
 struct<>
 -- !query output
 org.apache.spark.sql.AnalysisException
-GROUP BY position -1 is not in select list (valid range is [1, 3]); line 1 pos 44
+{
+  "errorClass" : "GROUP_BY_POS_OUT_OF_RANGE",
+  "sqlState" : "42000",
+  "messageParameters" : {
+    "index" : "-1",
+    "size" : "3"
+  }
+}
 
 
 -- !query
@@ -358,7 +400,14 @@ select a, b, count(1) from data group by a, 3
 struct<>
 -- !query output
 org.apache.spark.sql.AnalysisException
-GROUP BY 3 refers to an expression that is or contains an aggregate function. Aggregate functions are not allowed in GROUP BY, but got count(1) AS `count(1)`; line 1 pos 44
+{
+  "errorClass" : "GROUP_BY_POS_REFERS_AGG_EXPR",
+  "sqlState" : "42000",
+  "messageParameters" : {
+    "index" : "3",
+    "aggExpr" : "count(1) AS `count(1)`"
+  }
+}
 
 
 -- !query
@@ -367,7 +416,14 @@ select a, b, count(1) from data group by cube(-1, 2)
 struct<>
 -- !query output
 org.apache.spark.sql.AnalysisException
-GROUP BY position -1 is not in select list (valid range is [1, 3]); line 1 pos 46
+{
+  "errorClass" : "GROUP_BY_POS_OUT_OF_RANGE",
+  "sqlState" : "42000",
+  "messageParameters" : {
+    "index" : "-1",
+    "size" : "3"
+  }
+}
 
 
 -- !query
@@ -376,7 +432,14 @@ select a, b, count(1) from data group by cube(1, 3)
 struct<>
 -- !query output
 org.apache.spark.sql.AnalysisException
-GROUP BY 3 refers to an expression that is or contains an aggregate function. Aggregate functions are not allowed in GROUP BY, but got count(1) AS `count(1)`; line 1 pos 49
+{
+  "errorClass" : "GROUP_BY_POS_REFERS_AGG_EXPR",
+  "sqlState" : "42000",
+  "messageParameters" : {
+    "index" : "3",
+    "aggExpr" : "count(1) AS `count(1)`"
+  }
+}
 
 
 -- !query
diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/select_implicit.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/select_implicit.sql.out
index d39f6101ac7..cd5bc39d7c6 100755
--- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/select_implicit.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/select_implicit.sql.out
@@ -207,7 +207,14 @@ SELECT c, count(*) FROM test_missing_target GROUP BY 3
 struct<>
 -- !query output
 org.apache.spark.sql.AnalysisException
-GROUP BY position 3 is not in select list (valid range is [1, 2]); line 1 pos 53
+{
+  "errorClass" : "GROUP_BY_POS_OUT_OF_RANGE",
+  "sqlState" : "42000",
+  "messageParameters" : {
+    "index" : "3",
+    "size" : "2"
+  }
+}
 
 
 -- !query
diff --git a/sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-select_implicit.sql.out b/sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-select_implicit.sql.out
index 76280074876..db2a855bf0f 100755
--- a/sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-select_implicit.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-select_implicit.sql.out
@@ -210,7 +210,14 @@ SELECT udf(c), udf(count(*)) FROM test_missing_target GROUP BY 3
 struct<>
 -- !query output
 org.apache.spark.sql.AnalysisException
-GROUP BY position 3 is not in select list (valid range is [1, 2]); line 1 pos 63
+{
+  "errorClass" : "GROUP_BY_POS_OUT_OF_RANGE",
+  "sqlState" : "42000",
+  "messageParameters" : {
+    "index" : "3",
+    "size" : "2"
+  }
+}
 
 
 -- !query


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org