You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by ge...@apache.org on 2021/08/17 12:25:09 UTC

[spark] branch branch-3.2 updated: Revert "[SPARK-35028][SQL] ANSI mode: disallow group by aliases"

This is an automated email from the ASF dual-hosted git repository.

gengliang pushed a commit to branch branch-3.2
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-3.2 by this push:
     new 70635b4  Revert "[SPARK-35028][SQL] ANSI mode: disallow group by aliases"
70635b4 is described below

commit 70635b4b2633be544563c1cb00e6333fdb1f3782
Author: Gengliang Wang <ge...@apache.org>
AuthorDate: Tue Aug 17 20:23:49 2021 +0800

    Revert "[SPARK-35028][SQL] ANSI mode: disallow group by aliases"
    
    ### What changes were proposed in this pull request?
    
    Revert [[SPARK-35028][SQL] ANSI mode: disallow group by aliases](https://github.com/apache/spark/pull/32129)
    
    ### Why are the changes needed?
    
    It turns out that many users are using the group by alias feature. Spark has its own precedence rule for when alias names conflict with column names in the GROUP BY clause: it always uses the table column. This should be reasonable and acceptable.
    Also, external DBMS such as PostgreSQL and MySQL allow grouping by alias, too.
    
    As we are going to announce ANSI mode GA in Spark 3.2, I suggest allowing the group by alias in ANSI mode.
    
    ### Does this PR introduce _any_ user-facing change?
    
    No, the feature is not released yet.
    
    ### How was this patch tested?
    
    Unit tests
    
    Closes #33758 from gengliangwang/revertGroupByAlias.
    
    Authored-by: Gengliang Wang <ge...@apache.org>
    Signed-off-by: Gengliang Wang <ge...@apache.org>
    (cherry picked from commit 8bfb4f1e72f33205b94957f7dacf298b0c8bde17)
    Signed-off-by: Gengliang Wang <ge...@apache.org>
---
 docs/sql-ref-ansi-compliance.md                    |    1 -
 .../spark/sql/catalyst/analysis/Analyzer.scala     |    2 +-
 .../org/apache/spark/sql/internal/SQLConf.scala    |   27 +-
 .../sql-tests/inputs/ansi/group-analytics.sql      |    1 -
 .../sql-tests/results/ansi/group-analytics.sql.out | 1293 --------------------
 5 files changed, 14 insertions(+), 1310 deletions(-)

diff --git a/docs/sql-ref-ansi-compliance.md b/docs/sql-ref-ansi-compliance.md
index a647abc..f0e1066 100644
--- a/docs/sql-ref-ansi-compliance.md
+++ b/docs/sql-ref-ansi-compliance.md
@@ -255,7 +255,6 @@ The behavior of some SQL functions can be different under ANSI mode (`spark.sql.
 The behavior of some SQL operators can be different under ANSI mode (`spark.sql.ansi.enabled=true`).
   - `array_col[index]`: This operator throws `ArrayIndexOutOfBoundsException` if using invalid indices.
   - `map_col[key]`: This operator throws `NoSuchElementException` if key does not exist in map.
-  - `GROUP BY`: aliases in a select list can not be used in GROUP BY clauses. Each column referenced in a GROUP BY clause shall unambiguously reference a column of the table resulting from the FROM clause.
 
 ### Useful Functions for ANSI Mode
 
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
index 2f0a709..92018eb 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
@@ -1951,7 +1951,7 @@ class Analyzer(override val catalogManager: CatalogManager)
       // mayResolveAttrByAggregateExprs requires the TreePattern UNRESOLVED_ATTRIBUTE.
       _.containsAllPatterns(AGGREGATE, UNRESOLVED_ATTRIBUTE), ruleId) {
       case agg @ Aggregate(groups, aggs, child)
-          if allowGroupByAlias && child.resolved && aggs.forall(_.resolved) &&
+          if conf.groupByAliases && child.resolved && aggs.forall(_.resolved) &&
             groups.exists(!_.resolved) =>
         agg.copy(groupingExpressions = mayResolveAttrByAggregateExprs(groups, aggs, child))
     }
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
index 555242f..6869977 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
@@ -240,17 +240,6 @@ object SQLConf {
     .intConf
     .createWithDefault(100)
 
-  val ANSI_ENABLED = buildConf("spark.sql.ansi.enabled")
-    .doc("When true, Spark SQL uses an ANSI compliant dialect instead of being Hive compliant. " +
-      "For example, Spark will throw an exception at runtime instead of returning null results " +
-      "when the inputs to a SQL operator/function are invalid." +
-      "For full details of this dialect, you can find them in the section \"ANSI Compliance\" of " +
-      "Spark's documentation. Some ANSI dialect features may be not from the ANSI SQL " +
-      "standard directly, but their behaviors align with ANSI SQL's style")
-    .version("3.0.0")
-    .booleanConf
-    .createWithDefault(false)
-
   val OPTIMIZER_EXCLUDED_RULES = buildConf("spark.sql.optimizer.excludedRules")
     .doc("Configures a list of rules to be disabled in the optimizer, in which the rules are " +
       "specified by their rule names and separated by comma. It is not guaranteed that all the " +
@@ -1211,9 +1200,8 @@ object SQLConf {
     .createWithDefault(true)
 
   val GROUP_BY_ALIASES = buildConf("spark.sql.groupByAliases")
-    .doc("This configuration is only effective when ANSI mode is disabled. When it is true and " +
-      s"${ANSI_ENABLED.key} is false, aliases in a select list can be used in group by clauses. " +
-      "Otherwise, an analysis exception is thrown in the case.")
+    .doc("When true, aliases in a select list can be used in group by clauses. When false, " +
+      "an analysis exception is thrown in the case.")
     .version("2.2.0")
     .booleanConf
     .createWithDefault(true)
@@ -2537,6 +2525,17 @@ object SQLConf {
       .checkValues(StoreAssignmentPolicy.values.map(_.toString))
       .createWithDefault(StoreAssignmentPolicy.ANSI.toString)
 
+  val ANSI_ENABLED = buildConf("spark.sql.ansi.enabled")
+    .doc("When true, Spark SQL uses an ANSI compliant dialect instead of being Hive compliant. " +
+      "For example, Spark will throw an exception at runtime instead of returning null results " +
+      "when the inputs to a SQL operator/function are invalid." +
+      "For full details of this dialect, you can find them in the section \"ANSI Compliance\" of " +
+      "Spark's documentation. Some ANSI dialect features may be not from the ANSI SQL " +
+      "standard directly, but their behaviors align with ANSI SQL's style")
+    .version("3.0.0")
+    .booleanConf
+    .createWithDefault(false)
+
   val SORT_BEFORE_REPARTITION =
     buildConf("spark.sql.execution.sortBeforeRepartition")
       .internal()
diff --git a/sql/core/src/test/resources/sql-tests/inputs/ansi/group-analytics.sql b/sql/core/src/test/resources/sql-tests/inputs/ansi/group-analytics.sql
deleted file mode 100644
index d786892..0000000
--- a/sql/core/src/test/resources/sql-tests/inputs/ansi/group-analytics.sql
+++ /dev/null
@@ -1 +0,0 @@
---IMPORT group-analytics.sql
\ No newline at end of file
diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/group-analytics.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/group-analytics.sql.out
deleted file mode 100644
index 9dbfc4c..0000000
--- a/sql/core/src/test/resources/sql-tests/results/ansi/group-analytics.sql.out
+++ /dev/null
@@ -1,1293 +0,0 @@
--- Automatically generated by SQLQueryTestSuite
--- Number of queries: 52
-
-
--- !query
-CREATE OR REPLACE TEMPORARY VIEW testData AS SELECT * FROM VALUES
-(1, 1), (1, 2), (2, 1), (2, 2), (3, 1), (3, 2)
-AS testData(a, b)
--- !query schema
-struct<>
--- !query output
-
-
-
--- !query
-SELECT a + b, b, SUM(a - b) FROM testData GROUP BY a + b, b WITH CUBE
--- !query schema
-struct<(a + b):int,b:int,sum((a - b)):bigint>
--- !query output
-2	1	0
-2	NULL	0
-3	1	1
-3	2	-1
-3	NULL	0
-4	1	2
-4	2	0
-4	NULL	2
-5	2	1
-5	NULL	1
-NULL	1	3
-NULL	2	0
-NULL	NULL	3
-
-
--- !query
-SELECT a, b, SUM(b) FROM testData GROUP BY a, b WITH CUBE
--- !query schema
-struct<a:int,b:int,sum(b):bigint>
--- !query output
-1	1	1
-1	2	2
-1	NULL	3
-2	1	1
-2	2	2
-2	NULL	3
-3	1	1
-3	2	2
-3	NULL	3
-NULL	1	3
-NULL	2	6
-NULL	NULL	9
-
-
--- !query
-SELECT a + b, b, SUM(a - b) FROM testData GROUP BY a + b, b WITH ROLLUP
--- !query schema
-struct<(a + b):int,b:int,sum((a - b)):bigint>
--- !query output
-2	1	0
-2	NULL	0
-3	1	1
-3	2	-1
-3	NULL	0
-4	1	2
-4	2	0
-4	NULL	2
-5	2	1
-5	NULL	1
-NULL	NULL	3
-
-
--- !query
-SELECT a, b, SUM(b) FROM testData GROUP BY a, b WITH ROLLUP
--- !query schema
-struct<a:int,b:int,sum(b):bigint>
--- !query output
-1	1	1
-1	2	2
-1	NULL	3
-2	1	1
-2	2	2
-2	NULL	3
-3	1	1
-3	2	2
-3	NULL	3
-NULL	NULL	9
-
-
--- !query
-CREATE OR REPLACE TEMPORARY VIEW courseSales AS SELECT * FROM VALUES
-("dotNET", 2012, 10000), ("Java", 2012, 20000), ("dotNET", 2012, 5000), ("dotNET", 2013, 48000), ("Java", 2013, 30000)
-AS courseSales(course, year, earnings)
--- !query schema
-struct<>
--- !query output
-
-
-
--- !query
-SELECT course, year, SUM(earnings) FROM courseSales GROUP BY ROLLUP(course, year) ORDER BY course, year
--- !query schema
-struct<course:string,year:int,sum(earnings):bigint>
--- !query output
-NULL	NULL	113000
-Java	NULL	50000
-Java	2012	20000
-Java	2013	30000
-dotNET	NULL	63000
-dotNET	2012	15000
-dotNET	2013	48000
-
-
--- !query
-SELECT course, year, SUM(earnings) FROM courseSales GROUP BY ROLLUP(course, year, (course, year)) ORDER BY course, year
--- !query schema
-struct<course:string,year:int,sum(earnings):bigint>
--- !query output
-NULL	NULL	113000
-Java	NULL	50000
-Java	2012	20000
-Java	2012	20000
-Java	2013	30000
-Java	2013	30000
-dotNET	NULL	63000
-dotNET	2012	15000
-dotNET	2012	15000
-dotNET	2013	48000
-dotNET	2013	48000
-
-
--- !query
-SELECT course, year, SUM(earnings) FROM courseSales GROUP BY ROLLUP(course, year, (course, year), ()) ORDER BY course, year
--- !query schema
-struct<>
--- !query output
-org.apache.spark.sql.catalyst.parser.ParseException
-
-Empty set in ROLLUP grouping sets is not supported.(line 1, pos 61)
-
-== SQL ==
-SELECT course, year, SUM(earnings) FROM courseSales GROUP BY ROLLUP(course, year, (course, year), ()) ORDER BY course, year
--------------------------------------------------------------^^^
-
-
--- !query
-SELECT course, year, SUM(earnings) FROM courseSales GROUP BY CUBE(course, year) ORDER BY course, year
--- !query schema
-struct<course:string,year:int,sum(earnings):bigint>
--- !query output
-NULL	NULL	113000
-NULL	2012	35000
-NULL	2013	78000
-Java	NULL	50000
-Java	2012	20000
-Java	2013	30000
-dotNET	NULL	63000
-dotNET	2012	15000
-dotNET	2013	48000
-
-
--- !query
-SELECT course, year, SUM(earnings) FROM courseSales GROUP BY CUBE(course, year, (course, year)) ORDER BY course, year
--- !query schema
-struct<course:string,year:int,sum(earnings):bigint>
--- !query output
-NULL	NULL	113000
-NULL	2012	35000
-NULL	2013	78000
-Java	NULL	50000
-Java	2012	20000
-Java	2012	20000
-Java	2012	20000
-Java	2012	20000
-Java	2012	20000
-Java	2013	30000
-Java	2013	30000
-Java	2013	30000
-Java	2013	30000
-Java	2013	30000
-dotNET	NULL	63000
-dotNET	2012	15000
-dotNET	2012	15000
-dotNET	2012	15000
-dotNET	2012	15000
-dotNET	2012	15000
-dotNET	2013	48000
-dotNET	2013	48000
-dotNET	2013	48000
-dotNET	2013	48000
-dotNET	2013	48000
-
-
--- !query
-SELECT course, year, SUM(earnings) FROM courseSales GROUP BY CUBE(course, year, (course, year), ()) ORDER BY course, year
--- !query schema
-struct<>
--- !query output
-org.apache.spark.sql.catalyst.parser.ParseException
-
-Empty set in CUBE grouping sets is not supported.(line 1, pos 61)
-
-== SQL ==
-SELECT course, year, SUM(earnings) FROM courseSales GROUP BY CUBE(course, year, (course, year), ()) ORDER BY course, year
--------------------------------------------------------------^^^
-
-
--- !query
-SELECT course, year, SUM(earnings) FROM courseSales GROUP BY course, year GROUPING SETS(course, year)
--- !query schema
-struct<course:string,year:int,sum(earnings):bigint>
--- !query output
-Java	NULL	50000
-NULL	2012	35000
-NULL	2013	78000
-dotNET	NULL	63000
-
-
--- !query
-SELECT course, year, SUM(earnings) FROM courseSales GROUP BY course, year GROUPING SETS(course, year, ())
--- !query schema
-struct<course:string,year:int,sum(earnings):bigint>
--- !query output
-Java	NULL	50000
-NULL	2012	35000
-NULL	2013	78000
-NULL	NULL	113000
-dotNET	NULL	63000
-
-
--- !query
-SELECT course, year, SUM(earnings) FROM courseSales GROUP BY course, year GROUPING SETS(course)
--- !query schema
-struct<course:string,year:int,sum(earnings):bigint>
--- !query output
-Java	NULL	50000
-dotNET	NULL	63000
-
-
--- !query
-SELECT course, year, SUM(earnings) FROM courseSales GROUP BY course, year GROUPING SETS(year)
--- !query schema
-struct<course:string,year:int,sum(earnings):bigint>
--- !query output
-NULL	2012	35000
-NULL	2013	78000
-
-
--- !query
-SELECT course, year, SUM(earnings) FROM courseSales GROUP BY course, CUBE(course, year) ORDER BY course, year
--- !query schema
-struct<course:string,year:int,sum(earnings):bigint>
--- !query output
-Java	NULL	50000
-Java	NULL	50000
-Java	2012	20000
-Java	2012	20000
-Java	2013	30000
-Java	2013	30000
-dotNET	NULL	63000
-dotNET	NULL	63000
-dotNET	2012	15000
-dotNET	2012	15000
-dotNET	2013	48000
-dotNET	2013	48000
-
-
--- !query
-SELECT course, year, SUM(earnings) FROM courseSales GROUP BY CUBE(course, year), ROLLUP(course, year) ORDER BY course, year
--- !query schema
-struct<course:string,year:int,sum(earnings):bigint>
--- !query output
-NULL	NULL	113000
-NULL	2012	35000
-NULL	2013	78000
-Java	NULL	50000
-Java	NULL	50000
-Java	NULL	50000
-Java	2012	20000
-Java	2012	20000
-Java	2012	20000
-Java	2012	20000
-Java	2012	20000
-Java	2012	20000
-Java	2012	20000
-Java	2013	30000
-Java	2013	30000
-Java	2013	30000
-Java	2013	30000
-Java	2013	30000
-Java	2013	30000
-Java	2013	30000
-dotNET	NULL	63000
-dotNET	NULL	63000
-dotNET	NULL	63000
-dotNET	2012	15000
-dotNET	2012	15000
-dotNET	2012	15000
-dotNET	2012	15000
-dotNET	2012	15000
-dotNET	2012	15000
-dotNET	2012	15000
-dotNET	2013	48000
-dotNET	2013	48000
-dotNET	2013	48000
-dotNET	2013	48000
-dotNET	2013	48000
-dotNET	2013	48000
-dotNET	2013	48000
-
-
--- !query
-SELECT course, year, SUM(earnings) FROM courseSales GROUP BY CUBE(course, year), ROLLUP(course, year), GROUPING SETS(course, year) ORDER BY course, year
--- !query schema
-struct<course:string,year:int,sum(earnings):bigint>
--- !query output
-NULL	2012	35000
-NULL	2012	35000
-NULL	2013	78000
-NULL	2013	78000
-Java	NULL	50000
-Java	NULL	50000
-Java	NULL	50000
-Java	NULL	50000
-Java	2012	20000
-Java	2012	20000
-Java	2012	20000
-Java	2012	20000
-Java	2012	20000
-Java	2012	20000
-Java	2012	20000
-Java	2012	20000
-Java	2012	20000
-Java	2012	20000
-Java	2012	20000
-Java	2012	20000
-Java	2012	20000
-Java	2012	20000
-Java	2012	20000
-Java	2012	20000
-Java	2012	20000
-Java	2012	20000
-Java	2013	30000
-Java	2013	30000
-Java	2013	30000
-Java	2013	30000
-Java	2013	30000
-Java	2013	30000
-Java	2013	30000
-Java	2013	30000
-Java	2013	30000
-Java	2013	30000
-Java	2013	30000
-Java	2013	30000
-Java	2013	30000
-Java	2013	30000
-Java	2013	30000
-Java	2013	30000
-Java	2013	30000
-Java	2013	30000
-dotNET	NULL	63000
-dotNET	NULL	63000
-dotNET	NULL	63000
-dotNET	NULL	63000
-dotNET	2012	15000
-dotNET	2012	15000
-dotNET	2012	15000
-dotNET	2012	15000
-dotNET	2012	15000
-dotNET	2012	15000
-dotNET	2012	15000
-dotNET	2012	15000
-dotNET	2012	15000
-dotNET	2012	15000
-dotNET	2012	15000
-dotNET	2012	15000
-dotNET	2012	15000
-dotNET	2012	15000
-dotNET	2012	15000
-dotNET	2012	15000
-dotNET	2012	15000
-dotNET	2012	15000
-dotNET	2013	48000
-dotNET	2013	48000
-dotNET	2013	48000
-dotNET	2013	48000
-dotNET	2013	48000
-dotNET	2013	48000
-dotNET	2013	48000
-dotNET	2013	48000
-dotNET	2013	48000
-dotNET	2013	48000
-dotNET	2013	48000
-dotNET	2013	48000
-dotNET	2013	48000
-dotNET	2013	48000
-dotNET	2013	48000
-dotNET	2013	48000
-dotNET	2013	48000
-dotNET	2013	48000
-
-
--- !query
-SELECT course, SUM(earnings) AS sum FROM courseSales
-GROUP BY course, earnings GROUPING SETS((), (course), (course, earnings)) ORDER BY course, sum
--- !query schema
-struct<course:string,sum:bigint>
--- !query output
-NULL	113000
-Java	20000
-Java	30000
-Java	50000
-dotNET	5000
-dotNET	10000
-dotNET	48000
-dotNET	63000
-
-
--- !query
-SELECT course, SUM(earnings) AS sum, GROUPING_ID(course, earnings) FROM courseSales
-GROUP BY course, earnings GROUPING SETS((), (course), (course, earnings)) ORDER BY course, sum
--- !query schema
-struct<course:string,sum:bigint,grouping_id(course, earnings):bigint>
--- !query output
-NULL	113000	3
-Java	20000	0
-Java	30000	0
-Java	50000	1
-dotNET	5000	0
-dotNET	10000	0
-dotNET	48000	0
-dotNET	63000	1
-
-
--- !query
-SELECT course, year, GROUPING(course), GROUPING(year), GROUPING_ID(course, year) FROM courseSales
-GROUP BY CUBE(course, year)
--- !query schema
-struct<course:string,year:int,grouping(course):tinyint,grouping(year):tinyint,grouping_id(course, year):bigint>
--- !query output
-Java	2012	0	0	0
-Java	2013	0	0	0
-Java	NULL	0	1	1
-NULL	2012	1	0	2
-NULL	2013	1	0	2
-NULL	NULL	1	1	3
-dotNET	2012	0	0	0
-dotNET	2013	0	0	0
-dotNET	NULL	0	1	1
-
-
--- !query
-SELECT course, year, GROUPING(course) FROM courseSales GROUP BY course, year
--- !query schema
-struct<>
--- !query output
-org.apache.spark.sql.AnalysisException
-grouping() can only be used with GroupingSets/Cube/Rollup
-
-
--- !query
-SELECT course, year, GROUPING_ID(course, year) FROM courseSales GROUP BY course, year
--- !query schema
-struct<>
--- !query output
-org.apache.spark.sql.AnalysisException
-grouping_id() can only be used with GroupingSets/Cube/Rollup
-
-
--- !query
-SELECT course, year, grouping__id FROM courseSales GROUP BY CUBE(course, year) ORDER BY grouping__id, course, year
--- !query schema
-struct<course:string,year:int,grouping__id:bigint>
--- !query output
-Java	2012	0
-Java	2013	0
-dotNET	2012	0
-dotNET	2013	0
-Java	NULL	1
-dotNET	NULL	1
-NULL	2012	2
-NULL	2013	2
-NULL	NULL	3
-
-
--- !query
-SELECT course, year FROM courseSales GROUP BY CUBE(course, year)
-HAVING GROUPING(year) = 1 AND GROUPING_ID(course, year) > 0 ORDER BY course, year
--- !query schema
-struct<course:string,year:int>
--- !query output
-NULL	NULL
-Java	NULL
-dotNET	NULL
-
-
--- !query
-SELECT course, year FROM courseSales GROUP BY course, year HAVING GROUPING(course) > 0
--- !query schema
-struct<>
--- !query output
-org.apache.spark.sql.AnalysisException
-grouping()/grouping_id() can only be used with GroupingSets/Cube/Rollup
-
-
--- !query
-SELECT course, year FROM courseSales GROUP BY course, year HAVING GROUPING_ID(course) > 0
--- !query schema
-struct<>
--- !query output
-org.apache.spark.sql.AnalysisException
-grouping()/grouping_id() can only be used with GroupingSets/Cube/Rollup
-
-
--- !query
-SELECT course, year FROM courseSales GROUP BY CUBE(course, year) HAVING grouping__id > 0
--- !query schema
-struct<course:string,year:int>
--- !query output
-Java	NULL
-NULL	2012
-NULL	2013
-NULL	NULL
-dotNET	NULL
-
-
--- !query
-SELECT course, year, GROUPING(course), GROUPING(year) FROM courseSales GROUP BY CUBE(course, year)
-ORDER BY GROUPING(course), GROUPING(year), course, year
--- !query schema
-struct<course:string,year:int,grouping(course):tinyint,grouping(year):tinyint>
--- !query output
-Java	2012	0	0
-Java	2013	0	0
-dotNET	2012	0	0
-dotNET	2013	0	0
-Java	NULL	0	1
-dotNET	NULL	0	1
-NULL	2012	1	0
-NULL	2013	1	0
-NULL	NULL	1	1
-
-
--- !query
-SELECT course, year, GROUPING_ID(course, year) FROM courseSales GROUP BY CUBE(course, year)
-ORDER BY GROUPING(course), GROUPING(year), course, year
--- !query schema
-struct<course:string,year:int,grouping_id(course, year):bigint>
--- !query output
-Java	2012	0
-Java	2013	0
-dotNET	2012	0
-dotNET	2013	0
-Java	NULL	1
-dotNET	NULL	1
-NULL	2012	2
-NULL	2013	2
-NULL	NULL	3
-
-
--- !query
-SELECT course, year FROM courseSales GROUP BY course, year ORDER BY GROUPING(course)
--- !query schema
-struct<>
--- !query output
-org.apache.spark.sql.AnalysisException
-grouping()/grouping_id() can only be used with GroupingSets/Cube/Rollup
-
-
--- !query
-SELECT course, year FROM courseSales GROUP BY course, year ORDER BY GROUPING_ID(course)
--- !query schema
-struct<>
--- !query output
-org.apache.spark.sql.AnalysisException
-grouping()/grouping_id() can only be used with GroupingSets/Cube/Rollup
-
-
--- !query
-SELECT course, year FROM courseSales GROUP BY CUBE(course, year) ORDER BY grouping__id, course, year
--- !query schema
-struct<course:string,year:int>
--- !query output
-Java	2012
-Java	2013
-dotNET	2012
-dotNET	2013
-Java	NULL
-dotNET	NULL
-NULL	2012
-NULL	2013
-NULL	NULL
-
-
--- !query
-SELECT a + b AS k1, b AS k2, SUM(a - b) FROM testData GROUP BY CUBE(k1, k2)
--- !query schema
-struct<>
--- !query output
-org.apache.spark.sql.AnalysisException
-cannot resolve 'k1' given input columns: [testdata.a, testdata.b]; line 1 pos 68
-
-
--- !query
-SELECT a + b AS k, b, SUM(a - b) FROM testData GROUP BY ROLLUP(k, b)
--- !query schema
-struct<>
--- !query output
-org.apache.spark.sql.AnalysisException
-cannot resolve 'k' given input columns: [testdata.a, testdata.b]; line 1 pos 63
-
-
--- !query
-SELECT a + b, b AS k, SUM(a - b) FROM testData GROUP BY a + b, k GROUPING SETS(k)
--- !query schema
-struct<>
--- !query output
-org.apache.spark.sql.AnalysisException
-cannot resolve 'k' given input columns: [testdata.a, testdata.b]; line 1 pos 79
-
-
--- !query
-SELECT a, b, count(1) FROM testData GROUP BY a, b, CUBE(a, b)
--- !query schema
-struct<a:int,b:int,count(1):bigint>
--- !query output
-1	1	1
-1	1	1
-1	1	1
-1	1	1
-1	2	1
-1	2	1
-1	2	1
-1	2	1
-2	1	1
-2	1	1
-2	1	1
-2	1	1
-2	2	1
-2	2	1
-2	2	1
-2	2	1
-3	1	1
-3	1	1
-3	1	1
-3	1	1
-3	2	1
-3	2	1
-3	2	1
-3	2	1
-
-
--- !query
-SELECT a, b, count(1) FROM testData GROUP BY a, b, ROLLUP(a, b)
--- !query schema
-struct<a:int,b:int,count(1):bigint>
--- !query output
-1	1	1
-1	1	1
-1	1	1
-1	2	1
-1	2	1
-1	2	1
-2	1	1
-2	1	1
-2	1	1
-2	2	1
-2	2	1
-2	2	1
-3	1	1
-3	1	1
-3	1	1
-3	2	1
-3	2	1
-3	2	1
-
-
--- !query
-SELECT a, b, count(1) FROM testData GROUP BY CUBE(a, b), ROLLUP(a, b)
--- !query schema
-struct<a:int,b:int,count(1):bigint>
--- !query output
-1	1	1
-1	1	1
-1	1	1
-1	1	1
-1	1	1
-1	1	1
-1	1	1
-1	2	1
-1	2	1
-1	2	1
-1	2	1
-1	2	1
-1	2	1
-1	2	1
-1	NULL	2
-1	NULL	2
-1	NULL	2
-2	1	1
-2	1	1
-2	1	1
-2	1	1
-2	1	1
-2	1	1
-2	1	1
-2	2	1
-2	2	1
-2	2	1
-2	2	1
-2	2	1
-2	2	1
-2	2	1
-2	NULL	2
-2	NULL	2
-2	NULL	2
-3	1	1
-3	1	1
-3	1	1
-3	1	1
-3	1	1
-3	1	1
-3	1	1
-3	2	1
-3	2	1
-3	2	1
-3	2	1
-3	2	1
-3	2	1
-3	2	1
-3	NULL	2
-3	NULL	2
-3	NULL	2
-NULL	1	3
-NULL	2	3
-NULL	NULL	6
-
-
--- !query
-SELECT a, b, count(1) FROM testData GROUP BY a, CUBE(a, b), ROLLUP(b)
--- !query schema
-struct<a:int,b:int,count(1):bigint>
--- !query output
-1	1	1
-1	1	1
-1	1	1
-1	1	1
-1	1	1
-1	1	1
-1	2	1
-1	2	1
-1	2	1
-1	2	1
-1	2	1
-1	2	1
-1	NULL	2
-1	NULL	2
-2	1	1
-2	1	1
-2	1	1
-2	1	1
-2	1	1
-2	1	1
-2	2	1
-2	2	1
-2	2	1
-2	2	1
-2	2	1
-2	2	1
-2	NULL	2
-2	NULL	2
-3	1	1
-3	1	1
-3	1	1
-3	1	1
-3	1	1
-3	1	1
-3	2	1
-3	2	1
-3	2	1
-3	2	1
-3	2	1
-3	2	1
-3	NULL	2
-3	NULL	2
-
-
--- !query
-SELECT a, b, count(1) FROM testData GROUP BY a, GROUPING SETS((a, b), (a), ())
--- !query schema
-struct<a:int,b:int,count(1):bigint>
--- !query output
-1	1	1
-1	2	1
-1	NULL	2
-1	NULL	2
-2	1	1
-2	2	1
-2	NULL	2
-2	NULL	2
-3	1	1
-3	2	1
-3	NULL	2
-3	NULL	2
-
-
--- !query
-SELECT a, b, count(1) FROM testData GROUP BY a, CUBE(a, b), GROUPING SETS((a, b), (a), ())
--- !query schema
-struct<a:int,b:int,count(1):bigint>
--- !query output
-1	1	1
-1	1	1
-1	1	1
-1	1	1
-1	1	1
-1	1	1
-1	1	1
-1	1	1
-1	2	1
-1	2	1
-1	2	1
-1	2	1
-1	2	1
-1	2	1
-1	2	1
-1	2	1
-1	NULL	2
-1	NULL	2
-1	NULL	2
-1	NULL	2
-2	1	1
-2	1	1
-2	1	1
-2	1	1
-2	1	1
-2	1	1
-2	1	1
-2	1	1
-2	2	1
-2	2	1
-2	2	1
-2	2	1
-2	2	1
-2	2	1
-2	2	1
-2	2	1
-2	NULL	2
-2	NULL	2
-2	NULL	2
-2	NULL	2
-3	1	1
-3	1	1
-3	1	1
-3	1	1
-3	1	1
-3	1	1
-3	1	1
-3	1	1
-3	2	1
-3	2	1
-3	2	1
-3	2	1
-3	2	1
-3	2	1
-3	2	1
-3	2	1
-3	NULL	2
-3	NULL	2
-3	NULL	2
-3	NULL	2
-
-
--- !query
-SELECT a, b, count(1) FROM testData GROUP BY a, CUBE(a, b), ROLLUP(a, b), GROUPING SETS((a, b), (a), ())
--- !query schema
-struct<a:int,b:int,count(1):bigint>
--- !query output
-1	1	1
-1	1	1
-1	1	1
-1	1	1
-1	1	1
-1	1	1
-1	1	1
-1	1	1
-1	1	1
-1	1	1
-1	1	1
-1	1	1
-1	1	1
-1	1	1
-1	1	1
-1	1	1
-1	1	1
-1	1	1
-1	1	1
-1	1	1
-1	1	1
-1	1	1
-1	1	1
-1	1	1
-1	1	1
-1	1	1
-1	1	1
-1	1	1
-1	2	1
-1	2	1
-1	2	1
-1	2	1
-1	2	1
-1	2	1
-1	2	1
-1	2	1
-1	2	1
-1	2	1
-1	2	1
-1	2	1
-1	2	1
-1	2	1
-1	2	1
-1	2	1
-1	2	1
-1	2	1
-1	2	1
-1	2	1
-1	2	1
-1	2	1
-1	2	1
-1	2	1
-1	2	1
-1	2	1
-1	2	1
-1	2	1
-1	NULL	2
-1	NULL	2
-1	NULL	2
-1	NULL	2
-1	NULL	2
-1	NULL	2
-1	NULL	2
-1	NULL	2
-2	1	1
-2	1	1
-2	1	1
-2	1	1
-2	1	1
-2	1	1
-2	1	1
-2	1	1
-2	1	1
-2	1	1
-2	1	1
-2	1	1
-2	1	1
-2	1	1
-2	1	1
-2	1	1
-2	1	1
-2	1	1
-2	1	1
-2	1	1
-2	1	1
-2	1	1
-2	1	1
-2	1	1
-2	1	1
-2	1	1
-2	1	1
-2	1	1
-2	2	1
-2	2	1
-2	2	1
-2	2	1
-2	2	1
-2	2	1
-2	2	1
-2	2	1
-2	2	1
-2	2	1
-2	2	1
-2	2	1
-2	2	1
-2	2	1
-2	2	1
-2	2	1
-2	2	1
-2	2	1
-2	2	1
-2	2	1
-2	2	1
-2	2	1
-2	2	1
-2	2	1
-2	2	1
-2	2	1
-2	2	1
-2	2	1
-2	NULL	2
-2	NULL	2
-2	NULL	2
-2	NULL	2
-2	NULL	2
-2	NULL	2
-2	NULL	2
-2	NULL	2
-3	1	1
-3	1	1
-3	1	1
-3	1	1
-3	1	1
-3	1	1
-3	1	1
-3	1	1
-3	1	1
-3	1	1
-3	1	1
-3	1	1
-3	1	1
-3	1	1
-3	1	1
-3	1	1
-3	1	1
-3	1	1
-3	1	1
-3	1	1
-3	1	1
-3	1	1
-3	1	1
-3	1	1
-3	1	1
-3	1	1
-3	1	1
-3	1	1
-3	2	1
-3	2	1
-3	2	1
-3	2	1
-3	2	1
-3	2	1
-3	2	1
-3	2	1
-3	2	1
-3	2	1
-3	2	1
-3	2	1
-3	2	1
-3	2	1
-3	2	1
-3	2	1
-3	2	1
-3	2	1
-3	2	1
-3	2	1
-3	2	1
-3	2	1
-3	2	1
-3	2	1
-3	2	1
-3	2	1
-3	2	1
-3	2	1
-3	NULL	2
-3	NULL	2
-3	NULL	2
-3	NULL	2
-3	NULL	2
-3	NULL	2
-3	NULL	2
-3	NULL	2
-
-
--- !query
-SELECT a, b, count(1) FROM testData GROUP BY a, GROUPING SETS(ROLLUP(a, b))
--- !query schema
-struct<a:int,b:int,count(1):bigint>
--- !query output
-1	1	1
-1	2	1
-1	NULL	2
-1	NULL	2
-2	1	1
-2	2	1
-2	NULL	2
-2	NULL	2
-3	1	1
-3	2	1
-3	NULL	2
-3	NULL	2
-
-
--- !query
-SELECT a, b, count(1) FROM testData GROUP BY a, GROUPING SETS(GROUPING SETS((a, b), (a), ()))
--- !query schema
-struct<a:int,b:int,count(1):bigint>
--- !query output
-1	1	1
-1	2	1
-1	NULL	2
-1	NULL	2
-2	1	1
-2	2	1
-2	NULL	2
-2	NULL	2
-3	1	1
-3	2	1
-3	NULL	2
-3	NULL	2
-
-
--- !query
-SELECT a, b, count(1) FROM testData GROUP BY a, GROUPING SETS((a, b), GROUPING SETS(ROLLUP(a, b)))
--- !query schema
-struct<a:int,b:int,count(1):bigint>
--- !query output
-1	1	1
-1	1	1
-1	2	1
-1	2	1
-1	NULL	2
-1	NULL	2
-2	1	1
-2	1	1
-2	2	1
-2	2	1
-2	NULL	2
-2	NULL	2
-3	1	1
-3	1	1
-3	2	1
-3	2	1
-3	NULL	2
-3	NULL	2
-
-
--- !query
-SELECT a, b, count(1) FROM testData GROUP BY a, GROUPING SETS((a, b, a, b), (a, b, a), (a, b))
--- !query schema
-struct<a:int,b:int,count(1):bigint>
--- !query output
-1	1	1
-1	1	1
-1	1	1
-1	2	1
-1	2	1
-1	2	1
-2	1	1
-2	1	1
-2	1	1
-2	2	1
-2	2	1
-2	2	1
-3	1	1
-3	1	1
-3	1	1
-3	2	1
-3	2	1
-3	2	1
-
-
--- !query
-SELECT a, b, count(1) FROM testData GROUP BY a, GROUPING SETS(GROUPING SETS((a, b, a, b), (a, b, a), (a, b)))
--- !query schema
-struct<a:int,b:int,count(1):bigint>
--- !query output
-1	1	1
-1	1	1
-1	1	1
-1	2	1
-1	2	1
-1	2	1
-2	1	1
-2	1	1
-2	1	1
-2	2	1
-2	2	1
-2	2	1
-3	1	1
-3	1	1
-3	1	1
-3	2	1
-3	2	1
-3	2	1
-
-
--- !query
-SELECT a, b, count(1) FROM testData GROUP BY a, GROUPING SETS(ROLLUP(a, b), CUBE(a, b))
--- !query schema
-struct<a:int,b:int,count(1):bigint>
--- !query output
-1	1	1
-1	1	1
-1	1	1
-1	2	1
-1	2	1
-1	2	1
-1	NULL	2
-1	NULL	2
-1	NULL	2
-1	NULL	2
-2	1	1
-2	1	1
-2	1	1
-2	2	1
-2	2	1
-2	2	1
-2	NULL	2
-2	NULL	2
-2	NULL	2
-2	NULL	2
-3	1	1
-3	1	1
-3	1	1
-3	2	1
-3	2	1
-3	2	1
-3	NULL	2
-3	NULL	2
-3	NULL	2
-3	NULL	2
-
-
--- !query
-SELECT a, b, count(1) FROM testData GROUP BY a, GROUPING SETS(GROUPING SETS((a, b), (a), ()), GROUPING SETS((a, b), (a), (b), ()))
--- !query schema
-struct<a:int,b:int,count(1):bigint>
--- !query output
-1	1	1
-1	1	1
-1	1	1
-1	2	1
-1	2	1
-1	2	1
-1	NULL	2
-1	NULL	2
-1	NULL	2
-1	NULL	2
-2	1	1
-2	1	1
-2	1	1
-2	2	1
-2	2	1
-2	2	1
-2	NULL	2
-2	NULL	2
-2	NULL	2
-2	NULL	2
-3	1	1
-3	1	1
-3	1	1
-3	2	1
-3	2	1
-3	2	1
-3	NULL	2
-3	NULL	2
-3	NULL	2
-3	NULL	2
-
-
--- !query
-SELECT a, b, count(1) FROM testData GROUP BY a, GROUPING SETS((a, b), (a), (), (a, b), (a), (b), ())
--- !query schema
-struct<a:int,b:int,count(1):bigint>
--- !query output
-1	1	1
-1	1	1
-1	1	1
-1	2	1
-1	2	1
-1	2	1
-1	NULL	2
-1	NULL	2
-1	NULL	2
-1	NULL	2
-2	1	1
-2	1	1
-2	1	1
-2	2	1
-2	2	1
-2	2	1
-2	NULL	2
-2	NULL	2
-2	NULL	2
-2	NULL	2
-3	1	1
-3	1	1
-3	1	1
-3	2	1
-3	2	1
-3	2	1
-3	NULL	2
-3	NULL	2
-3	NULL	2
-3	NULL	2

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org