You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by gu...@apache.org on 2022/09/19 10:53:37 UTC

[spark] branch master updated: [SPARK-40419][SQL][TESTS] Integrate Grouped Aggregate Pandas UDFs into *.sql test cases

This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new dbb90e92282 [SPARK-40419][SQL][TESTS] Integrate Grouped Aggregate Pandas UDFs into *.sql test cases
dbb90e92282 is described below

commit dbb90e922821acab0e308c4b735d718c7e55207a
Author: itholic <ha...@databricks.com>
AuthorDate: Mon Sep 19 19:53:19 2022 +0900

    [SPARK-40419][SQL][TESTS] Integrate Grouped Aggregate Pandas UDFs into *.sql test cases
    
    ### What changes were proposed in this pull request?
    
    This PR proposes to integrate Grouped Aggregate Pandas UDF tests into *.sql test cases.
    
    This PR includes the fixes below:
    - Add `UDAFTestCase` into `SQLQueryTestSuite.scala` to test the UDAF related functions in sql.
    - Add `udaf` directory and create related sql test cases into this directory.
    - Generate golden files for new added sql test files.
    - Skip these new test cases in `ThriftServerQueryTestSuite.scala` for now.
    - Fix minor typos.
    
    ### Why are the changes needed?
    
    To improve test coverage, so as to prevent potential bugs in the future.
    
    ### Does this PR introduce _any_ user-facing change?
    
    No, it's test-only.
    
    ### How was this patch tested?
    
    Added sql test files and corresponding golden files.
    
    Closes #37873 from itholic/SPARK-40419.
    
    Authored-by: itholic <ha...@databricks.com>
    Signed-off-by: Hyukjin Kwon <gu...@apache.org>
---
 .../sql-tests/inputs/udaf/udaf-group-analytics.sql |   70 ++
 .../inputs/udaf/udaf-group-by-ordinal.sql          |   88 ++
 .../sql-tests/inputs/udaf/udaf-group-by.sql        |  110 ++
 .../sql-tests/inputs/udaf/udaf-grouping-set.sql    |   47 +
 .../resources/sql-tests/inputs/{ => udaf}/udaf.sql |    0
 .../results/udaf/udaf-group-analytics.sql.out      | 1150 ++++++++++++++++++++
 .../results/udaf/udaf-group-by-ordinal.sql.out     |  405 +++++++
 .../sql-tests/results/udaf/udaf-group-by.sql.out   |  411 +++++++
 .../results/udaf/udaf-grouping-set.sql.out         |  175 +++
 .../resources/sql-tests/results/udaf/udaf.sql.out  |   67 ++
 .../org/apache/spark/sql/SQLQueryTestSuite.scala   |   17 +
 .../thriftserver/ThriftServerQueryTestSuite.scala  |    3 +
 12 files changed, 2543 insertions(+)

diff --git a/sql/core/src/test/resources/sql-tests/inputs/udaf/udaf-group-analytics.sql b/sql/core/src/test/resources/sql-tests/inputs/udaf/udaf-group-analytics.sql
new file mode 100644
index 00000000000..0249d98b6be
--- /dev/null
+++ b/sql/core/src/test/resources/sql-tests/inputs/udaf/udaf-group-analytics.sql
@@ -0,0 +1,70 @@
+CREATE OR REPLACE TEMPORARY VIEW testData AS SELECT * FROM VALUES
+(1, 1), (1, 2), (2, 1), (2, 2), (3, 1), (3, 2)
+AS testData(a, b);
+
+-- CUBE on overlapping columns
+SELECT a + b, b, udaf(a - b) FROM testData GROUP BY a + b, b WITH CUBE;
+
+SELECT a, b, udaf(b) FROM testData GROUP BY a, b WITH CUBE;
+
+-- ROLLUP on overlapping columns
+SELECT a + b, b, udaf(a - b) FROM testData GROUP BY a + b, b WITH ROLLUP;
+
+SELECT a, b, udaf(b) FROM testData GROUP BY a, b WITH ROLLUP;
+
+CREATE OR REPLACE TEMPORARY VIEW courseSales AS SELECT * FROM VALUES
+("dotNET", 2012, 10000), ("Java", 2012, 20000), ("dotNET", 2012, 5000), ("dotNET", 2013, 48000), ("Java", 2013, 30000)
+AS courseSales(course, year, earnings);
+
+-- ROLLUP
+SELECT course, year, udaf(earnings) FROM courseSales GROUP BY ROLLUP(course, year) ORDER BY course, year;
+SELECT course, year, udaf(earnings) FROM courseSales GROUP BY ROLLUP(course, year, (course, year)) ORDER BY course, year;
+SELECT course, year, udaf(earnings) FROM courseSales GROUP BY ROLLUP(course, year, (course, year), ()) ORDER BY course, year;
+
+-- CUBE
+SELECT course, year, udaf(earnings) FROM courseSales GROUP BY CUBE(course, year) ORDER BY course, year;
+SELECT course, year, udaf(earnings) FROM courseSales GROUP BY CUBE(course, year, (course, year)) ORDER BY course, year;
+SELECT course, year, udaf(earnings) FROM courseSales GROUP BY CUBE(course, year, (course, year), ()) ORDER BY course, year;
+
+-- GROUPING SETS
+SELECT course, year, udaf(earnings) FROM courseSales GROUP BY course, year GROUPING SETS(course, year);
+SELECT course, year, udaf(earnings) FROM courseSales GROUP BY course, year GROUPING SETS(course, year, ());
+SELECT course, year, udaf(earnings) FROM courseSales GROUP BY course, year GROUPING SETS(course);
+SELECT course, year, udaf(earnings) FROM courseSales GROUP BY course, year GROUPING SETS(year);
+
+-- Partial ROLLUP/CUBE/GROUPING SETS
+SELECT course, year, udaf(earnings) FROM courseSales GROUP BY course, CUBE(course, year) ORDER BY course, year;
+SELECT course, year, udaf(earnings) FROM courseSales GROUP BY CUBE(course, year), ROLLUP(course, year) ORDER BY course, year;
+SELECT course, year, udaf(earnings) FROM courseSales GROUP BY CUBE(course, year), ROLLUP(course, year), GROUPING SETS(course, year) ORDER BY course, year;
+
+-- GROUPING SETS with aggregate functions containing groupBy columns
+SELECT course, udaf(earnings) AS sum FROM courseSales
+GROUP BY course, earnings GROUPING SETS((), (course), (course, earnings)) ORDER BY course, sum;
+SELECT course, udaf(earnings) AS sum, GROUPING_ID(course, earnings) FROM courseSales
+GROUP BY course, earnings GROUPING SETS((), (course), (course, earnings)) ORDER BY course, sum;
+
+-- Aliases in SELECT could be used in ROLLUP/CUBE/GROUPING SETS
+SELECT a + b AS k1, b AS k2, udaf(a - b) FROM testData GROUP BY CUBE(k1, k2);
+SELECT a + b AS k, b, udaf(a - b) FROM testData GROUP BY ROLLUP(k, b);
+SELECT a + b, b AS k, udaf(a - b) FROM testData GROUP BY a + b, k GROUPING SETS(k);
+
+-- GROUP BY using a mix of separate columns and CUBE/ROLLUP/GROUPING SETS
+SELECT a, b, udaf(1) FROM testData GROUP BY a, b, CUBE(a, b);
+SELECT a, b, udaf(1) FROM testData GROUP BY a, b, ROLLUP(a, b);
+SELECT a, b, udaf(1) FROM testData GROUP BY CUBE(a, b), ROLLUP(a, b);
+SELECT a, b, udaf(1) FROM testData GROUP BY a, CUBE(a, b), ROLLUP(b);
+SELECT a, b, udaf(1) FROM testData GROUP BY a, GROUPING SETS((a, b), (a), ());
+SELECT a, b, udaf(1) FROM testData GROUP BY a, CUBE(a, b), GROUPING SETS((a, b), (a), ());
+SELECT a, b, udaf(1) FROM testData GROUP BY a, CUBE(a, b), ROLLUP(a, b), GROUPING SETS((a, b), (a), ());
+
+-- Support nested CUBE/ROLLUP/GROUPING SETS in GROUPING SETS
+SELECT a, b, udaf(1) FROM testData GROUP BY a, GROUPING SETS(ROLLUP(a, b));
+SELECT a, b, udaf(1) FROM testData GROUP BY a, GROUPING SETS(GROUPING SETS((a, b), (a), ()));
+
+SELECT a, b, udaf(1) FROM testData GROUP BY a, GROUPING SETS((a, b), GROUPING SETS(ROLLUP(a, b)));
+SELECT a, b, udaf(1) FROM testData GROUP BY a, GROUPING SETS((a, b, a, b), (a, b, a), (a, b));
+SELECT a, b, udaf(1) FROM testData GROUP BY a, GROUPING SETS(GROUPING SETS((a, b, a, b), (a, b, a), (a, b)));
+
+SELECT a, b, udaf(1) FROM testData GROUP BY a, GROUPING SETS(ROLLUP(a, b), CUBE(a, b));
+SELECT a, b, udaf(1) FROM testData GROUP BY a, GROUPING SETS(GROUPING SETS((a, b), (a), ()), GROUPING SETS((a, b), (a), (b), ()));
+SELECT a, b, udaf(1) FROM testData GROUP BY a, GROUPING SETS((a, b), (a), (), (a, b), (a), (b), ());
diff --git a/sql/core/src/test/resources/sql-tests/inputs/udaf/udaf-group-by-ordinal.sql b/sql/core/src/test/resources/sql-tests/inputs/udaf/udaf-group-by-ordinal.sql
new file mode 100644
index 00000000000..ded3e74d20a
--- /dev/null
+++ b/sql/core/src/test/resources/sql-tests/inputs/udaf/udaf-group-by-ordinal.sql
@@ -0,0 +1,88 @@
+-- group by ordinal positions
+
+create temporary view data as select * from values
+  (1, 1),
+  (1, 2),
+  (2, 1),
+  (2, 2),
+  (3, 1),
+  (3, 2)
+  as data(a, b);
+
+-- basic case
+select a, udaf(b) from data group by 1;
+
+-- constant case
+select 1, 2, udaf(b) from data group by 1, 2;
+
+-- duplicate group by column
+select a, 1, udaf(b) from data group by a, 1;
+select a, 1, udaf(b) from data group by 1, 2;
+
+-- group by a non-aggregate expression's ordinal
+select a, b + 2, udaf(2) from data group by a, 2;
+
+-- with alias
+select a as aa, b + 2 as bb, udaf(2) from data group by 1, 2;
+
+-- foldable non-literal: this should be the same as no grouping.
+select udaf(b) from data group by 1 + 0;
+
+-- negative case: position is an aggregate expression
+select a, b, udaf(b) from data group by 3;
+select a, b, udaf(b) + 2 from data group by 3;
+
+-- negative case: nondeterministic expression
+select a, rand(0), udaf(b)
+from 
+(select /*+ REPARTITION(1) */ a, b from data) group by a, 2;
+
+-- group by ordinal followed by order by
+select a, udaf(a) from (select 1 as a) tmp group by 1 order by 1;
+
+-- group by ordinal followed by having
+select udaf(a), a from (select 1 as a) tmp group by 2 having a > 0;
+
+-- mixed cases: group-by ordinals and aliases
+select a, a AS k, udaf(b) from data group by k, 1;
+
+-- can use ordinal in CUBE
+select a, b, udaf(1) from data group by cube(1, 2);
+
+-- mixed cases: can use ordinal in CUBE
+select a, b, udaf(1) from data group by cube(1, b);
+
+-- can use ordinal with cube
+select a, b, udaf(1) from data group by 1, 2 with cube;
+
+-- can use ordinal in ROLLUP
+select a, b, udaf(1) from data group by rollup(1, 2);
+
+-- mixed cases: can use ordinal in ROLLUP
+select a, b, udaf(1) from data group by rollup(1, b);
+
+-- can use ordinal with rollup
+select a, b, udaf(1) from data group by 1, 2 with rollup;
+
+-- can use ordinal in GROUPING SETS
+select a, b, udaf(1) from data group by grouping sets((1), (2), (1, 2));
+
+-- mixed cases: can use ordinal in GROUPING SETS
+select a, b, udaf(1) from data group by grouping sets((1), (b), (a, 2));
+
+select a, b, udaf(1) from data group by a, 2 grouping sets((1), (b), (a, 2));
+
+-- range error
+select a, b, udaf(1) from data group by a, -1;
+
+select a, b, udaf(1) from data group by a, 3;
+
+select a, b, udaf(1) from data group by cube(-1, 2);
+
+select a, b, udaf(1) from data group by cube(1, 3);
+
+-- turn off group by ordinal
+set spark.sql.groupByOrdinal=false;
+
+-- can now group by negative literal
+select udaf(b) from data group by -1;
diff --git a/sql/core/src/test/resources/sql-tests/inputs/udaf/udaf-group-by.sql b/sql/core/src/test/resources/sql-tests/inputs/udaf/udaf-group-by.sql
new file mode 100644
index 00000000000..eaac13bcf6a
--- /dev/null
+++ b/sql/core/src/test/resources/sql-tests/inputs/udaf/udaf-group-by.sql
@@ -0,0 +1,110 @@
+-- Test aggregate operator with codegen on and off.
+--CONFIG_DIM1 spark.sql.codegen.wholeStage=true
+--CONFIG_DIM1 spark.sql.codegen.wholeStage=false,spark.sql.codegen.factoryMode=CODEGEN_ONLY
+--CONFIG_DIM1 spark.sql.codegen.wholeStage=false,spark.sql.codegen.factoryMode=NO_CODEGEN
+
+-- Test data.
+CREATE OR REPLACE TEMPORARY VIEW testData AS SELECT * FROM VALUES
+(1, 1), (1, 2), (2, 1), (2, 2), (3, 1), (3, 2), (null, 1), (3, null), (null, null)
+AS testData(a, b);
+
+-- Aggregate with empty GroupBy expressions.
+SELECT a, udaf(b) FROM testData;
+SELECT udaf(a), udaf(b) FROM testData;
+
+-- Aggregate with non-empty GroupBy expressions.
+SELECT a, udaf(b) FROM testData GROUP BY a;
+SELECT a, udaf(b) FROM testData GROUP BY b;
+SELECT udaf(a), udaf(b) FROM testData GROUP BY a;
+
+-- Aggregate grouped by literals.
+SELECT 'foo', udaf(a) FROM testData GROUP BY 1;
+
+-- Aggregate grouped by literals (hash aggregate).
+SELECT 'foo', udaf(a) FROM testData WHERE a = 0 GROUP BY 1;
+
+-- Aggregate grouped by literals (sort aggregate).
+SELECT 'foo', udaf(STRUCT(a)) FROM testData WHERE a = 0 GROUP BY 1;
+
+-- Aggregate with complex GroupBy expressions.
+SELECT a + b, udaf(b) FROM testData GROUP BY a + b;
+SELECT a + 2, udaf(b) FROM testData GROUP BY a + 1;
+SELECT a + 1 + 1, udaf(b) FROM testData GROUP BY a + 1;
+
+-- Aggregate with nulls.
+SELECT SKEWNESS(a), KURTOSIS(a), udaf(a), udaf(a), AVG(a), VARIANCE(a), STDDEV(a), SUM(a), udaf(a)
+FROM testData;
+
+-- Aggregate with foldable input and multiple distinct groups.
+SELECT udaf(DISTINCT b), udaf(DISTINCT b, c) FROM (SELECT 1 AS a, 2 AS b, 3 AS c) GROUP BY a;
+
+-- Aliases in SELECT could be used in GROUP BY
+SELECT a AS k, udaf(b) FROM testData GROUP BY k;
+SELECT a AS k, udaf(b) FROM testData GROUP BY k HAVING k > 1;
+
+-- GROUP BY alias with invalid col in SELECT list
+SELECT a AS k, udaf(non_existing) FROM testData GROUP BY k;
+
+-- Test data.
+CREATE OR REPLACE TEMPORARY VIEW testDataHasSameNameWithAlias AS SELECT * FROM VALUES
+(1, 1, 3), (1, 2, 1) AS testDataHasSameNameWithAlias(k, a, v);
+SELECT k AS a, udaf(v) FROM testDataHasSameNameWithAlias GROUP BY a;
+
+-- turn off group by aliases
+set spark.sql.groupByAliases=false;
+
+-- Check analysis exceptions
+SELECT a AS k, udaf(b) FROM testData GROUP BY k;
+
+-- Aggregate with empty input and non-empty GroupBy expressions.
+SELECT a, udaf(1) FROM testData WHERE false GROUP BY a;
+
+-- Aggregate with empty input and empty GroupBy expressions.
+SELECT udaf(1) FROM testData WHERE false;
+SELECT 1 FROM (SELECT udaf(1) FROM testData WHERE false) t;
+
+-- Aggregate with empty GroupBy expressions and filter on top
+SELECT 1 from (
+  SELECT 1 AS z,
+  udaf(a.x)
+  FROM (select 1 as x) a
+  WHERE false
+) b
+where b.z != b.z;
+
+-- SPARK-25708 HAVING without GROUP BY means global aggregate
+SELECT 1 FROM range(10) HAVING udaf(id) > 0;
+
+-- Test data
+CREATE OR REPLACE TEMPORARY VIEW test_agg AS SELECT * FROM VALUES
+  (1, true), (1, false),
+  (2, true),
+  (3, false), (3, null),
+  (4, null), (4, null),
+  (5, null), (5, true), (5, false) AS test_agg(k, v);
+
+-- empty table
+SELECT udaf(v), some(v), any(v), bool_and(v), bool_or(v) FROM test_agg WHERE 1 = 0;
+
+-- all null values
+SELECT udaf(v), some(v), any(v), bool_and(v), bool_or(v) FROM test_agg WHERE k = 4;
+
+-- aggregates are null Filtering
+SELECT udaf(v), some(v), any(v), bool_and(v), bool_or(v) FROM test_agg WHERE k = 5;
+
+-- group by
+SELECT k, udaf(v), some(v), any(v), bool_and(v), bool_or(v) FROM test_agg GROUP BY k;
+
+-- having
+SELECT k, udaf(v) FROM test_agg GROUP BY k HAVING udaf(v) = false;
+SELECT k, udaf(v) FROM test_agg GROUP BY k HAVING udaf(v) IS NULL;
+
+-- basic subquery path to make sure rewrite happens in both parent and child plans.
+SELECT k,
+       udaf(v) AS count
+FROM   test_agg
+WHERE  k = 2
+       AND v IN (SELECT Any(v)
+                 FROM   test_agg
+                 WHERE  k = 1)
+GROUP  BY k;
diff --git a/sql/core/src/test/resources/sql-tests/inputs/udaf/udaf-grouping-set.sql b/sql/core/src/test/resources/sql-tests/inputs/udaf/udaf-grouping-set.sql
new file mode 100644
index 00000000000..1217b9e5b09
--- /dev/null
+++ b/sql/core/src/test/resources/sql-tests/inputs/udaf/udaf-grouping-set.sql
@@ -0,0 +1,47 @@
+CREATE TEMPORARY VIEW grouping AS SELECT * FROM VALUES
+  ("1", "2", "3", 1),
+  ("4", "5", "6", 1),
+  ("7", "8", "9", 1)
+  as grouping(a, b, c, d);
+
+-- SPARK-17849: grouping set throws NPE #1
+SELECT a, b, c, udaf(d) FROM grouping GROUP BY a, b, c GROUPING SETS (());
+
+-- SPARK-17849: grouping set throws NPE #2
+SELECT a, b, c, udaf(d) FROM grouping GROUP BY a, b, c GROUPING SETS ((a));
+
+-- SPARK-17849: grouping set throws NPE #3
+SELECT a, b, c, udaf(d) FROM grouping GROUP BY a, b, c GROUPING SETS ((c));
+
+-- Group sets without explicit group by
+SELECT c1, udaf(c2) FROM (VALUES ('x', 10, 0), ('y', 20, 0)) AS t (c1, c2, c3) GROUP BY GROUPING SETS (c1);
+
+-- Group sets without group by and with grouping
+SELECT c1, udaf(c2), grouping(c1) FROM (VALUES ('x', 10, 0), ('y', 20, 0)) AS t (c1, c2, c3) GROUP BY GROUPING SETS (c1);
+
+-- Multiple grouping within a grouping set
+SELECT c1, c2, udaf(c3), grouping__id
+FROM   (VALUES ('x', 'a', 10), ('y', 'b', 20) ) AS t (c1, c2, c3)
+GROUP  BY GROUPING SETS ( ( c1 ), ( c2 ) )
+HAVING GROUPING__ID > 1;
+
+-- complex expression in grouping sets
+SELECT a + b, b, udaf(c) FROM (VALUES (1,1,1),(2,2,2)) AS t(a,b,c) GROUP BY GROUPING SETS ( (a + b), (b));
+
+-- complex expression in grouping sets
+SELECT a + b, b, udaf(c) FROM (VALUES (1,1,1),(2,2,2)) AS t(a,b,c) GROUP BY GROUPING SETS ( (a + b), (b + a), (b));
+
+-- negative tests - must have at least one grouping expression
+SELECT a, b, c, udaf(d) FROM grouping GROUP BY WITH ROLLUP;
+
+SELECT a, b, c, udaf(d) FROM grouping GROUP BY WITH CUBE;
+
+-- duplicate entries in grouping sets
+SELECT k1, k2, udaf(v) FROM (VALUES (1,1,1),(2,2,2)) AS t(k1,k2,v) GROUP BY GROUPING SETS ((k1),(k1,k2),(k2,k1));
+
+SELECT grouping__id, k1, k2, udaf(v) FROM (VALUES (1,1,1),(2,2,2)) AS t(k1,k2,v) GROUP BY GROUPING SETS ((k1),(k1,k2),(k2,k1));
+
+SELECT grouping(k1), k1, k2, udaf(v) FROM (VALUES (1,1,1),(2,2,2)) AS t(k1,k2,v) GROUP BY GROUPING SETS ((k1),(k1,k2),(k2,k1));
+
+-- grouping_id function
+SELECT grouping_id(k1, k2), udaf(v) from (VALUES (1,1,1),(2,2,2)) AS t(k1,k2,v) GROUP BY k1, k2 GROUPING SETS ((k2, k1), k1);
diff --git a/sql/core/src/test/resources/sql-tests/inputs/udaf.sql b/sql/core/src/test/resources/sql-tests/inputs/udaf/udaf.sql
similarity index 100%
rename from sql/core/src/test/resources/sql-tests/inputs/udaf.sql
rename to sql/core/src/test/resources/sql-tests/inputs/udaf/udaf.sql
diff --git a/sql/core/src/test/resources/sql-tests/results/udaf/udaf-group-analytics.sql.out b/sql/core/src/test/resources/sql-tests/results/udaf/udaf-group-analytics.sql.out
new file mode 100644
index 00000000000..4b16407e1a0
--- /dev/null
+++ b/sql/core/src/test/resources/sql-tests/results/udaf/udaf-group-analytics.sql.out
@@ -0,0 +1,1150 @@
+-- Automatically generated by SQLQueryTestSuite
+-- !query
+CREATE OR REPLACE TEMPORARY VIEW testData AS SELECT * FROM VALUES
+(1, 1), (1, 2), (2, 1), (2, 2), (3, 1), (3, 2)
+AS testData(a, b)
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SELECT a + b, b, udaf(a - b) FROM testData GROUP BY a + b, b WITH CUBE
+-- !query schema
+struct<(a + b):int,b:int,udaf((a - b)):int>
+-- !query output
+2	1	1
+2	NULL	0
+3	1	1
+3	2	1
+3	NULL	0
+4	1	1
+4	2	1
+4	NULL	0
+5	2	1
+5	NULL	0
+NULL	1	3
+NULL	2	3
+NULL	NULL	0
+
+
+-- !query
+SELECT a, b, udaf(b) FROM testData GROUP BY a, b WITH CUBE
+-- !query schema
+struct<a:int,b:int,udaf(b):int>
+-- !query output
+1	1	1
+1	2	1
+1	NULL	0
+2	1	1
+2	2	1
+2	NULL	0
+3	1	1
+3	2	1
+3	NULL	0
+NULL	1	3
+NULL	2	3
+NULL	NULL	0
+
+
+-- !query
+SELECT a + b, b, udaf(a - b) FROM testData GROUP BY a + b, b WITH ROLLUP
+-- !query schema
+struct<(a + b):int,b:int,udaf((a - b)):int>
+-- !query output
+2	1	1
+2	NULL	0
+3	1	1
+3	2	1
+3	NULL	0
+4	1	1
+4	2	1
+4	NULL	0
+5	2	1
+5	NULL	0
+NULL	NULL	0
+
+
+-- !query
+SELECT a, b, udaf(b) FROM testData GROUP BY a, b WITH ROLLUP
+-- !query schema
+struct<a:int,b:int,udaf(b):int>
+-- !query output
+1	1	1
+1	2	1
+1	NULL	0
+2	1	1
+2	2	1
+2	NULL	0
+3	1	1
+3	2	1
+3	NULL	0
+NULL	NULL	0
+
+
+-- !query
+CREATE OR REPLACE TEMPORARY VIEW courseSales AS SELECT * FROM VALUES
+("dotNET", 2012, 10000), ("Java", 2012, 20000), ("dotNET", 2012, 5000), ("dotNET", 2013, 48000), ("Java", 2013, 30000)
+AS courseSales(course, year, earnings)
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SELECT course, year, udaf(earnings) FROM courseSales GROUP BY ROLLUP(course, year) ORDER BY course, year
+-- !query schema
+struct<course:string,year:int,udaf(earnings):int>
+-- !query output
+NULL	NULL	5
+Java	NULL	2
+Java	2012	1
+Java	2013	1
+dotNET	NULL	3
+dotNET	2012	2
+dotNET	2013	1
+
+
+-- !query
+SELECT course, year, udaf(earnings) FROM courseSales GROUP BY ROLLUP(course, year, (course, year)) ORDER BY course, year
+-- !query schema
+struct<course:string,year:int,udaf(earnings):int>
+-- !query output
+NULL	NULL	5
+Java	NULL	2
+Java	2012	1
+Java	2012	1
+Java	2013	1
+Java	2013	1
+dotNET	NULL	3
+dotNET	2012	2
+dotNET	2012	2
+dotNET	2013	1
+dotNET	2013	1
+
+
+-- !query
+SELECT course, year, udaf(earnings) FROM courseSales GROUP BY ROLLUP(course, year, (course, year), ()) ORDER BY course, year
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.catalyst.parser.ParseException
+
+Empty set in ROLLUP grouping sets is not supported.(line 1, pos 62)
+
+== SQL ==
+SELECT course, year, udaf(earnings) FROM courseSales GROUP BY ROLLUP(course, year, (course, year), ()) ORDER BY course, year
+--------------------------------------------------------------^^^
+
+
+-- !query
+SELECT course, year, udaf(earnings) FROM courseSales GROUP BY CUBE(course, year) ORDER BY course, year
+-- !query schema
+struct<course:string,year:int,udaf(earnings):int>
+-- !query output
+NULL	NULL	5
+NULL	2012	3
+NULL	2013	2
+Java	NULL	2
+Java	2012	1
+Java	2013	1
+dotNET	NULL	3
+dotNET	2012	2
+dotNET	2013	1
+
+
+-- !query
+SELECT course, year, udaf(earnings) FROM courseSales GROUP BY CUBE(course, year, (course, year)) ORDER BY course, year
+-- !query schema
+struct<course:string,year:int,udaf(earnings):int>
+-- !query output
+NULL	NULL	5
+NULL	2012	3
+NULL	2013	2
+Java	NULL	2
+Java	2012	1
+Java	2012	1
+Java	2012	1
+Java	2012	1
+Java	2012	1
+Java	2013	1
+Java	2013	1
+Java	2013	1
+Java	2013	1
+Java	2013	1
+dotNET	NULL	3
+dotNET	2012	2
+dotNET	2012	2
+dotNET	2012	2
+dotNET	2012	2
+dotNET	2012	2
+dotNET	2013	1
+dotNET	2013	1
+dotNET	2013	1
+dotNET	2013	1
+dotNET	2013	1
+
+
+-- !query
+SELECT course, year, udaf(earnings) FROM courseSales GROUP BY CUBE(course, year, (course, year), ()) ORDER BY course, year
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.catalyst.parser.ParseException
+
+Empty set in CUBE grouping sets is not supported.(line 1, pos 62)
+
+== SQL ==
+SELECT course, year, udaf(earnings) FROM courseSales GROUP BY CUBE(course, year, (course, year), ()) ORDER BY course, year
+--------------------------------------------------------------^^^
+
+
+-- !query
+SELECT course, year, udaf(earnings) FROM courseSales GROUP BY course, year GROUPING SETS(course, year)
+-- !query schema
+struct<course:string,year:int,udaf(earnings):int>
+-- !query output
+Java	NULL	2
+NULL	2012	3
+NULL	2013	2
+dotNET	NULL	3
+
+
+-- !query
+SELECT course, year, udaf(earnings) FROM courseSales GROUP BY course, year GROUPING SETS(course, year, ())
+-- !query schema
+struct<course:string,year:int,udaf(earnings):int>
+-- !query output
+Java	NULL	2
+NULL	2012	3
+NULL	2013	2
+NULL	NULL	5
+dotNET	NULL	3
+
+
+-- !query
+SELECT course, year, udaf(earnings) FROM courseSales GROUP BY course, year GROUPING SETS(course)
+-- !query schema
+struct<course:string,year:int,udaf(earnings):int>
+-- !query output
+Java	NULL	2
+dotNET	NULL	3
+
+
+-- !query
+SELECT course, year, udaf(earnings) FROM courseSales GROUP BY course, year GROUPING SETS(year)
+-- !query schema
+struct<course:string,year:int,udaf(earnings):int>
+-- !query output
+NULL	2012	3
+NULL	2013	2
+
+
+-- !query
+SELECT course, year, udaf(earnings) FROM courseSales GROUP BY course, CUBE(course, year) ORDER BY course, year
+-- !query schema
+struct<course:string,year:int,udaf(earnings):int>
+-- !query output
+Java	NULL	2
+Java	NULL	2
+Java	2012	1
+Java	2012	1
+Java	2013	1
+Java	2013	1
+dotNET	NULL	3
+dotNET	NULL	3
+dotNET	2012	2
+dotNET	2012	2
+dotNET	2013	1
+dotNET	2013	1
+
+
+-- !query
+SELECT course, year, udaf(earnings) FROM courseSales GROUP BY CUBE(course, year), ROLLUP(course, year) ORDER BY course, year
+-- !query schema
+struct<course:string,year:int,udaf(earnings):int>
+-- !query output
+NULL	NULL	5
+NULL	2012	3
+NULL	2013	2
+Java	NULL	2
+Java	NULL	2
+Java	NULL	2
+Java	2012	1
+Java	2012	1
+Java	2012	1
+Java	2012	1
+Java	2012	1
+Java	2012	1
+Java	2012	1
+Java	2013	1
+Java	2013	1
+Java	2013	1
+Java	2013	1
+Java	2013	1
+Java	2013	1
+Java	2013	1
+dotNET	NULL	3
+dotNET	NULL	3
+dotNET	NULL	3
+dotNET	2012	2
+dotNET	2012	2
+dotNET	2012	2
+dotNET	2012	2
+dotNET	2012	2
+dotNET	2012	2
+dotNET	2012	2
+dotNET	2013	1
+dotNET	2013	1
+dotNET	2013	1
+dotNET	2013	1
+dotNET	2013	1
+dotNET	2013	1
+dotNET	2013	1
+
+
+-- !query
+SELECT course, year, udaf(earnings) FROM courseSales GROUP BY CUBE(course, year), ROLLUP(course, year), GROUPING SETS(course, year) ORDER BY course, year
+-- !query schema
+struct<course:string,year:int,udaf(earnings):int>
+-- !query output
+NULL	2012	3
+NULL	2012	3
+NULL	2013	2
+NULL	2013	2
+Java	NULL	2
+Java	NULL	2
+Java	NULL	2
+Java	NULL	2
+Java	2012	1
+Java	2012	1
+Java	2012	1
+Java	2012	1
+Java	2012	1
+Java	2012	1
+Java	2012	1
+Java	2012	1
+Java	2012	1
+Java	2012	1
+Java	2012	1
+Java	2012	1
+Java	2012	1
+Java	2012	1
+Java	2012	1
+Java	2012	1
+Java	2012	1
+Java	2012	1
+Java	2013	1
+Java	2013	1
+Java	2013	1
+Java	2013	1
+Java	2013	1
+Java	2013	1
+Java	2013	1
+Java	2013	1
+Java	2013	1
+Java	2013	1
+Java	2013	1
+Java	2013	1
+Java	2013	1
+Java	2013	1
+Java	2013	1
+Java	2013	1
+Java	2013	1
+Java	2013	1
+dotNET	NULL	3
+dotNET	NULL	3
+dotNET	NULL	3
+dotNET	NULL	3
+dotNET	2012	2
+dotNET	2012	2
+dotNET	2012	2
+dotNET	2012	2
+dotNET	2012	2
+dotNET	2012	2
+dotNET	2012	2
+dotNET	2012	2
+dotNET	2012	2
+dotNET	2012	2
+dotNET	2012	2
+dotNET	2012	2
+dotNET	2012	2
+dotNET	2012	2
+dotNET	2012	2
+dotNET	2012	2
+dotNET	2012	2
+dotNET	2012	2
+dotNET	2013	1
+dotNET	2013	1
+dotNET	2013	1
+dotNET	2013	1
+dotNET	2013	1
+dotNET	2013	1
+dotNET	2013	1
+dotNET	2013	1
+dotNET	2013	1
+dotNET	2013	1
+dotNET	2013	1
+dotNET	2013	1
+dotNET	2013	1
+dotNET	2013	1
+dotNET	2013	1
+dotNET	2013	1
+dotNET	2013	1
+dotNET	2013	1
+
+
+-- !query
+SELECT course, udaf(earnings) AS sum FROM courseSales
+GROUP BY course, earnings GROUPING SETS((), (course), (course, earnings)) ORDER BY course, sum
+-- !query schema
+struct<course:string,sum:int>
+-- !query output
+NULL	0
+Java	0
+Java	1
+Java	1
+dotNET	0
+dotNET	1
+dotNET	1
+dotNET	1
+
+
+-- !query
+SELECT course, udaf(earnings) AS sum, GROUPING_ID(course, earnings) FROM courseSales
+GROUP BY course, earnings GROUPING SETS((), (course), (course, earnings)) ORDER BY course, sum
+-- !query schema
+struct<course:string,sum:int,grouping_id(course, earnings):bigint>
+-- !query output
+NULL	0	3
+Java	0	1
+Java	1	0
+Java	1	0
+dotNET	0	1
+dotNET	1	0
+dotNET	1	0
+dotNET	1	0
+
+
+-- !query
+SELECT a + b AS k1, b AS k2, udaf(a - b) FROM testData GROUP BY CUBE(k1, k2)
+-- !query schema
+struct<k1:int,k2:int,udaf((a - b)):int>
+-- !query output
+2	1	1
+2	NULL	1
+3	1	1
+3	2	1
+3	NULL	2
+4	1	1
+4	2	1
+4	NULL	2
+5	2	1
+5	NULL	1
+NULL	1	3
+NULL	2	3
+NULL	NULL	6
+
+
+-- !query
+SELECT a + b AS k, b, udaf(a - b) FROM testData GROUP BY ROLLUP(k, b)
+-- !query schema
+struct<k:int,b:int,udaf((a - b)):int>
+-- !query output
+2	1	1
+2	NULL	0
+3	1	1
+3	2	1
+3	NULL	0
+4	1	1
+4	2	1
+4	NULL	0
+5	2	1
+5	NULL	0
+NULL	NULL	0
+
+
+-- !query
+SELECT a + b, b AS k, udaf(a - b) FROM testData GROUP BY a + b, k GROUPING SETS(k)
+-- !query schema
+struct<(a + b):int,k:int,udaf((a - b)):int>
+-- !query output
+NULL	1	3
+NULL	2	3
+
+
+-- !query
+SELECT a, b, udaf(1) FROM testData GROUP BY a, b, CUBE(a, b)
+-- !query schema
+struct<a:int,b:int,udaf(1):int>
+-- !query output
+1	1	1
+1	1	1
+1	1	1
+1	1	1
+1	2	1
+1	2	1
+1	2	1
+1	2	1
+2	1	1
+2	1	1
+2	1	1
+2	1	1
+2	2	1
+2	2	1
+2	2	1
+2	2	1
+3	1	1
+3	1	1
+3	1	1
+3	1	1
+3	2	1
+3	2	1
+3	2	1
+3	2	1
+
+
+-- !query
+SELECT a, b, udaf(1) FROM testData GROUP BY a, b, ROLLUP(a, b)
+-- !query schema
+struct<a:int,b:int,udaf(1):int>
+-- !query output
+1	1	1
+1	1	1
+1	1	1
+1	2	1
+1	2	1
+1	2	1
+2	1	1
+2	1	1
+2	1	1
+2	2	1
+2	2	1
+2	2	1
+3	1	1
+3	1	1
+3	1	1
+3	2	1
+3	2	1
+3	2	1
+
+
+-- !query
+SELECT a, b, udaf(1) FROM testData GROUP BY CUBE(a, b), ROLLUP(a, b)
+-- !query schema
+struct<a:int,b:int,udaf(1):int>
+-- !query output
+1	1	1
+1	1	1
+1	1	1
+1	1	1
+1	1	1
+1	1	1
+1	1	1
+1	2	1
+1	2	1
+1	2	1
+1	2	1
+1	2	1
+1	2	1
+1	2	1
+1	NULL	2
+1	NULL	2
+1	NULL	2
+2	1	1
+2	1	1
+2	1	1
+2	1	1
+2	1	1
+2	1	1
+2	1	1
+2	2	1
+2	2	1
+2	2	1
+2	2	1
+2	2	1
+2	2	1
+2	2	1
+2	NULL	2
+2	NULL	2
+2	NULL	2
+3	1	1
+3	1	1
+3	1	1
+3	1	1
+3	1	1
+3	1	1
+3	1	1
+3	2	1
+3	2	1
+3	2	1
+3	2	1
+3	2	1
+3	2	1
+3	2	1
+3	NULL	2
+3	NULL	2
+3	NULL	2
+NULL	1	3
+NULL	2	3
+NULL	NULL	6
+
+
+-- !query
+SELECT a, b, udaf(1) FROM testData GROUP BY a, CUBE(a, b), ROLLUP(b)
+-- !query schema
+struct<a:int,b:int,udaf(1):int>
+-- !query output
+1	1	1
+1	1	1
+1	1	1
+1	1	1
+1	1	1
+1	1	1
+1	2	1
+1	2	1
+1	2	1
+1	2	1
+1	2	1
+1	2	1
+1	NULL	2
+1	NULL	2
+2	1	1
+2	1	1
+2	1	1
+2	1	1
+2	1	1
+2	1	1
+2	2	1
+2	2	1
+2	2	1
+2	2	1
+2	2	1
+2	2	1
+2	NULL	2
+2	NULL	2
+3	1	1
+3	1	1
+3	1	1
+3	1	1
+3	1	1
+3	1	1
+3	2	1
+3	2	1
+3	2	1
+3	2	1
+3	2	1
+3	2	1
+3	NULL	2
+3	NULL	2
+
+
+-- !query
+SELECT a, b, udaf(1) FROM testData GROUP BY a, GROUPING SETS((a, b), (a), ())
+-- !query schema
+struct<a:int,b:int,udaf(1):int>
+-- !query output
+1	1	1
+1	2	1
+1	NULL	2
+1	NULL	2
+2	1	1
+2	2	1
+2	NULL	2
+2	NULL	2
+3	1	1
+3	2	1
+3	NULL	2
+3	NULL	2
+
+
+-- !query
+SELECT a, b, udaf(1) FROM testData GROUP BY a, CUBE(a, b), GROUPING SETS((a, b), (a), ())
+-- !query schema
+struct<a:int,b:int,udaf(1):int>
+-- !query output
+1	1	1
+1	1	1
+1	1	1
+1	1	1
+1	1	1
+1	1	1
+1	1	1
+1	1	1
+1	2	1
+1	2	1
+1	2	1
+1	2	1
+1	2	1
+1	2	1
+1	2	1
+1	2	1
+1	NULL	2
+1	NULL	2
+1	NULL	2
+1	NULL	2
+2	1	1
+2	1	1
+2	1	1
+2	1	1
+2	1	1
+2	1	1
+2	1	1
+2	1	1
+2	2	1
+2	2	1
+2	2	1
+2	2	1
+2	2	1
+2	2	1
+2	2	1
+2	2	1
+2	NULL	2
+2	NULL	2
+2	NULL	2
+2	NULL	2
+3	1	1
+3	1	1
+3	1	1
+3	1	1
+3	1	1
+3	1	1
+3	1	1
+3	1	1
+3	2	1
+3	2	1
+3	2	1
+3	2	1
+3	2	1
+3	2	1
+3	2	1
+3	2	1
+3	NULL	2
+3	NULL	2
+3	NULL	2
+3	NULL	2
+
+
+-- !query
+SELECT a, b, udaf(1) FROM testData GROUP BY a, CUBE(a, b), ROLLUP(a, b), GROUPING SETS((a, b), (a), ())
+-- !query schema
+struct<a:int,b:int,udaf(1):int>
+-- !query output
+1	1	1
+1	1	1
+1	1	1
+1	1	1
+1	1	1
+1	1	1
+1	1	1
+1	1	1
+1	1	1
+1	1	1
+1	1	1
+1	1	1
+1	1	1
+1	1	1
+1	1	1
+1	1	1
+1	1	1
+1	1	1
+1	1	1
+1	1	1
+1	1	1
+1	1	1
+1	1	1
+1	1	1
+1	1	1
+1	1	1
+1	1	1
+1	1	1
+1	2	1
+1	2	1
+1	2	1
+1	2	1
+1	2	1
+1	2	1
+1	2	1
+1	2	1
+1	2	1
+1	2	1
+1	2	1
+1	2	1
+1	2	1
+1	2	1
+1	2	1
+1	2	1
+1	2	1
+1	2	1
+1	2	1
+1	2	1
+1	2	1
+1	2	1
+1	2	1
+1	2	1
+1	2	1
+1	2	1
+1	2	1
+1	2	1
+1	NULL	2
+1	NULL	2
+1	NULL	2
+1	NULL	2
+1	NULL	2
+1	NULL	2
+1	NULL	2
+1	NULL	2
+2	1	1
+2	1	1
+2	1	1
+2	1	1
+2	1	1
+2	1	1
+2	1	1
+2	1	1
+2	1	1
+2	1	1
+2	1	1
+2	1	1
+2	1	1
+2	1	1
+2	1	1
+2	1	1
+2	1	1
+2	1	1
+2	1	1
+2	1	1
+2	1	1
+2	1	1
+2	1	1
+2	1	1
+2	1	1
+2	1	1
+2	1	1
+2	1	1
+2	2	1
+2	2	1
+2	2	1
+2	2	1
+2	2	1
+2	2	1
+2	2	1
+2	2	1
+2	2	1
+2	2	1
+2	2	1
+2	2	1
+2	2	1
+2	2	1
+2	2	1
+2	2	1
+2	2	1
+2	2	1
+2	2	1
+2	2	1
+2	2	1
+2	2	1
+2	2	1
+2	2	1
+2	2	1
+2	2	1
+2	2	1
+2	2	1
+2	NULL	2
+2	NULL	2
+2	NULL	2
+2	NULL	2
+2	NULL	2
+2	NULL	2
+2	NULL	2
+2	NULL	2
+3	1	1
+3	1	1
+3	1	1
+3	1	1
+3	1	1
+3	1	1
+3	1	1
+3	1	1
+3	1	1
+3	1	1
+3	1	1
+3	1	1
+3	1	1
+3	1	1
+3	1	1
+3	1	1
+3	1	1
+3	1	1
+3	1	1
+3	1	1
+3	1	1
+3	1	1
+3	1	1
+3	1	1
+3	1	1
+3	1	1
+3	1	1
+3	1	1
+3	2	1
+3	2	1
+3	2	1
+3	2	1
+3	2	1
+3	2	1
+3	2	1
+3	2	1
+3	2	1
+3	2	1
+3	2	1
+3	2	1
+3	2	1
+3	2	1
+3	2	1
+3	2	1
+3	2	1
+3	2	1
+3	2	1
+3	2	1
+3	2	1
+3	2	1
+3	2	1
+3	2	1
+3	2	1
+3	2	1
+3	2	1
+3	2	1
+3	NULL	2
+3	NULL	2
+3	NULL	2
+3	NULL	2
+3	NULL	2
+3	NULL	2
+3	NULL	2
+3	NULL	2
+
+
+-- !query
+SELECT a, b, udaf(1) FROM testData GROUP BY a, GROUPING SETS(ROLLUP(a, b))
+-- !query schema
+struct<a:int,b:int,udaf(1):int>
+-- !query output
+1	1	1
+1	2	1
+1	NULL	2
+1	NULL	2
+2	1	1
+2	2	1
+2	NULL	2
+2	NULL	2
+3	1	1
+3	2	1
+3	NULL	2
+3	NULL	2
+
+
+-- !query
+SELECT a, b, udaf(1) FROM testData GROUP BY a, GROUPING SETS(GROUPING SETS((a, b), (a), ()))
+-- !query schema
+struct<a:int,b:int,udaf(1):int>
+-- !query output
+1	1	1
+1	2	1
+1	NULL	2
+1	NULL	2
+2	1	1
+2	2	1
+2	NULL	2
+2	NULL	2
+3	1	1
+3	2	1
+3	NULL	2
+3	NULL	2
+
+
+-- !query
+SELECT a, b, udaf(1) FROM testData GROUP BY a, GROUPING SETS((a, b), GROUPING SETS(ROLLUP(a, b)))
+-- !query schema
+struct<a:int,b:int,udaf(1):int>
+-- !query output
+1	1	1
+1	1	1
+1	2	1
+1	2	1
+1	NULL	2
+1	NULL	2
+2	1	1
+2	1	1
+2	2	1
+2	2	1
+2	NULL	2
+2	NULL	2
+3	1	1
+3	1	1
+3	2	1
+3	2	1
+3	NULL	2
+3	NULL	2
+
+
+-- !query
+SELECT a, b, udaf(1) FROM testData GROUP BY a, GROUPING SETS((a, b, a, b), (a, b, a), (a, b))
+-- !query schema
+struct<a:int,b:int,udaf(1):int>
+-- !query output
+1	1	1
+1	1	1
+1	1	1
+1	2	1
+1	2	1
+1	2	1
+2	1	1
+2	1	1
+2	1	1
+2	2	1
+2	2	1
+2	2	1
+3	1	1
+3	1	1
+3	1	1
+3	2	1
+3	2	1
+3	2	1
+
+
+-- !query
+SELECT a, b, udaf(1) FROM testData GROUP BY a, GROUPING SETS(GROUPING SETS((a, b, a, b), (a, b, a), (a, b)))
+-- !query schema
+struct<a:int,b:int,udaf(1):int>
+-- !query output
+1	1	1
+1	1	1
+1	1	1
+1	2	1
+1	2	1
+1	2	1
+2	1	1
+2	1	1
+2	1	1
+2	2	1
+2	2	1
+2	2	1
+3	1	1
+3	1	1
+3	1	1
+3	2	1
+3	2	1
+3	2	1
+
+
+-- !query
+SELECT a, b, udaf(1) FROM testData GROUP BY a, GROUPING SETS(ROLLUP(a, b), CUBE(a, b))
+-- !query schema
+struct<a:int,b:int,udaf(1):int>
+-- !query output
+1	1	1
+1	1	1
+1	1	1
+1	2	1
+1	2	1
+1	2	1
+1	NULL	2
+1	NULL	2
+1	NULL	2
+1	NULL	2
+2	1	1
+2	1	1
+2	1	1
+2	2	1
+2	2	1
+2	2	1
+2	NULL	2
+2	NULL	2
+2	NULL	2
+2	NULL	2
+3	1	1
+3	1	1
+3	1	1
+3	2	1
+3	2	1
+3	2	1
+3	NULL	2
+3	NULL	2
+3	NULL	2
+3	NULL	2
+
+
+-- !query
+SELECT a, b, udaf(1) FROM testData GROUP BY a, GROUPING SETS(GROUPING SETS((a, b), (a), ()), GROUPING SETS((a, b), (a), (b), ()))
+-- !query schema
+struct<a:int,b:int,udaf(1):int>
+-- !query output
+1	1	1
+1	1	1
+1	1	1
+1	2	1
+1	2	1
+1	2	1
+1	NULL	2
+1	NULL	2
+1	NULL	2
+1	NULL	2
+2	1	1
+2	1	1
+2	1	1
+2	2	1
+2	2	1
+2	2	1
+2	NULL	2
+2	NULL	2
+2	NULL	2
+2	NULL	2
+3	1	1
+3	1	1
+3	1	1
+3	2	1
+3	2	1
+3	2	1
+3	NULL	2
+3	NULL	2
+3	NULL	2
+3	NULL	2
+
+
+-- !query
+SELECT a, b, udaf(1) FROM testData GROUP BY a, GROUPING SETS((a, b), (a), (), (a, b), (a), (b), ())
+-- !query schema
+struct<a:int,b:int,udaf(1):int>
+-- !query output
+1	1	1
+1	1	1
+1	1	1
+1	2	1
+1	2	1
+1	2	1
+1	NULL	2
+1	NULL	2
+1	NULL	2
+1	NULL	2
+2	1	1
+2	1	1
+2	1	1
+2	2	1
+2	2	1
+2	2	1
+2	NULL	2
+2	NULL	2
+2	NULL	2
+2	NULL	2
+3	1	1
+3	1	1
+3	1	1
+3	2	1
+3	2	1
+3	2	1
+3	NULL	2
+3	NULL	2
+3	NULL	2
+3	NULL	2
diff --git a/sql/core/src/test/resources/sql-tests/results/udaf/udaf-group-by-ordinal.sql.out b/sql/core/src/test/resources/sql-tests/results/udaf/udaf-group-by-ordinal.sql.out
new file mode 100644
index 00000000000..d6e3f111001
--- /dev/null
+++ b/sql/core/src/test/resources/sql-tests/results/udaf/udaf-group-by-ordinal.sql.out
@@ -0,0 +1,405 @@
+-- Automatically generated by SQLQueryTestSuite
+-- !query
+create temporary view data as select * from values
+  (1, 1),
+  (1, 2),
+  (2, 1),
+  (2, 2),
+  (3, 1),
+  (3, 2)
+  as data(a, b)
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+select a, udaf(b) from data group by 1
+-- !query schema
+struct<a:int,udaf(b):int>
+-- !query output
+1	2
+2	2
+3	2
+
+
+-- !query
+select 1, 2, udaf(b) from data group by 1, 2
+-- !query schema
+struct<1:int,2:int,udaf(b):int>
+-- !query output
+1	2	6
+
+
+-- !query
+select a, 1, udaf(b) from data group by a, 1
+-- !query schema
+struct<a:int,1:int,udaf(b):int>
+-- !query output
+1	1	2
+2	1	2
+3	1	2
+
+
+-- !query
+select a, 1, udaf(b) from data group by 1, 2
+-- !query schema
+struct<a:int,1:int,udaf(b):int>
+-- !query output
+1	1	2
+2	1	2
+3	1	2
+
+
+-- !query
+select a, b + 2, udaf(2) from data group by a, 2
+-- !query schema
+struct<a:int,(b + 2):int,udaf(2):int>
+-- !query output
+1	3	1
+1	4	1
+2	3	1
+2	4	1
+3	3	1
+3	4	1
+
+
+-- !query
+select a as aa, b + 2 as bb, udaf(2) from data group by 1, 2
+-- !query schema
+struct<aa:int,bb:int,udaf(2):int>
+-- !query output
+1	3	1
+1	4	1
+2	3	1
+2	4	1
+3	3	1
+3	4	1
+
+
+-- !query
+select udaf(b) from data group by 1 + 0
+-- !query schema
+struct<udaf(b):int>
+-- !query output
+6
+
+
+-- !query
+select a, b, udaf(b) from data group by 3
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+{
+  "errorClass" : "COLUMN_NOT_IN_GROUP_BY_CLAUSE",
+  "sqlState" : "42000",
+  "messageParameters" : {
+    "expression" : "\"a\""
+  }
+}
+
+
+-- !query
+select a, b, udaf(b) + 2 from data group by 3
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+{
+  "errorClass" : "COLUMN_NOT_IN_GROUP_BY_CLAUSE",
+  "sqlState" : "42000",
+  "messageParameters" : {
+    "expression" : "\"a\""
+  }
+}
+
+
+-- !query
+select a, rand(0), udaf(b)
+from 
+(select /*+ REPARTITION(1) */ a, b from data) group by a, 2
+-- !query schema
+struct<a:int,rand(0):double,udaf(b):int>
+-- !query output
+1	0.5234194256885571	1
+1	0.7604953758285915	1
+2	0.0953472826424725	1
+2	0.3163249920547614	1
+3	0.2710259815484829	1
+3	0.7141011170991605	1
+
+
+-- !query
+select a, udaf(a) from (select 1 as a) tmp group by 1 order by 1
+-- !query schema
+struct<a:int,udaf(a):int>
+-- !query output
+1	1
+
+
+-- !query
+select udaf(a), a from (select 1 as a) tmp group by 2 having a > 0
+-- !query schema
+struct<udaf(a):int,a:int>
+-- !query output
+1	1
+
+
+-- !query
+select a, a AS k, udaf(b) from data group by k, 1
+-- !query schema
+struct<a:int,k:int,udaf(b):int>
+-- !query output
+1	1	2
+2	2	2
+3	3	2
+
+
+-- !query
+select a, b, udaf(1) from data group by cube(1, 2)
+-- !query schema
+struct<a:int,b:int,udaf(1):int>
+-- !query output
+1	1	1
+1	2	1
+1	NULL	2
+2	1	1
+2	2	1
+2	NULL	2
+3	1	1
+3	2	1
+3	NULL	2
+NULL	1	3
+NULL	2	3
+NULL	NULL	6
+
+
+-- !query
+select a, b, udaf(1) from data group by cube(1, b)
+-- !query schema
+struct<a:int,b:int,udaf(1):int>
+-- !query output
+1	1	1
+1	2	1
+1	NULL	2
+2	1	1
+2	2	1
+2	NULL	2
+3	1	1
+3	2	1
+3	NULL	2
+NULL	1	3
+NULL	2	3
+NULL	NULL	6
+
+
+-- !query
+select a, b, udaf(1) from data group by 1, 2 with cube
+-- !query schema
+struct<a:int,b:int,udaf(1):int>
+-- !query output
+1	1	1
+1	2	1
+1	NULL	2
+2	1	1
+2	2	1
+2	NULL	2
+3	1	1
+3	2	1
+3	NULL	2
+NULL	1	3
+NULL	2	3
+NULL	NULL	6
+
+
+-- !query
+select a, b, udaf(1) from data group by rollup(1, 2)
+-- !query schema
+struct<a:int,b:int,udaf(1):int>
+-- !query output
+1	1	1
+1	2	1
+1	NULL	2
+2	1	1
+2	2	1
+2	NULL	2
+3	1	1
+3	2	1
+3	NULL	2
+NULL	NULL	6
+
+
+-- !query
+select a, b, udaf(1) from data group by rollup(1, b)
+-- !query schema
+struct<a:int,b:int,udaf(1):int>
+-- !query output
+1	1	1
+1	2	1
+1	NULL	2
+2	1	1
+2	2	1
+2	NULL	2
+3	1	1
+3	2	1
+3	NULL	2
+NULL	NULL	6
+
+
+-- !query
+select a, b, udaf(1) from data group by 1, 2 with rollup
+-- !query schema
+struct<a:int,b:int,udaf(1):int>
+-- !query output
+1	1	1
+1	2	1
+1	NULL	2
+2	1	1
+2	2	1
+2	NULL	2
+3	1	1
+3	2	1
+3	NULL	2
+NULL	NULL	6
+
+
+-- !query
+select a, b, udaf(1) from data group by grouping sets((1), (2), (1, 2))
+-- !query schema
+struct<a:int,b:int,udaf(1):int>
+-- !query output
+1	1	1
+1	2	1
+1	NULL	2
+2	1	1
+2	2	1
+2	NULL	2
+3	1	1
+3	2	1
+3	NULL	2
+NULL	1	3
+NULL	2	3
+
+
+-- !query
+select a, b, udaf(1) from data group by grouping sets((1), (b), (a, 2))
+-- !query schema
+struct<a:int,b:int,udaf(1):int>
+-- !query output
+1	1	1
+1	2	1
+1	NULL	2
+2	1	1
+2	2	1
+2	NULL	2
+3	1	1
+3	2	1
+3	NULL	2
+NULL	1	3
+NULL	2	3
+
+
+-- !query
+select a, b, udaf(1) from data group by a, 2 grouping sets((1), (b), (a, 2))
+-- !query schema
+struct<a:int,b:int,udaf(1):int>
+-- !query output
+1	1	1
+1	2	1
+1	NULL	2
+2	1	1
+2	2	1
+2	NULL	2
+3	1	1
+3	2	1
+3	NULL	2
+NULL	1	3
+NULL	2	3
+
+
+-- !query
+select a, b, udaf(1) from data group by a, -1
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+{
+  "errorClass" : "GROUP_BY_POS_OUT_OF_RANGE",
+  "sqlState" : "42000",
+  "messageParameters" : {
+    "index" : "-1",
+    "size" : "3"
+  },
+  "queryContext" : [ {
+    "objectType" : "",
+    "objectName" : "",
+    "startIndex" : 44,
+    "stopIndex" : 45,
+    "fragment" : "-1"
+  } ]
+}
+
+
+-- !query
+select a, b, udaf(1) from data group by a, 3
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+{
+  "errorClass" : "COLUMN_NOT_IN_GROUP_BY_CLAUSE",
+  "sqlState" : "42000",
+  "messageParameters" : {
+    "expression" : "\"b\""
+  }
+}
+
+
+-- !query
+select a, b, udaf(1) from data group by cube(-1, 2)
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+{
+  "errorClass" : "GROUP_BY_POS_OUT_OF_RANGE",
+  "sqlState" : "42000",
+  "messageParameters" : {
+    "index" : "-1",
+    "size" : "3"
+  },
+  "queryContext" : [ {
+    "objectType" : "",
+    "objectName" : "",
+    "startIndex" : 46,
+    "stopIndex" : 47,
+    "fragment" : "-1"
+  } ]
+}
+
+
+-- !query
+select a, b, udaf(1) from data group by cube(1, 3)
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+grouping expressions sequence is empty, and 'data.a' is not an aggregate function. Wrap '()' in windowing function(s) or wrap 'data.a' in first() (or first_value) if you don't care which value you get.
+
+
+-- !query
+set spark.sql.groupByOrdinal=false
+-- !query schema
+struct<key:string,value:string>
+-- !query output
+spark.sql.groupByOrdinal	false
+
+
+-- !query
+select udaf(b) from data group by -1
+-- !query schema
+struct<udaf(b):int>
+-- !query output
+6
diff --git a/sql/core/src/test/resources/sql-tests/results/udaf/udaf-group-by.sql.out b/sql/core/src/test/resources/sql-tests/results/udaf/udaf-group-by.sql.out
new file mode 100644
index 00000000000..bb98d244faa
--- /dev/null
+++ b/sql/core/src/test/resources/sql-tests/results/udaf/udaf-group-by.sql.out
@@ -0,0 +1,411 @@
+-- Automatically generated by SQLQueryTestSuite
+-- !query
+CREATE OR REPLACE TEMPORARY VIEW testData AS SELECT * FROM VALUES
+(1, 1), (1, 2), (2, 1), (2, 2), (3, 1), (3, 2), (null, 1), (3, null), (null, null)
+AS testData(a, b)
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SELECT a, udaf(b) FROM testData
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+grouping expressions sequence is empty, and 'testdata.a' is not an aggregate function. Wrap '()' in windowing function(s) or wrap 'testdata.a' in first() (or first_value) if you don't care which value you get.
+
+
+-- !query
+SELECT udaf(a), udaf(b) FROM testData
+-- !query schema
+struct<udaf(a):int,udaf(b):int>
+-- !query output
+7	7
+
+
+-- !query
+SELECT a, udaf(b) FROM testData GROUP BY a
+-- !query schema
+struct<a:int,udaf(b):int>
+-- !query output
+1	2
+2	2
+3	2
+NULL	1
+
+
+-- !query
+SELECT a, udaf(b) FROM testData GROUP BY b
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+{
+  "errorClass" : "COLUMN_NOT_IN_GROUP_BY_CLAUSE",
+  "sqlState" : "42000",
+  "messageParameters" : {
+    "expression" : "\"a\""
+  }
+}
+
+
+-- !query
+SELECT udaf(a), udaf(b) FROM testData GROUP BY a
+-- !query schema
+struct<udaf(a):int,udaf(b):int>
+-- !query output
+0	1
+2	2
+2	2
+3	2
+
+
+-- !query
+SELECT 'foo', udaf(a) FROM testData GROUP BY 1
+-- !query schema
+struct<foo:string,udaf(a):int>
+-- !query output
+foo	7
+
+
+-- !query
+SELECT 'foo', udaf(a) FROM testData WHERE a = 0 GROUP BY 1
+-- !query schema
+struct<foo:string,udaf(a):int>
+-- !query output
+
+
+
+-- !query
+SELECT 'foo', udaf(STRUCT(a)) FROM testData WHERE a = 0 GROUP BY 1
+-- !query schema
+struct<foo:string,udaf(struct(a, a)):int>
+-- !query output
+
+
+
+-- !query
+SELECT a + b, udaf(b) FROM testData GROUP BY a + b
+-- !query schema
+struct<(a + b):int,udaf(b):int>
+-- !query output
+2	1
+3	2
+4	2
+5	1
+NULL	1
+
+
+-- !query
+SELECT a + 2, udaf(b) FROM testData GROUP BY a + 1
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+{
+  "errorClass" : "COLUMN_NOT_IN_GROUP_BY_CLAUSE",
+  "sqlState" : "42000",
+  "messageParameters" : {
+    "expression" : "\"a\""
+  }
+}
+
+
+-- !query
+SELECT a + 1 + 1, udaf(b) FROM testData GROUP BY a + 1
+-- !query schema
+struct<((a + 1) + 1):int,udaf(b):int>
+-- !query output
+3	2
+4	2
+5	2
+NULL	1
+
+
+-- !query
+SELECT SKEWNESS(a), KURTOSIS(a), udaf(a), udaf(a), AVG(a), VARIANCE(a), STDDEV(a), SUM(a), udaf(a)
+FROM testData
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+{
+  "errorClass" : "INVALID_PANDAS_UDF_PLACEMENT",
+  "messageParameters" : {
+    "functionList" : "`udaf`"
+  }
+}
+
+
+-- !query
+SELECT udaf(DISTINCT b), udaf(DISTINCT b, c) FROM (SELECT 1 AS a, 2 AS b, 3 AS c) GROUP BY a
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+Function pythonudf does not support DISTINCT; line 1 pos 7
+
+
+-- !query
+SELECT a AS k, udaf(b) FROM testData GROUP BY k
+-- !query schema
+struct<k:int,udaf(b):int>
+-- !query output
+1	2
+2	2
+3	2
+NULL	1
+
+
+-- !query
+SELECT a AS k, udaf(b) FROM testData GROUP BY k HAVING k > 1
+-- !query schema
+struct<k:int,udaf(b):int>
+-- !query output
+2	2
+3	2
+
+
+-- !query
+SELECT a AS k, udaf(non_existing) FROM testData GROUP BY k
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+{
+  "errorClass" : "UNRESOLVED_COLUMN",
+  "errorSubClass" : "WITH_SUGGESTION",
+  "sqlState" : "42000",
+  "messageParameters" : {
+    "objectName" : "`non_existing`",
+    "proposal" : "`testdata`.`a`, `testdata`.`b`"
+  },
+  "queryContext" : [ {
+    "objectType" : "",
+    "objectName" : "",
+    "startIndex" : 21,
+    "stopIndex" : 32,
+    "fragment" : "non_existing"
+  } ]
+}
+
+
+-- !query
+CREATE OR REPLACE TEMPORARY VIEW testDataHasSameNameWithAlias AS SELECT * FROM VALUES
+(1, 1, 3), (1, 2, 1) AS testDataHasSameNameWithAlias(k, a, v)
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SELECT k AS a, udaf(v) FROM testDataHasSameNameWithAlias GROUP BY a
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+{
+  "errorClass" : "COLUMN_NOT_IN_GROUP_BY_CLAUSE",
+  "sqlState" : "42000",
+  "messageParameters" : {
+    "expression" : "\"k\""
+  }
+}
+
+
+-- !query
+set spark.sql.groupByAliases=false
+-- !query schema
+struct<key:string,value:string>
+-- !query output
+spark.sql.groupByAliases	false
+
+
+-- !query
+SELECT a AS k, udaf(b) FROM testData GROUP BY k
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+{
+  "errorClass" : "UNRESOLVED_COLUMN",
+  "errorSubClass" : "WITH_SUGGESTION",
+  "sqlState" : "42000",
+  "messageParameters" : {
+    "objectName" : "`k`",
+    "proposal" : "`testdata`.`a`, `testdata`.`b`"
+  },
+  "queryContext" : [ {
+    "objectType" : "",
+    "objectName" : "",
+    "startIndex" : 47,
+    "stopIndex" : 47,
+    "fragment" : "k"
+  } ]
+}
+
+
+-- !query
+SELECT a, udaf(1) FROM testData WHERE false GROUP BY a
+-- !query schema
+struct<a:int,udaf(1):int>
+-- !query output
+
+
+
+-- !query
+SELECT udaf(1) FROM testData WHERE false
+-- !query schema
+struct<udaf(1):int>
+-- !query output
+
+
+
+-- !query
+SELECT 1 FROM (SELECT udaf(1) FROM testData WHERE false) t
+-- !query schema
+struct<1:int>
+-- !query output
+1
+
+
+-- !query
+SELECT 1 from (
+  SELECT 1 AS z,
+  udaf(a.x)
+  FROM (select 1 as x) a
+  WHERE false
+) b
+where b.z != b.z
+-- !query schema
+struct<1:int>
+-- !query output
+
+
+
+-- !query
+SELECT 1 FROM range(10) HAVING udaf(id) > 0
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+{
+  "errorClass" : "UNRESOLVED_COLUMN",
+  "errorSubClass" : "WITH_SUGGESTION",
+  "sqlState" : "42000",
+  "messageParameters" : {
+    "objectName" : "`id`",
+    "proposal" : "`1`"
+  },
+  "queryContext" : [ {
+    "objectType" : "",
+    "objectName" : "",
+    "startIndex" : 37,
+    "stopIndex" : 38,
+    "fragment" : "id"
+  } ]
+}
+
+
+-- !query
+CREATE OR REPLACE TEMPORARY VIEW test_agg AS SELECT * FROM VALUES
+  (1, true), (1, false),
+  (2, true),
+  (3, false), (3, null),
+  (4, null), (4, null),
+  (5, null), (5, true), (5, false) AS test_agg(k, v)
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SELECT udaf(v), some(v), any(v), bool_and(v), bool_or(v) FROM test_agg WHERE 1 = 0
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+{
+  "errorClass" : "INVALID_PANDAS_UDF_PLACEMENT",
+  "messageParameters" : {
+    "functionList" : "`udaf`"
+  }
+}
+
+
+-- !query
+SELECT udaf(v), some(v), any(v), bool_and(v), bool_or(v) FROM test_agg WHERE k = 4
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+{
+  "errorClass" : "INVALID_PANDAS_UDF_PLACEMENT",
+  "messageParameters" : {
+    "functionList" : "`udaf`"
+  }
+}
+
+
+-- !query
+SELECT udaf(v), some(v), any(v), bool_and(v), bool_or(v) FROM test_agg WHERE k = 5
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+{
+  "errorClass" : "INVALID_PANDAS_UDF_PLACEMENT",
+  "messageParameters" : {
+    "functionList" : "`udaf`"
+  }
+}
+
+
+-- !query
+SELECT k, udaf(v), some(v), any(v), bool_and(v), bool_or(v) FROM test_agg GROUP BY k
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+{
+  "errorClass" : "INVALID_PANDAS_UDF_PLACEMENT",
+  "messageParameters" : {
+    "functionList" : "`udaf`"
+  }
+}
+
+
+-- !query
+SELECT k, udaf(v) FROM test_agg GROUP BY k HAVING udaf(v) = false
+-- !query schema
+struct<k:int,udaf(v):int>
+-- !query output
+4	0
+
+
+-- !query
+SELECT k, udaf(v) FROM test_agg GROUP BY k HAVING udaf(v) IS NULL
+-- !query schema
+struct<k:int,udaf(v):int>
+-- !query output
+
+
+
+-- !query
+SELECT k,
+       udaf(v) AS count
+FROM   test_agg
+WHERE  k = 2
+       AND v IN (SELECT Any(v)
+                 FROM   test_agg
+                 WHERE  k = 1)
+GROUP  BY k
+-- !query schema
+struct<k:int,count:int>
+-- !query output
+2	1
diff --git a/sql/core/src/test/resources/sql-tests/results/udaf/udaf-grouping-set.sql.out b/sql/core/src/test/resources/sql-tests/results/udaf/udaf-grouping-set.sql.out
new file mode 100644
index 00000000000..c9b27dca0c0
--- /dev/null
+++ b/sql/core/src/test/resources/sql-tests/results/udaf/udaf-grouping-set.sql.out
@@ -0,0 +1,175 @@
+-- Automatically generated by SQLQueryTestSuite
+-- !query
+CREATE TEMPORARY VIEW grouping AS SELECT * FROM VALUES
+  ("1", "2", "3", 1),
+  ("4", "5", "6", 1),
+  ("7", "8", "9", 1)
+  as grouping(a, b, c, d)
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SELECT a, b, c, udaf(d) FROM grouping GROUP BY a, b, c GROUPING SETS (())
+-- !query schema
+struct<a:string,b:string,c:string,udaf(d):int>
+-- !query output
+NULL	NULL	NULL	3
+
+
+-- !query
+SELECT a, b, c, udaf(d) FROM grouping GROUP BY a, b, c GROUPING SETS ((a))
+-- !query schema
+struct<a:string,b:string,c:string,udaf(d):int>
+-- !query output
+1	NULL	NULL	1
+4	NULL	NULL	1
+7	NULL	NULL	1
+
+
+-- !query
+SELECT a, b, c, udaf(d) FROM grouping GROUP BY a, b, c GROUPING SETS ((c))
+-- !query schema
+struct<a:string,b:string,c:string,udaf(d):int>
+-- !query output
+NULL	NULL	3	1
+NULL	NULL	6	1
+NULL	NULL	9	1
+
+
+-- !query
+SELECT c1, udaf(c2) FROM (VALUES ('x', 10, 0), ('y', 20, 0)) AS t (c1, c2, c3) GROUP BY GROUPING SETS (c1)
+-- !query schema
+struct<c1:string,udaf(c2):int>
+-- !query output
+x	1
+y	1
+
+
+-- !query
+SELECT c1, udaf(c2), grouping(c1) FROM (VALUES ('x', 10, 0), ('y', 20, 0)) AS t (c1, c2, c3) GROUP BY GROUPING SETS (c1)
+-- !query schema
+struct<c1:string,udaf(c2):int,grouping(c1):tinyint>
+-- !query output
+x	1	0
+y	1	0
+
+
+-- !query
+SELECT c1, c2, udaf(c3), grouping__id
+FROM   (VALUES ('x', 'a', 10), ('y', 'b', 20) ) AS t (c1, c2, c3)
+GROUP  BY GROUPING SETS ( ( c1 ), ( c2 ) )
+HAVING GROUPING__ID > 1
+-- !query schema
+struct<c1:string,c2:string,udaf(c3):int,grouping__id:bigint>
+-- !query output
+NULL	a	1	2
+NULL	b	1	2
+
+
+-- !query
+SELECT a + b, b, udaf(c) FROM (VALUES (1,1,1),(2,2,2)) AS t(a,b,c) GROUP BY GROUPING SETS ( (a + b), (b))
+-- !query schema
+struct<(a + b):int,b:int,udaf(c):int>
+-- !query output
+2	NULL	1
+4	NULL	1
+NULL	1	1
+NULL	2	1
+
+
+-- !query
+SELECT a + b, b, udaf(c) FROM (VALUES (1,1,1),(2,2,2)) AS t(a,b,c) GROUP BY GROUPING SETS ( (a + b), (b + a), (b))
+-- !query schema
+struct<(a + b):int,b:int,udaf(c):int>
+-- !query output
+2	NULL	1
+2	NULL	1
+4	NULL	1
+4	NULL	1
+NULL	1	1
+NULL	2	1
+
+
+-- !query
+SELECT a, b, c, udaf(d) FROM grouping GROUP BY WITH ROLLUP
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.catalyst.parser.ParseException
+{
+  "errorClass" : "PARSE_SYNTAX_ERROR",
+  "sqlState" : "42000",
+  "messageParameters" : {
+    "error" : "'ROLLUP'",
+    "hint" : ": extra input 'ROLLUP'"
+  }
+}
+
+
+-- !query
+SELECT a, b, c, udaf(d) FROM grouping GROUP BY WITH CUBE
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.catalyst.parser.ParseException
+{
+  "errorClass" : "PARSE_SYNTAX_ERROR",
+  "sqlState" : "42000",
+  "messageParameters" : {
+    "error" : "'CUBE'",
+    "hint" : ": extra input 'CUBE'"
+  }
+}
+
+
+-- !query
+SELECT k1, k2, udaf(v) FROM (VALUES (1,1,1),(2,2,2)) AS t(k1,k2,v) GROUP BY GROUPING SETS ((k1),(k1,k2),(k2,k1))
+-- !query schema
+struct<k1:int,k2:int,udaf(v):int>
+-- !query output
+1	1	1
+1	1	1
+1	NULL	1
+2	2	1
+2	2	1
+2	NULL	1
+
+
+-- !query
+SELECT grouping__id, k1, k2, udaf(v) FROM (VALUES (1,1,1),(2,2,2)) AS t(k1,k2,v) GROUP BY GROUPING SETS ((k1),(k1,k2),(k2,k1))
+-- !query schema
+struct<grouping__id:bigint,k1:int,k2:int,udaf(v):int>
+-- !query output
+0	1	1	1
+0	1	1	1
+0	2	2	1
+0	2	2	1
+1	1	NULL	1
+1	2	NULL	1
+
+
+-- !query
+SELECT grouping(k1), k1, k2, udaf(v) FROM (VALUES (1,1,1),(2,2,2)) AS t(k1,k2,v) GROUP BY GROUPING SETS ((k1),(k1,k2),(k2,k1))
+-- !query schema
+struct<grouping(k1):tinyint,k1:int,k2:int,udaf(v):int>
+-- !query output
+0	1	1	1
+0	1	1	1
+0	1	NULL	1
+0	2	2	1
+0	2	2	1
+0	2	NULL	1
+
+
+-- !query
+SELECT grouping_id(k1, k2), udaf(v) from (VALUES (1,1,1),(2,2,2)) AS t(k1,k2,v) GROUP BY k1, k2 GROUPING SETS ((k2, k1), k1)
+-- !query schema
+struct<grouping_id(k1, k2):bigint,udaf(v):int>
+-- !query output
+0	1
+0	1
+1	1
+1	1
diff --git a/sql/core/src/test/resources/sql-tests/results/udaf/udaf.sql.out b/sql/core/src/test/resources/sql-tests/results/udaf/udaf.sql.out
new file mode 100644
index 00000000000..0605af1c808
--- /dev/null
+++ b/sql/core/src/test/resources/sql-tests/results/udaf/udaf.sql.out
@@ -0,0 +1,67 @@
+-- Automatically generated by SQLQueryTestSuite
+-- !query
+CREATE OR REPLACE TEMPORARY VIEW t1 AS SELECT * FROM VALUES
+(1), (2), (3), (4)
+as t1(int_col1)
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+CREATE FUNCTION myDoubleAvg AS 'test.org.apache.spark.sql.MyDoubleAvg'
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SELECT default.myDoubleAvg(int_col1) as my_avg from t1
+-- !query schema
+struct<my_avg:double>
+-- !query output
+102.5
+
+
+-- !query
+SELECT default.myDoubleAvg(int_col1, 3) as my_avg from t1
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+Invalid number of arguments for function spark_catalog.default.mydoubleavg. Expected: 1; Found: 2; line 1 pos 7
+
+
+-- !query
+CREATE FUNCTION udaf1 AS 'test.non.existent.udaf'
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SELECT default.udaf1(int_col1) as udaf1 from t1
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+Can not load class 'test.non.existent.udaf' when registering the function 'spark_catalog.default.udaf1', please make sure it is on the classpath; line 1 pos 7
+
+
+-- !query
+DROP FUNCTION myDoubleAvg
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+DROP FUNCTION udaf1
+-- !query schema
+struct<>
+-- !query output
+
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala
index cca9bb6741f..5af7fa3898d 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala
@@ -216,6 +216,13 @@ class SQLQueryTestSuite extends QueryTest with SharedSparkSession with SQLHelper
       resultFile: String,
       udf: TestUDF) extends TestCase with UDFTest
 
+  /** A UDAF test case. */
+  protected case class UDAFTestCase(
+      name: String,
+      inputFile: String,
+      resultFile: String,
+      udf: TestUDF) extends TestCase with UDFTest
+
   /** A UDF PostgreSQL test case. */
   protected case class UDFPgSQLTestCase(
       name: String,
@@ -436,6 +443,11 @@ class SQLQueryTestSuite extends QueryTest with SharedSparkSession with SQLHelper
           if udfTestCase.udf.isInstanceOf[TestScalarPandasUDF] && shouldTestScalarPandasUDFs =>
         s"${testCase.name}${System.lineSeparator()}" +
           s"Python: $pythonVer Pandas: $pandasVer PyArrow: $pyarrowVer${System.lineSeparator()}"
+      case udfTestCase: UDFTest
+          if udfTestCase.udf.isInstanceOf[TestGroupedAggPandasUDF] &&
+            shouldTestGroupedAggPandasUDFs =>
+        s"${testCase.name}${System.lineSeparator()}" +
+          s"Python: $pythonVer Pandas: $pandasVer PyArrow: $pyarrowVer${System.lineSeparator()}"
       case _ =>
         s"${testCase.name}${System.lineSeparator()}"
     }
@@ -495,6 +507,11 @@ class SQLQueryTestSuite extends QueryTest with SharedSparkSession with SQLHelper
           UDFTestCase(
             s"$testCaseName - ${udf.prettyName}", absPath, resultFile, udf)
         }
+      } else if (file.getAbsolutePath.startsWith(s"$inputFilePath${File.separator}udaf")) {
+        Seq(TestGroupedAggPandasUDF("udaf")).map { udf =>
+          UDAFTestCase(
+            s"$testCaseName - ${udf.prettyName}", absPath, resultFile, udf)
+        }
       } else if (file.getAbsolutePath.startsWith(s"$inputFilePath${File.separator}postgreSQL")) {
         PgSQLTestCase(testCaseName, absPath, resultFile) :: Nil
       } else if (file.getAbsolutePath.startsWith(s"$inputFilePath${File.separator}ansi")) {
diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerQueryTestSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerQueryTestSuite.scala
index 69e01cef5ab..b850ffccd4e 100644
--- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerQueryTestSuite.scala
+++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerQueryTestSuite.scala
@@ -65,6 +65,7 @@ import org.apache.spark.sql.types._
  *   1. Support UDF testing.
  *   2. Support DESC command.
  *   3. Support SHOW command.
+ *   4. Support UDAF testing.
  */
 // scalastyle:on line.size.limit
 class ThriftServerQueryTestSuite extends SQLQueryTestSuite with SharedThriftServer {
@@ -247,6 +248,8 @@ class ThriftServerQueryTestSuite extends SQLQueryTestSuite with SharedThriftServ
 
       if (file.getAbsolutePath.startsWith(s"$inputFilePath${File.separator}udf")) {
         Seq.empty
+      } else if (file.getAbsolutePath.startsWith(s"$inputFilePath${File.separator}udaf")) {
+        Seq.empty
       } else if (file.getAbsolutePath.startsWith(s"$inputFilePath${File.separator}postgreSQL")) {
         PgSQLTestCase(testCaseName, absPath, resultFile) :: Nil
       } else if (file.getAbsolutePath.startsWith(s"$inputFilePath${File.separator}ansi")) {


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org