You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by gu...@apache.org on 2022/09/19 10:53:37 UTC
[spark] branch master updated: [SPARK-40419][SQL][TESTS] Integrate Grouped Aggregate Pandas UDFs into *.sql test cases
This is an automated email from the ASF dual-hosted git repository.
gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new dbb90e92282 [SPARK-40419][SQL][TESTS] Integrate Grouped Aggregate Pandas UDFs into *.sql test cases
dbb90e92282 is described below
commit dbb90e922821acab0e308c4b735d718c7e55207a
Author: itholic <ha...@databricks.com>
AuthorDate: Mon Sep 19 19:53:19 2022 +0900
[SPARK-40419][SQL][TESTS] Integrate Grouped Aggregate Pandas UDFs into *.sql test cases
### What changes were proposed in this pull request?
This PR proposes to integrate Grouped Aggregate Pandas UDF tests into *.sql test cases.
This PR includes the fixes below:
- Add `UDAFTestCase` into `SQLQueryTestSuite.scala` to test the UDAF related functions in sql.
- Add `udaf` directory and create related sql test cases into this directory.
- Generate golden files for new added sql test files.
- Skip from `ThriftServerQueryTestSuite.scala` for now.
- Fix minor typos.
### Why are the changes needed?
To improve test coverage and prevent potential bugs in the future.
### Does this PR introduce _any_ user-facing change?
No, it's test-only.
### How was this patch tested?
Added sql test files and corresponding golden files.
Closes #37873 from itholic/SPARK-40419.
Authored-by: itholic <ha...@databricks.com>
Signed-off-by: Hyukjin Kwon <gu...@apache.org>
---
.../sql-tests/inputs/udaf/udaf-group-analytics.sql | 70 ++
.../inputs/udaf/udaf-group-by-ordinal.sql | 88 ++
.../sql-tests/inputs/udaf/udaf-group-by.sql | 110 ++
.../sql-tests/inputs/udaf/udaf-grouping-set.sql | 47 +
.../resources/sql-tests/inputs/{ => udaf}/udaf.sql | 0
.../results/udaf/udaf-group-analytics.sql.out | 1150 ++++++++++++++++++++
.../results/udaf/udaf-group-by-ordinal.sql.out | 405 +++++++
.../sql-tests/results/udaf/udaf-group-by.sql.out | 411 +++++++
.../results/udaf/udaf-grouping-set.sql.out | 175 +++
.../resources/sql-tests/results/udaf/udaf.sql.out | 67 ++
.../org/apache/spark/sql/SQLQueryTestSuite.scala | 17 +
.../thriftserver/ThriftServerQueryTestSuite.scala | 3 +
12 files changed, 2543 insertions(+)
diff --git a/sql/core/src/test/resources/sql-tests/inputs/udaf/udaf-group-analytics.sql b/sql/core/src/test/resources/sql-tests/inputs/udaf/udaf-group-analytics.sql
new file mode 100644
index 00000000000..0249d98b6be
--- /dev/null
+++ b/sql/core/src/test/resources/sql-tests/inputs/udaf/udaf-group-analytics.sql
@@ -0,0 +1,70 @@
+CREATE OR REPLACE TEMPORARY VIEW testData AS SELECT * FROM VALUES
+(1, 1), (1, 2), (2, 1), (2, 2), (3, 1), (3, 2)
+AS testData(a, b);
+
+-- CUBE on overlapping columns
+SELECT a + b, b, udaf(a - b) FROM testData GROUP BY a + b, b WITH CUBE;
+
+SELECT a, b, udaf(b) FROM testData GROUP BY a, b WITH CUBE;
+
+-- ROLLUP on overlapping columns
+SELECT a + b, b, udaf(a - b) FROM testData GROUP BY a + b, b WITH ROLLUP;
+
+SELECT a, b, udaf(b) FROM testData GROUP BY a, b WITH ROLLUP;
+
+CREATE OR REPLACE TEMPORARY VIEW courseSales AS SELECT * FROM VALUES
+("dotNET", 2012, 10000), ("Java", 2012, 20000), ("dotNET", 2012, 5000), ("dotNET", 2013, 48000), ("Java", 2013, 30000)
+AS courseSales(course, year, earnings);
+
+-- ROLLUP
+SELECT course, year, udaf(earnings) FROM courseSales GROUP BY ROLLUP(course, year) ORDER BY course, year;
+SELECT course, year, udaf(earnings) FROM courseSales GROUP BY ROLLUP(course, year, (course, year)) ORDER BY course, year;
+SELECT course, year, udaf(earnings) FROM courseSales GROUP BY ROLLUP(course, year, (course, year), ()) ORDER BY course, year;
+
+-- CUBE
+SELECT course, year, udaf(earnings) FROM courseSales GROUP BY CUBE(course, year) ORDER BY course, year;
+SELECT course, year, udaf(earnings) FROM courseSales GROUP BY CUBE(course, year, (course, year)) ORDER BY course, year;
+SELECT course, year, udaf(earnings) FROM courseSales GROUP BY CUBE(course, year, (course, year), ()) ORDER BY course, year;
+
+-- GROUPING SETS
+SELECT course, year, udaf(earnings) FROM courseSales GROUP BY course, year GROUPING SETS(course, year);
+SELECT course, year, udaf(earnings) FROM courseSales GROUP BY course, year GROUPING SETS(course, year, ());
+SELECT course, year, udaf(earnings) FROM courseSales GROUP BY course, year GROUPING SETS(course);
+SELECT course, year, udaf(earnings) FROM courseSales GROUP BY course, year GROUPING SETS(year);
+
+-- Partial ROLLUP/CUBE/GROUPING SETS
+SELECT course, year, udaf(earnings) FROM courseSales GROUP BY course, CUBE(course, year) ORDER BY course, year;
+SELECT course, year, udaf(earnings) FROM courseSales GROUP BY CUBE(course, year), ROLLUP(course, year) ORDER BY course, year;
+SELECT course, year, udaf(earnings) FROM courseSales GROUP BY CUBE(course, year), ROLLUP(course, year), GROUPING SETS(course, year) ORDER BY course, year;
+
+-- GROUPING SETS with aggregate functions containing groupBy columns
+SELECT course, udaf(earnings) AS sum FROM courseSales
+GROUP BY course, earnings GROUPING SETS((), (course), (course, earnings)) ORDER BY course, sum;
+SELECT course, udaf(earnings) AS sum, GROUPING_ID(course, earnings) FROM courseSales
+GROUP BY course, earnings GROUPING SETS((), (course), (course, earnings)) ORDER BY course, sum;
+
+-- Aliases in SELECT could be used in ROLLUP/CUBE/GROUPING SETS
+SELECT a + b AS k1, b AS k2, udaf(a - b) FROM testData GROUP BY CUBE(k1, k2);
+SELECT a + b AS k, b, udaf(a - b) FROM testData GROUP BY ROLLUP(k, b);
+SELECT a + b, b AS k, udaf(a - b) FROM testData GROUP BY a + b, k GROUPING SETS(k);
+
+-- GROUP BY use mixed Separate columns and CUBE/ROLLUP/GROUPING SETS
+SELECT a, b, udaf(1) FROM testData GROUP BY a, b, CUBE(a, b);
+SELECT a, b, udaf(1) FROM testData GROUP BY a, b, ROLLUP(a, b);
+SELECT a, b, udaf(1) FROM testData GROUP BY CUBE(a, b), ROLLUP(a, b);
+SELECT a, b, udaf(1) FROM testData GROUP BY a, CUBE(a, b), ROLLUP(b);
+SELECT a, b, udaf(1) FROM testData GROUP BY a, GROUPING SETS((a, b), (a), ());
+SELECT a, b, udaf(1) FROM testData GROUP BY a, CUBE(a, b), GROUPING SETS((a, b), (a), ());
+SELECT a, b, udaf(1) FROM testData GROUP BY a, CUBE(a, b), ROLLUP(a, b), GROUPING SETS((a, b), (a), ());
+
+-- Support nested CUBE/ROLLUP/GROUPING SETS in GROUPING SETS
+SELECT a, b, udaf(1) FROM testData GROUP BY a, GROUPING SETS(ROLLUP(a, b));
+SELECT a, b, udaf(1) FROM testData GROUP BY a, GROUPING SETS(GROUPING SETS((a, b), (a), ()));
+
+SELECT a, b, udaf(1) FROM testData GROUP BY a, GROUPING SETS((a, b), GROUPING SETS(ROLLUP(a, b)));
+SELECT a, b, udaf(1) FROM testData GROUP BY a, GROUPING SETS((a, b, a, b), (a, b, a), (a, b));
+SELECT a, b, udaf(1) FROM testData GROUP BY a, GROUPING SETS(GROUPING SETS((a, b, a, b), (a, b, a), (a, b)));
+
+SELECT a, b, udaf(1) FROM testData GROUP BY a, GROUPING SETS(ROLLUP(a, b), CUBE(a, b));
+SELECT a, b, udaf(1) FROM testData GROUP BY a, GROUPING SETS(GROUPING SETS((a, b), (a), ()), GROUPING SETS((a, b), (a), (b), ()));
+SELECT a, b, udaf(1) FROM testData GROUP BY a, GROUPING SETS((a, b), (a), (), (a, b), (a), (b), ());
diff --git a/sql/core/src/test/resources/sql-tests/inputs/udaf/udaf-group-by-ordinal.sql b/sql/core/src/test/resources/sql-tests/inputs/udaf/udaf-group-by-ordinal.sql
new file mode 100644
index 00000000000..ded3e74d20a
--- /dev/null
+++ b/sql/core/src/test/resources/sql-tests/inputs/udaf/udaf-group-by-ordinal.sql
@@ -0,0 +1,88 @@
+-- group by ordinal positions
+
+create temporary view data as select * from values
+ (1, 1),
+ (1, 2),
+ (2, 1),
+ (2, 2),
+ (3, 1),
+ (3, 2)
+ as data(a, b);
+
+-- basic case
+select a, udaf(b) from data group by 1;
+
+-- constant case
+select 1, 2, udaf(b) from data group by 1, 2;
+
+-- duplicate group by column
+select a, 1, udaf(b) from data group by a, 1;
+select a, 1, udaf(b) from data group by 1, 2;
+
+-- group by a non-aggregate expression's ordinal
+select a, b + 2, udaf(2) from data group by a, 2;
+
+-- with alias
+select a as aa, b + 2 as bb, udaf(2) from data group by 1, 2;
+
+-- foldable non-literal: this should be the same as no grouping.
+select udaf(b) from data group by 1 + 0;
+
+-- negative case: position is an aggregate expression
+select a, b, udaf(b) from data group by 3;
+select a, b, udaf(b) + 2 from data group by 3;
+
+-- negative case: nondeterministic expression
+select a, rand(0), udaf(b)
+from
+(select /*+ REPARTITION(1) */ a, b from data) group by a, 2;
+
+-- group by ordinal followed by order by
+select a, udaf(a) from (select 1 as a) tmp group by 1 order by 1;
+
+-- group by ordinal followed by having
+select udaf(a), a from (select 1 as a) tmp group by 2 having a > 0;
+
+-- mixed cases: group-by ordinals and aliases
+select a, a AS k, udaf(b) from data group by k, 1;
+
+-- can use ordinal in CUBE
+select a, b, udaf(1) from data group by cube(1, 2);
+
+-- mixed cases: can use ordinal in CUBE
+select a, b, udaf(1) from data group by cube(1, b);
+
+-- can use ordinal with cube
+select a, b, udaf(1) from data group by 1, 2 with cube;
+
+-- can use ordinal in ROLLUP
+select a, b, udaf(1) from data group by rollup(1, 2);
+
+-- mixed cases: can use ordinal in ROLLUP
+select a, b, udaf(1) from data group by rollup(1, b);
+
+-- can use ordinal with rollup
+select a, b, udaf(1) from data group by 1, 2 with rollup;
+
+-- can use ordinal in GROUPING SETS
+select a, b, udaf(1) from data group by grouping sets((1), (2), (1, 2));
+
+-- mixed cases: can use ordinal in GROUPING SETS
+select a, b, udaf(1) from data group by grouping sets((1), (b), (a, 2));
+
+select a, b, udaf(1) from data group by a, 2 grouping sets((1), (b), (a, 2));
+
+-- range error
+select a, b, udaf(1) from data group by a, -1;
+
+select a, b, udaf(1) from data group by a, 3;
+
+select a, b, udaf(1) from data group by cube(-1, 2);
+
+select a, b, udaf(1) from data group by cube(1, 3);
+
+-- turn off group by ordinal
+set spark.sql.groupByOrdinal=false;
+
+-- can now group by negative literal
+select udaf(b) from data group by -1;
diff --git a/sql/core/src/test/resources/sql-tests/inputs/udaf/udaf-group-by.sql b/sql/core/src/test/resources/sql-tests/inputs/udaf/udaf-group-by.sql
new file mode 100644
index 00000000000..eaac13bcf6a
--- /dev/null
+++ b/sql/core/src/test/resources/sql-tests/inputs/udaf/udaf-group-by.sql
@@ -0,0 +1,110 @@
+-- Test aggregate operator with codegen on and off.
+--CONFIG_DIM1 spark.sql.codegen.wholeStage=true
+--CONFIG_DIM1 spark.sql.codegen.wholeStage=false,spark.sql.codegen.factoryMode=CODEGEN_ONLY
+--CONFIG_DIM1 spark.sql.codegen.wholeStage=false,spark.sql.codegen.factoryMode=NO_CODEGEN
+
+-- Test data.
+CREATE OR REPLACE TEMPORARY VIEW testData AS SELECT * FROM VALUES
+(1, 1), (1, 2), (2, 1), (2, 2), (3, 1), (3, 2), (null, 1), (3, null), (null, null)
+AS testData(a, b);
+
+-- Aggregate with empty GroupBy expressions.
+SELECT a, udaf(b) FROM testData;
+SELECT udaf(a), udaf(b) FROM testData;
+
+-- Aggregate with non-empty GroupBy expressions.
+SELECT a, udaf(b) FROM testData GROUP BY a;
+SELECT a, udaf(b) FROM testData GROUP BY b;
+SELECT udaf(a), udaf(b) FROM testData GROUP BY a;
+
+-- Aggregate grouped by literals.
+SELECT 'foo', udaf(a) FROM testData GROUP BY 1;
+
+-- Aggregate grouped by literals (hash aggregate).
+SELECT 'foo', udaf(a) FROM testData WHERE a = 0 GROUP BY 1;
+
+-- Aggregate grouped by literals (sort aggregate).
+SELECT 'foo', udaf(STRUCT(a)) FROM testData WHERE a = 0 GROUP BY 1;
+
+-- Aggregate with complex GroupBy expressions.
+SELECT a + b, udaf(b) FROM testData GROUP BY a + b;
+SELECT a + 2, udaf(b) FROM testData GROUP BY a + 1;
+SELECT a + 1 + 1, udaf(b) FROM testData GROUP BY a + 1;
+
+-- Aggregate with nulls.
+SELECT SKEWNESS(a), KURTOSIS(a), udaf(a), udaf(a), AVG(a), VARIANCE(a), STDDEV(a), SUM(a), udaf(a)
+FROM testData;
+
+-- Aggregate with foldable input and multiple distinct groups.
+SELECT udaf(DISTINCT b), udaf(DISTINCT b, c) FROM (SELECT 1 AS a, 2 AS b, 3 AS c) GROUP BY a;
+
+-- Aliases in SELECT could be used in GROUP BY
+SELECT a AS k, udaf(b) FROM testData GROUP BY k;
+SELECT a AS k, udaf(b) FROM testData GROUP BY k HAVING k > 1;
+
+-- GROUP BY alias with invalid col in SELECT list
+SELECT a AS k, udaf(non_existing) FROM testData GROUP BY k;
+
+-- Test data.
+CREATE OR REPLACE TEMPORARY VIEW testDataHasSameNameWithAlias AS SELECT * FROM VALUES
+(1, 1, 3), (1, 2, 1) AS testDataHasSameNameWithAlias(k, a, v);
+SELECT k AS a, udaf(v) FROM testDataHasSameNameWithAlias GROUP BY a;
+
+-- turn off group by aliases
+set spark.sql.groupByAliases=false;
+
+-- Check analysis exceptions
+SELECT a AS k, udaf(b) FROM testData GROUP BY k;
+
+-- Aggregate with empty input and non-empty GroupBy expressions.
+SELECT a, udaf(1) FROM testData WHERE false GROUP BY a;
+
+-- Aggregate with empty input and empty GroupBy expressions.
+SELECT udaf(1) FROM testData WHERE false;
+SELECT 1 FROM (SELECT udaf(1) FROM testData WHERE false) t;
+
+-- Aggregate with empty GroupBy expressions and filter on top
+SELECT 1 from (
+ SELECT 1 AS z,
+ udaf(a.x)
+ FROM (select 1 as x) a
+ WHERE false
+) b
+where b.z != b.z;
+
+-- SPARK-25708 HAVING without GROUP BY means global aggregate
+SELECT 1 FROM range(10) HAVING udaf(id) > 0;
+
+-- Test data
+CREATE OR REPLACE TEMPORARY VIEW test_agg AS SELECT * FROM VALUES
+ (1, true), (1, false),
+ (2, true),
+ (3, false), (3, null),
+ (4, null), (4, null),
+ (5, null), (5, true), (5, false) AS test_agg(k, v);
+
+-- empty table
+SELECT udaf(v), some(v), any(v), bool_and(v), bool_or(v) FROM test_agg WHERE 1 = 0;
+
+-- all null values
+SELECT udaf(v), some(v), any(v), bool_and(v), bool_or(v) FROM test_agg WHERE k = 4;
+
+-- aggregates are null Filtering
+SELECT udaf(v), some(v), any(v), bool_and(v), bool_or(v) FROM test_agg WHERE k = 5;
+
+-- group by
+SELECT k, udaf(v), some(v), any(v), bool_and(v), bool_or(v) FROM test_agg GROUP BY k;
+
+-- having
+SELECT k, udaf(v) FROM test_agg GROUP BY k HAVING udaf(v) = false;
+SELECT k, udaf(v) FROM test_agg GROUP BY k HAVING udaf(v) IS NULL;
+
+-- basic subquery path to make sure rewrite happens in both parent and child plans.
+SELECT k,
+ udaf(v) AS count
+FROM test_agg
+WHERE k = 2
+ AND v IN (SELECT Any(v)
+ FROM test_agg
+ WHERE k = 1)
+GROUP BY k;
diff --git a/sql/core/src/test/resources/sql-tests/inputs/udaf/udaf-grouping-set.sql b/sql/core/src/test/resources/sql-tests/inputs/udaf/udaf-grouping-set.sql
new file mode 100644
index 00000000000..1217b9e5b09
--- /dev/null
+++ b/sql/core/src/test/resources/sql-tests/inputs/udaf/udaf-grouping-set.sql
@@ -0,0 +1,47 @@
+CREATE TEMPORARY VIEW grouping AS SELECT * FROM VALUES
+ ("1", "2", "3", 1),
+ ("4", "5", "6", 1),
+ ("7", "8", "9", 1)
+ as grouping(a, b, c, d);
+
+-- SPARK-17849: grouping set throws NPE #1
+SELECT a, b, c, udaf(d) FROM grouping GROUP BY a, b, c GROUPING SETS (());
+
+-- SPARK-17849: grouping set throws NPE #2
+SELECT a, b, c, udaf(d) FROM grouping GROUP BY a, b, c GROUPING SETS ((a));
+
+-- SPARK-17849: grouping set throws NPE #3
+SELECT a, b, c, udaf(d) FROM grouping GROUP BY a, b, c GROUPING SETS ((c));
+
+-- Group sets without explicit group by
+SELECT c1, udaf(c2) FROM (VALUES ('x', 10, 0), ('y', 20, 0)) AS t (c1, c2, c3) GROUP BY GROUPING SETS (c1);
+
+-- Group sets without group by and with grouping
+SELECT c1, udaf(c2), grouping(c1) FROM (VALUES ('x', 10, 0), ('y', 20, 0)) AS t (c1, c2, c3) GROUP BY GROUPING SETS (c1);
+
+-- Multiple grouping within a grouping set
+SELECT c1, c2, udaf(c3), grouping__id
+FROM (VALUES ('x', 'a', 10), ('y', 'b', 20) ) AS t (c1, c2, c3)
+GROUP BY GROUPING SETS ( ( c1 ), ( c2 ) )
+HAVING GROUPING__ID > 1;
+
+-- complex expression in grouping sets
+SELECT a + b, b, udaf(c) FROM (VALUES (1,1,1),(2,2,2)) AS t(a,b,c) GROUP BY GROUPING SETS ( (a + b), (b));
+
+-- complex expression in grouping sets
+SELECT a + b, b, udaf(c) FROM (VALUES (1,1,1),(2,2,2)) AS t(a,b,c) GROUP BY GROUPING SETS ( (a + b), (b + a), (b));
+
+-- negative tests - must have at least one grouping expression
+SELECT a, b, c, udaf(d) FROM grouping GROUP BY WITH ROLLUP;
+
+SELECT a, b, c, udaf(d) FROM grouping GROUP BY WITH CUBE;
+
+-- duplicate entries in grouping sets
+SELECT k1, k2, udaf(v) FROM (VALUES (1,1,1),(2,2,2)) AS t(k1,k2,v) GROUP BY GROUPING SETS ((k1),(k1,k2),(k2,k1));
+
+SELECT grouping__id, k1, k2, udaf(v) FROM (VALUES (1,1,1),(2,2,2)) AS t(k1,k2,v) GROUP BY GROUPING SETS ((k1),(k1,k2),(k2,k1));
+
+SELECT grouping(k1), k1, k2, udaf(v) FROM (VALUES (1,1,1),(2,2,2)) AS t(k1,k2,v) GROUP BY GROUPING SETS ((k1),(k1,k2),(k2,k1));
+
+-- grouping_id function
+SELECT grouping_id(k1, k2), udaf(v) from (VALUES (1,1,1),(2,2,2)) AS t(k1,k2,v) GROUP BY k1, k2 GROUPING SETS ((k2, k1), k1);
diff --git a/sql/core/src/test/resources/sql-tests/inputs/udaf.sql b/sql/core/src/test/resources/sql-tests/inputs/udaf/udaf.sql
similarity index 100%
rename from sql/core/src/test/resources/sql-tests/inputs/udaf.sql
rename to sql/core/src/test/resources/sql-tests/inputs/udaf/udaf.sql
diff --git a/sql/core/src/test/resources/sql-tests/results/udaf/udaf-group-analytics.sql.out b/sql/core/src/test/resources/sql-tests/results/udaf/udaf-group-analytics.sql.out
new file mode 100644
index 00000000000..4b16407e1a0
--- /dev/null
+++ b/sql/core/src/test/resources/sql-tests/results/udaf/udaf-group-analytics.sql.out
@@ -0,0 +1,1150 @@
+-- Automatically generated by SQLQueryTestSuite
+-- !query
+CREATE OR REPLACE TEMPORARY VIEW testData AS SELECT * FROM VALUES
+(1, 1), (1, 2), (2, 1), (2, 2), (3, 1), (3, 2)
+AS testData(a, b)
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SELECT a + b, b, udaf(a - b) FROM testData GROUP BY a + b, b WITH CUBE
+-- !query schema
+struct<(a + b):int,b:int,udaf((a - b)):int>
+-- !query output
+2 1 1
+2 NULL 0
+3 1 1
+3 2 1
+3 NULL 0
+4 1 1
+4 2 1
+4 NULL 0
+5 2 1
+5 NULL 0
+NULL 1 3
+NULL 2 3
+NULL NULL 0
+
+
+-- !query
+SELECT a, b, udaf(b) FROM testData GROUP BY a, b WITH CUBE
+-- !query schema
+struct<a:int,b:int,udaf(b):int>
+-- !query output
+1 1 1
+1 2 1
+1 NULL 0
+2 1 1
+2 2 1
+2 NULL 0
+3 1 1
+3 2 1
+3 NULL 0
+NULL 1 3
+NULL 2 3
+NULL NULL 0
+
+
+-- !query
+SELECT a + b, b, udaf(a - b) FROM testData GROUP BY a + b, b WITH ROLLUP
+-- !query schema
+struct<(a + b):int,b:int,udaf((a - b)):int>
+-- !query output
+2 1 1
+2 NULL 0
+3 1 1
+3 2 1
+3 NULL 0
+4 1 1
+4 2 1
+4 NULL 0
+5 2 1
+5 NULL 0
+NULL NULL 0
+
+
+-- !query
+SELECT a, b, udaf(b) FROM testData GROUP BY a, b WITH ROLLUP
+-- !query schema
+struct<a:int,b:int,udaf(b):int>
+-- !query output
+1 1 1
+1 2 1
+1 NULL 0
+2 1 1
+2 2 1
+2 NULL 0
+3 1 1
+3 2 1
+3 NULL 0
+NULL NULL 0
+
+
+-- !query
+CREATE OR REPLACE TEMPORARY VIEW courseSales AS SELECT * FROM VALUES
+("dotNET", 2012, 10000), ("Java", 2012, 20000), ("dotNET", 2012, 5000), ("dotNET", 2013, 48000), ("Java", 2013, 30000)
+AS courseSales(course, year, earnings)
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SELECT course, year, udaf(earnings) FROM courseSales GROUP BY ROLLUP(course, year) ORDER BY course, year
+-- !query schema
+struct<course:string,year:int,udaf(earnings):int>
+-- !query output
+NULL NULL 5
+Java NULL 2
+Java 2012 1
+Java 2013 1
+dotNET NULL 3
+dotNET 2012 2
+dotNET 2013 1
+
+
+-- !query
+SELECT course, year, udaf(earnings) FROM courseSales GROUP BY ROLLUP(course, year, (course, year)) ORDER BY course, year
+-- !query schema
+struct<course:string,year:int,udaf(earnings):int>
+-- !query output
+NULL NULL 5
+Java NULL 2
+Java 2012 1
+Java 2012 1
+Java 2013 1
+Java 2013 1
+dotNET NULL 3
+dotNET 2012 2
+dotNET 2012 2
+dotNET 2013 1
+dotNET 2013 1
+
+
+-- !query
+SELECT course, year, udaf(earnings) FROM courseSales GROUP BY ROLLUP(course, year, (course, year), ()) ORDER BY course, year
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.catalyst.parser.ParseException
+
+Empty set in ROLLUP grouping sets is not supported.(line 1, pos 62)
+
+== SQL ==
+SELECT course, year, udaf(earnings) FROM courseSales GROUP BY ROLLUP(course, year, (course, year), ()) ORDER BY course, year
+--------------------------------------------------------------^^^
+
+
+-- !query
+SELECT course, year, udaf(earnings) FROM courseSales GROUP BY CUBE(course, year) ORDER BY course, year
+-- !query schema
+struct<course:string,year:int,udaf(earnings):int>
+-- !query output
+NULL NULL 5
+NULL 2012 3
+NULL 2013 2
+Java NULL 2
+Java 2012 1
+Java 2013 1
+dotNET NULL 3
+dotNET 2012 2
+dotNET 2013 1
+
+
+-- !query
+SELECT course, year, udaf(earnings) FROM courseSales GROUP BY CUBE(course, year, (course, year)) ORDER BY course, year
+-- !query schema
+struct<course:string,year:int,udaf(earnings):int>
+-- !query output
+NULL NULL 5
+NULL 2012 3
+NULL 2013 2
+Java NULL 2
+Java 2012 1
+Java 2012 1
+Java 2012 1
+Java 2012 1
+Java 2012 1
+Java 2013 1
+Java 2013 1
+Java 2013 1
+Java 2013 1
+Java 2013 1
+dotNET NULL 3
+dotNET 2012 2
+dotNET 2012 2
+dotNET 2012 2
+dotNET 2012 2
+dotNET 2012 2
+dotNET 2013 1
+dotNET 2013 1
+dotNET 2013 1
+dotNET 2013 1
+dotNET 2013 1
+
+
+-- !query
+SELECT course, year, udaf(earnings) FROM courseSales GROUP BY CUBE(course, year, (course, year), ()) ORDER BY course, year
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.catalyst.parser.ParseException
+
+Empty set in CUBE grouping sets is not supported.(line 1, pos 62)
+
+== SQL ==
+SELECT course, year, udaf(earnings) FROM courseSales GROUP BY CUBE(course, year, (course, year), ()) ORDER BY course, year
+--------------------------------------------------------------^^^
+
+
+-- !query
+SELECT course, year, udaf(earnings) FROM courseSales GROUP BY course, year GROUPING SETS(course, year)
+-- !query schema
+struct<course:string,year:int,udaf(earnings):int>
+-- !query output
+Java NULL 2
+NULL 2012 3
+NULL 2013 2
+dotNET NULL 3
+
+
+-- !query
+SELECT course, year, udaf(earnings) FROM courseSales GROUP BY course, year GROUPING SETS(course, year, ())
+-- !query schema
+struct<course:string,year:int,udaf(earnings):int>
+-- !query output
+Java NULL 2
+NULL 2012 3
+NULL 2013 2
+NULL NULL 5
+dotNET NULL 3
+
+
+-- !query
+SELECT course, year, udaf(earnings) FROM courseSales GROUP BY course, year GROUPING SETS(course)
+-- !query schema
+struct<course:string,year:int,udaf(earnings):int>
+-- !query output
+Java NULL 2
+dotNET NULL 3
+
+
+-- !query
+SELECT course, year, udaf(earnings) FROM courseSales GROUP BY course, year GROUPING SETS(year)
+-- !query schema
+struct<course:string,year:int,udaf(earnings):int>
+-- !query output
+NULL 2012 3
+NULL 2013 2
+
+
+-- !query
+SELECT course, year, udaf(earnings) FROM courseSales GROUP BY course, CUBE(course, year) ORDER BY course, year
+-- !query schema
+struct<course:string,year:int,udaf(earnings):int>
+-- !query output
+Java NULL 2
+Java NULL 2
+Java 2012 1
+Java 2012 1
+Java 2013 1
+Java 2013 1
+dotNET NULL 3
+dotNET NULL 3
+dotNET 2012 2
+dotNET 2012 2
+dotNET 2013 1
+dotNET 2013 1
+
+
+-- !query
+SELECT course, year, udaf(earnings) FROM courseSales GROUP BY CUBE(course, year), ROLLUP(course, year) ORDER BY course, year
+-- !query schema
+struct<course:string,year:int,udaf(earnings):int>
+-- !query output
+NULL NULL 5
+NULL 2012 3
+NULL 2013 2
+Java NULL 2
+Java NULL 2
+Java NULL 2
+Java 2012 1
+Java 2012 1
+Java 2012 1
+Java 2012 1
+Java 2012 1
+Java 2012 1
+Java 2012 1
+Java 2013 1
+Java 2013 1
+Java 2013 1
+Java 2013 1
+Java 2013 1
+Java 2013 1
+Java 2013 1
+dotNET NULL 3
+dotNET NULL 3
+dotNET NULL 3
+dotNET 2012 2
+dotNET 2012 2
+dotNET 2012 2
+dotNET 2012 2
+dotNET 2012 2
+dotNET 2012 2
+dotNET 2012 2
+dotNET 2013 1
+dotNET 2013 1
+dotNET 2013 1
+dotNET 2013 1
+dotNET 2013 1
+dotNET 2013 1
+dotNET 2013 1
+
+
+-- !query
+SELECT course, year, udaf(earnings) FROM courseSales GROUP BY CUBE(course, year), ROLLUP(course, year), GROUPING SETS(course, year) ORDER BY course, year
+-- !query schema
+struct<course:string,year:int,udaf(earnings):int>
+-- !query output
+NULL 2012 3
+NULL 2012 3
+NULL 2013 2
+NULL 2013 2
+Java NULL 2
+Java NULL 2
+Java NULL 2
+Java NULL 2
+Java 2012 1
+Java 2012 1
+Java 2012 1
+Java 2012 1
+Java 2012 1
+Java 2012 1
+Java 2012 1
+Java 2012 1
+Java 2012 1
+Java 2012 1
+Java 2012 1
+Java 2012 1
+Java 2012 1
+Java 2012 1
+Java 2012 1
+Java 2012 1
+Java 2012 1
+Java 2012 1
+Java 2013 1
+Java 2013 1
+Java 2013 1
+Java 2013 1
+Java 2013 1
+Java 2013 1
+Java 2013 1
+Java 2013 1
+Java 2013 1
+Java 2013 1
+Java 2013 1
+Java 2013 1
+Java 2013 1
+Java 2013 1
+Java 2013 1
+Java 2013 1
+Java 2013 1
+Java 2013 1
+dotNET NULL 3
+dotNET NULL 3
+dotNET NULL 3
+dotNET NULL 3
+dotNET 2012 2
+dotNET 2012 2
+dotNET 2012 2
+dotNET 2012 2
+dotNET 2012 2
+dotNET 2012 2
+dotNET 2012 2
+dotNET 2012 2
+dotNET 2012 2
+dotNET 2012 2
+dotNET 2012 2
+dotNET 2012 2
+dotNET 2012 2
+dotNET 2012 2
+dotNET 2012 2
+dotNET 2012 2
+dotNET 2012 2
+dotNET 2012 2
+dotNET 2013 1
+dotNET 2013 1
+dotNET 2013 1
+dotNET 2013 1
+dotNET 2013 1
+dotNET 2013 1
+dotNET 2013 1
+dotNET 2013 1
+dotNET 2013 1
+dotNET 2013 1
+dotNET 2013 1
+dotNET 2013 1
+dotNET 2013 1
+dotNET 2013 1
+dotNET 2013 1
+dotNET 2013 1
+dotNET 2013 1
+dotNET 2013 1
+
+
+-- !query
+SELECT course, udaf(earnings) AS sum FROM courseSales
+GROUP BY course, earnings GROUPING SETS((), (course), (course, earnings)) ORDER BY course, sum
+-- !query schema
+struct<course:string,sum:int>
+-- !query output
+NULL 0
+Java 0
+Java 1
+Java 1
+dotNET 0
+dotNET 1
+dotNET 1
+dotNET 1
+
+
+-- !query
+SELECT course, udaf(earnings) AS sum, GROUPING_ID(course, earnings) FROM courseSales
+GROUP BY course, earnings GROUPING SETS((), (course), (course, earnings)) ORDER BY course, sum
+-- !query schema
+struct<course:string,sum:int,grouping_id(course, earnings):bigint>
+-- !query output
+NULL 0 3
+Java 0 1
+Java 1 0
+Java 1 0
+dotNET 0 1
+dotNET 1 0
+dotNET 1 0
+dotNET 1 0
+
+
+-- !query
+SELECT a + b AS k1, b AS k2, udaf(a - b) FROM testData GROUP BY CUBE(k1, k2)
+-- !query schema
+struct<k1:int,k2:int,udaf((a - b)):int>
+-- !query output
+2 1 1
+2 NULL 1
+3 1 1
+3 2 1
+3 NULL 2
+4 1 1
+4 2 1
+4 NULL 2
+5 2 1
+5 NULL 1
+NULL 1 3
+NULL 2 3
+NULL NULL 6
+
+
+-- !query
+SELECT a + b AS k, b, udaf(a - b) FROM testData GROUP BY ROLLUP(k, b)
+-- !query schema
+struct<k:int,b:int,udaf((a - b)):int>
+-- !query output
+2 1 1
+2 NULL 0
+3 1 1
+3 2 1
+3 NULL 0
+4 1 1
+4 2 1
+4 NULL 0
+5 2 1
+5 NULL 0
+NULL NULL 0
+
+
+-- !query
+SELECT a + b, b AS k, udaf(a - b) FROM testData GROUP BY a + b, k GROUPING SETS(k)
+-- !query schema
+struct<(a + b):int,k:int,udaf((a - b)):int>
+-- !query output
+NULL 1 3
+NULL 2 3
+
+
+-- !query
+SELECT a, b, udaf(1) FROM testData GROUP BY a, b, CUBE(a, b)
+-- !query schema
+struct<a:int,b:int,udaf(1):int>
+-- !query output
+1 1 1
+1 1 1
+1 1 1
+1 1 1
+1 2 1
+1 2 1
+1 2 1
+1 2 1
+2 1 1
+2 1 1
+2 1 1
+2 1 1
+2 2 1
+2 2 1
+2 2 1
+2 2 1
+3 1 1
+3 1 1
+3 1 1
+3 1 1
+3 2 1
+3 2 1
+3 2 1
+3 2 1
+
+
+-- !query
+SELECT a, b, udaf(1) FROM testData GROUP BY a, b, ROLLUP(a, b)
+-- !query schema
+struct<a:int,b:int,udaf(1):int>
+-- !query output
+1 1 1
+1 1 1
+1 1 1
+1 2 1
+1 2 1
+1 2 1
+2 1 1
+2 1 1
+2 1 1
+2 2 1
+2 2 1
+2 2 1
+3 1 1
+3 1 1
+3 1 1
+3 2 1
+3 2 1
+3 2 1
+
+
+-- !query
+SELECT a, b, udaf(1) FROM testData GROUP BY CUBE(a, b), ROLLUP(a, b)
+-- !query schema
+struct<a:int,b:int,udaf(1):int>
+-- !query output
+1 1 1
+1 1 1
+1 1 1
+1 1 1
+1 1 1
+1 1 1
+1 1 1
+1 2 1
+1 2 1
+1 2 1
+1 2 1
+1 2 1
+1 2 1
+1 2 1
+1 NULL 2
+1 NULL 2
+1 NULL 2
+2 1 1
+2 1 1
+2 1 1
+2 1 1
+2 1 1
+2 1 1
+2 1 1
+2 2 1
+2 2 1
+2 2 1
+2 2 1
+2 2 1
+2 2 1
+2 2 1
+2 NULL 2
+2 NULL 2
+2 NULL 2
+3 1 1
+3 1 1
+3 1 1
+3 1 1
+3 1 1
+3 1 1
+3 1 1
+3 2 1
+3 2 1
+3 2 1
+3 2 1
+3 2 1
+3 2 1
+3 2 1
+3 NULL 2
+3 NULL 2
+3 NULL 2
+NULL 1 3
+NULL 2 3
+NULL NULL 6
+
+
+-- !query
+SELECT a, b, udaf(1) FROM testData GROUP BY a, CUBE(a, b), ROLLUP(b)
+-- !query schema
+struct<a:int,b:int,udaf(1):int>
+-- !query output
+1 1 1
+1 1 1
+1 1 1
+1 1 1
+1 1 1
+1 1 1
+1 2 1
+1 2 1
+1 2 1
+1 2 1
+1 2 1
+1 2 1
+1 NULL 2
+1 NULL 2
+2 1 1
+2 1 1
+2 1 1
+2 1 1
+2 1 1
+2 1 1
+2 2 1
+2 2 1
+2 2 1
+2 2 1
+2 2 1
+2 2 1
+2 NULL 2
+2 NULL 2
+3 1 1
+3 1 1
+3 1 1
+3 1 1
+3 1 1
+3 1 1
+3 2 1
+3 2 1
+3 2 1
+3 2 1
+3 2 1
+3 2 1
+3 NULL 2
+3 NULL 2
+
+
+-- !query
+SELECT a, b, udaf(1) FROM testData GROUP BY a, GROUPING SETS((a, b), (a), ())
+-- !query schema
+struct<a:int,b:int,udaf(1):int>
+-- !query output
+1 1 1
+1 2 1
+1 NULL 2
+1 NULL 2
+2 1 1
+2 2 1
+2 NULL 2
+2 NULL 2
+3 1 1
+3 2 1
+3 NULL 2
+3 NULL 2
+
+
+-- !query
+SELECT a, b, udaf(1) FROM testData GROUP BY a, CUBE(a, b), GROUPING SETS((a, b), (a), ())
+-- !query schema
+struct<a:int,b:int,udaf(1):int>
+-- !query output
+1 1 1
+1 1 1
+1 1 1
+1 1 1
+1 1 1
+1 1 1
+1 1 1
+1 1 1
+1 2 1
+1 2 1
+1 2 1
+1 2 1
+1 2 1
+1 2 1
+1 2 1
+1 2 1
+1 NULL 2
+1 NULL 2
+1 NULL 2
+1 NULL 2
+2 1 1
+2 1 1
+2 1 1
+2 1 1
+2 1 1
+2 1 1
+2 1 1
+2 1 1
+2 2 1
+2 2 1
+2 2 1
+2 2 1
+2 2 1
+2 2 1
+2 2 1
+2 2 1
+2 NULL 2
+2 NULL 2
+2 NULL 2
+2 NULL 2
+3 1 1
+3 1 1
+3 1 1
+3 1 1
+3 1 1
+3 1 1
+3 1 1
+3 1 1
+3 2 1
+3 2 1
+3 2 1
+3 2 1
+3 2 1
+3 2 1
+3 2 1
+3 2 1
+3 NULL 2
+3 NULL 2
+3 NULL 2
+3 NULL 2
+
+
+-- !query
+SELECT a, b, udaf(1) FROM testData GROUP BY a, CUBE(a, b), ROLLUP(a, b), GROUPING SETS((a, b), (a), ())
+-- !query schema
+struct<a:int,b:int,udaf(1):int>
+-- !query output
+1 1 1
+1 1 1
+1 1 1
+1 1 1
+1 1 1
+1 1 1
+1 1 1
+1 1 1
+1 1 1
+1 1 1
+1 1 1
+1 1 1
+1 1 1
+1 1 1
+1 1 1
+1 1 1
+1 1 1
+1 1 1
+1 1 1
+1 1 1
+1 1 1
+1 1 1
+1 1 1
+1 1 1
+1 1 1
+1 1 1
+1 1 1
+1 1 1
+1 2 1
+1 2 1
+1 2 1
+1 2 1
+1 2 1
+1 2 1
+1 2 1
+1 2 1
+1 2 1
+1 2 1
+1 2 1
+1 2 1
+1 2 1
+1 2 1
+1 2 1
+1 2 1
+1 2 1
+1 2 1
+1 2 1
+1 2 1
+1 2 1
+1 2 1
+1 2 1
+1 2 1
+1 2 1
+1 2 1
+1 2 1
+1 2 1
+1 NULL 2
+1 NULL 2
+1 NULL 2
+1 NULL 2
+1 NULL 2
+1 NULL 2
+1 NULL 2
+1 NULL 2
+2 1 1
+2 1 1
+2 1 1
+2 1 1
+2 1 1
+2 1 1
+2 1 1
+2 1 1
+2 1 1
+2 1 1
+2 1 1
+2 1 1
+2 1 1
+2 1 1
+2 1 1
+2 1 1
+2 1 1
+2 1 1
+2 1 1
+2 1 1
+2 1 1
+2 1 1
+2 1 1
+2 1 1
+2 1 1
+2 1 1
+2 1 1
+2 1 1
+2 2 1
+2 2 1
+2 2 1
+2 2 1
+2 2 1
+2 2 1
+2 2 1
+2 2 1
+2 2 1
+2 2 1
+2 2 1
+2 2 1
+2 2 1
+2 2 1
+2 2 1
+2 2 1
+2 2 1
+2 2 1
+2 2 1
+2 2 1
+2 2 1
+2 2 1
+2 2 1
+2 2 1
+2 2 1
+2 2 1
+2 2 1
+2 2 1
+2 NULL 2
+2 NULL 2
+2 NULL 2
+2 NULL 2
+2 NULL 2
+2 NULL 2
+2 NULL 2
+2 NULL 2
+3 1 1
+3 1 1
+3 1 1
+3 1 1
+3 1 1
+3 1 1
+3 1 1
+3 1 1
+3 1 1
+3 1 1
+3 1 1
+3 1 1
+3 1 1
+3 1 1
+3 1 1
+3 1 1
+3 1 1
+3 1 1
+3 1 1
+3 1 1
+3 1 1
+3 1 1
+3 1 1
+3 1 1
+3 1 1
+3 1 1
+3 1 1
+3 1 1
+3 2 1
+3 2 1
+3 2 1
+3 2 1
+3 2 1
+3 2 1
+3 2 1
+3 2 1
+3 2 1
+3 2 1
+3 2 1
+3 2 1
+3 2 1
+3 2 1
+3 2 1
+3 2 1
+3 2 1
+3 2 1
+3 2 1
+3 2 1
+3 2 1
+3 2 1
+3 2 1
+3 2 1
+3 2 1
+3 2 1
+3 2 1
+3 2 1
+3 NULL 2
+3 NULL 2
+3 NULL 2
+3 NULL 2
+3 NULL 2
+3 NULL 2
+3 NULL 2
+3 NULL 2
+
+
+-- !query
+SELECT a, b, udaf(1) FROM testData GROUP BY a, GROUPING SETS(ROLLUP(a, b))
+-- !query schema
+struct<a:int,b:int,udaf(1):int>
+-- !query output
+1 1 1
+1 2 1
+1 NULL 2
+1 NULL 2
+2 1 1
+2 2 1
+2 NULL 2
+2 NULL 2
+3 1 1
+3 2 1
+3 NULL 2
+3 NULL 2
+
+
+-- !query
+SELECT a, b, udaf(1) FROM testData GROUP BY a, GROUPING SETS(GROUPING SETS((a, b), (a), ()))
+-- !query schema
+struct<a:int,b:int,udaf(1):int>
+-- !query output
+1 1 1
+1 2 1
+1 NULL 2
+1 NULL 2
+2 1 1
+2 2 1
+2 NULL 2
+2 NULL 2
+3 1 1
+3 2 1
+3 NULL 2
+3 NULL 2
+
+
+-- !query
+SELECT a, b, udaf(1) FROM testData GROUP BY a, GROUPING SETS((a, b), GROUPING SETS(ROLLUP(a, b)))
+-- !query schema
+struct<a:int,b:int,udaf(1):int>
+-- !query output
+1 1 1
+1 1 1
+1 2 1
+1 2 1
+1 NULL 2
+1 NULL 2
+2 1 1
+2 1 1
+2 2 1
+2 2 1
+2 NULL 2
+2 NULL 2
+3 1 1
+3 1 1
+3 2 1
+3 2 1
+3 NULL 2
+3 NULL 2
+
+
+-- !query
+SELECT a, b, udaf(1) FROM testData GROUP BY a, GROUPING SETS((a, b, a, b), (a, b, a), (a, b))
+-- !query schema
+struct<a:int,b:int,udaf(1):int>
+-- !query output
+1 1 1
+1 1 1
+1 1 1
+1 2 1
+1 2 1
+1 2 1
+2 1 1
+2 1 1
+2 1 1
+2 2 1
+2 2 1
+2 2 1
+3 1 1
+3 1 1
+3 1 1
+3 2 1
+3 2 1
+3 2 1
+
+
+-- !query
+SELECT a, b, udaf(1) FROM testData GROUP BY a, GROUPING SETS(GROUPING SETS((a, b, a, b), (a, b, a), (a, b)))
+-- !query schema
+struct<a:int,b:int,udaf(1):int>
+-- !query output
+1 1 1
+1 1 1
+1 1 1
+1 2 1
+1 2 1
+1 2 1
+2 1 1
+2 1 1
+2 1 1
+2 2 1
+2 2 1
+2 2 1
+3 1 1
+3 1 1
+3 1 1
+3 2 1
+3 2 1
+3 2 1
+
+
+-- !query
+SELECT a, b, udaf(1) FROM testData GROUP BY a, GROUPING SETS(ROLLUP(a, b), CUBE(a, b))
+-- !query schema
+struct<a:int,b:int,udaf(1):int>
+-- !query output
+1 1 1
+1 1 1
+1 1 1
+1 2 1
+1 2 1
+1 2 1
+1 NULL 2
+1 NULL 2
+1 NULL 2
+1 NULL 2
+2 1 1
+2 1 1
+2 1 1
+2 2 1
+2 2 1
+2 2 1
+2 NULL 2
+2 NULL 2
+2 NULL 2
+2 NULL 2
+3 1 1
+3 1 1
+3 1 1
+3 2 1
+3 2 1
+3 2 1
+3 NULL 2
+3 NULL 2
+3 NULL 2
+3 NULL 2
+
+
+-- !query
+SELECT a, b, udaf(1) FROM testData GROUP BY a, GROUPING SETS(GROUPING SETS((a, b), (a), ()), GROUPING SETS((a, b), (a), (b), ()))
+-- !query schema
+struct<a:int,b:int,udaf(1):int>
+-- !query output
+1 1 1
+1 1 1
+1 1 1
+1 2 1
+1 2 1
+1 2 1
+1 NULL 2
+1 NULL 2
+1 NULL 2
+1 NULL 2
+2 1 1
+2 1 1
+2 1 1
+2 2 1
+2 2 1
+2 2 1
+2 NULL 2
+2 NULL 2
+2 NULL 2
+2 NULL 2
+3 1 1
+3 1 1
+3 1 1
+3 2 1
+3 2 1
+3 2 1
+3 NULL 2
+3 NULL 2
+3 NULL 2
+3 NULL 2
+
+
+-- !query
+SELECT a, b, udaf(1) FROM testData GROUP BY a, GROUPING SETS((a, b), (a), (), (a, b), (a), (b), ())
+-- !query schema
+struct<a:int,b:int,udaf(1):int>
+-- !query output
+1 1 1
+1 1 1
+1 1 1
+1 2 1
+1 2 1
+1 2 1
+1 NULL 2
+1 NULL 2
+1 NULL 2
+1 NULL 2
+2 1 1
+2 1 1
+2 1 1
+2 2 1
+2 2 1
+2 2 1
+2 NULL 2
+2 NULL 2
+2 NULL 2
+2 NULL 2
+3 1 1
+3 1 1
+3 1 1
+3 2 1
+3 2 1
+3 2 1
+3 NULL 2
+3 NULL 2
+3 NULL 2
+3 NULL 2
diff --git a/sql/core/src/test/resources/sql-tests/results/udaf/udaf-group-by-ordinal.sql.out b/sql/core/src/test/resources/sql-tests/results/udaf/udaf-group-by-ordinal.sql.out
new file mode 100644
index 00000000000..d6e3f111001
--- /dev/null
+++ b/sql/core/src/test/resources/sql-tests/results/udaf/udaf-group-by-ordinal.sql.out
@@ -0,0 +1,405 @@
+-- Automatically generated by SQLQueryTestSuite
+-- !query
+create temporary view data as select * from values
+ (1, 1),
+ (1, 2),
+ (2, 1),
+ (2, 2),
+ (3, 1),
+ (3, 2)
+ as data(a, b)
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+select a, udaf(b) from data group by 1
+-- !query schema
+struct<a:int,udaf(b):int>
+-- !query output
+1 2
+2 2
+3 2
+
+
+-- !query
+select 1, 2, udaf(b) from data group by 1, 2
+-- !query schema
+struct<1:int,2:int,udaf(b):int>
+-- !query output
+1 2 6
+
+
+-- !query
+select a, 1, udaf(b) from data group by a, 1
+-- !query schema
+struct<a:int,1:int,udaf(b):int>
+-- !query output
+1 1 2
+2 1 2
+3 1 2
+
+
+-- !query
+select a, 1, udaf(b) from data group by 1, 2
+-- !query schema
+struct<a:int,1:int,udaf(b):int>
+-- !query output
+1 1 2
+2 1 2
+3 1 2
+
+
+-- !query
+select a, b + 2, udaf(2) from data group by a, 2
+-- !query schema
+struct<a:int,(b + 2):int,udaf(2):int>
+-- !query output
+1 3 1
+1 4 1
+2 3 1
+2 4 1
+3 3 1
+3 4 1
+
+
+-- !query
+select a as aa, b + 2 as bb, udaf(2) from data group by 1, 2
+-- !query schema
+struct<aa:int,bb:int,udaf(2):int>
+-- !query output
+1 3 1
+1 4 1
+2 3 1
+2 4 1
+3 3 1
+3 4 1
+
+
+-- !query
+select udaf(b) from data group by 1 + 0
+-- !query schema
+struct<udaf(b):int>
+-- !query output
+6
+
+
+-- !query
+select a, b, udaf(b) from data group by 3
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" : "COLUMN_NOT_IN_GROUP_BY_CLAUSE",
+ "sqlState" : "42000",
+ "messageParameters" : {
+ "expression" : "\"a\""
+ }
+}
+
+
+-- !query
+select a, b, udaf(b) + 2 from data group by 3
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" : "COLUMN_NOT_IN_GROUP_BY_CLAUSE",
+ "sqlState" : "42000",
+ "messageParameters" : {
+ "expression" : "\"a\""
+ }
+}
+
+
+-- !query
+select a, rand(0), udaf(b)
+from
+(select /*+ REPARTITION(1) */ a, b from data) group by a, 2
+-- !query schema
+struct<a:int,rand(0):double,udaf(b):int>
+-- !query output
+1 0.5234194256885571 1
+1 0.7604953758285915 1
+2 0.0953472826424725 1
+2 0.3163249920547614 1
+3 0.2710259815484829 1
+3 0.7141011170991605 1
+
+
+-- !query
+select a, udaf(a) from (select 1 as a) tmp group by 1 order by 1
+-- !query schema
+struct<a:int,udaf(a):int>
+-- !query output
+1 1
+
+
+-- !query
+select udaf(a), a from (select 1 as a) tmp group by 2 having a > 0
+-- !query schema
+struct<udaf(a):int,a:int>
+-- !query output
+1 1
+
+
+-- !query
+select a, a AS k, udaf(b) from data group by k, 1
+-- !query schema
+struct<a:int,k:int,udaf(b):int>
+-- !query output
+1 1 2
+2 2 2
+3 3 2
+
+
+-- !query
+select a, b, udaf(1) from data group by cube(1, 2)
+-- !query schema
+struct<a:int,b:int,udaf(1):int>
+-- !query output
+1 1 1
+1 2 1
+1 NULL 2
+2 1 1
+2 2 1
+2 NULL 2
+3 1 1
+3 2 1
+3 NULL 2
+NULL 1 3
+NULL 2 3
+NULL NULL 6
+
+
+-- !query
+select a, b, udaf(1) from data group by cube(1, b)
+-- !query schema
+struct<a:int,b:int,udaf(1):int>
+-- !query output
+1 1 1
+1 2 1
+1 NULL 2
+2 1 1
+2 2 1
+2 NULL 2
+3 1 1
+3 2 1
+3 NULL 2
+NULL 1 3
+NULL 2 3
+NULL NULL 6
+
+
+-- !query
+select a, b, udaf(1) from data group by 1, 2 with cube
+-- !query schema
+struct<a:int,b:int,udaf(1):int>
+-- !query output
+1 1 1
+1 2 1
+1 NULL 2
+2 1 1
+2 2 1
+2 NULL 2
+3 1 1
+3 2 1
+3 NULL 2
+NULL 1 3
+NULL 2 3
+NULL NULL 6
+
+
+-- !query
+select a, b, udaf(1) from data group by rollup(1, 2)
+-- !query schema
+struct<a:int,b:int,udaf(1):int>
+-- !query output
+1 1 1
+1 2 1
+1 NULL 2
+2 1 1
+2 2 1
+2 NULL 2
+3 1 1
+3 2 1
+3 NULL 2
+NULL NULL 6
+
+
+-- !query
+select a, b, udaf(1) from data group by rollup(1, b)
+-- !query schema
+struct<a:int,b:int,udaf(1):int>
+-- !query output
+1 1 1
+1 2 1
+1 NULL 2
+2 1 1
+2 2 1
+2 NULL 2
+3 1 1
+3 2 1
+3 NULL 2
+NULL NULL 6
+
+
+-- !query
+select a, b, udaf(1) from data group by 1, 2 with rollup
+-- !query schema
+struct<a:int,b:int,udaf(1):int>
+-- !query output
+1 1 1
+1 2 1
+1 NULL 2
+2 1 1
+2 2 1
+2 NULL 2
+3 1 1
+3 2 1
+3 NULL 2
+NULL NULL 6
+
+
+-- !query
+select a, b, udaf(1) from data group by grouping sets((1), (2), (1, 2))
+-- !query schema
+struct<a:int,b:int,udaf(1):int>
+-- !query output
+1 1 1
+1 2 1
+1 NULL 2
+2 1 1
+2 2 1
+2 NULL 2
+3 1 1
+3 2 1
+3 NULL 2
+NULL 1 3
+NULL 2 3
+
+
+-- !query
+select a, b, udaf(1) from data group by grouping sets((1), (b), (a, 2))
+-- !query schema
+struct<a:int,b:int,udaf(1):int>
+-- !query output
+1 1 1
+1 2 1
+1 NULL 2
+2 1 1
+2 2 1
+2 NULL 2
+3 1 1
+3 2 1
+3 NULL 2
+NULL 1 3
+NULL 2 3
+
+
+-- !query
+select a, b, udaf(1) from data group by a, 2 grouping sets((1), (b), (a, 2))
+-- !query schema
+struct<a:int,b:int,udaf(1):int>
+-- !query output
+1 1 1
+1 2 1
+1 NULL 2
+2 1 1
+2 2 1
+2 NULL 2
+3 1 1
+3 2 1
+3 NULL 2
+NULL 1 3
+NULL 2 3
+
+
+-- !query
+select a, b, udaf(1) from data group by a, -1
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" : "GROUP_BY_POS_OUT_OF_RANGE",
+ "sqlState" : "42000",
+ "messageParameters" : {
+ "index" : "-1",
+ "size" : "3"
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 44,
+ "stopIndex" : 45,
+ "fragment" : "-1"
+ } ]
+}
+
+
+-- !query
+select a, b, udaf(1) from data group by a, 3
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" : "COLUMN_NOT_IN_GROUP_BY_CLAUSE",
+ "sqlState" : "42000",
+ "messageParameters" : {
+ "expression" : "\"b\""
+ }
+}
+
+
+-- !query
+select a, b, udaf(1) from data group by cube(-1, 2)
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" : "GROUP_BY_POS_OUT_OF_RANGE",
+ "sqlState" : "42000",
+ "messageParameters" : {
+ "index" : "-1",
+ "size" : "3"
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 46,
+ "stopIndex" : 47,
+ "fragment" : "-1"
+ } ]
+}
+
+
+-- !query
+select a, b, udaf(1) from data group by cube(1, 3)
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+grouping expressions sequence is empty, and 'data.a' is not an aggregate function. Wrap '()' in windowing function(s) or wrap 'data.a' in first() (or first_value) if you don't care which value you get.
+
+
+-- !query
+set spark.sql.groupByOrdinal=false
+-- !query schema
+struct<key:string,value:string>
+-- !query output
+spark.sql.groupByOrdinal false
+
+
+-- !query
+select udaf(b) from data group by -1
+-- !query schema
+struct<udaf(b):int>
+-- !query output
+6
diff --git a/sql/core/src/test/resources/sql-tests/results/udaf/udaf-group-by.sql.out b/sql/core/src/test/resources/sql-tests/results/udaf/udaf-group-by.sql.out
new file mode 100644
index 00000000000..bb98d244faa
--- /dev/null
+++ b/sql/core/src/test/resources/sql-tests/results/udaf/udaf-group-by.sql.out
@@ -0,0 +1,411 @@
+-- Automatically generated by SQLQueryTestSuite
+-- !query
+CREATE OR REPLACE TEMPORARY VIEW testData AS SELECT * FROM VALUES
+(1, 1), (1, 2), (2, 1), (2, 2), (3, 1), (3, 2), (null, 1), (3, null), (null, null)
+AS testData(a, b)
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SELECT a, udaf(b) FROM testData
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+grouping expressions sequence is empty, and 'testdata.a' is not an aggregate function. Wrap '()' in windowing function(s) or wrap 'testdata.a' in first() (or first_value) if you don't care which value you get.
+
+
+-- !query
+SELECT udaf(a), udaf(b) FROM testData
+-- !query schema
+struct<udaf(a):int,udaf(b):int>
+-- !query output
+7 7
+
+
+-- !query
+SELECT a, udaf(b) FROM testData GROUP BY a
+-- !query schema
+struct<a:int,udaf(b):int>
+-- !query output
+1 2
+2 2
+3 2
+NULL 1
+
+
+-- !query
+SELECT a, udaf(b) FROM testData GROUP BY b
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" : "COLUMN_NOT_IN_GROUP_BY_CLAUSE",
+ "sqlState" : "42000",
+ "messageParameters" : {
+ "expression" : "\"a\""
+ }
+}
+
+
+-- !query
+SELECT udaf(a), udaf(b) FROM testData GROUP BY a
+-- !query schema
+struct<udaf(a):int,udaf(b):int>
+-- !query output
+0 1
+2 2
+2 2
+3 2
+
+
+-- !query
+SELECT 'foo', udaf(a) FROM testData GROUP BY 1
+-- !query schema
+struct<foo:string,udaf(a):int>
+-- !query output
+foo 7
+
+
+-- !query
+SELECT 'foo', udaf(a) FROM testData WHERE a = 0 GROUP BY 1
+-- !query schema
+struct<foo:string,udaf(a):int>
+-- !query output
+
+
+
+-- !query
+SELECT 'foo', udaf(STRUCT(a)) FROM testData WHERE a = 0 GROUP BY 1
+-- !query schema
+struct<foo:string,udaf(struct(a, a)):int>
+-- !query output
+
+
+
+-- !query
+SELECT a + b, udaf(b) FROM testData GROUP BY a + b
+-- !query schema
+struct<(a + b):int,udaf(b):int>
+-- !query output
+2 1
+3 2
+4 2
+5 1
+NULL 1
+
+
+-- !query
+SELECT a + 2, udaf(b) FROM testData GROUP BY a + 1
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" : "COLUMN_NOT_IN_GROUP_BY_CLAUSE",
+ "sqlState" : "42000",
+ "messageParameters" : {
+ "expression" : "\"a\""
+ }
+}
+
+
+-- !query
+SELECT a + 1 + 1, udaf(b) FROM testData GROUP BY a + 1
+-- !query schema
+struct<((a + 1) + 1):int,udaf(b):int>
+-- !query output
+3 2
+4 2
+5 2
+NULL 1
+
+
+-- !query
+SELECT SKEWNESS(a), KURTOSIS(a), udaf(a), udaf(a), AVG(a), VARIANCE(a), STDDEV(a), SUM(a), udaf(a)
+FROM testData
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" : "INVALID_PANDAS_UDF_PLACEMENT",
+ "messageParameters" : {
+ "functionList" : "`udaf`"
+ }
+}
+
+
+-- !query
+SELECT udaf(DISTINCT b), udaf(DISTINCT b, c) FROM (SELECT 1 AS a, 2 AS b, 3 AS c) GROUP BY a
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+Function pythonudf does not support DISTINCT; line 1 pos 7
+
+
+-- !query
+SELECT a AS k, udaf(b) FROM testData GROUP BY k
+-- !query schema
+struct<k:int,udaf(b):int>
+-- !query output
+1 2
+2 2
+3 2
+NULL 1
+
+
+-- !query
+SELECT a AS k, udaf(b) FROM testData GROUP BY k HAVING k > 1
+-- !query schema
+struct<k:int,udaf(b):int>
+-- !query output
+2 2
+3 2
+
+
+-- !query
+SELECT a AS k, udaf(non_existing) FROM testData GROUP BY k
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" : "UNRESOLVED_COLUMN",
+ "errorSubClass" : "WITH_SUGGESTION",
+ "sqlState" : "42000",
+ "messageParameters" : {
+ "objectName" : "`non_existing`",
+ "proposal" : "`testdata`.`a`, `testdata`.`b`"
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 21,
+ "stopIndex" : 32,
+ "fragment" : "non_existing"
+ } ]
+}
+
+
+-- !query
+CREATE OR REPLACE TEMPORARY VIEW testDataHasSameNameWithAlias AS SELECT * FROM VALUES
+(1, 1, 3), (1, 2, 1) AS testDataHasSameNameWithAlias(k, a, v)
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SELECT k AS a, udaf(v) FROM testDataHasSameNameWithAlias GROUP BY a
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" : "COLUMN_NOT_IN_GROUP_BY_CLAUSE",
+ "sqlState" : "42000",
+ "messageParameters" : {
+ "expression" : "\"k\""
+ }
+}
+
+
+-- !query
+set spark.sql.groupByAliases=false
+-- !query schema
+struct<key:string,value:string>
+-- !query output
+spark.sql.groupByAliases false
+
+
+-- !query
+SELECT a AS k, udaf(b) FROM testData GROUP BY k
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" : "UNRESOLVED_COLUMN",
+ "errorSubClass" : "WITH_SUGGESTION",
+ "sqlState" : "42000",
+ "messageParameters" : {
+ "objectName" : "`k`",
+ "proposal" : "`testdata`.`a`, `testdata`.`b`"
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 47,
+ "stopIndex" : 47,
+ "fragment" : "k"
+ } ]
+}
+
+
+-- !query
+SELECT a, udaf(1) FROM testData WHERE false GROUP BY a
+-- !query schema
+struct<a:int,udaf(1):int>
+-- !query output
+
+
+
+-- !query
+SELECT udaf(1) FROM testData WHERE false
+-- !query schema
+struct<udaf(1):int>
+-- !query output
+
+
+
+-- !query
+SELECT 1 FROM (SELECT udaf(1) FROM testData WHERE false) t
+-- !query schema
+struct<1:int>
+-- !query output
+1
+
+
+-- !query
+SELECT 1 from (
+ SELECT 1 AS z,
+ udaf(a.x)
+ FROM (select 1 as x) a
+ WHERE false
+) b
+where b.z != b.z
+-- !query schema
+struct<1:int>
+-- !query output
+
+
+
+-- !query
+SELECT 1 FROM range(10) HAVING udaf(id) > 0
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" : "UNRESOLVED_COLUMN",
+ "errorSubClass" : "WITH_SUGGESTION",
+ "sqlState" : "42000",
+ "messageParameters" : {
+ "objectName" : "`id`",
+ "proposal" : "`1`"
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 37,
+ "stopIndex" : 38,
+ "fragment" : "id"
+ } ]
+}
+
+
+-- !query
+CREATE OR REPLACE TEMPORARY VIEW test_agg AS SELECT * FROM VALUES
+ (1, true), (1, false),
+ (2, true),
+ (3, false), (3, null),
+ (4, null), (4, null),
+ (5, null), (5, true), (5, false) AS test_agg(k, v)
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SELECT udaf(v), some(v), any(v), bool_and(v), bool_or(v) FROM test_agg WHERE 1 = 0
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" : "INVALID_PANDAS_UDF_PLACEMENT",
+ "messageParameters" : {
+ "functionList" : "`udaf`"
+ }
+}
+
+
+-- !query
+SELECT udaf(v), some(v), any(v), bool_and(v), bool_or(v) FROM test_agg WHERE k = 4
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" : "INVALID_PANDAS_UDF_PLACEMENT",
+ "messageParameters" : {
+ "functionList" : "`udaf`"
+ }
+}
+
+
+-- !query
+SELECT udaf(v), some(v), any(v), bool_and(v), bool_or(v) FROM test_agg WHERE k = 5
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" : "INVALID_PANDAS_UDF_PLACEMENT",
+ "messageParameters" : {
+ "functionList" : "`udaf`"
+ }
+}
+
+
+-- !query
+SELECT k, udaf(v), some(v), any(v), bool_and(v), bool_or(v) FROM test_agg GROUP BY k
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" : "INVALID_PANDAS_UDF_PLACEMENT",
+ "messageParameters" : {
+ "functionList" : "`udaf`"
+ }
+}
+
+
+-- !query
+SELECT k, udaf(v) FROM test_agg GROUP BY k HAVING udaf(v) = false
+-- !query schema
+struct<k:int,udaf(v):int>
+-- !query output
+4 0
+
+
+-- !query
+SELECT k, udaf(v) FROM test_agg GROUP BY k HAVING udaf(v) IS NULL
+-- !query schema
+struct<k:int,udaf(v):int>
+-- !query output
+
+
+
+-- !query
+SELECT k,
+ udaf(v) AS count
+FROM test_agg
+WHERE k = 2
+ AND v IN (SELECT Any(v)
+ FROM test_agg
+ WHERE k = 1)
+GROUP BY k
+-- !query schema
+struct<k:int,count:int>
+-- !query output
+2 1
diff --git a/sql/core/src/test/resources/sql-tests/results/udaf/udaf-grouping-set.sql.out b/sql/core/src/test/resources/sql-tests/results/udaf/udaf-grouping-set.sql.out
new file mode 100644
index 00000000000..c9b27dca0c0
--- /dev/null
+++ b/sql/core/src/test/resources/sql-tests/results/udaf/udaf-grouping-set.sql.out
@@ -0,0 +1,175 @@
+-- Automatically generated by SQLQueryTestSuite
+-- !query
+CREATE TEMPORARY VIEW grouping AS SELECT * FROM VALUES
+ ("1", "2", "3", 1),
+ ("4", "5", "6", 1),
+ ("7", "8", "9", 1)
+ as grouping(a, b, c, d)
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SELECT a, b, c, udaf(d) FROM grouping GROUP BY a, b, c GROUPING SETS (())
+-- !query schema
+struct<a:string,b:string,c:string,udaf(d):int>
+-- !query output
+NULL NULL NULL 3
+
+
+-- !query
+SELECT a, b, c, udaf(d) FROM grouping GROUP BY a, b, c GROUPING SETS ((a))
+-- !query schema
+struct<a:string,b:string,c:string,udaf(d):int>
+-- !query output
+1 NULL NULL 1
+4 NULL NULL 1
+7 NULL NULL 1
+
+
+-- !query
+SELECT a, b, c, udaf(d) FROM grouping GROUP BY a, b, c GROUPING SETS ((c))
+-- !query schema
+struct<a:string,b:string,c:string,udaf(d):int>
+-- !query output
+NULL NULL 3 1
+NULL NULL 6 1
+NULL NULL 9 1
+
+
+-- !query
+SELECT c1, udaf(c2) FROM (VALUES ('x', 10, 0), ('y', 20, 0)) AS t (c1, c2, c3) GROUP BY GROUPING SETS (c1)
+-- !query schema
+struct<c1:string,udaf(c2):int>
+-- !query output
+x 1
+y 1
+
+
+-- !query
+SELECT c1, udaf(c2), grouping(c1) FROM (VALUES ('x', 10, 0), ('y', 20, 0)) AS t (c1, c2, c3) GROUP BY GROUPING SETS (c1)
+-- !query schema
+struct<c1:string,udaf(c2):int,grouping(c1):tinyint>
+-- !query output
+x 1 0
+y 1 0
+
+
+-- !query
+SELECT c1, c2, udaf(c3), grouping__id
+FROM (VALUES ('x', 'a', 10), ('y', 'b', 20) ) AS t (c1, c2, c3)
+GROUP BY GROUPING SETS ( ( c1 ), ( c2 ) )
+HAVING GROUPING__ID > 1
+-- !query schema
+struct<c1:string,c2:string,udaf(c3):int,grouping__id:bigint>
+-- !query output
+NULL a 1 2
+NULL b 1 2
+
+
+-- !query
+SELECT a + b, b, udaf(c) FROM (VALUES (1,1,1),(2,2,2)) AS t(a,b,c) GROUP BY GROUPING SETS ( (a + b), (b))
+-- !query schema
+struct<(a + b):int,b:int,udaf(c):int>
+-- !query output
+2 NULL 1
+4 NULL 1
+NULL 1 1
+NULL 2 1
+
+
+-- !query
+SELECT a + b, b, udaf(c) FROM (VALUES (1,1,1),(2,2,2)) AS t(a,b,c) GROUP BY GROUPING SETS ( (a + b), (b + a), (b))
+-- !query schema
+struct<(a + b):int,b:int,udaf(c):int>
+-- !query output
+2 NULL 1
+2 NULL 1
+4 NULL 1
+4 NULL 1
+NULL 1 1
+NULL 2 1
+
+
+-- !query
+SELECT a, b, c, udaf(d) FROM grouping GROUP BY WITH ROLLUP
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.catalyst.parser.ParseException
+{
+ "errorClass" : "PARSE_SYNTAX_ERROR",
+ "sqlState" : "42000",
+ "messageParameters" : {
+ "error" : "'ROLLUP'",
+ "hint" : ": extra input 'ROLLUP'"
+ }
+}
+
+
+-- !query
+SELECT a, b, c, udaf(d) FROM grouping GROUP BY WITH CUBE
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.catalyst.parser.ParseException
+{
+ "errorClass" : "PARSE_SYNTAX_ERROR",
+ "sqlState" : "42000",
+ "messageParameters" : {
+ "error" : "'CUBE'",
+ "hint" : ": extra input 'CUBE'"
+ }
+}
+
+
+-- !query
+SELECT k1, k2, udaf(v) FROM (VALUES (1,1,1),(2,2,2)) AS t(k1,k2,v) GROUP BY GROUPING SETS ((k1),(k1,k2),(k2,k1))
+-- !query schema
+struct<k1:int,k2:int,udaf(v):int>
+-- !query output
+1 1 1
+1 1 1
+1 NULL 1
+2 2 1
+2 2 1
+2 NULL 1
+
+
+-- !query
+SELECT grouping__id, k1, k2, udaf(v) FROM (VALUES (1,1,1),(2,2,2)) AS t(k1,k2,v) GROUP BY GROUPING SETS ((k1),(k1,k2),(k2,k1))
+-- !query schema
+struct<grouping__id:bigint,k1:int,k2:int,udaf(v):int>
+-- !query output
+0 1 1 1
+0 1 1 1
+0 2 2 1
+0 2 2 1
+1 1 NULL 1
+1 2 NULL 1
+
+
+-- !query
+SELECT grouping(k1), k1, k2, udaf(v) FROM (VALUES (1,1,1),(2,2,2)) AS t(k1,k2,v) GROUP BY GROUPING SETS ((k1),(k1,k2),(k2,k1))
+-- !query schema
+struct<grouping(k1):tinyint,k1:int,k2:int,udaf(v):int>
+-- !query output
+0 1 1 1
+0 1 1 1
+0 1 NULL 1
+0 2 2 1
+0 2 2 1
+0 2 NULL 1
+
+
+-- !query
+SELECT grouping_id(k1, k2), udaf(v) from (VALUES (1,1,1),(2,2,2)) AS t(k1,k2,v) GROUP BY k1, k2 GROUPING SETS ((k2, k1), k1)
+-- !query schema
+struct<grouping_id(k1, k2):bigint,udaf(v):int>
+-- !query output
+0 1
+0 1
+1 1
+1 1
diff --git a/sql/core/src/test/resources/sql-tests/results/udaf/udaf.sql.out b/sql/core/src/test/resources/sql-tests/results/udaf/udaf.sql.out
new file mode 100644
index 00000000000..0605af1c808
--- /dev/null
+++ b/sql/core/src/test/resources/sql-tests/results/udaf/udaf.sql.out
@@ -0,0 +1,67 @@
+-- Automatically generated by SQLQueryTestSuite
+-- !query
+CREATE OR REPLACE TEMPORARY VIEW t1 AS SELECT * FROM VALUES
+(1), (2), (3), (4)
+as t1(int_col1)
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+CREATE FUNCTION myDoubleAvg AS 'test.org.apache.spark.sql.MyDoubleAvg'
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SELECT default.myDoubleAvg(int_col1) as my_avg from t1
+-- !query schema
+struct<my_avg:double>
+-- !query output
+102.5
+
+
+-- !query
+SELECT default.myDoubleAvg(int_col1, 3) as my_avg from t1
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+Invalid number of arguments for function spark_catalog.default.mydoubleavg. Expected: 1; Found: 2; line 1 pos 7
+
+
+-- !query
+CREATE FUNCTION udaf1 AS 'test.non.existent.udaf'
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SELECT default.udaf1(int_col1) as udaf1 from t1
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+Can not load class 'test.non.existent.udaf' when registering the function 'spark_catalog.default.udaf1', please make sure it is on the classpath; line 1 pos 7
+
+
+-- !query
+DROP FUNCTION myDoubleAvg
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+DROP FUNCTION udaf1
+-- !query schema
+struct<>
+-- !query output
+
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala
index cca9bb6741f..5af7fa3898d 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala
@@ -216,6 +216,13 @@ class SQLQueryTestSuite extends QueryTest with SharedSparkSession with SQLHelper
resultFile: String,
udf: TestUDF) extends TestCase with UDFTest
+ /** A UDAF test case. */
+ protected case class UDAFTestCase(
+ name: String,
+ inputFile: String,
+ resultFile: String,
+ udf: TestUDF) extends TestCase with UDFTest
+
/** A UDF PostgreSQL test case. */
protected case class UDFPgSQLTestCase(
name: String,
@@ -436,6 +443,11 @@ class SQLQueryTestSuite extends QueryTest with SharedSparkSession with SQLHelper
if udfTestCase.udf.isInstanceOf[TestScalarPandasUDF] && shouldTestScalarPandasUDFs =>
s"${testCase.name}${System.lineSeparator()}" +
s"Python: $pythonVer Pandas: $pandasVer PyArrow: $pyarrowVer${System.lineSeparator()}"
+ case udfTestCase: UDFTest
+ if udfTestCase.udf.isInstanceOf[TestGroupedAggPandasUDF] &&
+ shouldTestGroupedAggPandasUDFs =>
+ s"${testCase.name}${System.lineSeparator()}" +
+ s"Python: $pythonVer Pandas: $pandasVer PyArrow: $pyarrowVer${System.lineSeparator()}"
case _ =>
s"${testCase.name}${System.lineSeparator()}"
}
@@ -495,6 +507,11 @@ class SQLQueryTestSuite extends QueryTest with SharedSparkSession with SQLHelper
UDFTestCase(
s"$testCaseName - ${udf.prettyName}", absPath, resultFile, udf)
}
+ } else if (file.getAbsolutePath.startsWith(s"$inputFilePath${File.separator}udaf")) {
+ Seq(TestGroupedAggPandasUDF("udaf")).map { udf =>
+ UDAFTestCase(
+ s"$testCaseName - ${udf.prettyName}", absPath, resultFile, udf)
+ }
} else if (file.getAbsolutePath.startsWith(s"$inputFilePath${File.separator}postgreSQL")) {
PgSQLTestCase(testCaseName, absPath, resultFile) :: Nil
} else if (file.getAbsolutePath.startsWith(s"$inputFilePath${File.separator}ansi")) {
diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerQueryTestSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerQueryTestSuite.scala
index 69e01cef5ab..b850ffccd4e 100644
--- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerQueryTestSuite.scala
+++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerQueryTestSuite.scala
@@ -65,6 +65,7 @@ import org.apache.spark.sql.types._
* 1. Support UDF testing.
* 2. Support DESC command.
* 3. Support SHOW command.
+ * 4. Support UDAF testing.
*/
// scalastyle:on line.size.limit
class ThriftServerQueryTestSuite extends SQLQueryTestSuite with SharedThriftServer {
@@ -247,6 +248,8 @@ class ThriftServerQueryTestSuite extends SQLQueryTestSuite with SharedThriftServ
if (file.getAbsolutePath.startsWith(s"$inputFilePath${File.separator}udf")) {
Seq.empty
+ } else if (file.getAbsolutePath.startsWith(s"$inputFilePath${File.separator}udaf")) {
+ Seq.empty
} else if (file.getAbsolutePath.startsWith(s"$inputFilePath${File.separator}postgreSQL")) {
PgSQLTestCase(testCaseName, absPath, resultFile) :: Nil
} else if (file.getAbsolutePath.startsWith(s"$inputFilePath${File.separator}ansi")) {
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org