You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by gu...@apache.org on 2019/07/20 06:20:27 UTC
[spark] branch master updated: [SPARK-28279][SQL][PYTHON][TESTS]
Convert and port 'group-analytics.sql' into UDF test base
This is an automated email from the ASF dual-hosted git repository.
gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 9e5e511 [SPARK-28279][SQL][PYTHON][TESTS] Convert and port 'group-analytics.sql' into UDF test base
9e5e511 is described below
commit 9e5e511ca0936dc655145816f8bd285cfa855695
Author: Stavros Kontopoulos <st...@gmail.com>
AuthorDate: Sat Jul 20 15:19:57 2019 +0900
[SPARK-28279][SQL][PYTHON][TESTS] Convert and port 'group-analytics.sql' into UDF test base
## What changes were proposed in this pull request?
This PR adds some tests converted from group-analytics.sql to test UDFs. Please see contribution guide of this umbrella ticket - SPARK-27921.
<details><summary>Diff comparing to 'group-analytics.sql'</summary>
<p>
```diff
diff --git a/sql/core/src/test/resources/sql-tests/results/udf/udf-group-analytics.sql.out b/sql/core/src/test/resources/sql-tests/results/udf/udf-group-analytics.sql.out
index 31e9e08e2c..3439a05727 100644
--- a/sql/core/src/test/resources/sql-tests/results/udf/udf-group-analytics.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/udf/udf-group-analytics.sql.out
-13,9 +13,9 struct<>
-- !query 1
-SELECT a + b, b, udf(SUM(a - b)) FROM testData GROUP BY a + b, b WITH CUBE
+SELECT a + b, b, SUM(a - b) FROM testData GROUP BY a + b, b WITH CUBE
-- !query 1 schema
-struct<(a + b):int,b:int,CAST(udf(cast(sum(cast((a - b) as bigint)) as string)) AS BIGINT):bigint>
+struct<(a + b):int,b:int,sum((a - b)):bigint>
-- !query 1 output
2 1 0
2 NULL 0
-33,9 +33,9 NULL NULL 3
-- !query 2
-SELECT a, udf(b), SUM(b) FROM testData GROUP BY a, b WITH CUBE
+SELECT a, b, SUM(b) FROM testData GROUP BY a, b WITH CUBE
-- !query 2 schema
-struct<a:int,CAST(udf(cast(b as string)) AS INT):int,sum(b):bigint>
+struct<a:int,b:int,sum(b):bigint>
-- !query 2 output
1 1 1
1 2 2
-52,9 +52,9 NULL NULL 9
-- !query 3
-SELECT udf(a + b), b, SUM(a - b) FROM testData GROUP BY a + b, b WITH ROLLUP
+SELECT a + b, b, SUM(a - b) FROM testData GROUP BY a + b, b WITH ROLLUP
-- !query 3 schema
-struct<CAST(udf(cast((a + b) as string)) AS INT):int,b:int,sum((a - b)):bigint>
+struct<(a + b):int,b:int,sum((a - b)):bigint>
-- !query 3 output
2 1 0
2 NULL 0
-70,9 +70,9 NULL NULL 3
-- !query 4
-SELECT a, b, udf(SUM(b)) FROM testData GROUP BY a, b WITH ROLLUP
+SELECT a, b, SUM(b) FROM testData GROUP BY a, b WITH ROLLUP
-- !query 4 schema
-struct<a:int,b:int,CAST(udf(cast(sum(cast(b as bigint)) as string)) AS BIGINT):bigint>
+struct<a:int,b:int,sum(b):bigint>
-- !query 4 output
1 1 1
1 2 2
-97,7 +97,7 struct<>
-- !query 6
-SELECT course, year, SUM(earnings) FROM courseSales GROUP BY ROLLUP(course, year) ORDER BY udf(course), year
+SELECT course, year, SUM(earnings) FROM courseSales GROUP BY ROLLUP(course, year) ORDER BY course, year
-- !query 6 schema
struct<course:string,year:int,sum(earnings):bigint>
-- !query 6 output
-111,7 +111,7 dotNET 2013 48000
-- !query 7
-SELECT course, year, SUM(earnings) FROM courseSales GROUP BY CUBE(course, year) ORDER BY course, udf(year)
+SELECT course, year, SUM(earnings) FROM courseSales GROUP BY CUBE(course, year) ORDER BY course, year
-- !query 7 schema
struct<course:string,year:int,sum(earnings):bigint>
-- !query 7 output
-127,9 +127,9 dotNET 2013 48000
-- !query 8
-SELECT course, udf(year), SUM(earnings) FROM courseSales GROUP BY course, year GROUPING SETS(course, year)
+SELECT course, year, SUM(earnings) FROM courseSales GROUP BY course, year GROUPING SETS(course, year)
-- !query 8 schema
-struct<course:string,CAST(udf(cast(year as string)) AS INT):int,sum(earnings):bigint>
+struct<course:string,year:int,sum(earnings):bigint>
-- !query 8 output
Java NULL 50000
NULL 2012 35000
-138,26 +138,26 dotNET NULL 63000
-- !query 9
-SELECT course, year, udf(SUM(earnings)) FROM courseSales GROUP BY course, year GROUPING SETS(course)
+SELECT course, year, SUM(earnings) FROM courseSales GROUP BY course, year GROUPING SETS(course)
-- !query 9 schema
-struct<course:string,year:int,CAST(udf(cast(sum(cast(earnings as bigint)) as string)) AS BIGINT):bigint>
+struct<course:string,year:int,sum(earnings):bigint>
-- !query 9 output
Java NULL 50000
dotNET NULL 63000
-- !query 10
-SELECT udf(course), year, SUM(earnings) FROM courseSales GROUP BY course, year GROUPING SETS(year)
+SELECT course, year, SUM(earnings) FROM courseSales GROUP BY course, year GROUPING SETS(year)
-- !query 10 schema
-struct<CAST(udf(cast(course as string)) AS STRING):string,year:int,sum(earnings):bigint>
+struct<course:string,year:int,sum(earnings):bigint>
-- !query 10 output
NULL 2012 35000
NULL 2013 78000
-- !query 11
-SELECT course, udf(SUM(earnings)) AS sum FROM courseSales
-GROUP BY course, earnings GROUPING SETS((), (course), (course, earnings)) ORDER BY course, udf(sum)
+SELECT course, SUM(earnings) AS sum FROM courseSales
+GROUP BY course, earnings GROUPING SETS((), (course), (course, earnings)) ORDER BY course, sum
-- !query 11 schema
struct<course:string,sum:bigint>
-- !query 11 output
-173,7 +173,7 dotNET 63000
-- !query 12
SELECT course, SUM(earnings) AS sum, GROUPING_ID(course, earnings) FROM courseSales
-GROUP BY course, earnings GROUPING SETS((), (course), (course, earnings)) ORDER BY udf(course), sum
+GROUP BY course, earnings GROUPING SETS((), (course), (course, earnings)) ORDER BY course, sum
-- !query 12 schema
struct<course:string,sum:bigint,grouping_id(course, earnings):int>
-- !query 12 output
-188,10 +188,10 dotNET 63000 1
-- !query 13
-SELECT udf(course), udf(year), GROUPING(course), GROUPING(year), GROUPING_ID(course, year) FROM courseSales
+SELECT course, year, GROUPING(course), GROUPING(year), GROUPING_ID(course, year) FROM courseSales
GROUP BY CUBE(course, year)
-- !query 13 schema
-struct<CAST(udf(cast(course as string)) AS STRING):string,CAST(udf(cast(year as string)) AS INT):int,grouping(course):tinyint,grouping(year):tinyint,grouping_id(course, year):int>
+struct<course:string,year:int,grouping(course):tinyint,grouping(year):tinyint,grouping_id(course, year):int>
-- !query 13 output
Java 2012 0 0 0
Java 2013 0 0 0
-205,7 +205,7 dotNET NULL 0 1 1
-- !query 14
-SELECT course, udf(year), GROUPING(course) FROM courseSales GROUP BY course, year
+SELECT course, year, GROUPING(course) FROM courseSales GROUP BY course, year
-- !query 14 schema
struct<>
-- !query 14 output
-214,7 +214,7 grouping() can only be used with GroupingSets/Cube/Rollup;
-- !query 15
-SELECT course, udf(year), GROUPING_ID(course, year) FROM courseSales GROUP BY course, year
+SELECT course, year, GROUPING_ID(course, year) FROM courseSales GROUP BY course, year
-- !query 15 schema
struct<>
-- !query 15 output
-223,7 +223,7 grouping_id() can only be used with GroupingSets/Cube/Rollup;
-- !query 16
-SELECT course, year, grouping__id FROM courseSales GROUP BY CUBE(course, year) ORDER BY grouping__id, course, udf(year)
+SELECT course, year, grouping__id FROM courseSales GROUP BY CUBE(course, year) ORDER BY grouping__id, course, year
-- !query 16 schema
struct<course:string,year:int,grouping__id:int>
-- !query 16 output
-240,7 +240,7 NULL NULL 3
-- !query 17
SELECT course, year FROM courseSales GROUP BY CUBE(course, year)
-HAVING GROUPING(year) = 1 AND GROUPING_ID(course, year) > 0 ORDER BY course, udf(year)
+HAVING GROUPING(year) = 1 AND GROUPING_ID(course, year) > 0 ORDER BY course, year
-- !query 17 schema
struct<course:string,year:int>
-- !query 17 output
-250,7 +250,7 dotNET NULL
-- !query 18
-SELECT course, udf(year) FROM courseSales GROUP BY course, year HAVING GROUPING(course) > 0
+SELECT course, year FROM courseSales GROUP BY course, year HAVING GROUPING(course) > 0
-- !query 18 schema
struct<>
-- !query 18 output
-259,7 +259,7 grouping()/grouping_id() can only be used with GroupingSets/Cube/Rollup;
-- !query 19
-SELECT course, udf(udf(year)) FROM courseSales GROUP BY course, year HAVING GROUPING_ID(course) > 0
+SELECT course, year FROM courseSales GROUP BY course, year HAVING GROUPING_ID(course) > 0
-- !query 19 schema
struct<>
-- !query 19 output
-268,9 +268,9 grouping()/grouping_id() can only be used with GroupingSets/Cube/Rollup;
-- !query 20
-SELECT udf(course), year FROM courseSales GROUP BY CUBE(course, year) HAVING grouping__id > 0
+SELECT course, year FROM courseSales GROUP BY CUBE(course, year) HAVING grouping__id > 0
-- !query 20 schema
-struct<CAST(udf(cast(course as string)) AS STRING):string,year:int>
+struct<course:string,year:int>
-- !query 20 output
Java NULL
NULL 2012
-281,7 +281,7 dotNET NULL
-- !query 21
SELECT course, year, GROUPING(course), GROUPING(year) FROM courseSales GROUP BY CUBE(course, year)
-ORDER BY GROUPING(course), GROUPING(year), course, udf(year)
+ORDER BY GROUPING(course), GROUPING(year), course, year
-- !query 21 schema
struct<course:string,year:int,grouping(course):tinyint,grouping(year):tinyint>
-- !query 21 output
-298,7 +298,7 NULL NULL 1 1
-- !query 22
SELECT course, year, GROUPING_ID(course, year) FROM courseSales GROUP BY CUBE(course, year)
-ORDER BY GROUPING(course), GROUPING(year), course, udf(year)
+ORDER BY GROUPING(course), GROUPING(year), course, year
-- !query 22 schema
struct<course:string,year:int,grouping_id(course, year):int>
-- !query 22 output
-314,7 +314,7 NULL NULL 3
-- !query 23
-SELECT course, udf(year) FROM courseSales GROUP BY course, udf(year) ORDER BY GROUPING(course)
+SELECT course, year FROM courseSales GROUP BY course, year ORDER BY GROUPING(course)
-- !query 23 schema
struct<>
-- !query 23 output
-323,7 +323,7 grouping()/grouping_id() can only be used with GroupingSets/Cube/Rollup;
-- !query 24
-SELECT course, udf(year) FROM courseSales GROUP BY course, udf(year) ORDER BY GROUPING_ID(course)
+SELECT course, year FROM courseSales GROUP BY course, year ORDER BY GROUPING_ID(course)
-- !query 24 schema
struct<>
-- !query 24 output
-332,7 +332,7 grouping()/grouping_id() can only be used with GroupingSets/Cube/Rollup;
-- !query 25
-SELECT course, year FROM courseSales GROUP BY CUBE(course, year) ORDER BY grouping__id, udf(course), year
+SELECT course, year FROM courseSales GROUP BY CUBE(course, year) ORDER BY grouping__id, course, year
-- !query 25 schema
struct<course:string,year:int>
-- !query 25 output
-348,7 +348,7 NULL NULL
-- !query 26
-SELECT udf(a + b) AS k1, udf(b) AS k2, SUM(a - b) FROM testData GROUP BY CUBE(k1, k2)
+SELECT a + b AS k1, b AS k2, SUM(a - b) FROM testData GROUP BY CUBE(k1, k2)
-- !query 26 schema
struct<k1:int,k2:int,sum((a - b)):bigint>
-- !query 26 output
-368,7 +368,7 NULL NULL 3
-- !query 27
-SELECT udf(udf(a + b)) AS k, b, SUM(a - b) FROM testData GROUP BY ROLLUP(k, b)
+SELECT a + b AS k, b, SUM(a - b) FROM testData GROUP BY ROLLUP(k, b)
-- !query 27 schema
struct<k:int,b:int,sum((a - b)):bigint>
-- !query 27 output
-386,9 +386,9 NULL NULL 3
-- !query 28
-SELECT udf(a + b), udf(udf(b)) AS k, SUM(a - b) FROM testData GROUP BY a + b, k GROUPING SETS(k)
+SELECT a + b, b AS k, SUM(a - b) FROM testData GROUP BY a + b, k GROUPING SETS(k)
-- !query 28 schema
-struct<CAST(udf(cast((a + b) as string)) AS INT):int,k:int,sum((a - b)):bigint>
+struct<(a + b):int,k:int,sum((a - b)):bigint>
-- !query 28 output
NULL 1 3
NULL 2 0
```
</p>
</details>
## How was this patch tested?
Tested as guided in SPARK-27921.
Verified pandas & pyarrow versions:
```$python3
Python 3.6.8 (default, Jan 14 2019, 11:02:34)
[GCC 8.0.1 20180414 (experimental) [trunk revision 259383]] on linux
Type "help", "copyright", "credits" or "license" for more information.
>>> import pandas
>>> import pyarrow
>>> pyarrow.__version__
'0.14.0'
>>> pandas.__version__
'0.24.2'
```
From the sql output it seems that sql statements are evaluated correctly given that udf returns a string and may change results as Null will be returned as None and will be counted in returned values.
Closes #25196 from skonto/group-analytics.sql.
Authored-by: Stavros Kontopoulos <st...@gmail.com>
Signed-off-by: HyukjinKwon <gu...@apache.org>
---
.../sql-tests/inputs/udf/udf-group-analytics.sql | 64 ++++
.../results/udf/udf-group-analytics.sql.out | 394 +++++++++++++++++++++
2 files changed, 458 insertions(+)
diff --git a/sql/core/src/test/resources/sql-tests/inputs/udf/udf-group-analytics.sql b/sql/core/src/test/resources/sql-tests/inputs/udf/udf-group-analytics.sql
new file mode 100644
index 0000000..618c8df
--- /dev/null
+++ b/sql/core/src/test/resources/sql-tests/inputs/udf/udf-group-analytics.sql
@@ -0,0 +1,64 @@
+-- This test file was converted from group-analytics.sql.
+-- TODO: UDF should be inserted and tested at GROUP BY clause after SPARK-28445
+CREATE OR REPLACE TEMPORARY VIEW testData AS SELECT * FROM VALUES
+(1, 1), (1, 2), (2, 1), (2, 2), (3, 1), (3, 2)
+AS testData(a, b);
+
+-- CUBE on overlapping columns
+SELECT a + b, b, udf(SUM(a - b)) FROM testData GROUP BY a + b, b WITH CUBE;
+
+SELECT a, udf(b), SUM(b) FROM testData GROUP BY a, b WITH CUBE;
+
+-- ROLLUP on overlapping columns
+SELECT udf(a + b), b, SUM(a - b) FROM testData GROUP BY a + b, b WITH ROLLUP;
+
+SELECT a, b, udf(SUM(b)) FROM testData GROUP BY a, b WITH ROLLUP;
+
+CREATE OR REPLACE TEMPORARY VIEW courseSales AS SELECT * FROM VALUES
+("dotNET", 2012, 10000), ("Java", 2012, 20000), ("dotNET", 2012, 5000), ("dotNET", 2013, 48000), ("Java", 2013, 30000)
+AS courseSales(course, year, earnings);
+
+-- ROLLUP
+SELECT course, year, SUM(earnings) FROM courseSales GROUP BY ROLLUP(course, year) ORDER BY udf(course), year;
+
+-- CUBE
+SELECT course, year, SUM(earnings) FROM courseSales GROUP BY CUBE(course, year) ORDER BY course, udf(year);
+
+-- GROUPING SETS
+SELECT course, udf(year), SUM(earnings) FROM courseSales GROUP BY course, year GROUPING SETS(course, year);
+SELECT course, year, udf(SUM(earnings)) FROM courseSales GROUP BY course, year GROUPING SETS(course);
+SELECT udf(course), year, SUM(earnings) FROM courseSales GROUP BY course, year GROUPING SETS(year);
+
+-- GROUPING SETS with aggregate functions containing groupBy columns
+SELECT course, udf(SUM(earnings)) AS sum FROM courseSales
+GROUP BY course, earnings GROUPING SETS((), (course), (course, earnings)) ORDER BY course, udf(sum);
+SELECT course, SUM(earnings) AS sum, GROUPING_ID(course, earnings) FROM courseSales
+GROUP BY course, earnings GROUPING SETS((), (course), (course, earnings)) ORDER BY udf(course), sum;
+
+-- GROUPING/GROUPING_ID
+SELECT udf(course), udf(year), GROUPING(course), GROUPING(year), GROUPING_ID(course, year) FROM courseSales
+GROUP BY CUBE(course, year);
+SELECT course, udf(year), GROUPING(course) FROM courseSales GROUP BY course, year;
+SELECT course, udf(year), GROUPING_ID(course, year) FROM courseSales GROUP BY course, year;
+SELECT course, year, grouping__id FROM courseSales GROUP BY CUBE(course, year) ORDER BY grouping__id, course, udf(year);
+
+-- GROUPING/GROUPING_ID in having clause
+SELECT course, year FROM courseSales GROUP BY CUBE(course, year)
+HAVING GROUPING(year) = 1 AND GROUPING_ID(course, year) > 0 ORDER BY course, udf(year);
+SELECT course, udf(year) FROM courseSales GROUP BY course, year HAVING GROUPING(course) > 0;
+SELECT course, udf(udf(year)) FROM courseSales GROUP BY course, year HAVING GROUPING_ID(course) > 0;
+SELECT udf(course), year FROM courseSales GROUP BY CUBE(course, year) HAVING grouping__id > 0;
+
+-- GROUPING/GROUPING_ID in orderBy clause
+SELECT course, year, GROUPING(course), GROUPING(year) FROM courseSales GROUP BY CUBE(course, year)
+ORDER BY GROUPING(course), GROUPING(year), course, udf(year);
+SELECT course, year, GROUPING_ID(course, year) FROM courseSales GROUP BY CUBE(course, year)
+ORDER BY GROUPING(course), GROUPING(year), course, udf(year);
+SELECT course, udf(year) FROM courseSales GROUP BY course, udf(year) ORDER BY GROUPING(course);
+SELECT course, udf(year) FROM courseSales GROUP BY course, udf(year) ORDER BY GROUPING_ID(course);
+SELECT course, year FROM courseSales GROUP BY CUBE(course, year) ORDER BY grouping__id, udf(course), year;
+
+-- Aliases in SELECT could be used in ROLLUP/CUBE/GROUPING SETS
+SELECT udf(a + b) AS k1, udf(b) AS k2, SUM(a - b) FROM testData GROUP BY CUBE(k1, k2);
+SELECT udf(udf(a + b)) AS k, b, SUM(a - b) FROM testData GROUP BY ROLLUP(k, b);
+SELECT udf(a + b), udf(udf(b)) AS k, SUM(a - b) FROM testData GROUP BY a + b, k GROUPING SETS(k)
diff --git a/sql/core/src/test/resources/sql-tests/results/udf/udf-group-analytics.sql.out b/sql/core/src/test/resources/sql-tests/results/udf/udf-group-analytics.sql.out
new file mode 100644
index 0000000..31e9e08
--- /dev/null
+++ b/sql/core/src/test/resources/sql-tests/results/udf/udf-group-analytics.sql.out
@@ -0,0 +1,394 @@
+-- Automatically generated by SQLQueryTestSuite
+-- Number of queries: 29
+
+
+-- !query 0
+CREATE OR REPLACE TEMPORARY VIEW testData AS SELECT * FROM VALUES
+(1, 1), (1, 2), (2, 1), (2, 2), (3, 1), (3, 2)
+AS testData(a, b)
+-- !query 0 schema
+struct<>
+-- !query 0 output
+
+
+
+-- !query 1
+SELECT a + b, b, udf(SUM(a - b)) FROM testData GROUP BY a + b, b WITH CUBE
+-- !query 1 schema
+struct<(a + b):int,b:int,CAST(udf(cast(sum(cast((a - b) as bigint)) as string)) AS BIGINT):bigint>
+-- !query 1 output
+2 1 0
+2 NULL 0
+3 1 1
+3 2 -1
+3 NULL 0
+4 1 2
+4 2 0
+4 NULL 2
+5 2 1
+5 NULL 1
+NULL 1 3
+NULL 2 0
+NULL NULL 3
+
+
+-- !query 2
+SELECT a, udf(b), SUM(b) FROM testData GROUP BY a, b WITH CUBE
+-- !query 2 schema
+struct<a:int,CAST(udf(cast(b as string)) AS INT):int,sum(b):bigint>
+-- !query 2 output
+1 1 1
+1 2 2
+1 NULL 3
+2 1 1
+2 2 2
+2 NULL 3
+3 1 1
+3 2 2
+3 NULL 3
+NULL 1 3
+NULL 2 6
+NULL NULL 9
+
+
+-- !query 3
+SELECT udf(a + b), b, SUM(a - b) FROM testData GROUP BY a + b, b WITH ROLLUP
+-- !query 3 schema
+struct<CAST(udf(cast((a + b) as string)) AS INT):int,b:int,sum((a - b)):bigint>
+-- !query 3 output
+2 1 0
+2 NULL 0
+3 1 1
+3 2 -1
+3 NULL 0
+4 1 2
+4 2 0
+4 NULL 2
+5 2 1
+5 NULL 1
+NULL NULL 3
+
+
+-- !query 4
+SELECT a, b, udf(SUM(b)) FROM testData GROUP BY a, b WITH ROLLUP
+-- !query 4 schema
+struct<a:int,b:int,CAST(udf(cast(sum(cast(b as bigint)) as string)) AS BIGINT):bigint>
+-- !query 4 output
+1 1 1
+1 2 2
+1 NULL 3
+2 1 1
+2 2 2
+2 NULL 3
+3 1 1
+3 2 2
+3 NULL 3
+NULL NULL 9
+
+
+-- !query 5
+CREATE OR REPLACE TEMPORARY VIEW courseSales AS SELECT * FROM VALUES
+("dotNET", 2012, 10000), ("Java", 2012, 20000), ("dotNET", 2012, 5000), ("dotNET", 2013, 48000), ("Java", 2013, 30000)
+AS courseSales(course, year, earnings)
+-- !query 5 schema
+struct<>
+-- !query 5 output
+
+
+
+-- !query 6
+SELECT course, year, SUM(earnings) FROM courseSales GROUP BY ROLLUP(course, year) ORDER BY udf(course), year
+-- !query 6 schema
+struct<course:string,year:int,sum(earnings):bigint>
+-- !query 6 output
+NULL NULL 113000
+Java NULL 50000
+Java 2012 20000
+Java 2013 30000
+dotNET NULL 63000
+dotNET 2012 15000
+dotNET 2013 48000
+
+
+-- !query 7
+SELECT course, year, SUM(earnings) FROM courseSales GROUP BY CUBE(course, year) ORDER BY course, udf(year)
+-- !query 7 schema
+struct<course:string,year:int,sum(earnings):bigint>
+-- !query 7 output
+NULL NULL 113000
+NULL 2012 35000
+NULL 2013 78000
+Java NULL 50000
+Java 2012 20000
+Java 2013 30000
+dotNET NULL 63000
+dotNET 2012 15000
+dotNET 2013 48000
+
+
+-- !query 8
+SELECT course, udf(year), SUM(earnings) FROM courseSales GROUP BY course, year GROUPING SETS(course, year)
+-- !query 8 schema
+struct<course:string,CAST(udf(cast(year as string)) AS INT):int,sum(earnings):bigint>
+-- !query 8 output
+Java NULL 50000
+NULL 2012 35000
+NULL 2013 78000
+dotNET NULL 63000
+
+
+-- !query 9
+SELECT course, year, udf(SUM(earnings)) FROM courseSales GROUP BY course, year GROUPING SETS(course)
+-- !query 9 schema
+struct<course:string,year:int,CAST(udf(cast(sum(cast(earnings as bigint)) as string)) AS BIGINT):bigint>
+-- !query 9 output
+Java NULL 50000
+dotNET NULL 63000
+
+
+-- !query 10
+SELECT udf(course), year, SUM(earnings) FROM courseSales GROUP BY course, year GROUPING SETS(year)
+-- !query 10 schema
+struct<CAST(udf(cast(course as string)) AS STRING):string,year:int,sum(earnings):bigint>
+-- !query 10 output
+NULL 2012 35000
+NULL 2013 78000
+
+
+-- !query 11
+SELECT course, udf(SUM(earnings)) AS sum FROM courseSales
+GROUP BY course, earnings GROUPING SETS((), (course), (course, earnings)) ORDER BY course, udf(sum)
+-- !query 11 schema
+struct<course:string,sum:bigint>
+-- !query 11 output
+NULL 113000
+Java 20000
+Java 30000
+Java 50000
+dotNET 5000
+dotNET 10000
+dotNET 48000
+dotNET 63000
+
+
+-- !query 12
+SELECT course, SUM(earnings) AS sum, GROUPING_ID(course, earnings) FROM courseSales
+GROUP BY course, earnings GROUPING SETS((), (course), (course, earnings)) ORDER BY udf(course), sum
+-- !query 12 schema
+struct<course:string,sum:bigint,grouping_id(course, earnings):int>
+-- !query 12 output
+NULL 113000 3
+Java 20000 0
+Java 30000 0
+Java 50000 1
+dotNET 5000 0
+dotNET 10000 0
+dotNET 48000 0
+dotNET 63000 1
+
+
+-- !query 13
+SELECT udf(course), udf(year), GROUPING(course), GROUPING(year), GROUPING_ID(course, year) FROM courseSales
+GROUP BY CUBE(course, year)
+-- !query 13 schema
+struct<CAST(udf(cast(course as string)) AS STRING):string,CAST(udf(cast(year as string)) AS INT):int,grouping(course):tinyint,grouping(year):tinyint,grouping_id(course, year):int>
+-- !query 13 output
+Java 2012 0 0 0
+Java 2013 0 0 0
+Java NULL 0 1 1
+NULL 2012 1 0 2
+NULL 2013 1 0 2
+NULL NULL 1 1 3
+dotNET 2012 0 0 0
+dotNET 2013 0 0 0
+dotNET NULL 0 1 1
+
+
+-- !query 14
+SELECT course, udf(year), GROUPING(course) FROM courseSales GROUP BY course, year
+-- !query 14 schema
+struct<>
+-- !query 14 output
+org.apache.spark.sql.AnalysisException
+grouping() can only be used with GroupingSets/Cube/Rollup;
+
+
+-- !query 15
+SELECT course, udf(year), GROUPING_ID(course, year) FROM courseSales GROUP BY course, year
+-- !query 15 schema
+struct<>
+-- !query 15 output
+org.apache.spark.sql.AnalysisException
+grouping_id() can only be used with GroupingSets/Cube/Rollup;
+
+
+-- !query 16
+SELECT course, year, grouping__id FROM courseSales GROUP BY CUBE(course, year) ORDER BY grouping__id, course, udf(year)
+-- !query 16 schema
+struct<course:string,year:int,grouping__id:int>
+-- !query 16 output
+Java 2012 0
+Java 2013 0
+dotNET 2012 0
+dotNET 2013 0
+Java NULL 1
+dotNET NULL 1
+NULL 2012 2
+NULL 2013 2
+NULL NULL 3
+
+
+-- !query 17
+SELECT course, year FROM courseSales GROUP BY CUBE(course, year)
+HAVING GROUPING(year) = 1 AND GROUPING_ID(course, year) > 0 ORDER BY course, udf(year)
+-- !query 17 schema
+struct<course:string,year:int>
+-- !query 17 output
+NULL NULL
+Java NULL
+dotNET NULL
+
+
+-- !query 18
+SELECT course, udf(year) FROM courseSales GROUP BY course, year HAVING GROUPING(course) > 0
+-- !query 18 schema
+struct<>
+-- !query 18 output
+org.apache.spark.sql.AnalysisException
+grouping()/grouping_id() can only be used with GroupingSets/Cube/Rollup;
+
+
+-- !query 19
+SELECT course, udf(udf(year)) FROM courseSales GROUP BY course, year HAVING GROUPING_ID(course) > 0
+-- !query 19 schema
+struct<>
+-- !query 19 output
+org.apache.spark.sql.AnalysisException
+grouping()/grouping_id() can only be used with GroupingSets/Cube/Rollup;
+
+
+-- !query 20
+SELECT udf(course), year FROM courseSales GROUP BY CUBE(course, year) HAVING grouping__id > 0
+-- !query 20 schema
+struct<CAST(udf(cast(course as string)) AS STRING):string,year:int>
+-- !query 20 output
+Java NULL
+NULL 2012
+NULL 2013
+NULL NULL
+dotNET NULL
+
+
+-- !query 21
+SELECT course, year, GROUPING(course), GROUPING(year) FROM courseSales GROUP BY CUBE(course, year)
+ORDER BY GROUPING(course), GROUPING(year), course, udf(year)
+-- !query 21 schema
+struct<course:string,year:int,grouping(course):tinyint,grouping(year):tinyint>
+-- !query 21 output
+Java 2012 0 0
+Java 2013 0 0
+dotNET 2012 0 0
+dotNET 2013 0 0
+Java NULL 0 1
+dotNET NULL 0 1
+NULL 2012 1 0
+NULL 2013 1 0
+NULL NULL 1 1
+
+
+-- !query 22
+SELECT course, year, GROUPING_ID(course, year) FROM courseSales GROUP BY CUBE(course, year)
+ORDER BY GROUPING(course), GROUPING(year), course, udf(year)
+-- !query 22 schema
+struct<course:string,year:int,grouping_id(course, year):int>
+-- !query 22 output
+Java 2012 0
+Java 2013 0
+dotNET 2012 0
+dotNET 2013 0
+Java NULL 1
+dotNET NULL 1
+NULL 2012 2
+NULL 2013 2
+NULL NULL 3
+
+
+-- !query 23
+SELECT course, udf(year) FROM courseSales GROUP BY course, udf(year) ORDER BY GROUPING(course)
+-- !query 23 schema
+struct<>
+-- !query 23 output
+org.apache.spark.sql.AnalysisException
+grouping()/grouping_id() can only be used with GroupingSets/Cube/Rollup;
+
+
+-- !query 24
+SELECT course, udf(year) FROM courseSales GROUP BY course, udf(year) ORDER BY GROUPING_ID(course)
+-- !query 24 schema
+struct<>
+-- !query 24 output
+org.apache.spark.sql.AnalysisException
+grouping()/grouping_id() can only be used with GroupingSets/Cube/Rollup;
+
+
+-- !query 25
+SELECT course, year FROM courseSales GROUP BY CUBE(course, year) ORDER BY grouping__id, udf(course), year
+-- !query 25 schema
+struct<course:string,year:int>
+-- !query 25 output
+Java 2012
+Java 2013
+dotNET 2012
+dotNET 2013
+Java NULL
+dotNET NULL
+NULL 2012
+NULL 2013
+NULL NULL
+
+
+-- !query 26
+SELECT udf(a + b) AS k1, udf(b) AS k2, SUM(a - b) FROM testData GROUP BY CUBE(k1, k2)
+-- !query 26 schema
+struct<k1:int,k2:int,sum((a - b)):bigint>
+-- !query 26 output
+2 1 0
+2 NULL 0
+3 1 1
+3 2 -1
+3 NULL 0
+4 1 2
+4 2 0
+4 NULL 2
+5 2 1
+5 NULL 1
+NULL 1 3
+NULL 2 0
+NULL NULL 3
+
+
+-- !query 27
+SELECT udf(udf(a + b)) AS k, b, SUM(a - b) FROM testData GROUP BY ROLLUP(k, b)
+-- !query 27 schema
+struct<k:int,b:int,sum((a - b)):bigint>
+-- !query 27 output
+2 1 0
+2 NULL 0
+3 1 1
+3 2 -1
+3 NULL 0
+4 1 2
+4 2 0
+4 NULL 2
+5 2 1
+5 NULL 1
+NULL NULL 3
+
+
+-- !query 28
+SELECT udf(a + b), udf(udf(b)) AS k, SUM(a - b) FROM testData GROUP BY a + b, k GROUPING SETS(k)
+-- !query 28 schema
+struct<CAST(udf(cast((a + b) as string)) AS INT):int,k:int,sum((a - b)):bigint>
+-- !query 28 output
+NULL 1 3
+NULL 2 0
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org