Posted to commits@spark.apache.org by gu...@apache.org on 2019/07/20 06:20:27 UTC

[spark] branch master updated: [SPARK-28279][SQL][PYTHON][TESTS] Convert and port 'group-analytics.sql' into UDF test base

This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 9e5e511  [SPARK-28279][SQL][PYTHON][TESTS] Convert and port 'group-analytics.sql' into UDF test base
9e5e511 is described below

commit 9e5e511ca0936dc655145816f8bd285cfa855695
Author: Stavros Kontopoulos <st...@gmail.com>
AuthorDate: Sat Jul 20 15:19:57 2019 +0900

    [SPARK-28279][SQL][PYTHON][TESTS] Convert and port 'group-analytics.sql' into UDF test base
    
    ## What changes were proposed in this pull request?
    This PR adds some tests converted from group-analytics.sql to test UDFs. Please see the contribution guide of the umbrella ticket, SPARK-27921.
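
    For reference, a minimal PySpark sketch of the pattern these tests exercise, assuming a plain SparkSession and a hypothetical local registration of `udf`; the suite itself wires the test UDF up through its shared test helpers and also covers Scala and Pandas UDF variants, so this is only an approximation:

    ```python
    from pyspark.sql import SparkSession
    from pyspark.sql.types import StringType

    spark = SparkSession.builder.getOrCreate()

    # The test UDF is effectively an identity function routed through a string:
    # every wrapped value is stringified, and NULL reaches Python as None.
    spark.udf.register("udf", lambda x: str(x) if x is not None else None, StringType())

    spark.sql("""
        CREATE OR REPLACE TEMPORARY VIEW testData AS SELECT * FROM VALUES
        (1, 1), (1, 2), (2, 1), (2, 2), (3, 1), (3, 2) AS testData(a, b)
    """)

    # One of the ported queries: the aggregate result is passed through the UDF.
    spark.sql("SELECT a + b, b, udf(SUM(a - b)) FROM testData GROUP BY a + b, b WITH CUBE").show()
    ```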
    
    <details><summary>Diff comparing to 'group-analytics.sql'</summary>
    <p>
    
    ```diff
    diff --git a/sql/core/src/test/resources/sql-tests/results/udf/udf-group-analytics.sql.out b/sql/core/src/test/resources/sql-tests/results/udf/udf-group-analytics.sql.out
    index 31e9e08e2c..3439a05727 100644
    --- a/sql/core/src/test/resources/sql-tests/results/udf/udf-group-analytics.sql.out
    +++ b/sql/core/src/test/resources/sql-tests/results/udf/udf-group-analytics.sql.out
    @@ -13,9 +13,9 @@ struct<>
    
     -- !query 1
    -SELECT a + b, b, udf(SUM(a - b)) FROM testData GROUP BY a + b, b WITH CUBE
    +SELECT a + b, b, SUM(a - b) FROM testData GROUP BY a + b, b WITH CUBE
     -- !query 1 schema
    -struct<(a + b):int,b:int,CAST(udf(cast(sum(cast((a - b) as bigint)) as string)) AS BIGINT):bigint>
    +struct<(a + b):int,b:int,sum((a - b)):bigint>
     -- !query 1 output
     2	1	0
     2	NULL	0
    @@ -33,9 +33,9 @@ NULL	NULL	3
    
     -- !query 2
    -SELECT a, udf(b), SUM(b) FROM testData GROUP BY a, b WITH CUBE
    +SELECT a, b, SUM(b) FROM testData GROUP BY a, b WITH CUBE
     -- !query 2 schema
    -struct<a:int,CAST(udf(cast(b as string)) AS INT):int,sum(b):bigint>
    +struct<a:int,b:int,sum(b):bigint>
     -- !query 2 output
     1	1	1
     1	2	2
    @@ -52,9 +52,9 @@ NULL	NULL	9
    
     -- !query 3
    -SELECT udf(a + b), b, SUM(a - b) FROM testData GROUP BY a + b, b WITH ROLLUP
    +SELECT a + b, b, SUM(a - b) FROM testData GROUP BY a + b, b WITH ROLLUP
     -- !query 3 schema
    -struct<CAST(udf(cast((a + b) as string)) AS INT):int,b:int,sum((a - b)):bigint>
    +struct<(a + b):int,b:int,sum((a - b)):bigint>
     -- !query 3 output
     2	1	0
     2	NULL	0
    @@ -70,9 +70,9 @@ NULL	NULL	3
    
     -- !query 4
    -SELECT a, b, udf(SUM(b)) FROM testData GROUP BY a, b WITH ROLLUP
    +SELECT a, b, SUM(b) FROM testData GROUP BY a, b WITH ROLLUP
     -- !query 4 schema
    -struct<a:int,b:int,CAST(udf(cast(sum(cast(b as bigint)) as string)) AS BIGINT):bigint>
    +struct<a:int,b:int,sum(b):bigint>
     -- !query 4 output
     1	1	1
     1	2	2
    @@ -97,7 +97,7 @@ struct<>
    
     -- !query 6
    -SELECT course, year, SUM(earnings) FROM courseSales GROUP BY ROLLUP(course, year) ORDER BY udf(course), year
    +SELECT course, year, SUM(earnings) FROM courseSales GROUP BY ROLLUP(course, year) ORDER BY course, year
     -- !query 6 schema
     struct<course:string,year:int,sum(earnings):bigint>
     -- !query 6 output
    @@ -111,7 +111,7 @@ dotNET	2013	48000
    
     -- !query 7
    -SELECT course, year, SUM(earnings) FROM courseSales GROUP BY CUBE(course, year) ORDER BY course, udf(year)
    +SELECT course, year, SUM(earnings) FROM courseSales GROUP BY CUBE(course, year) ORDER BY course, year
     -- !query 7 schema
     struct<course:string,year:int,sum(earnings):bigint>
     -- !query 7 output
    @@ -127,9 +127,9 @@ dotNET	2013	48000
    
     -- !query 8
    -SELECT course, udf(year), SUM(earnings) FROM courseSales GROUP BY course, year GROUPING SETS(course, year)
    +SELECT course, year, SUM(earnings) FROM courseSales GROUP BY course, year GROUPING SETS(course, year)
     -- !query 8 schema
    -struct<course:string,CAST(udf(cast(year as string)) AS INT):int,sum(earnings):bigint>
    +struct<course:string,year:int,sum(earnings):bigint>
     -- !query 8 output
     Java	NULL	50000
     NULL	2012	35000
    @@ -138,26 +138,26 @@ dotNET	NULL	63000
    
     -- !query 9
    -SELECT course, year, udf(SUM(earnings)) FROM courseSales GROUP BY course, year GROUPING SETS(course)
    +SELECT course, year, SUM(earnings) FROM courseSales GROUP BY course, year GROUPING SETS(course)
     -- !query 9 schema
    -struct<course:string,year:int,CAST(udf(cast(sum(cast(earnings as bigint)) as string)) AS BIGINT):bigint>
    +struct<course:string,year:int,sum(earnings):bigint>
     -- !query 9 output
     Java	NULL	50000
     dotNET	NULL	63000
    
     -- !query 10
    -SELECT udf(course), year, SUM(earnings) FROM courseSales GROUP BY course, year GROUPING SETS(year)
    +SELECT course, year, SUM(earnings) FROM courseSales GROUP BY course, year GROUPING SETS(year)
     -- !query 10 schema
    -struct<CAST(udf(cast(course as string)) AS STRING):string,year:int,sum(earnings):bigint>
    +struct<course:string,year:int,sum(earnings):bigint>
     -- !query 10 output
     NULL	2012	35000
     NULL	2013	78000
    
     -- !query 11
    -SELECT course, udf(SUM(earnings)) AS sum FROM courseSales
    -GROUP BY course, earnings GROUPING SETS((), (course), (course, earnings)) ORDER BY course, udf(sum)
    +SELECT course, SUM(earnings) AS sum FROM courseSales
    +GROUP BY course, earnings GROUPING SETS((), (course), (course, earnings)) ORDER BY course, sum
     -- !query 11 schema
     struct<course:string,sum:bigint>
     -- !query 11 output
    @@ -173,7 +173,7 @@ dotNET	63000
    
     -- !query 12
     SELECT course, SUM(earnings) AS sum, GROUPING_ID(course, earnings) FROM courseSales
    -GROUP BY course, earnings GROUPING SETS((), (course), (course, earnings)) ORDER BY udf(course), sum
    +GROUP BY course, earnings GROUPING SETS((), (course), (course, earnings)) ORDER BY course, sum
     -- !query 12 schema
     struct<course:string,sum:bigint,grouping_id(course, earnings):int>
     -- !query 12 output
    @@ -188,10 +188,10 @@ dotNET	63000	1
    
     -- !query 13
    -SELECT udf(course), udf(year), GROUPING(course), GROUPING(year), GROUPING_ID(course, year) FROM courseSales
    +SELECT course, year, GROUPING(course), GROUPING(year), GROUPING_ID(course, year) FROM courseSales
     GROUP BY CUBE(course, year)
     -- !query 13 schema
    -struct<CAST(udf(cast(course as string)) AS STRING):string,CAST(udf(cast(year as string)) AS INT):int,grouping(course):tinyint,grouping(year):tinyint,grouping_id(course, year):int>
    +struct<course:string,year:int,grouping(course):tinyint,grouping(year):tinyint,grouping_id(course, year):int>
     -- !query 13 output
     Java	2012	0	0	0
     Java	2013	0	0	0
    @@ -205,7 +205,7 @@ dotNET	NULL	0	1	1
    
     -- !query 14
    -SELECT course, udf(year), GROUPING(course) FROM courseSales GROUP BY course, year
    +SELECT course, year, GROUPING(course) FROM courseSales GROUP BY course, year
     -- !query 14 schema
     struct<>
     -- !query 14 output
    @@ -214,7 +214,7 @@ grouping() can only be used with GroupingSets/Cube/Rollup;
    
     -- !query 15
    -SELECT course, udf(year), GROUPING_ID(course, year) FROM courseSales GROUP BY course, year
    +SELECT course, year, GROUPING_ID(course, year) FROM courseSales GROUP BY course, year
     -- !query 15 schema
     struct<>
     -- !query 15 output
    @@ -223,7 +223,7 @@ grouping_id() can only be used with GroupingSets/Cube/Rollup;
    
     -- !query 16
    -SELECT course, year, grouping__id FROM courseSales GROUP BY CUBE(course, year) ORDER BY grouping__id, course, udf(year)
    +SELECT course, year, grouping__id FROM courseSales GROUP BY CUBE(course, year) ORDER BY grouping__id, course, year
     -- !query 16 schema
     struct<course:string,year:int,grouping__id:int>
     -- !query 16 output
    @@ -240,7 +240,7 @@ NULL	NULL	3
    
     -- !query 17
     SELECT course, year FROM courseSales GROUP BY CUBE(course, year)
    -HAVING GROUPING(year) = 1 AND GROUPING_ID(course, year) > 0 ORDER BY course, udf(year)
    +HAVING GROUPING(year) = 1 AND GROUPING_ID(course, year) > 0 ORDER BY course, year
     -- !query 17 schema
     struct<course:string,year:int>
     -- !query 17 output
    @@ -250,7 +250,7 @@ dotNET	NULL
    
     -- !query 18
    -SELECT course, udf(year) FROM courseSales GROUP BY course, year HAVING GROUPING(course) > 0
    +SELECT course, year FROM courseSales GROUP BY course, year HAVING GROUPING(course) > 0
     -- !query 18 schema
     struct<>
     -- !query 18 output
    @@ -259,7 +259,7 @@ grouping()/grouping_id() can only be used with GroupingSets/Cube/Rollup;
    
     -- !query 19
    -SELECT course, udf(udf(year)) FROM courseSales GROUP BY course, year HAVING GROUPING_ID(course) > 0
    +SELECT course, year FROM courseSales GROUP BY course, year HAVING GROUPING_ID(course) > 0
     -- !query 19 schema
     struct<>
     -- !query 19 output
    @@ -268,9 +268,9 @@ grouping()/grouping_id() can only be used with GroupingSets/Cube/Rollup;
    
     -- !query 20
    -SELECT udf(course), year FROM courseSales GROUP BY CUBE(course, year) HAVING grouping__id > 0
    +SELECT course, year FROM courseSales GROUP BY CUBE(course, year) HAVING grouping__id > 0
     -- !query 20 schema
    -struct<CAST(udf(cast(course as string)) AS STRING):string,year:int>
    +struct<course:string,year:int>
     -- !query 20 output
     Java	NULL
     NULL	2012
    @@ -281,7 +281,7 @@ dotNET	NULL
    
     -- !query 21
     SELECT course, year, GROUPING(course), GROUPING(year) FROM courseSales GROUP BY CUBE(course, year)
    -ORDER BY GROUPING(course), GROUPING(year), course, udf(year)
    +ORDER BY GROUPING(course), GROUPING(year), course, year
     -- !query 21 schema
     struct<course:string,year:int,grouping(course):tinyint,grouping(year):tinyint>
     -- !query 21 output
    @@ -298,7 +298,7 @@ NULL	NULL	1	1
    
     -- !query 22
     SELECT course, year, GROUPING_ID(course, year) FROM courseSales GROUP BY CUBE(course, year)
    -ORDER BY GROUPING(course), GROUPING(year), course, udf(year)
    +ORDER BY GROUPING(course), GROUPING(year), course, year
     -- !query 22 schema
     struct<course:string,year:int,grouping_id(course, year):int>
     -- !query 22 output
    @@ -314,7 +314,7 @@ NULL	NULL	3
    
     -- !query 23
    -SELECT course, udf(year) FROM courseSales GROUP BY course, udf(year) ORDER BY GROUPING(course)
    +SELECT course, year FROM courseSales GROUP BY course, year ORDER BY GROUPING(course)
     -- !query 23 schema
     struct<>
     -- !query 23 output
    @@ -323,7 +323,7 @@ grouping()/grouping_id() can only be used with GroupingSets/Cube/Rollup;
    
     -- !query 24
    -SELECT course, udf(year) FROM courseSales GROUP BY course, udf(year) ORDER BY GROUPING_ID(course)
    +SELECT course, year FROM courseSales GROUP BY course, year ORDER BY GROUPING_ID(course)
     -- !query 24 schema
     struct<>
     -- !query 24 output
    @@ -332,7 +332,7 @@ grouping()/grouping_id() can only be used with GroupingSets/Cube/Rollup;
    
     -- !query 25
    -SELECT course, year FROM courseSales GROUP BY CUBE(course, year) ORDER BY grouping__id, udf(course), year
    +SELECT course, year FROM courseSales GROUP BY CUBE(course, year) ORDER BY grouping__id, course, year
     -- !query 25 schema
     struct<course:string,year:int>
     -- !query 25 output
    @@ -348,7 +348,7 @@ NULL	NULL
    
     -- !query 26
    -SELECT udf(a + b) AS k1, udf(b) AS k2, SUM(a - b) FROM testData GROUP BY CUBE(k1, k2)
    +SELECT a + b AS k1, b AS k2, SUM(a - b) FROM testData GROUP BY CUBE(k1, k2)
     -- !query 26 schema
     struct<k1:int,k2:int,sum((a - b)):bigint>
     -- !query 26 output
    @@ -368,7 +368,7 @@ NULL	NULL	3
    
     -- !query 27
    -SELECT udf(udf(a + b)) AS k, b, SUM(a - b) FROM testData GROUP BY ROLLUP(k, b)
    +SELECT a + b AS k, b, SUM(a - b) FROM testData GROUP BY ROLLUP(k, b)
     -- !query 27 schema
     struct<k:int,b:int,sum((a - b)):bigint>
     -- !query 27 output
    @@ -386,9 +386,9 @@ NULL	NULL	3
    
     -- !query 28
    -SELECT udf(a + b), udf(udf(b)) AS k, SUM(a - b) FROM testData GROUP BY a + b, k GROUPING SETS(k)
    +SELECT a + b, b AS k, SUM(a - b) FROM testData GROUP BY a + b, k GROUPING SETS(k)
     -- !query 28 schema
    -struct<CAST(udf(cast((a + b) as string)) AS INT):int,k:int,sum((a - b)):bigint>
    +struct<(a + b):int,k:int,sum((a - b)):bigint>
     -- !query 28 output
     NULL	1	3
     NULL	2	0
    
    ```
    
    </p>
    </details>
    
    ## How was this patch tested?
    
    Tested as guided in SPARK-27921.
    Verified pandas & pyarrow versions:
    ```$python3
    Python 3.6.8 (default, Jan 14 2019, 11:02:34)
    [GCC 8.0.1 20180414 (experimental) [trunk revision 259383]] on linux
    Type "help", "copyright", "credits" or "license" for more information.
    >>> import pandas
    >>> import pyarrow
    >>> pyarrow.__version__
    '0.14.0'
    >>> pandas.__version__
    '0.24.2'
    ```
    From the SQL output, the statements appear to be evaluated correctly. Note that the UDF returns a string and may change the results, since NULL values are passed to Python as None and are counted in the returned values.
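
    As a quick illustration of that point (continuing the session from the sketch above, with the same string-returning `udf` registered), the grand-total row produced by WITH CUBE has a NULL grouping value; it reaches the Python UDF as None and is still returned as a row:

    ```python
    # The NULL grouping value from the CUBE total row passes through the UDF as
    # None and comes back as a NULL row alongside the regular groups.
    spark.sql("SELECT udf(b), SUM(b) FROM testData GROUP BY b WITH CUBE").show()
    ```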
    
    Closes #25196 from skonto/group-analytics.sql.
    
    Authored-by: Stavros Kontopoulos <st...@gmail.com>
    Signed-off-by: HyukjinKwon <gu...@apache.org>
---
 .../sql-tests/inputs/udf/udf-group-analytics.sql   |  64 ++++
 .../results/udf/udf-group-analytics.sql.out        | 394 +++++++++++++++++++++
 2 files changed, 458 insertions(+)

diff --git a/sql/core/src/test/resources/sql-tests/inputs/udf/udf-group-analytics.sql b/sql/core/src/test/resources/sql-tests/inputs/udf/udf-group-analytics.sql
new file mode 100644
index 0000000..618c8df
--- /dev/null
+++ b/sql/core/src/test/resources/sql-tests/inputs/udf/udf-group-analytics.sql
@@ -0,0 +1,64 @@
+-- This test file was converted from group-analytics.sql.
+-- TODO: UDF should be inserted and tested at GROUP BY clause after SPARK-28445
+CREATE OR REPLACE TEMPORARY VIEW testData AS SELECT * FROM VALUES
+(1, 1), (1, 2), (2, 1), (2, 2), (3, 1), (3, 2)
+AS testData(a, b);
+
+-- CUBE on overlapping columns
+SELECT a + b, b, udf(SUM(a - b)) FROM testData GROUP BY a + b, b WITH CUBE;
+
+SELECT a, udf(b), SUM(b) FROM testData GROUP BY a, b WITH CUBE;
+
+-- ROLLUP on overlapping columns
+SELECT udf(a + b), b, SUM(a - b) FROM testData GROUP BY a + b, b WITH ROLLUP;
+
+SELECT a, b, udf(SUM(b)) FROM testData GROUP BY a, b WITH ROLLUP;
+
+CREATE OR REPLACE TEMPORARY VIEW courseSales AS SELECT * FROM VALUES
+("dotNET", 2012, 10000), ("Java", 2012, 20000), ("dotNET", 2012, 5000), ("dotNET", 2013, 48000), ("Java", 2013, 30000)
+AS courseSales(course, year, earnings);
+
+-- ROLLUP
+SELECT course, year, SUM(earnings) FROM courseSales GROUP BY ROLLUP(course, year) ORDER BY udf(course), year;
+
+-- CUBE
+SELECT course, year, SUM(earnings) FROM courseSales GROUP BY CUBE(course, year) ORDER BY course, udf(year);
+
+-- GROUPING SETS
+SELECT course, udf(year), SUM(earnings) FROM courseSales GROUP BY course, year GROUPING SETS(course, year);
+SELECT course, year, udf(SUM(earnings)) FROM courseSales GROUP BY course, year GROUPING SETS(course);
+SELECT udf(course), year, SUM(earnings) FROM courseSales GROUP BY course, year GROUPING SETS(year);
+
+-- GROUPING SETS with aggregate functions containing groupBy columns
+SELECT course, udf(SUM(earnings)) AS sum FROM courseSales
+GROUP BY course, earnings GROUPING SETS((), (course), (course, earnings)) ORDER BY course, udf(sum);
+SELECT course, SUM(earnings) AS sum, GROUPING_ID(course, earnings) FROM courseSales
+GROUP BY course, earnings GROUPING SETS((), (course), (course, earnings)) ORDER BY udf(course), sum;
+
+-- GROUPING/GROUPING_ID
+SELECT udf(course), udf(year), GROUPING(course), GROUPING(year), GROUPING_ID(course, year) FROM courseSales
+GROUP BY CUBE(course, year);
+SELECT course, udf(year), GROUPING(course) FROM courseSales GROUP BY course, year;
+SELECT course, udf(year), GROUPING_ID(course, year) FROM courseSales GROUP BY course, year;
+SELECT course, year, grouping__id FROM courseSales GROUP BY CUBE(course, year) ORDER BY grouping__id, course, udf(year);
+
+-- GROUPING/GROUPING_ID in having clause
+SELECT course, year FROM courseSales GROUP BY CUBE(course, year)
+HAVING GROUPING(year) = 1 AND GROUPING_ID(course, year) > 0 ORDER BY course, udf(year);
+SELECT course, udf(year) FROM courseSales GROUP BY course, year HAVING GROUPING(course) > 0;
+SELECT course, udf(udf(year)) FROM courseSales GROUP BY course, year HAVING GROUPING_ID(course) > 0;
+SELECT udf(course), year FROM courseSales GROUP BY CUBE(course, year) HAVING grouping__id > 0;
+
+-- GROUPING/GROUPING_ID in orderBy clause
+SELECT course, year, GROUPING(course), GROUPING(year) FROM courseSales GROUP BY CUBE(course, year)
+ORDER BY GROUPING(course), GROUPING(year), course, udf(year);
+SELECT course, year, GROUPING_ID(course, year) FROM courseSales GROUP BY CUBE(course, year)
+ORDER BY GROUPING(course), GROUPING(year), course, udf(year);
+SELECT course, udf(year) FROM courseSales GROUP BY course, udf(year) ORDER BY GROUPING(course);
+SELECT course, udf(year) FROM courseSales GROUP BY course, udf(year) ORDER BY GROUPING_ID(course);
+SELECT course, year FROM courseSales GROUP BY CUBE(course, year) ORDER BY grouping__id, udf(course), year;
+
+-- Aliases in SELECT could be used in ROLLUP/CUBE/GROUPING SETS
+SELECT udf(a + b) AS k1, udf(b) AS k2, SUM(a - b) FROM testData GROUP BY CUBE(k1, k2);
+SELECT udf(udf(a + b)) AS k, b, SUM(a - b) FROM testData GROUP BY ROLLUP(k, b);
+SELECT udf(a + b), udf(udf(b)) AS k, SUM(a - b) FROM testData GROUP BY a + b, k GROUPING SETS(k)
diff --git a/sql/core/src/test/resources/sql-tests/results/udf/udf-group-analytics.sql.out b/sql/core/src/test/resources/sql-tests/results/udf/udf-group-analytics.sql.out
new file mode 100644
index 0000000..31e9e08
--- /dev/null
+++ b/sql/core/src/test/resources/sql-tests/results/udf/udf-group-analytics.sql.out
@@ -0,0 +1,394 @@
+-- Automatically generated by SQLQueryTestSuite
+-- Number of queries: 29
+
+
+-- !query 0
+CREATE OR REPLACE TEMPORARY VIEW testData AS SELECT * FROM VALUES
+(1, 1), (1, 2), (2, 1), (2, 2), (3, 1), (3, 2)
+AS testData(a, b)
+-- !query 0 schema
+struct<>
+-- !query 0 output
+
+
+
+-- !query 1
+SELECT a + b, b, udf(SUM(a - b)) FROM testData GROUP BY a + b, b WITH CUBE
+-- !query 1 schema
+struct<(a + b):int,b:int,CAST(udf(cast(sum(cast((a - b) as bigint)) as string)) AS BIGINT):bigint>
+-- !query 1 output
+2	1	0
+2	NULL	0
+3	1	1
+3	2	-1
+3	NULL	0
+4	1	2
+4	2	0
+4	NULL	2
+5	2	1
+5	NULL	1
+NULL	1	3
+NULL	2	0
+NULL	NULL	3
+
+
+-- !query 2
+SELECT a, udf(b), SUM(b) FROM testData GROUP BY a, b WITH CUBE
+-- !query 2 schema
+struct<a:int,CAST(udf(cast(b as string)) AS INT):int,sum(b):bigint>
+-- !query 2 output
+1	1	1
+1	2	2
+1	NULL	3
+2	1	1
+2	2	2
+2	NULL	3
+3	1	1
+3	2	2
+3	NULL	3
+NULL	1	3
+NULL	2	6
+NULL	NULL	9
+
+
+-- !query 3
+SELECT udf(a + b), b, SUM(a - b) FROM testData GROUP BY a + b, b WITH ROLLUP
+-- !query 3 schema
+struct<CAST(udf(cast((a + b) as string)) AS INT):int,b:int,sum((a - b)):bigint>
+-- !query 3 output
+2	1	0
+2	NULL	0
+3	1	1
+3	2	-1
+3	NULL	0
+4	1	2
+4	2	0
+4	NULL	2
+5	2	1
+5	NULL	1
+NULL	NULL	3
+
+
+-- !query 4
+SELECT a, b, udf(SUM(b)) FROM testData GROUP BY a, b WITH ROLLUP
+-- !query 4 schema
+struct<a:int,b:int,CAST(udf(cast(sum(cast(b as bigint)) as string)) AS BIGINT):bigint>
+-- !query 4 output
+1	1	1
+1	2	2
+1	NULL	3
+2	1	1
+2	2	2
+2	NULL	3
+3	1	1
+3	2	2
+3	NULL	3
+NULL	NULL	9
+
+
+-- !query 5
+CREATE OR REPLACE TEMPORARY VIEW courseSales AS SELECT * FROM VALUES
+("dotNET", 2012, 10000), ("Java", 2012, 20000), ("dotNET", 2012, 5000), ("dotNET", 2013, 48000), ("Java", 2013, 30000)
+AS courseSales(course, year, earnings)
+-- !query 5 schema
+struct<>
+-- !query 5 output
+
+
+
+-- !query 6
+SELECT course, year, SUM(earnings) FROM courseSales GROUP BY ROLLUP(course, year) ORDER BY udf(course), year
+-- !query 6 schema
+struct<course:string,year:int,sum(earnings):bigint>
+-- !query 6 output
+NULL	NULL	113000
+Java	NULL	50000
+Java	2012	20000
+Java	2013	30000
+dotNET	NULL	63000
+dotNET	2012	15000
+dotNET	2013	48000
+
+
+-- !query 7
+SELECT course, year, SUM(earnings) FROM courseSales GROUP BY CUBE(course, year) ORDER BY course, udf(year)
+-- !query 7 schema
+struct<course:string,year:int,sum(earnings):bigint>
+-- !query 7 output
+NULL	NULL	113000
+NULL	2012	35000
+NULL	2013	78000
+Java	NULL	50000
+Java	2012	20000
+Java	2013	30000
+dotNET	NULL	63000
+dotNET	2012	15000
+dotNET	2013	48000
+
+
+-- !query 8
+SELECT course, udf(year), SUM(earnings) FROM courseSales GROUP BY course, year GROUPING SETS(course, year)
+-- !query 8 schema
+struct<course:string,CAST(udf(cast(year as string)) AS INT):int,sum(earnings):bigint>
+-- !query 8 output
+Java	NULL	50000
+NULL	2012	35000
+NULL	2013	78000
+dotNET	NULL	63000
+
+
+-- !query 9
+SELECT course, year, udf(SUM(earnings)) FROM courseSales GROUP BY course, year GROUPING SETS(course)
+-- !query 9 schema
+struct<course:string,year:int,CAST(udf(cast(sum(cast(earnings as bigint)) as string)) AS BIGINT):bigint>
+-- !query 9 output
+Java	NULL	50000
+dotNET	NULL	63000
+
+
+-- !query 10
+SELECT udf(course), year, SUM(earnings) FROM courseSales GROUP BY course, year GROUPING SETS(year)
+-- !query 10 schema
+struct<CAST(udf(cast(course as string)) AS STRING):string,year:int,sum(earnings):bigint>
+-- !query 10 output
+NULL	2012	35000
+NULL	2013	78000
+
+
+-- !query 11
+SELECT course, udf(SUM(earnings)) AS sum FROM courseSales
+GROUP BY course, earnings GROUPING SETS((), (course), (course, earnings)) ORDER BY course, udf(sum)
+-- !query 11 schema
+struct<course:string,sum:bigint>
+-- !query 11 output
+NULL	113000
+Java	20000
+Java	30000
+Java	50000
+dotNET	5000
+dotNET	10000
+dotNET	48000
+dotNET	63000
+
+
+-- !query 12
+SELECT course, SUM(earnings) AS sum, GROUPING_ID(course, earnings) FROM courseSales
+GROUP BY course, earnings GROUPING SETS((), (course), (course, earnings)) ORDER BY udf(course), sum
+-- !query 12 schema
+struct<course:string,sum:bigint,grouping_id(course, earnings):int>
+-- !query 12 output
+NULL	113000	3
+Java	20000	0
+Java	30000	0
+Java	50000	1
+dotNET	5000	0
+dotNET	10000	0
+dotNET	48000	0
+dotNET	63000	1
+
+
+-- !query 13
+SELECT udf(course), udf(year), GROUPING(course), GROUPING(year), GROUPING_ID(course, year) FROM courseSales
+GROUP BY CUBE(course, year)
+-- !query 13 schema
+struct<CAST(udf(cast(course as string)) AS STRING):string,CAST(udf(cast(year as string)) AS INT):int,grouping(course):tinyint,grouping(year):tinyint,grouping_id(course, year):int>
+-- !query 13 output
+Java	2012	0	0	0
+Java	2013	0	0	0
+Java	NULL	0	1	1
+NULL	2012	1	0	2
+NULL	2013	1	0	2
+NULL	NULL	1	1	3
+dotNET	2012	0	0	0
+dotNET	2013	0	0	0
+dotNET	NULL	0	1	1
+
+
+-- !query 14
+SELECT course, udf(year), GROUPING(course) FROM courseSales GROUP BY course, year
+-- !query 14 schema
+struct<>
+-- !query 14 output
+org.apache.spark.sql.AnalysisException
+grouping() can only be used with GroupingSets/Cube/Rollup;
+
+
+-- !query 15
+SELECT course, udf(year), GROUPING_ID(course, year) FROM courseSales GROUP BY course, year
+-- !query 15 schema
+struct<>
+-- !query 15 output
+org.apache.spark.sql.AnalysisException
+grouping_id() can only be used with GroupingSets/Cube/Rollup;
+
+
+-- !query 16
+SELECT course, year, grouping__id FROM courseSales GROUP BY CUBE(course, year) ORDER BY grouping__id, course, udf(year)
+-- !query 16 schema
+struct<course:string,year:int,grouping__id:int>
+-- !query 16 output
+Java	2012	0
+Java	2013	0
+dotNET	2012	0
+dotNET	2013	0
+Java	NULL	1
+dotNET	NULL	1
+NULL	2012	2
+NULL	2013	2
+NULL	NULL	3
+
+
+-- !query 17
+SELECT course, year FROM courseSales GROUP BY CUBE(course, year)
+HAVING GROUPING(year) = 1 AND GROUPING_ID(course, year) > 0 ORDER BY course, udf(year)
+-- !query 17 schema
+struct<course:string,year:int>
+-- !query 17 output
+NULL	NULL
+Java	NULL
+dotNET	NULL
+
+
+-- !query 18
+SELECT course, udf(year) FROM courseSales GROUP BY course, year HAVING GROUPING(course) > 0
+-- !query 18 schema
+struct<>
+-- !query 18 output
+org.apache.spark.sql.AnalysisException
+grouping()/grouping_id() can only be used with GroupingSets/Cube/Rollup;
+
+
+-- !query 19
+SELECT course, udf(udf(year)) FROM courseSales GROUP BY course, year HAVING GROUPING_ID(course) > 0
+-- !query 19 schema
+struct<>
+-- !query 19 output
+org.apache.spark.sql.AnalysisException
+grouping()/grouping_id() can only be used with GroupingSets/Cube/Rollup;
+
+
+-- !query 20
+SELECT udf(course), year FROM courseSales GROUP BY CUBE(course, year) HAVING grouping__id > 0
+-- !query 20 schema
+struct<CAST(udf(cast(course as string)) AS STRING):string,year:int>
+-- !query 20 output
+Java	NULL
+NULL	2012
+NULL	2013
+NULL	NULL
+dotNET	NULL
+
+
+-- !query 21
+SELECT course, year, GROUPING(course), GROUPING(year) FROM courseSales GROUP BY CUBE(course, year)
+ORDER BY GROUPING(course), GROUPING(year), course, udf(year)
+-- !query 21 schema
+struct<course:string,year:int,grouping(course):tinyint,grouping(year):tinyint>
+-- !query 21 output
+Java	2012	0	0
+Java	2013	0	0
+dotNET	2012	0	0
+dotNET	2013	0	0
+Java	NULL	0	1
+dotNET	NULL	0	1
+NULL	2012	1	0
+NULL	2013	1	0
+NULL	NULL	1	1
+
+
+-- !query 22
+SELECT course, year, GROUPING_ID(course, year) FROM courseSales GROUP BY CUBE(course, year)
+ORDER BY GROUPING(course), GROUPING(year), course, udf(year)
+-- !query 22 schema
+struct<course:string,year:int,grouping_id(course, year):int>
+-- !query 22 output
+Java	2012	0
+Java	2013	0
+dotNET	2012	0
+dotNET	2013	0
+Java	NULL	1
+dotNET	NULL	1
+NULL	2012	2
+NULL	2013	2
+NULL	NULL	3
+
+
+-- !query 23
+SELECT course, udf(year) FROM courseSales GROUP BY course, udf(year) ORDER BY GROUPING(course)
+-- !query 23 schema
+struct<>
+-- !query 23 output
+org.apache.spark.sql.AnalysisException
+grouping()/grouping_id() can only be used with GroupingSets/Cube/Rollup;
+
+
+-- !query 24
+SELECT course, udf(year) FROM courseSales GROUP BY course, udf(year) ORDER BY GROUPING_ID(course)
+-- !query 24 schema
+struct<>
+-- !query 24 output
+org.apache.spark.sql.AnalysisException
+grouping()/grouping_id() can only be used with GroupingSets/Cube/Rollup;
+
+
+-- !query 25
+SELECT course, year FROM courseSales GROUP BY CUBE(course, year) ORDER BY grouping__id, udf(course), year
+-- !query 25 schema
+struct<course:string,year:int>
+-- !query 25 output
+Java	2012
+Java	2013
+dotNET	2012
+dotNET	2013
+Java	NULL
+dotNET	NULL
+NULL	2012
+NULL	2013
+NULL	NULL
+
+
+-- !query 26
+SELECT udf(a + b) AS k1, udf(b) AS k2, SUM(a - b) FROM testData GROUP BY CUBE(k1, k2)
+-- !query 26 schema
+struct<k1:int,k2:int,sum((a - b)):bigint>
+-- !query 26 output
+2	1	0
+2	NULL	0
+3	1	1
+3	2	-1
+3	NULL	0
+4	1	2
+4	2	0
+4	NULL	2
+5	2	1
+5	NULL	1
+NULL	1	3
+NULL	2	0
+NULL	NULL	3
+
+
+-- !query 27
+SELECT udf(udf(a + b)) AS k, b, SUM(a - b) FROM testData GROUP BY ROLLUP(k, b)
+-- !query 27 schema
+struct<k:int,b:int,sum((a - b)):bigint>
+-- !query 27 output
+2	1	0
+2	NULL	0
+3	1	1
+3	2	-1
+3	NULL	0
+4	1	2
+4	2	0
+4	NULL	2
+5	2	1
+5	NULL	1
+NULL	NULL	3
+
+
+-- !query 28
+SELECT udf(a + b), udf(udf(b)) AS k, SUM(a - b) FROM testData GROUP BY a + b, k GROUPING SETS(k)
+-- !query 28 schema
+struct<CAST(udf(cast((a + b) as string)) AS INT):int,k:int,sum((a - b)):bigint>
+-- !query 28 output
+NULL	1	3
+NULL	2	0


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org