You are viewing a plain text version of this content. The canonical link for it is here.
Posted to reviews@spark.apache.org by GitBox <gi...@apache.org> on 2019/08/10 00:56:01 UTC

[GitHub] [spark] maropu commented on a change in pull request #24829: [SPARK-27988][SQL][TEST] Port AGGREGATES.sql [Part 3]

maropu commented on a change in pull request #24829: [SPARK-27988][SQL][TEST] Port AGGREGATES.sql [Part 3]
URL: https://github.com/apache/spark/pull/24829#discussion_r312683576
 
 

 ##########
 File path: sql/core/src/test/resources/sql-tests/inputs/pgSQL/aggregates_part3.sql
 ##########
 @@ -0,0 +1,287 @@
+--
+-- Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
+--
+--
+-- AGGREGATES [Part 3]
+-- https://github.com/postgres/postgres/blob/REL_12_BETA1/src/test/regress/sql/aggregates.sql#L352-L605
+
+create temporary view varchar_tbl as select * from values
+  ('a'),
+  ('A'),
+  ('1'),
+  ('2'),
+  ('3'),
+  (''),
+  -- ('cd'),
+  ('c')
+  as varchar_tbl(f1);
+
+-- We do not support inheritance tree, skip related tests.
+-- try it on an inheritance tree
+-- create table minmaxtest(f1 int);
+-- create table minmaxtest1() inherits (minmaxtest);
+-- create table minmaxtest2() inherits (minmaxtest);
+-- create table minmaxtest3() inherits (minmaxtest);
+-- create index minmaxtesti on minmaxtest(f1);
+-- create index minmaxtest1i on minmaxtest1(f1);
+-- create index minmaxtest2i on minmaxtest2(f1 desc);
+-- create index minmaxtest3i on minmaxtest3(f1) where f1 is not null;
+
+-- insert into minmaxtest values(11), (12);
+-- insert into minmaxtest1 values(13), (14);
+-- insert into minmaxtest2 values(15), (16);
+-- insert into minmaxtest3 values(17), (18);
+
+-- explain (costs off)
+--   select min(f1), max(f1) from minmaxtest;
+-- select min(f1), max(f1) from minmaxtest;
+
+-- DISTINCT doesn't do anything useful here, but it shouldn't fail
+-- explain (costs off)
+--   select distinct min(f1), max(f1) from minmaxtest;
+-- select distinct min(f1), max(f1) from minmaxtest;
+
+-- drop table minmaxtest cascade;
+
+-- [SPARK-9830] It is not allowed to use an aggregate function in the argument of another aggregate function
+-- check for correct detection of nested-aggregate errors
+-- select max(min(unique1)) from tenk1;
+-- select (select max(min(unique1)) from int8_tbl) from tenk1;
+
+-- These tests only test the explain. Skip these tests.
+--
+-- Test removal of redundant GROUP BY columns
+--
+
+-- create temp table t1 (a int, b int, c int, d int, primary key (a, b));
+-- create temp table t2 (x int, y int, z int, primary key (x, y));
+-- create temp table t3 (a int, b int, c int, primary key(a, b) deferrable);
+
+-- Non-primary-key columns can be removed from GROUP BY
+-- explain (costs off) select * from t1 group by a,b,c,d;
+
+-- No removal can happen if the complete PK is not present in GROUP BY
+-- explain (costs off) select a,c from t1 group by a,c,d;
+
+-- Test removal across multiple relations
+-- explain (costs off) select *
+-- from t1 inner join t2 on t1.a = t2.x and t1.b = t2.y
+-- group by t1.a,t1.b,t1.c,t1.d,t2.x,t2.y,t2.z;
+
+-- Test case where t1 can be optimized but not t2
+-- explain (costs off) select t1.*,t2.x,t2.z
+-- from t1 inner join t2 on t1.a = t2.x and t1.b = t2.y
+-- group by t1.a,t1.b,t1.c,t1.d,t2.x,t2.z;
+
+-- Cannot optimize when PK is deferrable
+-- explain (costs off) select * from t3 group by a,b,c;
+
+-- drop table t1;
+-- drop table t2;
+-- drop table t3;
+
+-- [SPARK-27974] Add built-in Aggregate Function: array_agg
+--
+-- Test combinations of DISTINCT and/or ORDER BY
+--
+
+-- select array_agg(a order by b)
+--   from (values (1,4),(2,3),(3,1),(4,2)) v(a,b);
+-- select array_agg(a order by a)
+--   from (values (1,4),(2,3),(3,1),(4,2)) v(a,b);
+-- select array_agg(a order by a desc)
+--   from (values (1,4),(2,3),(3,1),(4,2)) v(a,b);
+-- select array_agg(b order by a desc)
+--   from (values (1,4),(2,3),(3,1),(4,2)) v(a,b);
+
+-- select array_agg(distinct a)
+--   from (values (1),(2),(1),(3),(null),(2)) v(a);
+-- select array_agg(distinct a order by a)
+--   from (values (1),(2),(1),(3),(null),(2)) v(a);
+-- select array_agg(distinct a order by a desc)
+--   from (values (1),(2),(1),(3),(null),(2)) v(a);
+-- select array_agg(distinct a order by a desc nulls last)
+--   from (values (1),(2),(1),(3),(null),(2)) v(a);
+
+-- Skip the test below because it requires 4 UDFs: aggf_trans, aggfns_trans, aggfstr, and aggfns
+-- multi-arg aggs, strict/nonstrict, distinct/order by
+
+-- select aggfstr(a,b,c)
+--   from (values (1,3,'foo'),(0,null,null),(2,2,'bar'),(3,1,'baz')) v(a,b,c);
+-- select aggfns(a,b,c)
+--   from (values (1,3,'foo'),(0,null,null),(2,2,'bar'),(3,1,'baz')) v(a,b,c);
+
+-- select aggfstr(distinct a,b,c)
+--   from (values (1,3,'foo'),(0,null,null),(2,2,'bar'),(3,1,'baz')) v(a,b,c),
+--        generate_series(1,3) i;
+-- select aggfns(distinct a,b,c)
+--   from (values (1,3,'foo'),(0,null,null),(2,2,'bar'),(3,1,'baz')) v(a,b,c),
+--        generate_series(1,3) i;
+
+-- select aggfstr(distinct a,b,c order by b)
+--   from (values (1,3,'foo'),(0,null,null),(2,2,'bar'),(3,1,'baz')) v(a,b,c),
+--        generate_series(1,3) i;
+-- select aggfns(distinct a,b,c order by b)
+--   from (values (1,3,'foo'),(0,null,null),(2,2,'bar'),(3,1,'baz')) v(a,b,c),
+--        generate_series(1,3) i;
+
+-- test specific code paths
+
+-- select aggfns(distinct a,a,c order by c using ~<~,a)
+--   from (values (1,3,'foo'),(0,null,null),(2,2,'bar'),(3,1,'baz')) v(a,b,c),
+--        generate_series(1,2) i;
+-- select aggfns(distinct a,a,c order by c using ~<~)
+--   from (values (1,3,'foo'),(0,null,null),(2,2,'bar'),(3,1,'baz')) v(a,b,c),
+--        generate_series(1,2) i;
+-- select aggfns(distinct a,a,c order by a)
+--   from (values (1,3,'foo'),(0,null,null),(2,2,'bar'),(3,1,'baz')) v(a,b,c),
+--        generate_series(1,2) i;
+-- select aggfns(distinct a,b,c order by a,c using ~<~,b)
+--   from (values (1,3,'foo'),(0,null,null),(2,2,'bar'),(3,1,'baz')) v(a,b,c),
+--        generate_series(1,2) i;
+
+-- check node I/O via view creation and usage, also deparsing logic
+
+-- create view agg_view1 as
+--   select aggfns(a,b,c)
+--     from (values (1,3,'foo'),(0,null,null),(2,2,'bar'),(3,1,'baz')) v(a,b,c);
+
+-- select * from agg_view1;
+-- select pg_get_viewdef('agg_view1'::regclass);
+
+-- create or replace view agg_view1 as
+--   select aggfns(distinct a,b,c)
+--     from (values (1,3,'foo'),(0,null,null),(2,2,'bar'),(3,1,'baz')) v(a,b,c),
+--          generate_series(1,3) i;
+
+-- select * from agg_view1;
+-- select pg_get_viewdef('agg_view1'::regclass);
+
+-- create or replace view agg_view1 as
+--   select aggfns(distinct a,b,c order by b)
+--     from (values (1,3,'foo'),(0,null,null),(2,2,'bar'),(3,1,'baz')) v(a,b,c),
+--          generate_series(1,3) i;
+
+-- select * from agg_view1;
+-- select pg_get_viewdef('agg_view1'::regclass);
+
+-- create or replace view agg_view1 as
+--   select aggfns(a,b,c order by b+1)
+--     from (values (1,3,'foo'),(0,null,null),(2,2,'bar'),(3,1,'baz')) v(a,b,c);
+
+-- select * from agg_view1;
+-- select pg_get_viewdef('agg_view1'::regclass);
+
+-- create or replace view agg_view1 as
+--   select aggfns(a,a,c order by b)
+--     from (values (1,3,'foo'),(0,null,null),(2,2,'bar'),(3,1,'baz')) v(a,b,c);
+
+-- select * from agg_view1;
+-- select pg_get_viewdef('agg_view1'::regclass);
+
+-- create or replace view agg_view1 as
+--   select aggfns(a,b,c order by c using ~<~)
+--     from (values (1,3,'foo'),(0,null,null),(2,2,'bar'),(3,1,'baz')) v(a,b,c);
+
+-- select * from agg_view1;
+-- select pg_get_viewdef('agg_view1'::regclass);
+
+-- create or replace view agg_view1 as
+--   select aggfns(distinct a,b,c order by a,c using ~<~,b)
+--     from (values (1,3,'foo'),(0,null,null),(2,2,'bar'),(3,1,'baz')) v(a,b,c),
+--          generate_series(1,2) i;
+
+-- select * from agg_view1;
+-- select pg_get_viewdef('agg_view1'::regclass);
+
+-- drop view agg_view1;
+
+-- incorrect DISTINCT usage errors
+
+-- select aggfns(distinct a,b,c order by i)
+--   from (values (1,1,'foo')) v(a,b,c), generate_series(1,2) i;
+-- select aggfns(distinct a,b,c order by a,b+1)
+--   from (values (1,1,'foo')) v(a,b,c), generate_series(1,2) i;
+-- select aggfns(distinct a,b,c order by a,b,i,c)
+--   from (values (1,1,'foo')) v(a,b,c), generate_series(1,2) i;
+-- select aggfns(distinct a,a,c order by a,b)
+--   from (values (1,1,'foo')) v(a,b,c), generate_series(1,2) i;
+
+-- [SPARK-27978] We use concat_ws(delimiter, collect_list(expression)) to rewrite string_agg
+-- string_agg tests
+select concat_ws(',', collect_list(a)) from (values('aaaa'),('bbbb'),('cccc')) g(a);
 
 Review comment:
   How about keeping the original queries here?

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org