You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by jc...@apache.org on 2016/11/11 01:40:48 UTC
[5/5] hive git commit: HIVE-15119: Support standard syntax for ROLLUP & CUBE (Vineet Garg, reviewed by Jesus Camacho Rodriguez)
HIVE-15119: Support standard syntax for ROLLUP & CUBE (Vineet Garg, reviewed by Jesus Camacho Rodriguez)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/444af207
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/444af207
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/444af207
Branch: refs/heads/master
Commit: 444af20720b34eae3a3549f402aab63ce61283b6
Parents: 35be3f1
Author: Vineet Garg <vg...@hortonworks.com>
Authored: Fri Nov 11 01:39:10 2016 +0000
Committer: Jesus Camacho Rodriguez <jc...@apache.org>
Committed: Fri Nov 11 01:39:10 2016 +0000
----------------------------------------------------------------------
.../hadoop/hive/ql/parse/IdentifiersParser.g | 30 +-
.../clientpositive/annotate_stats_groupby.q | 2 +
.../cbo_rp_annotate_stats_groupby.q | 2 +
ql/src/test/queries/clientpositive/cte_1.q | 5 +
.../test/queries/clientpositive/groupby_cube1.q | 2 +
.../clientpositive/groupby_cube_multi_gby.q | 2 +-
.../clientpositive/groupby_grouping_id1.q | 3 +
.../clientpositive/groupby_grouping_id2.q | 16 +-
.../clientpositive/groupby_grouping_sets1.q | 1 +
.../clientpositive/groupby_grouping_sets2.q | 3 +
.../clientpositive/groupby_grouping_sets3.q | 3 +
.../clientpositive/groupby_grouping_sets4.q | 7 +
.../clientpositive/groupby_grouping_sets5.q | 4 +
.../queries/clientpositive/groupby_rollup1.q | 2 +-
.../infer_bucket_sort_grouping_operators.q | 2 +
.../queries/clientpositive/limit_pushdown2.q | 5 +
.../clientpositive/vector_grouping_sets.q | 7 +-
.../clientpositive/annotate_stats_groupby.q.out | 100 +
.../cbo_rp_annotate_stats_groupby.q.out | 106 +
ql/src/test/results/clientpositive/cte_1.q.out | 12248 +++++++++++++++++
.../results/clientpositive/groupby_cube1.q.out | 59 +
.../clientpositive/groupby_cube_multi_gby.q.out | 4 +-
.../clientpositive/groupby_grouping_id1.q.out | 72 +
.../clientpositive/groupby_grouping_sets1.q.out | 23 +
.../clientpositive/groupby_grouping_sets2.q.out | 84 +
.../clientpositive/groupby_grouping_sets3.q.out | 59 +
.../clientpositive/groupby_grouping_sets4.q.out | 147 +
.../clientpositive/groupby_grouping_sets5.q.out | 89 +
.../clientpositive/groupby_rollup1.q.out | 4 +-
.../infer_bucket_sort_grouping_operators.q.out | 1254 ++
.../clientpositive/limit_pushdown2.q.out | 89 +
.../results/clientpositive/llap/cte_1.q.out | 12248 +++++++++++++++++
.../llap/groupby_grouping_id2.q.out | 111 +-
.../llap/vector_grouping_sets.q.out | 69 +
.../clientpositive/spark/groupby_cube1.q.out | 65 +
.../spark/groupby_grouping_id2.q.out | 111 +-
.../clientpositive/spark/groupby_rollup1.q.out | 4 +-
.../clientpositive/vector_grouping_sets.q.out | 59 +
38 files changed, 27085 insertions(+), 16 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/444af207/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g b/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g
index 58ddf7a..2e40aa5 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g
@@ -50,11 +50,36 @@ groupByClause
@init { gParent.pushMsg("group by clause", state); }
@after { gParent.popMsg(state); }
:
- KW_GROUP KW_BY
+ KW_GROUP KW_BY groupby_expression
+ -> groupby_expression
+ ;
+
+// support for new and old rollup/cube syntax
+groupby_expression :
+ rollupStandard |
+ rollupOldSyntax
+;
+
+// standard syntax for both operators: GROUP BY ROLLUP(e1, e2, ...) or GROUP BY CUBE(e1, e2, ...)
+rollupStandard
+@init { gParent.pushMsg("standard rollup syntax", state); }
+@after { gParent.popMsg(state); }
+ :
+ (rollup=KW_ROLLUP | cube=KW_CUBE)
+ LPAREN expression ( COMMA expression)* RPAREN
+ -> {rollup != null}? ^(TOK_ROLLUP_GROUPBY expression+)
+ -> ^(TOK_CUBE_GROUPBY expression+)
+ ;
+
+// legacy Hive syntax: plain expression list, optionally followed by WITH ROLLUP / WITH CUBE and/or GROUPING SETS (...)
+rollupOldSyntax
+@init { gParent.pushMsg("rollup old syntax", state); }
+@after { gParent.popMsg(state); }
+ :
expression
( COMMA expression)*
((rollup=KW_WITH KW_ROLLUP) | (cube=KW_WITH KW_CUBE)) ?
- (sets=KW_GROUPING KW_SETS
+ (sets=KW_GROUPING KW_SETS
LPAREN groupingSetExpression ( COMMA groupingSetExpression)* RPAREN ) ?
-> {rollup != null}? ^(TOK_ROLLUP_GROUPBY expression+)
-> {cube != null}? ^(TOK_CUBE_GROUPBY expression+)
@@ -62,6 +87,7 @@ groupByClause
-> ^(TOK_GROUPBY expression+)
;
+
groupingSetExpression
@init {gParent.pushMsg("grouping set expression", state); }
@after {gParent.popMsg(state); }
http://git-wip-us.apache.org/repos/asf/hive/blob/444af207/ql/src/test/queries/clientpositive/annotate_stats_groupby.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/annotate_stats_groupby.q b/ql/src/test/queries/clientpositive/annotate_stats_groupby.q
index 854e401..77571cf 100644
--- a/ql/src/test/queries/clientpositive/annotate_stats_groupby.q
+++ b/ql/src/test/queries/clientpositive/annotate_stats_groupby.q
@@ -67,6 +67,7 @@ explain select state,locid from loc_orc group by state,locid with cube;
-- Case 6: column stats, NO hash aggregation, grouping sets - cardinality = 24
-- Case 8: column stats, grouping sets - cardinality = 24
explain select state,locid from loc_orc group by state,locid with rollup;
+explain select state,locid from loc_orc group by rollup( state,locid );
-- Case 6: column stats, NO hash aggregation, grouping sets - cardinality = 8
-- Case 8: column stats, grouping sets - cardinality = 8
@@ -111,6 +112,7 @@ explain select state,locid from loc_orc group by state,locid with cube;
-- Case 2: NO column stats, NO hash aggregation, NO grouping sets - cardinality = 24
-- Case 7: NO column stats - cardinality = 12
explain select state,locid from loc_orc group by state,locid with rollup;
+explain select state,locid from loc_orc group by rollup (state,locid);
-- Case 2: NO column stats, NO hash aggregation, NO grouping sets - cardinality = 8
-- Case 7: NO column stats - cardinality = 4
http://git-wip-us.apache.org/repos/asf/hive/blob/444af207/ql/src/test/queries/clientpositive/cbo_rp_annotate_stats_groupby.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/cbo_rp_annotate_stats_groupby.q b/ql/src/test/queries/clientpositive/cbo_rp_annotate_stats_groupby.q
index 3159fc7..99bd780 100644
--- a/ql/src/test/queries/clientpositive/cbo_rp_annotate_stats_groupby.q
+++ b/ql/src/test/queries/clientpositive/cbo_rp_annotate_stats_groupby.q
@@ -69,6 +69,7 @@ explain select state,locid from loc_orc group by state,locid with cube;
-- Case 6: column stats, NO hash aggregation, grouping sets - cardinality = 24
-- Case 8: column stats, grouping sets - cardinality = 24
explain select state,locid from loc_orc group by state,locid with rollup;
+explain select state,locid from loc_orc group by rollup (state,locid);
-- Case 6: column stats, NO hash aggregation, grouping sets - cardinality = 8
-- Case 8: column stats, grouping sets - cardinality = 8
@@ -113,6 +114,7 @@ explain select state,locid from loc_orc group by state,locid with cube;
-- Case 2: NO column stats, NO hash aggregation, NO grouping sets - cardinality = 24
-- Case 7: NO column stats - cardinality = 12
explain select state,locid from loc_orc group by state,locid with rollup;
+explain select state,locid from loc_orc group by rollup (state,locid);
-- Case 2: NO column stats, NO hash aggregation, NO grouping sets - cardinality = 8
-- Case 7: NO column stats - cardinality = 4
http://git-wip-us.apache.org/repos/asf/hive/blob/444af207/ql/src/test/queries/clientpositive/cte_1.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/cte_1.q b/ql/src/test/queries/clientpositive/cte_1.q
index 2956339..15d3f06 100644
--- a/ql/src/test/queries/clientpositive/cte_1.q
+++ b/ql/src/test/queries/clientpositive/cte_1.q
@@ -44,6 +44,11 @@ with q1 as (select * from alltypesorc)
from q1
select cint, cstring1, avg(csmallint)
group by cint, cstring1 with rollup;
+--standard rollup syntax
+with q1 as (select * from alltypesorc)
+ from q1
+ select cint, cstring1, avg(csmallint)
+ group by rollup (cint, cstring1);
drop table if exists cte9_t1;
create table cte9_t1 as
http://git-wip-us.apache.org/repos/asf/hive/blob/444af207/ql/src/test/queries/clientpositive/groupby_cube1.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/groupby_cube1.q b/ql/src/test/queries/clientpositive/groupby_cube1.q
index bfa13ee..fd2f0de 100644
--- a/ql/src/test/queries/clientpositive/groupby_cube1.q
+++ b/ql/src/test/queries/clientpositive/groupby_cube1.q
@@ -10,6 +10,8 @@ LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1;
EXPLAIN
SELECT key, val, count(1) FROM T1 GROUP BY key, val with cube;
+EXPLAIN
+SELECT key, val, count(1) FROM T1 GROUP BY CUBE(key, val);
SELECT key, val, count(1) FROM T1 GROUP BY key, val with cube;
http://git-wip-us.apache.org/repos/asf/hive/blob/444af207/ql/src/test/queries/clientpositive/groupby_cube_multi_gby.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/groupby_cube_multi_gby.q b/ql/src/test/queries/clientpositive/groupby_cube_multi_gby.q
index 80022bb..dff81a4 100644
--- a/ql/src/test/queries/clientpositive/groupby_cube_multi_gby.q
+++ b/ql/src/test/queries/clientpositive/groupby_cube_multi_gby.q
@@ -6,7 +6,7 @@ create table t2 like src;
explain from src
insert into table t1 select
key, GROUPING__ID
-group by key, value with cube
+group by cube(key, value)
insert into table t2 select
key, value
group by key, value grouping sets ((key), (key, value));
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hive/blob/444af207/ql/src/test/queries/clientpositive/groupby_grouping_id1.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/groupby_grouping_id1.q b/ql/src/test/queries/clientpositive/groupby_grouping_id1.q
index de4a7c3..d43ea37 100644
--- a/ql/src/test/queries/clientpositive/groupby_grouping_id1.q
+++ b/ql/src/test/queries/clientpositive/groupby_grouping_id1.q
@@ -3,8 +3,11 @@ CREATE TABLE T1(key STRING, val STRING) STORED AS TEXTFILE;
LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1;
SELECT key, val, GROUPING__ID from T1 group by key, val with cube;
+SELECT key, val, GROUPING__ID from T1 group by cube(key, val);
SELECT GROUPING__ID, key, val from T1 group by key, val with rollup;
+SELECT GROUPING__ID, key, val from T1 group by rollup (key, val);
SELECT key, val, GROUPING__ID, CASE WHEN GROUPING__ID == 0 THEN "0" WHEN GROUPING__ID == 1 THEN "1" WHEN GROUPING__ID == 2 THEN "2" WHEN GROUPING__ID == 3 THEN "3" ELSE "nothing" END from T1 group by key, val with cube;
+SELECT key, val, GROUPING__ID, CASE WHEN GROUPING__ID == 0 THEN "0" WHEN GROUPING__ID == 1 THEN "1" WHEN GROUPING__ID == 2 THEN "2" WHEN GROUPING__ID == 3 THEN "3" ELSE "nothing" END from T1 group by cube(key, val);
http://git-wip-us.apache.org/repos/asf/hive/blob/444af207/ql/src/test/queries/clientpositive/groupby_grouping_id2.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/groupby_grouping_id2.q b/ql/src/test/queries/clientpositive/groupby_grouping_id2.q
index 5c05aad..77a1638 100644
--- a/ql/src/test/queries/clientpositive/groupby_grouping_id2.q
+++ b/ql/src/test/queries/clientpositive/groupby_grouping_id2.q
@@ -7,19 +7,33 @@ set hive.groupby.skewindata = true;
-- SORT_QUERY_RESULTS
SELECT key, value, GROUPING__ID, count(*) from T1 GROUP BY key, value WITH ROLLUP;
+SELECT key, value, GROUPING__ID, count(*) from T1 GROUP BY ROLLUP (key, value);
SELECT GROUPING__ID, count(*)
FROM
(
SELECT key, value, GROUPING__ID, count(*) from T1 GROUP BY key, value WITH ROLLUP
-) t
+) t
GROUP BY GROUPING__ID;
+SELECT GROUPING__ID, count(*)
+FROM
+(
+SELECT key, value, GROUPING__ID, count(*) from T1 GROUP BY ROLLUP(key, value)
+) t
+GROUP BY GROUPING__ID;
+
+
SELECT t1.GROUPING__ID, t2.GROUPING__ID FROM (SELECT GROUPING__ID FROM T1 GROUP BY key,value WITH ROLLUP) t1
JOIN
(SELECT GROUPING__ID FROM T1 GROUP BY key, value WITH ROLLUP) t2
ON t1.GROUPING__ID = t2.GROUPING__ID;
+SELECT t1.GROUPING__ID, t2.GROUPING__ID FROM (SELECT GROUPING__ID FROM T1 GROUP BY ROLLUP(key,value)) t1
+JOIN
+(SELECT GROUPING__ID FROM T1 GROUP BY ROLLUP(key, value)) t2
+ON t1.GROUPING__ID = t2.GROUPING__ID;
+
http://git-wip-us.apache.org/repos/asf/hive/blob/444af207/ql/src/test/queries/clientpositive/groupby_grouping_sets1.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/groupby_grouping_sets1.q b/ql/src/test/queries/clientpositive/groupby_grouping_sets1.q
index 804dfb3..e239a87 100644
--- a/ql/src/test/queries/clientpositive/groupby_grouping_sets1.q
+++ b/ql/src/test/queries/clientpositive/groupby_grouping_sets1.q
@@ -5,6 +5,7 @@ LOAD DATA LOCAL INPATH '../../data/files/grouping_sets.txt' INTO TABLE T1;
SELECT * FROM T1;
SELECT a, b, count(*) from T1 group by a, b with cube;
+SELECT a, b, count(*) from T1 group by cube(a, b);
SELECT a, b, count(*) FROM T1 GROUP BY a, b GROUPING SETS (a, (a, b), b, ());
http://git-wip-us.apache.org/repos/asf/hive/blob/444af207/ql/src/test/queries/clientpositive/groupby_grouping_sets2.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/groupby_grouping_sets2.q b/ql/src/test/queries/clientpositive/groupby_grouping_sets2.q
index 824942c..b470964 100644
--- a/ql/src/test/queries/clientpositive/groupby_grouping_sets2.q
+++ b/ql/src/test/queries/clientpositive/groupby_grouping_sets2.q
@@ -8,6 +8,9 @@ LOAD DATA LOCAL INPATH '../../data/files/grouping_sets.txt' INTO TABLE T1;
-- Since 4 grouping sets would be generated for the query below, an additional MR job should be created
EXPLAIN
SELECT a, b, count(*) from T1 group by a, b with cube;
+
+EXPLAIN
+SELECT a, b, count(*) from T1 group by cube(a, b);
SELECT a, b, count(*) from T1 group by a, b with cube;
EXPLAIN
http://git-wip-us.apache.org/repos/asf/hive/blob/444af207/ql/src/test/queries/clientpositive/groupby_grouping_sets3.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/groupby_grouping_sets3.q b/ql/src/test/queries/clientpositive/groupby_grouping_sets3.q
index 7077377..3c1a5e7 100644
--- a/ql/src/test/queries/clientpositive/groupby_grouping_sets3.q
+++ b/ql/src/test/queries/clientpositive/groupby_grouping_sets3.q
@@ -15,6 +15,9 @@ set hive.new.job.grouping.set.cardinality = 30;
-- hive.new.job.grouping.set.cardinality is more than 4.
EXPLAIN
SELECT a, b, avg(c), count(*) from T1 group by a, b with cube;
+
+EXPLAIN
+SELECT a, b, avg(c), count(*) from T1 group by cube(a, b);
SELECT a, b, avg(c), count(*) from T1 group by a, b with cube;
set hive.new.job.grouping.set.cardinality=2;
http://git-wip-us.apache.org/repos/asf/hive/blob/444af207/ql/src/test/queries/clientpositive/groupby_grouping_sets4.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/groupby_grouping_sets4.q b/ql/src/test/queries/clientpositive/groupby_grouping_sets4.q
index 06e5e1a..6e3201c 100644
--- a/ql/src/test/queries/clientpositive/groupby_grouping_sets4.q
+++ b/ql/src/test/queries/clientpositive/groupby_grouping_sets4.q
@@ -18,6 +18,13 @@ join
(SELECT a, b, count(*) from T1 where a < 3 group by a, b with cube) subq2
on subq1.a = subq2.a;
+EXPLAIN
+SELECT * FROM
+(SELECT a, b, count(*) from T1 where a < 3 group by cube(a, b) ) subq1
+join
+(SELECT a, b, count(*) from T1 where a < 3 group by cube(a, b) ) subq2
+on subq1.a = subq2.a;
+
SELECT * FROM
(SELECT a, b, count(*) from T1 where a < 3 group by a, b with cube) subq1
join
http://git-wip-us.apache.org/repos/asf/hive/blob/444af207/ql/src/test/queries/clientpositive/groupby_grouping_sets5.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/groupby_grouping_sets5.q b/ql/src/test/queries/clientpositive/groupby_grouping_sets5.q
index 6a09c88..c1c98b3 100644
--- a/ql/src/test/queries/clientpositive/groupby_grouping_sets5.q
+++ b/ql/src/test/queries/clientpositive/groupby_grouping_sets5.q
@@ -12,6 +12,10 @@ EXPLAIN
SELECT a, b, count(*) FROM
(SELECT a, b, count(1) from T1 group by a, b) subq1 group by a, b with cube;
+EXPLAIN
+SELECT a, b, count(*) FROM
+(SELECT a, b, count(1) from T1 group by a, b) subq1 group by cube(a, b);
+
SELECT a, b, count(*) FROM
(SELECT a, b, count(1) from T1 group by a, b) subq1 group by a, b with cube;
http://git-wip-us.apache.org/repos/asf/hive/blob/444af207/ql/src/test/queries/clientpositive/groupby_rollup1.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/groupby_rollup1.q b/ql/src/test/queries/clientpositive/groupby_rollup1.q
index 23cac80..f30ace0 100644
--- a/ql/src/test/queries/clientpositive/groupby_rollup1.q
+++ b/ql/src/test/queries/clientpositive/groupby_rollup1.q
@@ -39,7 +39,7 @@ CREATE TABLE T3(key1 STRING, key2 STRING, val INT) STORED AS TEXTFILE;
EXPLAIN
FROM T1
INSERT OVERWRITE TABLE T2 SELECT key, val, count(1) group by key, val with rollup
-INSERT OVERWRITE TABLE T3 SELECT key, val, sum(1) group by key, val with rollup;
+INSERT OVERWRITE TABLE T3 SELECT key, val, sum(1) group by rollup(key, val);
FROM T1
http://git-wip-us.apache.org/repos/asf/hive/blob/444af207/ql/src/test/queries/clientpositive/infer_bucket_sort_grouping_operators.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/infer_bucket_sort_grouping_operators.q b/ql/src/test/queries/clientpositive/infer_bucket_sort_grouping_operators.q
index 928f6fb..e1a6ead 100644
--- a/ql/src/test/queries/clientpositive/infer_bucket_sort_grouping_operators.q
+++ b/ql/src/test/queries/clientpositive/infer_bucket_sort_grouping_operators.q
@@ -12,6 +12,7 @@ CREATE TABLE test_table_out_2 (key STRING, value STRING, grouping_key STRING, ag
-- Test rollup, should not be bucketed or sorted because its missing the grouping ID
EXPLAIN INSERT OVERWRITE TABLE test_table_out PARTITION (part = '1')
SELECT key, value, count(1) FROM src GROUP BY key, value WITH ROLLUP;
+SELECT key, value, count(1) FROM src GROUP BY ROLLUP (key, value);
INSERT OVERWRITE TABLE test_table_out PARTITION (part = '1')
SELECT key, value, count(1) FROM src GROUP BY key, value WITH ROLLUP;
@@ -22,6 +23,7 @@ DESCRIBE FORMATTED test_table_out PARTITION (part = '1');
INSERT OVERWRITE TABLE test_table_out_2 PARTITION (part = '1')
SELECT key, value, GROUPING__ID, count(1) FROM src GROUP BY key, value WITH ROLLUP;
+SELECT key, value, GROUPING__ID, count(1) FROM src GROUP BY ROLLUP (key, value);
DESCRIBE FORMATTED test_table_out_2 PARTITION (part = '1');
http://git-wip-us.apache.org/repos/asf/hive/blob/444af207/ql/src/test/queries/clientpositive/limit_pushdown2.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/limit_pushdown2.q b/ql/src/test/queries/clientpositive/limit_pushdown2.q
index 637b5b0..e222763 100644
--- a/ql/src/test/queries/clientpositive/limit_pushdown2.q
+++ b/ql/src/test/queries/clientpositive/limit_pushdown2.q
@@ -73,6 +73,11 @@ select key, value, avg(key + 1) from src
group by value, key with rollup
order by key, value limit 20;
+explain
+select key, value, avg(key + 1) from src
+group by rollup(value, key)
+order by key, value limit 20;
+
select key, value, avg(key + 1) from src
group by value, key with rollup
order by key, value limit 20;
http://git-wip-us.apache.org/repos/asf/hive/blob/444af207/ql/src/test/queries/clientpositive/vector_grouping_sets.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/vector_grouping_sets.q b/ql/src/test/queries/clientpositive/vector_grouping_sets.q
index 09ba6b6..ec5a3c7 100644
--- a/ql/src/test/queries/clientpositive/vector_grouping_sets.q
+++ b/ql/src/test/queries/clientpositive/vector_grouping_sets.q
@@ -61,4 +61,9 @@ select s_store_id, GROUPING__ID
select s_store_id, GROUPING__ID
from store
- group by s_store_id with rollup;
\ No newline at end of file
+ group by s_store_id with rollup;
+
+ explain
+select s_store_id, GROUPING__ID
+ from store
+ group by rollup(s_store_id);
http://git-wip-us.apache.org/repos/asf/hive/blob/444af207/ql/src/test/results/clientpositive/annotate_stats_groupby.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/annotate_stats_groupby.q.out b/ql/src/test/results/clientpositive/annotate_stats_groupby.q.out
index f6971a0..99be3c1 100644
--- a/ql/src/test/results/clientpositive/annotate_stats_groupby.q.out
+++ b/ql/src/test/results/clientpositive/annotate_stats_groupby.q.out
@@ -444,6 +444,56 @@ STAGE PLANS:
Processor Tree:
ListSink
+PREHOOK: query: explain select state,locid from loc_orc group by rollup( state,locid )
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select state,locid from loc_orc group by rollup( state,locid )
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: loc_orc
+ Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: state (type: string), locid (type: int)
+ outputColumnNames: state, locid
+ Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ keys: state (type: string), locid (type: int), '0' (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 24 Data size: 4200 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string)
+ sort order: +++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: string)
+ Statistics: Num rows: 24 Data size: 4200 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 24 Data size: 4200 Basic stats: COMPLETE Column stats: COMPLETE
+ pruneGroupingSetId: true
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 24 Data size: 4200 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
PREHOOK: query: -- Case 6: column stats, NO hash aggregation, grouping sets - cardinality = 8
-- Case 8: column stats, grouping sets - cardinality = 8
explain select state,locid from loc_orc group by state,locid grouping sets((state))
@@ -934,6 +984,56 @@ STAGE PLANS:
Processor Tree:
ListSink
+PREHOOK: query: explain select state,locid from loc_orc group by rollup (state,locid)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select state,locid from loc_orc group by rollup (state,locid)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: loc_orc
+ Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: state (type: string), locid (type: int)
+ outputColumnNames: state, locid
+ Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: state (type: string), locid (type: int), '0' (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 24 Data size: 2388 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string)
+ sort order: +++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: string)
+ Statistics: Num rows: 24 Data size: 2388 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 12 Data size: 1194 Basic stats: COMPLETE Column stats: NONE
+ pruneGroupingSetId: true
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 12 Data size: 1194 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
PREHOOK: query: -- Case 2: NO column stats, NO hash aggregation, NO grouping sets - cardinality = 8
-- Case 7: NO column stats - cardinality = 4
explain select state,locid from loc_orc group by state,locid grouping sets((state))
http://git-wip-us.apache.org/repos/asf/hive/blob/444af207/ql/src/test/results/clientpositive/cbo_rp_annotate_stats_groupby.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/cbo_rp_annotate_stats_groupby.q.out b/ql/src/test/results/clientpositive/cbo_rp_annotate_stats_groupby.q.out
index f5b4375..f1ddd87 100644
--- a/ql/src/test/results/clientpositive/cbo_rp_annotate_stats_groupby.q.out
+++ b/ql/src/test/results/clientpositive/cbo_rp_annotate_stats_groupby.q.out
@@ -450,6 +450,59 @@ STAGE PLANS:
Processor Tree:
ListSink
+PREHOOK: query: explain select state,locid from loc_orc group by rollup (state,locid)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select state,locid from loc_orc group by rollup (state,locid)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: loc_orc
+ Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: state (type: string), locid (type: int)
+ outputColumnNames: state, locid
+ Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ keys: state (type: string), locid (type: int), '0' (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 24 Data size: 4200 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string)
+ sort order: +++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: int)
+ Statistics: Num rows: 24 Data size: 4200 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string), KEY._col1 (type: int)
+ mode: mergepartial
+ outputColumnNames: state, locid
+ Statistics: Num rows: 24 Data size: 2160 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: state (type: string), locid (type: int)
+ outputColumnNames: state, locid
+ Statistics: Num rows: 24 Data size: 2160 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 24 Data size: 2160 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
PREHOOK: query: -- Case 6: column stats, NO hash aggregation, grouping sets - cardinality = 8
-- Case 8: column stats, grouping sets - cardinality = 8
explain select state,locid from loc_orc group by state,locid grouping sets((state))
@@ -961,6 +1014,59 @@ STAGE PLANS:
Processor Tree:
ListSink
+PREHOOK: query: explain select state,locid from loc_orc group by rollup (state,locid)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select state,locid from loc_orc group by rollup (state,locid)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: loc_orc
+ Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: state (type: string), locid (type: int)
+ outputColumnNames: state, locid
+ Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: state (type: string), locid (type: int), '0' (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 24 Data size: 2388 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string)
+ sort order: +++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: int)
+ Statistics: Num rows: 24 Data size: 2388 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string), KEY._col1 (type: int)
+ mode: mergepartial
+ outputColumnNames: state, locid
+ Statistics: Num rows: 12 Data size: 1194 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: state (type: string), locid (type: int)
+ outputColumnNames: state, locid
+ Statistics: Num rows: 12 Data size: 1194 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 12 Data size: 1194 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
PREHOOK: query: -- Case 2: NO column stats, NO hash aggregation, NO grouping sets - cardinality = 8
-- Case 7: NO column stats - cardinality = 4
explain select state,locid from loc_orc group by state,locid grouping sets((state))