You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by th...@apache.org on 2013/10/01 03:03:01 UTC

svn commit: r1527843 - in /hive/branches/branch-0.12/ql/src: java/org/apache/hadoop/hive/ql/optimizer/correlation/ test/queries/clientpositive/ test/results/clientpositive/

Author: thejas
Date: Tue Oct  1 01:03:00 2013
New Revision: 1527843

URL: http://svn.apache.org/r1527843
Log:
HIVE-5357: ReduceSinkDeDuplication optimizer pick the wrong keys in pRS-cGBYm-cRS-cGBYr scenario when there are distinct keys in child GBY (Chun Chen via Ashutosh Chauhan)

Modified:
    hive/branches/branch-0.12/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/CorrelationUtilities.java
    hive/branches/branch-0.12/ql/src/test/queries/clientpositive/reduce_deduplicate_extended.q
    hive/branches/branch-0.12/ql/src/test/results/clientpositive/reduce_deduplicate_extended.q.out

Modified: hive/branches/branch-0.12/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/CorrelationUtilities.java
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.12/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/CorrelationUtilities.java?rev=1527843&r1=1527842&r2=1527843&view=diff
==============================================================================
--- hive/branches/branch-0.12/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/CorrelationUtilities.java (original)
+++ hive/branches/branch-0.12/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/CorrelationUtilities.java Tue Oct  1 01:03:00 2013
@@ -378,7 +378,8 @@ public final class CorrelationUtilities 
       // copies desc of cGBYm to cGBYr and remove cGBYm and cRS
       GroupByOperator cGBYm = (GroupByOperator) parent;
 
-      cGBYr.getConf().setKeys(cGBYm.getConf().getKeys());
+      cGBYr.getConf().setKeys(ExprNodeDescUtils.backtrack(ExprNodeDescUtils.backtrack(cGBYr
+              .getConf().getKeys(), cGBYr, cRS), cRS, cGBYm));
       cGBYr.getConf().setAggregators(cGBYm.getConf().getAggregators());
       for (AggregationDesc aggr : cGBYm.getConf().getAggregators()) {
         aggr.setMode(GenericUDAFEvaluator.Mode.COMPLETE);

Modified: hive/branches/branch-0.12/ql/src/test/queries/clientpositive/reduce_deduplicate_extended.q
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.12/ql/src/test/queries/clientpositive/reduce_deduplicate_extended.q?rev=1527843&r1=1527842&r2=1527843&view=diff
==============================================================================
--- hive/branches/branch-0.12/ql/src/test/queries/clientpositive/reduce_deduplicate_extended.q (original)
+++ hive/branches/branch-0.12/ql/src/test/queries/clientpositive/reduce_deduplicate_extended.q Tue Oct  1 01:03:00 2013
@@ -18,6 +18,7 @@ explain select src.key, sum(src.key) FRO
 explain select src.key, src.value FROM src JOIN src1 ON src.key = src1.key order by src.key, src.value;
 -- mGBY-RS-rGBY-mGBY-RS-rGBY
 explain from (select key, value from src group by key, value) s select s.key group by s.key;
+explain select key, count(distinct value) from (select key, value from src group by key, value) t group by key;
 
 select key, sum(key) from (select * from src distribute by key sort by key, value) Q1 group by key;
 select key, sum(key), lower(value) from (select * from src order by key) Q1 group by key, lower(value);
@@ -26,6 +27,7 @@ select key, sum(key) as value from src g
 select src.key, sum(src.key) FROM src JOIN src1 ON src.key = src1.key group by src.key, src.value;
 select src.key, src.value FROM src JOIN src1 ON src.key = src1.key order by src.key, src.value;
 from (select key, value from src group by key, value) s select s.key group by s.key;
+select key, count(distinct value) from (select key, value from src group by key, value) t group by key;
 
 set hive.map.aggr=false;
 
@@ -41,6 +43,7 @@ explain select src.key, sum(src.key) FRO
 explain select src.key, src.value FROM src JOIN src1 ON src.key = src1.key order by src.key, src.value;
 -- RS-GBY-RS-GBY
 explain from (select key, value from src group by key, value) s select s.key group by s.key;
+explain select key, count(distinct value) from (select key, value from src group by key, value) t group by key;
 
 select key, sum(key) from (select * from src distribute by key sort by key, value) Q1 group by key;
 select key, sum(key), lower(value) from (select * from src order by key) Q1 group by key, lower(value);
@@ -49,3 +52,4 @@ select key, sum(key) as value from src g
 select src.key, sum(src.key) FROM src JOIN src1 ON src.key = src1.key group by src.key, src.value;
 select src.key, src.value FROM src JOIN src1 ON src.key = src1.key order by src.key, src.value;
 from (select key, value from src group by key, value) s select s.key group by s.key;
+select key, count(distinct value) from (select key, value from src group by key, value) t group by key;

Modified: hive/branches/branch-0.12/ql/src/test/results/clientpositive/reduce_deduplicate_extended.q.out
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.12/ql/src/test/results/clientpositive/reduce_deduplicate_extended.q.out?rev=1527843&r1=1527842&r2=1527843&view=diff
==============================================================================
--- hive/branches/branch-0.12/ql/src/test/results/clientpositive/reduce_deduplicate_extended.q.out (original)
+++ hive/branches/branch-0.12/ql/src/test/results/clientpositive/reduce_deduplicate_extended.q.out Tue Oct  1 01:03:00 2013
@@ -648,6 +648,96 @@ STAGE PLANS:
       limit: -1
 
 
+PREHOOK: query: explain select key, count(distinct value) from (select key, value from src group by key, value) t group by key
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select key, count(distinct value) from (select key, value from src group by key, value) t group by key
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value))) (TOK_GROUPBY (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL value)))) t)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_TABLE_OR_COL value)))) (TOK_GROUPBY (TOK_TABLE_OR_COL key))))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        t:src 
+          TableScan
+            alias: src
+            Select Operator
+              expressions:
+                    expr: key
+                    type: string
+                    expr: value
+                    type: string
+              outputColumnNames: key, value
+              Group By Operator
+                bucketGroup: false
+                keys:
+                      expr: key
+                      type: string
+                      expr: value
+                      type: string
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Reduce Output Operator
+                  key expressions:
+                        expr: _col0
+                        type: string
+                        expr: _col1
+                        type: string
+                  sort order: ++
+                  Map-reduce partition columns:
+                        expr: _col0
+                        type: string
+                  tag: -1
+      Reduce Operator Tree:
+        Group By Operator
+          bucketGroup: false
+          keys:
+                expr: KEY._col0
+                type: string
+                expr: KEY._col1
+                type: string
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Select Operator
+            expressions:
+                  expr: _col0
+                  type: string
+                  expr: _col1
+                  type: string
+            outputColumnNames: _col0, _col1
+            Group By Operator
+              aggregations:
+                    expr: count(DISTINCT _col1)
+              bucketGroup: false
+              keys:
+                    expr: _col0
+                    type: string
+              mode: complete
+              outputColumnNames: _col0, _col1
+              Select Operator
+                expressions:
+                      expr: _col0
+                      type: string
+                      expr: _col1
+                      type: bigint
+                outputColumnNames: _col0, _col1
+                File Output Operator
+                  compressed: false
+                  GlobalTableId: 0
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+
+
 PREHOOK: query: select key, sum(key) from (select * from src distribute by key sort by key, value) Q1 group by key
 PREHOOK: type: QUERY
 PREHOOK: Input: default@src
@@ -2305,6 +2395,323 @@ POSTHOOK: Input: default@src
 96
 97
 98
+PREHOOK: query: select key, count(distinct value) from (select key, value from src group by key, value) t group by key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select key, count(distinct value) from (select key, value from src group by key, value) t group by key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+0	1
+10	1
+100	1
+103	1
+104	1
+105	1
+11	1
+111	1
+113	1
+114	1
+116	1
+118	1
+119	1
+12	1
+120	1
+125	1
+126	1
+128	1
+129	1
+131	1
+133	1
+134	1
+136	1
+137	1
+138	1
+143	1
+145	1
+146	1
+149	1
+15	1
+150	1
+152	1
+153	1
+155	1
+156	1
+157	1
+158	1
+160	1
+162	1
+163	1
+164	1
+165	1
+166	1
+167	1
+168	1
+169	1
+17	1
+170	1
+172	1
+174	1
+175	1
+176	1
+177	1
+178	1
+179	1
+18	1
+180	1
+181	1
+183	1
+186	1
+187	1
+189	1
+19	1
+190	1
+191	1
+192	1
+193	1
+194	1
+195	1
+196	1
+197	1
+199	1
+2	1
+20	1
+200	1
+201	1
+202	1
+203	1
+205	1
+207	1
+208	1
+209	1
+213	1
+214	1
+216	1
+217	1
+218	1
+219	1
+221	1
+222	1
+223	1
+224	1
+226	1
+228	1
+229	1
+230	1
+233	1
+235	1
+237	1
+238	1
+239	1
+24	1
+241	1
+242	1
+244	1
+247	1
+248	1
+249	1
+252	1
+255	1
+256	1
+257	1
+258	1
+26	1
+260	1
+262	1
+263	1
+265	1
+266	1
+27	1
+272	1
+273	1
+274	1
+275	1
+277	1
+278	1
+28	1
+280	1
+281	1
+282	1
+283	1
+284	1
+285	1
+286	1
+287	1
+288	1
+289	1
+291	1
+292	1
+296	1
+298	1
+30	1
+302	1
+305	1
+306	1
+307	1
+308	1
+309	1
+310	1
+311	1
+315	1
+316	1
+317	1
+318	1
+321	1
+322	1
+323	1
+325	1
+327	1
+33	1
+331	1
+332	1
+333	1
+335	1
+336	1
+338	1
+339	1
+34	1
+341	1
+342	1
+344	1
+345	1
+348	1
+35	1
+351	1
+353	1
+356	1
+360	1
+362	1
+364	1
+365	1
+366	1
+367	1
+368	1
+369	1
+37	1
+373	1
+374	1
+375	1
+377	1
+378	1
+379	1
+382	1
+384	1
+386	1
+389	1
+392	1
+393	1
+394	1
+395	1
+396	1
+397	1
+399	1
+4	1
+400	1
+401	1
+402	1
+403	1
+404	1
+406	1
+407	1
+409	1
+41	1
+411	1
+413	1
+414	1
+417	1
+418	1
+419	1
+42	1
+421	1
+424	1
+427	1
+429	1
+43	1
+430	1
+431	1
+432	1
+435	1
+436	1
+437	1
+438	1
+439	1
+44	1
+443	1
+444	1
+446	1
+448	1
+449	1
+452	1
+453	1
+454	1
+455	1
+457	1
+458	1
+459	1
+460	1
+462	1
+463	1
+466	1
+467	1
+468	1
+469	1
+47	1
+470	1
+472	1
+475	1
+477	1
+478	1
+479	1
+480	1
+481	1
+482	1
+483	1
+484	1
+485	1
+487	1
+489	1
+490	1
+491	1
+492	1
+493	1
+494	1
+495	1
+496	1
+497	1
+498	1
+5	1
+51	1
+53	1
+54	1
+57	1
+58	1
+64	1
+65	1
+66	1
+67	1
+69	1
+70	1
+72	1
+74	1
+76	1
+77	1
+78	1
+8	1
+80	1
+82	1
+83	1
+84	1
+85	1
+86	1
+87	1
+9	1
+90	1
+92	1
+95	1
+96	1
+97	1
+98	1
 PREHOOK: query: -- RS-RS-GBY
 explain select key, sum(key) from (select * from src distribute by key sort by key, value) Q1 group by key
 PREHOOK: type: QUERY
@@ -2912,6 +3319,87 @@ STAGE PLANS:
       limit: -1
 
 
+PREHOOK: query: explain select key, count(distinct value) from (select key, value from src group by key, value) t group by key
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select key, count(distinct value) from (select key, value from src group by key, value) t group by key
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value))) (TOK_GROUPBY (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL value)))) t)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_TABLE_OR_COL value)))) (TOK_GROUPBY (TOK_TABLE_OR_COL key))))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        t:src 
+          TableScan
+            alias: src
+            Select Operator
+              expressions:
+                    expr: key
+                    type: string
+                    expr: value
+                    type: string
+              outputColumnNames: key, value
+              Reduce Output Operator
+                key expressions:
+                      expr: key
+                      type: string
+                      expr: value
+                      type: string
+                sort order: ++
+                Map-reduce partition columns:
+                      expr: key
+                      type: string
+                tag: -1
+      Reduce Operator Tree:
+        Group By Operator
+          bucketGroup: false
+          keys:
+                expr: KEY._col0
+                type: string
+                expr: KEY._col1
+                type: string
+          mode: complete
+          outputColumnNames: _col0, _col1
+          Select Operator
+            expressions:
+                  expr: _col0
+                  type: string
+                  expr: _col1
+                  type: string
+            outputColumnNames: _col0, _col1
+            Group By Operator
+              aggregations:
+                    expr: count(DISTINCT _col1)
+              bucketGroup: false
+              keys:
+                    expr: _col0
+                    type: string
+              mode: complete
+              outputColumnNames: _col0, _col1
+              Select Operator
+                expressions:
+                      expr: _col0
+                      type: string
+                      expr: _col1
+                      type: bigint
+                outputColumnNames: _col0, _col1
+                File Output Operator
+                  compressed: false
+                  GlobalTableId: 0
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+
+
 PREHOOK: query: select key, sum(key) from (select * from src distribute by key sort by key, value) Q1 group by key
 PREHOOK: type: QUERY
 PREHOOK: Input: default@src
@@ -4569,3 +5057,320 @@ POSTHOOK: Input: default@src
 96
 97
 98
+PREHOOK: query: select key, count(distinct value) from (select key, value from src group by key, value) t group by key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select key, count(distinct value) from (select key, value from src group by key, value) t group by key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+0	1
+10	1
+100	1
+103	1
+104	1
+105	1
+11	1
+111	1
+113	1
+114	1
+116	1
+118	1
+119	1
+12	1
+120	1
+125	1
+126	1
+128	1
+129	1
+131	1
+133	1
+134	1
+136	1
+137	1
+138	1
+143	1
+145	1
+146	1
+149	1
+15	1
+150	1
+152	1
+153	1
+155	1
+156	1
+157	1
+158	1
+160	1
+162	1
+163	1
+164	1
+165	1
+166	1
+167	1
+168	1
+169	1
+17	1
+170	1
+172	1
+174	1
+175	1
+176	1
+177	1
+178	1
+179	1
+18	1
+180	1
+181	1
+183	1
+186	1
+187	1
+189	1
+19	1
+190	1
+191	1
+192	1
+193	1
+194	1
+195	1
+196	1
+197	1
+199	1
+2	1
+20	1
+200	1
+201	1
+202	1
+203	1
+205	1
+207	1
+208	1
+209	1
+213	1
+214	1
+216	1
+217	1
+218	1
+219	1
+221	1
+222	1
+223	1
+224	1
+226	1
+228	1
+229	1
+230	1
+233	1
+235	1
+237	1
+238	1
+239	1
+24	1
+241	1
+242	1
+244	1
+247	1
+248	1
+249	1
+252	1
+255	1
+256	1
+257	1
+258	1
+26	1
+260	1
+262	1
+263	1
+265	1
+266	1
+27	1
+272	1
+273	1
+274	1
+275	1
+277	1
+278	1
+28	1
+280	1
+281	1
+282	1
+283	1
+284	1
+285	1
+286	1
+287	1
+288	1
+289	1
+291	1
+292	1
+296	1
+298	1
+30	1
+302	1
+305	1
+306	1
+307	1
+308	1
+309	1
+310	1
+311	1
+315	1
+316	1
+317	1
+318	1
+321	1
+322	1
+323	1
+325	1
+327	1
+33	1
+331	1
+332	1
+333	1
+335	1
+336	1
+338	1
+339	1
+34	1
+341	1
+342	1
+344	1
+345	1
+348	1
+35	1
+351	1
+353	1
+356	1
+360	1
+362	1
+364	1
+365	1
+366	1
+367	1
+368	1
+369	1
+37	1
+373	1
+374	1
+375	1
+377	1
+378	1
+379	1
+382	1
+384	1
+386	1
+389	1
+392	1
+393	1
+394	1
+395	1
+396	1
+397	1
+399	1
+4	1
+400	1
+401	1
+402	1
+403	1
+404	1
+406	1
+407	1
+409	1
+41	1
+411	1
+413	1
+414	1
+417	1
+418	1
+419	1
+42	1
+421	1
+424	1
+427	1
+429	1
+43	1
+430	1
+431	1
+432	1
+435	1
+436	1
+437	1
+438	1
+439	1
+44	1
+443	1
+444	1
+446	1
+448	1
+449	1
+452	1
+453	1
+454	1
+455	1
+457	1
+458	1
+459	1
+460	1
+462	1
+463	1
+466	1
+467	1
+468	1
+469	1
+47	1
+470	1
+472	1
+475	1
+477	1
+478	1
+479	1
+480	1
+481	1
+482	1
+483	1
+484	1
+485	1
+487	1
+489	1
+490	1
+491	1
+492	1
+493	1
+494	1
+495	1
+496	1
+497	1
+498	1
+5	1
+51	1
+53	1
+54	1
+57	1
+58	1
+64	1
+65	1
+66	1
+67	1
+69	1
+70	1
+72	1
+74	1
+76	1
+77	1
+78	1
+8	1
+80	1
+82	1
+83	1
+84	1
+85	1
+86	1
+87	1
+9	1
+90	1
+92	1
+95	1
+96	1
+97	1
+98	1