You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by th...@apache.org on 2013/10/01 03:03:01 UTC
svn commit: r1527843 - in /hive/branches/branch-0.12/ql/src:
java/org/apache/hadoop/hive/ql/optimizer/correlation/
test/queries/clientpositive/ test/results/clientpositive/
Author: thejas
Date: Tue Oct 1 01:03:00 2013
New Revision: 1527843
URL: http://svn.apache.org/r1527843
Log:
HIVE-5357: ReduceSinkDeDuplication optimizer pick the wrong keys in pRS-cGBYm-cRS-cGBYr scenario when there are distinct keys in child GBY (Chun Chen via Ashutosh Chauhan)
Modified:
hive/branches/branch-0.12/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/CorrelationUtilities.java
hive/branches/branch-0.12/ql/src/test/queries/clientpositive/reduce_deduplicate_extended.q
hive/branches/branch-0.12/ql/src/test/results/clientpositive/reduce_deduplicate_extended.q.out
Modified: hive/branches/branch-0.12/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/CorrelationUtilities.java
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.12/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/CorrelationUtilities.java?rev=1527843&r1=1527842&r2=1527843&view=diff
==============================================================================
--- hive/branches/branch-0.12/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/CorrelationUtilities.java (original)
+++ hive/branches/branch-0.12/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/CorrelationUtilities.java Tue Oct 1 01:03:00 2013
@@ -378,7 +378,8 @@ public final class CorrelationUtilities
// copies desc of cGBYm to cGBYr and remove cGBYm and cRS
GroupByOperator cGBYm = (GroupByOperator) parent;
- cGBYr.getConf().setKeys(cGBYm.getConf().getKeys());
+ cGBYr.getConf().setKeys(ExprNodeDescUtils.backtrack(ExprNodeDescUtils.backtrack(cGBYr
+ .getConf().getKeys(), cGBYr, cRS), cRS, cGBYm));
cGBYr.getConf().setAggregators(cGBYm.getConf().getAggregators());
for (AggregationDesc aggr : cGBYm.getConf().getAggregators()) {
aggr.setMode(GenericUDAFEvaluator.Mode.COMPLETE);
Modified: hive/branches/branch-0.12/ql/src/test/queries/clientpositive/reduce_deduplicate_extended.q
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.12/ql/src/test/queries/clientpositive/reduce_deduplicate_extended.q?rev=1527843&r1=1527842&r2=1527843&view=diff
==============================================================================
--- hive/branches/branch-0.12/ql/src/test/queries/clientpositive/reduce_deduplicate_extended.q (original)
+++ hive/branches/branch-0.12/ql/src/test/queries/clientpositive/reduce_deduplicate_extended.q Tue Oct 1 01:03:00 2013
@@ -18,6 +18,7 @@ explain select src.key, sum(src.key) FRO
explain select src.key, src.value FROM src JOIN src1 ON src.key = src1.key order by src.key, src.value;
-- mGBY-RS-rGBY-mGBY-RS-rGBY
explain from (select key, value from src group by key, value) s select s.key group by s.key;
+explain select key, count(distinct value) from (select key, value from src group by key, value) t group by key;
select key, sum(key) from (select * from src distribute by key sort by key, value) Q1 group by key;
select key, sum(key), lower(value) from (select * from src order by key) Q1 group by key, lower(value);
@@ -26,6 +27,7 @@ select key, sum(key) as value from src g
select src.key, sum(src.key) FROM src JOIN src1 ON src.key = src1.key group by src.key, src.value;
select src.key, src.value FROM src JOIN src1 ON src.key = src1.key order by src.key, src.value;
from (select key, value from src group by key, value) s select s.key group by s.key;
+select key, count(distinct value) from (select key, value from src group by key, value) t group by key;
set hive.map.aggr=false;
@@ -41,6 +43,7 @@ explain select src.key, sum(src.key) FRO
explain select src.key, src.value FROM src JOIN src1 ON src.key = src1.key order by src.key, src.value;
-- RS-GBY-RS-GBY
explain from (select key, value from src group by key, value) s select s.key group by s.key;
+explain select key, count(distinct value) from (select key, value from src group by key, value) t group by key;
select key, sum(key) from (select * from src distribute by key sort by key, value) Q1 group by key;
select key, sum(key), lower(value) from (select * from src order by key) Q1 group by key, lower(value);
@@ -49,3 +52,4 @@ select key, sum(key) as value from src g
select src.key, sum(src.key) FROM src JOIN src1 ON src.key = src1.key group by src.key, src.value;
select src.key, src.value FROM src JOIN src1 ON src.key = src1.key order by src.key, src.value;
from (select key, value from src group by key, value) s select s.key group by s.key;
+select key, count(distinct value) from (select key, value from src group by key, value) t group by key;
Modified: hive/branches/branch-0.12/ql/src/test/results/clientpositive/reduce_deduplicate_extended.q.out
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.12/ql/src/test/results/clientpositive/reduce_deduplicate_extended.q.out?rev=1527843&r1=1527842&r2=1527843&view=diff
==============================================================================
--- hive/branches/branch-0.12/ql/src/test/results/clientpositive/reduce_deduplicate_extended.q.out (original)
+++ hive/branches/branch-0.12/ql/src/test/results/clientpositive/reduce_deduplicate_extended.q.out Tue Oct 1 01:03:00 2013
@@ -648,6 +648,96 @@ STAGE PLANS:
limit: -1
+PREHOOK: query: explain select key, count(distinct value) from (select key, value from src group by key, value) t group by key
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select key, count(distinct value) from (select key, value from src group by key, value) t group by key
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value))) (TOK_GROUPBY (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL value)))) t)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_TABLE_OR_COL value)))) (TOK_GROUPBY (TOK_TABLE_OR_COL key))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ t:src
+ TableScan
+ alias: src
+ Select Operator
+ expressions:
+ expr: key
+ type: string
+ expr: value
+ type: string
+ outputColumnNames: key, value
+ Group By Operator
+ bucketGroup: false
+ keys:
+ expr: key
+ type: string
+ expr: value
+ type: string
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ sort order: ++
+ Map-reduce partition columns:
+ expr: _col0
+ type: string
+ tag: -1
+ Reduce Operator Tree:
+ Group By Operator
+ bucketGroup: false
+ keys:
+ expr: KEY._col0
+ type: string
+ expr: KEY._col1
+ type: string
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ outputColumnNames: _col0, _col1
+ Group By Operator
+ aggregations:
+ expr: count(DISTINCT _col1)
+ bucketGroup: false
+ keys:
+ expr: _col0
+ type: string
+ mode: complete
+ outputColumnNames: _col0, _col1
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: bigint
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+
+
PREHOOK: query: select key, sum(key) from (select * from src distribute by key sort by key, value) Q1 group by key
PREHOOK: type: QUERY
PREHOOK: Input: default@src
@@ -2305,6 +2395,323 @@ POSTHOOK: Input: default@src
96
97
98
+PREHOOK: query: select key, count(distinct value) from (select key, value from src group by key, value) t group by key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select key, count(distinct value) from (select key, value from src group by key, value) t group by key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+0 1
+10 1
+100 1
+103 1
+104 1
+105 1
+11 1
+111 1
+113 1
+114 1
+116 1
+118 1
+119 1
+12 1
+120 1
+125 1
+126 1
+128 1
+129 1
+131 1
+133 1
+134 1
+136 1
+137 1
+138 1
+143 1
+145 1
+146 1
+149 1
+15 1
+150 1
+152 1
+153 1
+155 1
+156 1
+157 1
+158 1
+160 1
+162 1
+163 1
+164 1
+165 1
+166 1
+167 1
+168 1
+169 1
+17 1
+170 1
+172 1
+174 1
+175 1
+176 1
+177 1
+178 1
+179 1
+18 1
+180 1
+181 1
+183 1
+186 1
+187 1
+189 1
+19 1
+190 1
+191 1
+192 1
+193 1
+194 1
+195 1
+196 1
+197 1
+199 1
+2 1
+20 1
+200 1
+201 1
+202 1
+203 1
+205 1
+207 1
+208 1
+209 1
+213 1
+214 1
+216 1
+217 1
+218 1
+219 1
+221 1
+222 1
+223 1
+224 1
+226 1
+228 1
+229 1
+230 1
+233 1
+235 1
+237 1
+238 1
+239 1
+24 1
+241 1
+242 1
+244 1
+247 1
+248 1
+249 1
+252 1
+255 1
+256 1
+257 1
+258 1
+26 1
+260 1
+262 1
+263 1
+265 1
+266 1
+27 1
+272 1
+273 1
+274 1
+275 1
+277 1
+278 1
+28 1
+280 1
+281 1
+282 1
+283 1
+284 1
+285 1
+286 1
+287 1
+288 1
+289 1
+291 1
+292 1
+296 1
+298 1
+30 1
+302 1
+305 1
+306 1
+307 1
+308 1
+309 1
+310 1
+311 1
+315 1
+316 1
+317 1
+318 1
+321 1
+322 1
+323 1
+325 1
+327 1
+33 1
+331 1
+332 1
+333 1
+335 1
+336 1
+338 1
+339 1
+34 1
+341 1
+342 1
+344 1
+345 1
+348 1
+35 1
+351 1
+353 1
+356 1
+360 1
+362 1
+364 1
+365 1
+366 1
+367 1
+368 1
+369 1
+37 1
+373 1
+374 1
+375 1
+377 1
+378 1
+379 1
+382 1
+384 1
+386 1
+389 1
+392 1
+393 1
+394 1
+395 1
+396 1
+397 1
+399 1
+4 1
+400 1
+401 1
+402 1
+403 1
+404 1
+406 1
+407 1
+409 1
+41 1
+411 1
+413 1
+414 1
+417 1
+418 1
+419 1
+42 1
+421 1
+424 1
+427 1
+429 1
+43 1
+430 1
+431 1
+432 1
+435 1
+436 1
+437 1
+438 1
+439 1
+44 1
+443 1
+444 1
+446 1
+448 1
+449 1
+452 1
+453 1
+454 1
+455 1
+457 1
+458 1
+459 1
+460 1
+462 1
+463 1
+466 1
+467 1
+468 1
+469 1
+47 1
+470 1
+472 1
+475 1
+477 1
+478 1
+479 1
+480 1
+481 1
+482 1
+483 1
+484 1
+485 1
+487 1
+489 1
+490 1
+491 1
+492 1
+493 1
+494 1
+495 1
+496 1
+497 1
+498 1
+5 1
+51 1
+53 1
+54 1
+57 1
+58 1
+64 1
+65 1
+66 1
+67 1
+69 1
+70 1
+72 1
+74 1
+76 1
+77 1
+78 1
+8 1
+80 1
+82 1
+83 1
+84 1
+85 1
+86 1
+87 1
+9 1
+90 1
+92 1
+95 1
+96 1
+97 1
+98 1
PREHOOK: query: -- RS-RS-GBY
explain select key, sum(key) from (select * from src distribute by key sort by key, value) Q1 group by key
PREHOOK: type: QUERY
@@ -2912,6 +3319,87 @@ STAGE PLANS:
limit: -1
+PREHOOK: query: explain select key, count(distinct value) from (select key, value from src group by key, value) t group by key
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select key, count(distinct value) from (select key, value from src group by key, value) t group by key
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value))) (TOK_GROUPBY (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL value)))) t)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_TABLE_OR_COL value)))) (TOK_GROUPBY (TOK_TABLE_OR_COL key))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ t:src
+ TableScan
+ alias: src
+ Select Operator
+ expressions:
+ expr: key
+ type: string
+ expr: value
+ type: string
+ outputColumnNames: key, value
+ Reduce Output Operator
+ key expressions:
+ expr: key
+ type: string
+ expr: value
+ type: string
+ sort order: ++
+ Map-reduce partition columns:
+ expr: key
+ type: string
+ tag: -1
+ Reduce Operator Tree:
+ Group By Operator
+ bucketGroup: false
+ keys:
+ expr: KEY._col0
+ type: string
+ expr: KEY._col1
+ type: string
+ mode: complete
+ outputColumnNames: _col0, _col1
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ outputColumnNames: _col0, _col1
+ Group By Operator
+ aggregations:
+ expr: count(DISTINCT _col1)
+ bucketGroup: false
+ keys:
+ expr: _col0
+ type: string
+ mode: complete
+ outputColumnNames: _col0, _col1
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: bigint
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+
+
PREHOOK: query: select key, sum(key) from (select * from src distribute by key sort by key, value) Q1 group by key
PREHOOK: type: QUERY
PREHOOK: Input: default@src
@@ -4569,3 +5057,320 @@ POSTHOOK: Input: default@src
96
97
98
+PREHOOK: query: select key, count(distinct value) from (select key, value from src group by key, value) t group by key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select key, count(distinct value) from (select key, value from src group by key, value) t group by key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+0 1
+10 1
+100 1
+103 1
+104 1
+105 1
+11 1
+111 1
+113 1
+114 1
+116 1
+118 1
+119 1
+12 1
+120 1
+125 1
+126 1
+128 1
+129 1
+131 1
+133 1
+134 1
+136 1
+137 1
+138 1
+143 1
+145 1
+146 1
+149 1
+15 1
+150 1
+152 1
+153 1
+155 1
+156 1
+157 1
+158 1
+160 1
+162 1
+163 1
+164 1
+165 1
+166 1
+167 1
+168 1
+169 1
+17 1
+170 1
+172 1
+174 1
+175 1
+176 1
+177 1
+178 1
+179 1
+18 1
+180 1
+181 1
+183 1
+186 1
+187 1
+189 1
+19 1
+190 1
+191 1
+192 1
+193 1
+194 1
+195 1
+196 1
+197 1
+199 1
+2 1
+20 1
+200 1
+201 1
+202 1
+203 1
+205 1
+207 1
+208 1
+209 1
+213 1
+214 1
+216 1
+217 1
+218 1
+219 1
+221 1
+222 1
+223 1
+224 1
+226 1
+228 1
+229 1
+230 1
+233 1
+235 1
+237 1
+238 1
+239 1
+24 1
+241 1
+242 1
+244 1
+247 1
+248 1
+249 1
+252 1
+255 1
+256 1
+257 1
+258 1
+26 1
+260 1
+262 1
+263 1
+265 1
+266 1
+27 1
+272 1
+273 1
+274 1
+275 1
+277 1
+278 1
+28 1
+280 1
+281 1
+282 1
+283 1
+284 1
+285 1
+286 1
+287 1
+288 1
+289 1
+291 1
+292 1
+296 1
+298 1
+30 1
+302 1
+305 1
+306 1
+307 1
+308 1
+309 1
+310 1
+311 1
+315 1
+316 1
+317 1
+318 1
+321 1
+322 1
+323 1
+325 1
+327 1
+33 1
+331 1
+332 1
+333 1
+335 1
+336 1
+338 1
+339 1
+34 1
+341 1
+342 1
+344 1
+345 1
+348 1
+35 1
+351 1
+353 1
+356 1
+360 1
+362 1
+364 1
+365 1
+366 1
+367 1
+368 1
+369 1
+37 1
+373 1
+374 1
+375 1
+377 1
+378 1
+379 1
+382 1
+384 1
+386 1
+389 1
+392 1
+393 1
+394 1
+395 1
+396 1
+397 1
+399 1
+4 1
+400 1
+401 1
+402 1
+403 1
+404 1
+406 1
+407 1
+409 1
+41 1
+411 1
+413 1
+414 1
+417 1
+418 1
+419 1
+42 1
+421 1
+424 1
+427 1
+429 1
+43 1
+430 1
+431 1
+432 1
+435 1
+436 1
+437 1
+438 1
+439 1
+44 1
+443 1
+444 1
+446 1
+448 1
+449 1
+452 1
+453 1
+454 1
+455 1
+457 1
+458 1
+459 1
+460 1
+462 1
+463 1
+466 1
+467 1
+468 1
+469 1
+47 1
+470 1
+472 1
+475 1
+477 1
+478 1
+479 1
+480 1
+481 1
+482 1
+483 1
+484 1
+485 1
+487 1
+489 1
+490 1
+491 1
+492 1
+493 1
+494 1
+495 1
+496 1
+497 1
+498 1
+5 1
+51 1
+53 1
+54 1
+57 1
+58 1
+64 1
+65 1
+66 1
+67 1
+69 1
+70 1
+72 1
+74 1
+76 1
+77 1
+78 1
+8 1
+80 1
+82 1
+83 1
+84 1
+85 1
+86 1
+87 1
+9 1
+90 1
+92 1
+95 1
+96 1
+97 1
+98 1