You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ha...@apache.org on 2016/01/11 23:08:32 UTC
hive git commit: HIVE-12824 : CBO doesn't get triggered when aggregate
function is used within windowing function (Ashutosh Chauhan via Jesus
Camacho Rodriguez)
Repository: hive
Updated Branches:
refs/heads/branch-2.0 06f8d74f7 -> 77c384da4
HIVE-12824 : CBO doesn't get triggered when aggregate function is used within windowing function (Ashutosh Chauhan via Jesus Camacho Rodriguez)
Signed-off-by: Ashutosh Chauhan <ha...@apache.org>
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/77c384da
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/77c384da
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/77c384da
Branch: refs/heads/branch-2.0
Commit: 77c384da402eca67d3bcc19c3e97fd4c625d2dc1
Parents: 06f8d74
Author: Ashutosh Chauhan <ha...@apache.org>
Authored: Fri Jan 8 17:45:47 2016 -0800
Committer: Ashutosh Chauhan <ha...@apache.org>
Committed: Mon Jan 11 14:08:12 2016 -0800
----------------------------------------------------------------------
.../translator/PlanModifierForASTConv.java | 14 ++++
.../test/queries/clientpositive/windowing_gby.q | 1 +
.../clientpositive/groupby_resolution.q.out | 4 +-
.../results/clientpositive/quotedid_basic.q.out | 32 +++++---
.../spark/groupby_resolution.q.out | 4 +-
.../clientpositive/tez/windowing_gby.q.out | 81 +++++++++++---------
6 files changed, 83 insertions(+), 53 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/77c384da/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/PlanModifierForASTConv.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/PlanModifierForASTConv.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/PlanModifierForASTConv.java
index b77beb8..e2fbb4f 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/PlanModifierForASTConv.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/PlanModifierForASTConv.java
@@ -32,10 +32,12 @@ import org.apache.calcite.rel.core.Join;
import org.apache.calcite.rel.core.Project;
import org.apache.calcite.rel.core.SetOp;
import org.apache.calcite.rel.core.Sort;
+import org.apache.calcite.rel.core.Window.RexWinAggCall;
import org.apache.calcite.rel.rules.MultiJoin;
import org.apache.calcite.rel.type.RelDataType;
import org.apache.calcite.rel.type.RelDataTypeFactory;
import org.apache.calcite.rex.RexNode;
+import org.apache.calcite.rex.RexOver;
import org.apache.calcite.sql.SqlAggFunction;
import org.apache.calcite.util.Pair;
import org.slf4j.Logger;
@@ -295,6 +297,18 @@ public class PlanModifierForASTConv {
validParent = false;
}
+ if (parent instanceof Project) {
+ for (RexNode child : parent.getChildExps()) {
+ if (child instanceof RexOver || child instanceof RexWinAggCall) {
+ // Hive can't handle select rank() over(order by sum(c1)/sum(c2)) from t1 group by c3
+ // but can handle select rank() over (order by c4) from
+ // (select sum(c1)/sum(c2) as c4 from t1 group by c3) t2;
+ // so introduce a project on top of this gby.
+ return false;
+ }
+ }
+ }
+
return validParent;
}
http://git-wip-us.apache.org/repos/asf/hive/blob/77c384da/ql/src/test/queries/clientpositive/windowing_gby.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/windowing_gby.q b/ql/src/test/queries/clientpositive/windowing_gby.q
index b948f76..d844f11 100644
--- a/ql/src/test/queries/clientpositive/windowing_gby.q
+++ b/ql/src/test/queries/clientpositive/windowing_gby.q
@@ -1,3 +1,4 @@
+set hive.mapred.mode=nonstrict;
explain
select rank() over (order by return_ratio) as return_rank from
(select sum(wr.cint)/sum(ws.c_int) as return_ratio
http://git-wip-us.apache.org/repos/asf/hive/blob/77c384da/ql/src/test/results/clientpositive/groupby_resolution.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/groupby_resolution.q.out b/ql/src/test/results/clientpositive/groupby_resolution.q.out
index ea40014..9e58b75 100644
--- a/ql/src/test/results/clientpositive/groupby_resolution.q.out
+++ b/ql/src/test/results/clientpositive/groupby_resolution.q.out
@@ -666,10 +666,10 @@ STAGE PLANS:
sort order: ++
Map-reduce partition columns: 0 (type: int)
Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: string), _col1 (type: bigint)
+ value expressions: _col0 (type: string)
Reduce Operator Tree:
Select Operator
- expressions: VALUE._col0 (type: string), VALUE._col1 (type: bigint)
+ expressions: VALUE._col0 (type: string), KEY.reducesinkkey1 (type: bigint)
outputColumnNames: _col0, _col1
Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
PTF Operator
http://git-wip-us.apache.org/repos/asf/hive/blob/77c384da/ql/src/test/results/clientpositive/quotedid_basic.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/quotedid_basic.q.out b/ql/src/test/results/clientpositive/quotedid_basic.q.out
index 519f647..43b63f2 100644
--- a/ql/src/test/results/clientpositive/quotedid_basic.q.out
+++ b/ql/src/test/results/clientpositive/quotedid_basic.q.out
@@ -175,12 +175,16 @@ STAGE PLANS:
mode: mergepartial
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
Stage: Stage-2
Map Reduce
@@ -279,12 +283,16 @@ STAGE PLANS:
mode: mergepartial
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
Stage: Stage-2
Map Reduce
http://git-wip-us.apache.org/repos/asf/hive/blob/77c384da/ql/src/test/results/clientpositive/spark/groupby_resolution.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/groupby_resolution.q.out b/ql/src/test/results/clientpositive/spark/groupby_resolution.q.out
index cb2c9bd..cef5b23 100644
--- a/ql/src/test/results/clientpositive/spark/groupby_resolution.q.out
+++ b/ql/src/test/results/clientpositive/spark/groupby_resolution.q.out
@@ -659,11 +659,11 @@ STAGE PLANS:
sort order: ++
Map-reduce partition columns: 0 (type: int)
Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: string), _col1 (type: bigint)
+ value expressions: _col0 (type: string)
Reducer 4
Reduce Operator Tree:
Select Operator
- expressions: VALUE._col0 (type: string), VALUE._col1 (type: bigint)
+ expressions: VALUE._col0 (type: string), KEY.reducesinkkey1 (type: bigint)
outputColumnNames: _col0, _col1
Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
PTF Operator
http://git-wip-us.apache.org/repos/asf/hive/blob/77c384da/ql/src/test/results/clientpositive/tez/windowing_gby.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/windowing_gby.q.out b/ql/src/test/results/clientpositive/tez/windowing_gby.q.out
index e65533f..72d1f14 100644
--- a/ql/src/test/results/clientpositive/tez/windowing_gby.q.out
+++ b/ql/src/test/results/clientpositive/tez/windowing_gby.q.out
@@ -10,7 +10,7 @@ POSTHOOK: query: explain
from cbo_t3 ws join alltypesorc wr on ws.value = wr.cstring1
group by ws.c_boolean ) in_web
POSTHOOK: type: QUERY
-Plan not optimized by CBO.
+Plan optimized by CBO.
Vertex dependency in root stage
Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE)
@@ -22,77 +22,84 @@ Stage-0
limit:-1
Stage-1
Reducer 4
- File Output Operator [FS_17]
+ File Output Operator [FS_19]
compressed:false
Statistics:Num rows: 6758 Data size: 1453080 Basic stats: COMPLETE Column stats: NONE
table:{"input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"}
- Select Operator [SEL_15]
+ Select Operator [SEL_17]
outputColumnNames:["_col0"]
Statistics:Num rows: 6758 Data size: 1453080 Basic stats: COMPLETE Column stats: NONE
- PTF Operator [PTF_14]
- Function definitions:[{"Input definition":{"type:":"WINDOWING"}},{"name:":"windowingtablefunction","order by:":"_col0","partition by:":"0"}]
+ PTF Operator [PTF_16]
+ Function definitions:[{"Input definition":{"type:":"WINDOWING"}},{"name:":"windowingtablefunction","order by:":"(UDFToDouble(_col1) / UDFToDouble(_col2))","partition by:":"0"}]
Statistics:Num rows: 6758 Data size: 1453080 Basic stats: COMPLETE Column stats: NONE
- Select Operator [SEL_13]
- | outputColumnNames:["_col0"]
+ Select Operator [SEL_15]
+ | outputColumnNames:["_col1","_col2"]
| Statistics:Num rows: 6758 Data size: 1453080 Basic stats: COMPLETE Column stats: NONE
|<-Reducer 3 [SIMPLE_EDGE]
- Reduce Output Operator [RS_12]
- key expressions:0 (type: int), _col0 (type: double)
+ Reduce Output Operator [RS_14]
+ key expressions:0 (type: int), (UDFToDouble(_col1) / UDFToDouble(_col2)) (type: double)
Map-reduce partition columns:0 (type: int)
sort order:++
Statistics:Num rows: 6758 Data size: 1453080 Basic stats: COMPLETE Column stats: NONE
- Select Operator [SEL_11]
- outputColumnNames:["_col0"]
+ value expressions:_col1 (type: bigint), _col2 (type: bigint)
+ Select Operator [SEL_13]
+ outputColumnNames:["_col1","_col2"]
Statistics:Num rows: 6758 Data size: 1453080 Basic stats: COMPLETE Column stats: NONE
- Group By Operator [GBY_10]
+ Group By Operator [GBY_12]
| aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"]
| keys:KEY._col0 (type: boolean)
| outputColumnNames:["_col0","_col1","_col2"]
| Statistics:Num rows: 6758 Data size: 1453080 Basic stats: COMPLETE Column stats: NONE
|<-Reducer 2 [SIMPLE_EDGE]
- Reduce Output Operator [RS_9]
+ Reduce Output Operator [RS_11]
key expressions:_col0 (type: boolean)
Map-reduce partition columns:_col0 (type: boolean)
sort order:+
Statistics:Num rows: 13516 Data size: 2906160 Basic stats: COMPLETE Column stats: NONE
value expressions:_col1 (type: bigint), _col2 (type: bigint)
- Group By Operator [GBY_8]
- aggregations:["sum(_col10)","sum(_col2)"]
- keys:_col4 (type: boolean)
+ Group By Operator [GBY_10]
+ aggregations:["sum(_col3)","sum(_col1)"]
+ keys:_col2 (type: boolean)
outputColumnNames:["_col0","_col1","_col2"]
Statistics:Num rows: 13516 Data size: 2906160 Basic stats: COMPLETE Column stats: NONE
- Select Operator [SEL_7]
- outputColumnNames:["_col4","_col10","_col2"]
+ Select Operator [SEL_9]
+ outputColumnNames:["_col2","_col3","_col1"]
Statistics:Num rows: 13516 Data size: 2906160 Basic stats: COMPLETE Column stats: NONE
- Merge Join Operator [MERGEJOIN_22]
+ Merge Join Operator [MERGEJOIN_24]
| condition map:[{"":"Inner Join 0 to 1"}]
- | keys:{"0":"value (type: string)","1":"cstring1 (type: string)"}
- | outputColumnNames:["_col2","_col4","_col10"]
+ | keys:{"0":"_col0 (type: string)","1":"_col1 (type: string)"}
+ | outputColumnNames:["_col1","_col2","_col3"]
| Statistics:Num rows: 13516 Data size: 2906160 Basic stats: COMPLETE Column stats: NONE
|<-Map 1 [SIMPLE_EDGE]
- | Reduce Output Operator [RS_3]
- | key expressions:value (type: string)
- | Map-reduce partition columns:value (type: string)
+ | Reduce Output Operator [RS_6]
+ | key expressions:_col0 (type: string)
+ | Map-reduce partition columns:_col0 (type: string)
| sort order:+
| Statistics:Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: NONE
- | value expressions:c_int (type: int), c_boolean (type: boolean)
- | Filter Operator [FIL_20]
- | predicate:value is not null (type: boolean)
+ | value expressions:_col1 (type: int), _col2 (type: boolean)
+ | Select Operator [SEL_2]
+ | outputColumnNames:["_col0","_col1","_col2"]
| Statistics:Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: NONE
- | TableScan [TS_0]
- | alias:ws
+ | Filter Operator [FIL_22]
+ | predicate:value is not null (type: boolean)
| Statistics:Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: NONE
+ | TableScan [TS_0]
+ | alias:ws
+ | Statistics:Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: NONE
|<-Map 5 [SIMPLE_EDGE]
- Reduce Output Operator [RS_5]
- key expressions:cstring1 (type: string)
- Map-reduce partition columns:cstring1 (type: string)
+ Reduce Output Operator [RS_7]
+ key expressions:_col1 (type: string)
+ Map-reduce partition columns:_col1 (type: string)
sort order:+
Statistics:Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
- value expressions:cint (type: int)
- Filter Operator [FIL_21]
- predicate:cstring1 is not null (type: boolean)
+ value expressions:_col0 (type: int)
+ Select Operator [SEL_5]
+ outputColumnNames:["_col0","_col1"]
Statistics:Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
- TableScan [TS_1]
- alias:wr
+ Filter Operator [FIL_23]
+ predicate:cstring1 is not null (type: boolean)
Statistics:Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+ TableScan [TS_3]
+ alias:wr
+ Statistics:Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE