You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ha...@apache.org on 2016/01/13 10:35:44 UTC
hive git commit: HIVE-12640 : Allow StatsOptimizer to optimize the
query for Constant GroupBy keys (Hari Subramaniyan,
reviewed by Ashutosh Chauhan)
Repository: hive
Updated Branches:
refs/heads/master 9f1ff4f17 -> de30fe4e2
HIVE-12640 : Allow StatsOptimizer to optimize the query for Constant GroupBy keys (Hari Subramaniyan, reviewed by Ashutosh Chauhan)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/de30fe4e
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/de30fe4e
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/de30fe4e
Branch: refs/heads/master
Commit: de30fe4e2dd9d3aa62d522fbe340cce549f0d3f5
Parents: 9f1ff4f
Author: Hari Subramaniyan <ha...@apache.org>
Authored: Wed Jan 13 01:35:34 2016 -0800
Committer: Hari Subramaniyan <ha...@apache.org>
Committed: Wed Jan 13 01:35:34 2016 -0800
----------------------------------------------------------------------
.../hive/ql/optimizer/StatsOptimizer.java | 22 ++++++++--
.../clientpositive/metadata_only_queries.q | 6 +++
.../clientpositive/metadata_only_queries.q.out | 46 ++++++++++++++++++++
.../spark/metadata_only_queries.q.out | 46 ++++++++++++++++++++
.../tez/metadata_only_queries.q.out | 46 ++++++++++++++++++++
5 files changed, 162 insertions(+), 4 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/de30fe4e/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java
index 03c1c3f..03dcf9f 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java
@@ -65,6 +65,7 @@ import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.FetchWork;
+import org.apache.hadoop.hive.ql.plan.GroupByDesc;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFCount;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFMax;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFMin;
@@ -199,6 +200,21 @@ public class StatsOptimizer extends Transform {
}
}
+ private boolean hasNullOrConstantGbyKey(GroupByOperator gbyOp) {
+ GroupByDesc gbyDesc = gbyOp.getConf();
+ // Null (absent) group-by key: the operator has exactly as many output columns as aggregators
+ if (gbyDesc.getOutputColumnNames().size() ==
+ gbyDesc.getAggregators().size()) {
+ return true;
+ }
+ for (ExprNodeDesc en :gbyDesc.getKeys()) {
+ if (!(en instanceof ExprNodeConstantDesc)) {
+ return false; // found a key that is not a constant expression
+ }
+ }
+ return true; // every key is an ExprNodeConstantDesc
+ }
+
@Override
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
Object... nodeOutputs) throws SemanticException {
@@ -227,8 +243,7 @@ public class StatsOptimizer extends Transform {
// Since we have done an exact match on TS-SEL-GBY-RS-GBY-(SEL)-FS
// we need not to do any instanceof checks for following.
GroupByOperator pgbyOp = (GroupByOperator)stack.get(2);
- if (pgbyOp.getConf().getOutputColumnNames().size() !=
- pgbyOp.getConf().getAggregators().size()) {
+ if (!hasNullOrConstantGbyKey(pgbyOp)) {
return null;
}
ReduceSinkOperator rsOp = (ReduceSinkOperator)stack.get(3);
@@ -238,8 +253,7 @@ public class StatsOptimizer extends Transform {
}
GroupByOperator cgbyOp = (GroupByOperator)stack.get(4);
- if (cgbyOp.getConf().getOutputColumnNames().size() !=
- cgbyOp.getConf().getAggregators().size()) {
+ if (!hasNullOrConstantGbyKey(cgbyOp)) {
return null;
}
Operator<?> last = (Operator<?>) stack.get(5);
http://git-wip-us.apache.org/repos/asf/hive/blob/de30fe4e/ql/src/test/queries/clientpositive/metadata_only_queries.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/metadata_only_queries.q b/ql/src/test/queries/clientpositive/metadata_only_queries.q
index bce121d..cc72bb3 100644
--- a/ql/src/test/queries/clientpositive/metadata_only_queries.q
+++ b/ql/src/test/queries/clientpositive/metadata_only_queries.q
@@ -93,6 +93,12 @@ select min(i), '1' as one, max(i), min(b), max(b), min(f), max(f), 3+4.0 as thre
explain select count(ts) from stats_tbl_part;
+explain select count('1') from stats_tbl group by '1';
+select count('1') from stats_tbl group by '1';
+
+explain select count('1') from stats_tbl_part group by '1';
+select count('1') from stats_tbl_part group by '1';
+
drop table stats_tbl;
drop table stats_tbl_part;
http://git-wip-us.apache.org/repos/asf/hive/blob/de30fe4e/ql/src/test/results/clientpositive/metadata_only_queries.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/metadata_only_queries.q.out b/ql/src/test/results/clientpositive/metadata_only_queries.q.out
index 65a4dfa..2f782a9 100644
--- a/ql/src/test/results/clientpositive/metadata_only_queries.q.out
+++ b/ql/src/test/results/clientpositive/metadata_only_queries.q.out
@@ -620,6 +620,52 @@ STAGE PLANS:
Processor Tree:
ListSink
+PREHOOK: query: explain select count('1') from stats_tbl group by '1'
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select count('1') from stats_tbl group by '1'
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-0
+ Fetch Operator
+ limit: 1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select count('1') from stats_tbl group by '1'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@stats_tbl
+#### A masked pattern was here ####
+POSTHOOK: query: select count('1') from stats_tbl group by '1'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@stats_tbl
+#### A masked pattern was here ####
+9999
+PREHOOK: query: explain select count('1') from stats_tbl_part group by '1'
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select count('1') from stats_tbl_part group by '1'
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-0
+ Fetch Operator
+ limit: 1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select count('1') from stats_tbl_part group by '1'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@stats_tbl_part
+#### A masked pattern was here ####
+POSTHOOK: query: select count('1') from stats_tbl_part group by '1'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@stats_tbl_part
+#### A masked pattern was here ####
+9489
PREHOOK: query: drop table stats_tbl
PREHOOK: type: DROPTABLE
PREHOOK: Input: default@stats_tbl
http://git-wip-us.apache.org/repos/asf/hive/blob/de30fe4e/ql/src/test/results/clientpositive/spark/metadata_only_queries.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/metadata_only_queries.q.out b/ql/src/test/results/clientpositive/spark/metadata_only_queries.q.out
index 0d85f4e..359a9dd 100644
--- a/ql/src/test/results/clientpositive/spark/metadata_only_queries.q.out
+++ b/ql/src/test/results/clientpositive/spark/metadata_only_queries.q.out
@@ -650,6 +650,52 @@ STAGE PLANS:
Processor Tree:
ListSink
+PREHOOK: query: explain select count('1') from stats_tbl group by '1'
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select count('1') from stats_tbl group by '1'
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-0
+ Fetch Operator
+ limit: 1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select count('1') from stats_tbl group by '1'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@stats_tbl
+#### A masked pattern was here ####
+POSTHOOK: query: select count('1') from stats_tbl group by '1'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@stats_tbl
+#### A masked pattern was here ####
+9999
+PREHOOK: query: explain select count('1') from stats_tbl_part group by '1'
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select count('1') from stats_tbl_part group by '1'
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-0
+ Fetch Operator
+ limit: 1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select count('1') from stats_tbl_part group by '1'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@stats_tbl_part
+#### A masked pattern was here ####
+POSTHOOK: query: select count('1') from stats_tbl_part group by '1'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@stats_tbl_part
+#### A masked pattern was here ####
+9489
PREHOOK: query: drop table stats_tbl
PREHOOK: type: DROPTABLE
PREHOOK: Input: default@stats_tbl
http://git-wip-us.apache.org/repos/asf/hive/blob/de30fe4e/ql/src/test/results/clientpositive/tez/metadata_only_queries.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/metadata_only_queries.q.out b/ql/src/test/results/clientpositive/tez/metadata_only_queries.q.out
index ab86ab0..14fbf0e 100644
--- a/ql/src/test/results/clientpositive/tez/metadata_only_queries.q.out
+++ b/ql/src/test/results/clientpositive/tez/metadata_only_queries.q.out
@@ -650,6 +650,52 @@ STAGE PLANS:
Processor Tree:
ListSink
+PREHOOK: query: explain select count('1') from stats_tbl group by '1'
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select count('1') from stats_tbl group by '1'
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-0
+ Fetch Operator
+ limit: 1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select count('1') from stats_tbl group by '1'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@stats_tbl
+#### A masked pattern was here ####
+POSTHOOK: query: select count('1') from stats_tbl group by '1'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@stats_tbl
+#### A masked pattern was here ####
+9999
+PREHOOK: query: explain select count('1') from stats_tbl_part group by '1'
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select count('1') from stats_tbl_part group by '1'
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-0
+ Fetch Operator
+ limit: 1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select count('1') from stats_tbl_part group by '1'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@stats_tbl_part
+#### A masked pattern was here ####
+POSTHOOK: query: select count('1') from stats_tbl_part group by '1'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@stats_tbl_part
+#### A masked pattern was here ####
+9489
PREHOOK: query: drop table stats_tbl
PREHOOK: type: DROPTABLE
PREHOOK: Input: default@stats_tbl