You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ha...@apache.org on 2016/01/13 10:35:44 UTC

hive git commit: HIVE-12640 : Allow StatsOptimizer to optimize the query for Constant GroupBy keys (Hari Subramaniyan, reviewed by Ashutosh Chauhan)

Repository: hive
Updated Branches:
  refs/heads/master 9f1ff4f17 -> de30fe4e2


HIVE-12640 : Allow StatsOptimizer to optimize the query for Constant GroupBy keys (Hari Subramaniyan, reviewed by Ashutosh Chauhan)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/de30fe4e
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/de30fe4e
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/de30fe4e

Branch: refs/heads/master
Commit: de30fe4e2dd9d3aa62d522fbe340cce549f0d3f5
Parents: 9f1ff4f
Author: Hari Subramaniyan <ha...@apache.org>
Authored: Wed Jan 13 01:35:34 2016 -0800
Committer: Hari Subramaniyan <ha...@apache.org>
Committed: Wed Jan 13 01:35:34 2016 -0800

----------------------------------------------------------------------
 .../hive/ql/optimizer/StatsOptimizer.java       | 22 ++++++++--
 .../clientpositive/metadata_only_queries.q      |  6 +++
 .../clientpositive/metadata_only_queries.q.out  | 46 ++++++++++++++++++++
 .../spark/metadata_only_queries.q.out           | 46 ++++++++++++++++++++
 .../tez/metadata_only_queries.q.out             | 46 ++++++++++++++++++++
 5 files changed, 162 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/de30fe4e/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java
index 03c1c3f..03dcf9f 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java
@@ -65,6 +65,7 @@ import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
 import org.apache.hadoop.hive.ql.plan.FetchWork;
+import org.apache.hadoop.hive.ql.plan.GroupByDesc;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFCount;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFMax;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFMin;
@@ -199,6 +200,21 @@ public class StatsOptimizer extends Transform {
       }
     }
 
+    private boolean hasNullOrConstantGbyKey(GroupByOperator gbyOp) { // true if gbyOp has no key, or only constant keys
+      GroupByDesc gbyDesc = gbyOp.getConf();
+      // Null (absent) key case: the output column count equals the aggregator count (presumably no key columns are emitted).
+      if (gbyDesc.getOutputColumnNames().size() ==
+        gbyDesc.getAggregators().size()) {
+        return true;
+      }
+      for (ExprNodeDesc en :gbyDesc.getKeys()) { // otherwise every key expression must be a constant
+        if (!(en instanceof ExprNodeConstantDesc)) {
+          return false; // found a key that is not an ExprNodeConstantDesc
+        }
+      }
+      return true; // every key is an ExprNodeConstantDesc
+    }
+
     @Override
     public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
         Object... nodeOutputs) throws SemanticException {
@@ -227,8 +243,7 @@ public class StatsOptimizer extends Transform {
         // Since we have done an exact match on TS-SEL-GBY-RS-GBY-(SEL)-FS
         // we need not to do any instanceof checks for following.
         GroupByOperator pgbyOp = (GroupByOperator)stack.get(2);
-        if (pgbyOp.getConf().getOutputColumnNames().size() !=
-            pgbyOp.getConf().getAggregators().size()) {
+        if (!hasNullOrConstantGbyKey(pgbyOp)) {
           return null;
         }
         ReduceSinkOperator rsOp = (ReduceSinkOperator)stack.get(3);
@@ -238,8 +253,7 @@ public class StatsOptimizer extends Transform {
         }
 
         GroupByOperator cgbyOp = (GroupByOperator)stack.get(4);
-        if (cgbyOp.getConf().getOutputColumnNames().size() !=
-            cgbyOp.getConf().getAggregators().size()) {
+        if (!hasNullOrConstantGbyKey(cgbyOp)) {
           return null;
         }
         Operator<?> last = (Operator<?>) stack.get(5);

http://git-wip-us.apache.org/repos/asf/hive/blob/de30fe4e/ql/src/test/queries/clientpositive/metadata_only_queries.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/metadata_only_queries.q b/ql/src/test/queries/clientpositive/metadata_only_queries.q
index bce121d..cc72bb3 100644
--- a/ql/src/test/queries/clientpositive/metadata_only_queries.q
+++ b/ql/src/test/queries/clientpositive/metadata_only_queries.q
@@ -93,6 +93,12 @@ select min(i), '1' as one, max(i), min(b), max(b), min(f), max(f), 3+4.0 as thre
 
 explain select count(ts) from stats_tbl_part;
 
+explain select count('1') from stats_tbl group by '1';
+select count('1') from stats_tbl group by '1';
+
+explain select count('1') from stats_tbl_part group by '1';
+select count('1') from stats_tbl_part group by '1';
+
 drop table stats_tbl;
 drop table stats_tbl_part;
 

http://git-wip-us.apache.org/repos/asf/hive/blob/de30fe4e/ql/src/test/results/clientpositive/metadata_only_queries.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/metadata_only_queries.q.out b/ql/src/test/results/clientpositive/metadata_only_queries.q.out
index 65a4dfa..2f782a9 100644
--- a/ql/src/test/results/clientpositive/metadata_only_queries.q.out
+++ b/ql/src/test/results/clientpositive/metadata_only_queries.q.out
@@ -620,6 +620,52 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
+PREHOOK: query: explain select count('1') from stats_tbl group by '1'
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select count('1') from stats_tbl group by '1'
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-0
+    Fetch Operator
+      limit: 1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select count('1') from stats_tbl group by '1'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@stats_tbl
+#### A masked pattern was here ####
+POSTHOOK: query: select count('1') from stats_tbl group by '1'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@stats_tbl
+#### A masked pattern was here ####
+9999
+PREHOOK: query: explain select count('1') from stats_tbl_part group by '1'
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select count('1') from stats_tbl_part group by '1'
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-0
+    Fetch Operator
+      limit: 1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select count('1') from stats_tbl_part group by '1'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@stats_tbl_part
+#### A masked pattern was here ####
+POSTHOOK: query: select count('1') from stats_tbl_part group by '1'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@stats_tbl_part
+#### A masked pattern was here ####
+9489
 PREHOOK: query: drop table stats_tbl
 PREHOOK: type: DROPTABLE
 PREHOOK: Input: default@stats_tbl

http://git-wip-us.apache.org/repos/asf/hive/blob/de30fe4e/ql/src/test/results/clientpositive/spark/metadata_only_queries.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/metadata_only_queries.q.out b/ql/src/test/results/clientpositive/spark/metadata_only_queries.q.out
index 0d85f4e..359a9dd 100644
--- a/ql/src/test/results/clientpositive/spark/metadata_only_queries.q.out
+++ b/ql/src/test/results/clientpositive/spark/metadata_only_queries.q.out
@@ -650,6 +650,52 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
+PREHOOK: query: explain select count('1') from stats_tbl group by '1'
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select count('1') from stats_tbl group by '1'
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-0
+    Fetch Operator
+      limit: 1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select count('1') from stats_tbl group by '1'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@stats_tbl
+#### A masked pattern was here ####
+POSTHOOK: query: select count('1') from stats_tbl group by '1'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@stats_tbl
+#### A masked pattern was here ####
+9999
+PREHOOK: query: explain select count('1') from stats_tbl_part group by '1'
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select count('1') from stats_tbl_part group by '1'
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-0
+    Fetch Operator
+      limit: 1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select count('1') from stats_tbl_part group by '1'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@stats_tbl_part
+#### A masked pattern was here ####
+POSTHOOK: query: select count('1') from stats_tbl_part group by '1'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@stats_tbl_part
+#### A masked pattern was here ####
+9489
 PREHOOK: query: drop table stats_tbl
 PREHOOK: type: DROPTABLE
 PREHOOK: Input: default@stats_tbl

http://git-wip-us.apache.org/repos/asf/hive/blob/de30fe4e/ql/src/test/results/clientpositive/tez/metadata_only_queries.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/metadata_only_queries.q.out b/ql/src/test/results/clientpositive/tez/metadata_only_queries.q.out
index ab86ab0..14fbf0e 100644
--- a/ql/src/test/results/clientpositive/tez/metadata_only_queries.q.out
+++ b/ql/src/test/results/clientpositive/tez/metadata_only_queries.q.out
@@ -650,6 +650,52 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
+PREHOOK: query: explain select count('1') from stats_tbl group by '1'
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select count('1') from stats_tbl group by '1'
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-0
+    Fetch Operator
+      limit: 1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select count('1') from stats_tbl group by '1'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@stats_tbl
+#### A masked pattern was here ####
+POSTHOOK: query: select count('1') from stats_tbl group by '1'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@stats_tbl
+#### A masked pattern was here ####
+9999
+PREHOOK: query: explain select count('1') from stats_tbl_part group by '1'
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select count('1') from stats_tbl_part group by '1'
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-0
+    Fetch Operator
+      limit: 1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select count('1') from stats_tbl_part group by '1'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@stats_tbl_part
+#### A masked pattern was here ####
+POSTHOOK: query: select count('1') from stats_tbl_part group by '1'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@stats_tbl_part
+#### A masked pattern was here ####
+9489
 PREHOOK: query: drop table stats_tbl
 PREHOOK: type: DROPTABLE
 PREHOOK: Input: default@stats_tbl