You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by px...@apache.org on 2016/12/13 06:20:19 UTC

hive git commit: HIVE-13452: StatsOptimizer should return no rows on empty table with group by (Pengcheng Xiong, reviewed by Ashutosh Chauhan)

Repository: hive
Updated Branches:
  refs/heads/master 4f2fd77b7 -> cccd38422


HIVE-13452: StatsOptimizer should return no rows on empty table with group by (Pengcheng Xiong, reviewed by Ashutosh Chauhan)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/cccd3842
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/cccd3842
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/cccd3842

Branch: refs/heads/master
Commit: cccd38422ee8d3410d53064e282c42ad165862c7
Parents: 4f2fd77
Author: Pengcheng Xiong <px...@apache.org>
Authored: Mon Dec 12 22:20:05 2016 -0800
Committer: Pengcheng Xiong <px...@apache.org>
Committed: Mon Dec 12 22:20:05 2016 -0800

----------------------------------------------------------------------
 .../hive/ql/optimizer/StatsOptimizer.java       |  47 +++--
 ql/src/test/queries/clientpositive/constGby.q   |  21 +++
 .../test/results/clientpositive/constGby.q.out  | 176 +++++++++++++++++++
 3 files changed, 228 insertions(+), 16 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/cccd3842/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java
index 17510e9..32d1de1 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java
@@ -178,6 +178,10 @@ public class StatsOptimizer extends Transform {
       abstract Object cast(double doubleValue);
     }
 
+    enum GbyKeyType {
+      NULL, CONSTANT, OTHER
+    }
+
     private StatType getType(String origType) {
       if (serdeConstants.IntegralTypes.contains(origType)) {
         return StatType.Integeral;
@@ -212,23 +216,23 @@ public class StatsOptimizer extends Transform {
       }
     }
 
-    private boolean hasNullOrConstantGbyKey(GroupByOperator gbyOp) {
+    private GbyKeyType getGbyKeyType(GroupByOperator gbyOp) {
       GroupByDesc gbyDesc = gbyOp.getConf();
       int numCols = gbyDesc.getOutputColumnNames().size();
       int aggCols = gbyDesc.getAggregators().size();
       // If the Group by operator has null key
       if (numCols == aggCols) {
-        return true;
+        return GbyKeyType.NULL;
       }
       // If the Gby key is a constant
       List<String> dpCols = gbyOp.getSchema().getColumnNames().subList(0, numCols - aggCols);
       for(String dpCol : dpCols) {
         ExprNodeDesc end = ExprNodeDescUtils.findConstantExprOrigin(dpCol, gbyOp);
         if (!(end instanceof ExprNodeConstantDesc)) {
-          return false;
+          return GbyKeyType.OTHER;
         }
       }
-      return true;
+      return GbyKeyType.CONSTANT;
     }
 
     @Override
@@ -267,6 +271,16 @@ public class StatsOptimizer extends Transform {
           // limit. In order to be safe, we do not use it now.
           return null;
         }
+        Table tbl = tsOp.getConf().getTableMetadata();
+        if (AcidUtils.isAcidTable(tbl)) {
+          Logger.info("Table " + tbl.getTableName() + " is ACID table. Skip StatsOptimizer.");
+          return null;
+        }
+        Long rowCnt = getRowCnt(pctx, tsOp, tbl);
+        // if we can not have correct table stats, then both the table stats and column stats are not useful.
+        if (rowCnt == null) {
+          return null;
+        }
         SelectOperator pselOp = (SelectOperator)stack.get(1);
         for(ExprNodeDesc desc : pselOp.getConf().getColList()) {
           if (!((desc instanceof ExprNodeColumnDesc) || (desc instanceof ExprNodeConstantDesc))) {
@@ -278,7 +292,12 @@ public class StatsOptimizer extends Transform {
         // Since we have done an exact match on TS-SEL-GBY-RS-GBY-(SEL)-FS
         // we need not to do any instanceof checks for following.
         GroupByOperator pgbyOp = (GroupByOperator)stack.get(2);
-        if (!hasNullOrConstantGbyKey(pgbyOp)) {
+        if (getGbyKeyType(pgbyOp) == GbyKeyType.OTHER) {
+          return null;
+        }
+        // we already check if rowCnt is null and rowCnt==0 means table is
+        // empty.
+        else if (getGbyKeyType(pgbyOp) == GbyKeyType.CONSTANT && rowCnt == 0) {
           return null;
         }
         ReduceSinkOperator rsOp = (ReduceSinkOperator)stack.get(3);
@@ -288,7 +307,12 @@ public class StatsOptimizer extends Transform {
         }
 
         GroupByOperator cgbyOp = (GroupByOperator)stack.get(4);
-        if (!hasNullOrConstantGbyKey(cgbyOp)) {
+        if (getGbyKeyType(cgbyOp) == GbyKeyType.OTHER) {
+          return null;
+        }
+        // we already check if rowCnt is null and rowCnt==0 means table is
+        // empty.
+        else if (getGbyKeyType(cgbyOp) == GbyKeyType.CONSTANT && rowCnt == 0) {
           return null;
         }
         Operator<?> last = (Operator<?>) stack.get(5);
@@ -318,11 +342,6 @@ public class StatsOptimizer extends Transform {
           return null;  // todo we can collapse this part of tree into single TS
         }
 
-        Table tbl = tsOp.getConf().getTableMetadata();
-        if (AcidUtils.isAcidTable(tbl)) {
-          Logger.info("Table " + tbl.getTableName() + " is ACID table. Skip StatsOptimizer.");
-          return null;
-        }
         List<Object> oneRow = new ArrayList<Object>();
 
         Hive hive = Hive.get(pctx.getConf());
@@ -350,10 +369,6 @@ public class StatsOptimizer extends Transform {
             } else {
               return null;
             }
-            Long rowCnt = getRowCnt(pctx, tsOp, tbl);
-            if(rowCnt == null) {
-              return null;
-            }
             switch (category) {
               case LONG:
                 oneRow.add(Long.valueOf(constant) * rowCnt);
@@ -370,7 +385,7 @@ public class StatsOptimizer extends Transform {
           }
           else if (udaf instanceof GenericUDAFCount) {
             // always long
-            Long rowCnt = 0L;
+            rowCnt = 0L;
             if (aggr.getParameters().isEmpty()) {
               // Its either count (*) or count() case
               rowCnt = getRowCnt(pctx, tsOp, tbl);

http://git-wip-us.apache.org/repos/asf/hive/blob/cccd3842/ql/src/test/queries/clientpositive/constGby.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/constGby.q b/ql/src/test/queries/clientpositive/constGby.q
new file mode 100644
index 0000000..bde2e79
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/constGby.q
@@ -0,0 +1,21 @@
+set hive.mapred.mode=nonstrict;
+
+create table t1 (a int);
+analyze table t1 compute statistics;
+analyze table t1 compute statistics for columns;
+
+explain select count(1) from t1 group by 1;
+select count(1) from t1 group by 1;
+select count(1) from t1;
+explain select count(*) from t1;
+select count(*) from t1;
+select count(1) from t1 group by 1=1;
+select count(1), max(a) from t1 group by 1=1;
+
+set hive.compute.query.using.stats=false;
+
+select count(1) from t1 group by 1;
+select count(1) from t1;
+select count(*) from t1;
+select count(1) from t1 group by 1=1;
+select count(1), max(a) from t1 group by 1=1;

http://git-wip-us.apache.org/repos/asf/hive/blob/cccd3842/ql/src/test/results/clientpositive/constGby.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/constGby.q.out b/ql/src/test/results/clientpositive/constGby.q.out
new file mode 100644
index 0000000..fd8ecc2
--- /dev/null
+++ b/ql/src/test/results/clientpositive/constGby.q.out
@@ -0,0 +1,176 @@
+PREHOOK: query: create table t1 (a int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@t1
+POSTHOOK: query: create table t1 (a int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t1
+PREHOOK: query: analyze table t1 compute statistics
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+PREHOOK: Output: default@t1
+POSTHOOK: query: analyze table t1 compute statistics
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+POSTHOOK: Output: default@t1
+PREHOOK: query: analyze table t1 compute statistics for columns
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: analyze table t1 compute statistics for columns
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+PREHOOK: query: explain select count(1) from t1 group by 1
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select count(1) from t1 group by 1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: t1
+            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
+            Select Operator
+              Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
+              Group By Operator
+                aggregations: count(1)
+                keys: 1 (type: int)
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+                Reduce Output Operator
+                  key expressions: _col0 (type: int)
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: int)
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+                  value expressions: _col1 (type: bigint)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: count(VALUE._col0)
+          keys: KEY._col0 (type: int)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+          Select Operator
+            expressions: _col1 (type: bigint)
+            outputColumnNames: _col0
+            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select count(1) from t1 group by 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: select count(1) from t1 group by 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+PREHOOK: query: select count(1) from t1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: select count(1) from t1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+0
+PREHOOK: query: explain select count(*) from t1
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select count(*) from t1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-0
+    Fetch Operator
+      limit: 1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select count(*) from t1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from t1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+0
+PREHOOK: query: select count(1) from t1 group by 1=1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: select count(1) from t1 group by 1=1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+PREHOOK: query: select count(1), max(a) from t1 group by 1=1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: select count(1), max(a) from t1 group by 1=1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+PREHOOK: query: select count(1) from t1 group by 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: select count(1) from t1 group by 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+PREHOOK: query: select count(1) from t1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: select count(1) from t1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+0
+PREHOOK: query: select count(*) from t1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from t1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+0
+PREHOOK: query: select count(1) from t1 group by 1=1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: select count(1) from t1 group by 1=1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+PREHOOK: query: select count(1), max(a) from t1 group by 1=1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: select count(1), max(a) from t1 group by 1=1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####