You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ha...@apache.org on 2014/01/17 07:52:46 UTC

svn commit: r1559030 - in /hive/trunk/ql/src: java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java test/queries/clientpositive/metadata_only_queries.q test/results/clientpositive/metadata_only_queries.q.out

Author: hashutosh
Date: Fri Jan 17 06:52:46 2014
New Revision: 1559030

URL: http://svn.apache.org/r1559030
Log:
HIVE-6192 : Optimize sum(1) to answer query using metadata (Ashutosh Chauhan via Thejas Nair)

Modified:
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java
    hive/trunk/ql/src/test/queries/clientpositive/metadata_only_queries.q
    hive/trunk/ql/src/test/results/clientpositive/metadata_only_queries.q.out

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java?rev=1559030&r1=1559029&r2=1559030&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java Fri Jan 17 06:52:46 2014
@@ -10,6 +10,7 @@ import java.util.Stack;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.hive.common.StatsSetupConst;
+import org.apache.hadoop.hive.common.type.HiveDecimal;
 import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData;
 import org.apache.hadoop.hive.ql.exec.ColumnInfo;
 import org.apache.hadoop.hive.ql.exec.Description;
@@ -30,6 +31,7 @@ import org.apache.hadoop.hive.ql.lib.Nod
 import org.apache.hadoop.hive.ql.lib.Rule;
 import org.apache.hadoop.hive.ql.lib.RuleRegExp;
 import org.apache.hadoop.hive.ql.metadata.Hive;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.metadata.Partition;
 import org.apache.hadoop.hive.ql.metadata.Table;
 import org.apache.hadoop.hive.ql.parse.ParseContext;
@@ -42,12 +44,15 @@ import org.apache.hadoop.hive.ql.plan.Fe
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFCount;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFMax;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFMin;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFSum;
 import org.apache.hadoop.hive.serde.serdeConstants;
+import org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
 import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
 import org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
 
 /** There is a set of queries which can be answered entirely from statistics stored in metastore.
  * Examples of such queries are count(*), count(a), max(a), min(b) etc. Hive already collects
@@ -181,30 +186,29 @@ public class StatsOptimizer implements T
         Hive hive = Hive.get(pctx.getConf());
 
         for (AggregationDesc aggr : aggrs) {
-          if (aggr.getGenericUDAFName().equals(GenericUDAFCount.class.getAnnotation(
+          if (aggr.getGenericUDAFName().equals(GenericUDAFSum.class.getAnnotation(
               Description.class).name())) {
-            long rowCnt = 0;
+              if(!(aggr.getParameters().get(0) instanceof ExprNodeConstantDesc)){
+                return null;
+              }
+              Long rowCnt = getRowCnt(hive, tbl);
+              if(rowCnt == null) {
+                return null;
+              }
+              oneRow.add(HiveDecimal.create(((ExprNodeConstantDesc) aggr.getParameters().get(0))
+                .getValue().toString()).multiply(HiveDecimal.create(rowCnt)));
+              ois.add(PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(
+                PrimitiveCategory.DECIMAL));
+          }
+          else if (aggr.getGenericUDAFName().equals(GenericUDAFCount.class.getAnnotation(
+              Description.class).name())) {
+            Long rowCnt = 0L;
             if ((aggr.getParameters().isEmpty() || aggr.getParameters().get(0) instanceof
                 ExprNodeConstantDesc)) {
               // Its either count (*) or count(1) case
-              if(tbl.isPartitioned()) {
-                for (Partition part : hive.getAllPartitionsOf(tbl)) {
-                  long partRowCnt = Long.parseLong(part.getParameters()
-                    .get(StatsSetupConst.ROW_COUNT));
-                  if (partRowCnt < 1) {
-                    Log.debug("Partition doesn't have upto date stats " + part.getSpec());
-                    return null;
-                  }
-                  rowCnt += partRowCnt;
-                }
-              } else { // unpartitioned table
-                rowCnt = Long.parseLong(tbl.getProperty(StatsSetupConst.ROW_COUNT));
-                if (rowCnt < 1) {
-                  // if rowCnt < 1 than its either empty table or table on which stats are not
-                  //  computed We assume the worse and don't attempt to optimize.
-                  Log.debug("Table doesn't have upto date stats " + tbl.getTableName());
-                  return null;
-                }
+              rowCnt = getRowCnt(hive, tbl);
+              if(rowCnt == null) {
+            	  return null;
               }
             } else {
               // Its count(col) case
@@ -442,5 +446,29 @@ public class StatsOptimizer implements T
 
       return null;
     }
+    
+    private Long getRowCnt (Hive hive, Table tbl) throws HiveException { // total rows per ROW_COUNT stats, or null if stats are unusable
+        Long rowCnt = 0L;
+    	if(tbl.isPartitioned()) {
+            for (Partition part : hive.getAllPartitionsOf(tbl)) { // accumulate the ROW_COUNT parameter of every partition
+              long partRowCnt = Long.parseLong(part.getParameters()
+                .get(StatsSetupConst.ROW_COUNT)); // NOTE(review): parseLong throws NumberFormatException if ROW_COUNT is unset - confirm it is always present
+              if (partRowCnt < 1) {
+                Log.debug("Partition doesn't have upto date stats " + part.getSpec());
+                return null; // a single partition without a positive count invalidates the whole total
+              }
+              rowCnt += partRowCnt;
+            }
+          } else { // unpartitioned table: the count is read from the table property
+            rowCnt = Long.parseLong(tbl.getProperty(StatsSetupConst.ROW_COUNT));
+            if (rowCnt < 1) {
+              // If rowCnt < 1 then it is either an empty table or a table on which stats were
+              // not computed. We assume the worst and don't attempt to optimize.
+              Log.debug("Table doesn't have upto date stats " + tbl.getTableName());
+              rowCnt = null; // null is returned below to signal that no usable count is available
+            }
+          }
+    return rowCnt;
+    }
   }
 }

Modified: hive/trunk/ql/src/test/queries/clientpositive/metadata_only_queries.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/metadata_only_queries.q?rev=1559030&r1=1559029&r2=1559030&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/metadata_only_queries.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/metadata_only_queries.q Fri Jan 17 06:52:46 2014
@@ -9,7 +9,7 @@ create table over10k(
            d double,
            bo boolean,
            s string,
-	   ts timestamp, 
+           ts timestamp, 
            dec decimal,  
            bin binary)
        row format delimited
@@ -51,9 +51,9 @@ insert into table stats_tbl_part partiti
 insert into table stats_tbl_part partition (dt='2012') select * from over10k where t>60;
 
 explain 
-select count(*), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b) from stats_tbl;
+select count(*), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b) from stats_tbl;
 explain
-select count(*), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b) from stats_tbl_part;
+select count(*), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b) from stats_tbl_part;
 
 analyze table stats_tbl compute statistics for columns t,si,i,b,f,d,bo,s,bin;
 analyze table stats_tbl_part partition(dt='2010') compute statistics for columns t,si,i,b,f,d,bo,s,bin;
@@ -61,11 +61,11 @@ analyze table stats_tbl_part partition(d
 analyze table stats_tbl_part partition(dt='2012') compute statistics for columns t,si,i,b,f,d,bo,s,bin;
 
 explain 
-select count(*), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b), max(f), min(d) from stats_tbl;
-select count(*), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b), max(f), min(d) from stats_tbl;
+select count(*), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b), max(f), min(d) from stats_tbl;
+select count(*), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b), max(f), min(d) from stats_tbl;
 explain 
-select count(*), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b), max(f), min(d) from stats_tbl_part;
-select count(*), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b), max(f), min(d) from stats_tbl_part;
+select count(*), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b), max(f), min(d) from stats_tbl_part;
+select count(*), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b), max(f), min(d) from stats_tbl_part;
 
 explain select count(ts) from stats_tbl_part;
 

Modified: hive/trunk/ql/src/test/results/clientpositive/metadata_only_queries.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/metadata_only_queries.q.out?rev=1559030&r1=1559029&r2=1559030&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/metadata_only_queries.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/metadata_only_queries.q.out Fri Jan 17 06:52:46 2014
@@ -7,7 +7,7 @@ PREHOOK: query: create table over10k(
            d double,
            bo boolean,
            s string,
-	   ts timestamp, 
+           ts timestamp, 
            dec decimal,  
            bin binary)
        row format delimited
@@ -22,7 +22,7 @@ POSTHOOK: query: create table over10k(
            d double,
            bo boolean,
            s string,
-	   ts timestamp, 
+           ts timestamp, 
            dec decimal,  
            bin binary)
        row format delimited
@@ -232,10 +232,10 @@ POSTHOOK: Lineage: stats_tbl_part PARTIT
 POSTHOOK: Lineage: stats_tbl_part PARTITION(dt=2012).t SIMPLE [(over10k)over10k.FieldSchema(name:t, type:tinyint, comment:null), ]
 POSTHOOK: Lineage: stats_tbl_part PARTITION(dt=2012).ts SIMPLE [(over10k)over10k.FieldSchema(name:ts, type:timestamp, comment:null), ]
 PREHOOK: query: explain 
-select count(*), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b) from stats_tbl
+select count(*), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b) from stats_tbl
 PREHOOK: type: QUERY
 POSTHOOK: query: explain 
-select count(*), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b) from stats_tbl
+select count(*), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b) from stats_tbl
 POSTHOOK: type: QUERY
 POSTHOOK: Lineage: stats_tbl.b SIMPLE [(over10k)over10k.FieldSchema(name:b, type:bigint, comment:null), ]
 POSTHOOK: Lineage: stats_tbl.bin SIMPLE [(over10k)over10k.FieldSchema(name:bin, type:binary, comment:null), ]
@@ -282,7 +282,7 @@ POSTHOOK: Lineage: stats_tbl_part PARTIT
 POSTHOOK: Lineage: stats_tbl_part PARTITION(dt=2012).t SIMPLE [(over10k)over10k.FieldSchema(name:t, type:tinyint, comment:null), ]
 POSTHOOK: Lineage: stats_tbl_part PARTITION(dt=2012).ts SIMPLE [(over10k)over10k.FieldSchema(name:ts, type:timestamp, comment:null), ]
 ABSTRACT SYNTAX TREE:
-  (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME stats_tbl))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count)) (TOK_SELEXPR (TOK_FUNCTION count 1)) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL s))) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL bo))) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL bin))) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL si))) (TOK_SELEXPR (TOK_FUNCTION max (TOK_TABLE_OR_COL i))) (TOK_SELEXPR (TOK_FUNCTION min (TOK_TABLE_OR_COL b))))))
+  (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME stats_tbl))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count)) (TOK_SELEXPR (TOK_FUNCTION sum 1)) (TOK_SELEXPR (TOK_FUNCTION sum 0.2)) (TOK_SELEXPR (TOK_FUNCTION count 1)) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL s))) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL bo))) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL bin))) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL si))) (TOK_SELEXPR (TOK_FUNCTION max (TOK_TABLE_OR_COL i))) (TOK_SELEXPR (TOK_FUNCTION min (TOK_TABLE_OR_COL b))))))
 
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
@@ -313,6 +313,8 @@ STAGE PLANS:
               Group By Operator
                 aggregations:
                       expr: count()
+                      expr: sum(1)
+                      expr: sum(0.2)
                       expr: count(1)
                       expr: count(s)
                       expr: count(bo)
@@ -322,7 +324,7 @@ STAGE PLANS:
                       expr: min(b)
                 bucketGroup: false
                 mode: hash
-                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
                 Reduce Output Operator
                   sort order: 
                   tag: -1
@@ -332,7 +334,7 @@ STAGE PLANS:
                         expr: _col1
                         type: bigint
                         expr: _col2
-                        type: bigint
+                        type: double
                         expr: _col3
                         type: bigint
                         expr: _col4
@@ -340,23 +342,29 @@ STAGE PLANS:
                         expr: _col5
                         type: bigint
                         expr: _col6
-                        type: int
+                        type: bigint
                         expr: _col7
                         type: bigint
+                        expr: _col8
+                        type: int
+                        expr: _col9
+                        type: bigint
       Reduce Operator Tree:
         Group By Operator
           aggregations:
                 expr: count(VALUE._col0)
-                expr: count(VALUE._col1)
-                expr: count(VALUE._col2)
+                expr: sum(VALUE._col1)
+                expr: sum(VALUE._col2)
                 expr: count(VALUE._col3)
                 expr: count(VALUE._col4)
                 expr: count(VALUE._col5)
-                expr: max(VALUE._col6)
-                expr: min(VALUE._col7)
+                expr: count(VALUE._col6)
+                expr: count(VALUE._col7)
+                expr: max(VALUE._col8)
+                expr: min(VALUE._col9)
           bucketGroup: false
           mode: mergepartial
-          outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
+          outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
           Select Operator
             expressions:
                   expr: _col0
@@ -364,7 +372,7 @@ STAGE PLANS:
                   expr: _col1
                   type: bigint
                   expr: _col2
-                  type: bigint
+                  type: double
                   expr: _col3
                   type: bigint
                   expr: _col4
@@ -372,10 +380,14 @@ STAGE PLANS:
                   expr: _col5
                   type: bigint
                   expr: _col6
-                  type: int
+                  type: bigint
                   expr: _col7
                   type: bigint
-            outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
+                  expr: _col8
+                  type: int
+                  expr: _col9
+                  type: bigint
+            outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
             File Output Operator
               compressed: false
               GlobalTableId: 0
@@ -389,10 +401,10 @@ STAGE PLANS:
       limit: -1
 
 PREHOOK: query: explain
-select count(*), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b) from stats_tbl_part
+select count(*), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b) from stats_tbl_part
 PREHOOK: type: QUERY
 POSTHOOK: query: explain
-select count(*), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b) from stats_tbl_part
+select count(*), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b) from stats_tbl_part
 POSTHOOK: type: QUERY
 POSTHOOK: Lineage: stats_tbl.b SIMPLE [(over10k)over10k.FieldSchema(name:b, type:bigint, comment:null), ]
 POSTHOOK: Lineage: stats_tbl.bin SIMPLE [(over10k)over10k.FieldSchema(name:bin, type:binary, comment:null), ]
@@ -439,7 +451,7 @@ POSTHOOK: Lineage: stats_tbl_part PARTIT
 POSTHOOK: Lineage: stats_tbl_part PARTITION(dt=2012).t SIMPLE [(over10k)over10k.FieldSchema(name:t, type:tinyint, comment:null), ]
 POSTHOOK: Lineage: stats_tbl_part PARTITION(dt=2012).ts SIMPLE [(over10k)over10k.FieldSchema(name:ts, type:timestamp, comment:null), ]
 ABSTRACT SYNTAX TREE:
-  (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME stats_tbl_part))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count)) (TOK_SELEXPR (TOK_FUNCTION count 1)) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL s))) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL bo))) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL bin))) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL si))) (TOK_SELEXPR (TOK_FUNCTION max (TOK_TABLE_OR_COL i))) (TOK_SELEXPR (TOK_FUNCTION min (TOK_TABLE_OR_COL b))))))
+  (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME stats_tbl_part))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count)) (TOK_SELEXPR (TOK_FUNCTION sum 1)) (TOK_SELEXPR (TOK_FUNCTION sum 0.2)) (TOK_SELEXPR (TOK_FUNCTION count 1)) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL s))) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL bo))) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL bin))) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL si))) (TOK_SELEXPR (TOK_FUNCTION max (TOK_TABLE_OR_COL i))) (TOK_SELEXPR (TOK_FUNCTION min (TOK_TABLE_OR_COL b))))))
 
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
@@ -470,6 +482,8 @@ STAGE PLANS:
               Group By Operator
                 aggregations:
                       expr: count()
+                      expr: sum(1)
+                      expr: sum(0.2)
                       expr: count(1)
                       expr: count(s)
                       expr: count(bo)
@@ -479,7 +493,7 @@ STAGE PLANS:
                       expr: min(b)
                 bucketGroup: false
                 mode: hash
-                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
                 Reduce Output Operator
                   sort order: 
                   tag: -1
@@ -489,7 +503,7 @@ STAGE PLANS:
                         expr: _col1
                         type: bigint
                         expr: _col2
-                        type: bigint
+                        type: double
                         expr: _col3
                         type: bigint
                         expr: _col4
@@ -497,23 +511,29 @@ STAGE PLANS:
                         expr: _col5
                         type: bigint
                         expr: _col6
-                        type: int
+                        type: bigint
                         expr: _col7
                         type: bigint
+                        expr: _col8
+                        type: int
+                        expr: _col9
+                        type: bigint
       Reduce Operator Tree:
         Group By Operator
           aggregations:
                 expr: count(VALUE._col0)
-                expr: count(VALUE._col1)
-                expr: count(VALUE._col2)
+                expr: sum(VALUE._col1)
+                expr: sum(VALUE._col2)
                 expr: count(VALUE._col3)
                 expr: count(VALUE._col4)
                 expr: count(VALUE._col5)
-                expr: max(VALUE._col6)
-                expr: min(VALUE._col7)
+                expr: count(VALUE._col6)
+                expr: count(VALUE._col7)
+                expr: max(VALUE._col8)
+                expr: min(VALUE._col9)
           bucketGroup: false
           mode: mergepartial
-          outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
+          outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
           Select Operator
             expressions:
                   expr: _col0
@@ -521,7 +541,7 @@ STAGE PLANS:
                   expr: _col1
                   type: bigint
                   expr: _col2
-                  type: bigint
+                  type: double
                   expr: _col3
                   type: bigint
                   expr: _col4
@@ -529,10 +549,14 @@ STAGE PLANS:
                   expr: _col5
                   type: bigint
                   expr: _col6
-                  type: int
+                  type: bigint
                   expr: _col7
                   type: bigint
-            outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
+                  expr: _col8
+                  type: int
+                  expr: _col9
+                  type: bigint
+            outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
             File Output Operator
               compressed: false
               GlobalTableId: 0
@@ -760,10 +784,10 @@ POSTHOOK: Lineage: stats_tbl_part PARTIT
 POSTHOOK: Lineage: stats_tbl_part PARTITION(dt=2012).t SIMPLE [(over10k)over10k.FieldSchema(name:t, type:tinyint, comment:null), ]
 POSTHOOK: Lineage: stats_tbl_part PARTITION(dt=2012).ts SIMPLE [(over10k)over10k.FieldSchema(name:ts, type:timestamp, comment:null), ]
 PREHOOK: query: explain 
-select count(*), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b), max(f), min(d) from stats_tbl
+select count(*), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b), max(f), min(d) from stats_tbl
 PREHOOK: type: QUERY
 POSTHOOK: query: explain 
-select count(*), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b), max(f), min(d) from stats_tbl
+select count(*), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b), max(f), min(d) from stats_tbl
 POSTHOOK: type: QUERY
 POSTHOOK: Lineage: stats_tbl.b SIMPLE [(over10k)over10k.FieldSchema(name:b, type:bigint, comment:null), ]
 POSTHOOK: Lineage: stats_tbl.bin SIMPLE [(over10k)over10k.FieldSchema(name:bin, type:binary, comment:null), ]
@@ -810,7 +834,7 @@ POSTHOOK: Lineage: stats_tbl_part PARTIT
 POSTHOOK: Lineage: stats_tbl_part PARTITION(dt=2012).t SIMPLE [(over10k)over10k.FieldSchema(name:t, type:tinyint, comment:null), ]
 POSTHOOK: Lineage: stats_tbl_part PARTITION(dt=2012).ts SIMPLE [(over10k)over10k.FieldSchema(name:ts, type:timestamp, comment:null), ]
 ABSTRACT SYNTAX TREE:
-  (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME stats_tbl))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count)) (TOK_SELEXPR (TOK_FUNCTION count 1)) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL s))) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL bo))) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL bin))) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL si))) (TOK_SELEXPR (TOK_FUNCTION max (TOK_TABLE_OR_COL i))) (TOK_SELEXPR (TOK_FUNCTION min (TOK_TABLE_OR_COL b))) (TOK_SELEXPR (TOK_FUNCTION max (TOK_TABLE_OR_COL f))) (TOK_SELEXPR (TOK_FUNCTION min (TOK_TABLE_OR_COL d))))))
+  (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME stats_tbl))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count)) (TOK_SELEXPR (TOK_FUNCTION sum 1)) (TOK_SELEXPR (TOK_FUNCTION sum 0.2)) (TOK_SELEXPR (TOK_FUNCTION count 1)) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL s))) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL bo))) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL bin))) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL si))) (TOK_SELEXPR (TOK_FUNCTION max (TOK_TABLE_OR_COL i))) (TOK_SELEXPR (TOK_FUNCTION min (TOK_TABLE_OR_COL b))) (TOK_SELEXPR (TOK_FUNCTION max (TOK_TABLE_OR_COL f))) (TOK_SELEXPR (TOK_FUNCTION min (TOK_TABLE_OR_COL d))))))
 
 STAGE DEPENDENCIES:
   Stage-0 is a root stage
@@ -820,10 +844,10 @@ STAGE PLANS:
     Fetch Operator
       limit: 1
 
-PREHOOK: query: select count(*), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b), max(f), min(d) from stats_tbl
+PREHOOK: query: select count(*), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b), max(f), min(d) from stats_tbl
 PREHOOK: type: QUERY
 #### A masked pattern was here ####
-POSTHOOK: query: select count(*), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b), max(f), min(d) from stats_tbl
+POSTHOOK: query: select count(*), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b), max(f), min(d) from stats_tbl
 POSTHOOK: type: QUERY
 #### A masked pattern was here ####
 POSTHOOK: Lineage: stats_tbl.b SIMPLE [(over10k)over10k.FieldSchema(name:b, type:bigint, comment:null), ]
@@ -870,12 +894,12 @@ POSTHOOK: Lineage: stats_tbl_part PARTIT
 POSTHOOK: Lineage: stats_tbl_part PARTITION(dt=2012).si SIMPLE [(over10k)over10k.FieldSchema(name:si, type:smallint, comment:null), ]
 POSTHOOK: Lineage: stats_tbl_part PARTITION(dt=2012).t SIMPLE [(over10k)over10k.FieldSchema(name:t, type:tinyint, comment:null), ]
 POSTHOOK: Lineage: stats_tbl_part PARTITION(dt=2012).ts SIMPLE [(over10k)over10k.FieldSchema(name:ts, type:timestamp, comment:null), ]
-9999	9999	9999	9999	9999	9999	65791	0	99.9800033569336	0.0
+9999	9999	1999.8	9999	9999	9999	9999	9999	65791	0	99.9800033569336	0.0
 PREHOOK: query: explain 
-select count(*), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b), max(f), min(d) from stats_tbl_part
+select count(*), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b), max(f), min(d) from stats_tbl_part
 PREHOOK: type: QUERY
 POSTHOOK: query: explain 
-select count(*), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b), max(f), min(d) from stats_tbl_part
+select count(*), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b), max(f), min(d) from stats_tbl_part
 POSTHOOK: type: QUERY
 POSTHOOK: Lineage: stats_tbl.b SIMPLE [(over10k)over10k.FieldSchema(name:b, type:bigint, comment:null), ]
 POSTHOOK: Lineage: stats_tbl.bin SIMPLE [(over10k)over10k.FieldSchema(name:bin, type:binary, comment:null), ]
@@ -922,7 +946,7 @@ POSTHOOK: Lineage: stats_tbl_part PARTIT
 POSTHOOK: Lineage: stats_tbl_part PARTITION(dt=2012).t SIMPLE [(over10k)over10k.FieldSchema(name:t, type:tinyint, comment:null), ]
 POSTHOOK: Lineage: stats_tbl_part PARTITION(dt=2012).ts SIMPLE [(over10k)over10k.FieldSchema(name:ts, type:timestamp, comment:null), ]
 ABSTRACT SYNTAX TREE:
-  (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME stats_tbl_part))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count)) (TOK_SELEXPR (TOK_FUNCTION count 1)) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL s))) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL bo))) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL bin))) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL si))) (TOK_SELEXPR (TOK_FUNCTION max (TOK_TABLE_OR_COL i))) (TOK_SELEXPR (TOK_FUNCTION min (TOK_TABLE_OR_COL b))) (TOK_SELEXPR (TOK_FUNCTION max (TOK_TABLE_OR_COL f))) (TOK_SELEXPR (TOK_FUNCTION min (TOK_TABLE_OR_COL d))))))
+  (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME stats_tbl_part))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count)) (TOK_SELEXPR (TOK_FUNCTION sum 1)) (TOK_SELEXPR (TOK_FUNCTION sum 0.2)) (TOK_SELEXPR (TOK_FUNCTION count 1)) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL s))) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL bo))) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL bin))) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL si))) (TOK_SELEXPR (TOK_FUNCTION max (TOK_TABLE_OR_COL i))) (TOK_SELEXPR (TOK_FUNCTION min (TOK_TABLE_OR_COL b))) (TOK_SELEXPR (TOK_FUNCTION max (TOK_TABLE_OR_COL f))) (TOK_SELEXPR (TOK_FUNCTION min (TOK_TABLE_OR_COL d))))))
 
 STAGE DEPENDENCIES:
   Stage-0 is a root stage
@@ -932,10 +956,10 @@ STAGE PLANS:
     Fetch Operator
       limit: 1
 
-PREHOOK: query: select count(*), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b), max(f), min(d) from stats_tbl_part
+PREHOOK: query: select count(*), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b), max(f), min(d) from stats_tbl_part
 PREHOOK: type: QUERY
 #### A masked pattern was here ####
-POSTHOOK: query: select count(*), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b), max(f), min(d) from stats_tbl_part
+POSTHOOK: query: select count(*), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b), max(f), min(d) from stats_tbl_part
 POSTHOOK: type: QUERY
 #### A masked pattern was here ####
 POSTHOOK: Lineage: stats_tbl.b SIMPLE [(over10k)over10k.FieldSchema(name:b, type:bigint, comment:null), ]
@@ -982,7 +1006,7 @@ POSTHOOK: Lineage: stats_tbl_part PARTIT
 POSTHOOK: Lineage: stats_tbl_part PARTITION(dt=2012).si SIMPLE [(over10k)over10k.FieldSchema(name:si, type:smallint, comment:null), ]
 POSTHOOK: Lineage: stats_tbl_part PARTITION(dt=2012).t SIMPLE [(over10k)over10k.FieldSchema(name:t, type:tinyint, comment:null), ]
 POSTHOOK: Lineage: stats_tbl_part PARTITION(dt=2012).ts SIMPLE [(over10k)over10k.FieldSchema(name:ts, type:timestamp, comment:null), ]
-9489	9489	9489	9489	9489	9489	65791	0	99.9800033569336	0.0
+9489	9489	1897.8	9489	9489	9489	9489	9489	65791	0	99.9800033569336	0.0
 PREHOOK: query: explain select count(ts) from stats_tbl_part
 PREHOOK: type: QUERY
 POSTHOOK: query: explain select count(ts) from stats_tbl_part