You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ha...@apache.org on 2014/01/17 07:52:46 UTC
svn commit: r1559030 - in /hive/trunk/ql/src:
java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java
test/queries/clientpositive/metadata_only_queries.q
test/results/clientpositive/metadata_only_queries.q.out
Author: hashutosh
Date: Fri Jan 17 06:52:46 2014
New Revision: 1559030
URL: http://svn.apache.org/r1559030
Log:
HIVE-6192 : Optimize sum(1) to answer query using metadata (Ashutosh Chauhan via Thejas Nair)
Modified:
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java
hive/trunk/ql/src/test/queries/clientpositive/metadata_only_queries.q
hive/trunk/ql/src/test/results/clientpositive/metadata_only_queries.q.out
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java?rev=1559030&r1=1559029&r2=1559030&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java Fri Jan 17 06:52:46 2014
@@ -10,6 +10,7 @@ import java.util.Stack;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hive.common.StatsSetupConst;
+import org.apache.hadoop.hive.common.type.HiveDecimal;
import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData;
import org.apache.hadoop.hive.ql.exec.ColumnInfo;
import org.apache.hadoop.hive.ql.exec.Description;
@@ -30,6 +31,7 @@ import org.apache.hadoop.hive.ql.lib.Nod
import org.apache.hadoop.hive.ql.lib.Rule;
import org.apache.hadoop.hive.ql.lib.RuleRegExp;
import org.apache.hadoop.hive.ql.metadata.Hive;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.metadata.Partition;
import org.apache.hadoop.hive.ql.metadata.Table;
import org.apache.hadoop.hive.ql.parse.ParseContext;
@@ -42,12 +44,15 @@ import org.apache.hadoop.hive.ql.plan.Fe
import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFCount;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFMax;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFMin;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFSum;
import org.apache.hadoop.hive.serde.serdeConstants;
+import org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
import org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
/** There is a set of queries which can be answered entirely from statistics stored in metastore.
* Examples of such queries are count(*), count(a), max(a), min(b) etc. Hive already collects
@@ -181,30 +186,29 @@ public class StatsOptimizer implements T
Hive hive = Hive.get(pctx.getConf());
for (AggregationDesc aggr : aggrs) {
- if (aggr.getGenericUDAFName().equals(GenericUDAFCount.class.getAnnotation(
+ if (aggr.getGenericUDAFName().equals(GenericUDAFSum.class.getAnnotation(
Description.class).name())) {
- long rowCnt = 0;
+ if(!(aggr.getParameters().get(0) instanceof ExprNodeConstantDesc)){
+ return null;
+ }
+ Long rowCnt = getRowCnt(hive, tbl);
+ if(rowCnt == null) {
+ return null;
+ }
+ oneRow.add(HiveDecimal.create(((ExprNodeConstantDesc) aggr.getParameters().get(0))
+ .getValue().toString()).multiply(HiveDecimal.create(rowCnt)));
+ ois.add(PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(
+ PrimitiveCategory.DECIMAL));
+ }
+ else if (aggr.getGenericUDAFName().equals(GenericUDAFCount.class.getAnnotation(
+ Description.class).name())) {
+ Long rowCnt = 0L;
if ((aggr.getParameters().isEmpty() || aggr.getParameters().get(0) instanceof
ExprNodeConstantDesc)) {
// Its either count (*) or count(1) case
- if(tbl.isPartitioned()) {
- for (Partition part : hive.getAllPartitionsOf(tbl)) {
- long partRowCnt = Long.parseLong(part.getParameters()
- .get(StatsSetupConst.ROW_COUNT));
- if (partRowCnt < 1) {
- Log.debug("Partition doesn't have upto date stats " + part.getSpec());
- return null;
- }
- rowCnt += partRowCnt;
- }
- } else { // unpartitioned table
- rowCnt = Long.parseLong(tbl.getProperty(StatsSetupConst.ROW_COUNT));
- if (rowCnt < 1) {
- // if rowCnt < 1 than its either empty table or table on which stats are not
- // computed We assume the worse and don't attempt to optimize.
- Log.debug("Table doesn't have upto date stats " + tbl.getTableName());
- return null;
- }
+ rowCnt = getRowCnt(hive, tbl);
+ if(rowCnt == null) {
+ return null;
}
} else {
// Its count(col) case
@@ -442,5 +446,29 @@ public class StatsOptimizer implements T
return null;
}
+
+    /**
+     * Derives the total number of rows of a table from the row-count statistic
+     * ({@code StatsSetupConst.ROW_COUNT}) kept in its metadata, without reading any data.
+     *
+     * For a partitioned table the per-partition counts are summed and every partition
+     * must carry a usable count; for an unpartitioned table the table-level property is
+     * used. A count that is absent, not a valid number, or less than 1 is treated as
+     * "stats not available": an empty table and a table on which stats were never
+     * computed cannot be told apart, so the worse case is assumed.
+     *
+     * @param hive handle used to list the partitions of the table
+     * @param tbl table whose row count is requested
+     * @return the total row count, or {@code null} when it cannot be derived reliably
+     *         from the stored stats (callers treat {@code null} as "do not optimize")
+     * @throws HiveException as declared; presumably raised while listing the partitions
+     */
+    private Long getRowCnt (Hive hive, Table tbl) throws HiveException {
+      if (!tbl.isPartitioned()) {
+        long tblRowCnt = parseRowCnt(tbl.getProperty(StatsSetupConst.ROW_COUNT));
+        if (tblRowCnt < 1) {
+          // Either an empty table or a table without computed stats; don't optimize.
+          Log.debug("Table doesn't have upto date stats " + tbl.getTableName());
+          return null;
+        }
+        return tblRowCnt;
+      }
+      // Primitive accumulator: avoids re-boxing a Long on every partition.
+      long rowCnt = 0L;
+      for (Partition part : hive.getAllPartitionsOf(tbl)) {
+        long partRowCnt = parseRowCnt(part.getParameters().get(StatsSetupConst.ROW_COUNT));
+        if (partRowCnt < 1) {
+          // A single partition without usable stats invalidates the whole total.
+          Log.debug("Partition doesn't have upto date stats " + part.getSpec());
+          return null;
+        }
+        rowCnt += partRowCnt;
+      }
+      return rowCnt;
+    }
+
+    /**
+     * Parses a raw row-count statistic.
+     *
+     * @param rawCnt value of the row-count parameter, possibly {@code null}
+     * @return the parsed count, or -1 when the value is absent or not a valid long, so
+     *         that callers handle it through their existing "less than 1" check
+     */
+    private long parseRowCnt(String rawCnt) {
+      if (rawCnt == null) {
+        return -1L;
+      }
+      try {
+        return Long.parseLong(rawCnt);
+      } catch (NumberFormatException ignored) {
+        // Malformed statistic: report it as unavailable instead of failing the optimizer.
+        return -1L;
+      }
+    }
}
}
Modified: hive/trunk/ql/src/test/queries/clientpositive/metadata_only_queries.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/metadata_only_queries.q?rev=1559030&r1=1559029&r2=1559030&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/metadata_only_queries.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/metadata_only_queries.q Fri Jan 17 06:52:46 2014
@@ -9,7 +9,7 @@ create table over10k(
d double,
bo boolean,
s string,
- ts timestamp,
+ ts timestamp,
dec decimal,
bin binary)
row format delimited
@@ -51,9 +51,9 @@ insert into table stats_tbl_part partiti
insert into table stats_tbl_part partition (dt='2012') select * from over10k where t>60;
explain
-select count(*), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b) from stats_tbl;
+select count(*), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b) from stats_tbl;
explain
-select count(*), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b) from stats_tbl_part;
+select count(*), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b) from stats_tbl_part;
analyze table stats_tbl compute statistics for columns t,si,i,b,f,d,bo,s,bin;
analyze table stats_tbl_part partition(dt='2010') compute statistics for columns t,si,i,b,f,d,bo,s,bin;
@@ -61,11 +61,11 @@ analyze table stats_tbl_part partition(d
analyze table stats_tbl_part partition(dt='2012') compute statistics for columns t,si,i,b,f,d,bo,s,bin;
explain
-select count(*), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b), max(f), min(d) from stats_tbl;
-select count(*), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b), max(f), min(d) from stats_tbl;
+select count(*), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b), max(f), min(d) from stats_tbl;
+select count(*), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b), max(f), min(d) from stats_tbl;
explain
-select count(*), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b), max(f), min(d) from stats_tbl_part;
-select count(*), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b), max(f), min(d) from stats_tbl_part;
+select count(*), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b), max(f), min(d) from stats_tbl_part;
+select count(*), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b), max(f), min(d) from stats_tbl_part;
explain select count(ts) from stats_tbl_part;
Modified: hive/trunk/ql/src/test/results/clientpositive/metadata_only_queries.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/metadata_only_queries.q.out?rev=1559030&r1=1559029&r2=1559030&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/metadata_only_queries.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/metadata_only_queries.q.out Fri Jan 17 06:52:46 2014
@@ -7,7 +7,7 @@ PREHOOK: query: create table over10k(
d double,
bo boolean,
s string,
- ts timestamp,
+ ts timestamp,
dec decimal,
bin binary)
row format delimited
@@ -22,7 +22,7 @@ POSTHOOK: query: create table over10k(
d double,
bo boolean,
s string,
- ts timestamp,
+ ts timestamp,
dec decimal,
bin binary)
row format delimited
@@ -232,10 +232,10 @@ POSTHOOK: Lineage: stats_tbl_part PARTIT
POSTHOOK: Lineage: stats_tbl_part PARTITION(dt=2012).t SIMPLE [(over10k)over10k.FieldSchema(name:t, type:tinyint, comment:null), ]
POSTHOOK: Lineage: stats_tbl_part PARTITION(dt=2012).ts SIMPLE [(over10k)over10k.FieldSchema(name:ts, type:timestamp, comment:null), ]
PREHOOK: query: explain
-select count(*), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b) from stats_tbl
+select count(*), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b) from stats_tbl
PREHOOK: type: QUERY
POSTHOOK: query: explain
-select count(*), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b) from stats_tbl
+select count(*), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b) from stats_tbl
POSTHOOK: type: QUERY
POSTHOOK: Lineage: stats_tbl.b SIMPLE [(over10k)over10k.FieldSchema(name:b, type:bigint, comment:null), ]
POSTHOOK: Lineage: stats_tbl.bin SIMPLE [(over10k)over10k.FieldSchema(name:bin, type:binary, comment:null), ]
@@ -282,7 +282,7 @@ POSTHOOK: Lineage: stats_tbl_part PARTIT
POSTHOOK: Lineage: stats_tbl_part PARTITION(dt=2012).t SIMPLE [(over10k)over10k.FieldSchema(name:t, type:tinyint, comment:null), ]
POSTHOOK: Lineage: stats_tbl_part PARTITION(dt=2012).ts SIMPLE [(over10k)over10k.FieldSchema(name:ts, type:timestamp, comment:null), ]
ABSTRACT SYNTAX TREE:
- (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME stats_tbl))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count)) (TOK_SELEXPR (TOK_FUNCTION count 1)) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL s))) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL bo))) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL bin))) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL si))) (TOK_SELEXPR (TOK_FUNCTION max (TOK_TABLE_OR_COL i))) (TOK_SELEXPR (TOK_FUNCTION min (TOK_TABLE_OR_COL b))))))
+ (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME stats_tbl))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count)) (TOK_SELEXPR (TOK_FUNCTION sum 1)) (TOK_SELEXPR (TOK_FUNCTION sum 0.2)) (TOK_SELEXPR (TOK_FUNCTION count 1)) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL s))) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL bo))) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL bin))) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL si))) (TOK_SELEXPR (TOK_FUNCTION max (TOK_TABLE_OR_COL i))) (TOK_SELEXPR (TOK_FUNCTION min (TOK_TABLE_OR_COL b))))))
STAGE DEPENDENCIES:
Stage-1 is a root stage
@@ -313,6 +313,8 @@ STAGE PLANS:
Group By Operator
aggregations:
expr: count()
+ expr: sum(1)
+ expr: sum(0.2)
expr: count(1)
expr: count(s)
expr: count(bo)
@@ -322,7 +324,7 @@ STAGE PLANS:
expr: min(b)
bucketGroup: false
mode: hash
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
Reduce Output Operator
sort order:
tag: -1
@@ -332,7 +334,7 @@ STAGE PLANS:
expr: _col1
type: bigint
expr: _col2
- type: bigint
+ type: double
expr: _col3
type: bigint
expr: _col4
@@ -340,23 +342,29 @@ STAGE PLANS:
expr: _col5
type: bigint
expr: _col6
- type: int
+ type: bigint
expr: _col7
type: bigint
+ expr: _col8
+ type: int
+ expr: _col9
+ type: bigint
Reduce Operator Tree:
Group By Operator
aggregations:
expr: count(VALUE._col0)
- expr: count(VALUE._col1)
- expr: count(VALUE._col2)
+ expr: sum(VALUE._col1)
+ expr: sum(VALUE._col2)
expr: count(VALUE._col3)
expr: count(VALUE._col4)
expr: count(VALUE._col5)
- expr: max(VALUE._col6)
- expr: min(VALUE._col7)
+ expr: count(VALUE._col6)
+ expr: count(VALUE._col7)
+ expr: max(VALUE._col8)
+ expr: min(VALUE._col9)
bucketGroup: false
mode: mergepartial
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
Select Operator
expressions:
expr: _col0
@@ -364,7 +372,7 @@ STAGE PLANS:
expr: _col1
type: bigint
expr: _col2
- type: bigint
+ type: double
expr: _col3
type: bigint
expr: _col4
@@ -372,10 +380,14 @@ STAGE PLANS:
expr: _col5
type: bigint
expr: _col6
- type: int
+ type: bigint
expr: _col7
type: bigint
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
+ expr: _col8
+ type: int
+ expr: _col9
+ type: bigint
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
File Output Operator
compressed: false
GlobalTableId: 0
@@ -389,10 +401,10 @@ STAGE PLANS:
limit: -1
PREHOOK: query: explain
-select count(*), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b) from stats_tbl_part
+select count(*), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b) from stats_tbl_part
PREHOOK: type: QUERY
POSTHOOK: query: explain
-select count(*), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b) from stats_tbl_part
+select count(*), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b) from stats_tbl_part
POSTHOOK: type: QUERY
POSTHOOK: Lineage: stats_tbl.b SIMPLE [(over10k)over10k.FieldSchema(name:b, type:bigint, comment:null), ]
POSTHOOK: Lineage: stats_tbl.bin SIMPLE [(over10k)over10k.FieldSchema(name:bin, type:binary, comment:null), ]
@@ -439,7 +451,7 @@ POSTHOOK: Lineage: stats_tbl_part PARTIT
POSTHOOK: Lineage: stats_tbl_part PARTITION(dt=2012).t SIMPLE [(over10k)over10k.FieldSchema(name:t, type:tinyint, comment:null), ]
POSTHOOK: Lineage: stats_tbl_part PARTITION(dt=2012).ts SIMPLE [(over10k)over10k.FieldSchema(name:ts, type:timestamp, comment:null), ]
ABSTRACT SYNTAX TREE:
- (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME stats_tbl_part))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count)) (TOK_SELEXPR (TOK_FUNCTION count 1)) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL s))) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL bo))) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL bin))) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL si))) (TOK_SELEXPR (TOK_FUNCTION max (TOK_TABLE_OR_COL i))) (TOK_SELEXPR (TOK_FUNCTION min (TOK_TABLE_OR_COL b))))))
+ (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME stats_tbl_part))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count)) (TOK_SELEXPR (TOK_FUNCTION sum 1)) (TOK_SELEXPR (TOK_FUNCTION sum 0.2)) (TOK_SELEXPR (TOK_FUNCTION count 1)) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL s))) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL bo))) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL bin))) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL si))) (TOK_SELEXPR (TOK_FUNCTION max (TOK_TABLE_OR_COL i))) (TOK_SELEXPR (TOK_FUNCTION min (TOK_TABLE_OR_COL b))))))
STAGE DEPENDENCIES:
Stage-1 is a root stage
@@ -470,6 +482,8 @@ STAGE PLANS:
Group By Operator
aggregations:
expr: count()
+ expr: sum(1)
+ expr: sum(0.2)
expr: count(1)
expr: count(s)
expr: count(bo)
@@ -479,7 +493,7 @@ STAGE PLANS:
expr: min(b)
bucketGroup: false
mode: hash
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
Reduce Output Operator
sort order:
tag: -1
@@ -489,7 +503,7 @@ STAGE PLANS:
expr: _col1
type: bigint
expr: _col2
- type: bigint
+ type: double
expr: _col3
type: bigint
expr: _col4
@@ -497,23 +511,29 @@ STAGE PLANS:
expr: _col5
type: bigint
expr: _col6
- type: int
+ type: bigint
expr: _col7
type: bigint
+ expr: _col8
+ type: int
+ expr: _col9
+ type: bigint
Reduce Operator Tree:
Group By Operator
aggregations:
expr: count(VALUE._col0)
- expr: count(VALUE._col1)
- expr: count(VALUE._col2)
+ expr: sum(VALUE._col1)
+ expr: sum(VALUE._col2)
expr: count(VALUE._col3)
expr: count(VALUE._col4)
expr: count(VALUE._col5)
- expr: max(VALUE._col6)
- expr: min(VALUE._col7)
+ expr: count(VALUE._col6)
+ expr: count(VALUE._col7)
+ expr: max(VALUE._col8)
+ expr: min(VALUE._col9)
bucketGroup: false
mode: mergepartial
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
Select Operator
expressions:
expr: _col0
@@ -521,7 +541,7 @@ STAGE PLANS:
expr: _col1
type: bigint
expr: _col2
- type: bigint
+ type: double
expr: _col3
type: bigint
expr: _col4
@@ -529,10 +549,14 @@ STAGE PLANS:
expr: _col5
type: bigint
expr: _col6
- type: int
+ type: bigint
expr: _col7
type: bigint
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
+ expr: _col8
+ type: int
+ expr: _col9
+ type: bigint
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
File Output Operator
compressed: false
GlobalTableId: 0
@@ -760,10 +784,10 @@ POSTHOOK: Lineage: stats_tbl_part PARTIT
POSTHOOK: Lineage: stats_tbl_part PARTITION(dt=2012).t SIMPLE [(over10k)over10k.FieldSchema(name:t, type:tinyint, comment:null), ]
POSTHOOK: Lineage: stats_tbl_part PARTITION(dt=2012).ts SIMPLE [(over10k)over10k.FieldSchema(name:ts, type:timestamp, comment:null), ]
PREHOOK: query: explain
-select count(*), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b), max(f), min(d) from stats_tbl
+select count(*), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b), max(f), min(d) from stats_tbl
PREHOOK: type: QUERY
POSTHOOK: query: explain
-select count(*), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b), max(f), min(d) from stats_tbl
+select count(*), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b), max(f), min(d) from stats_tbl
POSTHOOK: type: QUERY
POSTHOOK: Lineage: stats_tbl.b SIMPLE [(over10k)over10k.FieldSchema(name:b, type:bigint, comment:null), ]
POSTHOOK: Lineage: stats_tbl.bin SIMPLE [(over10k)over10k.FieldSchema(name:bin, type:binary, comment:null), ]
@@ -810,7 +834,7 @@ POSTHOOK: Lineage: stats_tbl_part PARTIT
POSTHOOK: Lineage: stats_tbl_part PARTITION(dt=2012).t SIMPLE [(over10k)over10k.FieldSchema(name:t, type:tinyint, comment:null), ]
POSTHOOK: Lineage: stats_tbl_part PARTITION(dt=2012).ts SIMPLE [(over10k)over10k.FieldSchema(name:ts, type:timestamp, comment:null), ]
ABSTRACT SYNTAX TREE:
- (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME stats_tbl))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count)) (TOK_SELEXPR (TOK_FUNCTION count 1)) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL s))) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL bo))) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL bin))) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL si))) (TOK_SELEXPR (TOK_FUNCTION max (TOK_TABLE_OR_COL i))) (TOK_SELEXPR (TOK_FUNCTION min (TOK_TABLE_OR_COL b))) (TOK_SELEXPR (TOK_FUNCTION max (TOK_TABLE_OR_COL f))) (TOK_SELEXPR (TOK_FUNCTION min (TOK_TABLE_OR_COL d))))))
+ (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME stats_tbl))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count)) (TOK_SELEXPR (TOK_FUNCTION sum 1)) (TOK_SELEXPR (TOK_FUNCTION sum 0.2)) (TOK_SELEXPR (TOK_FUNCTION count 1)) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL s))) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL bo))) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL bin))) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL si))) (TOK_SELEXPR (TOK_FUNCTION max (TOK_TABLE_OR_COL i))) (TOK_SELEXPR (TOK_FUNCTION min (TOK_TABLE_OR_COL b))) (TOK_SELEXPR (TOK_FUNCTION max (TOK_TABLE_OR_COL f))) (TOK_SELEXPR (TOK_FUNCTION min (TOK_TABLE_OR_COL d))))))
STAGE DEPENDENCIES:
Stage-0 is a root stage
@@ -820,10 +844,10 @@ STAGE PLANS:
Fetch Operator
limit: 1
-PREHOOK: query: select count(*), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b), max(f), min(d) from stats_tbl
+PREHOOK: query: select count(*), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b), max(f), min(d) from stats_tbl
PREHOOK: type: QUERY
#### A masked pattern was here ####
-POSTHOOK: query: select count(*), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b), max(f), min(d) from stats_tbl
+POSTHOOK: query: select count(*), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b), max(f), min(d) from stats_tbl
POSTHOOK: type: QUERY
#### A masked pattern was here ####
POSTHOOK: Lineage: stats_tbl.b SIMPLE [(over10k)over10k.FieldSchema(name:b, type:bigint, comment:null), ]
@@ -870,12 +894,12 @@ POSTHOOK: Lineage: stats_tbl_part PARTIT
POSTHOOK: Lineage: stats_tbl_part PARTITION(dt=2012).si SIMPLE [(over10k)over10k.FieldSchema(name:si, type:smallint, comment:null), ]
POSTHOOK: Lineage: stats_tbl_part PARTITION(dt=2012).t SIMPLE [(over10k)over10k.FieldSchema(name:t, type:tinyint, comment:null), ]
POSTHOOK: Lineage: stats_tbl_part PARTITION(dt=2012).ts SIMPLE [(over10k)over10k.FieldSchema(name:ts, type:timestamp, comment:null), ]
-9999 9999 9999 9999 9999 9999 65791 0 99.9800033569336 0.0
+9999 9999 1999.8 9999 9999 9999 9999 9999 65791 0 99.9800033569336 0.0
PREHOOK: query: explain
-select count(*), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b), max(f), min(d) from stats_tbl_part
+select count(*), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b), max(f), min(d) from stats_tbl_part
PREHOOK: type: QUERY
POSTHOOK: query: explain
-select count(*), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b), max(f), min(d) from stats_tbl_part
+select count(*), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b), max(f), min(d) from stats_tbl_part
POSTHOOK: type: QUERY
POSTHOOK: Lineage: stats_tbl.b SIMPLE [(over10k)over10k.FieldSchema(name:b, type:bigint, comment:null), ]
POSTHOOK: Lineage: stats_tbl.bin SIMPLE [(over10k)over10k.FieldSchema(name:bin, type:binary, comment:null), ]
@@ -922,7 +946,7 @@ POSTHOOK: Lineage: stats_tbl_part PARTIT
POSTHOOK: Lineage: stats_tbl_part PARTITION(dt=2012).t SIMPLE [(over10k)over10k.FieldSchema(name:t, type:tinyint, comment:null), ]
POSTHOOK: Lineage: stats_tbl_part PARTITION(dt=2012).ts SIMPLE [(over10k)over10k.FieldSchema(name:ts, type:timestamp, comment:null), ]
ABSTRACT SYNTAX TREE:
- (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME stats_tbl_part))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count)) (TOK_SELEXPR (TOK_FUNCTION count 1)) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL s))) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL bo))) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL bin))) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL si))) (TOK_SELEXPR (TOK_FUNCTION max (TOK_TABLE_OR_COL i))) (TOK_SELEXPR (TOK_FUNCTION min (TOK_TABLE_OR_COL b))) (TOK_SELEXPR (TOK_FUNCTION max (TOK_TABLE_OR_COL f))) (TOK_SELEXPR (TOK_FUNCTION min (TOK_TABLE_OR_COL d))))))
+ (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME stats_tbl_part))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count)) (TOK_SELEXPR (TOK_FUNCTION sum 1)) (TOK_SELEXPR (TOK_FUNCTION sum 0.2)) (TOK_SELEXPR (TOK_FUNCTION count 1)) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL s))) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL bo))) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL bin))) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL si))) (TOK_SELEXPR (TOK_FUNCTION max (TOK_TABLE_OR_COL i))) (TOK_SELEXPR (TOK_FUNCTION min (TOK_TABLE_OR_COL b))) (TOK_SELEXPR (TOK_FUNCTION max (TOK_TABLE_OR_COL f))) (TOK_SELEXPR (TOK_FUNCTION min (TOK_TABLE_OR_COL d))))))
STAGE DEPENDENCIES:
Stage-0 is a root stage
@@ -932,10 +956,10 @@ STAGE PLANS:
Fetch Operator
limit: 1
-PREHOOK: query: select count(*), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b), max(f), min(d) from stats_tbl_part
+PREHOOK: query: select count(*), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b), max(f), min(d) from stats_tbl_part
PREHOOK: type: QUERY
#### A masked pattern was here ####
-POSTHOOK: query: select count(*), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b), max(f), min(d) from stats_tbl_part
+POSTHOOK: query: select count(*), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b), max(f), min(d) from stats_tbl_part
POSTHOOK: type: QUERY
#### A masked pattern was here ####
POSTHOOK: Lineage: stats_tbl.b SIMPLE [(over10k)over10k.FieldSchema(name:b, type:bigint, comment:null), ]
@@ -982,7 +1006,7 @@ POSTHOOK: Lineage: stats_tbl_part PARTIT
POSTHOOK: Lineage: stats_tbl_part PARTITION(dt=2012).si SIMPLE [(over10k)over10k.FieldSchema(name:si, type:smallint, comment:null), ]
POSTHOOK: Lineage: stats_tbl_part PARTITION(dt=2012).t SIMPLE [(over10k)over10k.FieldSchema(name:t, type:tinyint, comment:null), ]
POSTHOOK: Lineage: stats_tbl_part PARTITION(dt=2012).ts SIMPLE [(over10k)over10k.FieldSchema(name:ts, type:timestamp, comment:null), ]
-9489 9489 9489 9489 9489 9489 65791 0 99.9800033569336 0.0
+9489 9489 1897.8 9489 9489 9489 9489 9489 65791 0 99.9800033569336 0.0
PREHOOK: query: explain select count(ts) from stats_tbl_part
PREHOOK: type: QUERY
POSTHOOK: query: explain select count(ts) from stats_tbl_part