You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by px...@apache.org on 2015/09/30 02:55:52 UTC
hive git commit: HIVE-11937: Improve StatsOptimizer to deal with
query with additional constant columns (Pengcheng Xiong,
reviewed by Ashutosh Chauhan)
Repository: hive
Updated Branches:
refs/heads/master 6a8d7e4cd -> cdaf35674
HIVE-11937: Improve StatsOptimizer to deal with query with additional constant columns (Pengcheng Xiong, reviewed by Ashutosh Chauhan)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/cdaf3567
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/cdaf3567
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/cdaf3567
Branch: refs/heads/master
Commit: cdaf356740195cde6f5b6bfdade2f614e1c618d3
Parents: 6a8d7e4
Author: Pengcheng Xiong <px...@apache.org>
Authored: Tue Sep 29 17:47:39 2015 -0700
Committer: Pengcheng Xiong <px...@apache.org>
Committed: Tue Sep 29 17:47:39 2015 -0700
----------------------------------------------------------------------
.../hive/ql/optimizer/StatsOptimizer.java | 46 ++++-
.../clientpositive/metadata_only_queries.q | 15 ++
.../clientpositive/metadata_only_queries.q.out | 158 +++++++++++++++++
.../spark/metadata_only_queries.q.out | 170 +++++++++++++++++++
.../tez/metadata_only_queries.q.out | 170 +++++++++++++++++++
5 files changed, 552 insertions(+), 7 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/cdaf3567/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java
index bc8d8f7..5a21e6b 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java
@@ -19,6 +19,8 @@ package org.apache.hadoop.hive.ql.optimizer;
import java.util.ArrayList;
import java.util.Collection;
+import java.util.HashMap;
+import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
@@ -235,10 +237,23 @@ public class StatsOptimizer implements Transform {
return null;
}
Operator<?> last = (Operator<?>) stack.get(5);
+ SelectOperator cselOp = null;
+ Map<Integer,Object> posToConstant = new HashMap<>();
if (last instanceof SelectOperator) {
- SelectOperator cselOp = (SelectOperator) last;
+ cselOp = (SelectOperator) last;
if (!cselOp.isIdentitySelect()) {
- return null; // todo we can do further by providing operator to fetch task
+ for (int pos = 0; pos < cselOp.getConf().getColList().size(); pos++) {
+ ExprNodeDesc desc = cselOp.getConf().getColList().get(pos);
+ if (desc instanceof ExprNodeConstantDesc) {
+ //We store the position to the constant value for later use.
+ posToConstant.put(pos, ((ExprNodeConstantDesc)desc).getValue());
+ } else {
+ if (!(desc instanceof ExprNodeColumnDesc)) {
+ // Probably an expression, cant handle that
+ return null;
+ }
+ }
+ }
}
last = (Operator<?>) stack.get(6);
}
@@ -588,13 +603,30 @@ public class StatsOptimizer implements Transform {
List<List<Object>> allRows = new ArrayList<List<Object>>();
- allRows.add(oneRow);
-
List<String> colNames = new ArrayList<String>();
List<ObjectInspector> ois = new ArrayList<ObjectInspector>();
- for (ColumnInfo colInfo: cgbyOp.getSchema().getSignature()) {
- colNames.add(colInfo.getInternalName());
- ois.add(TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(colInfo.getType()));
+ if (cselOp == null) {
+ allRows.add(oneRow);
+ for (ColumnInfo colInfo : cgbyOp.getSchema().getSignature()) {
+ colNames.add(colInfo.getInternalName());
+ ois.add(TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(colInfo.getType()));
+ }
+ } else {
+ int aggrPos = 0;
+ List<Object> oneRowWithConstant = new ArrayList<>();
+ for (int pos = 0; pos < cselOp.getSchema().getSignature().size(); pos++) {
+ if (posToConstant.containsKey(pos)) {
+ // This position is a constant.
+ oneRowWithConstant.add(posToConstant.get(pos));
+ } else {
+ // This position is an aggregation.
+ oneRowWithConstant.add(oneRow.get(aggrPos++));
+ }
+ ColumnInfo colInfo = cselOp.getSchema().getSignature().get(pos);
+ colNames.add(colInfo.getInternalName());
+ ois.add(TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(colInfo.getType()));
+ }
+ allRows.add(oneRowWithConstant);
}
StandardStructObjectInspector sOI = ObjectInspectorFactory.
getStandardStructObjectInspector(colNames, ois);
http://git-wip-us.apache.org/repos/asf/hive/blob/cdaf3567/ql/src/test/queries/clientpositive/metadata_only_queries.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/metadata_only_queries.q b/ql/src/test/queries/clientpositive/metadata_only_queries.q
index 56f3a78..70fac92 100644
--- a/ql/src/test/queries/clientpositive/metadata_only_queries.q
+++ b/ql/src/test/queries/clientpositive/metadata_only_queries.q
@@ -57,6 +57,11 @@ select count(*), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), co
explain
select count(*), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b) from stats_tbl_part;
+explain
+select count(*), '1' as one, sum(1), sum(0.2), 2 as two, count(1), count(s), 3+4.0 as three, count(bo), count(bin), count(si), max(i), min(b) from stats_tbl;
+explain
+select count(*), '1' as one, sum(1), sum(0.2), 2 as two, count(1), count(s), 3+4.0 as three, count(bo), count(bin), count(si), max(i), min(b) from stats_tbl_part;
+
analyze table stats_tbl compute statistics for columns t,si,i,b,f,d,bo,s,bin;
analyze table stats_tbl_part partition(dt='2010') compute statistics for columns t,si,i,b,f,d,bo,s,bin;
analyze table stats_tbl_part partition(dt='2011') compute statistics for columns t,si,i,b,f,d,bo,s,bin;
@@ -69,6 +74,12 @@ explain
select min(i), max(i), min(b), max(b), min(f), max(f), min(d), max(d) from stats_tbl;
select min(i), max(i), min(b), max(b), min(f), max(f), min(d), max(d) from stats_tbl;
+explain
+select min(i), '1' as one, max(i), min(b), max(b), min(f), max(f), 3+4.0 as three, min(d), max(d) from stats_tbl;
+select min(i), '1' as one, max(i), min(b), max(b), min(f), max(f), 3+4.0 as three, min(d), max(d) from stats_tbl;
+
+
+
explain
select count(*), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), count(si) from stats_tbl_part;
select count(*), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), count(si) from stats_tbl_part;
@@ -76,6 +87,10 @@ explain
select min(i), max(i), min(b), max(b), min(f), max(f), min(d), max(d) from stats_tbl_part;
select min(i), max(i), min(b), max(b), min(f), max(f), min(d), max(d) from stats_tbl_part;
+explain
+select min(i), '1' as one, max(i), min(b), max(b), min(f), max(f), 3+4.0 as three, min(d), max(d) from stats_tbl_part;
+select min(i), '1' as one, max(i), min(b), max(b), min(f), max(f), 3+4.0 as three, min(d), max(d) from stats_tbl_part;
+
explain select count(ts) from stats_tbl_part;
drop table stats_tbl;
http://git-wip-us.apache.org/repos/asf/hive/blob/cdaf3567/ql/src/test/results/clientpositive/metadata_only_queries.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/metadata_only_queries.q.out b/ql/src/test/results/clientpositive/metadata_only_queries.q.out
index 2dcd437..65a4dfa 100644
--- a/ql/src/test/results/clientpositive/metadata_only_queries.q.out
+++ b/ql/src/test/results/clientpositive/metadata_only_queries.q.out
@@ -276,6 +276,114 @@ STAGE PLANS:
Processor Tree:
ListSink
+PREHOOK: query: explain
+select count(*), '1' as one, sum(1), sum(0.2), 2 as two, count(1), count(s), 3+4.0 as three, count(bo), count(bin), count(si), max(i), min(b) from stats_tbl
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select count(*), '1' as one, sum(1), sum(0.2), 2 as two, count(1), count(s), 3+4.0 as three, count(bo), count(bin), count(si), max(i), min(b) from stats_tbl
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: stats_tbl
+ Statistics: Num rows: 9999 Data size: 1030908 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: s (type: string), bo (type: boolean), bin (type: binary), si (type: smallint), i (type: int), b (type: bigint)
+ outputColumnNames: _col2, _col3, _col4, _col5, _col6, _col7
+ Statistics: Num rows: 9999 Data size: 1030908 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count(), sum(1), sum(0.2), count(1), count(_col2), count(_col3), count(_col4), count(_col5), max(_col6), min(_col7)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
+ Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: double), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: int), _col9 (type: bigint)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), count(VALUE._col3), count(VALUE._col4), count(VALUE._col5), count(VALUE._col6), count(VALUE._col7), max(VALUE._col8), min(VALUE._col9)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
+ Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: bigint), '1' (type: string), _col1 (type: bigint), _col2 (type: double), 2 (type: int), _col3 (type: bigint), _col4 (type: bigint), 7.0 (type: double), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: int), _col9 (type: bigint)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
+ Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: explain
+select count(*), '1' as one, sum(1), sum(0.2), 2 as two, count(1), count(s), 3+4.0 as three, count(bo), count(bin), count(si), max(i), min(b) from stats_tbl_part
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select count(*), '1' as one, sum(1), sum(0.2), 2 as two, count(1), count(s), 3+4.0 as three, count(bo), count(bin), count(si), max(i), min(b) from stats_tbl_part
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: stats_tbl_part
+ Statistics: Num rows: 9489 Data size: 978785 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: s (type: string), bo (type: boolean), bin (type: binary), si (type: smallint), i (type: int), b (type: bigint)
+ outputColumnNames: _col2, _col3, _col4, _col5, _col6, _col7
+ Statistics: Num rows: 9489 Data size: 978785 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count(), sum(1), sum(0.2), count(1), count(_col2), count(_col3), count(_col4), count(_col5), max(_col6), min(_col7)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
+ Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: double), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: int), _col9 (type: bigint)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), count(VALUE._col3), count(VALUE._col4), count(VALUE._col5), count(VALUE._col6), count(VALUE._col7), max(VALUE._col8), min(VALUE._col9)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
+ Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: bigint), '1' (type: string), _col1 (type: bigint), _col2 (type: double), 2 (type: int), _col3 (type: bigint), _col4 (type: bigint), 7.0 (type: double), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: int), _col9 (type: bigint)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
+ Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
PREHOOK: query: analyze table stats_tbl compute statistics for columns t,si,i,b,f,d,bo,s,bin
PREHOOK: type: QUERY
PREHOOK: Input: default@stats_tbl
@@ -364,6 +472,31 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@stats_tbl
#### A masked pattern was here ####
65536 65791 4294967296 4294967551 0.01 99.98 0.01 50.0
+PREHOOK: query: explain
+select min(i), '1' as one, max(i), min(b), max(b), min(f), max(f), 3+4.0 as three, min(d), max(d) from stats_tbl
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select min(i), '1' as one, max(i), min(b), max(b), min(f), max(f), 3+4.0 as three, min(d), max(d) from stats_tbl
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-0
+ Fetch Operator
+ limit: 1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select min(i), '1' as one, max(i), min(b), max(b), min(f), max(f), 3+4.0 as three, min(d), max(d) from stats_tbl
+PREHOOK: type: QUERY
+PREHOOK: Input: default@stats_tbl
+#### A masked pattern was here ####
+POSTHOOK: query: select min(i), '1' as one, max(i), min(b), max(b), min(f), max(f), 3+4.0 as three, min(d), max(d) from stats_tbl
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@stats_tbl
+#### A masked pattern was here ####
+65536 1 65791 4294967296 4294967551 0.01 99.98 7.0 0.01 50.0
PREHOOK: query: explain
select count(*), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), count(si) from stats_tbl_part
PREHOOK: type: QUERY
@@ -414,6 +547,31 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@stats_tbl_part
#### A masked pattern was here ####
65536 65791 4294967296 4294967551 0.01 99.98 0.01 50.0
+PREHOOK: query: explain
+select min(i), '1' as one, max(i), min(b), max(b), min(f), max(f), 3+4.0 as three, min(d), max(d) from stats_tbl_part
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select min(i), '1' as one, max(i), min(b), max(b), min(f), max(f), 3+4.0 as three, min(d), max(d) from stats_tbl_part
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-0
+ Fetch Operator
+ limit: 1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select min(i), '1' as one, max(i), min(b), max(b), min(f), max(f), 3+4.0 as three, min(d), max(d) from stats_tbl_part
+PREHOOK: type: QUERY
+PREHOOK: Input: default@stats_tbl_part
+#### A masked pattern was here ####
+POSTHOOK: query: select min(i), '1' as one, max(i), min(b), max(b), min(f), max(f), 3+4.0 as three, min(d), max(d) from stats_tbl_part
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@stats_tbl_part
+#### A masked pattern was here ####
+65536 1 65791 4294967296 4294967551 0.01 99.98 7.0 0.01 50.0
PREHOOK: query: explain select count(ts) from stats_tbl_part
PREHOOK: type: QUERY
POSTHOOK: query: explain select count(ts) from stats_tbl_part
http://git-wip-us.apache.org/repos/asf/hive/blob/cdaf3567/ql/src/test/results/clientpositive/spark/metadata_only_queries.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/metadata_only_queries.q.out b/ql/src/test/results/clientpositive/spark/metadata_only_queries.q.out
index b2221fc..0d85f4e 100644
--- a/ql/src/test/results/clientpositive/spark/metadata_only_queries.q.out
+++ b/ql/src/test/results/clientpositive/spark/metadata_only_queries.q.out
@@ -288,6 +288,126 @@ STAGE PLANS:
Processor Tree:
ListSink
+PREHOOK: query: explain
+select count(*), '1' as one, sum(1), sum(0.2), 2 as two, count(1), count(s), 3+4.0 as three, count(bo), count(bin), count(si), max(i), min(b) from stats_tbl
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select count(*), '1' as one, sum(1), sum(0.2), 2 as two, count(1), count(s), 3+4.0 as three, count(bo), count(bin), count(si), max(i), min(b) from stats_tbl
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 2 <- Map 1 (GROUP, 1)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: stats_tbl
+ Statistics: Num rows: 9999 Data size: 1030908 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: s (type: string), bo (type: boolean), bin (type: binary), si (type: smallint), i (type: int), b (type: bigint)
+ outputColumnNames: _col2, _col3, _col4, _col5, _col6, _col7
+ Statistics: Num rows: 9999 Data size: 1030908 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count(), sum(1), sum(0.2), count(1), count(_col2), count(_col3), count(_col4), count(_col5), max(_col6), min(_col7)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
+ Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: double), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: int), _col9 (type: bigint)
+ Reducer 2
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), count(VALUE._col3), count(VALUE._col4), count(VALUE._col5), count(VALUE._col6), count(VALUE._col7), max(VALUE._col8), min(VALUE._col9)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
+ Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: bigint), '1' (type: string), _col1 (type: bigint), _col2 (type: double), 2 (type: int), _col3 (type: bigint), _col4 (type: bigint), 7.0 (type: double), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: int), _col9 (type: bigint)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
+ Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: explain
+select count(*), '1' as one, sum(1), sum(0.2), 2 as two, count(1), count(s), 3+4.0 as three, count(bo), count(bin), count(si), max(i), min(b) from stats_tbl_part
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select count(*), '1' as one, sum(1), sum(0.2), 2 as two, count(1), count(s), 3+4.0 as three, count(bo), count(bin), count(si), max(i), min(b) from stats_tbl_part
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 2 <- Map 1 (GROUP, 1)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: stats_tbl_part
+ Statistics: Num rows: 9489 Data size: 978785 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: s (type: string), bo (type: boolean), bin (type: binary), si (type: smallint), i (type: int), b (type: bigint)
+ outputColumnNames: _col2, _col3, _col4, _col5, _col6, _col7
+ Statistics: Num rows: 9489 Data size: 978785 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count(), sum(1), sum(0.2), count(1), count(_col2), count(_col3), count(_col4), count(_col5), max(_col6), min(_col7)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
+ Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: double), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: int), _col9 (type: bigint)
+ Reducer 2
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), count(VALUE._col3), count(VALUE._col4), count(VALUE._col5), count(VALUE._col6), count(VALUE._col7), max(VALUE._col8), min(VALUE._col9)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
+ Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: bigint), '1' (type: string), _col1 (type: bigint), _col2 (type: double), 2 (type: int), _col3 (type: bigint), _col4 (type: bigint), 7.0 (type: double), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: int), _col9 (type: bigint)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
+ Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
PREHOOK: query: analyze table stats_tbl compute statistics for columns t,si,i,b,f,d,bo,s,bin
PREHOOK: type: QUERY
PREHOOK: Input: default@stats_tbl
@@ -376,6 +496,31 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@stats_tbl
#### A masked pattern was here ####
65536 65791 4294967296 4294967551 0.01 99.98 0.01 50.0
+PREHOOK: query: explain
+select min(i), '1' as one, max(i), min(b), max(b), min(f), max(f), 3+4.0 as three, min(d), max(d) from stats_tbl
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select min(i), '1' as one, max(i), min(b), max(b), min(f), max(f), 3+4.0 as three, min(d), max(d) from stats_tbl
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-0
+ Fetch Operator
+ limit: 1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select min(i), '1' as one, max(i), min(b), max(b), min(f), max(f), 3+4.0 as three, min(d), max(d) from stats_tbl
+PREHOOK: type: QUERY
+PREHOOK: Input: default@stats_tbl
+#### A masked pattern was here ####
+POSTHOOK: query: select min(i), '1' as one, max(i), min(b), max(b), min(f), max(f), 3+4.0 as three, min(d), max(d) from stats_tbl
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@stats_tbl
+#### A masked pattern was here ####
+65536 1 65791 4294967296 4294967551 0.01 99.98 7.0 0.01 50.0
PREHOOK: query: explain
select count(*), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), count(si) from stats_tbl_part
PREHOOK: type: QUERY
@@ -426,6 +571,31 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@stats_tbl_part
#### A masked pattern was here ####
65536 65791 4294967296 4294967551 0.01 99.98 0.01 50.0
+PREHOOK: query: explain
+select min(i), '1' as one, max(i), min(b), max(b), min(f), max(f), 3+4.0 as three, min(d), max(d) from stats_tbl_part
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select min(i), '1' as one, max(i), min(b), max(b), min(f), max(f), 3+4.0 as three, min(d), max(d) from stats_tbl_part
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-0
+ Fetch Operator
+ limit: 1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select min(i), '1' as one, max(i), min(b), max(b), min(f), max(f), 3+4.0 as three, min(d), max(d) from stats_tbl_part
+PREHOOK: type: QUERY
+PREHOOK: Input: default@stats_tbl_part
+#### A masked pattern was here ####
+POSTHOOK: query: select min(i), '1' as one, max(i), min(b), max(b), min(f), max(f), 3+4.0 as three, min(d), max(d) from stats_tbl_part
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@stats_tbl_part
+#### A masked pattern was here ####
+65536 1 65791 4294967296 4294967551 0.01 99.98 7.0 0.01 50.0
PREHOOK: query: explain select count(ts) from stats_tbl_part
PREHOOK: type: QUERY
POSTHOOK: query: explain select count(ts) from stats_tbl_part
http://git-wip-us.apache.org/repos/asf/hive/blob/cdaf3567/ql/src/test/results/clientpositive/tez/metadata_only_queries.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/metadata_only_queries.q.out b/ql/src/test/results/clientpositive/tez/metadata_only_queries.q.out
index f43440e..ab86ab0 100644
--- a/ql/src/test/results/clientpositive/tez/metadata_only_queries.q.out
+++ b/ql/src/test/results/clientpositive/tez/metadata_only_queries.q.out
@@ -288,6 +288,126 @@ STAGE PLANS:
Processor Tree:
ListSink
+PREHOOK: query: explain
+select count(*), '1' as one, sum(1), sum(0.2), 2 as two, count(1), count(s), 3+4.0 as three, count(bo), count(bin), count(si), max(i), min(b) from stats_tbl
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select count(*), '1' as one, sum(1), sum(0.2), 2 as two, count(1), count(s), 3+4.0 as three, count(bo), count(bin), count(si), max(i), min(b) from stats_tbl
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: stats_tbl
+ Statistics: Num rows: 9999 Data size: 1030908 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: s (type: string), bo (type: boolean), bin (type: binary), si (type: smallint), i (type: int), b (type: bigint)
+ outputColumnNames: _col2, _col3, _col4, _col5, _col6, _col7
+ Statistics: Num rows: 9999 Data size: 1030908 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count(), sum(1), sum(0.2), count(1), count(_col2), count(_col3), count(_col4), count(_col5), max(_col6), min(_col7)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
+ Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: double), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: int), _col9 (type: bigint)
+ Reducer 2
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), count(VALUE._col3), count(VALUE._col4), count(VALUE._col5), count(VALUE._col6), count(VALUE._col7), max(VALUE._col8), min(VALUE._col9)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
+ Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: bigint), '1' (type: string), _col1 (type: bigint), _col2 (type: double), 2 (type: int), _col3 (type: bigint), _col4 (type: bigint), 7.0 (type: double), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: int), _col9 (type: bigint)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
+ Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: explain
+select count(*), '1' as one, sum(1), sum(0.2), 2 as two, count(1), count(s), 3+4.0 as three, count(bo), count(bin), count(si), max(i), min(b) from stats_tbl_part
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select count(*), '1' as one, sum(1), sum(0.2), 2 as two, count(1), count(s), 3+4.0 as three, count(bo), count(bin), count(si), max(i), min(b) from stats_tbl_part
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: stats_tbl_part
+ Statistics: Num rows: 9489 Data size: 978785 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: s (type: string), bo (type: boolean), bin (type: binary), si (type: smallint), i (type: int), b (type: bigint)
+ outputColumnNames: _col2, _col3, _col4, _col5, _col6, _col7
+ Statistics: Num rows: 9489 Data size: 978785 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count(), sum(1), sum(0.2), count(1), count(_col2), count(_col3), count(_col4), count(_col5), max(_col6), min(_col7)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
+ Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: double), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: int), _col9 (type: bigint)
+ Reducer 2
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), count(VALUE._col3), count(VALUE._col4), count(VALUE._col5), count(VALUE._col6), count(VALUE._col7), max(VALUE._col8), min(VALUE._col9)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
+ Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: bigint), '1' (type: string), _col1 (type: bigint), _col2 (type: double), 2 (type: int), _col3 (type: bigint), _col4 (type: bigint), 7.0 (type: double), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: int), _col9 (type: bigint)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
+ Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
PREHOOK: query: analyze table stats_tbl compute statistics for columns t,si,i,b,f,d,bo,s,bin
PREHOOK: type: QUERY
PREHOOK: Input: default@stats_tbl
@@ -376,6 +496,31 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@stats_tbl
#### A masked pattern was here ####
65536 65791 4294967296 4294967551 0.01 99.98 0.01 50.0
+PREHOOK: query: explain
+select min(i), '1' as one, max(i), min(b), max(b), min(f), max(f), 3+4.0 as three, min(d), max(d) from stats_tbl
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select min(i), '1' as one, max(i), min(b), max(b), min(f), max(f), 3+4.0 as three, min(d), max(d) from stats_tbl
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-0
+ Fetch Operator
+ limit: 1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select min(i), '1' as one, max(i), min(b), max(b), min(f), max(f), 3+4.0 as three, min(d), max(d) from stats_tbl
+PREHOOK: type: QUERY
+PREHOOK: Input: default@stats_tbl
+#### A masked pattern was here ####
+POSTHOOK: query: select min(i), '1' as one, max(i), min(b), max(b), min(f), max(f), 3+4.0 as three, min(d), max(d) from stats_tbl
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@stats_tbl
+#### A masked pattern was here ####
+65536 1 65791 4294967296 4294967551 0.01 99.98 7.0 0.01 50.0
PREHOOK: query: explain
select count(*), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), count(si) from stats_tbl_part
PREHOOK: type: QUERY
@@ -426,6 +571,31 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@stats_tbl_part
#### A masked pattern was here ####
65536 65791 4294967296 4294967551 0.01 99.98 0.01 50.0
+PREHOOK: query: explain
+select min(i), '1' as one, max(i), min(b), max(b), min(f), max(f), 3+4.0 as three, min(d), max(d) from stats_tbl_part
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select min(i), '1' as one, max(i), min(b), max(b), min(f), max(f), 3+4.0 as three, min(d), max(d) from stats_tbl_part
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-0
+ Fetch Operator
+ limit: 1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select min(i), '1' as one, max(i), min(b), max(b), min(f), max(f), 3+4.0 as three, min(d), max(d) from stats_tbl_part
+PREHOOK: type: QUERY
+PREHOOK: Input: default@stats_tbl_part
+#### A masked pattern was here ####
+POSTHOOK: query: select min(i), '1' as one, max(i), min(b), max(b), min(f), max(f), 3+4.0 as three, min(d), max(d) from stats_tbl_part
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@stats_tbl_part
+#### A masked pattern was here ####
+65536 1 65791 4294967296 4294967551 0.01 99.98 7.0 0.01 50.0
PREHOOK: query: explain select count(ts) from stats_tbl_part
PREHOOK: type: QUERY
POSTHOOK: query: explain select count(ts) from stats_tbl_part