You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ct...@apache.org on 2016/01/13 15:10:29 UTC
hive git commit: HIVE-12788 Setting hive.optimize.union.remove to
TRUE will break UNION ALL with aggregate functions (Chaoyu Tang,
reviewed by Pengcheng Xiong)
Repository: hive
Updated Branches:
refs/heads/master de30fe4e2 -> eba302492
HIVE-12788 Setting hive.optimize.union.remove to TRUE will break UNION ALL with aggregate functions (Chaoyu Tang, reviewed by Pengcheng Xiong)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/eba30249
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/eba30249
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/eba30249
Branch: refs/heads/master
Commit: eba30249284bea02df1411bc36f054634e3bb1f8
Parents: de30fe4
Author: ctang <ct...@gmail.com>
Authored: Wed Jan 13 09:09:37 2016 -0500
Committer: ctang <ct...@gmail.com>
Committed: Wed Jan 13 09:09:37 2016 -0500
----------------------------------------------------------------------
.../hive/ql/optimizer/StatsOptimizer.java | 48 +-
.../queries/clientpositive/union_remove_26.q | 111 +++
.../clientpositive/union_remove_26.q.out | 823 +++++++++++++++++++
3 files changed, 974 insertions(+), 8 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/eba30249/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java
index 03dcf9f..2f9a831 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java
@@ -118,7 +118,8 @@ public class StatsOptimizer extends Transform {
opRules.put(new RuleRegExp("R2", TS + SEL + GBY + RS + GBY + FS),
new MetaDataProcessor(pctx));
- Dispatcher disp = new DefaultRuleDispatcher(null, opRules, null);
+ NodeProcessorCtx soProcCtx = new StatsOptimizerProcContext();
+ Dispatcher disp = new DefaultRuleDispatcher(null, opRules, soProcCtx);
GraphWalker ogw = new DefaultGraphWalker(disp);
ArrayList<Node> topNodes = new ArrayList<Node>();
@@ -127,6 +128,10 @@ public class StatsOptimizer extends Transform {
return pctx;
}
+ private static class StatsOptimizerProcContext implements NodeProcessorCtx {
+ boolean stopProcess = false;
+ }
+
private static class MetaDataProcessor implements NodeProcessor {
private final ParseContext pctx;
@@ -225,7 +230,21 @@ public class StatsOptimizer extends Transform {
// 3. Connect to metastore and get the stats
// 4. Compose rows and add it in FetchWork
// 5. Delete GBY - RS - GBY - SEL from the pipeline.
+ StatsOptimizerProcContext soProcCtx = (StatsOptimizerProcContext) procCtx;
+
+ // If the optimization has already been stopped, e.g. because a branch did not qualify
+ // or lacked the stats data, we do not continue this process. For example, for the
+ // query select max(value) from src1 union all select max(value) from src2,
+ // once union remove optimization has been applied the operator tree becomes
+ // TS[0]->SEL[1]->GBY[2]->RS[3]->GBY[4]->FS[17]
+ // TS[6]->SEL[7]->GBY[8]->RS[9]->GBY[10]->FS[18]
+ // If the TS[0] branch for src1 is not optimized because src1 does not have column stats,
+ // there is no need to continue processing the TS[6] branch.
+ if (soProcCtx.stopProcess) {
+ return null;
+ }
+ boolean isOptimized = false;
try {
TableScanOperator tsOp = (TableScanOperator) stack.get(0);
if (tsOp.getNumParent() > 0) {
@@ -621,7 +640,6 @@ public class StatsOptimizer extends Transform {
}
}
-
List<List<Object>> allRows = new ArrayList<List<Object>>();
List<String> colNames = new ArrayList<String>();
List<ObjectInspector> ois = new ArrayList<ObjectInspector>();
@@ -648,19 +666,33 @@ public class StatsOptimizer extends Transform {
}
allRows.add(oneRowWithConstant);
}
- StandardStructObjectInspector sOI = ObjectInspectorFactory.
- getStandardStructObjectInspector(colNames, ois);
- FetchWork fWork = new FetchWork(allRows, sOI);
- FetchTask fTask = (FetchTask)TaskFactory.get(fWork, pctx.getConf());
- fWork.setLimit(allRows.size());
- pctx.setFetchTask(fTask);
+ FetchWork fWork = null;
+ FetchTask fTask = pctx.getFetchTask();
+ if (fTask != null) {
+ fWork = fTask.getWork();
+ fWork.getRowsComputedUsingStats().addAll(allRows);
+ } else {
+ StandardStructObjectInspector sOI = ObjectInspectorFactory.
+ getStandardStructObjectInspector(colNames, ois);
+ fWork = new FetchWork(allRows, sOI);
+ fTask = (FetchTask)TaskFactory.get(fWork, pctx.getConf());
+ pctx.setFetchTask(fTask);
+ }
+ fWork.setLimit(fWork.getRowsComputedUsingStats().size());
+ isOptimized = true;
return null;
} catch (Exception e) {
// this is best effort optimization, bail out in error conditions and
// try generate and execute slower plan
Logger.debug("Failed to optimize using metadata optimizer", e);
return null;
+ } finally {
+ // If the stats optimization was not applied for any reason, stop further processing and clear any FetchTask that was set
+ if (!isOptimized) {
+ soProcCtx.stopProcess = true;
+ pctx.setFetchTask(null);
+ }
}
}
http://git-wip-us.apache.org/repos/asf/hive/blob/eba30249/ql/src/test/queries/clientpositive/union_remove_26.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/union_remove_26.q b/ql/src/test/queries/clientpositive/union_remove_26.q
new file mode 100644
index 0000000..d35d4e2
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/union_remove_26.q
@@ -0,0 +1,111 @@
+set hive.stats.autogather=true;
+
+-- This is to test the union remove optimization with stats optimization
+
+create table inputSrcTbl1(key string, val int) stored as textfile;
+create table inputSrcTbl2(key string, val int) stored as textfile;
+create table inputSrcTbl3(key string, val int) stored as textfile;
+
+load data local inpath '../../data/files/T1.txt' into table inputSrcTbl1;
+load data local inpath '../../data/files/T2.txt' into table inputSrcTbl2;
+load data local inpath '../../data/files/T3.txt' into table inputSrcTbl3;
+
+create table inputTbl1(key string, val int) stored as textfile;
+create table inputTbl2(key string, val int) stored as textfile;
+create table inputTbl3(key string, val int) stored as textfile;
+
+insert into inputTbl1 select * from inputSrcTbl1;
+insert into inputTbl2 select * from inputSrcTbl2;
+insert into inputTbl3 select * from inputSrcTbl3;
+
+set hive.compute.query.using.stats=true;
+set hive.optimize.union.remove=true;
+set mapred.input.dir.recursive=true;
+
+--- union remove optimization effects, stats optimization does not though it is on since inputTbl2 column stats is not available
+analyze table inputTbl1 compute statistics for columns;
+analyze table inputTbl3 compute statistics for columns;
+explain
+ SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl1
+ UNION ALL
+ SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl2
+ UNION ALL
+ SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl3;
+
+
+select count(*) from (
+ SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl1
+ UNION ALL
+ SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl2
+ UNION ALL
+ SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl3) t;
+
+--- union remove optimization and stats optimization are effective after inputTbl2 column stats is calculated
+analyze table inputTbl2 compute statistics for columns;
+explain
+ SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl1
+ UNION ALL
+ SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl2
+ UNION ALL
+ SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl3;
+
+
+select count(*) from (
+ SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl1
+ UNION ALL
+ SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl2
+ UNION ALL
+ SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl3) t;
+
+--- union remove optimization effects but stats optimization does not (with group by) though it is on
+explain
+ SELECT key, count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl1 group by key
+ UNION ALL
+ SELECT key, count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl2 group by key
+ UNION ALL
+ SELECT key, count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl3 group by key;
+
+select count(*) from (
+ SELECT key, count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl1 group by key
+ UNION ALL
+ SELECT key, count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl2 group by key
+ UNION ALL
+ SELECT key, count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl3 group by key) t;
+
+
+set hive.compute.query.using.stats=false;
+set hive.optimize.union.remove=true;
+set mapred.input.dir.recursive=true;
+
+explain
+ SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl1
+ UNION ALL
+ SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl2
+ UNION ALL
+ SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl3;
+
+select count(*) from (
+ SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl1
+ UNION ALL
+ SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl2
+ UNION ALL
+ SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl3) t;
+
+
+set hive.compute.query.using.stats=false;
+set hive.optimize.union.remove=false;
+
+explain
+ SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl1
+ UNION ALL
+ SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl2
+ UNION ALL
+ SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl3;
+
+
+select count(*) from (
+ SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl1
+ UNION ALL
+ SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl2
+ UNION ALL
+ SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl3) t;
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hive/blob/eba30249/ql/src/test/results/clientpositive/union_remove_26.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/union_remove_26.q.out b/ql/src/test/results/clientpositive/union_remove_26.q.out
new file mode 100644
index 0000000..8afaf08
--- /dev/null
+++ b/ql/src/test/results/clientpositive/union_remove_26.q.out
@@ -0,0 +1,823 @@
+PREHOOK: query: -- This is to test the union remove optimization with stats optimization
+
+create table inputSrcTbl1(key string, val int) stored as textfile
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@inputSrcTbl1
+POSTHOOK: query: -- This is to test the union remove optimization with stats optimization
+
+create table inputSrcTbl1(key string, val int) stored as textfile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@inputSrcTbl1
+PREHOOK: query: create table inputSrcTbl2(key string, val int) stored as textfile
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@inputSrcTbl2
+POSTHOOK: query: create table inputSrcTbl2(key string, val int) stored as textfile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@inputSrcTbl2
+PREHOOK: query: create table inputSrcTbl3(key string, val int) stored as textfile
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@inputSrcTbl3
+POSTHOOK: query: create table inputSrcTbl3(key string, val int) stored as textfile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@inputSrcTbl3
+PREHOOK: query: load data local inpath '../../data/files/T1.txt' into table inputSrcTbl1
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@inputsrctbl1
+POSTHOOK: query: load data local inpath '../../data/files/T1.txt' into table inputSrcTbl1
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@inputsrctbl1
+PREHOOK: query: load data local inpath '../../data/files/T2.txt' into table inputSrcTbl2
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@inputsrctbl2
+POSTHOOK: query: load data local inpath '../../data/files/T2.txt' into table inputSrcTbl2
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@inputsrctbl2
+PREHOOK: query: load data local inpath '../../data/files/T3.txt' into table inputSrcTbl3
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@inputsrctbl3
+POSTHOOK: query: load data local inpath '../../data/files/T3.txt' into table inputSrcTbl3
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@inputsrctbl3
+PREHOOK: query: create table inputTbl1(key string, val int) stored as textfile
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@inputTbl1
+POSTHOOK: query: create table inputTbl1(key string, val int) stored as textfile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@inputTbl1
+PREHOOK: query: create table inputTbl2(key string, val int) stored as textfile
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@inputTbl2
+POSTHOOK: query: create table inputTbl2(key string, val int) stored as textfile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@inputTbl2
+PREHOOK: query: create table inputTbl3(key string, val int) stored as textfile
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@inputTbl3
+POSTHOOK: query: create table inputTbl3(key string, val int) stored as textfile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@inputTbl3
+PREHOOK: query: insert into inputTbl1 select * from inputSrcTbl1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@inputsrctbl1
+PREHOOK: Output: default@inputtbl1
+POSTHOOK: query: insert into inputTbl1 select * from inputSrcTbl1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@inputsrctbl1
+POSTHOOK: Output: default@inputtbl1
+POSTHOOK: Lineage: inputtbl1.key SIMPLE [(inputsrctbl1)inputsrctbl1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: inputtbl1.val SIMPLE [(inputsrctbl1)inputsrctbl1.FieldSchema(name:val, type:int, comment:null), ]
+PREHOOK: query: insert into inputTbl2 select * from inputSrcTbl2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@inputsrctbl2
+PREHOOK: Output: default@inputtbl2
+POSTHOOK: query: insert into inputTbl2 select * from inputSrcTbl2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@inputsrctbl2
+POSTHOOK: Output: default@inputtbl2
+POSTHOOK: Lineage: inputtbl2.key SIMPLE [(inputsrctbl2)inputsrctbl2.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: inputtbl2.val SIMPLE [(inputsrctbl2)inputsrctbl2.FieldSchema(name:val, type:int, comment:null), ]
+PREHOOK: query: insert into inputTbl3 select * from inputSrcTbl3
+PREHOOK: type: QUERY
+PREHOOK: Input: default@inputsrctbl3
+PREHOOK: Output: default@inputtbl3
+POSTHOOK: query: insert into inputTbl3 select * from inputSrcTbl3
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@inputsrctbl3
+POSTHOOK: Output: default@inputtbl3
+POSTHOOK: Lineage: inputtbl3.key SIMPLE [(inputsrctbl3)inputsrctbl3.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: inputtbl3.val SIMPLE [(inputsrctbl3)inputsrctbl3.FieldSchema(name:val, type:int, comment:null), ]
+PREHOOK: query: --- union remove optimization effects, stats optimization does not though it is on since inputTbl2 column stats is not available
+analyze table inputTbl1 compute statistics for columns
+PREHOOK: type: QUERY
+PREHOOK: Input: default@inputtbl1
+#### A masked pattern was here ####
+POSTHOOK: query: --- union remove optimization effects, stats optimization does not though it is on since inputTbl2 column stats is not available
+analyze table inputTbl1 compute statistics for columns
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@inputtbl1
+#### A masked pattern was here ####
+PREHOOK: query: analyze table inputTbl3 compute statistics for columns
+PREHOOK: type: QUERY
+PREHOOK: Input: default@inputtbl3
+#### A masked pattern was here ####
+POSTHOOK: query: analyze table inputTbl3 compute statistics for columns
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@inputtbl3
+#### A masked pattern was here ####
+PREHOOK: query: explain
+ SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl1
+ UNION ALL
+ SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl2
+ UNION ALL
+ SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl3
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+ SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl1
+ UNION ALL
+ SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl2
+ UNION ALL
+ SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl3
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-2 is a root stage
+ Stage-3 is a root stage
+ Stage-4 is a root stage
+ Stage-1 depends on stages: Stage-2
+
+STAGE PLANS:
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: inputtbl1
+ Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: val (type: int)
+ outputColumnNames: val
+ Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count(1), min(val), max(val)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint), _col1 (type: int), _col2 (type: int)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0), min(VALUE._col1), max(VALUE._col2)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-3
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: inputtbl2
+ Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: val (type: int)
+ outputColumnNames: val
+ Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count(1), min(val), max(val)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint), _col1 (type: int), _col2 (type: int)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0), min(VALUE._col1), max(VALUE._col2)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-4
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: inputtbl3
+ Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: val (type: int)
+ outputColumnNames: val
+ Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count(1), min(val), max(val)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint), _col1 (type: int), _col2 (type: int)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0), min(VALUE._col1), max(VALUE._col2)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-1
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select count(*) from (
+ SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl1
+ UNION ALL
+ SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl2
+ UNION ALL
+ SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl3) t
+PREHOOK: type: QUERY
+PREHOOK: Input: default@inputtbl1
+PREHOOK: Input: default@inputtbl2
+PREHOOK: Input: default@inputtbl3
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from (
+ SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl1
+ UNION ALL
+ SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl2
+ UNION ALL
+ SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl3) t
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@inputtbl1
+POSTHOOK: Input: default@inputtbl2
+POSTHOOK: Input: default@inputtbl3
+#### A masked pattern was here ####
+3
+PREHOOK: query: --- union remove optimization and stats optimization are effective after inputTbl2 column stats is calculated
+analyze table inputTbl2 compute statistics for columns
+PREHOOK: type: QUERY
+PREHOOK: Input: default@inputtbl2
+#### A masked pattern was here ####
+POSTHOOK: query: --- union remove optimization and stats optimization are effective after inputTbl2 column stats is calculated
+analyze table inputTbl2 compute statistics for columns
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@inputtbl2
+#### A masked pattern was here ####
+PREHOOK: query: explain
+ SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl1
+ UNION ALL
+ SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl2
+ UNION ALL
+ SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl3
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+ SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl1
+ UNION ALL
+ SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl2
+ UNION ALL
+ SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl3
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-0
+ Fetch Operator
+ limit: 3
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select count(*) from (
+ SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl1
+ UNION ALL
+ SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl2
+ UNION ALL
+ SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl3) t
+PREHOOK: type: QUERY
+PREHOOK: Input: default@inputtbl1
+PREHOOK: Input: default@inputtbl2
+PREHOOK: Input: default@inputtbl3
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from (
+ SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl1
+ UNION ALL
+ SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl2
+ UNION ALL
+ SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl3) t
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@inputtbl1
+POSTHOOK: Input: default@inputtbl2
+POSTHOOK: Input: default@inputtbl3
+#### A masked pattern was here ####
+3
+PREHOOK: query: --- union remove optimization effects but stats optimization does not (with group by) though it is on
+explain
+ SELECT key, count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl1 group by key
+ UNION ALL
+ SELECT key, count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl2 group by key
+ UNION ALL
+ SELECT key, count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl3 group by key
+PREHOOK: type: QUERY
+POSTHOOK: query: --- union remove optimization effects but stats optimization does not (with group by) though it is on
+explain
+ SELECT key, count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl1 group by key
+ UNION ALL
+ SELECT key, count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl2 group by key
+ UNION ALL
+ SELECT key, count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl3 group by key
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 is a root stage
+ Stage-3 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: inputtbl1
+ Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), val (type: int)
+ outputColumnNames: key, val
+ Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count(1), min(val), max(val)
+ keys: key (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint), _col2 (type: int), _col3 (type: int)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0), min(VALUE._col1), max(VALUE._col2)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: inputtbl2
+ Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), val (type: int)
+ outputColumnNames: key, val
+ Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count(1), min(val), max(val)
+ keys: key (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint), _col2 (type: int), _col3 (type: int)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0), min(VALUE._col1), max(VALUE._col2)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-3
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: inputtbl3
+ Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), val (type: int)
+ outputColumnNames: key, val
+ Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count(1), min(val), max(val)
+ keys: key (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint), _col2 (type: int), _col3 (type: int)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0), min(VALUE._col1), max(VALUE._col2)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select count(*) from (
+ SELECT key, count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl1 group by key
+ UNION ALL
+ SELECT key, count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl2 group by key
+ UNION ALL
+ SELECT key, count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl3 group by key) t
+PREHOOK: type: QUERY
+PREHOOK: Input: default@inputtbl1
+PREHOOK: Input: default@inputtbl2
+PREHOOK: Input: default@inputtbl3
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from (
+ SELECT key, count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl1 group by key
+ UNION ALL
+ SELECT key, count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl2 group by key
+ UNION ALL
+ SELECT key, count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl3 group by key) t
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@inputtbl1
+POSTHOOK: Input: default@inputtbl2
+POSTHOOK: Input: default@inputtbl3
+#### A masked pattern was here ####
+14
+PREHOOK: query: explain
+ SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl1
+ UNION ALL
+ SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl2
+ UNION ALL
+ SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl3
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+ SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl1
+ UNION ALL
+ SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl2
+ UNION ALL
+ SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl3
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 is a root stage
+ Stage-3 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: inputtbl1
+ Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: val (type: int)
+ outputColumnNames: val
+ Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count(1), min(val), max(val)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint), _col1 (type: int), _col2 (type: int)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0), min(VALUE._col1), max(VALUE._col2)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: inputtbl2
+ Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: val (type: int)
+ outputColumnNames: val
+ Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count(1), min(val), max(val)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint), _col1 (type: int), _col2 (type: int)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0), min(VALUE._col1), max(VALUE._col2)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-3
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: inputtbl3
+ Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: val (type: int)
+ outputColumnNames: val
+ Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count(1), min(val), max(val)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint), _col1 (type: int), _col2 (type: int)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0), min(VALUE._col1), max(VALUE._col2)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select count(*) from (
+ SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl1
+ UNION ALL
+ SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl2
+ UNION ALL
+ SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl3) t
+PREHOOK: type: QUERY
+PREHOOK: Input: default@inputtbl1
+PREHOOK: Input: default@inputtbl2
+PREHOOK: Input: default@inputtbl3
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from (
+ SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl1
+ UNION ALL
+ SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl2
+ UNION ALL
+ SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl3) t
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@inputtbl1
+POSTHOOK: Input: default@inputtbl2
+POSTHOOK: Input: default@inputtbl3
+#### A masked pattern was here ####
+3
+PREHOOK: query: explain
+ SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl1
+ UNION ALL
+ SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl2
+ UNION ALL
+ SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl3
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+ SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl1
+ UNION ALL
+ SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl2
+ UNION ALL
+ SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl3
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1, Stage-3, Stage-4
+ Stage-3 is a root stage
+ Stage-4 is a root stage
+ Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: inputtbl1
+ Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: val (type: int)
+ outputColumnNames: val
+ Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count(1), min(val), max(val)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint), _col1 (type: int), _col2 (type: int)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0), min(VALUE._col1), max(VALUE._col2)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Union
+ Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TableScan
+ Union
+ Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TableScan
+ Union
+ Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-3
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: inputtbl2
+ Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: val (type: int)
+ outputColumnNames: val
+ Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count(1), min(val), max(val)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint), _col1 (type: int), _col2 (type: int)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0), min(VALUE._col1), max(VALUE._col2)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-4
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: inputtbl3
+ Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: val (type: int)
+ outputColumnNames: val
+ Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count(1), min(val), max(val)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint), _col1 (type: int), _col2 (type: int)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0), min(VALUE._col1), max(VALUE._col2)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select count(*) from (
+ SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl1
+ UNION ALL
+ SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl2
+ UNION ALL
+ SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl3) t
+PREHOOK: type: QUERY
+PREHOOK: Input: default@inputtbl1
+PREHOOK: Input: default@inputtbl2
+PREHOOK: Input: default@inputtbl3
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from (
+ SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl1
+ UNION ALL
+ SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl2
+ UNION ALL
+ SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl3) t
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@inputtbl1
+POSTHOOK: Input: default@inputtbl2
+POSTHOOK: Input: default@inputtbl3
+#### A masked pattern was here ####
+3