You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by px...@apache.org on 2017/04/23 03:46:45 UTC
[1/5] hive git commit: HIVE-16493: Skip column stats when colStats is empty (Pengcheng Xiong, reviewed by Ashutosh Chauhan)
Repository: hive
Updated Branches:
refs/heads/master 8a946ccb3 -> eaa439e39
HIVE-16493: Skip column stats when colStats is empty (Pengcheng Xiong, reviewed by Ashutosh Chauhan)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/0b6a48d0
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/0b6a48d0
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/0b6a48d0
Branch: refs/heads/master
Commit: 0b6a48d0131948e6cbfa1af89c7e331475b56de8
Parents: 8a946cc
Author: Pengcheng Xiong <px...@hortonworks.com>
Authored: Sat Apr 22 20:45:00 2017 -0700
Committer: Pengcheng Xiong <px...@hortonworks.com>
Committed: Sat Apr 22 20:45:00 2017 -0700
----------------------------------------------------------------------
.../java/org/apache/hadoop/hive/ql/exec/ColumnStatsTask.java | 7 ++++++-
1 file changed, 6 insertions(+), 1 deletion(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/0b6a48d0/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsTask.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsTask.java
index cb16fb7..d96f432 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsTask.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsTask.java
@@ -386,7 +386,9 @@ public class ColumnStatsTask extends Task<ColumnStatsWork> implements Serializab
ColumnStatistics colStats = new ColumnStatistics();
colStats.setStatsDesc(statsDesc);
colStats.setStatsObj(statsObjs);
- stats.add(colStats);
+ if (!statsObjs.isEmpty()) {
+ stats.add(colStats);
+ }
}
ftOp.clearFetchContext();
return stats;
@@ -413,6 +415,9 @@ public class ColumnStatsTask extends Task<ColumnStatsWork> implements Serializab
List<ColumnStatistics> colStats = constructColumnStatsFromPackedRows(db);
// Persist the column statistics object to the metastore
// Note, this function is shared for both table and partition column stats.
+ if (colStats.isEmpty()) {
+ return 0;
+ }
SetPartitionsStatsRequest request = new SetPartitionsStatsRequest(colStats);
if (work.getColStats() != null && work.getColStats().getNumBitVector() > 0) {
request.setNeedMerge(true);
[2/5] hive git commit: HIVE-16421 Runtime filtering breaks user-level explain (Pengcheng Xiong, reviewed by Ashutosh Chauhan)
Posted by px...@apache.org.
http://git-wip-us.apache.org/repos/asf/hive/blob/eaa439e3/ql/src/test/results/clientpositive/udf_round_2_auto_stats.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/udf_round_2_auto_stats.q.out b/ql/src/test/results/clientpositive/udf_round_2_auto_stats.q.out
new file mode 100644
index 0000000..4dbe8fc
--- /dev/null
+++ b/ql/src/test/results/clientpositive/udf_round_2_auto_stats.q.out
@@ -0,0 +1,55 @@
+PREHOOK: query: create table tstTbl1(n double)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@tstTbl1
+POSTHOOK: query: create table tstTbl1(n double)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@tstTbl1
+PREHOOK: query: insert overwrite table tstTbl1
+select 'NaN' from src tablesample (1 rows)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@tsttbl1
+POSTHOOK: query: insert overwrite table tstTbl1
+select 'NaN' from src tablesample (1 rows)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@tsttbl1
+POSTHOOK: Lineage: tsttbl1.n EXPRESSION []
+PREHOOK: query: select * from tstTbl1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tsttbl1
+#### A masked pattern was here ####
+POSTHOOK: query: select * from tstTbl1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tsttbl1
+#### A masked pattern was here ####
+NaN
+PREHOOK: query: select round(n, 1) from tstTbl1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tsttbl1
+#### A masked pattern was here ####
+POSTHOOK: query: select round(n, 1) from tstTbl1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tsttbl1
+#### A masked pattern was here ####
+NaN
+PREHOOK: query: select round(n) from tstTbl1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tsttbl1
+#### A masked pattern was here ####
+POSTHOOK: query: select round(n) from tstTbl1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tsttbl1
+#### A masked pattern was here ####
+NaN
+PREHOOK: query: select round(1/0), round(1/0, 2), round(1.0/0.0), round(1.0/0.0, 2) from src tablesample (1 rows)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select round(1/0), round(1/0, 2), round(1.0/0.0), round(1.0/0.0, 2) from src tablesample (1 rows)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+NULL NULL NULL NULL
[4/5] hive git commit: HIVE-16421 Runtime filtering breaks user-level explain (Pengcheng Xiong, reviewed by Ashutosh Chauhan)
Posted by px...@apache.org.
http://git-wip-us.apache.org/repos/asf/hive/blob/eaa439e3/ql/src/test/results/clientpositive/llap/dynamic_semijoin_user_level.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/dynamic_semijoin_user_level.q.out b/ql/src/test/results/clientpositive/llap/dynamic_semijoin_user_level.q.out
new file mode 100644
index 0000000..614c4fd
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/dynamic_semijoin_user_level.q.out
@@ -0,0 +1,1495 @@
+PREHOOK: query: create table alltypesorc_int ( cint int, cstring string ) stored as ORC
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@alltypesorc_int
+POSTHOOK: query: create table alltypesorc_int ( cint int, cstring string ) stored as ORC
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@alltypesorc_int
+PREHOOK: query: create table srcpart_date (key string, value string) partitioned by (ds string ) stored as ORC
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@srcpart_date
+POSTHOOK: query: create table srcpart_date (key string, value string) partitioned by (ds string ) stored as ORC
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@srcpart_date
+PREHOOK: query: CREATE TABLE srcpart_small(key1 STRING, value1 STRING) partitioned by (ds string) STORED as ORC
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@srcpart_small
+POSTHOOK: query: CREATE TABLE srcpart_small(key1 STRING, value1 STRING) partitioned by (ds string) STORED as ORC
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@srcpart_small
+PREHOOK: query: alter table srcpart_date add partition (ds = "2008-04-08")
+PREHOOK: type: ALTERTABLE_ADDPARTS
+PREHOOK: Output: default@srcpart_date
+POSTHOOK: query: alter table srcpart_date add partition (ds = "2008-04-08")
+POSTHOOK: type: ALTERTABLE_ADDPARTS
+POSTHOOK: Output: default@srcpart_date
+POSTHOOK: Output: default@srcpart_date@ds=2008-04-08
+PREHOOK: query: alter table srcpart_date add partition (ds = "2008-04-09")
+PREHOOK: type: ALTERTABLE_ADDPARTS
+PREHOOK: Output: default@srcpart_date
+POSTHOOK: query: alter table srcpart_date add partition (ds = "2008-04-09")
+POSTHOOK: type: ALTERTABLE_ADDPARTS
+POSTHOOK: Output: default@srcpart_date
+POSTHOOK: Output: default@srcpart_date@ds=2008-04-09
+PREHOOK: query: alter table srcpart_small add partition (ds = "2008-04-08")
+PREHOOK: type: ALTERTABLE_ADDPARTS
+PREHOOK: Output: default@srcpart_small
+POSTHOOK: query: alter table srcpart_small add partition (ds = "2008-04-08")
+POSTHOOK: type: ALTERTABLE_ADDPARTS
+POSTHOOK: Output: default@srcpart_small
+POSTHOOK: Output: default@srcpart_small@ds=2008-04-08
+PREHOOK: query: alter table srcpart_small add partition (ds = "2008-04-09")
+PREHOOK: type: ALTERTABLE_ADDPARTS
+PREHOOK: Output: default@srcpart_small
+POSTHOOK: query: alter table srcpart_small add partition (ds = "2008-04-09")
+POSTHOOK: type: ALTERTABLE_ADDPARTS
+POSTHOOK: Output: default@srcpart_small
+POSTHOOK: Output: default@srcpart_small@ds=2008-04-09
+PREHOOK: query: insert overwrite table alltypesorc_int select cint, cstring1 from alltypesorc
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+PREHOOK: Output: default@alltypesorc_int
+POSTHOOK: query: insert overwrite table alltypesorc_int select cint, cstring1 from alltypesorc
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+POSTHOOK: Output: default@alltypesorc_int
+POSTHOOK: Lineage: alltypesorc_int.cint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ]
+POSTHOOK: Lineage: alltypesorc_int.cstring SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:null), ]
+PREHOOK: query: insert overwrite table srcpart_date partition (ds = "2008-04-08" ) select key, value from srcpart where ds = "2008-04-08"
+PREHOOK: type: QUERY
+PREHOOK: Input: default@srcpart
+PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
+PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
+PREHOOK: Output: default@srcpart_date@ds=2008-04-08
+POSTHOOK: query: insert overwrite table srcpart_date partition (ds = "2008-04-08" ) select key, value from srcpart where ds = "2008-04-08"
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@srcpart
+POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
+POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
+POSTHOOK: Output: default@srcpart_date@ds=2008-04-08
+POSTHOOK: Lineage: srcpart_date PARTITION(ds=2008-04-08).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: srcpart_date PARTITION(ds=2008-04-08).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: insert overwrite table srcpart_date partition (ds = "2008-04-09") select key, value from srcpart where ds = "2008-04-09"
+PREHOOK: type: QUERY
+PREHOOK: Input: default@srcpart
+PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
+PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
+PREHOOK: Output: default@srcpart_date@ds=2008-04-09
+POSTHOOK: query: insert overwrite table srcpart_date partition (ds = "2008-04-09") select key, value from srcpart where ds = "2008-04-09"
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@srcpart
+POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
+POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
+POSTHOOK: Output: default@srcpart_date@ds=2008-04-09
+POSTHOOK: Lineage: srcpart_date PARTITION(ds=2008-04-09).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: srcpart_date PARTITION(ds=2008-04-09).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: insert overwrite table srcpart_small partition (ds = "2008-04-09") select key, value from srcpart where ds = "2008-04-09" limit 20
+PREHOOK: type: QUERY
+PREHOOK: Input: default@srcpart
+PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
+PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
+PREHOOK: Output: default@srcpart_small@ds=2008-04-09
+POSTHOOK: query: insert overwrite table srcpart_small partition (ds = "2008-04-09") select key, value from srcpart where ds = "2008-04-09" limit 20
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@srcpart
+POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
+POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
+POSTHOOK: Output: default@srcpart_small@ds=2008-04-09
+POSTHOOK: Lineage: srcpart_small PARTITION(ds=2008-04-09).key1 SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: srcpart_small PARTITION(ds=2008-04-09).value1 SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: analyze table alltypesorc_int compute statistics for columns
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc_int
+PREHOOK: Output: default@alltypesorc_int
+#### A masked pattern was here ####
+POSTHOOK: query: analyze table alltypesorc_int compute statistics for columns
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc_int
+POSTHOOK: Output: default@alltypesorc_int
+#### A masked pattern was here ####
+PREHOOK: query: analyze table srcpart_date compute statistics for columns
+PREHOOK: type: QUERY
+PREHOOK: Input: default@srcpart_date
+PREHOOK: Input: default@srcpart_date@ds=2008-04-08
+PREHOOK: Input: default@srcpart_date@ds=2008-04-09
+PREHOOK: Output: default@srcpart_date
+PREHOOK: Output: default@srcpart_date@ds=2008-04-08
+PREHOOK: Output: default@srcpart_date@ds=2008-04-09
+#### A masked pattern was here ####
+POSTHOOK: query: analyze table srcpart_date compute statistics for columns
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@srcpart_date
+POSTHOOK: Input: default@srcpart_date@ds=2008-04-08
+POSTHOOK: Input: default@srcpart_date@ds=2008-04-09
+POSTHOOK: Output: default@srcpart_date
+POSTHOOK: Output: default@srcpart_date@ds=2008-04-08
+POSTHOOK: Output: default@srcpart_date@ds=2008-04-09
+#### A masked pattern was here ####
+PREHOOK: query: analyze table srcpart_small compute statistics for columns
+PREHOOK: type: QUERY
+PREHOOK: Input: default@srcpart_small
+PREHOOK: Input: default@srcpart_small@ds=2008-04-08
+PREHOOK: Input: default@srcpart_small@ds=2008-04-09
+PREHOOK: Output: default@srcpart_small
+PREHOOK: Output: default@srcpart_small@ds=2008-04-08
+PREHOOK: Output: default@srcpart_small@ds=2008-04-09
+#### A masked pattern was here ####
+POSTHOOK: query: analyze table srcpart_small compute statistics for columns
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@srcpart_small
+POSTHOOK: Input: default@srcpart_small@ds=2008-04-08
+POSTHOOK: Input: default@srcpart_small@ds=2008-04-09
+POSTHOOK: Output: default@srcpart_small
+POSTHOOK: Output: default@srcpart_small@ds=2008-04-08
+POSTHOOK: Output: default@srcpart_small@ds=2008-04-09
+#### A masked pattern was here ####
+PREHOOK: query: EXPLAIN select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1)
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1)
+POSTHOOK: type: QUERY
+Plan optimized by CBO.
+
+Vertex dependency in root stage
+Map 1 <- Reducer 5 (BROADCAST_EDGE)
+Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE)
+Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
+Reducer 5 <- Map 4 (CUSTOM_SIMPLE_EDGE)
+
+Stage-0
+ Fetch Operator
+ limit:-1
+ Stage-1
+ Reducer 3 llap
+ File Output Operator [FS_14]
+ Group By Operator [GBY_12] (rows=1 width=8)
+ Output:["_col0"],aggregations:["count(VALUE._col0)"]
+ <-Reducer 2 [CUSTOM_SIMPLE_EDGE] llap
+ PARTITION_ONLY_SHUFFLE [RS_11]
+ Group By Operator [GBY_10] (rows=1 width=8)
+ Output:["_col0"],aggregations:["count()"]
+ Merge Join Operator [MERGEJOIN_29] (rows=195 width=8)
+ Conds:RS_6._col0=RS_7._col0(Inner)
+ <-Map 4 [SIMPLE_EDGE] llap
+ PARTITION_ONLY_SHUFFLE [RS_7]
+ PartitionCols:_col0
+ Select Operator [SEL_5] (rows=20 width=87)
+ Output:["_col0"]
+ Filter Operator [FIL_18] (rows=20 width=87)
+ predicate:key1 is not null
+ TableScan [TS_3] (rows=20 width=87)
+ default@srcpart_small,srcpart_small,Tbl:COMPLETE,Col:PARTIAL,Output:["key1"]
+ <-Map 1 [SIMPLE_EDGE] llap
+ SHUFFLE [RS_6]
+ PartitionCols:_col0
+ Select Operator [SEL_2] (rows=2000 width=87)
+ Output:["_col0"]
+ Filter Operator [FIL_17] (rows=2000 width=87)
+ predicate:(key is not null and (key BETWEEN DynamicValue(RS_7_srcpart_small_key_min) AND DynamicValue(RS_7_srcpart_small_key_max) and in_bloom_filter(key, DynamicValue(RS_7_srcpart_small_key_bloom_filter))))
+ TableScan [TS_0] (rows=2000 width=87)
+ default@srcpart_date,srcpart_date,Tbl:COMPLETE,Col:COMPLETE,Output:["key"]
+ <-Reducer 5 [BROADCAST_EDGE] llap
+ BROADCAST [RS_23]
+ Group By Operator [GBY_22] (rows=1 width=552)
+ Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=32)"]
+ <-Map 4 [CUSTOM_SIMPLE_EDGE] llap
+ PARTITION_ONLY_SHUFFLE [RS_21]
+ Group By Operator [GBY_20] (rows=1 width=552)
+ Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=32)"]
+ Select Operator [SEL_19] (rows=20 width=87)
+ Output:["_col0"]
+ Please refer to the previous Select Operator [SEL_5]
+
+PREHOOK: query: select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@srcpart_date
+PREHOOK: Input: default@srcpart_date@ds=2008-04-08
+PREHOOK: Input: default@srcpart_date@ds=2008-04-09
+PREHOOK: Input: default@srcpart_small
+PREHOOK: Input: default@srcpart_small@ds=2008-04-08
+PREHOOK: Input: default@srcpart_small@ds=2008-04-09
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@srcpart_date
+POSTHOOK: Input: default@srcpart_date@ds=2008-04-08
+POSTHOOK: Input: default@srcpart_date@ds=2008-04-09
+POSTHOOK: Input: default@srcpart_small
+POSTHOOK: Input: default@srcpart_small@ds=2008-04-08
+POSTHOOK: Input: default@srcpart_small@ds=2008-04-09
+#### A masked pattern was here ####
+176
+PREHOOK: query: EXPLAIN select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.ds)
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.ds)
+POSTHOOK: type: QUERY
+Plan optimized by CBO.
+
+Vertex dependency in root stage
+Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE)
+Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
+
+Stage-0
+ Fetch Operator
+ limit:-1
+ Stage-1
+ Reducer 3 llap
+ File Output Operator [FS_14]
+ Group By Operator [GBY_12] (rows=1 width=8)
+ Output:["_col0"],aggregations:["count(VALUE._col0)"]
+ <-Reducer 2 [CUSTOM_SIMPLE_EDGE] llap
+ PARTITION_ONLY_SHUFFLE [RS_11]
+ Group By Operator [GBY_10] (rows=1 width=8)
+ Output:["_col0"],aggregations:["count()"]
+ Merge Join Operator [MERGEJOIN_22] (rows=195 width=8)
+ Conds:RS_6._col0=RS_7._col0(Inner)
+ <-Map 1 [SIMPLE_EDGE] llap
+ SHUFFLE [RS_6]
+ PartitionCols:_col0
+ Select Operator [SEL_2] (rows=2000 width=87)
+ Output:["_col0"]
+ Filter Operator [FIL_17] (rows=2000 width=87)
+ predicate:key is not null
+ TableScan [TS_0] (rows=2000 width=87)
+ default@srcpart_date,srcpart_date,Tbl:COMPLETE,Col:COMPLETE,Output:["key"]
+ Dynamic Partitioning Event Operator [EVENT_21] (rows=205 width=87)
+ Group By Operator [GBY_20] (rows=205 width=87)
+ Output:["_col0"],keys:_col0
+ Select Operator [SEL_19] (rows=2000 width=87)
+ Output:["_col0"]
+ Please refer to the previous Select Operator [SEL_2]
+ <-Map 4 [SIMPLE_EDGE] llap
+ SHUFFLE [RS_7]
+ PartitionCols:_col0
+ Select Operator [SEL_5] (rows=20 width=184)
+ Output:["_col0"]
+ TableScan [TS_3] (rows=20 width=360)
+ default@srcpart_small,srcpart_small,Tbl:COMPLETE,Col:COMPLETE
+
+PREHOOK: query: select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.ds)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@srcpart_date
+PREHOOK: Input: default@srcpart_date@ds=2008-04-08
+PREHOOK: Input: default@srcpart_date@ds=2008-04-09
+PREHOOK: Input: default@srcpart_small
+PREHOOK: Input: default@srcpart_small@ds=2008-04-08
+PREHOOK: Input: default@srcpart_small@ds=2008-04-09
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.ds)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@srcpart_date
+POSTHOOK: Input: default@srcpart_date@ds=2008-04-08
+POSTHOOK: Input: default@srcpart_date@ds=2008-04-09
+POSTHOOK: Input: default@srcpart_small
+POSTHOOK: Input: default@srcpart_small@ds=2008-04-08
+POSTHOOK: Input: default@srcpart_small@ds=2008-04-09
+#### A masked pattern was here ####
+0
+PREHOOK: query: EXPLAIN select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) join alltypesorc_int on (srcpart_small.key1 = alltypesorc_int.cstring)
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) join alltypesorc_int on (srcpart_small.key1 = alltypesorc_int.cstring)
+POSTHOOK: type: QUERY
+Plan optimized by CBO.
+
+Vertex dependency in root stage
+Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE)
+Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
+
+Stage-0
+ Fetch Operator
+ limit:-1
+ Stage-1
+ Reducer 3 llap
+ File Output Operator [FS_18]
+ Group By Operator [GBY_16] (rows=1 width=8)
+ Output:["_col0"],aggregations:["count(VALUE._col0)"]
+ <-Reducer 2 [CUSTOM_SIMPLE_EDGE] llap
+ PARTITION_ONLY_SHUFFLE [RS_15]
+ Group By Operator [GBY_14] (rows=1 width=8)
+ Output:["_col0"],aggregations:["count()"]
+ Merge Join Operator [MERGEJOIN_28] (rows=320 width=8)
+ Conds:RS_9._col0=RS_10._col0(Inner),RS_10._col0=RS_11._col0(Inner)
+ <-Map 1 [SIMPLE_EDGE] llap
+ SHUFFLE [RS_9]
+ PartitionCols:_col0
+ Select Operator [SEL_2] (rows=9174 width=70)
+ Output:["_col0"]
+ Filter Operator [FIL_25] (rows=9174 width=70)
+ predicate:cstring is not null
+ TableScan [TS_0] (rows=12288 width=70)
+ default@alltypesorc_int,alltypesorc_int,Tbl:COMPLETE,Col:COMPLETE,Output:["cstring"]
+ <-Map 4 [SIMPLE_EDGE] llap
+ SHUFFLE [RS_10]
+ PartitionCols:_col0
+ Select Operator [SEL_5] (rows=20 width=87)
+ Output:["_col0"]
+ Filter Operator [FIL_26] (rows=20 width=87)
+ predicate:key1 is not null
+ TableScan [TS_3] (rows=20 width=87)
+ default@srcpart_small,srcpart_small,Tbl:COMPLETE,Col:PARTIAL,Output:["key1"]
+ <-Map 5 [SIMPLE_EDGE] llap
+ SHUFFLE [RS_11]
+ PartitionCols:_col0
+ Select Operator [SEL_8] (rows=2000 width=87)
+ Output:["_col0"]
+ Filter Operator [FIL_27] (rows=2000 width=87)
+ predicate:key is not null
+ TableScan [TS_6] (rows=2000 width=87)
+ default@srcpart_date,srcpart_date,Tbl:COMPLETE,Col:COMPLETE,Output:["key"]
+
+PREHOOK: query: select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) join alltypesorc_int on (srcpart_small.key1 = alltypesorc_int.cstring)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc_int
+PREHOOK: Input: default@srcpart_date
+PREHOOK: Input: default@srcpart_date@ds=2008-04-08
+PREHOOK: Input: default@srcpart_date@ds=2008-04-09
+PREHOOK: Input: default@srcpart_small
+PREHOOK: Input: default@srcpart_small@ds=2008-04-08
+PREHOOK: Input: default@srcpart_small@ds=2008-04-09
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) join alltypesorc_int on (srcpart_small.key1 = alltypesorc_int.cstring)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc_int
+POSTHOOK: Input: default@srcpart_date
+POSTHOOK: Input: default@srcpart_date@ds=2008-04-08
+POSTHOOK: Input: default@srcpart_date@ds=2008-04-09
+POSTHOOK: Input: default@srcpart_small
+POSTHOOK: Input: default@srcpart_small@ds=2008-04-08
+POSTHOOK: Input: default@srcpart_small@ds=2008-04-09
+#### A masked pattern was here ####
+0
+PREHOOK: query: EXPLAIN select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) join alltypesorc_int on (srcpart_small.key1 = alltypesorc_int.cstring)
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) join alltypesorc_int on (srcpart_small.key1 = alltypesorc_int.cstring)
+POSTHOOK: type: QUERY
+Plan optimized by CBO.
+
+Vertex dependency in root stage
+Map 1 <- Reducer 5 (BROADCAST_EDGE), Reducer 8 (BROADCAST_EDGE)
+Map 7 <- Reducer 6 (BROADCAST_EDGE)
+Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE)
+Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
+Reducer 5 <- Map 4 (CUSTOM_SIMPLE_EDGE)
+Reducer 6 <- Map 4 (CUSTOM_SIMPLE_EDGE)
+Reducer 8 <- Map 7 (CUSTOM_SIMPLE_EDGE)
+
+Stage-0
+ Fetch Operator
+ limit:-1
+ Stage-1
+ Reducer 3 llap
+ File Output Operator [FS_18]
+ Group By Operator [GBY_16] (rows=1 width=8)
+ Output:["_col0"],aggregations:["count(VALUE._col0)"]
+ <-Reducer 2 [CUSTOM_SIMPLE_EDGE] llap
+ PARTITION_ONLY_SHUFFLE [RS_15]
+ Group By Operator [GBY_14] (rows=1 width=8)
+ Output:["_col0"],aggregations:["count()"]
+ Merge Join Operator [MERGEJOIN_58] (rows=320 width=8)
+ Conds:RS_9._col0=RS_10._col0(Inner),RS_10._col0=RS_11._col0(Inner)
+ <-Map 4 [SIMPLE_EDGE] llap
+ PARTITION_ONLY_SHUFFLE [RS_10]
+ PartitionCols:_col0
+ Select Operator [SEL_5] (rows=20 width=87)
+ Output:["_col0"]
+ Filter Operator [FIL_26] (rows=20 width=87)
+ predicate:key1 is not null
+ TableScan [TS_3] (rows=20 width=87)
+ default@srcpart_small,srcpart_small,Tbl:COMPLETE,Col:PARTIAL,Output:["key1"]
+ <-Map 7 [SIMPLE_EDGE] llap
+ PARTITION_ONLY_SHUFFLE [RS_11]
+ PartitionCols:_col0
+ Select Operator [SEL_8] (rows=2000 width=87)
+ Output:["_col0"]
+ Filter Operator [FIL_27] (rows=2000 width=87)
+ predicate:(key is not null and (key BETWEEN DynamicValue(RS_10_srcpart_small_key_min) AND DynamicValue(RS_10_srcpart_small_key_max) and in_bloom_filter(key, DynamicValue(RS_10_srcpart_small_key_bloom_filter))))
+ TableScan [TS_6] (rows=2000 width=87)
+ default@srcpart_date,srcpart_date,Tbl:COMPLETE,Col:COMPLETE,Output:["key"]
+ <-Reducer 6 [BROADCAST_EDGE] llap
+ BROADCAST [RS_57]
+ Group By Operator [GBY_56] (rows=1 width=552)
+ Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=32)"]
+ <-Map 4 [CUSTOM_SIMPLE_EDGE] llap
+ PARTITION_ONLY_SHUFFLE [RS_55]
+ Group By Operator [GBY_54] (rows=1 width=552)
+ Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=32)"]
+ Select Operator [SEL_53] (rows=20 width=87)
+ Output:["_col0"]
+ Please refer to the previous Select Operator [SEL_5]
+ <-Map 1 [SIMPLE_EDGE] llap
+ SHUFFLE [RS_9]
+ PartitionCols:_col0
+ Select Operator [SEL_2] (rows=9174 width=70)
+ Output:["_col0"]
+ Filter Operator [FIL_25] (rows=9174 width=70)
+ predicate:(cstring is not null and (cstring BETWEEN DynamicValue(RS_10_srcpart_small_cstring_min) AND DynamicValue(RS_10_srcpart_small_cstring_max) and in_bloom_filter(cstring, DynamicValue(RS_10_srcpart_small_cstring_bloom_filter))) and (cstring BETWEEN DynamicValue(RS_11_srcpart_date_cstring_min) AND DynamicValue(RS_11_srcpart_date_cstring_max) and in_bloom_filter(cstring, DynamicValue(RS_11_srcpart_date_cstring_bloom_filter))))
+ TableScan [TS_0] (rows=12288 width=70)
+ default@alltypesorc_int,alltypesorc_int,Tbl:COMPLETE,Col:COMPLETE,Output:["cstring"]
+ <-Reducer 5 [BROADCAST_EDGE] llap
+ BROADCAST [RS_32]
+ Group By Operator [GBY_31] (rows=1 width=552)
+ Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=32)"]
+ <-Map 4 [CUSTOM_SIMPLE_EDGE] llap
+ PARTITION_ONLY_SHUFFLE [RS_30]
+ Group By Operator [GBY_29] (rows=1 width=552)
+ Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=32)"]
+ Select Operator [SEL_28] (rows=20 width=87)
+ Output:["_col0"]
+ Please refer to the previous Select Operator [SEL_5]
+ <-Reducer 8 [BROADCAST_EDGE] llap
+ BROADCAST [RS_37]
+ Group By Operator [GBY_36] (rows=1 width=552)
+ Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=410)"]
+ <-Map 7 [CUSTOM_SIMPLE_EDGE] llap
+ PARTITION_ONLY_SHUFFLE [RS_35]
+ Group By Operator [GBY_34] (rows=1 width=552)
+ Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=410)"]
+ Select Operator [SEL_33] (rows=2000 width=87)
+ Output:["_col0"]
+ Please refer to the previous Select Operator [SEL_8]
+
+PREHOOK: query: select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) join alltypesorc_int on (srcpart_small.key1 = alltypesorc_int.cstring)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc_int
+PREHOOK: Input: default@srcpart_date
+PREHOOK: Input: default@srcpart_date@ds=2008-04-08
+PREHOOK: Input: default@srcpart_date@ds=2008-04-09
+PREHOOK: Input: default@srcpart_small
+PREHOOK: Input: default@srcpart_small@ds=2008-04-08
+PREHOOK: Input: default@srcpart_small@ds=2008-04-09
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) join alltypesorc_int on (srcpart_small.key1 = alltypesorc_int.cstring)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc_int
+POSTHOOK: Input: default@srcpart_date
+POSTHOOK: Input: default@srcpart_date@ds=2008-04-08
+POSTHOOK: Input: default@srcpart_date@ds=2008-04-09
+POSTHOOK: Input: default@srcpart_small
+POSTHOOK: Input: default@srcpart_small@ds=2008-04-08
+POSTHOOK: Input: default@srcpart_small@ds=2008-04-09
+#### A masked pattern was here ####
+0
+PREHOOK: query: EXPLAIN select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1 and srcpart_date.value = srcpart_small.value1)
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1 and srcpart_date.value = srcpart_small.value1)
+POSTHOOK: type: QUERY
+Plan optimized by CBO.
+
+Vertex dependency in root stage
+Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE)
+Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
+
+Stage-0
+ Fetch Operator
+ limit:-1
+ Stage-1
+ Reducer 3 llap
+ File Output Operator [FS_14]
+ Group By Operator [GBY_12] (rows=1 width=8)
+ Output:["_col0"],aggregations:["count(VALUE._col0)"]
+ <-Reducer 2 [CUSTOM_SIMPLE_EDGE] llap
+ PARTITION_ONLY_SHUFFLE [RS_11]
+ Group By Operator [GBY_10] (rows=1 width=8)
+ Output:["_col0"],aggregations:["count()"]
+ Merge Join Operator [MERGEJOIN_19] (rows=1 width=8)
+ Conds:RS_6._col0, _col1=RS_7._col0, _col1(Inner)
+ <-Map 1 [SIMPLE_EDGE] llap
+ SHUFFLE [RS_6]
+ PartitionCols:_col0, _col1
+ Select Operator [SEL_2] (rows=2000 width=178)
+ Output:["_col0","_col1"]
+ Filter Operator [FIL_17] (rows=2000 width=178)
+ predicate:(key is not null and value is not null)
+ TableScan [TS_0] (rows=2000 width=178)
+ default@srcpart_date,srcpart_date,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"]
+ <-Map 4 [SIMPLE_EDGE] llap
+ SHUFFLE [RS_7]
+ PartitionCols:_col0, _col1
+ Select Operator [SEL_5] (rows=20 width=178)
+ Output:["_col0","_col1"]
+ Filter Operator [FIL_18] (rows=20 width=178)
+ predicate:(key1 is not null and value1 is not null)
+ TableScan [TS_3] (rows=20 width=178)
+ default@srcpart_small,srcpart_small,Tbl:COMPLETE,Col:PARTIAL,Output:["key1","value1"]
+
+PREHOOK: query: select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1 and srcpart_date.value = srcpart_small.value1)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@srcpart_date
+PREHOOK: Input: default@srcpart_date@ds=2008-04-08
+PREHOOK: Input: default@srcpart_date@ds=2008-04-09
+PREHOOK: Input: default@srcpart_small
+PREHOOK: Input: default@srcpart_small@ds=2008-04-08
+PREHOOK: Input: default@srcpart_small@ds=2008-04-09
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1 and srcpart_date.value = srcpart_small.value1)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@srcpart_date
+POSTHOOK: Input: default@srcpart_date@ds=2008-04-08
+POSTHOOK: Input: default@srcpart_date@ds=2008-04-09
+POSTHOOK: Input: default@srcpart_small
+POSTHOOK: Input: default@srcpart_small@ds=2008-04-08
+POSTHOOK: Input: default@srcpart_small@ds=2008-04-09
+#### A masked pattern was here ####
+176
+PREHOOK: query: EXPLAIN select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1 and srcpart_date.value = srcpart_small.value1)
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1 and srcpart_date.value = srcpart_small.value1)
+POSTHOOK: type: QUERY
+Plan optimized by CBO.
+
+Vertex dependency in root stage
+Map 1 <- Reducer 5 (BROADCAST_EDGE), Reducer 6 (BROADCAST_EDGE)
+Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE)
+Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
+Reducer 5 <- Map 4 (CUSTOM_SIMPLE_EDGE)
+Reducer 6 <- Map 4 (CUSTOM_SIMPLE_EDGE)
+
+Stage-0
+ Fetch Operator
+ limit:-1
+ Stage-1
+ Reducer 3 llap
+ File Output Operator [FS_14]
+ Group By Operator [GBY_12] (rows=1 width=8)
+ Output:["_col0"],aggregations:["count(VALUE._col0)"]
+ <-Reducer 2 [CUSTOM_SIMPLE_EDGE] llap
+ PARTITION_ONLY_SHUFFLE [RS_11]
+ Group By Operator [GBY_10] (rows=1 width=8)
+ Output:["_col0"],aggregations:["count()"]
+ Merge Join Operator [MERGEJOIN_39] (rows=1 width=8)
+ Conds:RS_6._col0, _col1=RS_7._col0, _col1(Inner)
+ <-Map 4 [SIMPLE_EDGE] llap
+ PARTITION_ONLY_SHUFFLE [RS_7]
+ PartitionCols:_col0, _col1
+ Select Operator [SEL_5] (rows=20 width=178)
+ Output:["_col0","_col1"]
+ Filter Operator [FIL_18] (rows=20 width=178)
+ predicate:(key1 is not null and value1 is not null)
+ TableScan [TS_3] (rows=20 width=178)
+ default@srcpart_small,srcpart_small,Tbl:COMPLETE,Col:PARTIAL,Output:["key1","value1"]
+ <-Map 1 [SIMPLE_EDGE] llap
+ SHUFFLE [RS_6]
+ PartitionCols:_col0, _col1
+ Select Operator [SEL_2] (rows=2000 width=178)
+ Output:["_col0","_col1"]
+ Filter Operator [FIL_17] (rows=2000 width=178)
+ predicate:(key is not null and value is not null and (key BETWEEN DynamicValue(RS_7_srcpart_small_key_min) AND DynamicValue(RS_7_srcpart_small_key_max) and in_bloom_filter(key, DynamicValue(RS_7_srcpart_small_key_bloom_filter))) and (value BETWEEN DynamicValue(RS_7_srcpart_small_value_min) AND DynamicValue(RS_7_srcpart_small_value_max) and in_bloom_filter(value, DynamicValue(RS_7_srcpart_small_value_bloom_filter))))
+ TableScan [TS_0] (rows=2000 width=178)
+ default@srcpart_date,srcpart_date,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"]
+ <-Reducer 5 [BROADCAST_EDGE] llap
+ BROADCAST [RS_23]
+ Group By Operator [GBY_22] (rows=1 width=552)
+ Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=32)"]
+ <-Map 4 [CUSTOM_SIMPLE_EDGE] llap
+ PARTITION_ONLY_SHUFFLE [RS_21]
+ Group By Operator [GBY_20] (rows=1 width=552)
+ Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=32)"]
+ Select Operator [SEL_19] (rows=20 width=87)
+ Output:["_col0"]
+ Please refer to the previous Select Operator [SEL_5]
+ <-Reducer 6 [BROADCAST_EDGE] llap
+ BROADCAST [RS_28]
+ Group By Operator [GBY_27] (rows=1 width=552)
+ Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=36)"]
+ <-Map 4 [CUSTOM_SIMPLE_EDGE] llap
+ PARTITION_ONLY_SHUFFLE [RS_26]
+ Group By Operator [GBY_25] (rows=1 width=552)
+ Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=36)"]
+ Select Operator [SEL_24] (rows=20 width=91)
+ Output:["_col0"]
+ Please refer to the previous Select Operator [SEL_5]
+
+PREHOOK: query: select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1 and srcpart_date.value = srcpart_small.value1)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@srcpart_date
+PREHOOK: Input: default@srcpart_date@ds=2008-04-08
+PREHOOK: Input: default@srcpart_date@ds=2008-04-09
+PREHOOK: Input: default@srcpart_small
+PREHOOK: Input: default@srcpart_small@ds=2008-04-08
+PREHOOK: Input: default@srcpart_small@ds=2008-04-09
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1 and srcpart_date.value = srcpart_small.value1)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@srcpart_date
+POSTHOOK: Input: default@srcpart_date@ds=2008-04-08
+POSTHOOK: Input: default@srcpart_date@ds=2008-04-09
+POSTHOOK: Input: default@srcpart_small
+POSTHOOK: Input: default@srcpart_small@ds=2008-04-08
+POSTHOOK: Input: default@srcpart_small@ds=2008-04-09
+#### A masked pattern was here ####
+176
+PREHOOK: query: EXPLAIN select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) join alltypesorc_int on (srcpart_date.value = alltypesorc_int.cstring)
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) join alltypesorc_int on (srcpart_date.value = alltypesorc_int.cstring)
+POSTHOOK: type: QUERY
+Plan optimized by CBO.
+
+Vertex dependency in root stage
+Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE)
+Reducer 3 <- Map 6 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE)
+Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE)
+
+Stage-0
+ Fetch Operator
+ limit:-1
+ Stage-1
+ Reducer 4 llap
+ File Output Operator [FS_20]
+ Group By Operator [GBY_18] (rows=1 width=8)
+ Output:["_col0"],aggregations:["count(VALUE._col0)"]
+ <-Reducer 3 [CUSTOM_SIMPLE_EDGE] llap
+ PARTITION_ONLY_SHUFFLE [RS_17]
+ Group By Operator [GBY_16] (rows=1 width=8)
+ Output:["_col0"],aggregations:["count()"]
+ Merge Join Operator [MERGEJOIN_30] (rows=319 width=8)
+ Conds:RS_12._col1=RS_13._col0(Inner)
+ <-Map 6 [SIMPLE_EDGE] llap
+ SHUFFLE [RS_13]
+ PartitionCols:_col0
+ Select Operator [SEL_8] (rows=9174 width=70)
+ Output:["_col0"]
+ Filter Operator [FIL_28] (rows=9174 width=70)
+ predicate:cstring is not null
+ TableScan [TS_6] (rows=12288 width=70)
+ default@alltypesorc_int,alltypesorc_int,Tbl:COMPLETE,Col:COMPLETE,Output:["cstring"]
+ <-Reducer 2 [SIMPLE_EDGE] llap
+ SHUFFLE [RS_12]
+ PartitionCols:_col1
+ Merge Join Operator [MERGEJOIN_29] (rows=195 width=91)
+ Conds:RS_9._col0=RS_10._col0(Inner),Output:["_col1"]
+ <-Map 1 [SIMPLE_EDGE] llap
+ SHUFFLE [RS_9]
+ PartitionCols:_col0
+ Select Operator [SEL_2] (rows=2000 width=178)
+ Output:["_col0","_col1"]
+ Filter Operator [FIL_26] (rows=2000 width=178)
+ predicate:(key is not null and value is not null)
+ TableScan [TS_0] (rows=2000 width=178)
+ default@srcpart_date,srcpart_date,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"]
+ <-Map 5 [SIMPLE_EDGE] llap
+ SHUFFLE [RS_10]
+ PartitionCols:_col0
+ Select Operator [SEL_5] (rows=20 width=87)
+ Output:["_col0"]
+ Filter Operator [FIL_27] (rows=20 width=87)
+ predicate:key1 is not null
+ TableScan [TS_3] (rows=20 width=87)
+ default@srcpart_small,srcpart_small,Tbl:COMPLETE,Col:PARTIAL,Output:["key1"]
+
+PREHOOK: query: select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) join alltypesorc_int on (srcpart_date.value = alltypesorc_int.cstring)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc_int
+PREHOOK: Input: default@srcpart_date
+PREHOOK: Input: default@srcpart_date@ds=2008-04-08
+PREHOOK: Input: default@srcpart_date@ds=2008-04-09
+PREHOOK: Input: default@srcpart_small
+PREHOOK: Input: default@srcpart_small@ds=2008-04-08
+PREHOOK: Input: default@srcpart_small@ds=2008-04-09
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) join alltypesorc_int on (srcpart_date.value = alltypesorc_int.cstring)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc_int
+POSTHOOK: Input: default@srcpart_date
+POSTHOOK: Input: default@srcpart_date@ds=2008-04-08
+POSTHOOK: Input: default@srcpart_date@ds=2008-04-09
+POSTHOOK: Input: default@srcpart_small
+POSTHOOK: Input: default@srcpart_small@ds=2008-04-08
+POSTHOOK: Input: default@srcpart_small@ds=2008-04-09
+#### A masked pattern was here ####
+0
+PREHOOK: query: EXPLAIN select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) join alltypesorc_int on (srcpart_date.value = alltypesorc_int.cstring)
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) join alltypesorc_int on (srcpart_date.value = alltypesorc_int.cstring)
+POSTHOOK: type: QUERY
+Plan optimized by CBO.
+
+Vertex dependency in root stage
+Map 1 <- Reducer 7 (BROADCAST_EDGE)
+Map 8 <- Reducer 5 (BROADCAST_EDGE)
+Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE)
+Reducer 3 <- Map 8 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE)
+Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE)
+Reducer 5 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
+Reducer 7 <- Map 6 (CUSTOM_SIMPLE_EDGE)
+
+Stage-0
+ Fetch Operator
+ limit:-1
+ Stage-1
+ Reducer 4 llap
+ File Output Operator [FS_20]
+ Group By Operator [GBY_18] (rows=1 width=8)
+ Output:["_col0"],aggregations:["count(VALUE._col0)"]
+ <-Reducer 3 [CUSTOM_SIMPLE_EDGE] llap
+ PARTITION_ONLY_SHUFFLE [RS_17]
+ Group By Operator [GBY_16] (rows=1 width=8)
+ Output:["_col0"],aggregations:["count()"]
+ Merge Join Operator [MERGEJOIN_50] (rows=319 width=8)
+ Conds:RS_12._col1=RS_13._col0(Inner)
+ <-Reducer 2 [SIMPLE_EDGE] llap
+ PARTITION_ONLY_SHUFFLE [RS_12]
+ PartitionCols:_col1
+ Merge Join Operator [MERGEJOIN_49] (rows=195 width=91)
+ Conds:RS_9._col0=RS_10._col0(Inner),Output:["_col1"]
+ <-Map 6 [SIMPLE_EDGE] llap
+ PARTITION_ONLY_SHUFFLE [RS_10]
+ PartitionCols:_col0
+ Select Operator [SEL_5] (rows=20 width=87)
+ Output:["_col0"]
+ Filter Operator [FIL_27] (rows=20 width=87)
+ predicate:key1 is not null
+ TableScan [TS_3] (rows=20 width=87)
+ default@srcpart_small,srcpart_small,Tbl:COMPLETE,Col:PARTIAL,Output:["key1"]
+ <-Map 1 [SIMPLE_EDGE] llap
+ SHUFFLE [RS_9]
+ PartitionCols:_col0
+ Select Operator [SEL_2] (rows=2000 width=178)
+ Output:["_col0","_col1"]
+ Filter Operator [FIL_26] (rows=2000 width=178)
+ predicate:(key is not null and value is not null and (key BETWEEN DynamicValue(RS_10_srcpart_small_key_min) AND DynamicValue(RS_10_srcpart_small_key_max) and in_bloom_filter(key, DynamicValue(RS_10_srcpart_small_key_bloom_filter))))
+ TableScan [TS_0] (rows=2000 width=178)
+ default@srcpart_date,srcpart_date,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"]
+ <-Reducer 7 [BROADCAST_EDGE] llap
+ BROADCAST [RS_33]
+ Group By Operator [GBY_32] (rows=1 width=552)
+ Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=32)"]
+ <-Map 6 [CUSTOM_SIMPLE_EDGE] llap
+ PARTITION_ONLY_SHUFFLE [RS_31]
+ Group By Operator [GBY_30] (rows=1 width=552)
+ Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=32)"]
+ Select Operator [SEL_29] (rows=20 width=87)
+ Output:["_col0"]
+ Please refer to the previous Select Operator [SEL_5]
+ <-Map 8 [SIMPLE_EDGE] llap
+ SHUFFLE [RS_13]
+ PartitionCols:_col0
+ Select Operator [SEL_8] (rows=9174 width=70)
+ Output:["_col0"]
+ Filter Operator [FIL_28] (rows=9174 width=70)
+ predicate:(cstring is not null and (cstring BETWEEN DynamicValue(RS_12_srcpart_date_cstring_min) AND DynamicValue(RS_12_srcpart_date_cstring_max) and in_bloom_filter(cstring, DynamicValue(RS_12_srcpart_date_cstring_bloom_filter))))
+ TableScan [TS_6] (rows=12288 width=70)
+ default@alltypesorc_int,alltypesorc_int,Tbl:COMPLETE,Col:COMPLETE,Output:["cstring"]
+ <-Reducer 5 [BROADCAST_EDGE] llap
+ BROADCAST [RS_48]
+ Group By Operator [GBY_47] (rows=1 width=552)
+ Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=42)"]
+ <-Reducer 2 [CUSTOM_SIMPLE_EDGE] llap
+ PARTITION_ONLY_SHUFFLE [RS_46]
+ Group By Operator [GBY_45] (rows=1 width=552)
+ Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=42)"]
+ Select Operator [SEL_44] (rows=195 width=91)
+ Output:["_col0"]
+ Please refer to the previous Merge Join Operator [MERGEJOIN_49]
+
+PREHOOK: query: select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) join alltypesorc_int on (srcpart_date.value = alltypesorc_int.cstring)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc_int
+PREHOOK: Input: default@srcpart_date
+PREHOOK: Input: default@srcpart_date@ds=2008-04-08
+PREHOOK: Input: default@srcpart_date@ds=2008-04-09
+PREHOOK: Input: default@srcpart_small
+PREHOOK: Input: default@srcpart_small@ds=2008-04-08
+PREHOOK: Input: default@srcpart_small@ds=2008-04-09
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) join alltypesorc_int on (srcpart_date.value = alltypesorc_int.cstring)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc_int
+POSTHOOK: Input: default@srcpart_date
+POSTHOOK: Input: default@srcpart_date@ds=2008-04-08
+POSTHOOK: Input: default@srcpart_date@ds=2008-04-09
+POSTHOOK: Input: default@srcpart_small
+POSTHOOK: Input: default@srcpart_small@ds=2008-04-08
+POSTHOOK: Input: default@srcpart_small@ds=2008-04-09
+#### A masked pattern was here ####
+0
+PREHOOK: query: EXPLAIN extended select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1)
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN extended select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Map 1 <- Reducer 5 (BROADCAST_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
+ Reducer 5 <- Map 4 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: srcpart_date
+ filterExpr: (key is not null and (key BETWEEN DynamicValue(RS_7_srcpart_small_key_min) AND DynamicValue(RS_7_srcpart_small_key_max) and in_bloom_filter(key, DynamicValue(RS_7_srcpart_small_key_bloom_filter)))) (type: boolean)
+ Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE
+ GatherStats: false
+ Filter Operator
+ isSamplingPred: false
+ predicate: (key is not null and (key BETWEEN DynamicValue(RS_7_srcpart_small_key_min) AND DynamicValue(RS_7_srcpart_small_key_max) and in_bloom_filter(key, DynamicValue(RS_7_srcpart_small_key_bloom_filter)))) (type: boolean)
+ Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ null sort order: a
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE
+ tag: 0
+ auto parallelism: true
+ Execution mode: llap
+ LLAP IO: all inputs
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
+ Partition
+ base file name: ds=2008-04-08
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ partition values:
+ ds 2008-04-08
+ properties:
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
+ bucket_count -1
+#### A masked pattern was here ####
+ name default.srcpart_date
+ numFiles 1
+ numRows 1000
+ partition_columns ds
+ partition_columns.types string
+ rawDataSize 176000
+ serialization.ddl struct srcpart_date { string key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ totalSize 3038
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ properties:
+ bucket_count -1
+ column.name.delimiter ,
+ columns key,value
+ columns.comments
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.srcpart_date
+ partition_columns ds
+ partition_columns.types string
+ serialization.ddl struct srcpart_date { string key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.srcpart_date
+ name: default.srcpart_date
+#### A masked pattern was here ####
+ Partition
+ base file name: ds=2008-04-09
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ partition values:
+ ds 2008-04-09
+ properties:
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
+ bucket_count -1
+#### A masked pattern was here ####
+ name default.srcpart_date
+ numFiles 1
+ numRows 1000
+ partition_columns ds
+ partition_columns.types string
+ rawDataSize 176000
+ serialization.ddl struct srcpart_date { string key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ totalSize 3038
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ properties:
+ bucket_count -1
+ column.name.delimiter ,
+ columns key,value
+ columns.comments
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.srcpart_date
+ partition_columns ds
+ partition_columns.types string
+ serialization.ddl struct srcpart_date { string key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.srcpart_date
+ name: default.srcpart_date
+ Truncated Path -> Alias:
+ /srcpart_date/ds=2008-04-08 [srcpart_date]
+ /srcpart_date/ds=2008-04-09 [srcpart_date]
+ Map 4
+ Map Operator Tree:
+ TableScan
+ alias: srcpart_small
+ filterExpr: key1 is not null (type: boolean)
+ Statistics: Num rows: 20 Data size: 1740 Basic stats: COMPLETE Column stats: PARTIAL
+ GatherStats: false
+ Filter Operator
+ isSamplingPred: false
+ predicate: key1 is not null (type: boolean)
+ Statistics: Num rows: 20 Data size: 1740 Basic stats: COMPLETE Column stats: PARTIAL
+ Select Operator
+ expressions: key1 (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 20 Data size: 1740 Basic stats: COMPLETE Column stats: PARTIAL
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ null sort order: a
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 20 Data size: 1740 Basic stats: COMPLETE Column stats: PARTIAL
+ tag: 1
+ auto parallelism: true
+ Select Operator
+ expressions: _col0 (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 20 Data size: 1740 Basic stats: COMPLETE Column stats: PARTIAL
+ Group By Operator
+ aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=32)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL
+ tag: -1
+ value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary)
+ auto parallelism: false
+ quick start: true
+ Execution mode: llap
+ LLAP IO: all inputs
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
+ Partition
+ base file name: ds=2008-04-08
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ partition values:
+ ds 2008-04-08
+ properties:
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ bucket_count -1
+#### A masked pattern was here ####
+ name default.srcpart_small
+ numFiles 0
+ numRows 0
+ partition_columns ds
+ partition_columns.types string
+ rawDataSize 0
+ serialization.ddl struct srcpart_small { string key1, string value1}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ totalSize 0
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ properties:
+ bucket_count -1
+ column.name.delimiter ,
+ columns key1,value1
+ columns.comments
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.srcpart_small
+ partition_columns ds
+ partition_columns.types string
+ serialization.ddl struct srcpart_small { string key1, string value1}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.srcpart_small
+ name: default.srcpart_small
+#### A masked pattern was here ####
+ Partition
+ base file name: ds=2008-04-09
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ partition values:
+ ds 2008-04-09
+ properties:
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key1":"true","value1":"true"}}
+ bucket_count -1
+#### A masked pattern was here ####
+ name default.srcpart_small
+ numFiles 1
+ numRows 20
+ partition_columns ds
+ partition_columns.types string
+ rawDataSize 3520
+ serialization.ddl struct srcpart_small { string key1, string value1}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ totalSize 459
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ properties:
+ bucket_count -1
+ column.name.delimiter ,
+ columns key1,value1
+ columns.comments
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.srcpart_small
+ partition_columns ds
+ partition_columns.types string
+ serialization.ddl struct srcpart_small { string key1, string value1}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.srcpart_small
+ name: default.srcpart_small
+ Truncated Path -> Alias:
+ /srcpart_small/ds=2008-04-08 [srcpart_small]
+ /srcpart_small/ds=2008-04-09 [srcpart_small]
+ Reducer 2
+ Execution mode: llap
+ Needs Tagging: false
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ Position of Big Table: 0
+ Statistics: Num rows: 195 Data size: 1560 Basic stats: COMPLETE Column stats: PARTIAL
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL
+ tag: -1
+ value expressions: _col0 (type: bigint)
+ auto parallelism: false
+ Reducer 3
+ Execution mode: llap
+ Needs Tagging: false
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ columns _col0
+ columns.types bigint
+ escape.delim \
+ hive.serialization.extend.additional.nesting.levels true
+ serialization.escape.crlf true
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+ Reducer 5
+ Execution mode: llap
+ Needs Tagging: false
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=32)
+ mode: final
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL
+ tag: -1
+ value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary)
+ auto parallelism: false
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: EXPLAIN select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1)
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1)
+POSTHOOK: type: QUERY
+Plan optimized by CBO.
+
+Vertex dependency in root stage
+Map 1 <- Map 3 (BROADCAST_EDGE)
+Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
+
+Stage-0
+ Fetch Operator
+ limit:-1
+ Stage-1
+ Reducer 2 llap
+ File Output Operator [FS_14]
+ Group By Operator [GBY_12] (rows=1 width=8)
+ Output:["_col0"],aggregations:["count(VALUE._col0)"]
+ <-Map 1 [CUSTOM_SIMPLE_EDGE] llap
+ PARTITION_ONLY_SHUFFLE [RS_11]
+ Group By Operator [GBY_10] (rows=1 width=8)
+ Output:["_col0"],aggregations:["count()"]
+ Map Join Operator [MAPJOIN_19] (rows=195 width=8)
+ Conds:SEL_2._col0=RS_7._col0(Inner)
+ <-Map 3 [BROADCAST_EDGE] llap
+ BROADCAST [RS_7]
+ PartitionCols:_col0
+ Select Operator [SEL_5] (rows=20 width=87)
+ Output:["_col0"]
+ Filter Operator [FIL_18] (rows=20 width=87)
+ predicate:key1 is not null
+ TableScan [TS_3] (rows=20 width=87)
+ default@srcpart_small,srcpart_small,Tbl:COMPLETE,Col:PARTIAL,Output:["key1"]
+ <-Select Operator [SEL_2] (rows=2000 width=87)
+ Output:["_col0"]
+ Filter Operator [FIL_17] (rows=2000 width=87)
+ predicate:key is not null
+ TableScan [TS_0] (rows=2000 width=87)
+ default@srcpart_date,srcpart_date,Tbl:COMPLETE,Col:COMPLETE,Output:["key"]
+
+PREHOOK: query: select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@srcpart_date
+PREHOOK: Input: default@srcpart_date@ds=2008-04-08
+PREHOOK: Input: default@srcpart_date@ds=2008-04-09
+PREHOOK: Input: default@srcpart_small
+PREHOOK: Input: default@srcpart_small@ds=2008-04-08
+PREHOOK: Input: default@srcpart_small@ds=2008-04-09
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@srcpart_date
+POSTHOOK: Input: default@srcpart_date@ds=2008-04-08
+POSTHOOK: Input: default@srcpart_date@ds=2008-04-09
+POSTHOOK: Input: default@srcpart_small
+POSTHOOK: Input: default@srcpart_small@ds=2008-04-08
+POSTHOOK: Input: default@srcpart_small@ds=2008-04-09
+#### A masked pattern was here ####
+176
+PREHOOK: query: EXPLAIN select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1)
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1)
+POSTHOOK: type: QUERY
+Plan optimized by CBO.
+
+Vertex dependency in root stage
+Map 1 <- Map 3 (BROADCAST_EDGE)
+Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
+
+Stage-0
+ Fetch Operator
+ limit:-1
+ Stage-1
+ Reducer 2 llap
+ File Output Operator [FS_14]
+ Group By Operator [GBY_12] (rows=1 width=8)
+ Output:["_col0"],aggregations:["count(VALUE._col0)"]
+ <-Map 1 [CUSTOM_SIMPLE_EDGE] llap
+ PARTITION_ONLY_SHUFFLE [RS_11]
+ Group By Operator [GBY_10] (rows=1 width=8)
+ Output:["_col0"],aggregations:["count()"]
+ Map Join Operator [MAPJOIN_29] (rows=195 width=8)
+ Conds:SEL_2._col0=RS_7._col0(Inner)
+ <-Map 3 [BROADCAST_EDGE] llap
+ BROADCAST [RS_7]
+ PartitionCols:_col0
+ Select Operator [SEL_5] (rows=20 width=87)
+ Output:["_col0"]
+ Filter Operator [FIL_18] (rows=20 width=87)
+ predicate:key1 is not null
+ TableScan [TS_3] (rows=20 width=87)
+ default@srcpart_small,srcpart_small,Tbl:COMPLETE,Col:PARTIAL,Output:["key1"]
+ <-Select Operator [SEL_2] (rows=2000 width=87)
+ Output:["_col0"]
+ Filter Operator [FIL_17] (rows=2000 width=87)
+ predicate:key is not null
+ TableScan [TS_0] (rows=2000 width=87)
+ default@srcpart_date,srcpart_date,Tbl:COMPLETE,Col:COMPLETE,Output:["key"]
+
+PREHOOK: query: select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@srcpart_date
+PREHOOK: Input: default@srcpart_date@ds=2008-04-08
+PREHOOK: Input: default@srcpart_date@ds=2008-04-09
+PREHOOK: Input: default@srcpart_small
+PREHOOK: Input: default@srcpart_small@ds=2008-04-08
+PREHOOK: Input: default@srcpart_small@ds=2008-04-09
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@srcpart_date
+POSTHOOK: Input: default@srcpart_date@ds=2008-04-08
+POSTHOOK: Input: default@srcpart_date@ds=2008-04-09
+POSTHOOK: Input: default@srcpart_small
+POSTHOOK: Input: default@srcpart_small@ds=2008-04-08
+POSTHOOK: Input: default@srcpart_small@ds=2008-04-09
+#### A masked pattern was here ####
+176
+PREHOOK: query: EXPLAIN select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) join alltypesorc_int on (srcpart_date.value = alltypesorc_int.cstring)
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) join alltypesorc_int on (srcpart_date.value = alltypesorc_int.cstring)
+POSTHOOK: type: QUERY
+Plan optimized by CBO.
+
+Vertex dependency in root stage
+Map 1 <- Map 2 (BROADCAST_EDGE)
+Map 3 <- Map 1 (BROADCAST_EDGE)
+Reducer 4 <- Map 3 (CUSTOM_SIMPLE_EDGE)
+
+Stage-0
+ Fetch Operator
+ limit:-1
+ Stage-1
+ Reducer 4 llap
+ File Output Operator [FS_20]
+ Group By Operator [GBY_18] (rows=1 width=8)
+ Output:["_col0"],aggregations:["count(VALUE._col0)"]
+ <-Map 3 [CUSTOM_SIMPLE_EDGE] llap
+ PARTITION_ONLY_SHUFFLE [RS_17]
+ Group By Operator [GBY_16] (rows=1 width=8)
+ Output:["_col0"],aggregations:["count()"]
+ Map Join Operator [MAPJOIN_30] (rows=319 width=8)
+ Conds:RS_12._col1=SEL_8._col0(Inner)
+ <-Map 1 [BROADCAST_EDGE] llap
+ BROADCAST [RS_12]
+ PartitionCols:_col1
+ Map Join Operator [MAPJOIN_29] (rows=195 width=91)
+ Conds:SEL_2._col0=RS_10._col0(Inner),Output:["_col1"]
+ <-Map 2 [BROADCAST_EDGE] llap
+ BROADCAST [RS_10]
+ PartitionCols:_col0
+ Select Operator [SEL_5] (rows=20 width=87)
+ Output:["_col0"]
+ Filter Operator [FIL_27] (rows=20 width=87)
+ predicate:key1 is not null
+ TableScan [TS_3] (rows=20 width=87)
+ default@srcpart_small,srcpart_small,Tbl:COMPLETE,Col:PARTIAL,Output:["key1"]
+ <-Select Operator [SEL_2] (rows=2000 width=178)
+ Output:["_col0","_col1"]
+ Filter Operator [FIL_26] (rows=2000 width=178)
+ predicate:(key is not null and value is not null)
+ TableScan [TS_0] (rows=2000 width=178)
+ default@srcpart_date,srcpart_date,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"]
+ <-Select Operator [SEL_8] (rows=9174 width=70)
+ Output:["_col0"]
+ Filter Operator [FIL_28] (rows=9174 width=70)
+ predicate:cstring is not null
+ TableScan [TS_6] (rows=12288 width=70)
+ default@alltypesorc_int,alltypesorc_int,Tbl:COMPLETE,Col:COMPLETE,Output:["cstring"]
+
+PREHOOK: query: select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) join alltypesorc_int on (srcpart_date.value = alltypesorc_int.cstring)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc_int
+PREHOOK: Input: default@srcpart_date
+PREHOOK: Input: default@srcpart_date@ds=2008-04-08
+PREHOOK: Input: default@srcpart_date@ds=2008-04-09
+PREHOOK: Input: default@srcpart_small
+PREHOOK: Input: default@srcpart_small@ds=2008-04-08
+PREHOOK: Input: default@srcpart_small@ds=2008-04-09
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) join alltypesorc_int on (srcpart_date.value = alltypesorc_int.cstring)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc_int
+POSTHOOK: Input: default@srcpart_date
+POSTHOOK: Input: default@srcpart_date@ds=2008-04-08
+POSTHOOK: Input: default@srcpart_date@ds=2008-04-09
+POSTHOOK: Input: default@srcpart_small
+POSTHOOK: Input: default@srcpart_small@ds=2008-04-08
+POSTHOOK: Input: default@srcpart_small@ds=2008-04-09
+#### A masked pattern was here ####
+0
+PREHOOK: query: EXPLAIN select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) join alltypesorc_int on (srcpart_date.value = alltypesorc_int.cstring)
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) join alltypesorc_int on (srcpart_date.value = alltypesorc_int.cstring)
+POSTHOOK: type: QUERY
+Plan optimized by CBO.
+
+Vertex dependency in root stage
+Map 1 <- Map 2 (BROADCAST_EDGE)
+Map 3 <- Map 1 (BROADCAST_EDGE)
+Reducer 4 <- Map 3 (CUSTOM_SIMPLE_EDGE)
+
+Stage-0
+ Fetch Operator
+ limit:-1
+ Stage-1
+ Reducer 4 llap
+ File Output Operator [FS_20]
+ Group By Operator [GBY_18] (rows=1 width=8)
+ Output:["_col0"],aggregations:["count(VALUE._col0)"]
+ <-Map 3 [CUSTOM_SIMPLE_EDGE] llap
+ PARTITION_ONLY_SHUFFLE [RS_17]
+ Group By Operator [GBY_16] (rows=1 width=8)
+ Output:["_col0"],aggregations:["count()"]
+ Map Join Operator [MAPJOIN_50] (rows=319 width=8)
+ Conds:RS_12._col1=SEL_8._col0(Inner)
+ <-Map 1 [BROADCAST_EDGE] llap
+ BROADCAST [RS_12]
+ PartitionCols:_col1
+ Map Join Operator [MAPJOIN_49] (rows=195 width=91)
+ Conds:SEL_2._col0=RS_10._col0(Inner),Output:["_col1"]
+ <-Map 2 [BROADCAST_EDGE] llap
+ BROADCAST [RS_10]
+ PartitionCols:_col0
+ Select Operator [SEL_5] (rows=20 width=87)
+ Output:["_col0"]
+ Filter Operator [FIL_27] (rows=20 width=87)
+ predicate:key1 is not null
+ TableScan [TS_3] (rows=20 width=87)
+ default@srcpart_small,srcpart_small,Tbl:COMPLETE,Col:PARTIAL,Output:["key1"]
+ <-Select Operator [SEL_2] (rows=2000 width=178)
+ Output:["_col0","_col1"]
+ Filter Operator [FIL_26] (rows=2000 width=178)
+ predicate:(key is not null and value is not null)
+ TableScan [TS_0] (rows=2000 width=178)
+ default@srcpart_date,srcpart_date,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"]
+ <-Select Operator [SEL_8] (rows=9174 width=70)
+ Output:["_col0"]
+ Filter Operator [FIL_28] (rows=9174 width=70)
+ predicate:cstring is not null
+ TableScan [TS_6] (rows=12288 width=70)
+ default@alltypesorc_int,alltypesorc_int,Tbl:COMPLETE,Col:COMPLETE,Output:["cstring"]
+
+PREHOOK: query: select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) join alltypesorc_int on (srcpart_date.value = alltypesorc_int.cstring)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc_int
+PREHOOK: Input: default@srcpart_date
+PREHOOK: Input: default@srcpart_date@ds=2008-04-08
+PREHOOK: Input: default@srcpart_date@ds=2008-04-09
+PREHOOK: Input: default@srcpart_small
+PREHOOK: Input: default@srcpart_small@ds=2008-04-08
+PREHOOK: Input: default@srcpart_small@ds=2008-04-09
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) join alltypesorc_int on (srcpart_date.value = alltypesorc_int.cstring)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc_int
+POSTHOOK: Input: default@srcpart_date
+POSTHOOK: Input: default@srcpart_date@ds=2008-04-08
+POSTHOOK: Input: default@srcpart_date@ds=2008-04-09
+POSTHOOK: Input: default@srcpart_small
+POSTHOOK: Input: default@srcpart_small@ds=2008-04-08
+POSTHOOK: Input: default@srcpart_small@ds=2008-04-09
+#### A masked pattern was here ####
+0
+PREHOOK: query: explain select * from alltypesorc_int join
+ (select srcpart_date.key as key from srcpart_date
+ union all
+ select srcpart_small.key1 as key from srcpart_small) unionsrc on (alltypesorc_int.cstring = unionsrc.key)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select * from alltypesorc_int join
+ (select srcpart_date.key as key from srcpart_date
+ union all
+ select srcpart_small.key1 as key from srcpart_small) unionsrc on (alltypesorc_int.cstring = unionsrc.key)
+POSTHOOK: type: QUERY
+Plan optimized by CBO.
+
+Vertex dependency in root stage
+Map 1 <- Union 3 (BROADCAST_EDGE)
+Map 2 <- Union 3 (CONTAINS)
+Map 4 <- Union 3 (CONTAINS)
+
+Stage-0
+ Fetch Operator
+ limit:-1
+ Stage-1
+ Map 1 llap
+ File Output Operator [FS_15]
+ Map Join Operator [MAPJOIN_37] (rows=3314 width=185)
+ Conds:SEL_2._col1=Union 3._col0(Inner),Output:["_col0","_col1","_col2"]
+ <-Union 3 [BROADCAST_EDGE]
+ <-Map 2 [CONTAINS] llap
+ Reduce Output Operator [RS_12]
+ PartitionCols:_col0
+ Select Operator [SEL_5] (rows=2000 width=87)
+ Output:["_col0"]
+ Filter Operator [FIL_20] (rows=2000 width=87)
+ predicate:key is not null
+ TableScan [TS_3] (rows=2000 width=87)
+ Output:["key"]
+ <-Map 4 [CONTAINS] llap
+ Reduce Output Operator [RS_12]
+ PartitionCols:_col0
+ Select Operator [SEL_8] (rows=20 width=87)
+ Output:["_col0"]
+ Filter Operator [FIL_21] (rows=20 width=87)
+ predicate:key1 is not null
+ TableScan [TS_6] (rows=20 width=87)
+ Output:["key1"]
+ <-Select Operator [SEL_2] (rows=9174 width=73)
+ Output:["_col0","_col1"]
+ Filter Operator [FIL_19] (rows=9174 width=73)
+ predicate:cstring is not null
+ TableScan [TS_0] (rows=12288 width=73)
+ default@alltypesorc_int,alltypesorc_int,Tbl:COMPLETE,Col:COMPLETE,Output:["cint","cstring"]
+
+PREHOOK: query: drop table srcpart_date
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@srcpart_date
+PREHOOK: Output: default@srcpart_date
+POSTHOOK: query: drop table srcpart_date
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@srcpart_date
+POSTHOOK: Output: default@srcpart_date
+PREHOOK: query: drop table srcpart_small
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@srcpart_small
+PREHOOK: Output: default@srcpart_small
+POSTHOOK: query: drop table srcpart_small
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@srcpart_small
+POSTHOOK: Output: default@srcpart_small
+PREHOOK: query: drop table alltypesorc_int
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@alltypesorc_int
+PREHOOK: Output: default@alltypesorc_int
+POSTHOOK: query: drop table alltypesorc_int
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@alltypesorc_int
+POSTHOOK: Output: default@alltypesorc_int
[3/5] hive git commit: HIVE-16421 Runtime filtering breaks user-level
explain (Pengcheng Xiong, reviewed by Ashutosh Chauhan)
Posted by px...@apache.org.
http://git-wip-us.apache.org/repos/asf/hive/blob/eaa439e3/ql/src/test/results/clientpositive/llap/explainuser_2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/explainuser_2.q.out b/ql/src/test/results/clientpositive/llap/explainuser_2.q.out
index c291473..2a27479 100644
--- a/ql/src/test/results/clientpositive/llap/explainuser_2.q.out
+++ b/ql/src/test/results/clientpositive/llap/explainuser_2.q.out
@@ -2044,511 +2044,216 @@ SELECT x.key, y.value
FROM src1 x JOIN src1 y ON (x.key = y.key)
JOIN (select key, value from src1 union all select key, value from src union all select key, value from src union all select key, value from src)z ON (x.value = z.value)
POSTHOOK: type: QUERY
-STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
+Plan optimized by CBO.
-STAGE PLANS:
- Stage: Stage-1
- Tez
-#### A masked pattern was here ####
- Edges:
- Map 1 <- Map 6 (BROADCAST_EDGE), Union 2 (CONTAINS)
- Map 12 <- Union 9 (CONTAINS)
- Map 13 <- Union 9 (CONTAINS)
- Map 16 <- Map 17 (BROADCAST_EDGE)
- Map 18 <- Map 16 (BROADCAST_EDGE), Union 4 (CONTAINS)
- Map 19 <- Map 16 (BROADCAST_EDGE), Union 4 (CONTAINS)
- Map 20 <- Map 16 (BROADCAST_EDGE), Union 4 (CONTAINS)
- Map 21 <- Map 16 (BROADCAST_EDGE), Union 4 (CONTAINS)
- Map 5 <- Map 6 (BROADCAST_EDGE), Union 2 (CONTAINS)
- Map 8 <- Union 9 (CONTAINS)
- Reducer 10 <- Map 14 (SIMPLE_EDGE), Union 9 (SIMPLE_EDGE)
- Reducer 11 <- Map 15 (SIMPLE_EDGE), Reducer 10 (SIMPLE_EDGE), Union 4 (CONTAINS)
- Reducer 3 <- Map 7 (SIMPLE_EDGE), Union 2 (SIMPLE_EDGE), Union 4 (CONTAINS)
-#### A masked pattern was here ####
- Vertices:
- Map 1
- Map Operator Tree:
- TableScan
- alias: src1
- Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: value is not null (type: boolean)
- Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: value (type: string)
- outputColumnNames: _col0
- Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 _col0 (type: string)
- 1 _col1 (type: string)
- outputColumnNames: _col1
- input vertices:
- 1 Map 6
- Statistics: Num rows: 577 Data size: 6053 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col1 (type: string)
- sort order: +
- Map-reduce partition columns: _col1 (type: string)
- Statistics: Num rows: 577 Data size: 6053 Basic stats: COMPLETE Column stats: NONE
- Execution mode: llap
- LLAP IO: no inputs
- Map 12
- Map Operator Tree:
- TableScan
- alias: src
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: value is not null (type: boolean)
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: value (type: string)
- outputColumnNames: _col0
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 1025 Data size: 10815 Basic stats: COMPLETE Column stats: NONE
- Execution mode: llap
- LLAP IO: no inputs
- Map 13
- Map Operator Tree:
- TableScan
- alias: src
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: value is not null (type: boolean)
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: value (type: string)
- outputColumnNames: _col0
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 1025 Data size: 10815 Basic stats: COMPLETE Column stats: NONE
- Execution mode: llap
- LLAP IO: no inputs
- Map 14
- Map Operator Tree:
- TableScan
- alias: x
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: (key is not null and value is not null) (type: boolean)
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: string), value (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col1 (type: string)
- sort order: +
- Map-reduce partition columns: _col1 (type: string)
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: string)
- Execution mode: llap
- LLAP IO: no inputs
- Map 15
- Map Operator Tree:
- TableScan
- alias: y
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: string), value (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: string)
- Execution mode: llap
- LLAP IO: no inputs
- Map 16
- Map Operator Tree:
- TableScan
- alias: x
- Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: (key is not null and value is not null) (type: boolean)
- Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: string), value (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 _col0 (type: string)
- 1 _col0 (type: string)
- outputColumnNames: _col0, _col1, _col3
- input vertices:
- 1 Map 17
- Statistics: Num rows: 27 Data size: 210 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col1 (type: string)
- sort order: +
- Map-reduce partition columns: _col1 (type: string)
- Statistics: Num rows: 27 Data size: 210 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: string), _col3 (type: string)
- Reduce Output Operator
- key expressions: _col1 (type: string)
- sort order: +
- Map-reduce partition columns: _col1 (type: string)
- Statistics: Num rows: 27 Data size: 210 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: string), _col3 (type: string)
- Reduce Output Operator
- key expressions: _col1 (type: string)
- sort order: +
- Map-reduce partition columns: _col1 (type: string)
- Statistics: Num rows: 27 Data size: 210 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: string), _col3 (type: string)
- Reduce Output Operator
- key expressions: _col1 (type: string)
- sort order: +
- Map-reduce partition columns: _col1 (type: string)
- Statistics: Num rows: 27 Data size: 210 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: string), _col3 (type: string)
- Execution mode: llap
- LLAP IO: no inputs
- Map 17
- Map Operator Tree:
- TableScan
- alias: y
- Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: string), value (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: string)
- Execution mode: llap
- LLAP IO: no inputs
- Map 18
- Map Operator Tree:
- TableScan
- alias: src1
- Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: value is not null (type: boolean)
- Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: value (type: string)
- outputColumnNames: _col0
- Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 _col1 (type: string)
- 1 _col0 (type: string)
- outputColumnNames: _col0, _col3
- input vertices:
- 0 Map 16
- Statistics: Num rows: 1677 Data size: 17739 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: string), _col3 (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 1677 Data size: 17739 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 3550 Data size: 37482 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- Execution mode: llap
- LLAP IO: no inputs
- Map 19
- Map Operator Tree:
- TableScan
- alias: src
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: value is not null (type: boolean)
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: value (type: string)
- outputColumnNames: _col0
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 _col1 (type: string)
- 1 _col0 (type: string)
- outputColumnNames: _col0, _col3
- input vertices:
- 0 Map 16
- Statistics: Num rows: 1677 Data size: 17739 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: string), _col3 (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 1677 Data size: 17739 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 3550 Data size: 37482 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- Execution mode: llap
- LLAP IO: no inputs
- Map 20
- Map Operator Tree:
- TableScan
- alias: src
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: value is not null (type: boolean)
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: value (type: string)
- outputColumnNames: _col0
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 _col1 (type: string)
- 1 _col0 (type: string)
- outputColumnNames: _col0, _col3
- input vertices:
- 0 Map 16
- Statistics: Num rows: 1677 Data size: 17739 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: string), _col3 (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 1677 Data size: 17739 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 3550 Data size: 37482 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- Execution mode: llap
- LLAP IO: no inputs
- Map 21
- Map Operator Tree:
- TableScan
- alias: src
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: value is not null (type: boolean)
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: value (type: string)
- outputColumnNames: _col0
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 _col1 (type: string)
- 1 _col0 (type: string)
- outputColumnNames: _col0, _col3
- input vertices:
- 0 Map 16
- Statistics: Num rows: 1677 Data size: 17739 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: string), _col3 (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 1677 Data size: 17739 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 3550 Data size: 37482 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- Execution mode: llap
- LLAP IO: no inputs
- Map 5
- Map Operator Tree:
- TableScan
- alias: src
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: value is not null (type: boolean)
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: value (type: string)
- outputColumnNames: _col0
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 _col0 (type: string)
- 1 _col1 (type: string)
- outputColumnNames: _col1
- input vertices:
- 1 Map 6
- Statistics: Num rows: 577 Data size: 6053 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col1 (type: string)
- sort order: +
- Map-reduce partition columns: _col1 (type: string)
- Statistics: Num rows: 577 Data size: 6053 Basic stats: COMPLETE Column stats: NONE
- Execution mode: llap
- LLAP IO: no inputs
- Map 6
- Map Operator Tree:
- TableScan
- alias: x
- Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: (key is not null and value is not null) (type: boolean)
- Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: string), value (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col1 (type: string)
- sort order: +
- Map-reduce partition columns: _col1 (type: string)
- Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: string)
- Reduce Output Operator
- key expressions: _col1 (type: string)
- sort order: +
- Map-reduce partition columns: _col1 (type: string)
- Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: string)
- Execution mode: llap
- LLAP IO: no inputs
- Map 7
- Map Operator Tree:
- TableScan
- alias: y
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: string), value (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: string)
- Execution mode: llap
- LLAP IO: no inputs
- Map 8
- Map Operator Tree:
- TableScan
- alias: src1
- Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: value is not null (type: boolean)
- Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: value (type: string)
- outputColumnNames: _col0
- Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 1025 Data size: 10815 Basic stats: COMPLETE Column stats: NONE
- Execution mode: llap
- LLAP IO: no inputs
- Reducer 10
- Execution mode: llap
- Reduce Operator Tree:
- Merge Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 _col0 (type: string)
- 1 _col1 (type: string)
- outputColumnNames: _col1
- Statistics: Num rows: 1127 Data size: 11896 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col1 (type: string)
- sort order: +
- Map-reduce partition columns: _col1 (type: string)
- Statistics: Num rows: 1127 Data size: 11896 Basic stats: COMPLETE Column stats: NONE
- Reducer 11
- Execution mode: llap
- Reduce Operator Tree:
- Merge Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 _col1 (type: string)
- 1 _col0 (type: string)
- outputColumnNames: _col1, _col4
- Statistics: Num rows: 1239 Data size: 13085 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col1 (type: string), _col4 (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 1239 Data size: 13085 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 3550 Data size: 37482 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- Reducer 3
- Execution mode: llap
- Reduce Operator Tree:
- Merge Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 _col1 (type: string)
- 1 _col0 (type: string)
- outputColumnNames: _col1, _col4
- Statistics: Num rows: 634 Data size: 6658 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col1 (type: string), _col4 (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 634 Data size: 6658 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 3550 Data size: 37482 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- Union 2
- Vertex: Union 2
- Union 4
- Vertex: Union 4
- Union 9
- Vertex: Union 9
+Vertex dependency in root stage
+Map 1 <- Map 6 (BROADCAST_EDGE), Union 2 (CONTAINS)
+Map 12 <- Union 9 (CONTAINS)
+Map 13 <- Union 9 (CONTAINS)
+Map 16 <- Map 17 (BROADCAST_EDGE)
+Map 18 <- Map 16 (BROADCAST_EDGE), Union 4 (CONTAINS)
+Map 19 <- Map 16 (BROADCAST_EDGE), Union 4 (CONTAINS)
+Map 20 <- Map 16 (BROADCAST_EDGE), Union 4 (CONTAINS)
+Map 21 <- Map 16 (BROADCAST_EDGE), Union 4 (CONTAINS)
+Map 5 <- Map 6 (BROADCAST_EDGE), Union 2 (CONTAINS)
+Map 8 <- Union 9 (CONTAINS)
+Reducer 10 <- Map 14 (SIMPLE_EDGE), Union 9 (SIMPLE_EDGE)
+Reducer 11 <- Map 15 (SIMPLE_EDGE), Reducer 10 (SIMPLE_EDGE), Union 4 (CONTAINS)
+Reducer 3 <- Map 7 (SIMPLE_EDGE), Union 2 (SIMPLE_EDGE), Union 4 (CONTAINS)
- Stage: Stage-0
- Fetch Operator
- limit: -1
- Processor Tree:
- ListSink
+Stage-0
+ Fetch Operator
+ limit:-1
+ Stage-1
+ Union 4
+ <-Map 18 [CONTAINS] llap
+ File Output Operator [FS_78]
+ Select Operator [SEL_76] (rows=1677 width=10)
+ Output:["_col0","_col1"]
+ Map Join Operator [MAPJOIN_123] (rows=1677 width=10)
+ Conds:RS_73._col1=SEL_56._col0(Inner),Output:["_col0","_col3"]
+ <-Map 16 [BROADCAST_EDGE] llap
+ BROADCAST [RS_73]
+ PartitionCols:_col1
+ Map Join Operator [MAPJOIN_122] (rows=27 width=7)
+ Conds:SEL_50._col0=RS_71._col0(Inner),Output:["_col0","_col1","_col3"]
+ <-Map 17 [BROADCAST_EDGE] llap
+ BROADCAST [RS_71]
+ PartitionCols:_col0
+ Select Operator [SEL_53] (rows=25 width=7)
+ Output:["_col0","_col1"]
+ Filter Operator [FIL_113] (rows=25 width=7)
+ predicate:key is not null
+ TableScan [TS_51] (rows=25 width=7)
+ default@src1,y,Tbl:COMPLETE,Col:NONE,Output:["key","value"]
+ <-Select Operator [SEL_50] (rows=25 width=7)
+ Output:["_col0","_col1"]
+ Filter Operator [FIL_112] (rows=25 width=7)
+ predicate:(key is not null and value is not null)
+ TableScan [TS_48] (rows=25 width=7)
+ default@src1,x,Tbl:COMPLETE,Col:NONE,Output:["key","value"]
+ <-Select Operator [SEL_56] (rows=25 width=7)
+ Output:["_col0"]
+ Filter Operator [FIL_114] (rows=25 width=7)
+ predicate:value is not null
+ TableScan [TS_54] (rows=25 width=7)
+ Output:["value"]
+ <-Map 19 [CONTAINS] llap
+ File Output Operator [FS_78]
+ Select Operator [SEL_76] (rows=1677 width=10)
+ Output:["_col0","_col1"]
+ Map Join Operator [MAPJOIN_123] (rows=1677 width=10)
+ Conds:RS_128._col1=SEL_59._col0(Inner),Output:["_col0","_col3"]
+ <-Map 16 [BROADCAST_EDGE] llap
+ BROADCAST [RS_128]
+ PartitionCols:_col1
+ Please refer to the previous Map Join Operator [MAPJOIN_122]
+ <-Select Operator [SEL_59] (rows=500 width=10)
+ Output:["_col0"]
+ Filter Operator [FIL_115] (rows=500 width=10)
+ predicate:value is not null
+ TableScan [TS_57] (rows=500 width=10)
+ Output:["value"]
+ <-Map 20 [CONTAINS] llap
+ File Output Operator [FS_78]
+ Select Operator [SEL_76] (rows=1677 width=10)
+ Output:["_col0","_col1"]
+ Map Join Operator [MAPJOIN_123] (rows=1677 width=10)
+ Conds:RS_129._col1=SEL_64._col0(Inner),Output:["_col0","_col3"]
+ <-Map 16 [BROADCAST_EDGE] llap
+ BROADCAST [RS_129]
+ PartitionCols:_col1
+ Please refer to the previous Map Join Operator [MAPJOIN_122]
+ <-Select Operator [SEL_64] (rows=500 width=10)
+ Output:["_col0"]
+ Filter Operator [FIL_116] (rows=500 width=10)
+ predicate:value is not null
+ TableScan [TS_62] (rows=500 width=10)
+ Output:["value"]
+ <-Map 21 [CONTAINS] llap
+ File Output Operator [FS_78]
+ Select Operator [SEL_76] (rows=1677 width=10)
+ Output:["_col0","_col1"]
+ Map Join Operator [MAPJOIN_123] (rows=1677 width=10)
+ Conds:RS_130._col1=SEL_68._col0(Inner),Output:["_col0","_col3"]
+ <-Map 16 [BROADCAST_EDGE] llap
+ BROADCAST [RS_130]
+ PartitionCols:_col1
+ Please refer to the previous Map Join Operator [MAPJOIN_122]
+ <-Select Operator [SEL_68] (rows=500 width=10)
+ Output:["_col0"]
+ Filter Operator [FIL_117] (rows=500 width=10)
+ predicate:value is not null
+ TableScan [TS_66] (rows=500 width=10)
+ Output:["value"]
+ <-Reducer 11 [CONTAINS] llap
+ File Output Operator [FS_78]
+ Select Operator [SEL_45] (rows=1239 width=10)
+ Output:["_col0","_col1"]
+ Merge Join Operator [MERGEJOIN_121] (rows=1239 width=10)
+ Conds:RS_42._col1=RS_43._col0(Inner),Output:["_col1","_col4"]
+ <-Map 15 [SIMPLE_EDGE] llap
+ SHUFFLE [RS_43]
+ PartitionCols:_col0
+ Select Operator [SEL_38] (rows=500 width=10)
+ Output:["_col0","_col1"]
+ Filter Operator [FIL_111] (rows=500 width=10)
+ predicate:key is not null
+ TableScan [TS_36] (rows=500 width=10)
+ default@src,y,Tbl:COMPLETE,Col:NONE,Output:["key","value"]
+ <-Reducer 10 [SIMPLE_EDGE] llap
+ SHUFFLE [RS_42]
+ PartitionCols:_col1
+ Merge Join Operator [MERGEJOIN_120] (rows=1127 width=10)
+ Conds:Union 9._col0=RS_40._col1(Inner),Output:["_col1"]
+ <-Map 14 [SIMPLE_EDGE] llap
+ SHUFFLE [RS_40]
+ PartitionCols:_col1
+ Select Operator [SEL_35] (rows=500 width=10)
+ Output:["_col0","_col1"]
+ Filter Operator [FIL_110] (rows=500 width=10)
+ predicate:(key is not null and value is not null)
+ TableScan [TS_33] (rows=500 width=10)
+ default@src,x,Tbl:COMPLETE,Col:NONE,Output:["key","value"]
+ <-Union 9 [SIMPLE_EDGE]
+ <-Map 12 [CONTAINS] llap
+ Reduce Output Operator [RS_39]
+ PartitionCols:_col0
+ Select Operator [SEL_26] (rows=500 width=10)
+ Output:["_col0"]
+ Filter Operator [FIL_108] (rows=500 width=10)
+ predicate:value is not null
+ TableScan [TS_24] (rows=500 width=10)
+ Output:["value"]
+ <-Map 13 [CONTAINS] llap
+ Reduce Output Operator [RS_39]
+ PartitionCols:_col0
+ Select Operator [SEL_31] (rows=500 width=10)
+ Output:["_col0"]
+ Filter Operator [FIL_109] (rows=500 width=10)
+ predicate:value is not null
+ TableScan [TS_29] (rows=500 width=10)
+ Output:["value"]
+ <-Map 8 [CONTAINS] llap
+ Reduce Output Operator [RS_39]
+ PartitionCols:_col0
+ Select Operator [SEL_23] (rows=25 width=7)
+ Output:["_col0"]
+ Filter Operator [FIL_107] (rows=25 width=7)
+ predicate:value is not null
+ TableScan [TS_21] (rows=25 width=7)
+ Output:["value"]
+ <-Reducer 3 [CONTAINS] llap
+ File Output Operator [FS_78]
+ Select Operator [SEL_20] (rows=634 width=10)
+ Output:["_col0","_col1"]
+ Merge Join Operator [MERGEJOIN_119] (rows=634 width=10)
+ Conds:Union 2._col1=RS_18._col0(Inner),Output:["_col1","_col4"]
+ <-Map 7 [SIMPLE_EDGE] llap
+ SHUFFLE [RS_18]
+ PartitionCols:_col0
+ Select Operator [SEL_13] (rows=500 width=10)
+ Output:["_col0","_col1"]
+ Filter Operator [FIL_106] (rows=500 width=10)
+ predicate:key is not null
+ TableScan [TS_11] (rows=500 width=10)
+ default@src,y,Tbl:COMPLETE,Col:NONE,Output:["key","value"]
+ <-Union 2 [SIMPLE_EDGE]
+ <-Map 1 [CONTAINS] llap
+ Reduce Output Operator [RS_17]
+ PartitionCols:_col1
+ Map Join Operator [MAPJOIN_118] (rows=577 width=10)
+ Conds:SEL_2._col0=RS_15._col1(Inner),Output:["_col1"]
+ <-Map 6 [BROADCAST_EDGE] llap
+ BROADCAST [RS_15]
+ PartitionCols:_col1
+ Select Operator [SEL_10] (rows=25 width=7)
+ Output:["_col0","_col1"]
+ Filter Operator [FIL_105] (rows=25 width=7)
+ predicate:(key is not null and value is not null)
+ TableScan [TS_8] (rows=25 width=7)
+ default@src1,x,Tbl:COMPLETE,Col:NONE,Output:["key","value"]
+ <-Select Operator [SEL_2] (rows=25 width=7)
+ Output:["_col0"]
+ Filter Operator [FIL_103] (rows=25 width=7)
+ predicate:value is not null
+ TableScan [TS_0] (rows=25 width=7)
+ Output:["value"]
+ <-Map 5 [CONTAINS] llap
+ Reduce Output Operator [RS_17]
+ PartitionCols:_col1
+ Map Join Operator [MAPJOIN_118] (rows=577 width=10)
+ Conds:SEL_5._col0=RS_124._col1(Inner),Output:["_col1"]
+ <-Map 6 [BROADCAST_EDGE] llap
+ BROADCAST [RS_124]
+ PartitionCols:_col1
+ Please refer to the previous Select Operator [SEL_10]
+ <-Select Operator [SEL_5] (rows=500 width=10)
+ Output:["_col0"]
+ Filter Operator [FIL_104] (rows=500 width=10)
+ predicate:value is not null
+ TableScan [TS_3] (rows=500 width=10)
+ Output:["value"]
PREHOOK: query: explain
SELECT x.key, y.value
@@ -2921,655 +2626,274 @@ INSERT OVERWRITE TABLE a SELECT tmp.key, tmp.value
INSERT OVERWRITE TABLE b SELECT tmp.key, tmp.value
INSERT OVERWRITE TABLE c SELECT tmp.key, tmp.value
POSTHOOK: type: QUERY
-STAGE DEPENDENCIES:
- Stage-3 is a root stage
- Stage-4 depends on stages: Stage-3
- Stage-0 depends on stages: Stage-4
- Stage-5 depends on stages: Stage-0
- Stage-1 depends on stages: Stage-4
- Stage-6 depends on stages: Stage-1
- Stage-2 depends on stages: Stage-4
- Stage-7 depends on stages: Stage-2
-
-STAGE PLANS:
- Stage: Stage-3
- Tez
-#### A masked pattern was here ####
- Edges:
- Map 1 <- Map 6 (BROADCAST_EDGE), Union 2 (CONTAINS)
- Map 12 <- Union 9 (CONTAINS)
- Map 13 <- Union 9 (CONTAINS)
- Map 16 <- Map 17 (BROADCAST_EDGE)
- Map 18 <- Map 16 (BROADCAST_EDGE), Union 4 (CONTAINS)
- Map 19 <- Map 16 (BROADCAST_EDGE), Union 4 (CONTAINS)
- Map 20 <- Map 16 (BROADCAST_EDGE), Union 4 (CONTAINS)
- Map 21 <- Map 16 (BROADCAST_EDGE), Union 4 (CONTAINS)
- Map 5 <- Map 6 (BROADCAST_EDGE), Union 2 (CONTAINS)
- Map 8 <- Union 9 (CONTAINS)
- Reducer 10 <- Map 14 (SIMPLE_EDGE), Union 9 (SIMPLE_EDGE)
- Reducer 11 <- Map 15 (SIMPLE_EDGE), Reducer 10 (SIMPLE_EDGE), Union 4 (CONTAINS)
- Reducer 3 <- Map 7 (SIMPLE_EDGE), Union 2 (SIMPLE_EDGE), Union 4 (CONTAINS)
-#### A masked pattern was here ####
- Vertices:
- Map 1
- Map Operator Tree:
- TableScan
- alias: src1
- Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: value is not null (type: boolean)
- Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: value (type: string)
- outputColumnNames: _col0
- Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 _col0 (type: string)
- 1 _col1 (type: string)
- outputColumnNames: _col1
- input vertices:
- 1 Map 6
- Statistics: Num rows: 577 Data size: 6053 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col1 (type: string)
- sort order: +
- Map-reduce partition columns: _col1 (type: string)
- Statistics: Num rows: 577 Data size: 6053 Basic stats: COMPLETE Column stats: NONE
- Execution mode: llap
- LLAP IO: no inputs
- Map 12
- Map Operator Tree:
- TableScan
- alias: src
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: value is not null (type: boolean)
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: value (type: string)
- outputColumnNames: _col0
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 1025 Data size: 10815 Basic stats: COMPLETE Column stats: NONE
- Execution mode: llap
- LLAP IO: no inputs
- Map 13
- Map Operator Tree:
- TableScan
- alias: src
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: value is not null (type: boolean)
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: value (type: string)
- outputColumnNames: _col0
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 1025 Data size: 10815 Basic stats: COMPLETE Column stats: NONE
- Execution mode: llap
- LLAP IO: no inputs
- Map 14
- Map Operator Tree:
- TableScan
- alias: x
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: (key is not null and value is not null) (type: boolean)
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: string), value (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col1 (type: string)
- sort order: +
- Map-reduce partition columns: _col1 (type: string)
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: string)
- Execution mode: llap
- LLAP IO: no inputs
- Map 15
- Map Operator Tree:
- TableScan
- alias: y
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: string), value (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: string)
- Execution mode: llap
- LLAP IO: no inputs
- Map 16
- Map Operator Tree:
- TableScan
- alias: x
- Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: (key is not null and value is not null) (type: boolean)
- Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: string), value (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 _col0 (type: string)
- 1 _col0 (type: string)
- outputColumnNames: _col0, _col1, _col3
- input vertices:
- 1 Map 17
- Statistics: Num rows: 27 Data size: 210 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col1 (type: string)
- sort order: +
- Map-reduce partition columns: _col1 (type: string)
- Statistics: Num rows: 27 Data size: 210 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: string), _col3 (type: string)
- Reduce Output Operator
- key expressions: _col1 (type: string)
- sort order: +
- Map-reduce partition columns: _col1 (type: string)
- Statistics: Num rows: 27 Data size: 210 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: string), _col3 (type: string)
- Reduce Output Operator
- key expressions: _col1 (type: string)
- sort order: +
- Map-reduce partition columns: _col1 (type: string)
- Statistics: Num rows: 27 Data size: 210 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: string), _col3 (type: string)
- Reduce Output Operator
- key expressions: _col1 (type: string)
- sort order: +
- Map-reduce partition columns: _col1 (type: string)
- Statistics: Num rows: 27 Data size: 210 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: string), _col3 (type: string)
- Execution mode: llap
- LLAP IO: no inputs
- Map 17
- Map Operator Tree:
- TableScan
- alias: y
- Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: string), value (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: string)
- Execution mode: llap
- LLAP IO: no inputs
- Map 18
- Map Operator Tree:
- TableScan
- alias: src1
- Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: value is not null (type: boolean)
- Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: value (type: string)
- outputColumnNames: _col0
- Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 _col1 (type: string)
- 1 _col0 (type: string)
- outputColumnNames: _col0, _col3
- input vertices:
- 0 Map 16
- Statistics: Num rows: 1677 Data size: 17739 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: string), _col3 (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 1677 Data size: 17739 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 3550 Data size: 37482 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.a
- File Output Operator
- compressed: false
- Statistics: Num rows: 3550 Data size: 37482 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.b
- File Output Operator
- compressed: false
- Statistics: Num rows: 3550 Data size: 37482 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.c
- Execution mode: llap
- LLAP IO: no inputs
- Map 19
- Map Operator Tree:
- TableScan
- alias: src
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: value is not null (type: boolean)
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: value (type: string)
- outputColumnNames: _col0
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 _col1 (type: string)
- 1 _col0 (type: string)
- outputColumnNames: _col0, _col3
- input vertices:
- 0 Map 16
- Statistics: Num rows: 1677 Data size: 17739 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: string), _col3 (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 1677 Data size: 17739 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 3550 Data size: 37482 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.a
- File Output Operator
- compressed: false
- Statistics: Num rows: 3550 Data size: 37482 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.b
- File Output Operator
- compressed: false
- Statistics: Num rows: 3550 Data size: 37482 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.c
- Execution mode: llap
- LLAP IO: no inputs
- Map 20
- Map Operator Tree:
- TableScan
- alias: src
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: value is not null (type: boolean)
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: value (type: string)
- outputColumnNames: _col0
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 _col1 (type: string)
- 1 _col0 (type: string)
- outputColumnNames: _col0, _col3
- input vertices:
- 0 Map 16
- Statistics: Num rows: 1677 Data size: 17739 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: string), _col3 (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 1677 Data size: 17739 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 3550 Data size: 37482 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.a
- File Output Operator
- compressed: false
- Statistics: Num rows: 3550 Data size: 37482 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.b
- File Output Operator
- compressed: false
- Statistics: Num rows: 3550 Data size: 37482 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.c
- Execution mode: llap
- LLAP IO: no inputs
- Map 21
- Map Operator Tree:
- TableScan
- alias: src
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: value is not null (type: boolean)
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: value (type: string)
- outputColumnNames: _col0
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 _col1 (type: string)
- 1 _col0 (type: string)
- outputColumnNames: _col0, _col3
- input vertices:
- 0 Map 16
- Statistics: Num rows: 1677 Data size: 17739 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: string), _col3 (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 1677 Data size: 17739 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 3550 Data size: 37482 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.a
- File Output Operator
- compressed: false
- Statistics: Num rows: 3550 Data size: 37482 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.b
- File Output Operator
- compressed: false
- Statistics: Num rows: 3550 Data size: 37482 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.c
- Execution mode: llap
- LLAP IO: no inputs
- Map 5
- Map Operator Tree:
- TableScan
- alias: src
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: value is not null (type: boolean)
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: value (type: string)
- outputColumnNames: _col0
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 _col0 (type: string)
- 1 _col1 (type: string)
- outputColumnNames: _col1
- input vertices:
- 1 Map 6
- Statistics: Num rows: 577 Data size: 6053 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col1 (type: string)
- sort order: +
- Map-reduce partition columns: _col1 (type: string)
- Statistics: Num rows: 577 Data size: 6053 Basic stats: COMPLETE Column stats: NONE
- Execution mode: llap
- LLAP IO: no inputs
- Map 6
- Map Operator Tree:
- TableScan
- alias: x
- Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: (key is not null and value is not null) (type: boolean)
- Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: string), value (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col1 (type: string)
- sort order: +
- Map-reduce partition columns: _col1 (type: string)
- Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: string)
- Reduce Output Operator
- key expressions: _col1 (type: string)
- sort order: +
- Map-reduce partition columns: _col1 (type: string)
- Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: string)
- Execution mode: llap
- LLAP IO: no inputs
- Map 7
- Map Operator Tree:
- TableScan
- alias: y
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: string), value (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: string)
- Execution mode: llap
- LLAP IO: no inputs
- Map 8
- Map Operator Tree:
- TableScan
- alias: src1
- Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: value is not null (type: boolean)
- Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: value (type: string)
- outputColumnNames: _col0
- Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 1025 Data size: 10815 Basic stats: COMPLETE Column stats: NONE
- Execution mode: llap
- LLAP IO: no inputs
- Reducer 10
- Execution mode: llap
- Reduce Operator Tree:
- Merge Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 _col0 (type: string)
- 1 _col1 (type: string)
- outputColumnNames: _col1
- Statistics: Num rows: 1127 Data size: 11896 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col1 (type: string)
- sort order: +
- Map-reduce partition columns: _col1 (type: string)
- Statistics: Num rows: 1127 Data size: 11896 Basic stats: COMPLETE Column stats: NONE
- Reducer 11
- Execution mode: llap
- Reduce Operator Tree:
- Merge Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 _col1 (type: string)
- 1 _col0 (type: string)
- outputColumnNames: _col1, _col4
- Statistics: Num rows: 1239 Data size: 13085 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col1 (type: string), _col4 (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 1239 Data size: 13085 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 3550 Data size: 37482 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.a
- File Output Operator
- compressed: false
- Statistics: Num rows: 3550 Data size: 37482 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.b
- File Output Operator
- compressed: false
- Statistics: Num rows: 3550 Data size: 37482 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.c
- Reducer 3
- Execution mode: llap
- Reduce Operator Tree:
- Merge Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 _col1 (type: string)
- 1 _col0 (type: string)
- outputColumnNames: _col1, _col4
- Statistics: Num rows: 634 Data size: 6658 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col1 (type: string), _col4 (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 634 Data size: 6658 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 3550 Data size: 37482 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.a
- File Output Operator
- compressed: false
- Statistics: Num rows: 3550 Data size: 37482 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.b
- File Output Operator
- compressed: false
- Statistics: Num rows: 3550 Data size: 37482 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.c
- Union 2
- Vertex: Union 2
- Union 4
- Vertex: Union 4
- Union 9
- Vertex: Union 9
-
- Stage: Stage-4
- Dependency Collection
-
- Stage: Stage-0
- Move Operator
- tables:
- replace: true
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.a
-
- Stage: Stage-5
- Stats-Aggr Operator
-
- Stage: Stage-1
- Move Operator
- tables:
- replace: true
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.b
-
- Stage: Stage-6
- Stats-Aggr Operator
+Plan optimized by CBO.
- Stage: Stage-2
- Move Operator
- tables:
- replace: true
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.c
+Vertex dependency in root stage
+Map 1 <- Map 6 (BROADCAST_EDGE), Union 2 (CONTAINS)
+Map 12 <- Union 9 (CONTAINS)
+Map 13 <- Union 9 (CONTAINS)
+Map 16 <- Map 17 (BROADCAST_EDGE)
+Map 18 <- Map 16 (BROADCAST_EDGE), Union 4 (CONTAINS)
+Map 19 <- Map 16 (BROADCAST_EDGE), Union 4 (CONTAINS)
+Map 20 <- Map 16 (BROADCAST_EDGE), Union 4 (CONTAINS)
+Map 21 <- Map 16 (BROADCAST_EDGE), Union 4 (CONTAINS)
+Map 5 <- Map 6 (BROADCAST_EDGE), Union 2 (CONTAINS)
+Map 8 <- Union 9 (CONTAINS)
+Reducer 10 <- Map 14 (SIMPLE_EDGE), Union 9 (SIMPLE_EDGE)
+Reducer 11 <- Map 15 (SIMPLE_EDGE), Reducer 10 (SIMPLE_EDGE), Union 4 (CONTAINS)
+Reducer 3 <- Map 7 (SIMPLE_EDGE), Union 2 (SIMPLE_EDGE), Union 4 (CONTAINS)
- Stage: Stage-7
- Stats-Aggr Operator
+Stage-5
+ Stats-Aggr Operator
+ Stage-0
+ Move Operator
+ table:{"name:":"default.a"}
+ Stage-4
+ Dependency Collection{}
+ Stage-3
+ Union 4
+ <-Map 18 [CONTAINS] llap
+ File Output Operator [FS_79]
+ table:{"name:":"default.a"}
+ Select Operator [SEL_76] (rows=1677 width=10)
+ Output:["_col0","_col1"]
+ Map Join Operator [MAPJOIN_128] (rows=1677 width=10)
+ Conds:RS_73._col1=SEL_56._col0(Inner),Output:["_col0","_col3"]
+ <-Map 16 [BROADCAST_EDGE] llap
+ BROADCAST [RS_73]
+ PartitionCols:_col1
+ Map Join Operator [MAPJOIN_127] (rows=27 width=7)
+ Conds:SEL_50._col0=RS_71._col0(Inner),Output:["_col0","_col1","_col3"]
+ <-Map 17 [BROADCAST_EDGE] llap
+ BROADCAST [RS_71]
+ PartitionCols:_col0
+ Select Operator [SEL_53] (rows=25 width=7)
+ Output:["_col0","_col1"]
+ Filter Operator [FIL_118] (rows=25 width=7)
+ predicate:key is not null
+ TableScan [TS_51] (rows=25 width=7)
+ default@src1,y,Tbl:COMPLETE,Col:NONE,Output:["key","value"]
+ <-Select Operator [SEL_50] (rows=25 width=7)
+ Output:["_col0","_col1"]
+ Filter Operator [FIL_117] (rows=25 width=7)
+ predicate:(key is not null and value is not null)
+ TableScan [TS_48] (rows=25 width=7)
+ default@src1,x,Tbl:COMPLETE,Col:NONE,Output:["key","value"]
+ <-Select Operator [SEL_56] (rows=25 width=7)
+ Output:["_col0"]
+ Filter Operator [FIL_119] (rows=25 width=7)
+ predicate:value is not null
+ TableScan [TS_54] (rows=25 width=7)
+ Output:["value"]
+ File Output Operator [FS_81]
+ table:{"name:":"default.b"}
+ Please refer to the previous Select Operator [SEL_76]
+ File Output Operator [FS_83]
+ table:{"name:":"default.c"}
+ Please refer to the previous Select Operator [SEL_76]
+ <-Map 19 [CONTAINS] llap
+ File Output Operator [FS_79]
+ table:{"name:":"default.a"}
+ Select Operator [SEL_76] (rows=1677 width=10)
+ Output:["_col0","_col1"]
+ Map Join Operator [MAPJOIN_128] (rows=1677 width=10)
+ Conds:RS_133._col1=SEL_59._col0(Inner),Output:["_col0","_col3"]
+ <-Map 16 [BROADCAST_EDGE] llap
+ BROADCAST [RS_133]
+ PartitionCols:_col1
+ Please refer to the previous Map Join Operator [MAPJOIN_127]
+ <-Select Operator [SEL_59] (rows=500 width=10)
+ Output:["_col0"]
+ Filter Operator [FIL_120] (rows=500 width=10)
+ predicate:value is not null
+ TableScan [TS_57] (rows=500 width=10)
+ Output:["value"]
+ File Output Operator [FS_81]
+ table:{"name:":"default.b"}
+ Please refer to the previous Select Operator [SEL_76]
+ File Output Operator [FS_83]
+ table:{"name:":"default.c"}
+ Please refer to the previous Select Operator [SEL_76]
+ <-Map 20 [CONTAINS] llap
+ File Output Operator [FS_79]
+ table:{"name:":"default.a"}
+ Select Operator [SEL_76] (rows=1677 width=10)
+ Output:["_col0","_col1"]
+ Map Join Operator [MAPJOIN_128] (rows=1677 width=10)
+ Conds:RS_134._col1=SEL_64._col0(Inner),Output:["_col0","_col3"]
+ <-Map 16 [BROADCAST_EDGE] llap
+ BROADCAST [RS_134]
+ PartitionCols:_col1
+ Please refer to the previous Map Join Operator [MAPJOIN_127]
+ <-Select Operator [SEL_64] (rows=500 width=10)
+ Output:["_col0"]
+ Filter Operator [FIL_121] (rows=500 width=10)
+ predicate:value is not null
+ TableScan [TS_62] (rows=500 width=10)
+ Output:["value"]
+ File Output Operator [FS_81]
+ table:{"name:":"default.b"}
+ Please refer to the previous Select Operator [SEL_76]
+ File Output Operator [FS_83]
+ table:{"name:":"default.c"}
+ Please refer to the previous Select Operator [SEL_76]
+ <-Map 21 [CONTAINS] llap
+ File Output Operator [FS_79]
+ table:{"name:":"default.a"}
+ Select Operator [SEL_76] (rows=1677 width=10)
+ Output:["_col0","_col1"]
+ Map Join Operator [MAPJOIN_128] (rows=1677 width=10)
+ Conds:RS_135._col1=SEL_68._col0(Inner),Output:["_col0","_col3"]
+ <-Map 16 [BROADCAST_EDGE] llap
+ BROADCAST [RS_135]
+ PartitionCols:_col1
+ Please refer to the previous Map Join Operator [MAPJOIN_127]
+ <-Select Operator [SEL_68] (rows=500 width=10)
+ Output:["_col0"]
+ Filter Operator [FIL_122] (rows=500 width=10)
+ predicate:value is not null
+ TableScan [TS_66] (rows=500 width=10)
+ Output:["value"]
+ File Output Operator [FS_81]
+ table:{"name:":"default.b"}
+ Please refer to the previous Select Operator [SEL_76]
+ File Output Operator [FS_83]
+ table:{"name:":"default.c"}
+ Please refer to the previous Select Operator [SEL_76]
+ <-Reducer 11 [CONTAINS] llap
+ File Output Operator [FS_79]
+ table:{"name:":"default.a"}
+ Select Operator [SEL_45] (rows=1239 width=10)
+ Output:["_col0","_col1"]
+ Merge Join Operator [MERGEJOIN_126] (rows=1239 width=10)
+ Conds:RS_42._col1=RS_43._col0(Inner),Output:["_col1","_col4"]
+ <-Map 15 [SIMPLE_EDGE] llap
+ SHUFFLE [RS_43]
+ PartitionCols:_col0
+ Select Operator [SEL_38] (rows=500 width=10)
+ Output:["_col0","_col1"]
+ Filter Operator [FIL_116] (rows=500 width=10)
+ predicate:key is not null
+ TableScan [TS_36] (rows=500 width=10)
+ default@src,y,Tbl:COMPLETE,Col:NONE,Output:["key","value"]
+ <-Reducer 10 [SIMPLE_EDGE] llap
+ SHUFFLE [RS_42]
+ PartitionCols:_col1
+ Merge Join Operator [MERGEJOIN_125] (rows=1127 width=10)
+ Conds:Union 9._col0=RS_40._col1(Inner),Output:["_col1"]
+ <-Map 14 [SIMPLE_EDGE] llap
+ SHUFFLE [RS_40]
+ PartitionCols:_col1
+ Select Operator [SEL_35] (rows=500 width=10)
+ Output:["_col0","_col1"]
+ Filter Operator [FIL_115] (rows=500 width=10)
+ predicate:(key is not null and value is not null)
+ TableScan [TS_33] (rows=500 width=10)
+ default@src,x,Tbl:COMPLETE,Col:NONE,Output:["key","value"]
+ <-Union 9 [SIMPLE_EDGE]
+ <-Map 12 [CONTAINS] llap
+ Reduce Output Operator [RS_39]
+ PartitionCols:_col0
+ Select Operator [SEL_26] (rows=500 width=10)
+ Output:["_col0"]
+ Filter Operator [FIL_113] (rows=500 width=10)
+ predicate:value is not null
+ TableScan [TS_24] (rows=500 width=10)
+ Output:["value"]
+ <-Map 13 [CONTAINS] llap
+ Reduce Output Operator [RS_39]
+ PartitionCols:_col0
+ Select Operator [SEL_31] (rows=500 width=10)
+ Output:["_col0"]
+ Filter Operator [FIL_114] (rows=500 width=10)
+ predicate:value is not null
+ TableScan [TS_29] (rows=500 width=10)
+ Output:["value"]
+ <-Map 8 [CONTAINS] llap
+ Reduce Output Operator [RS_39]
+ PartitionCols:_col0
+ Select Operator [SEL_23] (rows=25 width=7)
+ Output:["_col0"]
+ Filter Operator [FIL_112] (rows=25 width=7)
+ predicate:value is not null
+ TableScan [TS_21] (rows=25 width=7)
+ Output:["value"]
+ File Output Operator [FS_81]
+ table:{"name:":"default.b"}
+ Please refer to the previous Select Operator [SEL_45]
+ File Output Operator [FS_83]
+ table:{"name:":"default.c"}
+ Please refer to the previous Select Operator [SEL_45]
+ <-Reducer 3 [CONTAINS] llap
+ File Output Operator [FS_79]
+ table:{"name:":"default.a"}
+ Select Operator [SEL_20] (rows=634 width=10)
+ Output:["_col0","_col1"]
+ Merge Join Operator [MERGEJOIN_124] (rows=634 width=10)
+ Conds:Union 2._col1=RS_18._col0(Inner),Output:["_col1","_col4"]
+ <-Map 7 [SIMPLE_EDGE] llap
+ SHUFFLE [RS_18]
+ PartitionCols:_col0
+ Select Operator [SEL_13] (rows=500 width=10)
+ Output:["_col0","_col1"]
+ Filter Operator [FIL_111] (rows=500 width=10)
+ predicate:key is not null
+ TableScan [TS_11] (rows=500 width=10)
+ default@src,y,Tbl:COMPLETE,Col:NONE,Output:["key","value"]
+ <-Union 2 [SIMPLE_EDGE]
+ <-Map 1 [CONTAINS] llap
+ Reduce Output Operator [RS_17]
+ PartitionCols:_col1
+ Map Join Operator [MAPJOIN_123] (rows=577 width=10)
+ Conds:SEL_2._col0=RS_15._col1(Inner),Output:["_col1"]
+ <-Map 6 [BROADCAST_EDGE] llap
+ BROADCAST [RS_15]
+ PartitionCols:_col1
+ Select Operator [SEL_10] (rows=25 width=7)
+ Output:["_col0","_col1"]
+ Filter Operator [FIL_110] (rows=25 width=7)
+ predicate:(key is not null and value is not null)
+ TableScan [TS_8] (rows=25 width=7)
+ default@src1,x,Tbl:COMPLETE,Col:NONE,Output:["key","value"]
+ <-Select Operator [
<TRUNCATED>
[5/5] hive git commit: HIVE-16421 Runtime filtering breaks user-level
explain (Pengcheng Xiong, reviewed by Ashutosh Chauhan)
Posted by px...@apache.org.
HIVE-16421 Runtime filtering breaks user-level explain (Pengcheng Xiong, reviewed by Ashutosh Chauhan)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/eaa439e3
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/eaa439e3
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/eaa439e3
Branch: refs/heads/master
Commit: eaa439e394514de0c38602d01a98d2e6237c0da8
Parents: 0b6a48d
Author: Pengcheng Xiong <px...@hortonworks.com>
Authored: Sat Apr 22 20:46:22 2017 -0700
Committer: Pengcheng Xiong <px...@hortonworks.com>
Committed: Sat Apr 22 20:46:22 2017 -0700
----------------------------------------------------------------------
.../hive/common/jsonexplain/tez/Connection.java | 7 +-
.../hadoop/hive/common/jsonexplain/tez/Op.java | 58 +-
.../hive/common/jsonexplain/tez/Vertex.java | 45 +-
.../test/resources/testconfiguration.properties | 1 +
.../dynamic_semijoin_user_level.q | 106 ++
.../clientpositive/udf_round_2_auto_stats.q | 16 +
.../llap/dynamic_semijoin_user_level.q.out | 1495 ++++++++++++++++
.../clientpositive/llap/explainuser_2.q.out | 1624 +++++-------------
.../clientpositive/udf_round_2_auto_stats.q.out | 55 +
9 files changed, 2206 insertions(+), 1201 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/eaa439e3/common/src/java/org/apache/hadoop/hive/common/jsonexplain/tez/Connection.java
----------------------------------------------------------------------
diff --git a/common/src/java/org/apache/hadoop/hive/common/jsonexplain/tez/Connection.java b/common/src/java/org/apache/hadoop/hive/common/jsonexplain/tez/Connection.java
index d341cb1..5cd0e4c 100644
--- a/common/src/java/org/apache/hadoop/hive/common/jsonexplain/tez/Connection.java
+++ b/common/src/java/org/apache/hadoop/hive/common/jsonexplain/tez/Connection.java
@@ -18,7 +18,7 @@
package org.apache.hadoop.hive.common.jsonexplain.tez;
-public final class Connection {
+public final class Connection implements Comparable<Connection>{
public final String type;
public final Vertex from;
@@ -27,4 +27,9 @@ public final class Connection {
this.type = type;
this.from = from;
}
+
+ @Override
+ public int compareTo(Connection o) {
+ return from.compareTo(o.from);
+ }
}
http://git-wip-us.apache.org/repos/asf/hive/blob/eaa439e3/common/src/java/org/apache/hadoop/hive/common/jsonexplain/tez/Op.java
----------------------------------------------------------------------
diff --git a/common/src/java/org/apache/hadoop/hive/common/jsonexplain/tez/Op.java b/common/src/java/org/apache/hadoop/hive/common/jsonexplain/tez/Op.java
index 718791c..96e75c0 100644
--- a/common/src/java/org/apache/hadoop/hive/common/jsonexplain/tez/Op.java
+++ b/common/src/java/org/apache/hadoop/hive/common/jsonexplain/tez/Op.java
@@ -19,6 +19,7 @@
package org.apache.hadoop.hive.common.jsonexplain.tez;
import java.util.ArrayList;
+import java.util.Collections;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.List;
@@ -120,19 +121,18 @@ public final class Op {
for (String key : JSONObject.getNames(keys)) {
// first search from the posToVertex
if (posToVertex.containsKey(key)) {
- Vertex vertex = posToVertex.get(key);
- if (vertex.rootOps.size() == 1) {
- posToOpId.put(key, vertex.rootOps.get(0).operatorId);
- } else if ((vertex.rootOps.size() == 0 && vertex.vertexType == VertexType.UNION)) {
- posToOpId.put(key, vertex.name);
+ Vertex v = posToVertex.get(key);
+ if (v.rootOps.size() == 1) {
+ posToOpId.put(key, v.rootOps.get(0).operatorId);
+ } else if ((v.rootOps.size() == 0 && v.vertexType == VertexType.UNION)) {
+ posToOpId.put(key, v.name);
} else {
- Op singleRSOp = vertex.getSingleRSOp();
- if (singleRSOp != null) {
- posToOpId.put(key, singleRSOp.operatorId);
+ Op joinRSOp = v.getJoinRSOp(vertex);
+ if (joinRSOp != null) {
+ posToOpId.put(key, joinRSOp.operatorId);
} else {
throw new Exception(
- "There are none or more than one root operators in a single vertex "
- + vertex.name
+ "Can not find join reduceSinkOp for " + v.name + " to join " + vertex.name
+ " when hive explain user is trying to identify the operator id.");
}
}
@@ -143,20 +143,19 @@ public final class Op {
}
// then assume it is from its own vertex
else if (parentVertexes.size() == 1) {
- Vertex vertex = parentVertexes.iterator().next();
+ Vertex v = parentVertexes.iterator().next();
parentVertexes.clear();
- if (vertex.rootOps.size() == 1) {
- posToOpId.put(key, vertex.rootOps.get(0).operatorId);
- } else if ((vertex.rootOps.size() == 0 && vertex.vertexType == VertexType.UNION)) {
- posToOpId.put(key, vertex.name);
+ if (v.rootOps.size() == 1) {
+ posToOpId.put(key, v.rootOps.get(0).operatorId);
+ } else if ((v.rootOps.size() == 0 && v.vertexType == VertexType.UNION)) {
+ posToOpId.put(key, v.name);
} else {
- Op singleRSOp = vertex.getSingleRSOp();
- if (singleRSOp != null) {
- posToOpId.put(key, singleRSOp.operatorId);
+ Op joinRSOp = v.getJoinRSOp(vertex);
+ if (joinRSOp != null) {
+ posToOpId.put(key, joinRSOp.operatorId);
} else {
throw new Exception(
- "There are none or more than one root operators in a single vertex "
- + vertex.name
+ "Can not find join reduceSinkOp for " + v.name + " to join " + vertex.name
+ " when hive explain user is trying to identify the operator id.");
}
}
@@ -207,12 +206,12 @@ public final class Op {
} else if ((v.rootOps.size() == 0 && v.vertexType == VertexType.UNION)) {
posToOpId.put(entry.getKey(), v.name);
} else {
- Op singleRSOp = v.getSingleRSOp();
- if (singleRSOp != null) {
- posToOpId.put(entry.getKey(), singleRSOp.operatorId);
+ Op joinRSOp = v.getJoinRSOp(vertex);
+ if (joinRSOp != null) {
+ posToOpId.put(entry.getKey(), joinRSOp.operatorId);
} else {
throw new Exception(
- "There are none or more than one root operators in a single vertex " + v.name
+ "Can not find join reduceSinkOp for " + v.name + " to join " + vertex.name
+ " when hive explain user is trying to identify the operator id.");
}
}
@@ -336,8 +335,9 @@ public final class Op {
}
// print inline vertex
if (parser.inlineMap.containsKey(this)) {
- for (int index = 0; index < parser.inlineMap.get(this).size(); index++) {
- Connection connection = parser.inlineMap.get(this).get(index);
+ List<Connection> connections = parser.inlineMap.get(this);
+ Collections.sort(connections);
+ for (Connection connection : connections) {
connection.from.print(printer, indentFlag, connection.type, this.vertex);
}
}
@@ -347,9 +347,9 @@ public final class Op {
}
// print next vertex
else {
- for (int index = 0; index < noninlined.size(); index++) {
- Vertex v = noninlined.get(index).from;
- v.print(printer, indentFlag, noninlined.get(index).type, this.vertex);
+ Collections.sort(noninlined);
+ for (Connection connection : noninlined) {
+ connection.from.print(printer, indentFlag, connection.type, this.vertex);
}
}
}
http://git-wip-us.apache.org/repos/asf/hive/blob/eaa439e3/common/src/java/org/apache/hadoop/hive/common/jsonexplain/tez/Vertex.java
----------------------------------------------------------------------
diff --git a/common/src/java/org/apache/hadoop/hive/common/jsonexplain/tez/Vertex.java b/common/src/java/org/apache/hadoop/hive/common/jsonexplain/tez/Vertex.java
index 3d559bd..13ecac0 100644
--- a/common/src/java/org/apache/hadoop/hive/common/jsonexplain/tez/Vertex.java
+++ b/common/src/java/org/apache/hadoop/hive/common/jsonexplain/tez/Vertex.java
@@ -20,15 +20,12 @@ package org.apache.hadoop.hive.common.jsonexplain.tez;
import java.io.IOException;
import java.util.ArrayList;
-import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
-import java.util.Set;
import java.util.TreeMap;
import org.apache.hadoop.hive.common.jsonexplain.tez.Op.OpType;
-import org.apache.hadoop.util.hash.Hash;
import org.codehaus.jackson.JsonParseException;
import org.codehaus.jackson.map.JsonMappingException;
import org.json.JSONArray;
@@ -53,8 +50,8 @@ public final class Vertex implements Comparable<Vertex>{
// we create a dummy vertex for a mergejoin branch for a self join if this
// vertex is a mergejoin
public final List<Vertex> mergeJoinDummyVertexs = new ArrayList<>();
- // whether this vertex has multiple reduce operators
- public boolean hasMultiReduceOp = false;
+ // number of reduce sink (RS) operators among this vertex's root operators (stays 0 when it has fewer than two root operators)
+ public int numReduceOp = 0;
// execution mode
public String executionMode = "";
// tagToInput for reduce work
@@ -217,7 +214,7 @@ public final class Vertex implements Comparable<Vertex>{
public void print(Printer printer, int indentFlag, String type, Vertex callingVertex)
throws JSONException, Exception {
// print vertexname
- if (parser.printSet.contains(this) && !hasMultiReduceOp) {
+ if (parser.printSet.contains(this) && numReduceOp <= 1) {
if (type != null) {
printer.println(TezJsonParser.prefixString(indentFlag, "<-")
+ " Please refer to the previous " + this.name + " [" + type + "]");
@@ -235,7 +232,7 @@ public final class Vertex implements Comparable<Vertex>{
printer.println(TezJsonParser.prefixString(indentFlag) + this.name + this.executionMode);
}
// print operators
- if (hasMultiReduceOp && !(callingVertex.vertexType == VertexType.UNION)) {
+ if (numReduceOp > 1 && !(callingVertex.vertexType == VertexType.UNION)) {
// find the right op
Op choose = null;
for (Op op : this.rootOps) {
@@ -273,16 +270,15 @@ public final class Vertex implements Comparable<Vertex>{
*/
public void checkMultiReduceOperator() {
// check if it is a reduce vertex and its children is more than 1;
- if (!this.name.contains("Reduce") || this.rootOps.size() < 2) {
+ if (this.rootOps.size() < 2) {
return;
}
// check if all the child ops are reduce output operators
for (Op op : this.rootOps) {
- if (op.type != OpType.RS) {
- return;
+ if (op.type == OpType.RS) {
+ numReduceOp++;
}
}
- this.hasMultiReduceOp = true;
}
public void setType(String type) {
@@ -304,28 +300,35 @@ public final class Vertex implements Comparable<Vertex>{
}
}
- //The following code should be gone after HIVE-11075 using topological order
+ // The following code should be gone after HIVE-11075 using topological order
@Override
public int compareTo(Vertex o) {
- return this.name.compareTo(o.name);
+ // a vertex with more reduce sink (RS) operators sorts, and is therefore printed, before one with fewer.
+ if (numReduceOp != o.numReduceOp) {
+ return -(numReduceOp - o.numReduceOp);
+ } else {
+ return this.name.compareTo(o.name);
+ }
}
- public Op getSingleRSOp() {
+ public Op getJoinRSOp(Vertex joinVertex) {
if (rootOps.size() == 0) {
return null;
+ } else if (rootOps.size() == 1) {
+ if (rootOps.get(0).type == OpType.RS) {
+ return rootOps.get(0);
+ } else {
+ return null;
+ }
} else {
- Op ret = null;
for (Op op : rootOps) {
if (op.type == OpType.RS) {
- if (ret == null) {
- ret = op;
- } else {
- // find more than one RS Op
- return null;
+ if (op.outputVertexName.equals(joinVertex.name)) {
+ return op;
}
}
}
- return ret;
+ return null;
}
}
}
http://git-wip-us.apache.org/repos/asf/hive/blob/eaa439e3/itests/src/test/resources/testconfiguration.properties
----------------------------------------------------------------------
diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index 116d0eb..d684ba8 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -407,6 +407,7 @@ minillaplocal.shared.query.files=alter_merge_2_orc.q,\
minillap.query.files=acid_bucket_pruning.q,\
bucket5.q,\
bucket6.q,\
+ dynamic_semijoin_user_level.q,\
except_distinct.q,\
explainuser_2.q,\
empty_dir_in_table.q,\
http://git-wip-us.apache.org/repos/asf/hive/blob/eaa439e3/ql/src/test/queries/clientpositive/dynamic_semijoin_user_level.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/dynamic_semijoin_user_level.q b/ql/src/test/queries/clientpositive/dynamic_semijoin_user_level.q
new file mode 100644
index 0000000..88ab46e
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/dynamic_semijoin_user_level.q
@@ -0,0 +1,106 @@
+set hive.explain.user=true;
+set hive.compute.query.using.stats=false;
+set hive.mapred.mode=nonstrict;
+set hive.optimize.ppd=true;
+set hive.ppd.remove.duplicatefilters=true;
+set hive.tez.dynamic.partition.pruning=true;
+set hive.tez.dynamic.semijoin.reduction=true;
+set hive.optimize.metadataonly=false;
+set hive.optimize.index.filter=true;
+set hive.stats.autogather=true;
+set hive.tez.bigtable.minsize.semijoin.reduction=1;
+set hive.tez.min.bloom.filter.entries=1;
+set hive.stats.fetch.column.stats=true;
+
+-- Create Tables
+create table alltypesorc_int ( cint int, cstring string ) stored as ORC;
+create table srcpart_date (key string, value string) partitioned by (ds string ) stored as ORC;
+CREATE TABLE srcpart_small(key1 STRING, value1 STRING) partitioned by (ds string) STORED as ORC;
+
+-- Add Partitions
+alter table srcpart_date add partition (ds = "2008-04-08");
+alter table srcpart_date add partition (ds = "2008-04-09");
+
+alter table srcpart_small add partition (ds = "2008-04-08");
+alter table srcpart_small add partition (ds = "2008-04-09");
+
+-- Load
+insert overwrite table alltypesorc_int select cint, cstring1 from alltypesorc;
+insert overwrite table srcpart_date partition (ds = "2008-04-08" ) select key, value from srcpart where ds = "2008-04-08";
+insert overwrite table srcpart_date partition (ds = "2008-04-09") select key, value from srcpart where ds = "2008-04-09";
+insert overwrite table srcpart_small partition (ds = "2008-04-09") select key, value from srcpart where ds = "2008-04-09" limit 20;
+
+set hive.tez.dynamic.semijoin.reduction=false;
+
+analyze table alltypesorc_int compute statistics for columns;
+analyze table srcpart_date compute statistics for columns;
+analyze table srcpart_small compute statistics for columns;
+
+-- single column, single key
+set hive.tez.dynamic.semijoin.reduction=true;
+EXPLAIN select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1);
+select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1);
+set hive.tez.dynamic.semijoin.reduction=true;
+
+-- Mix dynamic partition pruning(DPP) and min/max bloom filter optimizations. Should pick the DPP.
+EXPLAIN select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.ds);
+select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.ds);
+set hive.tez.dynamic.semijoin.reduction=false;
+
+--multiple sources, single key
+EXPLAIN select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) join alltypesorc_int on (srcpart_small.key1 = alltypesorc_int.cstring);
+select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) join alltypesorc_int on (srcpart_small.key1 = alltypesorc_int.cstring);
+set hive.tez.dynamic.semijoin.reduction=true;
+EXPLAIN select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) join alltypesorc_int on (srcpart_small.key1 = alltypesorc_int.cstring);
+select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) join alltypesorc_int on (srcpart_small.key1 = alltypesorc_int.cstring);
+set hive.tez.dynamic.semijoin.reduction=false;
+
+-- single source, multiple keys
+EXPLAIN select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1 and srcpart_date.value = srcpart_small.value1);
+select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1 and srcpart_date.value = srcpart_small.value1);
+set hive.tez.dynamic.semijoin.reduction=true;
+EXPLAIN select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1 and srcpart_date.value = srcpart_small.value1);
+select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1 and srcpart_date.value = srcpart_small.value1);
+set hive.tez.dynamic.semijoin.reduction=false;
+
+-- multiple sources, different keys
+EXPLAIN select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) join alltypesorc_int on (srcpart_date.value = alltypesorc_int.cstring);
+select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) join alltypesorc_int on (srcpart_date.value = alltypesorc_int.cstring);
+set hive.tez.dynamic.semijoin.reduction=true;
+EXPLAIN select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) join alltypesorc_int on (srcpart_date.value = alltypesorc_int.cstring);
+select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) join alltypesorc_int on (srcpart_date.value = alltypesorc_int.cstring);
+
+-- Explain extended to verify fast start for Reducer in semijoin branch
+EXPLAIN extended select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1);
+set hive.tez.dynamic.semijoin.reduction=false;
+
+-- With Mapjoins.
+set hive.auto.convert.join=true;
+set hive.auto.convert.join.noconditionaltask=true;
+set hive.auto.convert.join.noconditionaltask.size=100000000000;
+
+EXPLAIN select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1);
+select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1);
+set hive.tez.dynamic.semijoin.reduction=true;
+EXPLAIN select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1);
+select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1);
+set hive.tez.dynamic.semijoin.reduction=false;
+
+-- multiple sources, different keys
+EXPLAIN select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) join alltypesorc_int on (srcpart_date.value = alltypesorc_int.cstring);
+select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) join alltypesorc_int on (srcpart_date.value = alltypesorc_int.cstring);
+set hive.tez.dynamic.semijoin.reduction=true;
+EXPLAIN select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) join alltypesorc_int on (srcpart_date.value = alltypesorc_int.cstring);
+select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) join alltypesorc_int on (srcpart_date.value = alltypesorc_int.cstring);
+--set hive.tez.dynamic.semijoin.reduction=false;
+
+-- With unions
+explain select * from alltypesorc_int join
+ (select srcpart_date.key as key from srcpart_date
+ union all
+ select srcpart_small.key1 as key from srcpart_small) unionsrc on (alltypesorc_int.cstring = unionsrc.key);
+
+
+drop table srcpart_date;
+drop table srcpart_small;
+drop table alltypesorc_int;
http://git-wip-us.apache.org/repos/asf/hive/blob/eaa439e3/ql/src/test/queries/clientpositive/udf_round_2_auto_stats.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/udf_round_2_auto_stats.q b/ql/src/test/queries/clientpositive/udf_round_2_auto_stats.q
new file mode 100644
index 0000000..2532f81
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/udf_round_2_auto_stats.q
@@ -0,0 +1,16 @@
+set hive.fetch.task.conversion=more;
+set hive.stats.column.autogather=true;
+
+-- test for NaN (not-a-number)
+create table tstTbl1(n double);
+
+insert overwrite table tstTbl1
+select 'NaN' from src tablesample (1 rows);
+
+select * from tstTbl1;
+
+select round(n, 1) from tstTbl1;
+select round(n) from tstTbl1;
+
+-- test for Infinity
+select round(1/0), round(1/0, 2), round(1.0/0.0), round(1.0/0.0, 2) from src tablesample (1 rows);