You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by jd...@apache.org on 2018/05/08 20:59:39 UTC
hive git commit: HIVE-19336: Disable SMB/Bucketmap join for external
tables (Jason Dere, reviewed by Deepak Jaiswal)
Repository: hive
Updated Branches:
refs/heads/master 1121ab46e -> 5cfc914e9
HIVE-19336: Disable SMB/Bucketmap join for external tables (Jason Dere, reviewed by Deepak Jaiswal)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/5cfc914e
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/5cfc914e
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/5cfc914e
Branch: refs/heads/master
Commit: 5cfc914e9380694df1dd4fefb1e566d763000683
Parents: 1121ab4
Author: Jason Dere <jd...@hortonworks.com>
Authored: Tue May 8 13:58:50 2018 -0700
Committer: Jason Dere <jd...@hortonworks.com>
Committed: Tue May 8 13:58:50 2018 -0700
----------------------------------------------------------------------
.../hive/ql/optimizer/ConvertJoinMapJoin.java | 33 ++
.../clientpositive/bucket_map_join_tez2.q | 20 +-
ql/src/test/queries/clientpositive/tez_smb_1.q | 13 +
.../llap/bucket_map_join_tez2.q.out | 424 +++++++++++++++----
.../results/clientpositive/llap/tez_smb_1.q.out | 267 ++++++++----
.../spark/bucket_map_join_tez2.q.out | 270 ++++++++++++
6 files changed, 868 insertions(+), 159 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/5cfc914e/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java
index 5d4774d..4019f13 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java
@@ -30,6 +30,7 @@ import com.google.common.base.Preconditions;
import org.apache.hadoop.hive.common.JavaUtils;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
+import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils;
import org.apache.hadoop.hive.ql.exec.AppMasterEventOperator;
import org.apache.hadoop.hive.ql.exec.CommonJoinOperator;
import org.apache.hadoop.hive.ql.exec.CommonMergeJoinOperator;
@@ -572,7 +573,14 @@ public class ConvertJoinMapJoin implements NodeProcessor {
int numBuckets = bigTableRS.getParentOperators().get(0).getOpTraits().getNumBuckets();
int size = -1;
+ boolean shouldCheckExternalTables =
+ context.conf.getBoolVar(HiveConf.ConfVars.HIVE_DISABLE_UNSAFE_EXTERNALTABLE_OPERATIONS);
+ StringBuilder sb = new StringBuilder();
for (Operator<?> parentOp : joinOp.getParentOperators()) {
+ if (shouldCheckExternalTables && hasExternalTableAncestor(parentOp, sb)) {
+ LOG.debug("External table {} found in join - disabling SMB join.", sb.toString());
+ return false;
+ }
// each side better have 0 or more RS. if either side is unbalanced, cannot convert.
// This is a workaround for now. Right fix would be to refactor code in the
// MapRecordProcessor and ReduceRecordProcessor with respect to the sources.
@@ -690,6 +698,18 @@ public class ConvertJoinMapJoin implements NodeProcessor {
return false;
}
+ boolean shouldCheckExternalTables = tezBucketJoinProcCtx.getConf()
+ .getBoolVar(HiveConf.ConfVars.HIVE_DISABLE_UNSAFE_EXTERNALTABLE_OPERATIONS);
+ if (shouldCheckExternalTables) {
+ StringBuilder sb = new StringBuilder();
+ for (Operator<?> parentOp : joinOp.getParentOperators()) {
+ if (hasExternalTableAncestor(parentOp, sb)) {
+ LOG.debug("External table {} found in join - disabling bucket map join.", sb.toString());
+ return false;
+ }
+ }
+ }
+
/*
* this is the case when the big table is a sub-query and is probably already bucketed by the
* join column in say a group by operation
@@ -1357,4 +1377,17 @@ public class ConvertJoinMapJoin implements NodeProcessor {
// to go a few % over.
return Math.min(Math.round(v), numRows);
}
+
+ private static boolean hasExternalTableAncestor(Operator op, StringBuilder sb) {
+ boolean result = false;
+ Operator ancestor = OperatorUtils.findSingleOperatorUpstream(op, TableScanOperator.class);
+ if (ancestor != null) {
+ TableScanOperator ts = (TableScanOperator) ancestor;
+ if (MetaStoreUtils.isExternalTable(ts.getConf().getTableMetadata().getTTable())) {
+ sb.append(ts.getConf().getTableMetadata().getFullyQualifiedName());
+ return true;
+ }
+ }
+ return result;
+ }
}
http://git-wip-us.apache.org/repos/asf/hive/blob/5cfc914e/ql/src/test/queries/clientpositive/bucket_map_join_tez2.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/bucket_map_join_tez2.q b/ql/src/test/queries/clientpositive/bucket_map_join_tez2.q
index 1361e32..0ee49fc 100644
--- a/ql/src/test/queries/clientpositive/bucket_map_join_tez2.q
+++ b/ql/src/test/queries/clientpositive/bucket_map_join_tez2.q
@@ -119,4 +119,22 @@ insert into small values (1),(2),(3),(4),(5),(6);
insert into big partition(k=1) values(1),(3),(5),(7),(9);
insert into big partition(k=2) values(0),(2),(4),(6),(8);
explain select small.i, big.i from small,big where small.i=big.i;
-select small.i, big.i from small,big where small.i=big.i order by small.i, big.i;
\ No newline at end of file
+select small.i, big.i from small,big where small.i=big.i order by small.i, big.i;
+
+-- Bucket map join disabled for external tables
+-- Create external table equivalent of tab_part
+CREATE EXTERNAL TABLE tab_part_ext (key int, value string) PARTITIONED BY(ds STRING) CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE;
+insert overwrite table tab_part_ext partition (ds='2008-04-08')
+select key,value from srcbucket_mapjoin_part;
+analyze table tab_part_ext compute statistics for columns;
+
+set hive.auto.convert.join.noconditionaltask.size=1500;
+set hive.convert.join.bucket.mapjoin.tez = true;
+set hive.disable.unsafe.external.table.operations=true;
+set test.comment=Bucket map join should work here;
+set test.comment;
+explain select a.key, b.key from tab_part a join tab_part c on a.key = c.key join tab_part b on a.value = b.value;
+
+set test.comment=External tables, bucket map join should be disabled;
+set test.comment;
+explain select a.key, b.key from tab_part_ext a join tab_part_ext c on a.key = c.key join tab_part_ext b on a.value = b.value;
http://git-wip-us.apache.org/repos/asf/hive/blob/5cfc914e/ql/src/test/queries/clientpositive/tez_smb_1.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/tez_smb_1.q b/ql/src/test/queries/clientpositive/tez_smb_1.q
index 0f8f22f..e121d52 100644
--- a/ql/src/test/queries/clientpositive/tez_smb_1.q
+++ b/ql/src/test/queries/clientpositive/tez_smb_1.q
@@ -108,3 +108,16 @@ join
(select t2.key as id, t2.value as od from tab_part t2 order by id, od) rt2) vt2
where vt1.id=vt2.id;
+-- SMB disabled for external tables
+set hive.disable.unsafe.external.table.operations=true;
+CREATE EXTERNAL TABLE tab_ext(key int, value string) PARTITIONED BY(ds STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE;
+insert overwrite table tab_ext partition (ds='2008-04-08')
+select key,value from srcbucket_mapjoin;
+
+set hive.convert.join.bucket.mapjoin.tez = true;
+set hive.auto.convert.sortmerge.join = true;
+set hive.auto.convert.join.noconditionaltask.size=500;
+set test.comment=SMB disabled for external tables;
+set test.comment;
+explain
+select count(*) from tab_ext s1 join tab_ext s3 on s1.key=s3.key;
http://git-wip-us.apache.org/repos/asf/hive/blob/5cfc914e/ql/src/test/results/clientpositive/llap/bucket_map_join_tez2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/bucket_map_join_tez2.q.out b/ql/src/test/results/clientpositive/llap/bucket_map_join_tez2.q.out
index 51b138a..af37ef3 100644
--- a/ql/src/test/results/clientpositive/llap/bucket_map_join_tez2.q.out
+++ b/ql/src/test/results/clientpositive/llap/bucket_map_join_tez2.q.out
@@ -178,7 +178,7 @@ STAGE PLANS:
#### A masked pattern was here ####
Edges:
Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE)
- Reducer 3 <- Map 5 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE)
+ Reducer 3 <- Map 1 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -194,18 +194,11 @@ STAGE PLANS:
outputColumnNames: _col0, _col1
Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
- key expressions: _col1 (type: string)
+ key expressions: _col0 (type: int)
sort order: +
- Map-reduce partition columns: _col1 (type: string)
+ Map-reduce partition columns: _col0 (type: int)
Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col0 (type: int)
- Execution mode: vectorized, llap
- LLAP IO: no inputs
- Map 4
- Map Operator Tree:
- TableScan
- alias: b
- Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: string)
Filter Operator
predicate: value is not null (type: boolean)
Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE
@@ -221,7 +214,7 @@ STAGE PLANS:
value expressions: _col0 (type: int)
Execution mode: vectorized, llap
LLAP IO: no inputs
- Map 5
+ Map 4
Map Operator Tree:
TableScan
alias: c
@@ -247,16 +240,16 @@ STAGE PLANS:
condition map:
Inner Join 0 to 1
keys:
- 0 _col1 (type: string)
- 1 _col1 (type: string)
- outputColumnNames: _col0, _col2
- Statistics: Num rows: 809 Data size: 6472 Basic stats: COMPLETE Column stats: COMPLETE
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 809 Data size: 76855 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
- key expressions: _col0 (type: int)
+ key expressions: _col1 (type: string)
sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 809 Data size: 6472 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col2 (type: int)
+ Map-reduce partition columns: _col1 (type: string)
+ Statistics: Num rows: 809 Data size: 76855 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: int)
Reducer 3
Execution mode: llap
Reduce Operator Tree:
@@ -264,17 +257,17 @@ STAGE PLANS:
condition map:
Inner Join 0 to 1
keys:
- 0 _col0 (type: int)
- 1 _col0 (type: int)
- outputColumnNames: _col0, _col2
- Statistics: Num rows: 1339 Data size: 10712 Basic stats: COMPLETE Column stats: COMPLETE
+ 0 _col1 (type: string)
+ 1 _col1 (type: string)
+ outputColumnNames: _col0, _col3
+ Statistics: Num rows: 1309 Data size: 10472 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
- expressions: _col0 (type: int), _col2 (type: int)
+ expressions: _col0 (type: int), _col3 (type: int)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 1339 Data size: 10712 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1309 Data size: 10472 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 1339 Data size: 10712 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1309 Data size: 10472 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -299,8 +292,8 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
+ Map 1 <- Map 3 (CUSTOM_EDGE)
Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE)
- Reducer 3 <- Map 5 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -315,12 +308,41 @@ STAGE PLANS:
expressions: key (type: int), value (type: string)
outputColumnNames: _col0, _col1
Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0, _col1
+ input vertices:
+ 1 Map 3
+ Statistics: Num rows: 809 Data size: 76855 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col1 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: string)
+ Statistics: Num rows: 809 Data size: 76855 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: int)
+ Execution mode: vectorized, llap
+ LLAP IO: no inputs
+ Map 3
+ Map Operator Tree:
+ TableScan
+ alias: c
+ Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
- key expressions: _col1 (type: string)
+ key expressions: _col0 (type: int)
sort order: +
- Map-reduce partition columns: _col1 (type: string)
- Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col0 (type: int)
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
Execution mode: vectorized, llap
LLAP IO: no inputs
Map 4
@@ -343,25 +365,6 @@ STAGE PLANS:
value expressions: _col0 (type: int)
Execution mode: vectorized, llap
LLAP IO: no inputs
- Map 5
- Map Operator Tree:
- TableScan
- alias: c
- Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
- Filter Operator
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: key (type: int)
- outputColumnNames: _col0
- Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
- Execution mode: vectorized, llap
- LLAP IO: no inputs
Reducer 2
Execution mode: llap
Reduce Operator Tree:
@@ -371,32 +374,15 @@ STAGE PLANS:
keys:
0 _col1 (type: string)
1 _col1 (type: string)
- outputColumnNames: _col0, _col2
- Statistics: Num rows: 809 Data size: 6472 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 809 Data size: 6472 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col2 (type: int)
- Reducer 3
- Execution mode: llap
- Reduce Operator Tree:
- Merge Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 _col0 (type: int)
- 1 _col0 (type: int)
- outputColumnNames: _col0, _col2
- Statistics: Num rows: 1339 Data size: 10712 Basic stats: COMPLETE Column stats: COMPLETE
+ outputColumnNames: _col0, _col3
+ Statistics: Num rows: 1309 Data size: 10472 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
- expressions: _col0 (type: int), _col2 (type: int)
+ expressions: _col0 (type: int), _col3 (type: int)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 1339 Data size: 10712 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1309 Data size: 10472 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 1339 Data size: 10712 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1309 Data size: 10472 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -684,10 +670,10 @@ STAGE PLANS:
0 _col0 (type: int)
1 _col0 (type: int)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 272 Data size: 2176 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 267 Data size: 2136 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 272 Data size: 2176 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 267 Data size: 2136 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -738,10 +724,10 @@ STAGE PLANS:
outputColumnNames: _col0, _col1
input vertices:
1 Map 2
- Statistics: Num rows: 272 Data size: 2176 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 267 Data size: 2136 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 272 Data size: 2176 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 267 Data size: 2136 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -840,10 +826,10 @@ STAGE PLANS:
0 _col0 (type: int)
1 _col0 (type: int)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 272 Data size: 2176 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 267 Data size: 2136 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 272 Data size: 2176 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 267 Data size: 2136 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -894,10 +880,10 @@ STAGE PLANS:
outputColumnNames: _col0, _col1
input vertices:
1 Map 2
- Statistics: Num rows: 272 Data size: 2176 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 267 Data size: 2136 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 272 Data size: 2176 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 267 Data size: 2136 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -996,10 +982,10 @@ STAGE PLANS:
0 _col0 (type: int)
1 _col0 (type: int)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 272 Data size: 2176 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 267 Data size: 2136 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 272 Data size: 2176 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 267 Data size: 2136 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -1069,10 +1055,10 @@ STAGE PLANS:
outputColumnNames: _col0, _col1
input vertices:
0 Map 1
- Statistics: Num rows: 272 Data size: 2176 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 267 Data size: 2136 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 272 Data size: 2176 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 267 Data size: 2136 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -1857,3 +1843,271 @@ POSTHOOK: Input: default@small
4 4
5 5
6 6
+PREHOOK: query: CREATE EXTERNAL TABLE tab_part_ext (key int, value string) PARTITIONED BY(ds STRING) CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@tab_part_ext
+POSTHOOK: query: CREATE EXTERNAL TABLE tab_part_ext (key int, value string) PARTITIONED BY(ds STRING) CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@tab_part_ext
+PREHOOK: query: insert overwrite table tab_part_ext partition (ds='2008-04-08')
+select key,value from srcbucket_mapjoin_part
+PREHOOK: type: QUERY
+PREHOOK: Input: default@srcbucket_mapjoin_part
+PREHOOK: Input: default@srcbucket_mapjoin_part@ds=2008-04-08
+PREHOOK: Output: default@tab_part_ext@ds=2008-04-08
+POSTHOOK: query: insert overwrite table tab_part_ext partition (ds='2008-04-08')
+select key,value from srcbucket_mapjoin_part
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@srcbucket_mapjoin_part
+POSTHOOK: Input: default@srcbucket_mapjoin_part@ds=2008-04-08
+POSTHOOK: Output: default@tab_part_ext@ds=2008-04-08
+POSTHOOK: Lineage: tab_part_ext PARTITION(ds=2008-04-08).key SIMPLE [(srcbucket_mapjoin_part)srcbucket_mapjoin_part.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: tab_part_ext PARTITION(ds=2008-04-08).value SIMPLE [(srcbucket_mapjoin_part)srcbucket_mapjoin_part.FieldSchema(name:value, type:string, comment:null), ]
+PREHOOK: query: analyze table tab_part_ext compute statistics for columns
+PREHOOK: type: ANALYZE_TABLE
+PREHOOK: Input: default@tab_part_ext
+PREHOOK: Input: default@tab_part_ext@ds=2008-04-08
+PREHOOK: Output: default@tab_part_ext
+PREHOOK: Output: default@tab_part_ext@ds=2008-04-08
+#### A masked pattern was here ####
+POSTHOOK: query: analyze table tab_part_ext compute statistics for columns
+POSTHOOK: type: ANALYZE_TABLE
+POSTHOOK: Input: default@tab_part_ext
+POSTHOOK: Input: default@tab_part_ext@ds=2008-04-08
+POSTHOOK: Output: default@tab_part_ext
+POSTHOOK: Output: default@tab_part_ext@ds=2008-04-08
+#### A masked pattern was here ####
+test.comment=Bucket map join should work here
+PREHOOK: query: explain select a.key, b.key from tab_part a join tab_part c on a.key = c.key join tab_part b on a.value = b.value
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select a.key, b.key from tab_part a join tab_part c on a.key = c.key join tab_part b on a.value = b.value
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Map 1 <- Map 3 (CUSTOM_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: a
+ Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: (key is not null and value is not null) (type: boolean)
+ Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: int), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0, _col1
+ input vertices:
+ 1 Map 3
+ Statistics: Num rows: 809 Data size: 76855 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col1 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: string)
+ Statistics: Num rows: 809 Data size: 76855 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: int)
+ Execution mode: vectorized, llap
+ LLAP IO: no inputs
+ Map 3
+ Map Operator Tree:
+ TableScan
+ alias: c
+ Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
+ Execution mode: vectorized, llap
+ LLAP IO: no inputs
+ Map 4
+ Map Operator Tree:
+ TableScan
+ alias: b
+ Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: value is not null (type: boolean)
+ Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: int), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col1 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: string)
+ Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: int)
+ Execution mode: vectorized, llap
+ LLAP IO: no inputs
+ Reducer 2
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: string)
+ 1 _col1 (type: string)
+ outputColumnNames: _col0, _col3
+ Statistics: Num rows: 1309 Data size: 10472 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: int), _col3 (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1309 Data size: 10472 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1309 Data size: 10472 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+test.comment=External tables, bucket map join should be disabled
+PREHOOK: query: explain select a.key, b.key from tab_part_ext a join tab_part_ext c on a.key = c.key join tab_part_ext b on a.value = b.value
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select a.key, b.key from tab_part_ext a join tab_part_ext c on a.key = c.key join tab_part_ext b on a.value = b.value
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE)
+ Reducer 3 <- Map 1 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: a
+ Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: (key is not null and value is not null) (type: boolean)
+ Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: int), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: string)
+ Filter Operator
+ predicate: value is not null (type: boolean)
+ Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: int), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col1 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: string)
+ Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: int)
+ Execution mode: vectorized, llap
+ LLAP IO: no inputs
+ Map 4
+ Map Operator Tree:
+ TableScan
+ alias: c
+ Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
+ Execution mode: vectorized, llap
+ LLAP IO: no inputs
+ Reducer 2
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 809 Data size: 76855 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col1 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: string)
+ Statistics: Num rows: 809 Data size: 76855 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: int)
+ Reducer 3
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: string)
+ 1 _col1 (type: string)
+ outputColumnNames: _col0, _col3
+ Statistics: Num rows: 1309 Data size: 10472 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: int), _col3 (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1309 Data size: 10472 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1309 Data size: 10472 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
http://git-wip-us.apache.org/repos/asf/hive/blob/5cfc914e/ql/src/test/results/clientpositive/llap/tez_smb_1.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/tez_smb_1.q.out b/ql/src/test/results/clientpositive/llap/tez_smb_1.q.out
index 4e625e4..c5b765a 100644
--- a/ql/src/test/results/clientpositive/llap/tez_smb_1.q.out
+++ b/ql/src/test/results/clientpositive/llap/tez_smb_1.q.out
@@ -130,40 +130,40 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: s3
- Statistics: Num rows: 242 Data size: 3490 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: key is not null (type: boolean)
- Statistics: Num rows: 230 Data size: 3316 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 230 Data size: 3316 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE
Map Operator Tree:
TableScan
alias: s1
- Statistics: Num rows: 242 Data size: 3490 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: key is not null (type: boolean)
- Statistics: Num rows: 230 Data size: 3316 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 230 Data size: 3316 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE
Merge Join Operator
condition map:
Inner Join 0 to 1
keys:
0 _col0 (type: int)
1 _col0 (type: int)
- Statistics: Num rows: 253 Data size: 3647 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 382 Data size: 3056 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count()
mode: hash
outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col0 (type: bigint)
Execution mode: llap
Reducer 2
@@ -173,10 +173,10 @@ STAGE PLANS:
aggregations: count(VALUE._col0)
mode: mergepartial
outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -222,37 +222,37 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: vt1
- Statistics: Num rows: 242 Data size: 3490 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: key is not null (type: boolean)
- Statistics: Num rows: 230 Data size: 3316 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 230 Data size: 3316 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 230 Data size: 3316 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE
Execution mode: vectorized, llap
LLAP IO: no inputs
Map 4
Map Operator Tree:
TableScan
alias: t2
- Statistics: Num rows: 500 Data size: 94800 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: key is not null (type: boolean)
- Statistics: Num rows: 475 Data size: 90060 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int), value (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 475 Data size: 90060 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int), _col1 (type: string)
sort order: ++
- Statistics: Num rows: 475 Data size: 90060 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE
Execution mode: vectorized, llap
LLAP IO: no inputs
Reducer 2
@@ -264,15 +264,15 @@ STAGE PLANS:
keys:
0 _col0 (type: int)
1 _col0 (type: int)
- Statistics: Num rows: 522 Data size: 99066 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 391 Data size: 3128 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count()
mode: hash
outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col0 (type: bigint)
Reducer 3
Execution mode: vectorized, llap
@@ -281,10 +281,10 @@ STAGE PLANS:
aggregations: count(VALUE._col0)
mode: mergepartial
outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -295,12 +295,12 @@ STAGE PLANS:
Select Operator
expressions: KEY.reducesinkkey0 (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 475 Data size: 90060 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 475 Data size: 90060 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
Stage: Stage-0
Fetch Operator
@@ -367,37 +367,37 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: t2
- Statistics: Num rows: 500 Data size: 94800 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: key is not null (type: boolean)
- Statistics: Num rows: 475 Data size: 90060 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int), value (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 475 Data size: 90060 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int), _col1 (type: string)
sort order: ++
- Statistics: Num rows: 475 Data size: 90060 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE
Execution mode: vectorized, llap
LLAP IO: no inputs
Map 5
Map Operator Tree:
TableScan
alias: vt1
- Statistics: Num rows: 242 Data size: 3490 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: key is not null (type: boolean)
- Statistics: Num rows: 230 Data size: 3316 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 230 Data size: 3316 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 230 Data size: 3316 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE
Execution mode: vectorized, llap
LLAP IO: no inputs
Reducer 2
@@ -406,12 +406,12 @@ STAGE PLANS:
Select Operator
expressions: KEY.reducesinkkey0 (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 475 Data size: 90060 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 475 Data size: 90060 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
Reducer 3
Execution mode: llap
Reduce Operator Tree:
@@ -421,15 +421,15 @@ STAGE PLANS:
keys:
0 _col0 (type: int)
1 _col0 (type: int)
- Statistics: Num rows: 522 Data size: 99066 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 391 Data size: 3128 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count()
mode: hash
outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col0 (type: bigint)
Reducer 4
Execution mode: vectorized, llap
@@ -438,10 +438,10 @@ STAGE PLANS:
aggregations: count(VALUE._col0)
mode: mergepartial
outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -513,36 +513,36 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: t1
- Statistics: Num rows: 242 Data size: 45994 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: key is not null (type: boolean)
- Statistics: Num rows: 230 Data size: 43713 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int), value (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 230 Data size: 43713 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int), _col1 (type: string)
sort order: ++
- Statistics: Num rows: 230 Data size: 43713 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE
Execution mode: vectorized, llap
LLAP IO: no inputs
Map 3
Map Operator Tree:
TableScan
alias: t2
- Statistics: Num rows: 500 Data size: 94800 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: key is not null (type: boolean)
- Statistics: Num rows: 475 Data size: 90060 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int), value (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 475 Data size: 90060 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int), _col1 (type: string)
sort order: ++
- Statistics: Num rows: 475 Data size: 90060 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE
Execution mode: vectorized, llap
LLAP IO: no inputs
Reducer 4
@@ -550,28 +550,28 @@ STAGE PLANS:
Select Operator
expressions: KEY.reducesinkkey0 (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 230 Data size: 43713 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE
Execution mode: llap
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey0 (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 475 Data size: 90060 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
Merge Join Operator
condition map:
Inner Join 0 to 1
keys:
0 _col0 (type: int)
1 _col0 (type: int)
- Statistics: Num rows: 522 Data size: 99066 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 391 Data size: 3128 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count()
mode: hash
outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col0 (type: bigint)
Reducer 5
Execution mode: vectorized, llap
@@ -580,10 +580,10 @@ STAGE PLANS:
aggregations: count(VALUE._col0)
mode: mergepartial
outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -659,36 +659,36 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: t1
- Statistics: Num rows: 242 Data size: 45994 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: key is not null (type: boolean)
- Statistics: Num rows: 230 Data size: 43713 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int), value (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 230 Data size: 43713 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int), _col1 (type: string)
sort order: ++
- Statistics: Num rows: 230 Data size: 43713 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE
Execution mode: vectorized, llap
LLAP IO: no inputs
Map 5
Map Operator Tree:
TableScan
alias: t2
- Statistics: Num rows: 500 Data size: 94800 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: key is not null (type: boolean)
- Statistics: Num rows: 475 Data size: 90060 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int), value (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 475 Data size: 90060 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int), _col1 (type: string)
sort order: ++
- Statistics: Num rows: 475 Data size: 90060 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE
Execution mode: vectorized, llap
LLAP IO: no inputs
Reducer 2
@@ -697,12 +697,12 @@ STAGE PLANS:
Select Operator
expressions: KEY.reducesinkkey0 (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 230 Data size: 43713 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 230 Data size: 43713 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE
Reducer 3
Execution mode: llap
Reduce Operator Tree:
@@ -712,15 +712,15 @@ STAGE PLANS:
keys:
0 _col0 (type: int)
1 _col0 (type: int)
- Statistics: Num rows: 522 Data size: 99066 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 391 Data size: 3128 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count()
mode: hash
outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col0 (type: bigint)
Reducer 4
Execution mode: vectorized, llap
@@ -729,10 +729,10 @@ STAGE PLANS:
aggregations: count(VALUE._col0)
mode: mergepartial
outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -743,12 +743,12 @@ STAGE PLANS:
Select Operator
expressions: KEY.reducesinkkey0 (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 475 Data size: 90060 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 475 Data size: 90060 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
Stage: Stage-0
Fetch Operator
@@ -783,3 +783,124 @@ POSTHOOK: Input: default@tab_part
POSTHOOK: Input: default@tab_part@ds=2008-04-08
#### A masked pattern was here ####
480
+PREHOOK: query: CREATE EXTERNAL TABLE tab_ext(key int, value string) PARTITIONED BY(ds STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@tab_ext
+POSTHOOK: query: CREATE EXTERNAL TABLE tab_ext(key int, value string) PARTITIONED BY(ds STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@tab_ext
+PREHOOK: query: insert overwrite table tab_ext partition (ds='2008-04-08')
+select key,value from srcbucket_mapjoin
+PREHOOK: type: QUERY
+PREHOOK: Input: default@srcbucket_mapjoin
+PREHOOK: Input: default@srcbucket_mapjoin@ds=2008-04-08
+PREHOOK: Output: default@tab_ext@ds=2008-04-08
+POSTHOOK: query: insert overwrite table tab_ext partition (ds='2008-04-08')
+select key,value from srcbucket_mapjoin
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@srcbucket_mapjoin
+POSTHOOK: Input: default@srcbucket_mapjoin@ds=2008-04-08
+POSTHOOK: Output: default@tab_ext@ds=2008-04-08
+POSTHOOK: Lineage: tab_ext PARTITION(ds=2008-04-08).key SIMPLE [(srcbucket_mapjoin)srcbucket_mapjoin.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: tab_ext PARTITION(ds=2008-04-08).value SIMPLE [(srcbucket_mapjoin)srcbucket_mapjoin.FieldSchema(name:value, type:string, comment:null), ]
+test.comment=SMB disabled for external tables
+PREHOOK: query: explain
+select count(*) from tab_ext s1 join tab_ext s3 on s1.key=s3.key
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select count(*) from tab_ext s1 join tab_ext s3 on s1.key=s3.key
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: s1
+ Statistics: Num rows: 242 Data size: 3490 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 230 Data size: 3316 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 230 Data size: 3316 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 230 Data size: 3316 Basic stats: COMPLETE Column stats: NONE
+ Execution mode: vectorized, llap
+ LLAP IO: no inputs
+ Map 4
+ Map Operator Tree:
+ TableScan
+ alias: s3
+ Statistics: Num rows: 242 Data size: 3490 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 230 Data size: 3316 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 230 Data size: 3316 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 230 Data size: 3316 Basic stats: COMPLETE Column stats: NONE
+ Execution mode: vectorized, llap
+ LLAP IO: no inputs
+ Reducer 2
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ Statistics: Num rows: 253 Data size: 3647 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
+ Reducer 3
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
http://git-wip-us.apache.org/repos/asf/hive/blob/5cfc914e/ql/src/test/results/clientpositive/spark/bucket_map_join_tez2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/bucket_map_join_tez2.q.out b/ql/src/test/results/clientpositive/spark/bucket_map_join_tez2.q.out
index 8e946d8..0d977cd 100644
--- a/ql/src/test/results/clientpositive/spark/bucket_map_join_tez2.q.out
+++ b/ql/src/test/results/clientpositive/spark/bucket_map_join_tez2.q.out
@@ -1840,3 +1840,273 @@ POSTHOOK: Input: default@small
4 4
5 5
6 6
+PREHOOK: query: CREATE EXTERNAL TABLE tab_part_ext (key int, value string) PARTITIONED BY(ds STRING) CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@tab_part_ext
+POSTHOOK: query: CREATE EXTERNAL TABLE tab_part_ext (key int, value string) PARTITIONED BY(ds STRING) CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@tab_part_ext
+PREHOOK: query: insert overwrite table tab_part_ext partition (ds='2008-04-08')
+select key,value from srcbucket_mapjoin_part
+PREHOOK: type: QUERY
+PREHOOK: Input: default@srcbucket_mapjoin_part
+PREHOOK: Input: default@srcbucket_mapjoin_part@ds=2008-04-08
+PREHOOK: Output: default@tab_part_ext@ds=2008-04-08
+POSTHOOK: query: insert overwrite table tab_part_ext partition (ds='2008-04-08')
+select key,value from srcbucket_mapjoin_part
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@srcbucket_mapjoin_part
+POSTHOOK: Input: default@srcbucket_mapjoin_part@ds=2008-04-08
+POSTHOOK: Output: default@tab_part_ext@ds=2008-04-08
+POSTHOOK: Lineage: tab_part_ext PARTITION(ds=2008-04-08).key SIMPLE [(srcbucket_mapjoin_part)srcbucket_mapjoin_part.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: tab_part_ext PARTITION(ds=2008-04-08).value SIMPLE [(srcbucket_mapjoin_part)srcbucket_mapjoin_part.FieldSchema(name:value, type:string, comment:null), ]
+PREHOOK: query: analyze table tab_part_ext compute statistics for columns
+PREHOOK: type: ANALYZE_TABLE
+PREHOOK: Input: default@tab_part_ext
+PREHOOK: Input: default@tab_part_ext@ds=2008-04-08
+PREHOOK: Output: default@tab_part_ext
+PREHOOK: Output: default@tab_part_ext@ds=2008-04-08
+#### A masked pattern was here ####
+POSTHOOK: query: analyze table tab_part_ext compute statistics for columns
+POSTHOOK: type: ANALYZE_TABLE
+POSTHOOK: Input: default@tab_part_ext
+POSTHOOK: Input: default@tab_part_ext@ds=2008-04-08
+POSTHOOK: Output: default@tab_part_ext
+POSTHOOK: Output: default@tab_part_ext@ds=2008-04-08
+#### A masked pattern was here ####
+test.comment=Bucket map join should work here
+PREHOOK: query: explain select a.key, b.key from tab_part a join tab_part c on a.key = c.key join tab_part b on a.value = b.value
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select a.key, b.key from tab_part a join tab_part c on a.key = c.key join tab_part b on a.value = b.value
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2)
+ Reducer 3 <- Map 5 (PARTITION-LEVEL SORT, 2), Reducer 2 (PARTITION-LEVEL SORT, 2)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: a
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (key is not null and value is not null) (type: boolean)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: int), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
+ Execution mode: vectorized
+ Map 4
+ Map Operator Tree:
+ TableScan
+ alias: c
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Execution mode: vectorized
+ Map 5
+ Map Operator Tree:
+ TableScan
+ alias: b
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: value is not null (type: boolean)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: int), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: string)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int)
+ Execution mode: vectorized
+ Reducer 2
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: string)
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int)
+ Reducer 3
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: string)
+ 1 _col1 (type: string)
+ outputColumnNames: _col0, _col3
+ Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: int), _col3 (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+test.comment=External tables, bucket map join should be disabled
+PREHOOK: query: explain select a.key, b.key from tab_part_ext a join tab_part_ext c on a.key = c.key join tab_part_ext b on a.value = b.value
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select a.key, b.key from tab_part_ext a join tab_part_ext c on a.key = c.key join tab_part_ext b on a.value = b.value
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2)
+ Reducer 3 <- Map 5 (PARTITION-LEVEL SORT, 2), Reducer 2 (PARTITION-LEVEL SORT, 2)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: a
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (key is not null and value is not null) (type: boolean)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: int), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
+ Execution mode: vectorized
+ Map 4
+ Map Operator Tree:
+ TableScan
+ alias: c
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Execution mode: vectorized
+ Map 5
+ Map Operator Tree:
+ TableScan
+ alias: b
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: value is not null (type: boolean)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: int), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: string)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int)
+ Execution mode: vectorized
+ Reducer 2
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: string)
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int)
+ Reducer 3
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: string)
+ 1 _col1 (type: string)
+ outputColumnNames: _col0, _col3
+ Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: int), _col3 (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+