You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ha...@apache.org on 2017/02/05 21:34:56 UTC
[7/7] hive git commit: HIVE-15458 : Fix semi-join conversion rule for
subquery (Vineet Garg via Ashutosh Chauhan)
HIVE-15458 : Fix semi-join conversion rule for subquery (Vineet Garg via Ashutosh Chauhan)
Signed-off-by: Ashutosh Chauhan <ha...@apache.org>
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/f63dc2d4
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/f63dc2d4
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/f63dc2d4
Branch: refs/heads/master
Commit: f63dc2d4fbbf09a04af98c4a9ba047a355a2da0a
Parents: bc0aeec
Author: Vineet Garg <vg...@hortonworks.com>
Authored: Fri Feb 3 18:58:00 2017 -0800
Committer: Ashutosh Chauhan <ha...@apache.org>
Committed: Sun Feb 5 13:33:55 2017 -0800
----------------------------------------------------------------------
.../calcite/rules/HiveSemiJoinRule.java | 25 +-
.../calcite/translator/ASTConverter.java | 3 +-
.../hadoop/hive/ql/parse/CalcitePlanner.java | 20 +-
.../test/queries/clientpositive/leftsemijoin.q | 8 +
.../test/queries/clientpositive/multiMapJoin2.q | 8 +-
.../clientpositive/constprog_partitioner.q.out | 61 +-
.../results/clientpositive/leftsemijoin.q.out | 199 +++
.../llap/dynamic_partition_pruning.q.out | 114 +-
.../clientpositive/llap/explainuser_1.q.out | 444 +++---
.../clientpositive/llap/leftsemijoin.q.out | 206 +++
.../results/clientpositive/llap/lineage3.q.out | 2 +-
.../clientpositive/llap/multiMapJoin2.q.out | 1269 +++++++++---------
.../clientpositive/llap/subquery_exists.q.out | 90 +-
.../clientpositive/llap/subquery_in.q.out | 606 +++------
.../clientpositive/llap/subquery_multi.q.out | 444 +++---
.../clientpositive/llap/subquery_notin.q.out | 76 +-
.../clientpositive/llap/subquery_scalar.q.out | 164 +--
.../clientpositive/llap/subquery_views.q.out | 98 +-
.../llap/vector_mapjoin_reduce.q.out | 175 +--
.../vectorized_dynamic_partition_pruning.q.out | 121 +-
.../test/results/clientpositive/masking_3.q.out | 275 +---
.../test/results/clientpositive/masking_4.q.out | 39 +-
.../results/clientpositive/perf/query70.q.out | 178 ++-
.../spark/constprog_partitioner.q.out | 49 +-
.../clientpositive/spark/leftsemijoin.q.out | 197 +++
.../clientpositive/spark/subquery_exists.q.out | 90 +-
.../clientpositive/spark/subquery_in.q.out | 599 +++------
.../spark/vector_mapjoin_reduce.q.out | 131 +-
.../subq_where_serialization.q.out | 98 +-
.../clientpositive/subquery_exists.q.out | 114 +-
.../clientpositive/subquery_exists_having.q.out | 141 +-
.../clientpositive/subquery_in_having.q.out | 528 ++++----
.../subquery_unqualcolumnrefs.q.out | 122 +-
.../clientpositive/vector_mapjoin_reduce.q.out | 398 ++----
34 files changed, 3291 insertions(+), 3801 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/f63dc2d4/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSemiJoinRule.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSemiJoinRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSemiJoinRule.java
index 14eb3a6..e400896 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSemiJoinRule.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSemiJoinRule.java
@@ -20,6 +20,7 @@ import org.apache.calcite.plan.RelOptCluster;
import org.apache.calcite.plan.RelOptRule;
import org.apache.calcite.plan.RelOptRuleCall;
import org.apache.calcite.plan.RelOptUtil;
+import org.apache.calcite.plan.hep.HepRelVertex;
import org.apache.calcite.rel.RelNode;
import org.apache.calcite.rel.core.Aggregate;
import org.apache.calcite.rel.core.Join;
@@ -37,6 +38,7 @@ import org.slf4j.LoggerFactory;
import com.google.common.collect.Lists;
+import java.util.ArrayList;
import java.util.List;
/**
@@ -84,6 +86,11 @@ public class HiveSemiJoinRule extends RelOptRule {
// By the way, neither a super-set nor a sub-set would work.
return;
}
+ if(join.getJoinType() == JoinRelType.LEFT) {
+ // Since for a LEFT join we are only interested in rows from the left input, we can get rid of the right side.
+ call.transformTo(call.builder().push(left).project(project.getProjects(), project.getRowType().getFieldNames()).build());
+ return;
+ }
if (join.getJoinType() != JoinRelType.INNER) {
return;
}
@@ -102,7 +109,23 @@ public class HiveSemiJoinRule extends RelOptRule {
final RexNode newCondition =
RelOptUtil.createEquiJoinCondition(left, joinInfo.leftKeys, newRight,
newRightKeys, rexBuilder);
- RelNode semi = call.builder().push(left).push(aggregate.getInput()).semiJoin(newCondition).build();
+
+ RelNode semi = null;
+ // HIVE-15458: we need to add a Project on top of the Join, since a SemiJoin with a Join as its right input
+ // is not expected further down the pipeline. See the JIRA for more details.
+ if(aggregate.getInput() instanceof HepRelVertex
+ && ((HepRelVertex)aggregate.getInput()).getCurrentRel() instanceof Join) {
+ Join rightJoin = (Join)(((HepRelVertex)aggregate.getInput()).getCurrentRel());
+ List<RexNode> projects = new ArrayList<>();
+ for(int i=0; i<rightJoin.getRowType().getFieldCount(); i++){
+ projects.add(rexBuilder.makeInputRef(rightJoin, i));
+ }
+ RelNode topProject = call.builder().push(rightJoin).project(projects, rightJoin.getRowType().getFieldNames(), true).build();
+ semi = call.builder().push(left).push(topProject).semiJoin(newCondition).build();
+ }
+ else {
+ semi = call.builder().push(left).push(aggregate.getInput()).semiJoin(newCondition).build();
+ }
call.transformTo(call.builder().push(semi).project(project.getProjects(), project.getRowType().getFieldNames()).build());
}
}
http://git-wip-us.apache.org/repos/asf/hive/blob/f63dc2d4/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java
index e78c8e9..27990a2 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java
@@ -360,7 +360,8 @@ public class ASTConverter {
s = new Schema(left.schema, right.schema);
ASTNode cond = join.getCondition().accept(new RexVisitor(s));
boolean semiJoin = join instanceof SemiJoin;
- if (join.getRight() instanceof Join) {
+ if (join.getRight() instanceof Join && !semiJoin) {
+ // should not be done for semijoin since it will change the semantics
// Invert join inputs; this is done because otherwise the SemanticAnalyzer
// methods to merge joins will not kick in
JoinRelType type;
http://git-wip-us.apache.org/repos/asf/hive/blob/f63dc2d4/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
index a268d80..96ff5df 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
@@ -136,8 +136,17 @@ import org.apache.hadoop.hive.ql.metadata.Hive;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.metadata.Table;
import org.apache.hadoop.hive.ql.metadata.VirtualColumn;
-import org.apache.hadoop.hive.ql.optimizer.calcite.*;
+import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException;
+import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSubquerySemanticException;
import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException.UnsupportedFeature;
+import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil;
+import org.apache.hadoop.hive.ql.optimizer.calcite.HiveDefaultRelMetadataProvider;
+import org.apache.hadoop.hive.ql.optimizer.calcite.HivePlannerContext;
+import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelFactories;
+import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRexExecutorImpl;
+import org.apache.hadoop.hive.ql.optimizer.calcite.HiveTypeSystemImpl;
+import org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable;
+import org.apache.hadoop.hive.ql.optimizer.calcite.TraitsUtil;
import org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveAlgorithmsConf;
import org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveVolcanoPlanner;
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate;
@@ -337,10 +346,6 @@ public class CalcitePlanner extends SemanticAnalyzer {
boolean reAnalyzeAST = false;
final boolean materializedView = getQB().isMaterializedView();
- // currently semi-join optimization doesn't work with subqueries
- // so this will be turned off for if we find subqueries and will later be
- // restored to its original state
- boolean originalSemiOptVal = this.conf.getBoolVar(ConfVars.SEMIJOIN_CONVERSION);
try {
if (this.conf.getBoolVar(HiveConf.ConfVars.HIVE_CBO_RETPATH_HIVEOP)) {
sinkOp = getOptimizedHiveOPDag();
@@ -446,8 +451,6 @@ public class CalcitePlanner extends SemanticAnalyzer {
super.genResolvedParseTree(ast, new PlannerContext());
skipCalcitePlan = true;
}
- // restore semi-join opt flag
- this.conf.setBoolVar(ConfVars.SEMIJOIN_CONVERSION, originalSemiOptVal);
}
} else {
this.ctx.setCboInfo("Plan not optimized by CBO.");
@@ -2416,9 +2419,6 @@ public class CalcitePlanner extends SemanticAnalyzer {
.get(srcRel));
relToHiveRR.put(filterRel, relToHiveRR.get(srcRel));
this.subqueryId++;
-
- // semi-join opt doesn't work with subqueries
- conf.setBoolVar(ConfVars.SEMIJOIN_CONVERSION, false);
return filterRel;
} else {
return genFilterRelNode(searchCond, srcRel, outerNameToPosMap, outerRR, forHavingClause);
http://git-wip-us.apache.org/repos/asf/hive/blob/f63dc2d4/ql/src/test/queries/clientpositive/leftsemijoin.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/leftsemijoin.q b/ql/src/test/queries/clientpositive/leftsemijoin.q
index 71c3a0d..8974219 100644
--- a/ql/src/test/queries/clientpositive/leftsemijoin.q
+++ b/ql/src/test/queries/clientpositive/leftsemijoin.q
@@ -24,3 +24,11 @@ SELECT name,id FROM sales LEFT SEMI JOIN things ON (sales.id = things.id);
drop table sales;
drop table things;
+
+-- HIVE-15458
+explain select part.p_type from part join (select p1.p_name from part p1, part p2 group by p1.p_name) pp ON pp.p_name = part.p_name;
+select part.p_type from part join (select p1.p_name from part p1, part p2 group by p1.p_name) pp ON pp.p_name = part.p_name;
+
+-- Semi join optimization should take out the right side
+explain select part.p_type from part left join (select p1.p_name from part p1, part p2 group by p1.p_name) pp ON pp.p_name = part.p_name;
+select part.p_type from part left join (select p1.p_name from part p1, part p2 group by p1.p_name) pp ON pp.p_name = part.p_name;
http://git-wip-us.apache.org/repos/asf/hive/blob/f63dc2d4/ql/src/test/queries/clientpositive/multiMapJoin2.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/multiMapJoin2.q b/ql/src/test/queries/clientpositive/multiMapJoin2.q
index c66dc66..38ab1a7 100644
--- a/ql/src/test/queries/clientpositive/multiMapJoin2.q
+++ b/ql/src/test/queries/clientpositive/multiMapJoin2.q
@@ -195,7 +195,7 @@ set hive.optimize.correlation=false;
-- HIVE-5891 Alias conflict when merging multiple mapjoin tasks into their common
-- child mapred task
EXPLAIN
-SELECT * FROM (
+SELECT x.key FROM (
SELECT c.key FROM
(SELECT a.key FROM src a JOIN src b ON a.key=b.key GROUP BY a.key) tmp
JOIN src c ON tmp.key=c.key
@@ -203,9 +203,9 @@ SELECT * FROM (
SELECT c.key FROM
(SELECT a.key FROM src a JOIN src b ON a.key=b.key GROUP BY a.key) tmp
JOIN src c ON tmp.key=c.key
-) x;
+) x order by x.key;
-SELECT * FROM (
+SELECT x.key FROM (
SELECT c.key FROM
(SELECT a.key FROM src a JOIN src b ON a.key=b.key GROUP BY a.key) tmp
JOIN src c ON tmp.key=c.key
@@ -213,5 +213,5 @@ SELECT * FROM (
SELECT c.key FROM
(SELECT a.key FROM src a JOIN src b ON a.key=b.key GROUP BY a.key) tmp
JOIN src c ON tmp.key=c.key
-) x;
+) x order by x.key;
http://git-wip-us.apache.org/repos/asf/hive/blob/f63dc2d4/ql/src/test/results/clientpositive/constprog_partitioner.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/constprog_partitioner.q.out b/ql/src/test/results/clientpositive/constprog_partitioner.q.out
index d4ccb8c..2a44269 100644
--- a/ql/src/test/results/clientpositive/constprog_partitioner.q.out
+++ b/ql/src/test/results/clientpositive/constprog_partitioner.q.out
@@ -80,14 +80,13 @@ WHERE li.l_linenumber = 1 AND
li.l_orderkey IN (SELECT l_orderkey FROM lineitem WHERE l_shipmode = 'AIR' AND l_linenumber = li.l_linenumber)
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-4 is a root stage
- Stage-2 depends on stages: Stage-4
- Stage-3 depends on stages: Stage-2
- Stage-1 depends on stages: Stage-3
+ Stage-3 is a root stage
+ Stage-2 depends on stages: Stage-3
+ Stage-1 depends on stages: Stage-2
Stage-0 depends on stages: Stage-1
STAGE PLANS:
- Stage: Stage-4
+ Stage: Stage-3
Map Reduce
Map Operator Tree:
TableScan
@@ -154,39 +153,21 @@ STAGE PLANS:
1 _col0 (type: int)
outputColumnNames: _col0, _col3
Statistics: Num rows: 55 Data size: 6598 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- keys: _col0 (type: int), _col3 (type: int)
- mode: hash
+ Select Operator
+ expressions: _col0 (type: int), _col3 (type: int)
outputColumnNames: _col0, _col1
Statistics: Num rows: 55 Data size: 6598 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-
- Stage: Stage-3
- Map Reduce
- Map Operator Tree:
- TableScan
- Reduce Output Operator
- key expressions: _col0 (type: int), _col1 (type: int)
- sort order: ++
- Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
+ Group By Operator
+ keys: _col0 (type: int), _col1 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 55 Data size: 6598 Basic stats: COMPLETE Column stats: NONE
- Reduce Operator Tree:
- Group By Operator
- keys: KEY._col0 (type: int), KEY._col1 (type: int)
- mode: mergepartial
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 27 Data size: 3239 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
Stage: Stage-1
Map Reduce
@@ -212,23 +193,23 @@ STAGE PLANS:
key expressions: _col0 (type: int), _col1 (type: int)
sort order: ++
Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
- Statistics: Num rows: 27 Data size: 3239 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 55 Data size: 6598 Basic stats: COMPLETE Column stats: NONE
Reduce Operator Tree:
Join Operator
condition map:
- Inner Join 0 to 1
+ Left Semi Join 0 to 1
keys:
0 _col0 (type: int), 1 (type: int)
1 _col0 (type: int), _col1 (type: int)
outputColumnNames: _col1, _col2
- Statistics: Num rows: 55 Data size: 6598 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 60 Data size: 7257 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col1 (type: int), _col2 (type: int)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 55 Data size: 6598 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 60 Data size: 7257 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 55 Data size: 6598 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 60 Data size: 7257 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
http://git-wip-us.apache.org/repos/asf/hive/blob/f63dc2d4/ql/src/test/results/clientpositive/leftsemijoin.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/leftsemijoin.q.out b/ql/src/test/results/clientpositive/leftsemijoin.q.out
index a11bbc4..28229cd 100644
--- a/ql/src/test/results/clientpositive/leftsemijoin.q.out
+++ b/ql/src/test/results/clientpositive/leftsemijoin.q.out
@@ -108,3 +108,202 @@ POSTHOOK: query: drop table things
POSTHOOK: type: DROPTABLE
POSTHOOK: Input: default@things
POSTHOOK: Output: default@things
+Warning: Shuffle Join JOIN[10][tables = [$hdt$_1, $hdt$_2]] in Stage 'Stage-2:MAPRED' is a cross product
+PREHOOK: query: explain select part.p_type from part join (select p1.p_name from part p1, part p2 group by p1.p_name) pp ON pp.p_name = part.p_name
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select part.p_type from part join (select p1.p_name from part p1, part p2 group by p1.p_name) pp ON pp.p_name = part.p_name
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-2 is a root stage
+ Stage-1 depends on stages: Stage-2
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: p1
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: p_name is not null (type: boolean)
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: p_name (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string)
+ TableScan
+ alias: p2
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0
+ 1
+ outputColumnNames: _col0
+ Statistics: Num rows: 676 Data size: 85202 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 676 Data size: 85202 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: part
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: p_name is not null (type: boolean)
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: p_name (type: string), p_type (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 676 Data size: 85202 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Left Semi Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col1
+ Statistics: Num rows: 743 Data size: 93722 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col1 (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 743 Data size: 93722 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 743 Data size: 93722 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+Warning: Shuffle Join JOIN[10][tables = [$hdt$_1, $hdt$_2]] in Stage 'Stage-2:MAPRED' is a cross product
+PREHOOK: query: select part.p_type from part join (select p1.p_name from part p1, part p2 group by p1.p_name) pp ON pp.p_name = part.p_name
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part
+#### A masked pattern was here ####
+POSTHOOK: query: select part.p_type from part join (select p1.p_name from part p1, part p2 group by p1.p_name) pp ON pp.p_name = part.p_name
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part
+#### A masked pattern was here ####
+ECONOMY BRUSHED COPPER
+ECONOMY BURNISHED STEEL
+ECONOMY PLATED COPPER
+ECONOMY POLISHED STEEL
+LARGE BRUSHED BRASS
+LARGE BRUSHED STEEL
+LARGE BURNISHED STEEL
+MEDIUM ANODIZED COPPER
+MEDIUM BURNISHED BRASS
+MEDIUM BURNISHED COPPER
+MEDIUM BURNISHED TIN
+MEDIUM BURNISHED TIN
+PROMO ANODIZED TIN
+PROMO BURNISHED NICKEL
+PROMO PLATED TIN
+PROMO PLATED TIN
+PROMO POLISHED STEEL
+SMALL BRUSHED BRASS
+SMALL PLATED BRASS
+SMALL PLATED STEEL
+SMALL POLISHED NICKEL
+STANDARD ANODIZED STEEL
+STANDARD ANODIZED TIN
+STANDARD BURNISHED TIN
+STANDARD PLATED TIN
+STANDARD POLISHED STEEL
+PREHOOK: query: explain select part.p_type from part left join (select p1.p_name from part p1, part p2 group by p1.p_name) pp ON pp.p_name = part.p_name
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select part.p_type from part left join (select p1.p_name from part p1, part p2 group by p1.p_name) pp ON pp.p_name = part.p_name
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ TableScan
+ alias: part
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: p_type (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ ListSink
+
+PREHOOK: query: select part.p_type from part left join (select p1.p_name from part p1, part p2 group by p1.p_name) pp ON pp.p_name = part.p_name
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part
+#### A masked pattern was here ####
+POSTHOOK: query: select part.p_type from part left join (select p1.p_name from part p1, part p2 group by p1.p_name) pp ON pp.p_name = part.p_name
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part
+#### A masked pattern was here ####
+ECONOMY BRUSHED COPPER
+ECONOMY BURNISHED STEEL
+ECONOMY PLATED COPPER
+ECONOMY POLISHED STEEL
+LARGE BRUSHED BRASS
+LARGE BRUSHED STEEL
+LARGE BURNISHED STEEL
+MEDIUM ANODIZED COPPER
+MEDIUM BURNISHED BRASS
+MEDIUM BURNISHED COPPER
+MEDIUM BURNISHED TIN
+MEDIUM BURNISHED TIN
+PROMO ANODIZED TIN
+PROMO BURNISHED NICKEL
+PROMO PLATED TIN
+PROMO PLATED TIN
+PROMO POLISHED STEEL
+SMALL BRUSHED BRASS
+SMALL PLATED BRASS
+SMALL PLATED STEEL
+SMALL POLISHED NICKEL
+STANDARD ANODIZED STEEL
+STANDARD ANODIZED TIN
+STANDARD BURNISHED TIN
+STANDARD PLATED TIN
+STANDARD POLISHED STEEL
http://git-wip-us.apache.org/repos/asf/hive/blob/f63dc2d4/ql/src/test/results/clientpositive/llap/dynamic_partition_pruning.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/dynamic_partition_pruning.q.out b/ql/src/test/results/clientpositive/llap/dynamic_partition_pruning.q.out
index 1b6bb1f..c63daba 100644
--- a/ql/src/test/results/clientpositive/llap/dynamic_partition_pruning.q.out
+++ b/ql/src/test/results/clientpositive/llap/dynamic_partition_pruning.q.out
@@ -3730,10 +3730,9 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Reducer 10 <- Union 9 (SIMPLE_EDGE)
- Reducer 12 <- Map 11 (CUSTOM_SIMPLE_EDGE), Union 9 (CONTAINS)
+ Reducer 11 <- Map 10 (CUSTOM_SIMPLE_EDGE), Union 9 (CONTAINS)
Reducer 2 <- Map 1 (SIMPLE_EDGE), Union 3 (CONTAINS)
- Reducer 4 <- Reducer 10 (SIMPLE_EDGE), Union 3 (SIMPLE_EDGE)
+ Reducer 4 <- Union 3 (SIMPLE_EDGE), Union 9 (SIMPLE_EDGE)
Reducer 6 <- Map 5 (SIMPLE_EDGE), Union 3 (CONTAINS)
Reducer 8 <- Map 7 (CUSTOM_SIMPLE_EDGE), Union 9 (CONTAINS)
#### A masked pattern was here ####
@@ -3756,7 +3755,7 @@ STAGE PLANS:
Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: COMPLETE
Execution mode: llap
LLAP IO: no inputs
- Map 11
+ Map 10
Map Operator Tree:
TableScan
alias: srcpart
@@ -3814,50 +3813,7 @@ STAGE PLANS:
value expressions: _col0 (type: string)
Execution mode: llap
LLAP IO: no inputs
- Reducer 10
- Execution mode: llap
- Reduce Operator Tree:
- Group By Operator
- keys: KEY._col0 (type: string)
- mode: mergepartial
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: _col0 (type: string)
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
- Group By Operator
- keys: _col0 (type: string)
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
- Dynamic Partitioning Event Operator
- Target column: ds (string)
- Target Input: srcpart
- Partition key expr: ds
- Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
- Target Vertex: Map 1
- Select Operator
- expressions: _col0 (type: string)
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
- Group By Operator
- keys: _col0 (type: string)
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
- Dynamic Partitioning Event Operator
- Target column: ds (string)
- Target Input: srcpart
- Partition key expr: ds
- Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
- Target Vertex: Map 5
- Reducer 12
+ Reducer 11
Execution mode: llap
Reduce Operator Tree:
Group By Operator
@@ -3878,6 +3834,36 @@ STAGE PLANS:
sort order: +
Map-reduce partition columns: _col0 (type: string)
Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ keys: _col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
+ Dynamic Partitioning Event Operator
+ Target column: ds (string)
+ Target Input: srcpart
+ Partition key expr: ds
+ Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
+ Target Vertex: Map 1
+ Select Operator
+ expressions: _col0 (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ keys: _col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
+ Dynamic Partitioning Event Operator
+ Target column: ds (string)
+ Target Input: srcpart
+ Partition key expr: ds
+ Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
+ Target Vertex: Map 5
Reducer 2
Execution mode: llap
Reduce Operator Tree:
@@ -3896,7 +3882,7 @@ STAGE PLANS:
Reduce Operator Tree:
Merge Join Operator
condition map:
- Inner Join 0 to 1
+ Left Semi Join 0 to 1
keys:
0 _col0 (type: string)
1 _col0 (type: string)
@@ -3943,6 +3929,36 @@ STAGE PLANS:
sort order: +
Map-reduce partition columns: _col0 (type: string)
Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ keys: _col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
+ Dynamic Partitioning Event Operator
+ Target column: ds (string)
+ Target Input: srcpart
+ Partition key expr: ds
+ Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
+ Target Vertex: Map 1
+ Select Operator
+ expressions: _col0 (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ keys: _col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
+ Dynamic Partitioning Event Operator
+ Target column: ds (string)
+ Target Input: srcpart
+ Partition key expr: ds
+ Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
+ Target Vertex: Map 5
Union 3
Vertex: Union 3
Union 9
http://git-wip-us.apache.org/repos/asf/hive/blob/f63dc2d4/ql/src/test/results/clientpositive/llap/explainuser_1.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/explainuser_1.q.out b/ql/src/test/results/clientpositive/llap/explainuser_1.q.out
index c3f8071..621f337 100644
--- a/ql/src/test/results/clientpositive/llap/explainuser_1.q.out
+++ b/ql/src/test/results/clientpositive/llap/explainuser_1.q.out
@@ -1991,59 +1991,55 @@ POSTHOOK: type: QUERY
Plan optimized by CBO.
Vertex dependency in root stage
-Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE)
-Reducer 4 <- Map 3 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE)
-Reducer 5 <- Reducer 4 (SIMPLE_EDGE)
-Reducer 7 <- Map 6 (SIMPLE_EDGE)
+Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE)
+Reducer 4 <- Map 3 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE)
+Reducer 6 <- Map 5 (SIMPLE_EDGE)
Stage-0
Fetch Operator
limit:-1
Stage-1
Reducer 2 llap
- File Output Operator [FS_23]
- Merge Join Operator [MERGEJOIN_33] (rows=1 width=178)
- Conds:RS_19._col0, _col1=RS_20._col0, _col1(Inner),Output:["_col0","_col1"]
+ File Output Operator [FS_21]
+ Merge Join Operator [MERGEJOIN_31] (rows=1 width=178)
+ Conds:RS_17._col0, _col1=RS_18._col0, _col1(Left Semi),Output:["_col0","_col1"]
<-Map 1 [SIMPLE_EDGE] llap
- SHUFFLE [RS_19]
+ SHUFFLE [RS_17]
PartitionCols:_col0, _col1
Select Operator [SEL_1] (rows=500 width=178)
Output:["_col0","_col1"]
TableScan [TS_0] (rows=500 width=178)
default@src_cbo,b,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"],properties:{"insideView":"TRUE"}
- <-Reducer 5 [SIMPLE_EDGE] llap
- SHUFFLE [RS_20]
+ <-Reducer 4 [SIMPLE_EDGE] llap
+ SHUFFLE [RS_18]
PartitionCols:_col0, _col1
- Group By Operator [GBY_17] (rows=1 width=178)
- Output:["_col0","_col1"],keys:KEY._col0, KEY._col1
- <-Reducer 4 [SIMPLE_EDGE] llap
- SHUFFLE [RS_16]
- PartitionCols:_col0, _col1
- Group By Operator [GBY_15] (rows=1 width=178)
- Output:["_col0","_col1"],keys:_col2, _col3
- Merge Join Operator [MERGEJOIN_32] (rows=1 width=178)
- Conds:RS_11._col0, _col1=RS_12._col0, _col1(Inner),Output:["_col2","_col3"]
- <-Map 3 [SIMPLE_EDGE] llap
- SHUFFLE [RS_11]
- PartitionCols:_col0, _col1
- Select Operator [SEL_4] (rows=166 width=178)
- Output:["_col0","_col1"]
- Filter Operator [FIL_30] (rows=166 width=178)
- predicate:(value > 'val_9')
- TableScan [TS_2] (rows=500 width=178)
- default@src_cbo,a,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"]
- <-Reducer 7 [SIMPLE_EDGE] llap
- SHUFFLE [RS_12]
- PartitionCols:_col0, _col1
- Group By Operator [GBY_9] (rows=250 width=178)
- Output:["_col0","_col1"],keys:KEY._col0, KEY._col1
- <-Map 6 [SIMPLE_EDGE] llap
- SHUFFLE [RS_8]
- PartitionCols:_col0, _col1
- Group By Operator [GBY_7] (rows=250 width=178)
- Output:["_col0","_col1"],keys:key, value
- TableScan [TS_5] (rows=500 width=178)
- default@src_cbo,b,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"],properties:{"insideView":"TRUE"}
+ Group By Operator [GBY_16] (rows=1 width=178)
+ Output:["_col0","_col1"],keys:_col0, _col1
+ Select Operator [SEL_14] (rows=1 width=178)
+ Output:["_col0","_col1"]
+ Merge Join Operator [MERGEJOIN_30] (rows=1 width=178)
+ Conds:RS_11._col0, _col1=RS_12._col0, _col1(Inner),Output:["_col2","_col3"]
+ <-Map 3 [SIMPLE_EDGE] llap
+ SHUFFLE [RS_11]
+ PartitionCols:_col0, _col1
+ Select Operator [SEL_4] (rows=166 width=178)
+ Output:["_col0","_col1"]
+ Filter Operator [FIL_28] (rows=166 width=178)
+ predicate:(value > 'val_9')
+ TableScan [TS_2] (rows=500 width=178)
+ default@src_cbo,a,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"]
+ <-Reducer 6 [SIMPLE_EDGE] llap
+ SHUFFLE [RS_12]
+ PartitionCols:_col0, _col1
+ Group By Operator [GBY_9] (rows=250 width=178)
+ Output:["_col0","_col1"],keys:KEY._col0, KEY._col1
+ <-Map 5 [SIMPLE_EDGE] llap
+ SHUFFLE [RS_8]
+ PartitionCols:_col0, _col1
+ Group By Operator [GBY_7] (rows=250 width=178)
+ Output:["_col0","_col1"],keys:key, value
+ TableScan [TS_5] (rows=500 width=178)
+ default@src_cbo,b,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"],properties:{"insideView":"TRUE"}
PREHOOK: query: explain select *
from (select *
@@ -2066,59 +2062,55 @@ POSTHOOK: type: QUERY
Plan optimized by CBO.
Vertex dependency in root stage
-Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE)
-Reducer 4 <- Map 3 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE)
-Reducer 5 <- Reducer 4 (SIMPLE_EDGE)
-Reducer 7 <- Map 6 (SIMPLE_EDGE)
+Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE)
+Reducer 4 <- Map 3 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE)
+Reducer 6 <- Map 5 (SIMPLE_EDGE)
Stage-0
Fetch Operator
limit:-1
Stage-1
Reducer 2 llap
- File Output Operator [FS_23]
- Merge Join Operator [MERGEJOIN_33] (rows=1 width=178)
- Conds:RS_19._col0, _col1=RS_20._col0, _col1(Inner),Output:["_col0","_col1"]
+ File Output Operator [FS_21]
+ Merge Join Operator [MERGEJOIN_31] (rows=1 width=178)
+ Conds:RS_17._col0, _col1=RS_18._col0, _col1(Left Semi),Output:["_col0","_col1"]
<-Map 1 [SIMPLE_EDGE] llap
- SHUFFLE [RS_19]
+ SHUFFLE [RS_17]
PartitionCols:_col0, _col1
Select Operator [SEL_1] (rows=500 width=178)
Output:["_col0","_col1"]
TableScan [TS_0] (rows=500 width=178)
default@src_cbo,b,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"]
- <-Reducer 5 [SIMPLE_EDGE] llap
- SHUFFLE [RS_20]
+ <-Reducer 4 [SIMPLE_EDGE] llap
+ SHUFFLE [RS_18]
PartitionCols:_col0, _col1
- Group By Operator [GBY_17] (rows=1 width=178)
- Output:["_col0","_col1"],keys:KEY._col0, KEY._col1
- <-Reducer 4 [SIMPLE_EDGE] llap
- SHUFFLE [RS_16]
- PartitionCols:_col0, _col1
- Group By Operator [GBY_15] (rows=1 width=178)
- Output:["_col0","_col1"],keys:_col2, _col3
- Merge Join Operator [MERGEJOIN_32] (rows=1 width=178)
- Conds:RS_11._col0, _col1=RS_12._col0, _col1(Inner),Output:["_col2","_col3"]
- <-Map 3 [SIMPLE_EDGE] llap
- SHUFFLE [RS_11]
- PartitionCols:_col0, _col1
- Select Operator [SEL_4] (rows=166 width=178)
- Output:["_col0","_col1"]
- Filter Operator [FIL_30] (rows=166 width=178)
- predicate:(value > 'val_9')
- TableScan [TS_2] (rows=500 width=178)
- default@src_cbo,a,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"]
- <-Reducer 7 [SIMPLE_EDGE] llap
- SHUFFLE [RS_12]
- PartitionCols:_col0, _col1
- Group By Operator [GBY_9] (rows=250 width=178)
- Output:["_col0","_col1"],keys:KEY._col0, KEY._col1
- <-Map 6 [SIMPLE_EDGE] llap
- SHUFFLE [RS_8]
- PartitionCols:_col0, _col1
- Group By Operator [GBY_7] (rows=250 width=178)
- Output:["_col0","_col1"],keys:key, value
- TableScan [TS_5] (rows=500 width=178)
- default@src_cbo,b,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"]
+ Group By Operator [GBY_16] (rows=1 width=178)
+ Output:["_col0","_col1"],keys:_col0, _col1
+ Select Operator [SEL_14] (rows=1 width=178)
+ Output:["_col0","_col1"]
+ Merge Join Operator [MERGEJOIN_30] (rows=1 width=178)
+ Conds:RS_11._col0, _col1=RS_12._col0, _col1(Inner),Output:["_col2","_col3"]
+ <-Map 3 [SIMPLE_EDGE] llap
+ SHUFFLE [RS_11]
+ PartitionCols:_col0, _col1
+ Select Operator [SEL_4] (rows=166 width=178)
+ Output:["_col0","_col1"]
+ Filter Operator [FIL_28] (rows=166 width=178)
+ predicate:(value > 'val_9')
+ TableScan [TS_2] (rows=500 width=178)
+ default@src_cbo,a,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"]
+ <-Reducer 6 [SIMPLE_EDGE] llap
+ SHUFFLE [RS_12]
+ PartitionCols:_col0, _col1
+ Group By Operator [GBY_9] (rows=250 width=178)
+ Output:["_col0","_col1"],keys:KEY._col0, KEY._col1
+ <-Map 5 [SIMPLE_EDGE] llap
+ SHUFFLE [RS_8]
+ PartitionCols:_col0, _col1
+ Group By Operator [GBY_7] (rows=250 width=178)
+ Output:["_col0","_col1"],keys:key, value
+ TableScan [TS_5] (rows=500 width=178)
+ default@src_cbo,b,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"]
PREHOOK: query: explain select *
from src_cbo
@@ -2131,40 +2123,36 @@ POSTHOOK: type: QUERY
Plan optimized by CBO.
Vertex dependency in root stage
-Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE)
-Reducer 4 <- Map 3 (SIMPLE_EDGE)
+Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE)
Stage-0
Fetch Operator
limit:-1
Stage-1
Reducer 2 llap
- File Output Operator [FS_14]
- Merge Join Operator [MERGEJOIN_19] (rows=166 width=178)
- Conds:RS_10._col0=RS_11._col0(Inner),Output:["_col0","_col1"]
+ File Output Operator [FS_12]
+ Merge Join Operator [MERGEJOIN_17] (rows=166 width=178)
+ Conds:RS_8._col0=RS_9._col0(Left Semi),Output:["_col0","_col1"]
<-Map 1 [SIMPLE_EDGE] llap
- SHUFFLE [RS_10]
+ SHUFFLE [RS_8]
PartitionCols:_col0
Select Operator [SEL_2] (rows=166 width=178)
Output:["_col0","_col1"]
- Filter Operator [FIL_17] (rows=166 width=178)
+ Filter Operator [FIL_15] (rows=166 width=178)
predicate:(key > '9')
TableScan [TS_0] (rows=500 width=178)
default@src_cbo,src_cbo,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"]
- <-Reducer 4 [SIMPLE_EDGE] llap
- SHUFFLE [RS_11]
+ <-Map 3 [SIMPLE_EDGE] llap
+ SHUFFLE [RS_9]
PartitionCols:_col0
- Group By Operator [GBY_8] (rows=69 width=87)
- Output:["_col0"],keys:KEY._col0
- <-Map 3 [SIMPLE_EDGE] llap
- SHUFFLE [RS_7]
- PartitionCols:_col0
- Group By Operator [GBY_6] (rows=69 width=87)
- Output:["_col0"],keys:key
- Filter Operator [FIL_18] (rows=166 width=87)
- predicate:(key > '9')
- TableScan [TS_3] (rows=500 width=87)
- default@src_cbo,s1,Tbl:COMPLETE,Col:COMPLETE,Output:["key"]
+ Group By Operator [GBY_7] (rows=69 width=87)
+ Output:["_col0"],keys:_col0
+ Select Operator [SEL_5] (rows=166 width=87)
+ Output:["_col0"]
+ Filter Operator [FIL_16] (rows=166 width=87)
+ predicate:(key > '9')
+ TableScan [TS_3] (rows=500 width=87)
+ default@src_cbo,s1,Tbl:COMPLETE,Col:COMPLETE,Output:["key"]
PREHOOK: query: explain select p.p_partkey, li.l_suppkey
from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey
@@ -2179,41 +2167,40 @@ POSTHOOK: type: QUERY
Plan optimized by CBO.
Vertex dependency in root stage
-Reducer 10 <- Map 9 (SIMPLE_EDGE)
-Reducer 11 <- Map 13 (SIMPLE_EDGE), Reducer 10 (SIMPLE_EDGE)
-Reducer 12 <- Reducer 11 (SIMPLE_EDGE)
+Reducer 10 <- Map 12 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE)
+Reducer 11 <- Reducer 10 (SIMPLE_EDGE)
Reducer 2 <- Map 1 (SIMPLE_EDGE)
Reducer 3 <- Map 5 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE)
-Reducer 4 <- Reducer 3 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE)
-Reducer 7 <- Map 6 (SIMPLE_EDGE), Reducer 12 (SIMPLE_EDGE)
-Reducer 8 <- Reducer 7 (SIMPLE_EDGE)
+Reducer 4 <- Reducer 3 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE)
+Reducer 7 <- Map 6 (SIMPLE_EDGE), Reducer 11 (SIMPLE_EDGE)
+Reducer 9 <- Map 8 (SIMPLE_EDGE)
Stage-0
Fetch Operator
limit:-1
Stage-1
Reducer 4 llap
- File Output Operator [FS_46]
- Select Operator [SEL_45] (rows=5 width=8)
+ File Output Operator [FS_44]
+ Select Operator [SEL_43] (rows=5 width=8)
Output:["_col0","_col1"]
- Merge Join Operator [MERGEJOIN_67] (rows=5 width=8)
- Conds:RS_42._col1, _col4=RS_43._col0, _col1(Inner),Output:["_col0","_col3"]
+ Merge Join Operator [MERGEJOIN_65] (rows=5 width=8)
+ Conds:RS_40._col1, _col4=RS_41._col0, _col1(Left Semi),Output:["_col0","_col3"]
<-Reducer 3 [SIMPLE_EDGE] llap
- SHUFFLE [RS_42]
+ SHUFFLE [RS_40]
PartitionCols:_col1, _col4
- Merge Join Operator [MERGEJOIN_64] (rows=5 width=16)
- Conds:RS_39._col0=RS_40._col1(Inner),Output:["_col0","_col1","_col3","_col4"]
+ Merge Join Operator [MERGEJOIN_62] (rows=5 width=16)
+ Conds:RS_35._col0=RS_36._col1(Inner),Output:["_col0","_col1","_col3","_col4"]
<-Map 5 [SIMPLE_EDGE] llap
- SHUFFLE [RS_40]
+ SHUFFLE [RS_36]
PartitionCols:_col1
Select Operator [SEL_9] (rows=17 width=16)
Output:["_col0","_col1","_col2","_col3"]
- Filter Operator [FIL_60] (rows=17 width=16)
+ Filter Operator [FIL_58] (rows=17 width=16)
predicate:((l_linenumber = 1) and l_partkey is not null)
TableScan [TS_7] (rows=100 width=16)
default@lineitem,li,Tbl:COMPLETE,Col:COMPLETE,Output:["l_orderkey","l_partkey","l_suppkey","l_linenumber"]
<-Reducer 2 [SIMPLE_EDGE] llap
- SHUFFLE [RS_39]
+ SHUFFLE [RS_35]
PartitionCols:_col0
Group By Operator [GBY_5] (rows=50 width=4)
Output:["_col0"],keys:KEY._col0
@@ -2222,66 +2209,63 @@ Stage-0
PartitionCols:_col0
Group By Operator [GBY_3] (rows=50 width=4)
Output:["_col0"],keys:l_partkey
- Filter Operator [FIL_59] (rows=100 width=4)
+ Filter Operator [FIL_57] (rows=100 width=4)
predicate:l_partkey is not null
TableScan [TS_0] (rows=100 width=4)
default@lineitem,lineitem,Tbl:COMPLETE,Col:COMPLETE,Output:["l_partkey"]
- <-Reducer 8 [SIMPLE_EDGE] llap
- SHUFFLE [RS_43]
+ <-Reducer 7 [SIMPLE_EDGE] llap
+ SHUFFLE [RS_41]
PartitionCols:_col0, _col1
- Group By Operator [GBY_37] (rows=4 width=8)
- Output:["_col0","_col1"],keys:KEY._col0, KEY._col1
- <-Reducer 7 [SIMPLE_EDGE] llap
- SHUFFLE [RS_36]
- PartitionCols:_col0, _col1
- Group By Operator [GBY_35] (rows=4 width=8)
- Output:["_col0","_col1"],keys:_col0, _col3
- Merge Join Operator [MERGEJOIN_66] (rows=14 width=8)
- Conds:RS_31._col1=RS_32._col0(Inner),Output:["_col0","_col3"]
- <-Map 6 [SIMPLE_EDGE] llap
- SHUFFLE [RS_31]
- PartitionCols:_col1
- Select Operator [SEL_12] (rows=14 width=95)
- Output:["_col0","_col1"]
- Filter Operator [FIL_61] (rows=14 width=96)
- predicate:(l_shipmode = 'AIR')
- TableScan [TS_10] (rows=100 width=96)
- default@lineitem,lineitem,Tbl:COMPLETE,Col:COMPLETE,Output:["l_orderkey","l_linenumber","l_shipmode"]
- <-Reducer 12 [SIMPLE_EDGE] llap
- SHUFFLE [RS_32]
- PartitionCols:_col0
- Group By Operator [GBY_29] (rows=3 width=4)
- Output:["_col0"],keys:KEY._col0
- <-Reducer 11 [SIMPLE_EDGE] llap
- SHUFFLE [RS_28]
- PartitionCols:_col0
- Group By Operator [GBY_27] (rows=3 width=4)
- Output:["_col0"],keys:_col2
- Merge Join Operator [MERGEJOIN_65] (rows=34 width=4)
- Conds:RS_23._col0=RS_24._col0(Inner),Output:["_col2"]
- <-Map 13 [SIMPLE_EDGE] llap
- SHUFFLE [RS_24]
- PartitionCols:_col0
- Select Operator [SEL_22] (rows=100 width=8)
- Output:["_col0","_col1"]
- Filter Operator [FIL_63] (rows=100 width=8)
- predicate:l_partkey is not null
- TableScan [TS_20] (rows=100 width=8)
- default@lineitem,li,Tbl:COMPLETE,Col:COMPLETE,Output:["l_partkey","l_linenumber"]
- <-Reducer 10 [SIMPLE_EDGE] llap
- SHUFFLE [RS_23]
- PartitionCols:_col0
- Group By Operator [GBY_18] (rows=50 width=4)
- Output:["_col0"],keys:KEY._col0
- <-Map 9 [SIMPLE_EDGE] llap
- SHUFFLE [RS_17]
- PartitionCols:_col0
- Group By Operator [GBY_16] (rows=50 width=4)
- Output:["_col0"],keys:l_partkey
- Filter Operator [FIL_62] (rows=100 width=4)
- predicate:l_partkey is not null
- TableScan [TS_13] (rows=100 width=4)
- default@lineitem,lineitem,Tbl:COMPLETE,Col:COMPLETE,Output:["l_partkey"]
+ Group By Operator [GBY_39] (rows=4 width=8)
+ Output:["_col0","_col1"],keys:_col0, _col1
+ Select Operator [SEL_34] (rows=14 width=8)
+ Output:["_col0","_col1"]
+ Merge Join Operator [MERGEJOIN_64] (rows=14 width=8)
+ Conds:RS_31._col1=RS_32._col0(Inner),Output:["_col0","_col3"]
+ <-Map 6 [SIMPLE_EDGE] llap
+ SHUFFLE [RS_31]
+ PartitionCols:_col1
+ Select Operator [SEL_12] (rows=14 width=95)
+ Output:["_col0","_col1"]
+ Filter Operator [FIL_59] (rows=14 width=96)
+ predicate:(l_shipmode = 'AIR')
+ TableScan [TS_10] (rows=100 width=96)
+ default@lineitem,lineitem,Tbl:COMPLETE,Col:COMPLETE,Output:["l_orderkey","l_linenumber","l_shipmode"]
+ <-Reducer 11 [SIMPLE_EDGE] llap
+ SHUFFLE [RS_32]
+ PartitionCols:_col0
+ Group By Operator [GBY_29] (rows=3 width=4)
+ Output:["_col0"],keys:KEY._col0
+ <-Reducer 10 [SIMPLE_EDGE] llap
+ SHUFFLE [RS_28]
+ PartitionCols:_col0
+ Group By Operator [GBY_27] (rows=3 width=4)
+ Output:["_col0"],keys:_col2
+ Merge Join Operator [MERGEJOIN_63] (rows=34 width=4)
+ Conds:RS_23._col0=RS_24._col0(Inner),Output:["_col2"]
+ <-Map 12 [SIMPLE_EDGE] llap
+ SHUFFLE [RS_24]
+ PartitionCols:_col0
+ Select Operator [SEL_22] (rows=100 width=8)
+ Output:["_col0","_col1"]
+ Filter Operator [FIL_61] (rows=100 width=8)
+ predicate:l_partkey is not null
+ TableScan [TS_20] (rows=100 width=8)
+ default@lineitem,li,Tbl:COMPLETE,Col:COMPLETE,Output:["l_partkey","l_linenumber"]
+ <-Reducer 9 [SIMPLE_EDGE] llap
+ SHUFFLE [RS_23]
+ PartitionCols:_col0
+ Group By Operator [GBY_18] (rows=50 width=4)
+ Output:["_col0"],keys:KEY._col0
+ <-Map 8 [SIMPLE_EDGE] llap
+ SHUFFLE [RS_17]
+ PartitionCols:_col0
+ Group By Operator [GBY_16] (rows=50 width=4)
+ Output:["_col0"],keys:l_partkey
+ Filter Operator [FIL_60] (rows=100 width=4)
+ predicate:l_partkey is not null
+ TableScan [TS_13] (rows=100 width=4)
+ default@lineitem,lineitem,Tbl:COMPLETE,Col:COMPLETE,Output:["l_partkey"]
PREHOOK: query: explain select key, value, count(*)
from src_cbo b
@@ -2300,23 +2284,22 @@ Plan optimized by CBO.
Vertex dependency in root stage
Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE)
Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
-Reducer 4 <- Reducer 3 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE)
+Reducer 4 <- Reducer 3 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE)
Reducer 6 <- Map 5 (SIMPLE_EDGE)
Reducer 8 <- Map 7 (SIMPLE_EDGE)
-Reducer 9 <- Reducer 8 (SIMPLE_EDGE)
Stage-0
Fetch Operator
limit:-1
Stage-1
Reducer 4 llap
- File Output Operator [FS_36]
- Merge Join Operator [MERGEJOIN_49] (rows=34 width=186)
- Conds:RS_32._col2=RS_33._col0(Inner),Output:["_col0","_col1","_col2"]
+ File Output Operator [FS_33]
+ Merge Join Operator [MERGEJOIN_46] (rows=34 width=186)
+ Conds:RS_29._col2=RS_30._col0(Left Semi),Output:["_col0","_col1","_col2"]
<-Reducer 3 [SIMPLE_EDGE] llap
- SHUFFLE [RS_32]
+ SHUFFLE [RS_29]
PartitionCols:_col2
- Filter Operator [FIL_42] (rows=83 width=186)
+ Filter Operator [FIL_39] (rows=83 width=186)
predicate:_col2 is not null
Group By Operator [GBY_16] (rows=83 width=186)
Output:["_col0","_col1","_col2"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1
@@ -2325,14 +2308,14 @@ Stage-0
PartitionCols:_col0, _col1
Group By Operator [GBY_14] (rows=83 width=186)
Output:["_col0","_col1","_col2"],aggregations:["count()"],keys:_col0, _col1
- Merge Join Operator [MERGEJOIN_48] (rows=166 width=178)
+ Merge Join Operator [MERGEJOIN_45] (rows=166 width=178)
Conds:RS_10._col0=RS_11._col0(Inner),Output:["_col0","_col1"]
<-Map 1 [SIMPLE_EDGE] llap
SHUFFLE [RS_10]
PartitionCols:_col0
Select Operator [SEL_2] (rows=166 width=178)
Output:["_col0","_col1"]
- Filter Operator [FIL_43] (rows=166 width=178)
+ Filter Operator [FIL_40] (rows=166 width=178)
predicate:(key > '8')
TableScan [TS_0] (rows=500 width=178)
default@src_cbo,b,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"]
@@ -2346,35 +2329,32 @@ Stage-0
PartitionCols:_col0
Group By Operator [GBY_6] (rows=69 width=87)
Output:["_col0"],keys:key
- Filter Operator [FIL_44] (rows=166 width=87)
+ Filter Operator [FIL_41] (rows=166 width=87)
predicate:(key > '8')
TableScan [TS_3] (rows=500 width=87)
default@src_cbo,src_cbo,Tbl:COMPLETE,Col:COMPLETE,Output:["key"]
- <-Reducer 9 [SIMPLE_EDGE] llap
- SHUFFLE [RS_33]
+ <-Reducer 8 [SIMPLE_EDGE] llap
+ SHUFFLE [RS_30]
PartitionCols:_col0
- Group By Operator [GBY_30] (rows=34 width=8)
- Output:["_col0"],keys:KEY._col0
- <-Reducer 8 [SIMPLE_EDGE] llap
- SHUFFLE [RS_29]
- PartitionCols:_col0
- Group By Operator [GBY_28] (rows=34 width=8)
- Output:["_col0"],keys:_col1
- Filter Operator [FIL_45] (rows=69 width=8)
- predicate:_col1 is not null
- Select Operator [SEL_47] (rows=69 width=8)
- Output:["_col1"]
- Group By Operator [GBY_24] (rows=69 width=95)
- Output:["_col0","_col1"],aggregations:["count(VALUE._col0)"],keys:KEY._col0
- <-Map 7 [SIMPLE_EDGE] llap
- SHUFFLE [RS_23]
- PartitionCols:_col0
- Group By Operator [GBY_22] (rows=69 width=95)
- Output:["_col0","_col1"],aggregations:["count()"],keys:key
- Filter Operator [FIL_46] (rows=166 width=87)
- predicate:(key > '9')
- TableScan [TS_19] (rows=500 width=87)
- default@src_cbo,s1,Tbl:COMPLETE,Col:COMPLETE,Output:["key"]
+ Group By Operator [GBY_28] (rows=34 width=8)
+ Output:["_col0"],keys:_col0
+ Select Operator [SEL_26] (rows=69 width=8)
+ Output:["_col0"]
+ Filter Operator [FIL_42] (rows=69 width=8)
+ predicate:_col1 is not null
+ Select Operator [SEL_44] (rows=69 width=8)
+ Output:["_col1"]
+ Group By Operator [GBY_24] (rows=69 width=95)
+ Output:["_col0","_col1"],aggregations:["count(VALUE._col0)"],keys:KEY._col0
+ <-Map 7 [SIMPLE_EDGE] llap
+ SHUFFLE [RS_23]
+ PartitionCols:_col0
+ Group By Operator [GBY_22] (rows=69 width=95)
+ Output:["_col0","_col1"],aggregations:["count()"],keys:key
+ Filter Operator [FIL_43] (rows=166 width=87)
+ predicate:(key > '9')
+ TableScan [TS_19] (rows=500 width=87)
+ default@src_cbo,s1,Tbl:COMPLETE,Col:COMPLETE,Output:["key"]
PREHOOK: query: explain select p_mfgr, p_name, avg(p_size)
from part
@@ -2392,20 +2372,19 @@ Plan optimized by CBO.
Vertex dependency in root stage
Reducer 2 <- Map 1 (SIMPLE_EDGE)
-Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE)
+Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE)
Reducer 5 <- Map 4 (SIMPLE_EDGE)
-Reducer 6 <- Reducer 5 (SIMPLE_EDGE)
Stage-0
Fetch Operator
limit:-1
Stage-1
Reducer 3 llap
- File Output Operator [FS_23]
- Merge Join Operator [MERGEJOIN_28] (rows=6 width=227)
- Conds:RS_19._col1=RS_20._col0(Inner),Output:["_col0","_col1","_col2"]
+ File Output Operator [FS_21]
+ Merge Join Operator [MERGEJOIN_26] (rows=6 width=227)
+ Conds:RS_17._col1=RS_18._col0(Left Semi),Output:["_col0","_col1","_col2"]
<-Reducer 2 [SIMPLE_EDGE] llap
- SHUFFLE [RS_19]
+ SHUFFLE [RS_17]
PartitionCols:_col1
Select Operator [SEL_6] (rows=13 width=227)
Output:["_col0","_col1","_col2"]
@@ -2416,33 +2395,28 @@ Stage-0
PartitionCols:_col0, _col1
Group By Operator [GBY_3] (rows=13 width=295)
Output:["_col0","_col1","_col2"],aggregations:["avg(p_size)"],keys:p_name, p_mfgr
- Filter Operator [FIL_26] (rows=26 width=223)
+ Filter Operator [FIL_24] (rows=26 width=223)
predicate:p_name is not null
TableScan [TS_0] (rows=26 width=223)
default@part,part,Tbl:COMPLETE,Col:COMPLETE,Output:["p_name","p_mfgr","p_size"]
- <-Reducer 6 [SIMPLE_EDGE] llap
- SHUFFLE [RS_20]
+ <-Reducer 5 [SIMPLE_EDGE] llap
+ SHUFFLE [RS_18]
PartitionCols:_col0
- Group By Operator [GBY_17] (rows=13 width=184)
- Output:["_col0"],keys:KEY._col0
- <-Reducer 5 [SIMPLE_EDGE] llap
- SHUFFLE [RS_16]
- PartitionCols:_col0
- Group By Operator [GBY_15] (rows=13 width=184)
- Output:["_col0"],keys:_col0
- Select Operator [SEL_11] (rows=26 width=491)
- Output:["_col0"]
- Filter Operator [FIL_27] (rows=26 width=491)
- predicate:first_value_window_0 is not null
- PTF Operator [PTF_10] (rows=26 width=491)
- Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col5 ASC NULLS FIRST","partition by:":"_col2"}]
- Select Operator [SEL_9] (rows=26 width=491)
- Output:["_col1","_col2","_col5"]
- <-Map 4 [SIMPLE_EDGE] llap
- SHUFFLE [RS_8]
- PartitionCols:p_mfgr
- TableScan [TS_7] (rows=26 width=223)
- default@part,part,Tbl:COMPLETE,Col:COMPLETE,Output:["p_mfgr","p_name","p_size"]
+ Group By Operator [GBY_16] (rows=13 width=184)
+ Output:["_col0"],keys:_col0
+ Select Operator [SEL_11] (rows=26 width=184)
+ Output:["_col0"]
+ Filter Operator [FIL_25] (rows=26 width=491)
+ predicate:first_value_window_0 is not null
+ PTF Operator [PTF_10] (rows=26 width=491)
+ Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col5 ASC NULLS FIRST","partition by:":"_col2"}]
+ Select Operator [SEL_9] (rows=26 width=491)
+ Output:["_col1","_col2","_col5"]
+ <-Map 4 [SIMPLE_EDGE] llap
+ SHUFFLE [RS_8]
+ PartitionCols:p_mfgr
+ TableScan [TS_7] (rows=26 width=223)
+ default@part,part,Tbl:COMPLETE,Col:COMPLETE,Output:["p_mfgr","p_name","p_size"]
PREHOOK: query: explain select *
from src_cbo
http://git-wip-us.apache.org/repos/asf/hive/blob/f63dc2d4/ql/src/test/results/clientpositive/llap/leftsemijoin.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/leftsemijoin.q.out b/ql/src/test/results/clientpositive/llap/leftsemijoin.q.out
index a11bbc4..611d929 100644
--- a/ql/src/test/results/clientpositive/llap/leftsemijoin.q.out
+++ b/ql/src/test/results/clientpositive/llap/leftsemijoin.q.out
@@ -108,3 +108,209 @@ POSTHOOK: query: drop table things
POSTHOOK: type: DROPTABLE
POSTHOOK: Input: default@things
POSTHOOK: Output: default@things
+Warning: Shuffle Join MERGEJOIN[24][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 4' is a cross product
+PREHOOK: query: explain select part.p_type from part join (select p1.p_name from part p1, part p2 group by p1.p_name) pp ON pp.p_name = part.p_name
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select part.p_type from part join (select p1.p_name from part p1, part p2 group by p1.p_name) pp ON pp.p_name = part.p_name
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE)
+ Reducer 4 <- Map 3 (CUSTOM_SIMPLE_EDGE), Map 5 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: part
+ Statistics: Num rows: 26 Data size: 5850 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: p_name is not null (type: boolean)
+ Statistics: Num rows: 26 Data size: 5850 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: p_name (type: string), p_type (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 26 Data size: 5850 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 26 Data size: 5850 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: string)
+ Execution mode: llap
+ LLAP IO: no inputs
+ Map 3
+ Map Operator Tree:
+ TableScan
+ alias: p1
+ Statistics: Num rows: 26 Data size: 3146 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: p_name is not null (type: boolean)
+ Statistics: Num rows: 26 Data size: 3146 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: p_name (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 26 Data size: 3146 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 26 Data size: 3146 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: string)
+ Execution mode: llap
+ LLAP IO: no inputs
+ Map 5
+ Map Operator Tree:
+ TableScan
+ alias: p2
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE
+ Execution mode: llap
+ LLAP IO: no inputs
+ Reducer 2
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Left Semi Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col1
+ Statistics: Num rows: 26 Data size: 2704 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col1 (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 26 Data size: 2704 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 26 Data size: 2704 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 4
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0
+ 1
+ outputColumnNames: _col0
+ Statistics: Num rows: 676 Data size: 81796 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ keys: _col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 25 Data size: 3025 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 25 Data size: 3025 Basic stats: COMPLETE Column stats: COMPLETE
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+Warning: Shuffle Join MERGEJOIN[24][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 4' is a cross product
+PREHOOK: query: select part.p_type from part join (select p1.p_name from part p1, part p2 group by p1.p_name) pp ON pp.p_name = part.p_name
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part
+#### A masked pattern was here ####
+POSTHOOK: query: select part.p_type from part join (select p1.p_name from part p1, part p2 group by p1.p_name) pp ON pp.p_name = part.p_name
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part
+#### A masked pattern was here ####
+ECONOMY BRUSHED COPPER
+ECONOMY BURNISHED STEEL
+ECONOMY PLATED COPPER
+ECONOMY POLISHED STEEL
+LARGE BRUSHED BRASS
+LARGE BRUSHED STEEL
+LARGE BURNISHED STEEL
+MEDIUM ANODIZED COPPER
+MEDIUM BURNISHED BRASS
+MEDIUM BURNISHED COPPER
+MEDIUM BURNISHED TIN
+MEDIUM BURNISHED TIN
+PROMO ANODIZED TIN
+PROMO BURNISHED NICKEL
+PROMO PLATED TIN
+PROMO PLATED TIN
+PROMO POLISHED STEEL
+SMALL BRUSHED BRASS
+SMALL PLATED BRASS
+SMALL PLATED STEEL
+SMALL POLISHED NICKEL
+STANDARD ANODIZED STEEL
+STANDARD ANODIZED TIN
+STANDARD BURNISHED TIN
+STANDARD PLATED TIN
+STANDARD POLISHED STEEL
+PREHOOK: query: explain select part.p_type from part left join (select p1.p_name from part p1, part p2 group by p1.p_name) pp ON pp.p_name = part.p_name
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select part.p_type from part left join (select p1.p_name from part p1, part p2 group by p1.p_name) pp ON pp.p_name = part.p_name
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ TableScan
+ alias: part
+ Select Operator
+ expressions: p_type (type: string)
+ outputColumnNames: _col0
+ ListSink
+
+PREHOOK: query: select part.p_type from part left join (select p1.p_name from part p1, part p2 group by p1.p_name) pp ON pp.p_name = part.p_name
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part
+#### A masked pattern was here ####
+POSTHOOK: query: select part.p_type from part left join (select p1.p_name from part p1, part p2 group by p1.p_name) pp ON pp.p_name = part.p_name
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part
+#### A masked pattern was here ####
+ECONOMY BRUSHED COPPER
+ECONOMY BURNISHED STEEL
+ECONOMY PLATED COPPER
+ECONOMY POLISHED STEEL
+LARGE BRUSHED BRASS
+LARGE BRUSHED STEEL
+LARGE BURNISHED STEEL
+MEDIUM ANODIZED COPPER
+MEDIUM BURNISHED BRASS
+MEDIUM BURNISHED COPPER
+MEDIUM BURNISHED TIN
+MEDIUM BURNISHED TIN
+PROMO ANODIZED TIN
+PROMO BURNISHED NICKEL
+PROMO PLATED TIN
+PROMO PLATED TIN
+PROMO POLISHED STEEL
+SMALL BRUSHED BRASS
+SMALL PLATED BRASS
+SMALL PLATED STEEL
+SMALL POLISHED NICKEL
+STANDARD ANODIZED STEEL
+STANDARD ANODIZED TIN
+STANDARD BURNISHED TIN
+STANDARD PLATED TIN
+STANDARD POLISHED STEEL
http://git-wip-us.apache.org/repos/asf/hive/blob/f63dc2d4/ql/src/test/results/clientpositive/llap/lineage3.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/lineage3.q.out b/ql/src/test/results/clientpositive/llap/lineage3.q.out
index f092967..495ad09 100644
--- a/ql/src/test/results/clientpositive/llap/lineage3.q.out
+++ b/ql/src/test/results/clientpositive/llap/lineage3.q.out
@@ -178,7 +178,7 @@ PREHOOK: type: QUERY
PREHOOK: Input: default@alltypesorc
PREHOOK: Input: default@src1
#### A masked pattern was here ####
-{"version":"1.0","engine":"tez","database":"default","hash":"8bf193b0658183be94e2428a79d91d10","queryText":"select * from src1 a\nwhere exists\n (select cint from alltypesorc b\n where a.key = b.ctinyint + 300)\nand key > 300","edges":[{"sources":[2],"targets":[0],"edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"edgeType":"PROJECTION"},{"sources":[2],"targets":[0,1],"expression":"(UDFToDouble(a.key) > 300.0)","edgeType":"PREDICATE"},{"sources":[2],"targets":[0,1],"expression":"(a.key = a.key)","edgeType":"PREDICATE"},{"sources":[4,2],"targets":[0,1],"expression":"(UDFToDouble((UDFToInteger(b.ctinyint) + 300)) = UDFToDouble(a.key))","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"a.key"},{"id":1,"vertexType":"COLUMN","vertexId":"a.value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":3,"vertexType":"COLUMN","vertexId":"default.src1.value"},{"id":4,"vertexType":"COLUMN","vertexId":"default.alltypesorc.ctinyint"}]}
+{"version":"1.0","engine":"tez","database":"default","hash":"8bf193b0658183be94e2428a79d91d10","queryText":"select * from src1 a\nwhere exists\n (select cint from alltypesorc b\n where a.key = b.ctinyint + 300)\nand key > 300","edges":[{"sources":[2],"targets":[0],"edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"edgeType":"PROJECTION"},{"sources":[2],"targets":[0,1],"expression":"(UDFToDouble(a.key) > 300.0)","edgeType":"PREDICATE"},{"sources":[2],"targets":[0,1],"expression":"(a.key = (. (tok_table_or_col $hdt$_1) key))","edgeType":"PREDICATE"},{"sources":[4,2],"targets":[0,1],"expression":"(UDFToDouble((UDFToInteger(b.ctinyint) + 300)) = UDFToDouble(a.key))","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"a.key"},{"id":1,"vertexType":"COLUMN","vertexId":"a.value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":3,"vertexType":"COLUMN","vertexId":"default.src1.value"},{"id":4,"vertexType":"COLUMN","vertexId":"default.alltyp
esorc.ctinyint"}]}
311 val_311
Warning: Shuffle Join MERGEJOIN[29][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product
PREHOOK: query: select key, value from src1