You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ha...@apache.org on 2016/07/14 20:31:10 UTC
hive git commit: HIVE-14228 : Better row count estimates for outer join during physical planning (Ashutosh Chauhan via Jesus Camacho Rodriguez)
Repository: hive
Updated Branches:
refs/heads/master 3398fd732 -> 7f27c83a9
HIVE-14228 : Better row count estimates for outer join during physical planning (Ashutosh Chauhan via Jesus Camacho Rodriguez)
Signed-off-by: Ashutosh Chauhan <ha...@apache.org>
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/7f27c83a
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/7f27c83a
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/7f27c83a
Branch: refs/heads/master
Commit: 7f27c83a99d402c41aa1d012d8af3dc7ee2904ed
Parents: 3398fd7
Author: Ashutosh Chauhan <ha...@apache.org>
Authored: Wed Jul 13 09:17:05 2016 -0700
Committer: Ashutosh Chauhan <ha...@apache.org>
Committed: Thu Jul 14 13:27:17 2016 -0700
----------------------------------------------------------------------
.../stats/annotation/StatsRulesProcFactory.java | 39 ++-
.../clientpositive/annotate_stats_join.q | 11 +
.../clientpositive/annotate_stats_join.q.out | 259 +++++++++++++++++
.../spark/annotate_stats_join.q.out | 291 +++++++++++++++++++
.../clientpositive/tez/explainuser_1.q.out | 16 +-
5 files changed, 603 insertions(+), 13 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/7f27c83a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
index 5625091..2d0417a 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
@@ -34,6 +34,7 @@ import org.apache.hadoop.hive.ql.exec.ColumnInfo;
import org.apache.hadoop.hive.ql.exec.CommonJoinOperator;
import org.apache.hadoop.hive.ql.exec.FilterOperator;
import org.apache.hadoop.hive.ql.exec.GroupByOperator;
+import org.apache.hadoop.hive.ql.exec.JoinOperator;
import org.apache.hadoop.hive.ql.exec.LimitOperator;
import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.exec.OperatorUtils;
@@ -61,6 +62,7 @@ import org.apache.hadoop.hive.ql.plan.ExprNodeDynamicListDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
import org.apache.hadoop.hive.ql.plan.GroupByDesc;
+import org.apache.hadoop.hive.ql.plan.JoinCondDesc;
import org.apache.hadoop.hive.ql.plan.JoinDesc;
import org.apache.hadoop.hive.ql.plan.MapJoinDesc;
import org.apache.hadoop.hive.ql.plan.OperatorDesc;
@@ -1469,8 +1471,8 @@ public class StatsRulesProcFactory {
// update join statistics
stats.setColumnStats(outColStats);
- long newRowCount = inferredRowCount !=-1 ? inferredRowCount : computeNewRowCount(rowCounts, denom);
- updateStatsForJoinType(stats, newRowCount, jop, rowCountParents);
+ long newRowCount = inferredRowCount !=-1 ? inferredRowCount : computeNewRowCount(rowCounts, denom, jop);
+ updateColStats(stats, newRowCount, jop, rowCountParents);
jop.setStatistics(stats);
if (isDebugEnabled) {
@@ -1644,7 +1646,7 @@ public class StatsRulesProcFactory {
newNumRows = newrows;
} else {
// there is more than one FK
- newNumRows = this.computeNewRowCount(rowCounts, getDenominator(distinctVals));
+ newNumRows = this.computeNewRowCount(rowCounts, getDenominator(distinctVals), jop);
}
return newNumRows;
}
@@ -1764,7 +1766,7 @@ public class StatsRulesProcFactory {
return result;
}
- private void updateStatsForJoinType(Statistics stats, long newNumRows,
+ private void updateColStats(Statistics stats, long newNumRows,
CommonJoinOperator<? extends JoinDesc> jop,
Map<Integer, Long> rowCountParents) {
@@ -1812,7 +1814,7 @@ public class StatsRulesProcFactory {
stats.setDataSize(StatsUtils.getMaxIfOverflow(newDataSize));
}
- private long computeNewRowCount(List<Long> rowCountParents, long denom) {
+ private long computeNewRowCount(List<Long> rowCountParents, long denom, CommonJoinOperator<? extends JoinDesc> join) {
double factor = 0.0d;
long result = 1;
long max = rowCountParents.get(0);
@@ -1838,6 +1840,33 @@ public class StatsRulesProcFactory {
result = (long) (result * factor);
+ if (join.getConf().getConds().length == 1) {
+ JoinCondDesc joinCond = join.getConf().getConds()[0];
+ switch (joinCond.getType()) {
+ case JoinDesc.INNER_JOIN:
+ // only dealing with special join types here.
+ break;
+ case JoinDesc.LEFT_OUTER_JOIN :
+ // all rows from left side will be present in resultset
+ result = Math.max(rowCountParents.get(joinCond.getLeft()),result);
+ break;
+ case JoinDesc.RIGHT_OUTER_JOIN :
+ // all rows from right side will be present in resultset
+ result = Math.max(rowCountParents.get(joinCond.getRight()),result);
+ break;
+ case JoinDesc.FULL_OUTER_JOIN :
+ // all rows from both sides will be present in resultset
+ result = Math.max(StatsUtils.safeAdd(rowCountParents.get(joinCond.getRight()), rowCountParents.get(joinCond.getLeft())),result);
+ break;
+ case JoinDesc.LEFT_SEMI_JOIN :
+ // max # of rows = rows from left side
+ result = Math.min(rowCountParents.get(joinCond.getLeft()),result);
+ break;
+ default:
+ LOG.debug("Unhandled join type in stats estimation: " + joinCond.getType());
+ break;
+ }
+ }
return result;
}
http://git-wip-us.apache.org/repos/asf/hive/blob/7f27c83a/ql/src/test/queries/clientpositive/annotate_stats_join.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/annotate_stats_join.q b/ql/src/test/queries/clientpositive/annotate_stats_join.q
index bd5f642..015c647 100644
--- a/ql/src/test/queries/clientpositive/annotate_stats_join.q
+++ b/ql/src/test/queries/clientpositive/annotate_stats_join.q
@@ -68,3 +68,14 @@ explain select * from emp e join dept d on (e.deptid = d.deptid) join loc l on
-- Expected output rows: (48*6*8)/top2largest(3,7,7)*top2largest(6,6,6) = 1
explain select * from emp e join dept d on (e.deptid = d.deptid and e.lastname = d.deptname) join loc l on (e.deptid = l.locid and e.lastname = l.state);
+-- left outer join
+explain select * from emp left outer join dept on emp.deptid = dept.deptid and emp.lastname = dept.deptname and dept.deptname = emp.lastname;
+
+-- left semi join
+explain select * from emp left semi join dept on emp.deptid = dept.deptid and emp.lastname = dept.deptname and dept.deptname = emp.lastname;
+
+-- right outer join
+explain select * from emp right outer join dept on emp.deptid = dept.deptid and emp.lastname = dept.deptname and dept.deptname = emp.lastname;
+
+-- full outer join
+explain select * from emp full outer join dept on emp.deptid = dept.deptid and emp.lastname = dept.deptname and dept.deptname = emp.lastname;
http://git-wip-us.apache.org/repos/asf/hive/blob/7f27c83a/ql/src/test/results/clientpositive/annotate_stats_join.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/annotate_stats_join.q.out b/ql/src/test/results/clientpositive/annotate_stats_join.q.out
index 223a7ce..4398f1b 100644
--- a/ql/src/test/results/clientpositive/annotate_stats_join.q.out
+++ b/ql/src/test/results/clientpositive/annotate_stats_join.q.out
@@ -687,3 +687,262 @@ STAGE PLANS:
Processor Tree:
ListSink
+PREHOOK: query: -- left outer join
+explain select * from emp left outer join dept on emp.deptid = dept.deptid and emp.lastname = dept.deptname and dept.deptname = emp.lastname
+PREHOOK: type: QUERY
+POSTHOOK: query: -- left outer join
+explain select * from emp left outer join dept on emp.deptid = dept.deptid and emp.lastname = dept.deptname and dept.deptname = emp.lastname
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: emp
+ Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: lastname (type: string), deptid (type: int), locid (type: int)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: int)
+ Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col2 (type: int)
+ TableScan
+ alias: dept
+ Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: deptid (type: int), deptname (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col1 (type: string), _col0 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col1 (type: string), _col0 (type: int)
+ Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Left Outer Join0 to 1
+ keys:
+ 0 _col0 (type: string), _col1 (type: int)
+ 1 _col1 (type: string), _col0 (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 48 Data size: 9312 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 48 Data size: 9312 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: -- left semi join
+explain select * from emp left semi join dept on emp.deptid = dept.deptid and emp.lastname = dept.deptname and dept.deptname = emp.lastname
+PREHOOK: type: QUERY
+POSTHOOK: query: -- left semi join
+explain select * from emp left semi join dept on emp.deptid = dept.deptid and emp.lastname = dept.deptname and dept.deptname = emp.lastname
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: emp
+ Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: (lastname is not null and deptid is not null) (type: boolean)
+ Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: lastname (type: string), deptid (type: int), locid (type: int)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: int)
+ Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col2 (type: int)
+ TableScan
+ alias: dept
+ Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: (deptid is not null and deptname is not null) (type: boolean)
+ Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: deptname (type: string), deptid (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ keys: _col0 (type: string), _col1 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 3 Data size: 285 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: int)
+ Statistics: Num rows: 3 Data size: 285 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Left Semi Join 0 to 1
+ keys:
+ 0 _col0 (type: string), _col1 (type: int)
+ 1 _col0 (type: string), _col1 (type: int)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 3 Data size: 297 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 3 Data size: 297 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: -- right outer join
+explain select * from emp right outer join dept on emp.deptid = dept.deptid and emp.lastname = dept.deptname and dept.deptname = emp.lastname
+PREHOOK: type: QUERY
+POSTHOOK: query: -- right outer join
+explain select * from emp right outer join dept on emp.deptid = dept.deptid and emp.lastname = dept.deptname and dept.deptname = emp.lastname
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: emp
+ Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: lastname (type: string), deptid (type: int), locid (type: int)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: int)
+ Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col2 (type: int)
+ TableScan
+ alias: dept
+ Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: deptid (type: int), deptname (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col1 (type: string), _col0 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col1 (type: string), _col0 (type: int)
+ Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Right Outer Join0 to 1
+ keys:
+ 0 _col0 (type: string), _col1 (type: int)
+ 1 _col1 (type: string), _col0 (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 6 Data size: 1164 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 6 Data size: 1164 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: -- full outer join
+explain select * from emp full outer join dept on emp.deptid = dept.deptid and emp.lastname = dept.deptname and dept.deptname = emp.lastname
+PREHOOK: type: QUERY
+POSTHOOK: query: -- full outer join
+explain select * from emp full outer join dept on emp.deptid = dept.deptid and emp.lastname = dept.deptname and dept.deptname = emp.lastname
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: emp
+ Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: lastname (type: string), deptid (type: int), locid (type: int)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: int)
+ Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col2 (type: int)
+ TableScan
+ alias: dept
+ Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: deptid (type: int), deptname (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col1 (type: string), _col0 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col1 (type: string), _col0 (type: int)
+ Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Outer Join 0 to 1
+ keys:
+ 0 _col0 (type: string), _col1 (type: int)
+ 1 _col1 (type: string), _col0 (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 54 Data size: 10476 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 54 Data size: 10476 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
http://git-wip-us.apache.org/repos/asf/hive/blob/7f27c83a/ql/src/test/results/clientpositive/spark/annotate_stats_join.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/annotate_stats_join.q.out b/ql/src/test/results/clientpositive/spark/annotate_stats_join.q.out
index 2a42b3c..30d10f7 100644
--- a/ql/src/test/results/clientpositive/spark/annotate_stats_join.q.out
+++ b/ql/src/test/results/clientpositive/spark/annotate_stats_join.q.out
@@ -749,3 +749,294 @@ STAGE PLANS:
Processor Tree:
ListSink
+PREHOOK: query: -- left outer join
+explain select * from emp left outer join dept on emp.deptid = dept.deptid and emp.lastname = dept.deptname and dept.deptname = emp.lastname
+PREHOOK: type: QUERY
+POSTHOOK: query: -- left outer join
+explain select * from emp left outer join dept on emp.deptid = dept.deptid and emp.lastname = dept.deptname and dept.deptname = emp.lastname
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: emp
+ Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: lastname (type: string), deptid (type: int), locid (type: int)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: int)
+ Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col2 (type: int)
+ Map 3
+ Map Operator Tree:
+ TableScan
+ alias: dept
+ Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: deptid (type: int), deptname (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col1 (type: string), _col0 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col1 (type: string), _col0 (type: int)
+ Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE
+ Reducer 2
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Left Outer Join0 to 1
+ keys:
+ 0 _col0 (type: string), _col1 (type: int)
+ 1 _col1 (type: string), _col0 (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 48 Data size: 9312 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 48 Data size: 9312 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: -- left semi join
+explain select * from emp left semi join dept on emp.deptid = dept.deptid and emp.lastname = dept.deptname and dept.deptname = emp.lastname
+PREHOOK: type: QUERY
+POSTHOOK: query: -- left semi join
+explain select * from emp left semi join dept on emp.deptid = dept.deptid and emp.lastname = dept.deptname and dept.deptname = emp.lastname
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: emp
+ Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: (lastname is not null and deptid is not null) (type: boolean)
+ Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: lastname (type: string), deptid (type: int), locid (type: int)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: int)
+ Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col2 (type: int)
+ Map 3
+ Map Operator Tree:
+ TableScan
+ alias: dept
+ Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: (deptid is not null and deptname is not null) (type: boolean)
+ Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: deptname (type: string), deptid (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ keys: _col0 (type: string), _col1 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 3 Data size: 285 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: int)
+ Statistics: Num rows: 3 Data size: 285 Basic stats: COMPLETE Column stats: COMPLETE
+ Reducer 2
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Left Semi Join 0 to 1
+ keys:
+ 0 _col0 (type: string), _col1 (type: int)
+ 1 _col0 (type: string), _col1 (type: int)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 3 Data size: 297 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 3 Data size: 297 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: -- right outer join
+explain select * from emp right outer join dept on emp.deptid = dept.deptid and emp.lastname = dept.deptname and dept.deptname = emp.lastname
+PREHOOK: type: QUERY
+POSTHOOK: query: -- right outer join
+explain select * from emp right outer join dept on emp.deptid = dept.deptid and emp.lastname = dept.deptname and dept.deptname = emp.lastname
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: emp
+ Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: lastname (type: string), deptid (type: int), locid (type: int)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: int)
+ Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col2 (type: int)
+ Map 3
+ Map Operator Tree:
+ TableScan
+ alias: dept
+ Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: deptid (type: int), deptname (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col1 (type: string), _col0 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col1 (type: string), _col0 (type: int)
+ Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE
+ Reducer 2
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Right Outer Join0 to 1
+ keys:
+ 0 _col0 (type: string), _col1 (type: int)
+ 1 _col1 (type: string), _col0 (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 6 Data size: 1164 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 6 Data size: 1164 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: -- full outer join
+explain select * from emp full outer join dept on emp.deptid = dept.deptid and emp.lastname = dept.deptname and dept.deptname = emp.lastname
+PREHOOK: type: QUERY
+POSTHOOK: query: -- full outer join
+explain select * from emp full outer join dept on emp.deptid = dept.deptid and emp.lastname = dept.deptname and dept.deptname = emp.lastname
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: emp
+ Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: lastname (type: string), deptid (type: int), locid (type: int)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: int)
+ Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col2 (type: int)
+ Map 3
+ Map Operator Tree:
+ TableScan
+ alias: dept
+ Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: deptid (type: int), deptname (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col1 (type: string), _col0 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col1 (type: string), _col0 (type: int)
+ Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE
+ Reducer 2
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Outer Join 0 to 1
+ keys:
+ 0 _col0 (type: string), _col1 (type: int)
+ 1 _col1 (type: string), _col0 (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 54 Data size: 10476 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 54 Data size: 10476 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
http://git-wip-us.apache.org/repos/asf/hive/blob/7f27c83a/ql/src/test/results/clientpositive/tez/explainuser_1.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/explainuser_1.q.out b/ql/src/test/results/clientpositive/tez/explainuser_1.q.out
index 406df88..b8383fd 100644
--- a/ql/src/test/results/clientpositive/tez/explainuser_1.q.out
+++ b/ql/src/test/results/clientpositive/tez/explainuser_1.q.out
@@ -1569,9 +1569,9 @@ Stage-0
Stage-1
Reducer 2
File Output Operator [FS_12]
- Select Operator [SEL_11] (rows=11 width=4)
+ Select Operator [SEL_11] (rows=9 width=4)
Output:["_col0"]
- Merge Join Operator [MERGEJOIN_17] (rows=11 width=4)
+ Merge Join Operator [MERGEJOIN_17] (rows=9 width=4)
Conds:RS_8._col0=RS_9._col0(Left Semi),Output:["_col1"]
<-Map 1 [SIMPLE_EDGE]
SHUFFLE [RS_8]
@@ -1849,7 +1849,7 @@ Stage-0
Output:["_col0","_col1"]
Filter Operator [FIL_12] (rows=1 width=269)
predicate:_col3 is null
- Merge Join Operator [MERGEJOIN_17] (rows=193 width=269)
+ Merge Join Operator [MERGEJOIN_17] (rows=500 width=269)
Conds:RS_9._col1=RS_10._col1(Left Outer),Output:["_col0","_col1","_col3"]
<-Map 1 [SIMPLE_EDGE]
SHUFFLE [RS_9]
@@ -1911,7 +1911,7 @@ Stage-0
Output:["_col0","_col1"]
Filter Operator [FIL_12] (rows=1 width=265)
predicate:_col3 is null
- Merge Join Operator [MERGEJOIN_17] (rows=1 width=265)
+ Merge Join Operator [MERGEJOIN_17] (rows=250 width=265)
Conds:RS_9._col0, _col1=RS_10._col1, _col0(Left Outer),Output:["_col0","_col1","_col3"]
<-Map 4 [SIMPLE_EDGE]
SHUFFLE [RS_10]
@@ -2341,7 +2341,7 @@ Stage-0
Output:["_col0","_col1"]
Filter Operator [FIL_21] (rows=1 width=265)
predicate:_col3 is null
- Merge Join Operator [MERGEJOIN_29] (rows=404 width=265)
+ Merge Join Operator [MERGEJOIN_29] (rows=500 width=265)
Conds:RS_18._col0=RS_19._col0(Left Outer),Output:["_col0","_col1","_col3"]
<-Map 7 [SIMPLE_EDGE]
SHUFFLE [RS_19]
@@ -2413,7 +2413,7 @@ Stage-0
Output:["_col0","_col1","_col2"]
Filter Operator [FIL_20] (rows=1 width=344)
predicate:_col4 is null
- Merge Join Operator [MERGEJOIN_27] (rows=1 width=344)
+ Merge Join Operator [MERGEJOIN_27] (rows=26 width=344)
Conds:RS_17._col0, _col1=RS_18._col0, _col1(Left Outer),Output:["_col0","_col1","_col2","_col4"]
<-Map 6 [SIMPLE_EDGE]
SHUFFLE [RS_18]
@@ -2491,7 +2491,7 @@ Stage-0
Output:["_col0","_col1"]
Filter Operator [FIL_31] (rows=1 width=133)
predicate:_col3 is null
- Merge Join Operator [MERGEJOIN_41] (rows=1 width=133)
+ Merge Join Operator [MERGEJOIN_41] (rows=26 width=133)
Conds:RS_28.UDFToDouble(_col1)=RS_29._col0(Left Outer),Output:["_col0","_col1","_col3"]
<-Reducer 2 [SIMPLE_EDGE]
SHUFFLE [RS_28]
@@ -2583,7 +2583,7 @@ Stage-0
Output:["_col0","_col1"]
Filter Operator [FIL_33] (rows=1 width=204)
predicate:_col3 is null
- Merge Join Operator [MERGEJOIN_42] (rows=1 width=204)
+ Merge Join Operator [MERGEJOIN_42] (rows=5 width=204)
Conds:RS_30._col0, _col1=RS_31._col0, _col1(Left Outer),Output:["_col0","_col1","_col3"]
<-Reducer 10 [SIMPLE_EDGE]
SHUFFLE [RS_31]