You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ha...@apache.org on 2015/07/07 18:21:46 UTC
[9/9] hive git commit: HIVE-11171 : Join reordering algorithm might
introduce projects between joins (Jesus Camacho Rodriguez via Ashutosh
Chauhan)
HIVE-11171 : Join reordering algorithm might introduce projects between joins (Jesus Camacho Rodriguez via Ashutosh Chauhan)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/b963769d
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/b963769d
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/b963769d
Branch: refs/heads/master
Commit: b963769d46f714bc69470faf1471f3c80124cd43
Parents: 7b98e37
Author: Ashutosh Chauhan <ha...@apache.org>
Authored: Tue Jul 7 09:20:37 2015 -0700
Committer: Ashutosh Chauhan <ha...@apache.org>
Committed: Tue Jul 7 09:20:37 2015 -0700
----------------------------------------------------------------------
.../calcite/rules/HiveJoinCommuteRule.java | 96 ++
.../hadoop/hive/ql/parse/CalcitePlanner.java | 19 +-
.../results/clientpositive/auto_join12.q.out | 54 +-
.../results/clientpositive/auto_join5.q.out | 8 +-
.../constantPropagateForSubQuery.q.out | 38 +-
.../clientpositive/correlationoptimizer15.q.out | 120 +-
.../clientpositive/correlationoptimizer6.q.out | 818 ++++++-------
ql/src/test/results/clientpositive/join12.q.out | 28 +-
ql/src/test/results/clientpositive/join5.q.out | 20 +-
.../join_merge_multi_expressions.q.out | 46 +-
.../results/clientpositive/join_merging.q.out | 117 +-
.../results/clientpositive/join_nulls.q.out | 2 +-
.../results/clientpositive/limit_pushdown.q.out | 98 +-
.../test/results/clientpositive/lineage3.q.out | 29 +-
.../clientpositive/louter_join_ppr.q.out | 74 +-
.../results/clientpositive/optional_outer.q.out | 36 +-
.../clientpositive/outer_join_ppr.q.java1.7.out | 168 ++-
.../results/clientpositive/ppd_gby_join.q.out | 104 +-
.../test/results/clientpositive/ppd_join.q.out | 106 +-
.../test/results/clientpositive/ppd_join2.q.out | 88 +-
.../test/results/clientpositive/ppd_join3.q.out | 114 +-
.../clientpositive/ppd_outer_join4.q.out | 88 +-
.../results/clientpositive/ppd_random.q.out | 80 +-
.../clientpositive/rcfile_null_value.q.out | 20 +-
.../clientpositive/router_join_ppr.q.out | 170 ++-
.../test/results/clientpositive/skewjoin.q.out | 46 +-
.../clientpositive/spark/auto_join12.q.out | 54 +-
.../clientpositive/spark/auto_join5.q.out | 10 +-
.../results/clientpositive/spark/join12.q.out | 26 +-
.../results/clientpositive/spark/join5.q.out | 20 +-
.../spark/join_merge_multi_expressions.q.out | 46 +-
.../clientpositive/spark/join_merging.q.out | 115 +-
.../clientpositive/spark/limit_pushdown.q.out | 80 +-
.../clientpositive/spark/louter_join_ppr.q.out | 432 ++++---
.../spark/outer_join_ppr.q.java1.7.out | 328 +++---
.../clientpositive/spark/ppd_gby_join.q.out | 106 +-
.../results/clientpositive/spark/ppd_join.q.out | 110 +-
.../clientpositive/spark/ppd_join2.q.out | 86 +-
.../clientpositive/spark/ppd_join3.q.out | 116 +-
.../clientpositive/spark/ppd_outer_join4.q.out | 124 +-
.../clientpositive/spark/router_join_ppr.q.out | 370 +++---
.../results/clientpositive/spark/skewjoin.q.out | 46 +-
.../clientpositive/tez/explainuser_1.q.out | 1110 +++++++++---------
.../clientpositive/tez/limit_pushdown.q.out | 78 +-
.../test/results/clientpositive/tez/mrr.q.out | 82 +-
.../results/clientpositive/tez/skewjoin.q.out | 26 +-
.../results/clientpositive/tez/tez_union.q.out | 88 +-
47 files changed, 2940 insertions(+), 3100 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/b963769d/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinCommuteRule.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinCommuteRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinCommuteRule.java
new file mode 100644
index 0000000..f73affc
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinCommuteRule.java
@@ -0,0 +1,96 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.calcite.rules;
+
+import org.apache.calcite.plan.RelOptRule;
+import org.apache.calcite.plan.RelOptRuleCall;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.core.Join;
+import org.apache.calcite.rel.core.Project;
+import org.apache.calcite.rel.rules.JoinCommuteRule;
+import org.apache.calcite.util.Permutation;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject;
+
+/**
+ * Planner rule that swaps the inputs of a Join when the permutation Project on
+ * top of it exactly cancels the reordering Project that the swap introduces.
+ */
+public class HiveJoinCommuteRule extends RelOptRule {
+ /** Shared instance that matches a HiveProject whose input is a HiveJoin. */
+ public static final HiveJoinCommuteRule INSTANCE = new HiveJoinCommuteRule(
+ HiveProject.class, HiveJoin.class);
+ /** Creates a rule that fires on a {@code projClazz} node whose input is a
+ * {@code joinClazz} node; the join's own inputs are unconstrained. */
+ public HiveJoinCommuteRule(Class<? extends Project> projClazz,
+ Class<? extends Join> joinClazz) {
+ super(operand(projClazz,
+ operand(joinClazz, any())));
+ }
+ /** Replaces Project-over-Join by the swapped Join, or leaves the plan as is. */
+ public void onMatch(final RelOptRuleCall call) {
+ Project topProject = call.rel(0);
+ Join join = call.rel(1);
+
+ // 1. The top project must be a permutation project (non-null result)
+ // and must not be the identity; otherwise the rule does nothing
+ final Permutation topPermutation = topProject.getPermutation();
+ if (topPermutation == null) {
+ return;
+ }
+ if (topPermutation.isIdentity()) {
+ return;
+ }
+
+ // 2. We swap the join; 'true' presumably enables outer-join swaps (confirm)
+ final RelNode swapped = JoinCommuteRule.swap(join,true);
+ if (swapped == null) {
+ return;
+ }
+
+ // 3. The swap is expected to return a project on top of the new join;
+ // if it returned a bare join there is nothing to cancel, so we bail out.
+ if (swapped instanceof Join) {
+ return;
+ }
+
+ // 4. The project produced by the swap must also be a permutation
+ // project and not the identity; otherwise the rule does nothing
+ final Project bottomProject = (Project) swapped;
+ final Permutation bottomPermutation = bottomProject.getPermutation();
+ if (bottomPermutation == null) {
+ return;
+ }
+ if (bottomPermutation.isIdentity()) {
+ return;
+ }
+
+ // 5. If the product of the topPermutation and bottomPermutation yields
+ // the identity, the two projects cancel out: we can use the swapped join
+ // directly and remove the project on top.
+ final Permutation product = topPermutation.product(bottomPermutation);
+ if (!product.isIdentity()) {
+ return;
+ }
+
+ // 6. Return the swapped join (the input of bottomProject) as the replacement
+ final Join swappedJoin = (Join) bottomProject.getInput(0);
+ call.transformTo(swappedJoin);
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/hive/blob/b963769d/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
index 7fd8c85..3b5dbe2 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
@@ -65,7 +65,6 @@ import org.apache.calcite.rel.metadata.ChainedRelMetadataProvider;
import org.apache.calcite.rel.metadata.RelMetadataProvider;
import org.apache.calcite.rel.rules.FilterAggregateTransposeRule;
import org.apache.calcite.rel.rules.FilterProjectTransposeRule;
-import org.apache.calcite.rel.rules.JoinPushTransitivePredicatesRule;
import org.apache.calcite.rel.rules.JoinToMultiJoinRule;
import org.apache.calcite.rel.rules.LoptOptimizeJoinRule;
import org.apache.calcite.rel.rules.ProjectMergeRule;
@@ -139,6 +138,7 @@ import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveFilterProjectTransp
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveFilterSetOpTransposeRule;
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveInsertExchange4JoinRule;
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveJoinAddNotNullRule;
+import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveJoinCommuteRule;
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveJoinProjectTransposeRule;
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveJoinPushTransitivePredicatesRule;
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveJoinToMultiJoinRule;
@@ -862,7 +862,16 @@ public class CalcitePlanner extends SemanticAnalyzer {
calciteOptimizedPlan = hepPlanner.findBestExp();
- // 4. Run rule to fix windowing issue when it is done over
+ // 4. Run rule to try to remove projects on top of join operators
+ hepPgmBldr = new HepProgramBuilder().addMatchOrder(HepMatchOrder.BOTTOM_UP);
+ hepPgmBldr.addRuleInstance(HiveJoinCommuteRule.INSTANCE);
+ hepPlanner = new HepPlanner(hepPgmBldr.build());
+ hepPlanner.registerMetadataProviders(list);
+ cluster.setMetadataProvider(new CachingRelMetadataProvider(chainedProvider, hepPlanner));
+ hepPlanner.setRoot(calciteOptimizedPlan);
+ calciteOptimizedPlan = hepPlanner.findBestExp();
+
+ // 5. Run rule to fix windowing issue when it is done over
// aggregation columns (HIVE-10627)
hepPgmBldr = new HepProgramBuilder().addMatchOrder(HepMatchOrder.BOTTOM_UP);
hepPgmBldr.addRuleInstance(HiveWindowingFixRule.INSTANCE);
@@ -872,9 +881,9 @@ public class CalcitePlanner extends SemanticAnalyzer {
hepPlanner.setRoot(calciteOptimizedPlan);
calciteOptimizedPlan = hepPlanner.findBestExp();
- // 5. Run rules to aid in translation from Calcite tree to Hive tree
+ // 6. Run rules to aid in translation from Calcite tree to Hive tree
if (HiveConf.getBoolVar(conf, ConfVars.HIVE_CBO_RETPATH_HIVEOP)) {
- // 5.1. Merge join into multijoin operators (if possible)
+ // 6.1. Merge join into multijoin operators (if possible)
hepPgmBldr = new HepProgramBuilder().addMatchOrder(HepMatchOrder.BOTTOM_UP);
hepPgmBldr.addRuleInstance(HiveJoinToMultiJoinRule.INSTANCE);
hepPgmBldr = hepPgmBldr.addRuleCollection(ImmutableList.of(
@@ -894,7 +903,7 @@ public class CalcitePlanner extends SemanticAnalyzer {
HiveAggregate.HIVE_AGGR_REL_FACTORY, HiveUnion.UNION_REL_FACTORY);
calciteOptimizedPlan = fieldTrimmer.trim(calciteOptimizedPlan);
- // 5.2. Introduce exchange operators below join/multijoin operators
+ // 6.2. Introduce exchange operators below join/multijoin operators
hepPgmBldr = new HepProgramBuilder().addMatchOrder(HepMatchOrder.BOTTOM_UP);
hepPgmBldr.addRuleInstance(HiveInsertExchange4JoinRule.EXCHANGE_BELOW_JOIN);
hepPgmBldr.addRuleInstance(HiveInsertExchange4JoinRule.EXCHANGE_BELOW_MULTIJOIN);
http://git-wip-us.apache.org/repos/asf/hive/blob/b963769d/ql/src/test/results/clientpositive/auto_join12.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/auto_join12.q.out b/ql/src/test/results/clientpositive/auto_join12.q.out
index e97d7e6..7d8db0a 100644
--- a/ql/src/test/results/clientpositive/auto_join12.q.out
+++ b/ql/src/test/results/clientpositive/auto_join12.q.out
@@ -32,7 +32,7 @@ STAGE PLANS:
$hdt$_0:$hdt$_0:$hdt$_0:src
Fetch Operator
limit: -1
- $hdt$_0:$hdt$_1:$hdt$_1:$hdt$_1:src
+ $hdt$_0:$hdt$_1:$hdt$_1:src
Fetch Operator
limit: -1
Alias -> Map Local Operator Tree:
@@ -51,7 +51,7 @@ STAGE PLANS:
keys:
0 _col0 (type: string)
1 _col0 (type: string)
- $hdt$_0:$hdt$_1:$hdt$_1:$hdt$_1:src
+ $hdt$_0:$hdt$_1:$hdt$_1:src
TableScan
alias: src
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
@@ -59,8 +59,8 @@ STAGE PLANS:
predicate: ((UDFToDouble(key) < 100.0) and (UDFToDouble(key) < 80.0)) (type: boolean)
Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: key (type: string), value (type: string)
- outputColumnNames: _col0, _col1
+ expressions: key (type: string)
+ outputColumnNames: _col0
Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
HashTable Sink Operator
keys:
@@ -77,8 +77,8 @@ STAGE PLANS:
predicate: ((UDFToDouble(key) < 100.0) and (UDFToDouble(key) < 80.0)) (type: boolean)
Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: key (type: string)
- outputColumnNames: _col0
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
Map Join Operator
condition map:
@@ -86,33 +86,29 @@ STAGE PLANS:
keys:
0 _col0 (type: string)
1 _col0 (type: string)
- outputColumnNames: _col1, _col2
+ outputColumnNames: _col0, _col2
Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col2 (type: string), _col1 (type: string)
- outputColumnNames: _col0, _col2
- Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 _col0 (type: string)
- 1 _col0 (type: string)
- outputColumnNames: _col1, _col3
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col1, _col3
+ Statistics: Num rows: 66 Data size: 706 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: hash(_col1,_col3) (type: int)
+ outputColumnNames: _col0
Statistics: Num rows: 66 Data size: 706 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: hash(_col1,_col3) (type: int)
+ Group By Operator
+ aggregations: sum(_col0)
+ mode: hash
outputColumnNames: _col0
- Statistics: Num rows: 66 Data size: 706 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: sum(_col0)
- mode: hash
- outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: bigint)
+ value expressions: _col0 (type: bigint)
Local Work:
Map Reduce Local Work
Reduce Operator Tree:
http://git-wip-us.apache.org/repos/asf/hive/blob/b963769d/ql/src/test/results/clientpositive/auto_join5.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/auto_join5.q.out b/ql/src/test/results/clientpositive/auto_join5.q.out
index 69b7aab..3209d07 100644
--- a/ql/src/test/results/clientpositive/auto_join5.q.out
+++ b/ql/src/test/results/clientpositive/auto_join5.q.out
@@ -46,11 +46,11 @@ STAGE PLANS:
Stage: Stage-5
Map Reduce Local Work
Alias -> Map Local Tables:
- $hdt$_1:$hdt$_1:src1
+ $hdt$_0:$hdt$_0:src1
Fetch Operator
limit: -1
Alias -> Map Local Operator Tree:
- $hdt$_1:$hdt$_1:src1
+ $hdt$_0:$hdt$_0:src1
TableScan
alias: src1
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
@@ -81,14 +81,14 @@ STAGE PLANS:
Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
Map Join Operator
condition map:
- Left Outer Join0 to 1
+ Right Outer Join0 to 1
keys:
0 _col0 (type: string)
1 _col0 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: UDFToInteger(_col2) (type: int), _col3 (type: string), UDFToInteger(_col0) (type: int), _col1 (type: string)
+ expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), UDFToInteger(_col2) (type: int), _col3 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE
File Output Operator
http://git-wip-us.apache.org/repos/asf/hive/blob/b963769d/ql/src/test/results/clientpositive/constantPropagateForSubQuery.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/constantPropagateForSubQuery.q.out b/ql/src/test/results/clientpositive/constantPropagateForSubQuery.q.out
index 40d2dd4..b52b475 100644
--- a/ql/src/test/results/clientpositive/constantPropagateForSubQuery.q.out
+++ b/ql/src/test/results/clientpositive/constantPropagateForSubQuery.q.out
@@ -80,20 +80,6 @@ STAGE PLANS:
Map Reduce
Map Operator Tree:
TableScan
- alias: b
- Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
- GatherStats: false
- Select Operator
- expressions: key (type: string), value (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
- tag: 0
- value expressions: _col0 (type: string), _col1 (type: string)
- auto parallelism: false
- TableScan
alias: a
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
GatherStats: false
@@ -108,9 +94,23 @@ STAGE PLANS:
Reduce Output Operator
sort order:
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- tag: 1
+ tag: 0
value expressions: _col1 (type: string)
auto parallelism: false
+ TableScan
+ alias: b
+ Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
+ GatherStats: false
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
+ tag: 1
+ value expressions: _col0 (type: string), _col1 (type: string)
+ auto parallelism: false
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
@@ -203,8 +203,8 @@ STAGE PLANS:
name: default.src1
name: default.src1
Truncated Path -> Alias:
- /src [$hdt$_1:a]
- /src1 [$hdt$_0:b]
+ /src [$hdt$_0:a]
+ /src1 [$hdt$_1:b]
Needs Tagging: true
Reduce Operator Tree:
Join Operator
@@ -213,10 +213,10 @@ STAGE PLANS:
keys:
0
1
- outputColumnNames: _col0, _col1, _col3
+ outputColumnNames: _col1, _col2, _col3
Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: '429' (type: string), _col3 (type: string), _col0 (type: string), _col1 (type: string)
+ expressions: '429' (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
File Output Operator
http://git-wip-us.apache.org/repos/asf/hive/blob/b963769d/ql/src/test/results/clientpositive/correlationoptimizer15.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/correlationoptimizer15.q.out b/ql/src/test/results/clientpositive/correlationoptimizer15.q.out
index d5f45da..43ba27d 100644
--- a/ql/src/test/results/clientpositive/correlationoptimizer15.q.out
+++ b/ql/src/test/results/clientpositive/correlationoptimizer15.q.out
@@ -23,14 +23,14 @@ JOIN src yy
ON xx.key=yy.key ORDER BY xx.key, xx.cnt, yy.key
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-3 is a root stage
- Stage-4 depends on stages: Stage-3
- Stage-1 depends on stages: Stage-4
+ Stage-1 is a root stage
Stage-2 depends on stages: Stage-1
- Stage-0 depends on stages: Stage-2
+ Stage-3 depends on stages: Stage-2
+ Stage-4 depends on stages: Stage-3
+ Stage-0 depends on stages: Stage-4
STAGE PLANS:
- Stage: Stage-3
+ Stage: Stage-1
Map Reduce
Map Operator Tree:
TableScan
@@ -85,7 +85,7 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
- Stage: Stage-4
+ Stage: Stage-2
Map Reduce
Map Operator Tree:
TableScan
@@ -109,10 +109,17 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
- Stage: Stage-1
+ Stage: Stage-3
Map Reduce
Map Operator Tree:
TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 7 Data size: 54 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint)
+ TableScan
alias: yy
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
@@ -127,13 +134,6 @@ STAGE PLANS:
sort order: +
Map-reduce partition columns: _col0 (type: string)
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- TableScan
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 7 Data size: 54 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: bigint)
Reduce Operator Tree:
Join Operator
condition map:
@@ -143,18 +143,14 @@ STAGE PLANS:
1 _col0 (type: string)
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col1 (type: string), _col2 (type: bigint), _col0 (type: string)
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
- Stage: Stage-2
+ Stage: Stage-4
Map Reduce
Map Operator Tree:
TableScan
@@ -259,20 +255,20 @@ STAGE PLANS:
Map Reduce
Map Operator Tree:
TableScan
- alias: yy
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ alias: x
+ Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: key is not null (type: boolean)
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string)
outputColumnNames: _col0
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
TableScan
alias: x
Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
@@ -289,44 +285,23 @@ STAGE PLANS:
Map-reduce partition columns: _col0 (type: string)
Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
TableScan
- alias: x
- Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
+ alias: yy
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: key is not null (type: boolean)
- Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string)
outputColumnNames: _col0
- Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Reduce Operator Tree:
Demux Operator
Statistics: Num rows: 276 Data size: 2854 Basic stats: COMPLETE Column stats: NONE
- Mux Operator
- Statistics: Num rows: 277 Data size: 2854 Basic stats: COMPLETE Column stats: NONE
- Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 _col0 (type: string)
- 1 _col0 (type: string)
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
- Select Operator
- expressions: _col1 (type: string), _col2 (type: bigint), _col0 (type: string)
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Join Operator
condition map:
Inner Join 0 to 1
@@ -353,17 +328,30 @@ STAGE PLANS:
1 _col0 (type: string)
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
- Select Operator
- expressions: _col1 (type: string), _col2 (type: bigint), _col0 (type: string)
- outputColumnNames: _col0, _col1, _col2
+ File Output Operator
+ compressed: false
Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Mux Operator
+ Statistics: Num rows: 277 Data size: 2854 Basic stats: COMPLETE Column stats: NONE
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
Fetch Operator