You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by xu...@apache.org on 2014/12/17 03:54:53 UTC
svn commit: r1646142 [1/4] - in /hive/branches/spark/ql/src:
java/org/apache/hadoop/hive/ql/exec/spark/
java/org/apache/hadoop/hive/ql/optimizer/spark/
java/org/apache/hadoop/hive/ql/parse/spark/
test/results/clientpositive/spark/
Author: xuefu
Date: Wed Dec 17 02:54:52 2014
New Revision: 1646142
URL: http://svn.apache.org/r1646142
Log:
HIVE-9041: Generate better plan for queries containing both union and multi-insert [Spark Branch] (Chao via Xuefu)
Removed:
hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/IdentityTran.java
Modified:
hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkPlanGenerator.java
hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkReduceSinkMapJoinProc.java
hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/GenSparkProcContext.java
hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/GenSparkUtils.java
hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/GenSparkWork.java
hive/branches/spark/ql/src/test/results/clientpositive/spark/auto_join27.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_10.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/column_access_stats.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby_sort_1_23.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby_sort_skew_1_23.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/join34.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/join35.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/load_dyn_part13.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/load_dyn_part14.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/multi_insert.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/multi_insert_move_tasks_share_dependencies.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/multi_join_union.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/optimize_nullscan.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/skewjoin_union_remove_1.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/skewjoin_union_remove_2.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/skewjoinopt1.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/skewjoinopt10.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/skewjoinopt11.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/skewjoinopt12.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/skewjoinopt14.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/skewjoinopt15.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/skewjoinopt16.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/skewjoinopt17.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/skewjoinopt19.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/skewjoinopt2.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/skewjoinopt20.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/skewjoinopt3.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/skewjoinopt4.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/skewjoinopt5.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/skewjoinopt6.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/skewjoinopt7.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/skewjoinopt8.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/skewjoinopt9.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/stats1.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/temp_table.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/union.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/union10.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/union11.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/union13.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/union14.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/union15.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/union16.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/union18.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/union19.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/union2.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/union23.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/union25.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/union28.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/union29.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/union3.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/union30.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/union33.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/union4.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/union5.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/union6.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/union7.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/union8.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/union9.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/union_ppr.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/union_remove_1.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/union_remove_10.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/union_remove_11.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/union_remove_15.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/union_remove_16.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/union_remove_17.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/union_remove_18.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/union_remove_19.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/union_remove_2.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/union_remove_20.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/union_remove_21.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/union_remove_24.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/union_remove_25.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/union_remove_3.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/union_remove_4.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/union_remove_5.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/union_remove_6.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/union_remove_7.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/union_remove_8.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/union_remove_9.q.out
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkPlanGenerator.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkPlanGenerator.java?rev=1646142&r1=1646141&r2=1646142&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkPlanGenerator.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkPlanGenerator.java Wed Dec 17 02:54:52 2014
@@ -90,26 +90,10 @@ public class SparkPlanGenerator {
for (BaseWork work : sparkWork.getAllWork()) {
SparkTran tran;
- if (work instanceof MapWork) {
- SparkTran mapInput = generateParentTran(sparkPlan, sparkWork, work);
- tran = generate((MapWork)work);
- sparkPlan.addTran(tran);
- sparkPlan.connect(mapInput, tran);
- } else if (work instanceof ReduceWork) {
- SparkTran shuffleTran = generateParentTran(sparkPlan, sparkWork, work);
- tran = generate((ReduceWork)work);
- sparkPlan.addTran(tran);
- sparkPlan.connect(shuffleTran, tran);
- } else {
- List<BaseWork> parentWorks = sparkWork.getParents(work);
- tran = new IdentityTran();
- sparkPlan.addTran(tran);
- for (BaseWork parentWork : parentWorks) {
- SparkTran parentTran = workToTranMap.get(parentWork);
- sparkPlan.connect(parentTran, tran);
- }
- }
-
+ SparkTran parentTran = generateParentTran(sparkPlan, sparkWork, work);
+ tran = generate(work);
+ sparkPlan.addTran(tran);
+ sparkPlan.connect(parentTran, tran);
workToTranMap.put(work, tran);
}
@@ -137,8 +121,8 @@ public class SparkPlanGenerator {
sparkPlan.connect(workToTranMap.get(parentWork), result);
}
} else {
- throw new IllegalStateException("AssertionError: generateParentTran() only expect MapWork or ReduceWork," +
- " but found " + work.getClass().getName());
+ throw new IllegalStateException("AssertionError: expected either MapWork or ReduceWork, " +
+ "but found " + work.getClass().getName());
}
if (cloneToWork.containsKey(work)) {
@@ -199,23 +183,24 @@ public class SparkPlanGenerator {
return new ShuffleTran(shuffler, edge.getNumPartitions(), toCache);
}
- private MapTran generate(MapWork mw) throws Exception {
- initStatsPublisher(mw);
- MapTran result = new MapTran();
- JobConf newJobConf = cloneJobConf(mw);
- byte[] confBytes = KryoSerializer.serializeJobConf(newJobConf);
- HiveMapFunction mapFunc = new HiveMapFunction(confBytes, sparkReporter);
- result.setMapFunction(mapFunc);
- return result;
- }
-
- private ReduceTran generate(ReduceWork rw) throws Exception {
- ReduceTran result = new ReduceTran();
- JobConf newJobConf = cloneJobConf(rw);
+ private SparkTran generate(BaseWork work) throws Exception {
+ initStatsPublisher(work);
+ JobConf newJobConf = cloneJobConf(work);
byte[] confBytes = KryoSerializer.serializeJobConf(newJobConf);
- HiveReduceFunction redFunc = new HiveReduceFunction(confBytes, sparkReporter);
- result.setReduceFunction(redFunc);
- return result;
+ if (work instanceof MapWork) {
+ MapTran mapTran = new MapTran();
+ HiveMapFunction mapFunc = new HiveMapFunction(confBytes, sparkReporter);
+ mapTran.setMapFunction(mapFunc);
+ return mapTran;
+ } else if (work instanceof ReduceWork) {
+ ReduceTran reduceTran = new ReduceTran();
+ HiveReduceFunction reduceFunc = new HiveReduceFunction(confBytes, sparkReporter);
+ reduceTran.setReduceFunction(reduceFunc);
+ return reduceTran;
+ } else {
+ throw new IllegalStateException("AssertionError: expected either MapWork or ReduceWork, " +
+ "but found " + work.getClass().getName());
+ }
}
private JobConf cloneJobConf(BaseWork work) throws Exception {
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkReduceSinkMapJoinProc.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkReduceSinkMapJoinProc.java?rev=1646142&r1=1646141&r2=1646142&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkReduceSinkMapJoinProc.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkReduceSinkMapJoinProc.java Wed Dec 17 02:54:52 2014
@@ -150,16 +150,10 @@ public class SparkReduceSinkMapJoinProc
*
*/
mapJoinWork = context.mapJoinWorkMap.get(mapJoinOp);
- BaseWork parentWork;
- if (context.unionWorkMap.containsKey(parentRS)) {
- parentWork = context.unionWorkMap.get(parentRS);
- } else {
- int workMapSize = context.childToWorkMap.get(parentRS).size();
- Preconditions.checkArgument(workMapSize == 1,
- "AssertionError: expected context.childToWorkMap.get(parentRS).size() to be 1, but was " +
- workMapSize);
- parentWork = context.childToWorkMap.get(parentRS).get(0);
- }
+ int workMapSize = context.childToWorkMap.get(parentRS).size();
+ Preconditions.checkArgument(workMapSize == 1,
+ "AssertionError: expected context.childToWorkMap.get(parentRS).size() to be 1, but was " + workMapSize);
+ BaseWork parentWork = context.childToWorkMap.get(parentRS).get(0);
// set the link between mapjoin and parent vertex
int pos = context.mapJoinParentMap.get(mapJoinOp).indexOf(parentRS);
@@ -204,7 +198,6 @@ public class SparkReduceSinkMapJoinProc
}
// remember the output name of the reduce sink
r.getConf().setOutputName(myWork.getName());
- context.connectedReduceSinks.add(r);
}
}
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/GenSparkProcContext.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/GenSparkProcContext.java?rev=1646142&r1=1646141&r2=1646142&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/GenSparkProcContext.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/GenSparkProcContext.java Wed Dec 17 02:54:52 2014
@@ -18,6 +18,7 @@
package org.apache.hadoop.hive.ql.parse.spark;
+import org.apache.hadoop.hive.common.ObjectPair;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.exec.DependencyCollectionTask;
import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
@@ -25,7 +26,6 @@ import org.apache.hadoop.hive.ql.exec.Ma
import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
import org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator;
-import org.apache.hadoop.hive.ql.exec.TableScanOperator;
import org.apache.hadoop.hive.ql.exec.Task;
import org.apache.hadoop.hive.ql.exec.TaskFactory;
import org.apache.hadoop.hive.ql.exec.UnionOperator;
@@ -39,6 +39,7 @@ import org.apache.hadoop.hive.ql.plan.De
import org.apache.hadoop.hive.ql.plan.MapWork;
import org.apache.hadoop.hive.ql.plan.MoveWork;
import org.apache.hadoop.hive.ql.plan.OperatorDesc;
+import org.apache.hadoop.hive.ql.plan.ReduceWork;
import org.apache.hadoop.hive.ql.plan.SparkEdgeProperty;
import org.apache.hadoop.hive.ql.plan.SparkWork;
@@ -86,9 +87,9 @@ public class GenSparkProcContext impleme
// one.
public BaseWork preceedingWork;
- // map that keeps track of the last operator of a task to the work
- // that follows it. This is used for connecting them later.
- public final Map<Operator<?>, BaseWork> leafOperatorToFollowingWork;
+ // map that keeps track of the last operator of a task to the work
+ // that follows that operator. This is used for connecting them later.
+ public final Map<ReduceSinkOperator, ObjectPair<SparkEdgeProperty, ReduceWork>> leafOpToFollowingWorkInfo;
// a map that keeps track of work that need to be linked while
// traversing an operator tree
@@ -132,9 +133,6 @@ public class GenSparkProcContext impleme
public final Set<FileSinkOperator> fileSinkSet;
public final Map<FileSinkOperator, List<FileSinkOperator>> fileSinkMap;
- // remember which reducesinks we've already connected
- public final Set<ReduceSinkOperator> connectedReduceSinks;
-
// Alias to operator map, from the semantic analyzer.
// This is necessary as sometimes semantic analyzer's mapping is different than operator's own alias.
public final Map<String, Operator<? extends OperatorDesc>> topOps;
@@ -153,7 +151,8 @@ public class GenSparkProcContext impleme
this.currentTask = (SparkTask) TaskFactory.get(
new SparkWork(conf.getVar(HiveConf.ConfVars.HIVEQUERYID)), conf);
this.rootTasks.add(currentTask);
- this.leafOperatorToFollowingWork = new LinkedHashMap<Operator<?>, BaseWork>();
+ this.leafOpToFollowingWorkInfo =
+ new LinkedHashMap<ReduceSinkOperator, ObjectPair<SparkEdgeProperty, ReduceWork>>();
this.linkOpWithWorkMap = new LinkedHashMap<Operator<?>, Map<BaseWork, SparkEdgeProperty>>();
this.linkWorkWithReduceSinkMap = new LinkedHashMap<BaseWork, List<ReduceSinkOperator>>();
this.smbJoinWorkMap = new LinkedHashMap<SMBMapJoinOperator, MapWork>();
@@ -173,6 +172,5 @@ public class GenSparkProcContext impleme
this.clonedReduceSinks = new LinkedHashSet<ReduceSinkOperator>();
this.fileSinkSet = new LinkedHashSet<FileSinkOperator>();
this.fileSinkMap = new LinkedHashMap<FileSinkOperator, List<FileSinkOperator>>();
- this.connectedReduceSinks = new LinkedHashSet<ReduceSinkOperator>();
}
}
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/GenSparkUtils.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/GenSparkUtils.java?rev=1646142&r1=1646141&r2=1646142&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/GenSparkUtils.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/GenSparkUtils.java Wed Dec 17 02:54:52 2014
@@ -89,13 +89,6 @@ public class GenSparkUtils {
sequenceNumber = 0;
}
- public UnionWork createUnionWork(GenSparkProcContext context, Operator<?> operator, SparkWork sparkWork) {
- UnionWork unionWork = new UnionWork("Union "+ (++sequenceNumber));
- context.unionWorkMap.put(operator, unionWork);
- sparkWork.add(unionWork);
- return unionWork;
- }
-
public ReduceWork createReduceWork(GenSparkProcContext context, Operator<?> root, SparkWork sparkWork) throws SemanticException {
Preconditions.checkArgument(!root.getParentOperators().isEmpty(),
"AssertionError: expected root.getParentOperators() to be non-empty");
@@ -122,10 +115,7 @@ public class GenSparkUtils {
SparkEdgeProperty edgeProp = getEdgeProperty(reduceSink, reduceWork);
- sparkWork.connect(
- context.preceedingWork,
- reduceWork, edgeProp);
- context.connectedReduceSinks.add(reduceSink);
+ sparkWork.connect(context.preceedingWork, reduceWork, edgeProp);
return reduceWork;
}
@@ -220,7 +210,7 @@ public class GenSparkUtils {
for (Operator<?> op : opQueue) {
Operator<?> newOp = newOpQueue_it.next();
if (op instanceof FileSinkOperator) {
- List<FileSinkOperator> fileSinkList = context.fileSinkMap.get((FileSinkOperator)op);
+ List<FileSinkOperator> fileSinkList = context.fileSinkMap.get(op);
if (fileSinkList == null) {
fileSinkList = new LinkedList<FileSinkOperator>();
}
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/GenSparkWork.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/GenSparkWork.java?rev=1646142&r1=1646141&r2=1646142&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/GenSparkWork.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/GenSparkWork.java Wed Dec 17 02:54:52 2014
@@ -21,7 +21,7 @@ package org.apache.hadoop.hive.ql.parse.
import com.google.common.base.Preconditions;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.common.ObjectPair;
import org.apache.hadoop.hive.ql.exec.DummyStoreOperator;
import org.apache.hadoop.hive.ql.exec.HashTableDummyOperator;
import org.apache.hadoop.hive.ql.exec.MapJoinOperator;
@@ -36,11 +36,11 @@ import org.apache.hadoop.hive.ql.optimiz
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.plan.BaseWork;
import org.apache.hadoop.hive.ql.plan.MapWork;
+import org.apache.hadoop.hive.ql.plan.OperatorDesc;
import org.apache.hadoop.hive.ql.plan.ReduceSinkDesc;
import org.apache.hadoop.hive.ql.plan.ReduceWork;
import org.apache.hadoop.hive.ql.plan.SparkEdgeProperty;
import org.apache.hadoop.hive.ql.plan.SparkWork;
-import org.apache.hadoop.hive.ql.plan.UnionWork;
import java.util.ArrayList;
import java.util.LinkedList;
@@ -83,9 +83,8 @@ public class GenSparkWork implements Nod
Preconditions.checkArgument(context.currentRootOperator != null,
"AssertionError: expected context.currentRootOperator to be not null");
- // Operator is a file sink or reduce sink. Something that forces
- // a new vertex.
- Operator<?> operator = (Operator<?>) nd;
+ // Operator is a file sink or reduce sink. Something that forces a new vertex.
+ Operator<? extends OperatorDesc> operator = (Operator<? extends OperatorDesc>) nd;
// root is the start of the operator pipeline we're currently
// packing into a vertex, typically a table scan, union or join
@@ -102,7 +101,6 @@ public class GenSparkWork implements Nod
SparkWork sparkWork = context.currentTask.getWork();
-
if (GenSparkUtils.getChildOperator(root, DummyStoreOperator.class) != null) {
/*
* SMB join case:
@@ -120,7 +118,7 @@ public class GenSparkWork implements Nod
*/
return null;
}
- SMBMapJoinOperator smbOp = (SMBMapJoinOperator) GenSparkUtils.getChildOperator(root, SMBMapJoinOperator.class);
+ SMBMapJoinOperator smbOp = GenSparkUtils.getChildOperator(root, SMBMapJoinOperator.class);
// Right now the work graph is pretty simple. If there is no
// Preceding work we have a root and will generate a map
@@ -140,9 +138,9 @@ public class GenSparkWork implements Nod
// create a new vertex
if (context.preceedingWork == null) {
if (smbOp != null) {
- //This logic is for SortMergeBucket MapJoin case.
- //This MapWork (of big-table, see above..) is later initialized by SparkMapJoinFactory processor, so don't initialize it here.
- //Just keep track of it in the context, for later processing.
+ // This logic is for the SortMergeBucket MapJoin case.
+ // This MapWork (of the big table, see above..) is later initialized by the SparkMapJoinFactory
+ // processor, so don't initialize it here. Just keep track of it in the context, for later processing.
work = utils.createMapWork(context, root, sparkWork, null, true);
if (context.smbJoinWorkMap.get(smbOp) != null) {
throw new SemanticException("Each SMBMapJoin should be associated only with one Mapwork");
@@ -169,8 +167,7 @@ public class GenSparkWork implements Nod
if (!context.currentMapJoinOperators.isEmpty()) {
for (MapJoinOperator mj: context.currentMapJoinOperators) {
LOG.debug("Processing map join: " + mj);
- // remember the mapping in case we scan another branch of the
- // mapjoin later
+ // remember the mapping in case we scan another branch of the mapjoin later
if (!context.mapJoinWorkMap.containsKey(mj)) {
List<BaseWork> workItems = new LinkedList<BaseWork>();
workItems.add(work);
@@ -211,8 +208,7 @@ public class GenSparkWork implements Nod
// need to set up output name for reduce sink now that we know the name
// of the downstream work
- for (ReduceSinkOperator r:
- context.linkWorkWithReduceSinkMap.get(parentWork)) {
+ for (ReduceSinkOperator r : context.linkWorkWithReduceSinkMap.get(parentWork)) {
if (r.getConf().getOutputName() != null) {
LOG.debug("Cloning reduce sink for multi-child broadcast edge");
// we've already set this one up. Need to clone for the next work.
@@ -221,7 +217,6 @@ public class GenSparkWork implements Nod
context.clonedReduceSinks.add(r);
}
r.getConf().setOutputName(work.getName());
- context.connectedReduceSinks.add(r);
}
}
}
@@ -231,42 +226,35 @@ public class GenSparkWork implements Nod
context.currentMapJoinOperators.clear();
}
- // This is where we cut the tree as described above. We also remember that
- // we might have to connect parent work with this work later.
- for (Operator<?> parent: new ArrayList<Operator<?>>(root.getParentOperators())) {
- context.leafOperatorToFollowingWork.put(parent, work);
- LOG.debug("Removing " + parent + " as parent from " + root);
- root.removeParent(parent);
+ // Here we are disconnecting root from its parents. However, we need to save
+ // some information, since in the future we may reach the parent operators via a
+ // different path, and we may need to connect the parent works with the work associated
+ // with this root operator.
+ if (root.getNumParent() > 0) {
+ Preconditions.checkArgument(work instanceof ReduceWork,
+ "AssertionError: expected work to be a ReduceWork, but was " + work.getClass().getName());
+ ReduceWork reduceWork = (ReduceWork) work;
+ for (Operator<?> parent : new ArrayList<Operator<?>>(root.getParentOperators())) {
+ Preconditions.checkArgument(parent instanceof ReduceSinkOperator,
+ "AssertionError: expected operator to be a ReduceSinkOperator, but was " + parent.getClass().getName());
+ ReduceSinkOperator rsOp = (ReduceSinkOperator) parent;
+ SparkEdgeProperty edgeProp = GenSparkUtils.getEdgeProperty(rsOp, reduceWork);
+
+ rsOp.getConf().setOutputName(reduceWork.getName());
+ GenMapRedUtils.setKeyAndValueDesc(reduceWork, rsOp);
+
+ context.leafOpToFollowingWorkInfo.put(rsOp, ObjectPair.create(edgeProp, reduceWork));
+ LOG.debug("Removing " + parent + " as parent from " + root);
+ root.removeParent(parent);
+ }
}
+ // If `currentUnionOperators` is not empty, it means we are creating a BaseWork whose operator tree
+ // contains union operators. In this case, we need to save these BaseWorks, and remove
+ // the union operators from the operator tree later.
if (!context.currentUnionOperators.isEmpty()) {
- // if there are union all operators we need to add the work to the set
- // of union operators.
-
- UnionWork unionWork;
- if (context.unionWorkMap.containsKey(operator)) {
- // we've seen this terminal before and have created a union work object.
- // just need to add this work to it. There will be no children of this one
- // since we've passed this operator before.
- Preconditions.checkArgument(operator.getChildOperators().isEmpty(),
- "AssertionError: expected operator.getChildOperators() to be empty");
- unionWork = (UnionWork) context.unionWorkMap.get(operator);
-
- } else {
- // first time through. we need to create a union work object and add this
- // work to it. Subsequent work should reference the union and not the actual
- // work.
- unionWork = utils.createUnionWork(context, operator, sparkWork);
- }
-
- // finally hook everything up
- LOG.debug("Connecting union work ("+unionWork+") with work ("+work+")");
- SparkEdgeProperty edgeProp = new SparkEdgeProperty(SparkEdgeProperty.SHUFFLE_NONE);
- sparkWork.connect(work, unionWork, edgeProp);
- unionWork.addUnionOperators(context.currentUnionOperators);
context.currentUnionOperators.clear();
context.workWithUnionOperators.add(work);
- work = unionWork;
}
// We're scanning a tree from roots to leaf (this is not technically
@@ -280,39 +268,36 @@ public class GenSparkWork implements Nod
//
// Also note: the concept of leaf and root is reversed in hive for historical
// reasons. Roots are data sources, leaves are data sinks. I know.
- if (context.leafOperatorToFollowingWork.containsKey(operator)) {
-
- BaseWork followingWork = context.leafOperatorToFollowingWork.get(operator);
- long bytesPerReducer = context.conf.getLongVar(HiveConf.ConfVars.BYTESPERREDUCER);
-
- LOG.debug("Second pass. Leaf operator: "+operator
- +" has common downstream work:"+followingWork);
-
- // need to add this branch to the key + value info
- Preconditions.checkArgument(operator instanceof ReduceSinkOperator,
- "AssertionError: expected operator to be an instance of ReduceSinkOperator, but was " +
- operator.getClass().getName());
- Preconditions.checkArgument(followingWork instanceof ReduceWork,
- "AssertionError: expected followingWork to be an instance of ReduceWork, but was " +
- followingWork.getClass().getName());
- ReduceSinkOperator rs = (ReduceSinkOperator) operator;
- ReduceWork rWork = (ReduceWork) followingWork;
- GenMapRedUtils.setKeyAndValueDesc(rWork, rs);
-
- // remember which parent belongs to which tag
- rWork.getTagToInput().put(rs.getConf().getTag(), work.getName());
-
- // remember the output name of the reduce sink
- rs.getConf().setOutputName(rWork.getName());
-
- if (!context.connectedReduceSinks.contains(rs)) {
- // add dependency between the two work items
- SparkEdgeProperty edgeProp = GenSparkUtils.getEdgeProperty(rs, rWork);
- sparkWork.connect(work, rWork, edgeProp);
- context.connectedReduceSinks.add(rs);
+ if (context.leafOpToFollowingWorkInfo.containsKey(operator)) {
+ ObjectPair<SparkEdgeProperty, ReduceWork> childWorkInfo = context.leafOpToFollowingWorkInfo.get(operator);
+ SparkEdgeProperty edgeProp = childWorkInfo.getFirst();
+ ReduceWork childWork = childWorkInfo.getSecond();
+
+ LOG.debug("Second pass. Leaf operator: " + operator + " has common downstream work:" + childWork);
+
+ // We may have already connected `work` with `childWork`, in the case of, for example, a lateral view:
+ // TS
+ // |
+ // ...
+ // |
+ // LVF
+ // | \
+ // SEL SEL
+ // | |
+ // LVJ-UDTF
+ // |
+ // SEL
+ // |
+ // RS
+ // Here, RS can be reached from TS via two different paths. If there is any child work after RS,
+ // we don't want to connect them with the work associated with TS more than once.
+ if (sparkWork.getEdgeProperty(work, childWork) == null) {
+ sparkWork.connect(work, childWork, edgeProp);
+ } else {
+ LOG.debug("work " + work.getName() + " is already connected to " + childWork.getName() + " before");
}
} else {
- LOG.debug("First pass. Leaf operator: "+operator);
+ LOG.debug("First pass. Leaf operator: " + operator);
}
// No children means we're at the bottom. If there are more operators to scan
Modified: hive/branches/spark/ql/src/test/results/clientpositive/spark/auto_join27.q.out
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/spark/auto_join27.q.out?rev=1646142&r1=1646141&r2=1646142&view=diff
==============================================================================
--- hive/branches/spark/ql/src/test/results/clientpositive/spark/auto_join27.q.out (original)
+++ hive/branches/spark/ql/src/test/results/clientpositive/spark/auto_join27.q.out Wed Dec 17 02:54:52 2014
@@ -30,10 +30,9 @@ STAGE PLANS:
Stage: Stage-1
Spark
Edges:
- Reducer 3 <- Map 7 (PARTITION-LEVEL SORT, 1), Union 2 (PARTITION-LEVEL SORT, 1)
- Reducer 6 <- Map 5 (GROUP, 1)
- Reducer 4 <- Reducer 3 (GROUP, 1)
- Union 2 <- Map 1 (NONE, 0), Reducer 6 (NONE, 0)
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 6 (PARTITION-LEVEL SORT, 1), Reducer 5 (PARTITION-LEVEL SORT, 1)
+ Reducer 5 <- Map 4 (GROUP, 1)
+ Reducer 3 <- Reducer 2 (GROUP, 1)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -49,7 +48,7 @@ STAGE PLANS:
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Map 5
+ Map 4
Map Operator Tree:
TableScan
alias: src
@@ -67,7 +66,7 @@ STAGE PLANS:
sort order: ++
Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
- Map 7
+ Map 6
Map Operator Tree:
TableScan
alias: src
@@ -84,7 +83,7 @@ STAGE PLANS:
sort order: +
Map-reduce partition columns: _col0 (type: string)
Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
- Reducer 3
+ Reducer 2
Reduce Operator Tree:
Join Operator
condition map:
@@ -102,7 +101,7 @@ STAGE PLANS:
sort order:
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
value expressions: _col0 (type: bigint)
- Reducer 4
+ Reducer 3
Reduce Operator Tree:
Group By Operator
aggregations: count(VALUE._col0)
@@ -120,7 +119,7 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- Reducer 6
+ Reducer 5
Reduce Operator Tree:
Group By Operator
keys: KEY._col0 (type: string), KEY._col1 (type: string)
@@ -133,8 +132,6 @@ STAGE PLANS:
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Union 2
- Vertex: Union 2
Stage: Stage-0
Fetch Operator
Modified: hive/branches/spark/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_10.q.out
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_10.q.out?rev=1646142&r1=1646141&r2=1646142&view=diff
==============================================================================
--- hive/branches/spark/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_10.q.out (original)
+++ hive/branches/spark/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_10.q.out Wed Dec 17 02:54:52 2014
@@ -72,9 +72,8 @@ STAGE PLANS:
Stage: Stage-1
Spark
Edges:
- Reducer 3 <- Map 6 (PARTITION-LEVEL SORT, 3), Union 2 (PARTITION-LEVEL SORT, 3)
- Union 2 <- Map 1 (NONE, 0), Map 5 (NONE, 0)
- Reducer 4 <- Reducer 3 (GROUP, 1)
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 4 (PARTITION-LEVEL SORT, 3), Map 5 (PARTITION-LEVEL SORT, 3)
+ Reducer 3 <- Reducer 2 (GROUP, 1)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -93,7 +92,7 @@ STAGE PLANS:
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Map 5
+ Map 4
Map Operator Tree:
TableScan
alias: a
@@ -109,7 +108,7 @@ STAGE PLANS:
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Map 6
+ Map 5
Map Operator Tree:
TableScan
alias: a
@@ -126,7 +125,7 @@ STAGE PLANS:
sort order: +
Map-reduce partition columns: _col0 (type: int)
Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE
- Reducer 3
+ Reducer 2
Reduce Operator Tree:
Join Operator
condition map:
@@ -144,7 +143,7 @@ STAGE PLANS:
sort order:
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
value expressions: _col0 (type: bigint)
- Reducer 4
+ Reducer 3
Reduce Operator Tree:
Group By Operator
aggregations: count(VALUE._col0)
@@ -162,8 +161,6 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- Union 2
- Vertex: Union 2
Stage: Stage-0
Fetch Operator
Modified: hive/branches/spark/ql/src/test/results/clientpositive/spark/column_access_stats.q.out
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/spark/column_access_stats.q.out?rev=1646142&r1=1646141&r2=1646142&view=diff
==============================================================================
--- hive/branches/spark/ql/src/test/results/clientpositive/spark/column_access_stats.q.out (original)
+++ hive/branches/spark/ql/src/test/results/clientpositive/spark/column_access_stats.q.out Wed Dec 17 02:54:52 2014
@@ -185,8 +185,6 @@ STAGE DEPENDENCIES:
STAGE PLANS:
Stage: Stage-1
Spark
- Edges:
- Union 2 <- Map 1 (NONE, 0), Map 3 (NONE, 0)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -205,7 +203,7 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- Map 3
+ Map 2
Map Operator Tree:
TableScan
alias: t1
@@ -221,8 +219,6 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- Union 2
- Vertex: Union 2
Stage: Stage-0
Fetch Operator
@@ -267,8 +263,6 @@ STAGE DEPENDENCIES:
STAGE PLANS:
Stage: Stage-1
Spark
- Edges:
- Union 2 <- Map 1 (NONE, 0), Map 3 (NONE, 0)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -287,7 +281,7 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- Map 3
+ Map 2
Map Operator Tree:
TableScan
alias: t1
@@ -303,8 +297,6 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- Union 2
- Vertex: Union 2
Stage: Stage-0
Fetch Operator
Modified: hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby_sort_1_23.q.out
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby_sort_1_23.q.out?rev=1646142&r1=1646141&r2=1646142&view=diff
==============================================================================
--- hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby_sort_1_23.q.out (original)
+++ hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby_sort_1_23.q.out Wed Dec 17 02:54:52 2014
@@ -1948,8 +1948,6 @@ STAGE DEPENDENCIES:
STAGE PLANS:
Stage: Stage-1
Spark
- Edges:
- Union 2 <- Map 1 (NONE, 0), Map 3 (NONE, 0)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -2051,7 +2049,7 @@ STAGE PLANS:
name: default.t1
Truncated Path -> Alias:
/t1 [t1]
- Map 3
+ Map 2
Map Operator Tree:
TableScan
alias: t1
@@ -2150,8 +2148,6 @@ STAGE PLANS:
name: default.t1
Truncated Path -> Alias:
/t1 [t1]
- Union 2
- Vertex: Union 2
Stage: Stage-0
Move Operator
@@ -2313,8 +2309,7 @@ STAGE PLANS:
Stage: Stage-1
Spark
Edges:
- Reducer 4 <- Map 3 (GROUP, 3)
- Union 2 <- Map 1 (NONE, 0), Reducer 4 (NONE, 0)
+ Reducer 3 <- Map 2 (GROUP, 3)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -2419,7 +2414,7 @@ STAGE PLANS:
name: default.t1
Truncated Path -> Alias:
/t1 [t1]
- Map 3
+ Map 2
Map Operator Tree:
TableScan
alias: t1
@@ -2496,7 +2491,7 @@ STAGE PLANS:
name: default.t1
Truncated Path -> Alias:
/t1 [t1]
- Reducer 4
+ Reducer 3
Needs Tagging: false
Reduce Operator Tree:
Group By Operator
@@ -2537,8 +2532,6 @@ STAGE PLANS:
TotalFiles: 1
GatherStats: true
MultiFileSpray: false
- Union 2
- Vertex: Union 2
Stage: Stage-0
Move Operator
Modified: hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby_sort_skew_1_23.q.out
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby_sort_skew_1_23.q.out?rev=1646142&r1=1646141&r2=1646142&view=diff
==============================================================================
--- hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby_sort_skew_1_23.q.out (original)
+++ hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby_sort_skew_1_23.q.out Wed Dec 17 02:54:52 2014
@@ -2020,8 +2020,6 @@ STAGE DEPENDENCIES:
STAGE PLANS:
Stage: Stage-1
Spark
- Edges:
- Union 2 <- Map 1 (NONE, 0), Map 3 (NONE, 0)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -2123,7 +2121,7 @@ STAGE PLANS:
name: default.t1
Truncated Path -> Alias:
/t1 [t1]
- Map 3
+ Map 2
Map Operator Tree:
TableScan
alias: t1
@@ -2222,8 +2220,6 @@ STAGE PLANS:
name: default.t1
Truncated Path -> Alias:
/t1 [t1]
- Union 2
- Vertex: Union 2
Stage: Stage-0
Move Operator
@@ -2385,9 +2381,8 @@ STAGE PLANS:
Stage: Stage-1
Spark
Edges:
- Reducer 4 <- Map 3 (GROUP PARTITION-LEVEL SORT, 3)
- Reducer 5 <- Reducer 4 (GROUP, 3)
- Union 2 <- Map 1 (NONE, 0), Reducer 5 (NONE, 0)
+ Reducer 3 <- Map 2 (GROUP PARTITION-LEVEL SORT, 3)
+ Reducer 4 <- Reducer 3 (GROUP, 3)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -2492,7 +2487,7 @@ STAGE PLANS:
name: default.t1
Truncated Path -> Alias:
/t1 [t1]
- Map 3
+ Map 2
Map Operator Tree:
TableScan
alias: t1
@@ -2569,7 +2564,7 @@ STAGE PLANS:
name: default.t1
Truncated Path -> Alias:
/t1 [t1]
- Reducer 4
+ Reducer 3
Needs Tagging: false
Reduce Operator Tree:
Group By Operator
@@ -2586,7 +2581,7 @@ STAGE PLANS:
tag: -1
value expressions: _col1 (type: bigint)
auto parallelism: false
- Reducer 5
+ Reducer 4
Needs Tagging: false
Reduce Operator Tree:
Group By Operator
@@ -2627,8 +2622,6 @@ STAGE PLANS:
TotalFiles: 1
GatherStats: true
MultiFileSpray: false
- Union 2
- Vertex: Union 2
Stage: Stage-0
Move Operator
Modified: hive/branches/spark/ql/src/test/results/clientpositive/spark/join34.q.out
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/spark/join34.q.out?rev=1646142&r1=1646141&r2=1646142&view=diff
==============================================================================
--- hive/branches/spark/ql/src/test/results/clientpositive/spark/join34.q.out (original)
+++ hive/branches/spark/ql/src/test/results/clientpositive/spark/join34.q.out Wed Dec 17 02:54:52 2014
@@ -147,8 +147,7 @@ STAGE PLANS:
Stage: Stage-1
Spark
Edges:
- Reducer 3 <- Map 5 (PARTITION-LEVEL SORT, 1), Union 2 (PARTITION-LEVEL SORT, 1)
- Union 2 <- Map 1 (NONE, 0), Map 4 (NONE, 0)
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -218,7 +217,7 @@ STAGE PLANS:
name: default.src
Truncated Path -> Alias:
/src [x]
- Map 4
+ Map 3
Map Operator Tree:
TableScan
alias: x1
@@ -285,7 +284,7 @@ STAGE PLANS:
name: default.src
Truncated Path -> Alias:
/src [x1]
- Map 5
+ Map 4
Map Operator Tree:
TableScan
alias: x
@@ -352,7 +351,7 @@ STAGE PLANS:
name: default.src1
Truncated Path -> Alias:
/src1 [x]
- Reducer 3
+ Reducer 2
Needs Tagging: true
Reduce Operator Tree:
Join Operator
@@ -393,8 +392,6 @@ STAGE PLANS:
TotalFiles: 1
GatherStats: true
MultiFileSpray: false
- Union 2
- Vertex: Union 2
Stage: Stage-0
Move Operator
Modified: hive/branches/spark/ql/src/test/results/clientpositive/spark/join35.q.out
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/spark/join35.q.out?rev=1646142&r1=1646141&r2=1646142&view=diff
==============================================================================
--- hive/branches/spark/ql/src/test/results/clientpositive/spark/join35.q.out (original)
+++ hive/branches/spark/ql/src/test/results/clientpositive/spark/join35.q.out Wed Dec 17 02:54:52 2014
@@ -156,9 +156,8 @@ STAGE PLANS:
Spark
Edges:
Reducer 2 <- Map 1 (GROUP, 1)
- Reducer 4 <- Map 7 (PARTITION-LEVEL SORT, 1), Union 3 (PARTITION-LEVEL SORT, 1)
- Reducer 6 <- Map 5 (GROUP, 1)
- Union 3 <- Reducer 2 (NONE, 0), Reducer 6 (NONE, 0)
+ Reducer 3 <- Map 6 (PARTITION-LEVEL SORT, 1), Reducer 2 (PARTITION-LEVEL SORT, 1), Reducer 5 (PARTITION-LEVEL SORT, 1)
+ Reducer 5 <- Map 4 (GROUP, 1)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -234,7 +233,7 @@ STAGE PLANS:
name: default.src
Truncated Path -> Alias:
/src [x]
- Map 5
+ Map 4
Map Operator Tree:
TableScan
alias: x1
@@ -307,7 +306,7 @@ STAGE PLANS:
name: default.src
Truncated Path -> Alias:
/src [x1]
- Map 7
+ Map 6
Map Operator Tree:
TableScan
alias: x
@@ -389,7 +388,7 @@ STAGE PLANS:
tag: 0
value expressions: _col1 (type: bigint)
auto parallelism: false
- Reducer 4
+ Reducer 3
Needs Tagging: true
Reduce Operator Tree:
Join Operator
@@ -430,7 +429,7 @@ STAGE PLANS:
TotalFiles: 1
GatherStats: true
MultiFileSpray: false
- Reducer 6
+ Reducer 5
Needs Tagging: false
Reduce Operator Tree:
Group By Operator
@@ -445,8 +444,6 @@ STAGE PLANS:
tag: 0
value expressions: _col1 (type: bigint)
auto parallelism: false
- Union 3
- Vertex: Union 3
Stage: Stage-0
Move Operator
Modified: hive/branches/spark/ql/src/test/results/clientpositive/spark/load_dyn_part13.q.out
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/spark/load_dyn_part13.q.out?rev=1646142&r1=1646141&r2=1646142&view=diff
==============================================================================
--- hive/branches/spark/ql/src/test/results/clientpositive/spark/load_dyn_part13.q.out (original)
+++ hive/branches/spark/ql/src/test/results/clientpositive/spark/load_dyn_part13.q.out Wed Dec 17 02:54:52 2014
@@ -64,8 +64,6 @@ STAGE DEPENDENCIES:
STAGE PLANS:
Stage: Stage-1
Spark
- Edges:
- Union 2 <- Map 1 (NONE, 0), Map 3 (NONE, 0)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -84,7 +82,7 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.nzhang_part13
- Map 3
+ Map 2
Map Operator Tree:
TableScan
alias: src
@@ -100,8 +98,6 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.nzhang_part13
- Union 2
- Vertex: Union 2
Stage: Stage-0
Move Operator
Modified: hive/branches/spark/ql/src/test/results/clientpositive/spark/load_dyn_part14.q.out
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/spark/load_dyn_part14.q.out?rev=1646142&r1=1646141&r2=1646142&view=diff
==============================================================================
--- hive/branches/spark/ql/src/test/results/clientpositive/spark/load_dyn_part14.q.out (original)
+++ hive/branches/spark/ql/src/test/results/clientpositive/spark/load_dyn_part14.q.out Wed Dec 17 02:54:52 2014
@@ -63,9 +63,8 @@ STAGE PLANS:
Spark
Edges:
Reducer 2 <- Map 1 (GROUP, 1)
- Reducer 5 <- Map 4 (GROUP, 1)
- Reducer 7 <- Map 6 (GROUP, 1)
- Union 3 <- Reducer 2 (NONE, 0), Reducer 5 (NONE, 0), Reducer 7 (NONE, 0)
+ Reducer 4 <- Map 3 (GROUP, 1)
+ Reducer 6 <- Map 5 (GROUP, 1)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -84,7 +83,7 @@ STAGE PLANS:
sort order:
Statistics: Num rows: 2 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col0 (type: string), _col1 (type: string)
- Map 4
+ Map 3
Map Operator Tree:
TableScan
alias: src
@@ -100,7 +99,7 @@ STAGE PLANS:
sort order:
Statistics: Num rows: 2 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col0 (type: string), _col1 (type: string)
- Map 6
+ Map 5
Map Operator Tree:
TableScan
alias: src
@@ -130,7 +129,7 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.nzhang_part14
- Reducer 5
+ Reducer 4
Reduce Operator Tree:
Select Operator
expressions: VALUE._col0 (type: string), VALUE._col1 (type: string)
@@ -144,7 +143,7 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.nzhang_part14
- Reducer 7
+ Reducer 6
Reduce Operator Tree:
Select Operator
expressions: VALUE._col0 (type: string), VALUE._col1 (type: string)
@@ -158,8 +157,6 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.nzhang_part14
- Union 3
- Vertex: Union 3
Stage: Stage-0
Move Operator
Modified: hive/branches/spark/ql/src/test/results/clientpositive/spark/multi_insert.q.out
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/spark/multi_insert.q.out?rev=1646142&r1=1646141&r2=1646142&view=diff
==============================================================================
--- hive/branches/spark/ql/src/test/results/clientpositive/spark/multi_insert.q.out (original)
+++ hive/branches/spark/ql/src/test/results/clientpositive/spark/multi_insert.q.out Wed Dec 17 02:54:52 2014
@@ -1170,8 +1170,6 @@ STAGE DEPENDENCIES:
STAGE PLANS:
Stage: Stage-2
Spark
- Edges:
- Union 2 <- Map 1 (NONE, 0), Map 3 (NONE, 0)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -1199,7 +1197,7 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.src_multi2
- Map 3
+ Map 2
Map Operator Tree:
TableScan
alias: src
@@ -1224,8 +1222,6 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.src_multi2
- Union 2
- Vertex: Union 2
Stage: Stage-0
Move Operator
@@ -1345,8 +1341,6 @@ STAGE DEPENDENCIES:
STAGE PLANS:
Stage: Stage-2
Spark
- Edges:
- Union 2 <- Map 1 (NONE, 0), Map 3 (NONE, 0)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -1374,7 +1368,7 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.src_multi2
- Map 3
+ Map 2
Map Operator Tree:
TableScan
alias: src
@@ -1399,8 +1393,6 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.src_multi2
- Union 2
- Vertex: Union 2
Stage: Stage-0
Move Operator
@@ -1520,8 +1512,6 @@ STAGE DEPENDENCIES:
STAGE PLANS:
Stage: Stage-2
Spark
- Edges:
- Union 2 <- Map 1 (NONE, 0), Map 3 (NONE, 0)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -1549,7 +1539,7 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.src_multi2
- Map 3
+ Map 2
Map Operator Tree:
TableScan
alias: src
@@ -1574,8 +1564,6 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.src_multi2
- Union 2
- Vertex: Union 2
Stage: Stage-0
Move Operator
@@ -1695,8 +1683,6 @@ STAGE DEPENDENCIES:
STAGE PLANS:
Stage: Stage-2
Spark
- Edges:
- Union 2 <- Map 1 (NONE, 0), Map 3 (NONE, 0)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -1724,7 +1710,7 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.src_multi2
- Map 3
+ Map 2
Map Operator Tree:
TableScan
alias: src
@@ -1749,8 +1735,6 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.src_multi2
- Union 2
- Vertex: Union 2
Stage: Stage-0
Move Operator
Modified: hive/branches/spark/ql/src/test/results/clientpositive/spark/multi_insert_move_tasks_share_dependencies.q.out
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/spark/multi_insert_move_tasks_share_dependencies.q.out?rev=1646142&r1=1646141&r2=1646142&view=diff
==============================================================================
--- hive/branches/spark/ql/src/test/results/clientpositive/spark/multi_insert_move_tasks_share_dependencies.q.out (original)
+++ hive/branches/spark/ql/src/test/results/clientpositive/spark/multi_insert_move_tasks_share_dependencies.q.out Wed Dec 17 02:54:52 2014
@@ -1203,8 +1203,6 @@ STAGE DEPENDENCIES:
STAGE PLANS:
Stage: Stage-2
Spark
- Edges:
- Union 2 <- Map 1 (NONE, 0), Map 3 (NONE, 0)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -1232,7 +1230,7 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.src_multi2
- Map 3
+ Map 2
Map Operator Tree:
TableScan
alias: src
@@ -1257,8 +1255,6 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.src_multi2
- Union 2
- Vertex: Union 2
Stage: Stage-3
Dependency Collection
@@ -1382,8 +1378,6 @@ STAGE DEPENDENCIES:
STAGE PLANS:
Stage: Stage-2
Spark
- Edges:
- Union 2 <- Map 1 (NONE, 0), Map 3 (NONE, 0)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -1411,7 +1405,7 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.src_multi2
- Map 3
+ Map 2
Map Operator Tree:
TableScan
alias: src
@@ -1436,8 +1430,6 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.src_multi2
- Union 2
- Vertex: Union 2
Stage: Stage-3
Dependency Collection
@@ -1561,8 +1553,6 @@ STAGE DEPENDENCIES:
STAGE PLANS:
Stage: Stage-2
Spark
- Edges:
- Union 2 <- Map 1 (NONE, 0), Map 3 (NONE, 0)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -1590,7 +1580,7 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.src_multi2
- Map 3
+ Map 2
Map Operator Tree:
TableScan
alias: src
@@ -1615,8 +1605,6 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.src_multi2
- Union 2
- Vertex: Union 2
Stage: Stage-3
Dependency Collection
@@ -1740,8 +1728,6 @@ STAGE DEPENDENCIES:
STAGE PLANS:
Stage: Stage-2
Spark
- Edges:
- Union 2 <- Map 1 (NONE, 0), Map 3 (NONE, 0)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -1769,7 +1755,7 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.src_multi2
- Map 3
+ Map 2
Map Operator Tree:
TableScan
alias: src
@@ -1794,8 +1780,6 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.src_multi2
- Union 2
- Vertex: Union 2
Stage: Stage-3
Dependency Collection
Modified: hive/branches/spark/ql/src/test/results/clientpositive/spark/multi_join_union.q.out
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/spark/multi_join_union.q.out?rev=1646142&r1=1646141&r2=1646142&view=diff
==============================================================================
--- hive/branches/spark/ql/src/test/results/clientpositive/spark/multi_join_union.q.out (original)
+++ hive/branches/spark/ql/src/test/results/clientpositive/spark/multi_join_union.q.out Wed Dec 17 02:54:52 2014
@@ -62,7 +62,7 @@ STAGE PLANS:
Spark
#### A masked pattern was here ####
Vertices:
- Map 6
+ Map 5
Map Operator Tree:
TableScan
alias: b
@@ -83,8 +83,7 @@ STAGE PLANS:
Stage: Stage-1
Spark
Edges:
- Reducer 3 <- Map 5 (PARTITION-LEVEL SORT, 3), Union 2 (PARTITION-LEVEL SORT, 3)
- Union 2 <- Map 1 (NONE, 0), Map 4 (NONE, 0)
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 3 (PARTITION-LEVEL SORT, 3), Map 4 (PARTITION-LEVEL SORT, 3)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -101,7 +100,7 @@ STAGE PLANS:
sort order: +
Map-reduce partition columns: _col1 (type: string)
value expressions: _col0 (type: string)
- Map 4
+ Map 3
Map Operator Tree:
TableScan
alias: src14
@@ -115,7 +114,7 @@ STAGE PLANS:
sort order: +
Map-reduce partition columns: _col1 (type: string)
value expressions: _col0 (type: string)
- Map 5
+ Map 4
Map Operator Tree:
TableScan
alias: a
@@ -134,7 +133,7 @@ STAGE PLANS:
1 key (type: string)
outputColumnNames: _col0, _col1, _col5, _col6
input vertices:
- 1 Map 6
+ 1 Map 5
Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col6 (type: string)
@@ -144,7 +143,7 @@ STAGE PLANS:
value expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string)
Local Work:
Map Reduce Local Work
- Reducer 3
+ Reducer 2
Reduce Operator Tree:
Join Operator
condition map:
@@ -165,8 +164,6 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- Union 2
- Vertex: Union 2
Stage: Stage-0
Fetch Operator
Modified: hive/branches/spark/ql/src/test/results/clientpositive/spark/optimize_nullscan.q.out
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/spark/optimize_nullscan.q.out?rev=1646142&r1=1646141&r2=1646142&view=diff
==============================================================================
--- hive/branches/spark/ql/src/test/results/clientpositive/spark/optimize_nullscan.q.out (original)
+++ hive/branches/spark/ql/src/test/results/clientpositive/spark/optimize_nullscan.q.out Wed Dec 17 02:54:52 2014
@@ -650,8 +650,7 @@ STAGE PLANS:
Spark
Edges:
Reducer 2 <- Map 1 (GROUP, 1)
- Reducer 5 <- Map 4 (GROUP, 1)
- Union 3 <- Reducer 2 (NONE, 0), Reducer 5 (NONE, 0)
+ Reducer 4 <- Map 3 (GROUP, 1)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -724,7 +723,7 @@ STAGE PLANS:
name: default.src
Truncated Path -> Alias:
-mr-10003default.src{} [src]
- Map 4
+ Map 3
Map Operator Tree:
TableScan
alias: srcpart
@@ -967,7 +966,7 @@ STAGE PLANS:
TotalFiles: 1
GatherStats: false
MultiFileSpray: false
- Reducer 5
+ Reducer 4
Needs Tagging: false
Reduce Operator Tree:
Group By Operator
@@ -997,8 +996,6 @@ STAGE PLANS:
TotalFiles: 1
GatherStats: false
MultiFileSpray: false
- Union 3
- Vertex: Union 3
Stage: Stage-0
Fetch Operator
@@ -1527,8 +1524,7 @@ STAGE PLANS:
Stage: Stage-1
Spark
Edges:
- Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 3), Map 5 (PARTITION-LEVEL SORT, 3)
- Union 2 <- Map 1 (NONE, 0), Reducer 4 (NONE, 0)
+ Reducer 3 <- Map 2 (PARTITION-LEVEL SORT, 3), Map 4 (PARTITION-LEVEL SORT, 3)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -1614,7 +1610,7 @@ STAGE PLANS:
name: default.src
Truncated Path -> Alias:
/src [src]
- Map 3
+ Map 2
Map Operator Tree:
TableScan
alias: src
@@ -1680,8 +1676,8 @@ STAGE PLANS:
name: default.src
Truncated Path -> Alias:
-mr-10003default.src{} [src]
- Map 5
- Reducer 4
+ Map 4
+ Reducer 3
Needs Tagging: true
Reduce Operator Tree:
Join Operator
@@ -1714,8 +1710,6 @@ STAGE PLANS:
TotalFiles: 1
GatherStats: false
MultiFileSpray: false
- Union 2
- Vertex: Union 2
Stage: Stage-0
Fetch Operator
@@ -1803,9 +1797,9 @@ STAGE PLANS:
value expressions: key (type: string)
auto parallelism: false
Path -> Alias:
- -mr-10004default.src{} [s1]
+ -mr-10003default.src{} [s1]
Path -> Partition:
- -mr-10004default.src{}
+ -mr-10003default.src{}
Partition
base file name: src
input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat
@@ -1850,7 +1844,7 @@ STAGE PLANS:
name: default.src
name: default.src
Truncated Path -> Alias:
- -mr-10004default.src{} [s1]
+ -mr-10003default.src{} [s1]
Map 3
Map Operator Tree:
TableScan
@@ -1870,9 +1864,9 @@ STAGE PLANS:
value expressions: key (type: string)
auto parallelism: false
Path -> Alias:
- -mr-10003default.src{} [s2]
+ -mr-10004default.src{} [s2]
Path -> Partition:
- -mr-10003default.src{}
+ -mr-10004default.src{}
Partition
base file name: src
input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat
@@ -1917,7 +1911,7 @@ STAGE PLANS:
name: default.src
name: default.src
Truncated Path -> Alias:
- -mr-10003default.src{} [s2]
+ -mr-10004default.src{} [s2]
Reducer 2
Needs Tagging: true
Reduce Operator Tree:
Modified: hive/branches/spark/ql/src/test/results/clientpositive/spark/skewjoin_union_remove_1.q.out
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/spark/skewjoin_union_remove_1.q.out?rev=1646142&r1=1646141&r2=1646142&view=diff
==============================================================================
--- hive/branches/spark/ql/src/test/results/clientpositive/spark/skewjoin_union_remove_1.q.out (original)
+++ hive/branches/spark/ql/src/test/results/clientpositive/spark/skewjoin_union_remove_1.q.out Wed Dec 17 02:54:52 2014
@@ -68,9 +68,8 @@ STAGE PLANS:
Stage: Stage-1
Spark
Edges:
- Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1)
- Reducer 6 <- Map 5 (PARTITION-LEVEL SORT, 1), Map 7 (PARTITION-LEVEL SORT, 1)
- Union 3 <- Reducer 2 (NONE, 0), Reducer 6 (NONE, 0)
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1)
+ Reducer 5 <- Map 4 (PARTITION-LEVEL SORT, 1), Map 6 (PARTITION-LEVEL SORT, 1)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -87,7 +86,7 @@ STAGE PLANS:
Map-reduce partition columns: key (type: string)
Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
value expressions: val (type: string)
- Map 4
+ Map 3
Map Operator Tree:
TableScan
alias: b
@@ -101,7 +100,7 @@ STAGE PLANS:
Map-reduce partition columns: key (type: string)
Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
value expressions: val (type: string)
- Map 5
+ Map 4
Map Operator Tree:
TableScan
alias: b
@@ -115,7 +114,7 @@ STAGE PLANS:
Map-reduce partition columns: key (type: string)
Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
value expressions: val (type: string)
- Map 7
+ Map 6
Map Operator Tree:
TableScan
alias: a
@@ -147,7 +146,7 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- Reducer 6
+ Reducer 5
Reduce Operator Tree:
Join Operator
condition map:
@@ -165,8 +164,6 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- Union 3
- Vertex: Union 3
Stage: Stage-0
Fetch Operator
@@ -210,9 +207,8 @@ STAGE PLANS:
Stage: Stage-1
Spark
Edges:
- Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 4 (PARTITION-LEVEL SORT, 3)
- Reducer 6 <- Map 5 (PARTITION-LEVEL SORT, 3), Map 7 (PARTITION-LEVEL SORT, 3)
- Union 3 <- Reducer 2 (NONE, 0), Reducer 6 (NONE, 0)
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 3 (PARTITION-LEVEL SORT, 3)
+ Reducer 5 <- Map 4 (PARTITION-LEVEL SORT, 3), Map 6 (PARTITION-LEVEL SORT, 3)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -229,7 +225,7 @@ STAGE PLANS:
Map-reduce partition columns: key (type: string)
Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
value expressions: val (type: string)
- Map 4
+ Map 3
Map Operator Tree:
TableScan
alias: b
@@ -243,7 +239,7 @@ STAGE PLANS:
Map-reduce partition columns: key (type: string)
Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
value expressions: val (type: string)
- Map 5
+ Map 4
Map Operator Tree:
TableScan
alias: b
@@ -257,7 +253,7 @@ STAGE PLANS:
Map-reduce partition columns: key (type: string)
Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
value expressions: val (type: string)
- Map 7
+ Map 6
Map Operator Tree:
TableScan
alias: a
@@ -289,7 +285,7 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- Reducer 6
+ Reducer 5
Reduce Operator Tree:
Join Operator
condition map:
@@ -307,8 +303,6 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- Union 3
- Vertex: Union 3
Stage: Stage-0
Fetch Operator
@@ -360,9 +354,8 @@ STAGE PLANS:
Stage: Stage-1
Spark
Edges:
- Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 4 (PARTITION-LEVEL SORT, 3)
- Reducer 6 <- Map 5 (PARTITION-LEVEL SORT, 3), Map 7 (PARTITION-LEVEL SORT, 3)
- Union 3 <- Reducer 2 (NONE, 0), Reducer 6 (NONE, 0)
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 3 (PARTITION-LEVEL SORT, 3)
+ Reducer 5 <- Map 4 (PARTITION-LEVEL SORT, 3), Map 6 (PARTITION-LEVEL SORT, 3)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -379,7 +372,7 @@ STAGE PLANS:
Map-reduce partition columns: key (type: string)
Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
value expressions: val (type: string)
- Map 4
+ Map 3
Map Operator Tree:
TableScan
alias: b
@@ -393,7 +386,7 @@ STAGE PLANS:
Map-reduce partition columns: key (type: string)
Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
value expressions: val (type: string)
- Map 5
+ Map 4
Map Operator Tree:
TableScan
alias: b
@@ -407,7 +400,7 @@ STAGE PLANS:
Map-reduce partition columns: key (type: string)
Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
value expressions: val (type: string)
- Map 7
+ Map 6
Map Operator Tree:
TableScan
alias: a
@@ -440,7 +433,7 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.dest1
- Reducer 6
+ Reducer 5
Reduce Operator Tree:
Join Operator
condition map:
@@ -459,8 +452,6 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.dest1
- Union 3
- Vertex: Union 3
Stage: Stage-0
Move Operator
@@ -520,9 +511,8 @@ STAGE PLANS:
Stage: Stage-1
Spark
Edges:
- Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 4 (PARTITION-LEVEL SORT, 3)
- Reducer 6 <- Map 5 (PARTITION-LEVEL SORT, 3), Map 7 (PARTITION-LEVEL SORT, 3)
- Union 3 <- Reducer 2 (NONE, 0), Reducer 6 (NONE, 0)
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 3 (PARTITION-LEVEL SORT, 3)
+ Reducer 5 <- Map 4 (PARTITION-LEVEL SORT, 3), Map 6 (PARTITION-LEVEL SORT, 3)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -539,7 +529,7 @@ STAGE PLANS:
Map-reduce partition columns: key (type: string)
Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
value expressions: val (type: string)
- Map 4
+ Map 3
Map Operator Tree:
TableScan
alias: b
@@ -553,7 +543,7 @@ STAGE PLANS:
Map-reduce partition columns: key (type: string)
Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
value expressions: val (type: string)
- Map 5
+ Map 4
Map Operator Tree:
TableScan
alias: b
@@ -567,7 +557,7 @@ STAGE PLANS:
Map-reduce partition columns: key (type: string)
Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
value expressions: val (type: string)
- Map 7
+ Map 6
Map Operator Tree:
TableScan
alias: a
@@ -600,7 +590,7 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.dest1
- Reducer 6
+ Reducer 5
Reduce Operator Tree:
Join Operator
condition map:
@@ -619,8 +609,6 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.dest1
- Union 3
- Vertex: Union 3
Stage: Stage-0
Move Operator
Modified: hive/branches/spark/ql/src/test/results/clientpositive/spark/skewjoin_union_remove_2.q.out
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/spark/skewjoin_union_remove_2.q.out?rev=1646142&r1=1646141&r2=1646142&view=diff
==============================================================================
--- hive/branches/spark/ql/src/test/results/clientpositive/spark/skewjoin_union_remove_2.q.out (original)
+++ hive/branches/spark/ql/src/test/results/clientpositive/spark/skewjoin_union_remove_2.q.out Wed Dec 17 02:54:52 2014
@@ -80,9 +80,8 @@ STAGE PLANS:
Stage: Stage-1
Spark
Edges:
- Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1)
- Reducer 7 <- Map 6 (PARTITION-LEVEL SORT, 1), Map 8 (PARTITION-LEVEL SORT, 1), Map 9 (PARTITION-LEVEL SORT, 1)
- Union 3 <- Reducer 2 (NONE, 0), Reducer 7 (NONE, 0)
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1)
+ Reducer 6 <- Map 5 (PARTITION-LEVEL SORT, 1), Map 7 (PARTITION-LEVEL SORT, 1), Map 8 (PARTITION-LEVEL SORT, 1)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -99,7 +98,7 @@ STAGE PLANS:
Map-reduce partition columns: key (type: string)
Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
value expressions: val (type: string)
- Map 4
+ Map 3
Map Operator Tree:
TableScan
alias: b
@@ -113,7 +112,7 @@ STAGE PLANS:
Map-reduce partition columns: key (type: string)
Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
value expressions: val (type: string)
- Map 5
+ Map 4
Map Operator Tree:
TableScan
alias: c
@@ -127,7 +126,7 @@ STAGE PLANS:
Map-reduce partition columns: key (type: string)
Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
value expressions: val (type: string)
- Map 6
+ Map 5
Map Operator Tree:
TableScan
alias: b
@@ -141,7 +140,7 @@ STAGE PLANS:
Map-reduce partition columns: key (type: string)
Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
value expressions: val (type: string)
- Map 8
+ Map 7
Map Operator Tree:
TableScan
alias: c
@@ -155,7 +154,7 @@ STAGE PLANS:
Map-reduce partition columns: key (type: string)
Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
value expressions: val (type: string)
- Map 9
+ Map 8
Map Operator Tree:
TableScan
alias: a
@@ -189,7 +188,7 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- Reducer 7
+ Reducer 6
Reduce Operator Tree:
Join Operator
condition map:
@@ -209,8 +208,6 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- Union 3
- Vertex: Union 3
Stage: Stage-0
Fetch Operator