Posted to commits@hive.apache.org by xu...@apache.org on 2014/11/12 19:07:22 UTC
svn commit: r1638907 [1/3] - in /hive/branches/spark/ql/src:
java/org/apache/hadoop/hive/ql/exec/spark/
java/org/apache/hadoop/hive/ql/optimizer/spark/
java/org/apache/hadoop/hive/ql/parse/spark/
java/org/apache/hadoop/hive/ql/plan/ test/results/client...
Author: xuefu
Date: Wed Nov 12 18:07:16 2014
New Revision: 1638907
URL: http://svn.apache.org/r1638907
Log:
HIVE-8793: Refactor to make splitting SparkWork a physical resolver [Spark Branch] (Rui via Xuefu)
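For orientation: the patch moves the SparkWork-splitting step out of SparkPlanGenerator and into a PhysicalPlanResolver that runs during physical compilation. Below is a minimal sketch of that contract, using only types that appear in the patch; the no-op body is illustrative and not part of this commit:

    import org.apache.hadoop.hive.ql.optimizer.physical.PhysicalContext;
    import org.apache.hadoop.hive.ql.optimizer.physical.PhysicalPlanResolver;
    import org.apache.hadoop.hive.ql.parse.SemanticException;

    public class NoOpResolver implements PhysicalPlanResolver {
      @Override
      public PhysicalContext resolve(PhysicalContext pctx) throws SemanticException {
        // A resolver may rewrite the task graph in place; it returns the
        // (possibly updated) context so resolvers can be chained.
        return pctx;
      }
    }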
Added:
hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SplitSparkWorkResolver.java
Modified:
hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkPlanGenerator.java
hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/GenSparkUtils.java
hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SparkCompiler.java
hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/plan/SparkWork.java
hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby10.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby11.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby7_map.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby7_map_skew.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby7_noskew.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby7_noskew_multi_single_reducer.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby8.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby8_map.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby8_map_skew.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby8_noskew.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby9.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby_complex_types.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby_complex_types_multi_single_reducer.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby_cube1.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby_multi_insert_common_distinct.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby_multi_single_reducer.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby_position.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby_rollup1.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/input1_limit.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/insert_into3.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/multi_insert_gby3.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/multi_insert_lateral_view.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/multi_insert_mixed.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/multi_insert_move_tasks_share_dependencies.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/multigroupby_singlemr.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/subquery_multiinsert.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkPlanGenerator.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkPlanGenerator.java?rev=1638907&r1=1638906&r2=1638907&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkPlanGenerator.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkPlanGenerator.java Wed Nov 12 18:07:16 2014
@@ -18,23 +18,15 @@
package org.apache.hadoop.hive.ql.exec.spark;
-import java.util.ArrayList;
import java.util.HashMap;
-import java.util.HashSet;
-import java.util.Iterator;
-import java.util.LinkedList;
import java.util.List;
import java.util.Map;
-import java.util.Queue;
-import java.util.Set;
import com.google.common.base.Preconditions;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hive.ql.exec.Operator;
-import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
import org.apache.hadoop.hive.ql.io.merge.MergeFileMapper;
import org.apache.hadoop.hive.ql.io.merge.MergeFileOutputFormat;
import org.apache.hadoop.hive.ql.io.merge.MergeFileWork;
@@ -70,7 +62,7 @@ public class SparkPlanGenerator {
private Context context;
private Path scratchDir;
private SparkReporter sparkReporter;
- private final Map<BaseWork, BaseWork> cloneToWork;
+ private Map<BaseWork, BaseWork> cloneToWork;
private final Map<BaseWork, SparkTran> workToTranMap;
private final Map<BaseWork, SparkTran> workToParentWorkTranMap;
@@ -85,7 +77,6 @@ public class SparkPlanGenerator {
this.context = context;
this.jobConf = jobConf;
this.scratchDir = scratchDir;
- this.cloneToWork = new HashMap<BaseWork, BaseWork>();
this.workToTranMap = new HashMap<BaseWork, SparkTran>();
this.workToParentWorkTranMap = new HashMap<BaseWork, SparkTran>();
this.sparkReporter = sparkReporter;
@@ -93,12 +84,10 @@ public class SparkPlanGenerator {
public SparkPlan generate(SparkWork sparkWork) throws Exception {
SparkPlan sparkPlan = new SparkPlan();
- cloneToWork.clear();
+ cloneToWork = sparkWork.getCloneToWork();
workToTranMap.clear();
workToParentWorkTranMap.clear();
- splitSparkWork(sparkWork);
-
for (BaseWork work : sparkWork.getAllWork()) {
SparkTran tran;
if (work instanceof MapWork) {
@@ -159,105 +148,6 @@ public class SparkPlanGenerator {
return result;
}
-
- private void splitSparkWork(SparkWork sparkWork) {
- // do a BFS on the sparkWork graph, and look for any work that has more than one child.
- // If we found such a work, we split it into multiple ones, one for each of its child.
- Queue<BaseWork> queue = new LinkedList<BaseWork>();
- Set<BaseWork> visited = new HashSet<BaseWork>();
- queue.addAll(sparkWork.getRoots());
- while (!queue.isEmpty()) {
- BaseWork work = queue.poll();
- if (!visited.add(work)) {
- continue;
- }
-
- List<BaseWork> childWorks = sparkWork.getChildren(work);
- // First, add all children of this work into queue, to be processed later.
- for (BaseWork w : childWorks) {
- queue.add(w);
- }
-
- // Second, check if this work has multiple reduceSinks. If so, do split.
- splitBaseWork(sparkWork, work, childWorks);
- }
- }
-
- private Set<Operator<?>> getAllReduceSinks(BaseWork work) {
- Set<Operator<?>> resultSet = work.getAllLeafOperators();
- Iterator<Operator<?>> it = resultSet.iterator();
- while (it.hasNext()) {
- if (!(it.next() instanceof ReduceSinkOperator)) {
- it.remove();
- }
- }
- return resultSet;
- }
-
- // Split work into multiple branches, one for each childWork in childWorks.
- // It also set up the connection between each parent work and child work.
- private void splitBaseWork(SparkWork sparkWork, BaseWork parentWork, List<BaseWork> childWorks) {
- if (getAllReduceSinks(parentWork).size() <= 1) {
- return;
- }
-
- // Grand-parent works - we need to set these to be the parents of the cloned works.
- List<BaseWork> grandParentWorks = sparkWork.getParents(parentWork);
- boolean isFirst = true;
-
- for (BaseWork childWork : childWorks) {
- BaseWork clonedParentWork = Utilities.cloneBaseWork(parentWork);
- String childReducerName = childWork.getName();
- SparkEdgeProperty clonedEdgeProperty = sparkWork.getEdgeProperty(parentWork, childWork);
-
- // We need to remove those branches that
- // 1, ended with a ReduceSinkOperator, and
- // 2, the ReduceSinkOperator's name is not the same as childReducerName.
- // Also, if the cloned work is not the first, we remove ALL leaf operators except
- // the corresponding ReduceSinkOperator.
- for (Operator<?> op : clonedParentWork.getAllLeafOperators()) {
- if (op instanceof ReduceSinkOperator) {
- if (!((ReduceSinkOperator)op).getConf().getOutputName().equals(childReducerName)) {
- removeOpRecursive(op);
- }
- } else if (!isFirst) {
- removeOpRecursive(op);
- }
- }
-
- isFirst = false;
-
- // Then, we need to set up the graph connection. Especially:
- // 1, we need to connect this cloned parent work with all the grand-parent works.
- // 2, we need to connect this cloned parent work with the corresponding child work.
- sparkWork.add(clonedParentWork);
- for (BaseWork gpw : grandParentWorks) {
- sparkWork.connect(gpw, clonedParentWork, sparkWork.getEdgeProperty(gpw, parentWork));
- }
- sparkWork.connect(clonedParentWork, childWork, clonedEdgeProperty);
- cloneToWork.put(clonedParentWork, parentWork);
- }
-
- sparkWork.remove(parentWork);
- }
-
- // Remove op from all its parents' child list.
- // Recursively remove any of its parent who only have this op as child.
- private void removeOpRecursive(Operator<?> operator) {
- List<Operator<?>> parentOperators = new ArrayList<Operator<?>>();
- for (Operator<?> op : operator.getParentOperators()) {
- parentOperators.add(op);
- }
- for (Operator<?> parentOperator : parentOperators) {
- Preconditions.checkArgument(parentOperator.getChildOperators().contains(operator),
- "AssertionError: parent of " + operator.getName() + " doesn't have it as child.");
- parentOperator.removeChild(operator);
- if (parentOperator.getNumChild() == 0) {
- removeOpRecursive(parentOperator);
- }
- }
- }
-
private Class getInputFormat(JobConf jobConf, MapWork mWork) throws HiveException {
// MergeFileWork is sub-class of MapWork, we don't need to distinguish here
if (mWork.getInputformat() != null) {
Added: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SplitSparkWorkResolver.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SplitSparkWorkResolver.java?rev=1638907&view=auto
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SplitSparkWorkResolver.java (added)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SplitSparkWorkResolver.java Wed Nov 12 18:07:16 2014
@@ -0,0 +1,185 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.optimizer.spark;
+
+import com.google.common.base.Preconditions;
+import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
+import org.apache.hadoop.hive.ql.exec.Task;
+import org.apache.hadoop.hive.ql.exec.Utilities;
+import org.apache.hadoop.hive.ql.exec.spark.SparkTask;
+import org.apache.hadoop.hive.ql.optimizer.physical.PhysicalContext;
+import org.apache.hadoop.hive.ql.optimizer.physical.PhysicalPlanResolver;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.ql.parse.spark.GenSparkUtils;
+import org.apache.hadoop.hive.ql.plan.BaseWork;
+import org.apache.hadoop.hive.ql.plan.MapWork;
+import org.apache.hadoop.hive.ql.plan.OperatorDesc;
+import org.apache.hadoop.hive.ql.plan.ReduceWork;
+import org.apache.hadoop.hive.ql.plan.SparkEdgeProperty;
+import org.apache.hadoop.hive.ql.plan.SparkWork;
+
+import java.io.Serializable;
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Map;
+import java.util.Queue;
+import java.util.Set;
+
+/**
+ * Do a BFS on the sparkWork graph, and look for any work that has more than one child.
+ * If we find such a work, we split it into multiple ones, one for each of its children.
+ */
+public class SplitSparkWorkResolver implements PhysicalPlanResolver {
+ @Override
+ public PhysicalContext resolve(PhysicalContext pctx) throws SemanticException {
+ for (Task<? extends Serializable> task : pctx.getRootTasks()) {
+ if (task instanceof SparkTask) {
+ splitSparkWork(((SparkTask) task).getWork());
+ }
+ }
+ return pctx;
+ }
+
+ private void splitSparkWork(SparkWork sparkWork) {
+ Queue<BaseWork> queue = new LinkedList<BaseWork>();
+ Set<BaseWork> visited = new HashSet<BaseWork>();
+ queue.addAll(sparkWork.getRoots());
+ while (!queue.isEmpty()) {
+ BaseWork work = queue.poll();
+ if (!visited.add(work)) {
+ continue;
+ }
+
+ List<BaseWork> childWorks = sparkWork.getChildren(work);
+ // First, add all children of this work to the queue, to be processed later.
+ for (BaseWork w : childWorks) {
+ queue.add(w);
+ }
+
+ // Second, check if this work has multiple reduceSinks. If so, split it.
+ splitBaseWork(sparkWork, work, childWorks);
+ }
+ }
+
+ // Split work into multiple branches, one for each childWork in childWorks.
+ // It also sets up the connection between each parent work and child work.
+ private void splitBaseWork(SparkWork sparkWork, BaseWork parentWork, List<BaseWork> childWorks) {
+ if (getAllReduceSinks(parentWork).size() <= 1) {
+ return;
+ }
+
+ // Grand-parent works - we need to set these to be the parents of the cloned works.
+ List<BaseWork> grandParentWorks = sparkWork.getParents(parentWork);
+ boolean isFirst = true;
+
+ for (BaseWork childWork : childWorks) {
+ BaseWork clonedParentWork = Utilities.cloneBaseWork(parentWork);
+ // give the cloned work a different name
+ clonedParentWork.setName(clonedParentWork.getName().replaceAll("^([a-zA-Z]+)(\\s+)(\\d+)",
+ "$1$2" + GenSparkUtils.getUtils().getNextSeqNumber()));
+ setStatistics(parentWork, clonedParentWork);
+ String childReducerName = childWork.getName();
+ SparkEdgeProperty clonedEdgeProperty = sparkWork.getEdgeProperty(parentWork, childWork);
+
+ // We need to remove those branches that
+ // 1, end with a ReduceSinkOperator, and
+ // 2, have a ReduceSinkOperator whose name differs from childReducerName.
+ // Also, if the cloned work is not the first, we remove ALL leaf operators except
+ // the corresponding ReduceSinkOperator.
+ for (Operator<?> op : clonedParentWork.getAllLeafOperators()) {
+ if (op instanceof ReduceSinkOperator) {
+ if (!((ReduceSinkOperator) op).getConf().getOutputName().equals(childReducerName)) {
+ removeOpRecursive(op);
+ }
+ } else if (!isFirst) {
+ removeOpRecursive(op);
+ }
+ }
+
+ isFirst = false;
+
+ // Then, we need to set up the graph connections. Specifically:
+ // 1, we need to connect this cloned parent work with all the grand-parent works.
+ // 2, we need to connect this cloned parent work with the corresponding child work.
+ sparkWork.add(clonedParentWork);
+ for (BaseWork gpw : grandParentWorks) {
+ sparkWork.connect(gpw, clonedParentWork, sparkWork.getEdgeProperty(gpw, parentWork));
+ }
+ sparkWork.connect(clonedParentWork, childWork, clonedEdgeProperty);
+ sparkWork.getCloneToWork().put(clonedParentWork, parentWork);
+ }
+
+ sparkWork.remove(parentWork);
+ }
+
+ private Set<Operator<?>> getAllReduceSinks(BaseWork work) {
+ Set<Operator<?>> resultSet = work.getAllLeafOperators();
+ Iterator<Operator<?>> it = resultSet.iterator();
+ while (it.hasNext()) {
+ if (!(it.next() instanceof ReduceSinkOperator)) {
+ it.remove();
+ }
+ }
+ return resultSet;
+ }
+
+ // Remove op from all its parents' child lists.
+ // Recursively remove any parent that has only this op as a child.
+ private void removeOpRecursive(Operator<?> operator) {
+ List<Operator<?>> parentOperators = new ArrayList<Operator<?>>();
+ for (Operator<?> op : operator.getParentOperators()) {
+ parentOperators.add(op);
+ }
+ for (Operator<?> parentOperator : parentOperators) {
+ Preconditions.checkArgument(parentOperator.getChildOperators().contains(operator),
+ "AssertionError: parent of " + operator.getName() + " doesn't have it as child.");
+ parentOperator.removeChild(operator);
+ if (parentOperator.getNumChild() == 0) {
+ removeOpRecursive(parentOperator);
+ }
+ }
+ }
+
+ // We lost statistics & opTraits through cloning; try to get them back.
+ // TODO: make sure this method is sufficient to solve the problem
+ private void setStatistics(BaseWork origin, BaseWork clone) {
+ if (origin instanceof MapWork && clone instanceof MapWork) {
+ MapWork originMW = (MapWork) origin;
+ MapWork cloneMW = (MapWork) clone;
+ for (Map.Entry<String, Operator<? extends OperatorDesc>> entry :
+ originMW.getAliasToWork().entrySet()) {
+ String alias = entry.getKey();
+ Operator<? extends OperatorDesc> cloneOP = cloneMW.getAliasToWork().get(alias);
+ if (cloneOP != null) {
+ setStatistics(entry.getValue(), cloneOP);
+ }
+ }
+ } else if (origin instanceof ReduceWork && clone instanceof ReduceWork) {
+ setStatistics(((ReduceWork) origin).getReducer(), ((ReduceWork) clone).getReducer());
+ }
+ }
+
+ private void setStatistics(Operator<? extends OperatorDesc> origin,
+ Operator<? extends OperatorDesc> clone) {
+ clone.getConf().setStatistics(origin.getConf().getStatistics());
+ clone.getConf().setOpTraits(origin.getConf().getOpTraits());
+ if (origin.getChildOperators().size() == clone.getChildOperators().size()) {
+ for (int i = 0; i < clone.getChildOperators().size(); i++) {
+ setStatistics(origin.getChildOperators().get(i), clone.getChildOperators().get(i));
+ }
+ }
+ }
+}
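
As a rough standalone illustration of the transformation this resolver performs (a hypothetical toy model with string vertices, not Hive code): a work with two children is replaced by one clone per child, each clone wired to exactly one child. This matches the Map 1 to Map 4/Map 5 renames visible in the test output below.

    import java.util.ArrayList;
    import java.util.Arrays;
    import java.util.Collections;
    import java.util.LinkedHashMap;
    import java.util.List;
    import java.util.Map;

    public class SplitSketch {
      public static void main(String[] args) {
        // Toy work graph: one map vertex feeding two reducers.
        Map<String, List<String>> graph = new LinkedHashMap<String, List<String>>();
        graph.put("Map 1", new ArrayList<String>(Arrays.asList("Reducer 2", "Reducer 3")));
        // Split "Map 1": one clone per child, each connected to a single child.
        List<String> children = graph.remove("Map 1");
        int seq = 4; // next free vertex number, as getNextSeqNumber() would supply
        for (String child : children) {
          graph.put("Map " + seq++, Collections.singletonList(child));
        }
        System.out.println(graph); // prints: {Map 4=[Reducer 2], Map 5=[Reducer 3]}
      }
    }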
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/GenSparkUtils.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/GenSparkUtils.java?rev=1638907&r1=1638906&r2=1638907&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/GenSparkUtils.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/GenSparkUtils.java Wed Nov 12 18:07:16 2014
@@ -410,4 +410,8 @@ public class GenSparkUtils {
}
return null;
}
+
+ public synchronized int getNextSeqNumber() {
+ return ++sequenceNumber;
+ }
}
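
The new getNextSeqNumber() feeds the clone-renaming regex in SplitSparkWorkResolver above. A self-contained sketch of that rename, with the sequence number hard-coded to 5 for illustration:

    public class RenameSketch {
      public static void main(String[] args) {
        // Same pattern as SplitSparkWorkResolver: keep the leading word and
        // whitespace ($1$2), replace the trailing number with a fresh one.
        String renamed = "Reducer 2".replaceAll("^([a-zA-Z]+)(\\s+)(\\d+)", "$1$2" + 5);
        System.out.println(renamed); // prints: Reducer 5
      }
    }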
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SparkCompiler.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SparkCompiler.java?rev=1638907&r1=1638906&r2=1638907&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SparkCompiler.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SparkCompiler.java Wed Nov 12 18:07:16 2014
@@ -62,6 +62,7 @@ import org.apache.hadoop.hive.ql.optimiz
import org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer;
import org.apache.hadoop.hive.ql.optimizer.spark.SetSparkReducerParallelism;
import org.apache.hadoop.hive.ql.optimizer.spark.SparkSortMergeJoinFactory;
+import org.apache.hadoop.hive.ql.optimizer.spark.SplitSparkWorkResolver;
import org.apache.hadoop.hive.ql.parse.GlobalLimitCtx;
import org.apache.hadoop.hive.ql.parse.ParseContext;
import org.apache.hadoop.hive.ql.parse.SemanticException;
@@ -262,6 +263,8 @@ public class SparkCompiler extends TaskC
PhysicalContext physicalCtx = new PhysicalContext(conf, pCtx, pCtx.getContext(), rootTasks,
pCtx.getFetchTask());
+ physicalCtx = new SplitSparkWorkResolver().resolve(physicalCtx);
+
if (conf.getBoolVar(HiveConf.ConfVars.HIVENULLSCANOPTIMIZE)) {
physicalCtx = new NullScanOptimizer().resolve(physicalCtx);
} else {
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/plan/SparkWork.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/plan/SparkWork.java?rev=1638907&r1=1638906&r2=1638907&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/plan/SparkWork.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/plan/SparkWork.java Wed Nov 12 18:07:16 2014
@@ -49,15 +49,20 @@ public class SparkWork extends AbstractO
private final Set<BaseWork> roots = new HashSet<BaseWork>();
private final Set<BaseWork> leaves = new HashSet<BaseWork>();
- protected final Map<BaseWork, List<BaseWork>> workGraph = new HashMap<BaseWork, List<BaseWork>>();
- protected final Map<BaseWork, List<BaseWork>> invertedWorkGraph = new HashMap<BaseWork, List<BaseWork>>();
+ protected final Map<BaseWork, List<BaseWork>> workGraph =
+ new HashMap<BaseWork, List<BaseWork>>();
+ protected final Map<BaseWork, List<BaseWork>> invertedWorkGraph =
+ new HashMap<BaseWork, List<BaseWork>>();
protected final Map<Pair<BaseWork, BaseWork>, SparkEdgeProperty> edgeProperties =
new HashMap<Pair<BaseWork, BaseWork>, SparkEdgeProperty>();
private Map<String, List<String>> requiredCounterPrefix;
+ private final Map<BaseWork, BaseWork> cloneToWork;
+
public SparkWork(String name) {
this.name = name + ":" + (++counter);
+ cloneToWork = new HashMap<BaseWork, BaseWork>();
}
@@ -305,20 +310,25 @@ public class SparkWork extends AbstractO
@Explain(displayName = "Edges")
public Map<String, List<Dependency>> getDependencyMap() {
Map<String, List<Dependency>> result = new LinkedHashMap<String, List<Dependency>>();
- for (Map.Entry<BaseWork, List<BaseWork>> entry: invertedWorkGraph.entrySet()) {
- List<Dependency> dependencies = new LinkedList<Dependency>();
- for (BaseWork d: entry.getValue()) {
- Dependency dependency = new Dependency();
- dependency.w = d;
- dependency.prop = getEdgeProperty(d, entry.getKey());
- dependencies.add(dependency);
- }
- if (!dependencies.isEmpty()) {
- Collections.sort(dependencies);
- result.put(entry.getKey().getName(), dependencies);
+ for (BaseWork baseWork : getAllWork()) {
+ if (invertedWorkGraph.get(baseWork) != null && invertedWorkGraph.get(baseWork).size() > 0) {
+ List<Dependency> dependencies = new LinkedList<Dependency>();
+ for (BaseWork d : invertedWorkGraph.get(baseWork)) {
+ Dependency dependency = new Dependency();
+ dependency.w = d;
+ dependency.prop = getEdgeProperty(d, baseWork);
+ dependencies.add(dependency);
+ }
+ if (!dependencies.isEmpty()) {
+ Collections.sort(dependencies);
+ result.put(baseWork.getName(), dependencies);
+ }
}
}
return result;
}
+ public Map<BaseWork, BaseWork> getCloneToWork() {
+ return cloneToWork;
+ }
}
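
The getDependencyMap() rewrite above walks getAllWork() instead of iterating the HashMap-backed invertedWorkGraph directly, presumably so the Edges section of EXPLAIN prints in a stable work order rather than HashMap iteration order. A small sketch of the idea, with toy stand-ins rather than Hive classes:

    import java.util.Arrays;
    import java.util.HashMap;
    import java.util.List;
    import java.util.Map;

    public class EdgeOrderSketch {
      public static void main(String[] args) {
        // invertedWorkGraph stand-in: child -> parents; HashMap order is unstable.
        Map<String, List<String>> inverted = new HashMap<String, List<String>>();
        inverted.put("Reducer 3", Arrays.asList("Reducer 5"));
        inverted.put("Reducer 5", Arrays.asList("Map 1"));
        // getAllWork() stand-in: a deterministic traversal order of all works.
        for (String work : Arrays.asList("Map 1", "Reducer 3", "Reducer 5")) {
          List<String> parents = inverted.get(work);
          if (parents != null && !parents.isEmpty()) {
            System.out.println(work + " <- " + parents);
          }
        }
      }
    }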
Modified: hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby10.q.out
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby10.q.out?rev=1638907&r1=1638906&r2=1638907&view=diff
==============================================================================
--- hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby10.q.out (original)
+++ hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby10.q.out Wed Nov 12 18:07:16 2014
@@ -56,9 +56,10 @@ STAGE PLANS:
Stage: Stage-2
Spark
Edges:
- Reducer 2 <- Map 1 (SORT, 1)
- Reducer 3 <- Reducer 2 (GROUP, 1)
- Reducer 4 <- Reducer 2 (GROUP, 1)
+ Reducer 3 <- Reducer 5 (GROUP, 1)
+ Reducer 4 <- Reducer 6 (GROUP, 1)
+ Reducer 5 <- Map 1 (SORT, 1)
+ Reducer 6 <- Map 1 (SORT, 1)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -72,34 +73,6 @@ STAGE PLANS:
Map-reduce partition columns: substr(value, 5) (type: string)
Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE
value expressions: key (type: int)
- Reducer 2
- Reduce Operator Tree:
- Forward
- Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count(KEY._col0), count(DISTINCT KEY._col0)
- keys: VALUE._col0 (type: int)
- mode: hash
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: bigint), _col2 (type: bigint)
- Group By Operator
- aggregations: sum(KEY._col0), sum(DISTINCT KEY._col0)
- keys: VALUE._col0 (type: int)
- mode: hash
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: double), _col2 (type: double)
Reducer 3
Reduce Operator Tree:
Group By Operator
@@ -140,6 +113,38 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.dest2
+ Reducer 5
+ Reduce Operator Tree:
+ Forward
+ Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count(KEY._col0), count(DISTINCT KEY._col0)
+ keys: VALUE._col0 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint), _col2 (type: bigint)
+ Reducer 6
+ Reduce Operator Tree:
+ Forward
+ Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(KEY._col0), sum(DISTINCT KEY._col0)
+ keys: VALUE._col0 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: double), _col2 (type: double)
Stage: Stage-3
Dependency Collection
@@ -268,9 +273,10 @@ STAGE PLANS:
Stage: Stage-2
Spark
Edges:
- Reducer 2 <- Map 1 (SORT, 1)
- Reducer 3 <- Reducer 2 (GROUP, 1)
- Reducer 4 <- Reducer 2 (GROUP, 1)
+ Reducer 3 <- Reducer 5 (GROUP, 1)
+ Reducer 4 <- Reducer 6 (GROUP, 1)
+ Reducer 5 <- Map 1 (SORT, 1)
+ Reducer 6 <- Map 1 (SORT, 1)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -284,34 +290,6 @@ STAGE PLANS:
Map-reduce partition columns: substr(value, 5) (type: string)
Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE
value expressions: key (type: int)
- Reducer 2
- Reduce Operator Tree:
- Forward
- Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count(KEY._col0), count(DISTINCT KEY._col0)
- keys: VALUE._col0 (type: int)
- mode: hash
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: bigint), _col2 (type: bigint)
- Group By Operator
- aggregations: sum(KEY._col0), sum(DISTINCT KEY._col0)
- keys: VALUE._col0 (type: int)
- mode: hash
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: double), _col2 (type: double)
Reducer 3
Reduce Operator Tree:
Group By Operator
@@ -352,6 +330,38 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.dest2
+ Reducer 5
+ Reduce Operator Tree:
+ Forward
+ Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count(KEY._col0), count(DISTINCT KEY._col0)
+ keys: VALUE._col0 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint), _col2 (type: bigint)
+ Reducer 6
+ Reduce Operator Tree:
+ Forward
+ Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(KEY._col0), sum(DISTINCT KEY._col0)
+ keys: VALUE._col0 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: double), _col2 (type: double)
Stage: Stage-3
Dependency Collection
@@ -482,9 +492,10 @@ STAGE PLANS:
Stage: Stage-2
Spark
Edges:
- Reducer 2 <- Map 1 (SORT, 1)
- Reducer 3 <- Reducer 2 (GROUP, 1)
- Reducer 4 <- Reducer 2 (GROUP, 1)
+ Reducer 3 <- Reducer 5 (GROUP, 1)
+ Reducer 4 <- Reducer 6 (GROUP, 1)
+ Reducer 5 <- Map 1 (SORT, 1)
+ Reducer 6 <- Map 1 (SORT, 1)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -498,34 +509,6 @@ STAGE PLANS:
Map-reduce partition columns: substr(value, 5) (type: string)
Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE
value expressions: key (type: int)
- Reducer 2
- Reduce Operator Tree:
- Forward
- Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: sum(DISTINCT KEY._col0), count(DISTINCT KEY._col0)
- keys: VALUE._col0 (type: int)
- mode: hash
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: double), _col2 (type: bigint)
- Group By Operator
- aggregations: sum(DISTINCT KEY._col0), avg(DISTINCT KEY._col0)
- keys: VALUE._col0 (type: int)
- mode: hash
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: double), _col2 (type: struct<count:bigint,sum:double,input:string>)
Reducer 3
Reduce Operator Tree:
Group By Operator
@@ -566,6 +549,38 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.dest2
+ Reducer 5
+ Reduce Operator Tree:
+ Forward
+ Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(DISTINCT KEY._col0), count(DISTINCT KEY._col0)
+ keys: VALUE._col0 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: double), _col2 (type: bigint)
+ Reducer 6
+ Reduce Operator Tree:
+ Forward
+ Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(DISTINCT KEY._col0), avg(DISTINCT KEY._col0)
+ keys: VALUE._col0 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: double), _col2 (type: struct<count:bigint,sum:double,input:string>)
Stage: Stage-3
Dependency Collection
Modified: hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby11.q.out
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby11.q.out?rev=1638907&r1=1638906&r2=1638907&view=diff
==============================================================================
--- hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby11.q.out (original)
+++ hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby11.q.out Wed Nov 12 18:07:16 2014
@@ -44,9 +44,10 @@ STAGE PLANS:
Stage: Stage-2
Spark
Edges:
- Reducer 2 <- Map 1 (SORT, 1)
- Reducer 3 <- Reducer 2 (GROUP, 1)
- Reducer 4 <- Reducer 2 (GROUP, 1)
+ Reducer 3 <- Reducer 5 (GROUP, 1)
+ Reducer 4 <- Reducer 6 (GROUP, 1)
+ Reducer 5 <- Map 1 (SORT, 1)
+ Reducer 6 <- Map 1 (SORT, 1)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -60,34 +61,6 @@ STAGE PLANS:
Map-reduce partition columns: key (type: string)
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
value expressions: value (type: string), substr(value, 5) (type: string)
- Reducer 2
- Reduce Operator Tree:
- Forward
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count(KEY._col0), count(DISTINCT KEY._col0)
- keys: VALUE._col0 (type: string)
- mode: hash
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: bigint), _col2 (type: bigint)
- Group By Operator
- aggregations: count(KEY._col0), count(DISTINCT KEY._col0)
- keys: VALUE._col1 (type: string)
- mode: hash
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: bigint), _col2 (type: bigint)
Reducer 3
Reduce Operator Tree:
Group By Operator
@@ -128,6 +101,38 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.dest2
+ Reducer 5
+ Reduce Operator Tree:
+ Forward
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count(KEY._col0), count(DISTINCT KEY._col0)
+ keys: VALUE._col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint), _col2 (type: bigint)
+ Reducer 6
+ Reduce Operator Tree:
+ Forward
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count(KEY._col0), count(DISTINCT KEY._col0)
+ keys: VALUE._col1 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint), _col2 (type: bigint)
Stage: Stage-3
Dependency Collection
Modified: hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby7_map.q.out
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby7_map.q.out?rev=1638907&r1=1638906&r2=1638907&view=diff
==============================================================================
--- hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby7_map.q.out (original)
+++ hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby7_map.q.out Wed Nov 12 18:07:16 2014
@@ -40,11 +40,11 @@ STAGE PLANS:
Stage: Stage-2
Spark
Edges:
- Reducer 2 <- Map 1 (GROUP, 31)
- Reducer 3 <- Map 1 (GROUP, 31)
+ Reducer 2 <- Map 4 (GROUP, 31)
+ Reducer 3 <- Map 5 (GROUP, 31)
#### A masked pattern was here ####
Vertices:
- Map 1
+ Map 4
Map Operator Tree:
TableScan
alias: src
@@ -65,6 +65,11 @@ STAGE PLANS:
Map-reduce partition columns: _col0 (type: string)
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: double)
+ Map 5
+ Map Operator Tree:
+ TableScan
+ alias: src
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), value (type: string)
outputColumnNames: key, value
Modified: hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby7_map_skew.q.out
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby7_map_skew.q.out?rev=1638907&r1=1638906&r2=1638907&view=diff
==============================================================================
--- hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby7_map_skew.q.out (original)
+++ hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby7_map_skew.q.out Wed Nov 12 18:07:16 2014
@@ -40,13 +40,13 @@ STAGE PLANS:
Stage: Stage-2
Spark
Edges:
- Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 31)
+ Reducer 2 <- Map 6 (GROUP PARTITION-LEVEL SORT, 31)
Reducer 3 <- Reducer 2 (GROUP, 31)
- Reducer 4 <- Map 1 (GROUP PARTITION-LEVEL SORT, 31)
+ Reducer 4 <- Map 7 (GROUP PARTITION-LEVEL SORT, 31)
Reducer 5 <- Reducer 4 (GROUP, 31)
#### A masked pattern was here ####
Vertices:
- Map 1
+ Map 6
Map Operator Tree:
TableScan
alias: src
@@ -67,6 +67,11 @@ STAGE PLANS:
Map-reduce partition columns: rand() (type: double)
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: double)
+ Map 7
+ Map Operator Tree:
+ TableScan
+ alias: src
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), value (type: string)
outputColumnNames: key, value
Modified: hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby7_noskew.q.out
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby7_noskew.q.out?rev=1638907&r1=1638906&r2=1638907&view=diff
==============================================================================
--- hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby7_noskew.q.out (original)
+++ hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby7_noskew.q.out Wed Nov 12 18:07:16 2014
@@ -40,11 +40,11 @@ STAGE PLANS:
Stage: Stage-2
Spark
Edges:
- Reducer 2 <- Map 1 (GROUP, 31)
- Reducer 3 <- Map 1 (GROUP, 31)
+ Reducer 2 <- Map 4 (GROUP, 31)
+ Reducer 3 <- Map 5 (GROUP, 31)
#### A masked pattern was here ####
Vertices:
- Map 1
+ Map 4
Map Operator Tree:
TableScan
alias: src
@@ -59,6 +59,11 @@ STAGE PLANS:
Map-reduce partition columns: key (type: string)
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
value expressions: substr(value, 5) (type: string)
+ Map 5
+ Map Operator Tree:
+ TableScan
+ alias: src
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), value (type: string)
outputColumnNames: key, value
Modified: hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby7_noskew_multi_single_reducer.q.out
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby7_noskew_multi_single_reducer.q.out?rev=1638907&r1=1638906&r2=1638907&view=diff
==============================================================================
--- hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby7_noskew_multi_single_reducer.q.out (original)
+++ hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby7_noskew_multi_single_reducer.q.out Wed Nov 12 18:07:16 2014
@@ -40,9 +40,10 @@ STAGE PLANS:
Stage: Stage-2
Spark
Edges:
- Reducer 2 <- Map 1 (SORT, 31)
- Reducer 3 <- Reducer 2 (GROUP, 1)
- Reducer 4 <- Reducer 2 (GROUP, 1)
+ Reducer 3 <- Reducer 5 (GROUP, 1)
+ Reducer 4 <- Reducer 6 (GROUP, 1)
+ Reducer 5 <- Map 1 (SORT, 31)
+ Reducer 6 <- Map 1 (SORT, 31)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -60,44 +61,6 @@ STAGE PLANS:
Map-reduce partition columns: key (type: string)
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
value expressions: substr(value, 5) (type: string)
- Reducer 2
- Reduce Operator Tree:
- Forward
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: sum(VALUE._col0)
- keys: KEY._col0 (type: string)
- mode: complete
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: string), _col1 (type: double)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- Limit
- Number of rows: 10
- Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: string), _col1 (type: double)
- Group By Operator
- aggregations: sum(VALUE._col0)
- keys: KEY._col0 (type: string)
- mode: complete
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: string), _col1 (type: double)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- Limit
- Number of rows: 10
- Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: string), _col1 (type: double)
Reducer 3
Reduce Operator Tree:
Select Operator
@@ -140,6 +103,48 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.dest2
+ Reducer 5
+ Reduce Operator Tree:
+ Forward
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ keys: KEY._col0 (type: string)
+ mode: complete
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: double)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 10
+ Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string), _col1 (type: double)
+ Reducer 6
+ Reduce Operator Tree:
+ Forward
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ keys: KEY._col0 (type: string)
+ mode: complete
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: double)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 10
+ Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string), _col1 (type: double)
Stage: Stage-3
Dependency Collection
Modified: hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby8.q.out
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby8.q.out?rev=1638907&r1=1638906&r2=1638907&view=diff
==============================================================================
--- hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby8.q.out (original)
+++ hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby8.q.out Wed Nov 12 18:07:16 2014
@@ -40,9 +40,10 @@ STAGE PLANS:
Stage: Stage-2
Spark
Edges:
- Reducer 2 <- Map 1 (SORT, 1)
- Reducer 3 <- Reducer 2 (GROUP, 1)
- Reducer 4 <- Reducer 2 (GROUP, 1)
+ Reducer 3 <- Reducer 5 (GROUP, 1)
+ Reducer 4 <- Reducer 6 (GROUP, 1)
+ Reducer 5 <- Map 1 (SORT, 1)
+ Reducer 6 <- Map 1 (SORT, 1)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -56,34 +57,6 @@ STAGE PLANS:
Map-reduce partition columns: substr(value, 5) (type: string)
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
value expressions: key (type: string)
- Reducer 2
- Reduce Operator Tree:
- Forward
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count(DISTINCT KEY._col0)
- keys: VALUE._col0 (type: string)
- mode: hash
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: bigint)
- Group By Operator
- aggregations: count(DISTINCT KEY._col0)
- keys: VALUE._col0 (type: string)
- mode: hash
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: bigint)
Reducer 3
Reduce Operator Tree:
Group By Operator
@@ -124,6 +97,38 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.dest2
+ Reducer 5
+ Reduce Operator Tree:
+ Forward
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count(DISTINCT KEY._col0)
+ keys: VALUE._col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint)
+ Reducer 6
+ Reduce Operator Tree:
+ Forward
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count(DISTINCT KEY._col0)
+ keys: VALUE._col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint)
Stage: Stage-3
Dependency Collection
@@ -828,9 +833,10 @@ STAGE PLANS:
Stage: Stage-2
Spark
Edges:
- Reducer 2 <- Map 1 (SORT, 1)
- Reducer 3 <- Reducer 2 (GROUP, 1)
- Reducer 4 <- Reducer 2 (GROUP, 1)
+ Reducer 3 <- Reducer 5 (GROUP, 1)
+ Reducer 4 <- Reducer 6 (GROUP, 1)
+ Reducer 5 <- Map 1 (SORT, 1)
+ Reducer 6 <- Map 1 (SORT, 1)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -844,34 +850,6 @@ STAGE PLANS:
Map-reduce partition columns: substr(value, 5) (type: string)
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
value expressions: key (type: string)
- Reducer 2
- Reduce Operator Tree:
- Forward
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count(DISTINCT KEY._col0)
- keys: VALUE._col0 (type: string)
- mode: hash
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: bigint)
- Group By Operator
- aggregations: count(DISTINCT KEY._col0)
- keys: VALUE._col0 (type: string)
- mode: hash
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: bigint)
Reducer 3
Reduce Operator Tree:
Group By Operator
@@ -912,6 +890,38 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.dest2
+ Reducer 5
+ Reduce Operator Tree:
+ Forward
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count(DISTINCT KEY._col0)
+ keys: VALUE._col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint)
+ Reducer 6
+ Reduce Operator Tree:
+ Forward
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count(DISTINCT KEY._col0)
+ keys: VALUE._col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint)
Stage: Stage-3
Dependency Collection
Modified: hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby8_map.q.out
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby8_map.q.out?rev=1638907&r1=1638906&r2=1638907&view=diff
==============================================================================
--- hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby8_map.q.out (original)
+++ hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby8_map.q.out Wed Nov 12 18:07:16 2014
@@ -40,9 +40,10 @@ STAGE PLANS:
Stage: Stage-2
Spark
Edges:
- Reducer 2 <- Map 1 (SORT, 31)
- Reducer 3 <- Reducer 2 (GROUP, 31)
- Reducer 4 <- Reducer 2 (GROUP, 31)
+ Reducer 3 <- Reducer 5 (GROUP, 31)
+ Reducer 4 <- Reducer 6 (GROUP, 31)
+ Reducer 5 <- Map 1 (SORT, 31)
+ Reducer 6 <- Map 1 (SORT, 31)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -56,34 +57,6 @@ STAGE PLANS:
Map-reduce partition columns: substr(value, 5) (type: string)
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
value expressions: key (type: string)
- Reducer 2
- Reduce Operator Tree:
- Forward
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count(DISTINCT KEY._col0)
- keys: VALUE._col0 (type: string)
- mode: hash
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: bigint)
- Group By Operator
- aggregations: count(DISTINCT KEY._col0)
- keys: VALUE._col0 (type: string)
- mode: hash
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: bigint)
Reducer 3
Reduce Operator Tree:
Group By Operator
@@ -124,6 +97,38 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.dest2
+ Reducer 5
+ Reduce Operator Tree:
+ Forward
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count(DISTINCT KEY._col0)
+ keys: VALUE._col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint)
+ Reducer 6
+ Reduce Operator Tree:
+ Forward
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count(DISTINCT KEY._col0)
+ keys: VALUE._col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint)
Stage: Stage-3
Dependency Collection
Modified: hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby8_map_skew.q.out
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby8_map_skew.q.out?rev=1638907&r1=1638906&r2=1638907&view=diff
==============================================================================
--- hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby8_map_skew.q.out (original)
+++ hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby8_map_skew.q.out Wed Nov 12 18:07:16 2014
@@ -40,9 +40,10 @@ STAGE PLANS:
Stage: Stage-2
Spark
Edges:
- Reducer 2 <- Map 1 (SORT, 31)
- Reducer 3 <- Reducer 2 (GROUP, 31)
- Reducer 4 <- Reducer 2 (GROUP, 31)
+ Reducer 3 <- Reducer 5 (GROUP, 31)
+ Reducer 4 <- Reducer 6 (GROUP, 31)
+ Reducer 5 <- Map 1 (SORT, 31)
+ Reducer 6 <- Map 1 (SORT, 31)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -56,34 +57,6 @@ STAGE PLANS:
Map-reduce partition columns: substr(value, 5) (type: string)
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
value expressions: key (type: string)
- Reducer 2
- Reduce Operator Tree:
- Forward
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count(DISTINCT KEY._col0)
- keys: VALUE._col0 (type: string)
- mode: hash
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: bigint)
- Group By Operator
- aggregations: count(DISTINCT KEY._col0)
- keys: VALUE._col0 (type: string)
- mode: hash
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: bigint)
Reducer 3
Reduce Operator Tree:
Group By Operator
@@ -124,6 +97,38 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.dest2
+ Reducer 5
+ Reduce Operator Tree:
+ Forward
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count(DISTINCT KEY._col0)
+ keys: VALUE._col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint)
+ Reducer 6
+ Reduce Operator Tree:
+ Forward
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count(DISTINCT KEY._col0)
+ keys: VALUE._col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint)
Stage: Stage-3
Dependency Collection
Modified: hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby8_noskew.q.out
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby8_noskew.q.out?rev=1638907&r1=1638906&r2=1638907&view=diff
==============================================================================
--- hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby8_noskew.q.out (original)
+++ hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby8_noskew.q.out Wed Nov 12 18:07:16 2014
@@ -40,9 +40,10 @@ STAGE PLANS:
Stage: Stage-2
Spark
Edges:
- Reducer 2 <- Map 1 (SORT, 31)
- Reducer 3 <- Reducer 2 (GROUP, 31)
- Reducer 4 <- Reducer 2 (GROUP, 31)
+ Reducer 3 <- Reducer 5 (GROUP, 31)
+ Reducer 4 <- Reducer 6 (GROUP, 31)
+ Reducer 5 <- Map 1 (SORT, 31)
+ Reducer 6 <- Map 1 (SORT, 31)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -56,34 +57,6 @@ STAGE PLANS:
Map-reduce partition columns: substr(value, 5) (type: string)
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
value expressions: key (type: string)
- Reducer 2
- Reduce Operator Tree:
- Forward
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count(DISTINCT KEY._col0)
- keys: VALUE._col0 (type: string)
- mode: hash
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: bigint)
- Group By Operator
- aggregations: count(DISTINCT KEY._col0)
- keys: VALUE._col0 (type: string)
- mode: hash
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: bigint)
Reducer 3
Reduce Operator Tree:
Group By Operator
@@ -124,6 +97,38 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.dest2
+ Reducer 5
+ Reduce Operator Tree:
+ Forward
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count(DISTINCT KEY._col0)
+ keys: VALUE._col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint)
+ Reducer 6
+ Reduce Operator Tree:
+ Forward
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count(DISTINCT KEY._col0)
+ keys: VALUE._col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint)
Stage: Stage-3
Dependency Collection