Posted to commits@hive.apache.org by ha...@apache.org on 2013/11/10 17:15:57 UTC
svn commit: r1540485 - in /hive/trunk: common/src/java/org/apache/hadoop/hive/common/ common/src/java/org/apache/hadoop/hive/conf/ ql/src/java/org/apache/hadoop/hive/ql/exec/ ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/ ql/src/java/org/apa...
Author: hashutosh
Date: Sun Nov 10 16:15:56 2013
New Revision: 1540485
URL: http://svn.apache.org/r1540485
Log:
HIVE-4880 : Rearrange explain order of stages simpler (Navis via Ashutosh Chauhan)
Added:
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/StageIDsRearranger.java
hive/trunk/ql/src/test/queries/clientpositive/explain_rearrange.q
hive/trunk/ql/src/test/results/clientpositive/explain_rearrange.q.out
Modified:
hive/trunk/common/src/java/org/apache/hadoop/hive/common/ObjectPair.java
hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/Task.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/PhysicalOptimizer.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainSemanticAnalyzer.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/ExplainWork.java
Modified: hive/trunk/common/src/java/org/apache/hadoop/hive/common/ObjectPair.java
URL: http://svn.apache.org/viewvc/hive/trunk/common/src/java/org/apache/hadoop/hive/common/ObjectPair.java?rev=1540485&r1=1540484&r2=1540485&view=diff
==============================================================================
--- hive/trunk/common/src/java/org/apache/hadoop/hive/common/ObjectPair.java (original)
+++ hive/trunk/common/src/java/org/apache/hadoop/hive/common/ObjectPair.java Sun Nov 10 16:15:56 2013
@@ -74,4 +74,8 @@ public class ObjectPair<F, S> {
return this.getFirst().equals(that.getFirst()) &&
this.getSecond().equals(that.getSecond());
}
+
+ public String toString() {
+ return first + ":" + second;
+ }
}
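
For illustration, a minimal sketch of what the new ObjectPair.toString() yields, assuming the class's existing two-argument constructor; the values are hypothetical:

    ObjectPair<String, Integer> pair = new ObjectPair<String, Integer>("Stage-1", 3);
    // The new toString() joins the two members with a colon:
    System.out.println(pair);  // prints "Stage-1:3"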
Modified: hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
URL: http://svn.apache.org/viewvc/hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java?rev=1540485&r1=1540484&r2=1540485&view=diff
==============================================================================
--- hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java (original)
+++ hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java Sun Nov 10 16:15:56 2013
@@ -834,6 +834,10 @@ public class HiveConf extends Configurat
HIVE_VECTORIZATION_ENABLED("hive.vectorized.execution.enabled", false),
HIVE_TYPE_CHECK_ON_INSERT("hive.typecheck.on.insert", true),
+
+ // none, idonly, traverse, execution
+ HIVESTAGEIDREARRANGE("hive.stageid.rearrange", "none"),
+ HIVEEXPLAINDEPENDENCYAPPENDTASKTYPES("hive.explain.dependency.append.tasktype", false),
;
public final String varname;
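
A minimal sketch of reading the two new variables through the existing HiveConf accessors; the session value set here is hypothetical:

    HiveConf conf = new HiveConf();
    // Accepted values, per the comment above: none, idonly, traverse, execution.
    conf.setVar(HiveConf.ConfVars.HIVESTAGEIDREARRANGE, "execution");
    String mode = conf.getVar(HiveConf.ConfVars.HIVESTAGEIDREARRANGE);
    // When true, EXPLAIN appends each stage's task type, e.g. [MAPRED], to the
    // STAGE DEPENDENCIES listing (see the .q.out added below).
    boolean appendType =
        conf.getBoolVar(HiveConf.ConfVars.HIVEEXPLAINDEPENDENCYAPPENDTASKTYPES);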
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java?rev=1540485&r1=1540484&r2=1540485&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java Sun Nov 10 16:15:56 2013
@@ -42,6 +42,7 @@ import org.apache.hadoop.hive.metastore.
import org.apache.hadoop.hive.ql.DriverContext;
import org.apache.hadoop.hive.ql.hooks.ReadEntity;
import org.apache.hadoop.hive.ql.metadata.Table;
+import org.apache.hadoop.hive.ql.optimizer.physical.StageIDsRearranger;
import org.apache.hadoop.hive.ql.plan.Explain;
import org.apache.hadoop.hive.ql.plan.ExplainWork;
import org.apache.hadoop.hive.ql.plan.OperatorDesc;
@@ -59,7 +60,7 @@ import org.json.JSONObject;
public class ExplainTask extends Task<ExplainWork> implements Serializable {
private static final long serialVersionUID = 1L;
public static final String EXPL_COLUMN_NAME = "Explain";
- private Set<Operator<? extends OperatorDesc>> visitedOps = new HashSet<Operator<?>>();
+ private Set<Operator<?>> visitedOps = new HashSet<Operator<?>>();
private boolean isLogical = false;
public ExplainTask() {
@@ -158,9 +159,16 @@ public class ExplainTask extends Task<Ex
outJSONObject.put("ABSTRACT SYNTAX TREE", jsonAST);
}
}
+ List<Task<?>> tasks = work.getRootTasks();
- JSONObject jsonDependencies = outputDependencies(out, jsonOutput,
- work.getRootTasks(), 0);
+ List<Task> ordered = StageIDsRearranger.getExplainOrder(conf, tasks);
+ Task<? extends Serializable> fetchTask = work.getFetchTask();
+ if (fetchTask != null) {
+ fetchTask.setRootTask(true); // todo HIVE-3925
+ ordered.add(fetchTask);
+ }
+
+ JSONObject jsonDependencies = outputDependencies(out, work, ordered);
if (out != null) {
out.println();
@@ -171,7 +179,7 @@ public class ExplainTask extends Task<Ex
}
// Go over all the tasks and dump out the plans
- JSONObject jsonPlan = outputStagePlans(out, work, work.getRootTasks(), 0);
+ JSONObject jsonPlan = outputStagePlans(out, work, ordered);
if (jsonOutput) {
outJSONObject.put("STAGE PLANS", jsonPlan);
@@ -558,13 +566,7 @@ public class ExplainTask extends Task<Ex
private JSONObject outputPlan(Task<? extends Serializable> task,
PrintStream out, JSONObject parentJSON, boolean extended,
- boolean jsonOutput, HashSet<Task<? extends Serializable>> displayedSet,
- int indent) throws Exception {
-
- if (displayedSet.contains(task)) {
- return null;
- }
- displayedSet.add(task);
+ boolean jsonOutput, int indent) throws Exception {
if (out != null) {
out.print(indentString(indent));
@@ -583,32 +585,13 @@ public class ExplainTask extends Task<Ex
if (jsonOutput) {
parentJSON.put(task.getId(), jsonOutputPlan);
}
-
- if (task instanceof ConditionalTask
- && ((ConditionalTask) task).getListTasks() != null) {
- for (Task<? extends Serializable> con : ((ConditionalTask) task).getListTasks()) {
- outputPlan(con, out, parentJSON, extended, jsonOutput, displayedSet,
- jsonOutput ? 0 : indent);
- }
- }
- if (task.getChildTasks() != null) {
- for (Task<? extends Serializable> child : task.getChildTasks()) {
- outputPlan(child, out, parentJSON, extended, jsonOutput, displayedSet,
- jsonOutput ? 0 : indent);
- }
- }
return null;
}
private JSONObject outputDependencies(Task<? extends Serializable> task,
- Set<Task<? extends Serializable>> dependeciesTaskSet, PrintStream out,
- JSONObject parentJson, boolean jsonOutput, int indent,
- boolean rootTskCandidate) throws Exception {
+ PrintStream out, JSONObject parentJson, boolean jsonOutput, boolean taskType, int indent)
+ throws Exception {
- if (dependeciesTaskSet.contains(task)) {
- return null;
- }
- dependeciesTaskSet.add(task);
boolean first = true;
JSONObject json = jsonOutput ? new JSONObject() : null;
if (out != null) {
@@ -617,7 +600,7 @@ public class ExplainTask extends Task<Ex
}
if ((task.getParentTasks() == null || task.getParentTasks().isEmpty())) {
- if (rootTskCandidate) {
+ if (task.isRootTask()) {
if (out != null) {
out.print(" is a root stage");
}
@@ -678,30 +661,17 @@ public class ExplainTask extends Task<Ex
json.put("CONDITIONAL CHILD TASKS", s.toString());
}
}
-
- if (out != null) {
- out.println();
- }
-
- if (task instanceof ConditionalTask
- && ((ConditionalTask) task).getListTasks() != null) {
- for (Task<? extends Serializable> con : ((ConditionalTask) task).getListTasks()) {
- JSONObject jsonOut = outputDependencies(con, dependeciesTaskSet, out,
- parentJson, jsonOutput, jsonOutput ? 0 : indent, false);
- if (jsonOutput && (jsonOut != null)) {
- parentJson.put(con.getId(), jsonOut);
- }
+ if (taskType) {
+ if (out != null) {
+ out.printf(" [%s]", task.getType());
+ }
+ if (jsonOutput) {
+ json.put("TASK TYPE", task.getType().name());
}
}
- if (task.getChildTasks() != null) {
- for (Task<? extends Serializable> child : task.getChildTasks()) {
- JSONObject jsonOut = outputDependencies(child, dependeciesTaskSet, out,
- parentJson, jsonOutput, jsonOutput ? 0 : indent, true);
- if (jsonOutput && (jsonOut != null)) {
- parentJson.put(child.getId(), jsonOut);
- }
- }
+ if (out != null) {
+ out.println();
}
return jsonOutput ? json : null;
}
@@ -718,44 +688,35 @@ public class ExplainTask extends Task<Ex
return jsonOutput ? treeString : null;
}
- public JSONObject outputDependencies(PrintStream out, boolean jsonOutput,
- List<Task<? extends Serializable>> rootTasks, int indent)
+ public JSONObject outputDependencies(PrintStream out, ExplainWork work, List<Task> tasks)
throws Exception {
+ boolean jsonOutput = work.isFormatted();
+ boolean appendTaskType = work.isAppendTaskType();
if (out != null) {
- out.print(indentString(indent));
out.println("STAGE DEPENDENCIES:");
}
JSONObject json = jsonOutput ? new JSONObject() : null;
- Set<Task<? extends Serializable>> dependenciesTaskSet =
- new HashSet<Task<? extends Serializable>>();
-
- for (Task<? extends Serializable> rootTask : rootTasks) {
- JSONObject jsonOut = outputDependencies(rootTask,
- dependenciesTaskSet, out, json, jsonOutput,
- jsonOutput ? 0 : indent + 2, true);
- if (jsonOutput && (jsonOut != null)) {
- json.put(rootTask.getId(), jsonOut);
+ for (Task task : tasks) {
+ JSONObject jsonOut = outputDependencies(task, out, json, jsonOutput, appendTaskType, 2);
+ if (jsonOutput && jsonOut != null) {
+ json.put(task.getId(), jsonOut);
}
}
return jsonOutput ? json : null;
}
- public JSONObject outputStagePlans(PrintStream out, ExplainWork work,
- List<Task<? extends Serializable>> rootTasks, int indent)
+ public JSONObject outputStagePlans(PrintStream out, ExplainWork work, List<Task> tasks)
throws Exception {
boolean jsonOutput = work.isFormatted();
if (out != null) {
- out.print(indentString(indent));
out.println("STAGE PLANS:");
}
JSONObject json = jsonOutput ? new JSONObject() : null;
- HashSet<Task<? extends Serializable>> displayedSet = new HashSet<Task<? extends Serializable>>();
- for (Task<? extends Serializable> rootTask : rootTasks) {
- outputPlan(rootTask, out, json, work.getExtended(), jsonOutput,
- displayedSet, jsonOutput ? 0 : indent + 2);
+ for (Task task : tasks) {
+ outputPlan(task, out, json, work.getExtended(), jsonOutput, 2);
}
return jsonOutput ? json : null;
}
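
Taken together, the ExplainTask hunks above reduce the explain flow to roughly this sketch (conf, work and out are assumed to be in scope, as in the surrounding method):

    List<Task<?>> tasks = work.getRootTasks();
    List<Task> ordered = StageIDsRearranger.getExplainOrder(conf, tasks);
    Task<? extends Serializable> fetchTask = work.getFetchTask();
    if (fetchTask != null) {
      fetchTask.setRootTask(true);  // rendered as "is a root stage"; todo HIVE-3925
      ordered.add(fetchTask);
    }
    // Both sections now iterate one pre-ordered, duplicate-free list instead of
    // re-walking the task graph with displayedSet/dependenciesTaskSet.
    JSONObject jsonDependencies = outputDependencies(out, work, ordered);
    JSONObject jsonPlan = outputStagePlans(out, work, ordered);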
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/Task.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/Task.java?rev=1540485&r1=1540484&r2=1540485&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/Task.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/Task.java Sun Nov 10 16:15:56 2013
@@ -93,6 +93,8 @@ public abstract class Task<T extends Ser
// Bean methods
+ protected boolean rootTask;
+
protected List<Task<? extends Serializable>> childTasks;
protected List<Task<? extends Serializable>> parentTasks;
/**
@@ -172,6 +174,14 @@ public abstract class Task<T extends Ser
return false;
}
+ public boolean isRootTask() {
+ return rootTask;
+ }
+
+ public void setRootTask(boolean rootTask) {
+ this.rootTask = rootTask;
+ }
+
public void setChildTasks(List<Task<? extends Serializable>> childTasks) {
this.childTasks = childTasks;
}
@@ -506,4 +516,8 @@ public abstract class Task<T extends Ser
void setException(Throwable ex) {
exception = ex;
}
+
+ public String toString() {
+ return getId() + ":" + getType();
+ }
}
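
Two small additions here: an explicit root-task flag and a readable toString(). A hypothetical sketch of how they surface (rootTasks is assumed to be in scope):

    for (Task<? extends Serializable> t : rootTasks) {
      t.setRootTask(true);  // marks explain roots, as getExplainOrder() does below
    }
    // The new toString() prints "<id>:<type>", e.g. "Stage-1:MAPRED",
    // which makes task lists legible in logs and debuggers.
    System.out.println(rootTasks.get(0));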
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/PhysicalOptimizer.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/PhysicalOptimizer.java?rev=1540485&r1=1540484&r2=1540485&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/PhysicalOptimizer.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/PhysicalOptimizer.java Sun Nov 10 16:15:56 2013
@@ -84,6 +84,9 @@ public class PhysicalOptimizer {
if (hiveConf.getBoolVar(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED)) {
resolvers.add(new Vectorizer());
}
+ if (!"none".equalsIgnoreCase(hiveConf.getVar(HiveConf.ConfVars.HIVESTAGEIDREARRANGE))) {
+ resolvers.add(new StageIDsRearranger());
+ }
}
/**
@@ -98,5 +101,4 @@ public class PhysicalOptimizer {
}
return pctx;
}
-
}
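
For context, a sketch of the loop in the existing optimize() method that drives these resolvers (reconstructed around the unchanged "return pctx;" shown above); registering StageIDsRearranger last means stage ids are renumbered only after every other physical rewrite:

    // Each resolver rewrites the plan in turn. Note that StageIDsRearranger.resolve()
    // (added below) returns null, so it is only safe as the final resolver.
    for (PhysicalPlanResolver resolver : resolvers) {
      pctx = resolver.resolve(pctx);
    }
    return pctx;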
Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/StageIDsRearranger.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/StageIDsRearranger.java?rev=1540485&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/StageIDsRearranger.java (added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/StageIDsRearranger.java Sun Nov 10 16:15:56 2013
@@ -0,0 +1,159 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.optimizer.physical;
+
+import java.io.Serializable;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.LinkedHashSet;
+import java.util.List;
+import java.util.Queue;
+import java.util.Set;
+import java.util.concurrent.ConcurrentLinkedQueue;
+
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.ql.exec.ConditionalTask;
+import org.apache.hadoop.hive.ql.exec.Task;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
+
+/**
+ * Simple renumbering of stage ids
+ */
+public class StageIDsRearranger implements PhysicalPlanResolver {
+
+ private static final String PREFIX = "Stage-";
+
+ enum ArrangeType {
+ NONE, IDONLY, TRAVERSE, EXECUTION
+ }
+
+ @Override
+ public PhysicalContext resolve(PhysicalContext pctx) throws SemanticException {
+ int counter = 0;
+ for (Task task : getExplainOrder(pctx)) {
+ task.setId(PREFIX + (++counter));
+ }
+ return null;
+ }
+
+ private static List<Task> getExplainOrder(PhysicalContext pctx) {
+ List<Task> tasks = getExplainOrder(pctx.getConf(), pctx.getRootTasks());
+ if (pctx.getFetchTask() != null) {
+ tasks.add(pctx.getFetchTask());
+ }
+ return tasks;
+ }
+
+ public static List<Task> getExplainOrder(HiveConf conf, List<Task<?>> tasks) {
+ for (Task<? extends Serializable> task : tasks) {
+ task.setRootTask(true);
+ }
+ String var = conf.getVar(HiveConf.ConfVars.HIVESTAGEIDREARRANGE);
+ ArrangeType type = ArrangeType.valueOf(var.toUpperCase());
+ if (type == ArrangeType.EXECUTION) {
+ return executionOrder(tasks);
+ }
+ return traverseOrder(type, tasks);
+ }
+
+ private static List<Task> executionOrder(List<Task<?>> tasks) {
+ final Queue<Task<?>> queue = new ConcurrentLinkedQueue<Task<?>>(tasks);
+
+ TaskTraverse traverse = new TaskTraverse() {
+ @Override
+ protected void accepted(Task<?> task) {
+ List<Task<?>> childTasks = getChildTasks(task);
+ if (childTasks != null && !childTasks.isEmpty()) {
+ queue.addAll(childTasks);
+ }
+ }
+ @Override
+ protected void rejected(Task<?> child) {
+ queue.add(child);
+ }
+ @Override
+ protected List<Task<?>> next(Task<?> task) {
+ return queue.isEmpty() ? null : Arrays.<Task<?>>asList(queue.remove());
+ }
+ };
+ if (!queue.isEmpty()) {
+ traverse.traverse(queue.remove());
+ }
+ return new ArrayList<Task>(traverse.traversed);
+ }
+
+ static List<Task> traverseOrder(final ArrangeType type, List<Task<?>> tasks) {
+
+ TaskTraverse traverse = new TaskTraverse() {
+ @Override
+ protected boolean isReady(Task<?> task) {
+ return type == ArrangeType.NONE || type == ArrangeType.IDONLY || super.isReady(task);
+ }
+ @Override
+ protected List<Task<?>> next(Task<?> task) {
+ return getChildTasks(task);
+ }
+ };
+ for (Task<? extends Serializable> task : tasks) {
+ traverse.traverse(task);
+ }
+ return new ArrayList<Task>(traverse.traversed);
+ }
+
+
+ public static abstract class TaskTraverse {
+
+ protected final Set<Task<?>> traversed = new LinkedHashSet<Task<?>>();
+
+ public void traverse(Task<?> task) {
+ if (traversed.add(task)) {
+ accepted(task);
+ }
+ List<Task<?>> children = next(task);
+ if (children != null && !children.isEmpty()) {
+ for (Task<?> child : children) {
+ if (isReady(child)) {
+ traverse(child);
+ } else {
+ rejected(child);
+ }
+ }
+ }
+ }
+
+ protected boolean isReady(Task<?> task) {
+ return task.getParentTasks() == null || traversed.containsAll(task.getParentTasks());
+ }
+
+ protected void accepted(Task<?> task) {
+ }
+
+ protected void rejected(Task<?> child) {
+ }
+
+ protected abstract List<Task<?>> next(Task<?> task);
+
+ protected List<Task<?>> getChildTasks(Task<?> task) {
+ if (task instanceof ConditionalTask) {
+ return ((ConditionalTask) task).getListTasks();
+ }
+ return task.getChildTasks();
+ }
+ }
+}
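
TaskTraverse is the reusable piece of this new class. A minimal sketch of building a custom traversal on it, mirroring traverseOrder above (rootTasks is assumed to be in scope, and the caller is assumed to sit in the same package, since the traversed field is protected):

    StageIDsRearranger.TaskTraverse dfs = new StageIDsRearranger.TaskTraverse() {
      @Override
      protected List<Task<?>> next(Task<?> task) {
        return getChildTasks(task);  // children, or a ConditionalTask's list tasks
      }
    };
    for (Task<?> root : rootTasks) {
      dfs.traverse(root);  // visits each task once; not-yet-ready children hit rejected()
    }
    List<Task> ordered = new ArrayList<Task>(dfs.traversed);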
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainSemanticAnalyzer.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainSemanticAnalyzer.java?rev=1540485&r1=1540484&r2=1540485&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainSemanticAnalyzer.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainSemanticAnalyzer.java Sun Nov 10 16:15:56 2013
@@ -19,7 +19,7 @@
package org.apache.hadoop.hive.ql.parse;
import java.io.Serializable;
-import java.util.ArrayList;
+import java.util.Collections;
import java.util.List;
import org.apache.hadoop.fs.Path;
@@ -60,21 +60,16 @@ public class ExplainSemanticAnalyzer ext
ctx.setExplainLogical(logical);
// Create a semantic analyzer for the query
- BaseSemanticAnalyzer sem = SemanticAnalyzerFactory.get(conf, (ASTNode) ast
- .getChild(0));
- sem.analyze((ASTNode) ast.getChild(0), ctx);
+ ASTNode input = (ASTNode) ast.getChild(0);
+ BaseSemanticAnalyzer sem = SemanticAnalyzerFactory.get(conf, input);
+ sem.analyze(input, ctx);
sem.validate();
ctx.setResFile(new Path(ctx.getLocalTmpFileURI()));
List<Task<? extends Serializable>> tasks = sem.getRootTasks();
Task<? extends Serializable> fetchTask = sem.getFetchTask();
if (tasks == null) {
- if (fetchTask != null) {
- tasks = new ArrayList<Task<? extends Serializable>>();
- tasks.add(fetchTask);
- }
- } else if (fetchTask != null) {
- tasks.add(fetchTask);
+ tasks = Collections.emptyList();
}
ParseContext pCtx = null;
@@ -82,17 +77,21 @@ public class ExplainSemanticAnalyzer ext
pCtx = ((SemanticAnalyzer)sem).getParseContext();
}
- Task<? extends Serializable> explTask =
- TaskFactory.get(new ExplainWork(ctx.getResFile().toString(),
+ ExplainWork work = new ExplainWork(ctx.getResFile().toString(),
pCtx,
tasks,
- ((ASTNode) ast.getChild(0)).toStringTree(),
+ fetchTask,
+ input.toStringTree(),
sem.getInputs(),
extended,
formatted,
dependency,
- logical),
- conf);
+ logical);
+
+ work.setAppendTaskType(
+ HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVEEXPLAINDEPENDENCYAPPENDTASKTYPES));
+
+ Task<? extends Serializable> explTask = TaskFactory.get(work, conf);
fieldList = explTask.getResultSchema();
rootTasks.add(explTask);
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/ExplainWork.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/ExplainWork.java?rev=1540485&r1=1540484&r2=1540485&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/ExplainWork.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/ExplainWork.java Sun Nov 10 16:15:56 2013
@@ -36,6 +36,7 @@ public class ExplainWork implements Seri
private String resFile;
private ArrayList<Task<? extends Serializable>> rootTasks;
+ private Task<? extends Serializable> fetchTask;
private String astStringTree;
private HashSet<ReadEntity> inputs;
private ParseContext pCtx;
@@ -45,6 +46,8 @@ public class ExplainWork implements Seri
boolean dependency;
boolean logical;
+ boolean appendTaskType;
+
public ExplainWork() {
}
@@ -52,6 +55,7 @@ public class ExplainWork implements Seri
public ExplainWork(String resFile,
ParseContext pCtx,
List<Task<? extends Serializable>> rootTasks,
+ Task<? extends Serializable> fetchTask,
String astStringTree,
HashSet<ReadEntity> inputs,
boolean extended,
@@ -60,6 +64,7 @@ public class ExplainWork implements Seri
boolean logical) {
this.resFile = resFile;
this.rootTasks = new ArrayList<Task<? extends Serializable>>(rootTasks);
+ this.fetchTask = fetchTask;
this.astStringTree = astStringTree;
this.inputs = inputs;
this.extended = extended;
@@ -85,6 +90,14 @@ public class ExplainWork implements Seri
this.rootTasks = rootTasks;
}
+ public Task<? extends Serializable> getFetchTask() {
+ return fetchTask;
+ }
+
+ public void setFetchTask(Task<? extends Serializable> fetchTask) {
+ this.fetchTask = fetchTask;
+ }
+
public String getAstStringTree() {
return astStringTree;
}
@@ -141,4 +154,11 @@ public class ExplainWork implements Seri
this.logical = logical;
}
+ public boolean isAppendTaskType() {
+ return appendTaskType;
+ }
+
+ public void setAppendTaskType(boolean appendTaskType) {
+ this.appendTaskType = appendTaskType;
+ }
}
Added: hive/trunk/ql/src/test/queries/clientpositive/explain_rearrange.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/explain_rearrange.q?rev=1540485&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/explain_rearrange.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/explain_rearrange.q Sun Nov 10 16:15:56 2013
@@ -0,0 +1,98 @@
+-- query from auto_sortmerge_join_9.q
+
+CREATE TABLE tbl1(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS;
+CREATE TABLE tbl2(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS;
+
+set hive.auto.convert.join=true;
+set hive.optimize.bucketmapjoin = true;
+set hive.optimize.bucketmapjoin.sortedmerge = true;
+set hive.auto.convert.sortmerge.join=true;
+set hive.auto.convert.sortmerge.join.to.mapjoin=false;
+
+set hive.explain.dependency.append.tasktype=true;
+
+-- default behavior
+
+explain
+select src1.key, src1.cnt1, src2.cnt1 from
+(
+ select key, count(*) as cnt1 from
+ (
+ select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
+ ) subq1 group by key
+) src1
+join
+(
+ select key, count(*) as cnt1 from
+ (
+ select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
+ ) subq2 group by key
+) src2
+on src1.key = src2.key
+order by src1.key, src1.cnt1, src2.cnt1;
+
+set hive.stageid.rearrange=IDONLY;
+
+-- changes id only
+
+explain
+select src1.key, src1.cnt1, src2.cnt1 from
+(
+ select key, count(*) as cnt1 from
+ (
+ select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
+ ) subq1 group by key
+) src1
+join
+(
+ select key, count(*) as cnt1 from
+ (
+ select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
+ ) subq2 group by key
+) src2
+on src1.key = src2.key
+order by src1.key, src1.cnt1, src2.cnt1;
+
+set hive.stageid.rearrange=TRAVERSE;
+
+-- assign ids in traverse order
+
+explain
+select src1.key, src1.cnt1, src2.cnt1 from
+(
+ select key, count(*) as cnt1 from
+ (
+ select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
+ ) subq1 group by key
+) src1
+join
+(
+ select key, count(*) as cnt1 from
+ (
+ select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
+ ) subq2 group by key
+) src2
+on src1.key = src2.key
+order by src1.key, src1.cnt1, src2.cnt1;
+
+set hive.stageid.rearrange=EXECUTION;
+
+-- assign ids in execution order
+
+explain
+select src1.key, src1.cnt1, src2.cnt1 from
+(
+ select key, count(*) as cnt1 from
+ (
+ select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
+ ) subq1 group by key
+) src1
+join
+(
+ select key, count(*) as cnt1 from
+ (
+ select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
+ ) subq2 group by key
+) src2
+on src1.key = src2.key
+order by src1.key, src1.cnt1, src2.cnt1;
Added: hive/trunk/ql/src/test/results/clientpositive/explain_rearrange.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/explain_rearrange.q.out?rev=1540485&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/explain_rearrange.q.out (added)
+++ hive/trunk/ql/src/test/results/clientpositive/explain_rearrange.q.out Sun Nov 10 16:15:56 2013
@@ -0,0 +1,1626 @@
+PREHOOK: query: -- query from auto_sortmerge_join_9.q
+
+CREATE TABLE tbl1(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: -- query from auto_sortmerge_join_9.q
+
+CREATE TABLE tbl1(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@tbl1
+PREHOOK: query: CREATE TABLE tbl2(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE tbl2(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@tbl2
+PREHOOK: query: -- default behavior
+
+explain
+select src1.key, src1.cnt1, src2.cnt1 from
+(
+ select key, count(*) as cnt1 from
+ (
+ select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
+ ) subq1 group by key
+) src1
+join
+(
+ select key, count(*) as cnt1 from
+ (
+ select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
+ ) subq2 group by key
+) src2
+on src1.key = src2.key
+order by src1.key, src1.cnt1, src2.cnt1
+PREHOOK: type: QUERY
+POSTHOOK: query: -- default behavior
+
+explain
+select src1.key, src1.cnt1, src2.cnt1 from
+(
+ select key, count(*) as cnt1 from
+ (
+ select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
+ ) subq1 group by key
+) src1
+join
+(
+ select key, count(*) as cnt1 from
+ (
+ select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
+ ) subq2 group by key
+) src2
+on src1.key = src2.key
+order by src1.key, src1.cnt1, src2.cnt1
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME tbl1) a) (TOK_TABREF (TOK_TABNAME tbl2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value) val1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) value) val2)))) subq1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTIONSTAR count) cnt1)) (TOK_GROUPBY (TOK_TABLE_OR_COL key)))) src1) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME tbl1) a) (TOK_TABREF (TOK_TABNAME tbl2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value) val1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) value) val2)))) subq2)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTIONSTAR count) cnt1)) (TOK_GROUPBY (TOK_TABLE_OR_COL key)))) src2) (= (. (TOK_TABLE_OR_COL src1) key) (. (TOK_TABLE_OR_COL src2) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL src1) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL src1) cnt1)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL src2) cnt1))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL src1) key)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL src1) cnt1)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL src2) cnt1)))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage [MAPRED]
+ Stage-9 depends on stages: Stage-1, Stage-5 , consists of Stage-10, Stage-11, Stage-3 [CONDITIONAL]
+ Stage-10 has a backup stage: Stage-3 [MAPREDLOCAL]
+ Stage-7 depends on stages: Stage-10 [MAPRED]
+ Stage-4 depends on stages: Stage-3, Stage-7, Stage-8 [MAPRED]
+ Stage-11 has a backup stage: Stage-3 [MAPREDLOCAL]
+ Stage-8 depends on stages: Stage-11 [MAPRED]
+ Stage-3 [MAPRED]
+ Stage-5 is a root stage [MAPRED]
+ Stage-0 is a root stage [FETCH]
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ src1:subq1:a
+ TableScan
+ alias: a
+ Sorted Merge Bucket Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {key}
+ 1
+ handleSkewJoin: false
+ keys:
+ 0 [Column[key]]
+ 1 [Column[key]]
+ outputColumnNames: _col0
+ Position of Big Table: 0
+ Select Operator
+ expressions:
+ expr: _col0
+ type: int
+ outputColumnNames: _col0
+ Group By Operator
+ aggregations:
+ expr: count()
+ bucketGroup: false
+ keys:
+ expr: _col0
+ type: int
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: int
+ sort order: +
+ Map-reduce partition columns:
+ expr: _col0
+ type: int
+ tag: -1
+ value expressions:
+ expr: _col1
+ type: bigint
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations:
+ expr: count(VALUE._col0)
+ bucketGroup: false
+ keys:
+ expr: KEY._col0
+ type: int
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Select Operator
+ expressions:
+ expr: _col0
+ type: int
+ expr: _col1
+ type: bigint
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-9
+ Conditional Operator
+
+ Stage: Stage-10
+ Map Reduce Local Work
+ Alias -> Map Local Tables:
+ $INTNAME1
+ Fetch Operator
+ limit: -1
+ Alias -> Map Local Operator Tree:
+ $INTNAME1
+ TableScan
+ HashTable Sink Operator
+ condition expressions:
+ 0 {_col0} {_col1}
+ 1 {_col1}
+ handleSkewJoin: false
+ keys:
+ 0 [Column[_col0]]
+ 1 [Column[_col0]]
+ Position of Big Table: 0
+
+ Stage: Stage-7
+ Map Reduce
+ Alias -> Map Operator Tree:
+ $INTNAME
+ TableScan
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {_col0} {_col1}
+ 1 {_col1}
+ handleSkewJoin: false
+ keys:
+ 0 [Column[_col0]]
+ 1 [Column[_col0]]
+ outputColumnNames: _col0, _col1, _col3
+ Position of Big Table: 0
+ Select Operator
+ expressions:
+ expr: _col0
+ type: int
+ expr: _col1
+ type: bigint
+ expr: _col3
+ type: bigint
+ outputColumnNames: _col0, _col1, _col2
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-4
+ Map Reduce
+ Alias -> Map Operator Tree:
+#### A masked pattern was here ####
+ TableScan
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: int
+ expr: _col1
+ type: bigint
+ expr: _col2
+ type: bigint
+ sort order: +++
+ tag: -1
+ value expressions:
+ expr: _col0
+ type: int
+ expr: _col1
+ type: bigint
+ expr: _col2
+ type: bigint
+ Reduce Operator Tree:
+ Extract
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-11
+ Map Reduce Local Work
+ Alias -> Map Local Tables:
+ $INTNAME
+ Fetch Operator
+ limit: -1
+ Alias -> Map Local Operator Tree:
+ $INTNAME
+ TableScan
+ HashTable Sink Operator
+ condition expressions:
+ 0 {_col0} {_col1}
+ 1 {_col1}
+ handleSkewJoin: false
+ keys:
+ 0 [Column[_col0]]
+ 1 [Column[_col0]]
+ Position of Big Table: 1
+
+ Stage: Stage-8
+ Map Reduce
+ Alias -> Map Operator Tree:
+ $INTNAME1
+ TableScan
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {_col0} {_col1}
+ 1 {_col1}
+ handleSkewJoin: false
+ keys:
+ 0 [Column[_col0]]
+ 1 [Column[_col0]]
+ outputColumnNames: _col0, _col1, _col3
+ Position of Big Table: 1
+ Select Operator
+ expressions:
+ expr: _col0
+ type: int
+ expr: _col1
+ type: bigint
+ expr: _col3
+ type: bigint
+ outputColumnNames: _col0, _col1, _col2
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-3
+ Map Reduce
+ Alias -> Map Operator Tree:
+ $INTNAME
+ TableScan
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: int
+ sort order: +
+ Map-reduce partition columns:
+ expr: _col0
+ type: int
+ tag: 0
+ value expressions:
+ expr: _col0
+ type: int
+ expr: _col1
+ type: bigint
+ $INTNAME1
+ TableScan
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: int
+ sort order: +
+ Map-reduce partition columns:
+ expr: _col0
+ type: int
+ tag: 1
+ value expressions:
+ expr: _col1
+ type: bigint
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {VALUE._col0} {VALUE._col1}
+ 1 {VALUE._col1}
+ handleSkewJoin: false
+ outputColumnNames: _col0, _col1, _col3
+ Select Operator
+ expressions:
+ expr: _col0
+ type: int
+ expr: _col1
+ type: bigint
+ expr: _col3
+ type: bigint
+ outputColumnNames: _col0, _col1, _col2
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-5
+ Map Reduce
+ Alias -> Map Operator Tree:
+ src2:subq2:a
+ TableScan
+ alias: a
+ Sorted Merge Bucket Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {key}
+ 1
+ handleSkewJoin: false
+ keys:
+ 0 [Column[key]]
+ 1 [Column[key]]
+ outputColumnNames: _col0
+ Position of Big Table: 0
+ Select Operator
+ expressions:
+ expr: _col0
+ type: int
+ outputColumnNames: _col0
+ Group By Operator
+ aggregations:
+ expr: count()
+ bucketGroup: false
+ keys:
+ expr: _col0
+ type: int
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: int
+ sort order: +
+ Map-reduce partition columns:
+ expr: _col0
+ type: int
+ tag: -1
+ value expressions:
+ expr: _col1
+ type: bigint
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations:
+ expr: count(VALUE._col0)
+ bucketGroup: false
+ keys:
+ expr: KEY._col0
+ type: int
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Select Operator
+ expressions:
+ expr: _col0
+ type: int
+ expr: _col1
+ type: bigint
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+
+
+PREHOOK: query: -- changes id only
+
+explain
+select src1.key, src1.cnt1, src2.cnt1 from
+(
+ select key, count(*) as cnt1 from
+ (
+ select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
+ ) subq1 group by key
+) src1
+join
+(
+ select key, count(*) as cnt1 from
+ (
+ select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
+ ) subq2 group by key
+) src2
+on src1.key = src2.key
+order by src1.key, src1.cnt1, src2.cnt1
+PREHOOK: type: QUERY
+POSTHOOK: query: -- changes id only
+
+explain
+select src1.key, src1.cnt1, src2.cnt1 from
+(
+ select key, count(*) as cnt1 from
+ (
+ select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
+ ) subq1 group by key
+) src1
+join
+(
+ select key, count(*) as cnt1 from
+ (
+ select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
+ ) subq2 group by key
+) src2
+on src1.key = src2.key
+order by src1.key, src1.cnt1, src2.cnt1
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME tbl1) a) (TOK_TABREF (TOK_TABNAME tbl2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value) val1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) value) val2)))) subq1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTIONSTAR count) cnt1)) (TOK_GROUPBY (TOK_TABLE_OR_COL key)))) src1) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME tbl1) a) (TOK_TABREF (TOK_TABNAME tbl2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value) val1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) value) val2)))) subq2)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTIONSTAR count) cnt1)) (TOK_GROUPBY (TOK_TABLE_OR_COL key)))) src2) (= (. (TOK_TABLE_OR_COL src1) key) (. (TOK_TABLE_OR_COL src2) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL src1) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL src1) cnt1)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL src2) cnt1))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL src1) key)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL src1) cnt1)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL src2) cnt1)))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage [MAPRED]
+ Stage-2 depends on stages: Stage-1, Stage-9 , consists of Stage-3, Stage-6, Stage-8 [CONDITIONAL]
+ Stage-3 has a backup stage: Stage-8 [MAPREDLOCAL]
+ Stage-4 depends on stages: Stage-3 [MAPRED]
+ Stage-5 depends on stages: Stage-8, Stage-4, Stage-7 [MAPRED]
+ Stage-6 has a backup stage: Stage-8 [MAPREDLOCAL]
+ Stage-7 depends on stages: Stage-6 [MAPRED]
+ Stage-8 [MAPRED]
+ Stage-9 is a root stage [MAPRED]
+ Stage-10 is a root stage [FETCH]
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ src1:subq1:a
+ TableScan
+ alias: a
+ Sorted Merge Bucket Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {key}
+ 1
+ handleSkewJoin: false
+ keys:
+ 0 [Column[key]]
+ 1 [Column[key]]
+ outputColumnNames: _col0
+ Position of Big Table: 0
+ Select Operator
+ expressions:
+ expr: _col0
+ type: int
+ outputColumnNames: _col0
+ Group By Operator
+ aggregations:
+ expr: count()
+ bucketGroup: false
+ keys:
+ expr: _col0
+ type: int
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: int
+ sort order: +
+ Map-reduce partition columns:
+ expr: _col0
+ type: int
+ tag: -1
+ value expressions:
+ expr: _col1
+ type: bigint
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations:
+ expr: count(VALUE._col0)
+ bucketGroup: false
+ keys:
+ expr: KEY._col0
+ type: int
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Select Operator
+ expressions:
+ expr: _col0
+ type: int
+ expr: _col1
+ type: bigint
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Conditional Operator
+
+ Stage: Stage-3
+ Map Reduce Local Work
+ Alias -> Map Local Tables:
+ $INTNAME1
+ Fetch Operator
+ limit: -1
+ Alias -> Map Local Operator Tree:
+ $INTNAME1
+ TableScan
+ HashTable Sink Operator
+ condition expressions:
+ 0 {_col0} {_col1}
+ 1 {_col1}
+ handleSkewJoin: false
+ keys:
+ 0 [Column[_col0]]
+ 1 [Column[_col0]]
+ Position of Big Table: 0
+
+ Stage: Stage-4
+ Map Reduce
+ Alias -> Map Operator Tree:
+ $INTNAME
+ TableScan
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {_col0} {_col1}
+ 1 {_col1}
+ handleSkewJoin: false
+ keys:
+ 0 [Column[_col0]]
+ 1 [Column[_col0]]
+ outputColumnNames: _col0, _col1, _col3
+ Position of Big Table: 0
+ Select Operator
+ expressions:
+ expr: _col0
+ type: int
+ expr: _col1
+ type: bigint
+ expr: _col3
+ type: bigint
+ outputColumnNames: _col0, _col1, _col2
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-5
+ Map Reduce
+ Alias -> Map Operator Tree:
+#### A masked pattern was here ####
+ TableScan
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: int
+ expr: _col1
+ type: bigint
+ expr: _col2
+ type: bigint
+ sort order: +++
+ tag: -1
+ value expressions:
+ expr: _col0
+ type: int
+ expr: _col1
+ type: bigint
+ expr: _col2
+ type: bigint
+ Reduce Operator Tree:
+ Extract
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-6
+ Map Reduce Local Work
+ Alias -> Map Local Tables:
+ $INTNAME
+ Fetch Operator
+ limit: -1
+ Alias -> Map Local Operator Tree:
+ $INTNAME
+ TableScan
+ HashTable Sink Operator
+ condition expressions:
+ 0 {_col0} {_col1}
+ 1 {_col1}
+ handleSkewJoin: false
+ keys:
+ 0 [Column[_col0]]
+ 1 [Column[_col0]]
+ Position of Big Table: 1
+
+ Stage: Stage-7
+ Map Reduce
+ Alias -> Map Operator Tree:
+ $INTNAME1
+ TableScan
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {_col0} {_col1}
+ 1 {_col1}
+ handleSkewJoin: false
+ keys:
+ 0 [Column[_col0]]
+ 1 [Column[_col0]]
+ outputColumnNames: _col0, _col1, _col3
+ Position of Big Table: 1
+ Select Operator
+ expressions:
+ expr: _col0
+ type: int
+ expr: _col1
+ type: bigint
+ expr: _col3
+ type: bigint
+ outputColumnNames: _col0, _col1, _col2
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-8
+ Map Reduce
+ Alias -> Map Operator Tree:
+ $INTNAME
+ TableScan
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: int
+ sort order: +
+ Map-reduce partition columns:
+ expr: _col0
+ type: int
+ tag: 0
+ value expressions:
+ expr: _col0
+ type: int
+ expr: _col1
+ type: bigint
+ $INTNAME1
+ TableScan
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: int
+ sort order: +
+ Map-reduce partition columns:
+ expr: _col0
+ type: int
+ tag: 1
+ value expressions:
+ expr: _col1
+ type: bigint
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {VALUE._col0} {VALUE._col1}
+ 1 {VALUE._col1}
+ handleSkewJoin: false
+ outputColumnNames: _col0, _col1, _col3
+ Select Operator
+ expressions:
+ expr: _col0
+ type: int
+ expr: _col1
+ type: bigint
+ expr: _col3
+ type: bigint
+ outputColumnNames: _col0, _col1, _col2
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-9
+ Map Reduce
+ Alias -> Map Operator Tree:
+ src2:subq2:a
+ TableScan
+ alias: a
+ Sorted Merge Bucket Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {key}
+ 1
+ handleSkewJoin: false
+ keys:
+ 0 [Column[key]]
+ 1 [Column[key]]
+ outputColumnNames: _col0
+ Position of Big Table: 0
+ Select Operator
+ expressions:
+ expr: _col0
+ type: int
+ outputColumnNames: _col0
+ Group By Operator
+ aggregations:
+ expr: count()
+ bucketGroup: false
+ keys:
+ expr: _col0
+ type: int
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: int
+ sort order: +
+ Map-reduce partition columns:
+ expr: _col0
+ type: int
+ tag: -1
+ value expressions:
+ expr: _col1
+ type: bigint
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations:
+ expr: count(VALUE._col0)
+ bucketGroup: false
+ keys:
+ expr: KEY._col0
+ type: int
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Select Operator
+ expressions:
+ expr: _col0
+ type: int
+ expr: _col1
+ type: bigint
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-10
+ Fetch Operator
+ limit: -1
+
+
+PREHOOK: query: -- assign ids in traverse order
+
+explain
+select src1.key, src1.cnt1, src2.cnt1 from
+(
+ select key, count(*) as cnt1 from
+ (
+ select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
+ ) subq1 group by key
+) src1
+join
+(
+ select key, count(*) as cnt1 from
+ (
+ select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
+ ) subq2 group by key
+) src2
+on src1.key = src2.key
+order by src1.key, src1.cnt1, src2.cnt1
+PREHOOK: type: QUERY
+POSTHOOK: query: -- assign ids in traverse order
+
+explain
+select src1.key, src1.cnt1, src2.cnt1 from
+(
+ select key, count(*) as cnt1 from
+ (
+ select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
+ ) subq1 group by key
+) src1
+join
+(
+ select key, count(*) as cnt1 from
+ (
+ select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
+ ) subq2 group by key
+) src2
+on src1.key = src2.key
+order by src1.key, src1.cnt1, src2.cnt1
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME tbl1) a) (TOK_TABREF (TOK_TABNAME tbl2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value) val1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) value) val2)))) subq1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTIONSTAR count) cnt1)) (TOK_GROUPBY (TOK_TABLE_OR_COL key)))) src1) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME tbl1) a) (TOK_TABREF (TOK_TABNAME tbl2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value) val1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) value) val2)))) subq2)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTIONSTAR count) cnt1)) (TOK_GROUPBY (TOK_TABLE_OR_COL key)))) src2) (= (. (TOK_TABLE_OR_COL src1) key) (. (TOK_TABLE_OR_COL src2) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL src1) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL src1) cnt1)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL src2) cnt1))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL src1) key)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL src1) cnt1)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL src2) cnt1)))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage [MAPRED]
+ Stage-2 is a root stage [MAPRED]
+ Stage-3 depends on stages: Stage-1, Stage-2 , consists of Stage-4, Stage-6, Stage-8 [CONDITIONAL]
+ Stage-4 has a backup stage: Stage-8 [MAPREDLOCAL]
+ Stage-5 depends on stages: Stage-4 [MAPRED]
+ Stage-6 has a backup stage: Stage-8 [MAPREDLOCAL]
+ Stage-7 depends on stages: Stage-6 [MAPRED]
+ Stage-8 [MAPRED]
+ Stage-9 depends on stages: Stage-8, Stage-5, Stage-7 [MAPRED]
+ Stage-10 is a root stage [FETCH]
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ src1:subq1:a
+ TableScan
+ alias: a
+ Sorted Merge Bucket Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {key}
+ 1
+ handleSkewJoin: false
+ keys:
+ 0 [Column[key]]
+ 1 [Column[key]]
+ outputColumnNames: _col0
+ Position of Big Table: 0
+ Select Operator
+ expressions:
+ expr: _col0
+ type: int
+ outputColumnNames: _col0
+ Group By Operator
+ aggregations:
+ expr: count()
+ bucketGroup: false
+ keys:
+ expr: _col0
+ type: int
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: int
+ sort order: +
+ Map-reduce partition columns:
+ expr: _col0
+ type: int
+ tag: -1
+ value expressions:
+ expr: _col1
+ type: bigint
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations:
+ expr: count(VALUE._col0)
+ bucketGroup: false
+ keys:
+ expr: KEY._col0
+ type: int
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Select Operator
+ expressions:
+ expr: _col0
+ type: int
+ expr: _col1
+ type: bigint
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Alias -> Map Operator Tree:
+ src2:subq2:a
+ TableScan
+ alias: a
+ Sorted Merge Bucket Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {key}
+ 1
+ handleSkewJoin: false
+ keys:
+ 0 [Column[key]]
+ 1 [Column[key]]
+ outputColumnNames: _col0
+ Position of Big Table: 0
+ Select Operator
+ expressions:
+ expr: _col0
+ type: int
+ outputColumnNames: _col0
+ Group By Operator
+ aggregations:
+ expr: count()
+ bucketGroup: false
+ keys:
+ expr: _col0
+ type: int
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: int
+ sort order: +
+ Map-reduce partition columns:
+ expr: _col0
+ type: int
+ tag: -1
+ value expressions:
+ expr: _col1
+ type: bigint
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations:
+ expr: count(VALUE._col0)
+ bucketGroup: false
+ keys:
+ expr: KEY._col0
+ type: int
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Select Operator
+ expressions:
+ expr: _col0
+ type: int
+ expr: _col1
+ type: bigint
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-3
+ Conditional Operator
+
+ Stage: Stage-4
+ Map Reduce Local Work
+ Alias -> Map Local Tables:
+ $INTNAME1
+ Fetch Operator
+ limit: -1
+ Alias -> Map Local Operator Tree:
+ $INTNAME1
+ TableScan
+ HashTable Sink Operator
+ condition expressions:
+ 0 {_col0} {_col1}
+ 1 {_col1}
+ handleSkewJoin: false
+ keys:
+ 0 [Column[_col0]]
+ 1 [Column[_col0]]
+ Position of Big Table: 0
+
+ Stage: Stage-5
+ Map Reduce
+ Alias -> Map Operator Tree:
+ $INTNAME
+ TableScan
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {_col0} {_col1}
+ 1 {_col1}
+ handleSkewJoin: false
+ keys:
+ 0 [Column[_col0]]
+ 1 [Column[_col0]]
+ outputColumnNames: _col0, _col1, _col3
+ Position of Big Table: 0
+ Select Operator
+ expressions:
+ expr: _col0
+ type: int
+ expr: _col1
+ type: bigint
+ expr: _col3
+ type: bigint
+ outputColumnNames: _col0, _col1, _col2
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-6
+ Map Reduce Local Work
+ Alias -> Map Local Tables:
+ $INTNAME
+ Fetch Operator
+ limit: -1
+ Alias -> Map Local Operator Tree:
+ $INTNAME
+ TableScan
+ HashTable Sink Operator
+ condition expressions:
+ 0 {_col0} {_col1}
+ 1 {_col1}
+ handleSkewJoin: false
+ keys:
+ 0 [Column[_col0]]
+ 1 [Column[_col0]]
+ Position of Big Table: 1
+
+ Stage: Stage-7
+ Map Reduce
+ Alias -> Map Operator Tree:
+ $INTNAME1
+ TableScan
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {_col0} {_col1}
+ 1 {_col1}
+ handleSkewJoin: false
+ keys:
+ 0 [Column[_col0]]
+ 1 [Column[_col0]]
+ outputColumnNames: _col0, _col1, _col3
+ Position of Big Table: 1
+ Select Operator
+ expressions:
+ expr: _col0
+ type: int
+ expr: _col1
+ type: bigint
+ expr: _col3
+ type: bigint
+ outputColumnNames: _col0, _col1, _col2
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-8
+ Map Reduce
+ Alias -> Map Operator Tree:
+ $INTNAME
+ TableScan
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: int
+ sort order: +
+ Map-reduce partition columns:
+ expr: _col0
+ type: int
+ tag: 0
+ value expressions:
+ expr: _col0
+ type: int
+ expr: _col1
+ type: bigint
+ $INTNAME1
+ TableScan
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: int
+ sort order: +
+ Map-reduce partition columns:
+ expr: _col0
+ type: int
+ tag: 1
+ value expressions:
+ expr: _col1
+ type: bigint
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {VALUE._col0} {VALUE._col1}
+ 1 {VALUE._col1}
+ handleSkewJoin: false
+ outputColumnNames: _col0, _col1, _col3
+ Select Operator
+ expressions:
+ expr: _col0
+ type: int
+ expr: _col1
+ type: bigint
+ expr: _col3
+ type: bigint
+ outputColumnNames: _col0, _col1, _col2
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-9
+ Map Reduce
+ Alias -> Map Operator Tree:
+#### A masked pattern was here ####
+ TableScan
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: int
+ expr: _col1
+ type: bigint
+ expr: _col2
+ type: bigint
+ sort order: +++
+ tag: -1
+ value expressions:
+ expr: _col0
+ type: int
+ expr: _col1
+ type: bigint
+ expr: _col2
+ type: bigint
+ Reduce Operator Tree:
+ Extract
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-10
+ Fetch Operator
+ limit: -1
+
+
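The hunk that follows exercises the execution-order mode of the new stage-ID rearrangement. A minimal sketch of how a q-file would select that mode (the actual set statements in explain_rearrange.q are not shown in this diff, so the lines below are an assumption based on the mode name):

    -- hypothetical driver lines; 'execution' assigns stage IDs in execution order
    set hive.stageid.rearrange=execution;
    explain <the query shown below>;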
+PREHOOK: query: -- assign ids in execution order
+
+explain
+select src1.key, src1.cnt1, src2.cnt1 from
+(
+ select key, count(*) as cnt1 from
+ (
+ select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
+ ) subq1 group by key
+) src1
+join
+(
+ select key, count(*) as cnt1 from
+ (
+ select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
+ ) subq2 group by key
+) src2
+on src1.key = src2.key
+order by src1.key, src1.cnt1, src2.cnt1
+PREHOOK: type: QUERY
+POSTHOOK: query: -- assign ids in execution order
+
+explain
+select src1.key, src1.cnt1, src2.cnt1 from
+(
+ select key, count(*) as cnt1 from
+ (
+ select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
+ ) subq1 group by key
+) src1
+join
+(
+ select key, count(*) as cnt1 from
+ (
+ select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
+ ) subq2 group by key
+) src2
+on src1.key = src2.key
+order by src1.key, src1.cnt1, src2.cnt1
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME tbl1) a) (TOK_TABREF (TOK_TABNAME tbl2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value) val1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) value) val2)))) subq1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTIONSTAR count) cnt1)) (TOK_GROUPBY (TOK_TABLE_OR_COL key)))) src1) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME tbl1) a) (TOK_TABREF (TOK_TABNAME tbl2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value) val1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) value) val2)))) subq2)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTIONSTAR count) cnt1)) (TOK_GROUPBY (TOK_TABLE_OR_COL key)))) src2) (= (. (TOK_TABLE_OR_COL src1) key) (. (TOK_TABLE_OR_COL src2) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL src1) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL src1) cnt1)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL src2) cnt1))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL src1) key)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL src1) cnt1)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL src2) cnt1)))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage [MAPRED]
+ Stage-2 is a root stage [MAPRED]
+ Stage-3 depends on stages: Stage-1, Stage-2 , consists of Stage-4, Stage-5, Stage-6 [CONDITIONAL]
+ Stage-4 has a backup stage: Stage-6 [MAPREDLOCAL]
+ Stage-5 has a backup stage: Stage-6 [MAPREDLOCAL]
+ Stage-6 [MAPRED]
+ Stage-7 depends on stages: Stage-4 [MAPRED]
+ Stage-8 depends on stages: Stage-5 [MAPRED]
+ Stage-9 depends on stages: Stage-6, Stage-7, Stage-8 [MAPRED]
+ Stage-10 is a root stage [FETCH]
+
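Read together with the plan bodies that follow, the dependency list above shows the effect of execution-order numbering: stage IDs now track the order in which the stages would actually run. Stage-1 and Stage-2 (the two group-by jobs) are roots; Stage-3, the conditional, is numbered after both of its parents; its hash-table-build branches Stage-4 and Stage-5 come next, each naming the common-join job Stage-6 as its backup; the dependent map-join jobs Stage-7 and Stage-8 follow; and the order-by job (Stage-9) and the fetch (Stage-10) close the plan. Compare the hunk above, where what appears to be the same physical plan carries a different numbering (there the common join is Stage-8 and the local-work stages are Stage-4 and Stage-6). A sketch of toggling the mode to compare outputs (the property and its accepted values come from this patch; the other modes are only named here, not described):

    set hive.stageid.rearrange=execution;  -- the numbering shown in this output
    -- other accepted values: none (the default), idonly, traverse
    explain <query>;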
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ src1:subq1:a
+ TableScan
+ alias: a
+ Sorted Merge Bucket Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {key}
+ 1
+ handleSkewJoin: false
+ keys:
+ 0 [Column[key]]
+ 1 [Column[key]]
+ outputColumnNames: _col0
+ Position of Big Table: 0
+ Select Operator
+ expressions:
+ expr: _col0
+ type: int
+ outputColumnNames: _col0
+ Group By Operator
+ aggregations:
+ expr: count()
+ bucketGroup: false
+ keys:
+ expr: _col0
+ type: int
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: int
+ sort order: +
+ Map-reduce partition columns:
+ expr: _col0
+ type: int
+ tag: -1
+ value expressions:
+ expr: _col1
+ type: bigint
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations:
+ expr: count(VALUE._col0)
+ bucketGroup: false
+ keys:
+ expr: KEY._col0
+ type: int
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Select Operator
+ expressions:
+ expr: _col0
+ type: int
+ expr: _col1
+ type: bigint
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Alias -> Map Operator Tree:
+ src2:subq2:a
+ TableScan
+ alias: a
+ Sorted Merge Bucket Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {key}
+ 1
+ handleSkewJoin: false
+ keys:
+ 0 [Column[key]]
+ 1 [Column[key]]
+ outputColumnNames: _col0
+ Position of Big Table: 0
+ Select Operator
+ expressions:
+ expr: _col0
+ type: int
+ outputColumnNames: _col0
+ Group By Operator
+ aggregations:
+ expr: count()
+ bucketGroup: false
+ keys:
+ expr: _col0
+ type: int
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: int
+ sort order: +
+ Map-reduce partition columns:
+ expr: _col0
+ type: int
+ tag: -1
+ value expressions:
+ expr: _col1
+ type: bigint
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations:
+ expr: count(VALUE._col0)
+ bucketGroup: false
+ keys:
+ expr: KEY._col0
+ type: int
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Select Operator
+ expressions:
+ expr: _col0
+ type: int
+ expr: _col1
+ type: bigint
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-3
+ Conditional Operator
+
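Stage-3's Conditional Operator defers the join-strategy choice to run time: when one side of the join is small enough to hash, Hive runs one of the local hash-table builds (Stage-4 or Stage-5) followed by the matching map-join job (Stage-7 or Stage-8); otherwise it falls back to the common reduce-side join in Stage-6, which both branches list as their backup stage. A conditional of this shape is what map-join auto-conversion typically generates; the setting below is an assumption about the test's configuration, not something visible in this hunk:

    set hive.auto.convert.join=true;  -- hypothetical here: enables run-time map-join conversion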
+ Stage: Stage-4
+ Map Reduce Local Work
+ Alias -> Map Local Tables:
+ $INTNAME1
+ Fetch Operator
+ limit: -1
+ Alias -> Map Local Operator Tree:
+ $INTNAME1
+ TableScan
+ HashTable Sink Operator
+ condition expressions:
+ 0 {_col0} {_col1}
+ 1 {_col1}
+ handleSkewJoin: false
+ keys:
+ 0 [Column[_col0]]
+ 1 [Column[_col0]]
+ Position of Big Table: 0
+
+ Stage: Stage-5
+ Map Reduce Local Work
+ Alias -> Map Local Tables:
+ $INTNAME
+ Fetch Operator
+ limit: -1
+ Alias -> Map Local Operator Tree:
+ $INTNAME
+ TableScan
+ HashTable Sink Operator
+ condition expressions:
+ 0 {_col0} {_col1}
+ 1 {_col1}
+ handleSkewJoin: false
+ keys:
+ 0 [Column[_col0]]
+ 1 [Column[_col0]]
+ Position of Big Table: 1
+
+ Stage: Stage-6
+ Map Reduce
+ Alias -> Map Operator Tree:
+ $INTNAME
+ TableScan
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: int
+ sort order: +
+ Map-reduce partition columns:
+ expr: _col0
+ type: int
+ tag: 0
+ value expressions:
+ expr: _col0
+ type: int
+ expr: _col1
+ type: bigint
+ $INTNAME1
+ TableScan
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: int
+ sort order: +
+ Map-reduce partition columns:
+ expr: _col0
+ type: int
+ tag: 1
+ value expressions:
+ expr: _col1
+ type: bigint
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {VALUE._col0} {VALUE._col1}
+ 1 {VALUE._col1}
+ handleSkewJoin: false
+ outputColumnNames: _col0, _col1, _col3
+ Select Operator
+ expressions:
+ expr: _col0
+ type: int
+ expr: _col1
+ type: bigint
+ expr: _col3
+ type: bigint
+ outputColumnNames: _col0, _col1, _col2
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-7
+ Map Reduce
+ Alias -> Map Operator Tree:
+ $INTNAME
+ TableScan
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {_col0} {_col1}
+ 1 {_col1}
+ handleSkewJoin: false
+ keys:
+ 0 [Column[_col0]]
+ 1 [Column[_col0]]
+ outputColumnNames: _col0, _col1, _col3
+ Position of Big Table: 0
+ Select Operator
+ expressions:
+ expr: _col0
+ type: int
+ expr: _col1
+ type: bigint
+ expr: _col3
+ type: bigint
+ outputColumnNames: _col0, _col1, _col2
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-8
+ Map Reduce
+ Alias -> Map Operator Tree:
+ $INTNAME1
+ TableScan
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {_col0} {_col1}
+ 1 {_col1}
+ handleSkewJoin: false
+ keys:
+ 0 [Column[_col0]]
+ 1 [Column[_col0]]
+ outputColumnNames: _col0, _col1, _col3
+ Position of Big Table: 1
+ Select Operator
+ expressions:
+ expr: _col0
+ type: int
+ expr: _col1
+ type: bigint
+ expr: _col3
+ type: bigint
+ outputColumnNames: _col0, _col1, _col2
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-9
+ Map Reduce
+ Alias -> Map Operator Tree:
+#### A masked pattern was here ####
+ TableScan
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: int
+ expr: _col1
+ type: bigint
+ expr: _col2
+ type: bigint
+ sort order: +++
+ tag: -1
+ value expressions:
+ expr: _col0
+ type: int
+ expr: _col1
+ type: bigint
+ expr: _col2
+ type: bigint
+ Reduce Operator Tree:
+ Extract
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-10
+ Fetch Operator
+ limit: -1
+
+