You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by zs...@apache.org on 2008/12/15 20:47:22 UTC
svn commit: r726784 - in /hadoop/hive/trunk: ./
ql/src/java/org/apache/hadoop/hive/ql/exec/
ql/src/java/org/apache/hadoop/hive/ql/optimizer/
ql/src/java/org/apache/hadoop/hive/ql/parse/
ql/src/test/queries/clientpositive/ ql/src/test/results/clientposi...
Author: zshao
Date: Mon Dec 15 11:47:21 2008
New Revision: 726784
URL: http://svn.apache.org/viewvc?rev=726784&view=rev
Log:
HIVE-168. Fixed join on a subquery with a group by. (Namit Jain via zshao)
Added:
hadoop/hive/trunk/ql/src/test/queries/clientpositive/join18.q
hadoop/hive/trunk/ql/src/test/results/clientpositive/join18.q.out
Modified:
hadoop/hive/trunk/CHANGES.txt
hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java
hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRRedSink1.java
hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRRedSink2.java
hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/DefaultRuleDispatcher.java
hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/RuleRegExp.java
hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
Modified: hadoop/hive/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/CHANGES.txt?rev=726784&r1=726783&r2=726784&view=diff
==============================================================================
--- hadoop/hive/trunk/CHANGES.txt (original)
+++ hadoop/hive/trunk/CHANGES.txt Mon Dec 15 11:47:21 2008
@@ -41,6 +41,8 @@
BUG FIXES
+ HIVE-168. Fixed join on a subquery with a group by. (Namit Jain via zshao)
+
HIVE-169. Fixed configuration parameter used for determining join interval
in JoinOperator. (Namit Jain via zshao)
Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java?rev=726784&r1=726783&r2=726784&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java Mon Dec 15 11:47:21 2008
@@ -331,9 +331,10 @@
throws Exception {
out.print(indentString(indent));
out.println("STAGE PLANS:");
+ HashSet<Task<? extends Serializable>> displayedSet = new HashSet<Task<? extends Serializable>>();
for(Task<? extends Serializable> rootTask: rootTasks) {
outputPlan(rootTask, out, work.getExtended(),
- new HashSet<Task<? extends Serializable>>(), indent+2);
+ displayedSet, indent+2);
}
}
Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRRedSink1.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRRedSink1.java?rev=726784&r1=726783&r2=726784&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRRedSink1.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRRedSink1.java Mon Dec 15 11:47:21 2008
@@ -92,8 +92,9 @@
// This will happen in case of joins. The current plan can be thrown away after being merged with the
// original plan
else {
- GenMapRedUtils.joinPlan(opMapTask, ctx);
+ GenMapRedUtils.joinPlan(op, null, opMapTask, ctx);
currTask = opMapTask;
+ ctx.setCurrTask(currTask);
}
mapCurrCtx.put(op, new GenMapRedCtx(ctx.getCurrTask(), ctx.getCurrTopOp(), ctx.getCurrAliasId()));
Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRRedSink2.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRRedSink2.java?rev=726784&r1=726783&r2=726784&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRRedSink2.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRRedSink2.java Mon Dec 15 11:47:21 2008
@@ -67,8 +67,9 @@
if (opMapTask == null)
GenMapRedUtils.splitPlan(op, ctx);
else {
- GenMapRedUtils.joinPlan(opMapTask, ctx);
+ GenMapRedUtils.joinPlan(op, currTask, opMapTask, ctx);
currTask = opMapTask;
+ ctx.setCurrTask(currTask);
}
mapCurrCtx.put(op, new GenMapRedCtx(ctx.getCurrTask(), ctx.getCurrTopOp(), ctx.getCurrAliasId()));
Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java?rev=726784&r1=726783&r2=726784&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java Mon Dec 15 11:47:21 2008
@@ -116,11 +116,19 @@
* @param task for the old task for the current reducer
* @param opProcCtx processing context
*/
- public static void joinPlan(Task<? extends Serializable> task, GenMRProcContext opProcCtx) throws SemanticException {
+ public static void joinPlan(ReduceSinkOperator op,
+ Task<? extends Serializable> oldTask,
+ Task<? extends Serializable> task,
+ GenMRProcContext opProcCtx) throws SemanticException {
Task<? extends Serializable> currTask = task;
mapredWork plan = (mapredWork) currTask.getWork();
Operator<? extends Serializable> currTopOp = opProcCtx.getCurrTopOp();
+ // terminate the old task and make current task dependent on it
+ if (oldTask != null) {
+ splitTasks(op, oldTask, currTask, opProcCtx);
+ }
+
if (currTopOp != null) {
List<Operator<? extends Serializable>> seenOps = opProcCtx.getSeenOps();
String currAliasId = opProcCtx.getCurrAliasId();
@@ -132,6 +140,8 @@
currTopOp = null;
opProcCtx.setCurrTopOp(currTopOp);
}
+
+ opProcCtx.setCurrTask(currTask);
}
/**
@@ -139,7 +149,8 @@
* @param op the reduce sink operator encountered
* @param opProcCtx processing context
*/
- public static void splitPlan(ReduceSinkOperator op, GenMRProcContext opProcCtx) {
+ public static void splitPlan(ReduceSinkOperator op, GenMRProcContext opProcCtx)
+ throws SemanticException {
// Generate a new task
mapredWork cplan = getMapRedWork();
ParseContext parseCtx = opProcCtx.getParseCtx();
@@ -157,75 +168,9 @@
HashMap<Operator<? extends Serializable>, Task<? extends Serializable>> opTaskMap = opProcCtx.getOpTaskMap();
opTaskMap.put(reducer, redTask);
-
- // generate the temporary file
- String scratchDir = opProcCtx.getScratchDir();
- int randomid = opProcCtx.getRandomId();
- int pathid = opProcCtx.getPathId();
-
- String taskTmpDir = scratchDir + File.separator + randomid + '.' + pathid ;
- pathid++;
- opProcCtx.setPathId(pathid);
-
- Operator<? extends Serializable> parent = op.getParentOperators().get(0);
- tableDesc tt_desc =
- PlanUtils.getBinaryTableDesc(PlanUtils.getFieldSchemasFromRowSchema(parent.getSchema(), "temporarycol"));
-
- // Create a file sink operator for this file name
- Operator<? extends Serializable> fs_op =
- putOpInsertMap(OperatorFactory.get
- (new fileSinkDesc(taskTmpDir, tt_desc,
- parseCtx.getConf().getBoolVar(HiveConf.ConfVars.COMPRESSINTERMEDIATE)),
- parent.getSchema()), null, parseCtx);
-
- // replace the reduce child with this operator
- List<Operator<? extends Serializable>> childOpList = parent.getChildOperators();
- for (int pos = 0; pos < childOpList.size(); pos++) {
- if (childOpList.get(pos) == op) {
- childOpList.set(pos, fs_op);
- break;
- }
- }
-
- List<Operator<? extends Serializable>> parentOpList = new ArrayList<Operator<? extends Serializable>>();
- parentOpList.add(parent);
- fs_op.setParentOperators(parentOpList);
-
- // Add the path to alias mapping
- if (cplan.getPathToAliases().get(taskTmpDir) == null) {
- cplan.getPathToAliases().put(taskTmpDir, new ArrayList<String>());
- }
-
- String streamDesc;
- if (reducer.getClass() == JoinOperator.class)
- streamDesc = "$INTNAME";
- else
- streamDesc = taskTmpDir;
-
- cplan.getPathToAliases().get(taskTmpDir).add(streamDesc);
- cplan.getPathToPartitionInfo().put(taskTmpDir, new partitionDesc(tt_desc, null));
- cplan.getAliasToWork().put(streamDesc, op);
- setKeyAndValueDesc(cplan, op);
-
- // Make this task dependent on the current task
Task<? extends Serializable> currTask = opProcCtx.getCurrTask();
- currTask.addDependentTask(redTask);
-
- // TODO: Allocate work to remove the temporary files and make that
- // dependent on the redTask
- if (reducer.getClass() == JoinOperator.class)
- cplan.setNeedsTagging(true);
-
- Operator<? extends Serializable> currTopOp = opProcCtx.getCurrTopOp();
- String currAliasId = opProcCtx.getCurrAliasId();
-
- currTopOp = null;
- currAliasId = null;
- currTask = redTask;
- opProcCtx.setCurrTask(currTask);
- opProcCtx.setCurrTopOp(currTopOp);
- opProcCtx.setCurrAliasId(currAliasId);
+ splitTasks(op, currTask, redTask, opProcCtx);
opProcCtx.getRootOps().add(op);
}
@@ -343,4 +288,95 @@
parseCtx.getOpParseCtx().put(op, ctx);
return op;
}
+
+ @SuppressWarnings("nls")
+ /**
+ * Merge the tasks - by creating a temporary file between them.
+ * @param op reduce sink operator being processed
+ * @param oldTask the parent task
+ * @param task the child task
+ * @param opProcCtx context
+ **/
+ private static void splitTasks(ReduceSinkOperator op,
+ Task<? extends Serializable> oldTask,
+ Task<? extends Serializable> task,
+ GenMRProcContext opProcCtx) throws SemanticException {
+ Task<? extends Serializable> currTask = task;
+ mapredWork plan = (mapredWork) currTask.getWork();
+ Operator<? extends Serializable> currTopOp = opProcCtx.getCurrTopOp();
+
+ ParseContext parseCtx = opProcCtx.getParseCtx();
+ oldTask.addDependentTask(currTask);
+
+ // generate the temporary file
+ String scratchDir = opProcCtx.getScratchDir();
+ int randomid = opProcCtx.getRandomId();
+ int pathid = opProcCtx.getPathId();
+
+ String taskTmpDir = scratchDir + File.separator + randomid + '.' + pathid ;
+ pathid++;
+ opProcCtx.setPathId(pathid);
+
+ Operator<? extends Serializable> parent = op.getParentOperators().get(0);
+ tableDesc tt_desc =
+ PlanUtils.getBinaryTableDesc(PlanUtils.getFieldSchemasFromRowSchema(parent.getSchema(), "temporarycol"));
+
+ // Create a file sink operator for this file name
+ Operator<? extends Serializable> fs_op =
+ putOpInsertMap(OperatorFactory.get
+ (new fileSinkDesc(taskTmpDir, tt_desc,
+ parseCtx.getConf().getBoolVar(HiveConf.ConfVars.COMPRESSINTERMEDIATE)),
+ parent.getSchema()), null, parseCtx);
+
+ // replace the reduce child with this operator
+ List<Operator<? extends Serializable>> childOpList = parent.getChildOperators();
+ for (int pos = 0; pos < childOpList.size(); pos++) {
+ if (childOpList.get(pos) == op) {
+ childOpList.set(pos, fs_op);
+ break;
+ }
+ }
+
+ List<Operator<? extends Serializable>> parentOpList = new ArrayList<Operator<? extends Serializable>>();
+ parentOpList.add(parent);
+ fs_op.setParentOperators(parentOpList);
+
+ Operator<? extends Serializable> reducer = op.getChildOperators().get(0);
+
+ String streamDesc;
+ mapredWork cplan = (mapredWork) currTask.getWork();
+
+ if (reducer.getClass() == JoinOperator.class) {
+ String origStreamDesc;
+ streamDesc = "$INTNAME";
+ origStreamDesc = streamDesc;
+ int pos = 0;
+ while (cplan.getAliasToWork().get(streamDesc) != null)
+ streamDesc = origStreamDesc.concat(String.valueOf(++pos));
+ }
+ else
+ streamDesc = taskTmpDir;
+
+ // Add the path to alias mapping
+ if (cplan.getPathToAliases().get(taskTmpDir) == null) {
+ cplan.getPathToAliases().put(taskTmpDir, new ArrayList<String>());
+ }
+
+ cplan.getPathToAliases().get(taskTmpDir).add(streamDesc);
+ cplan.getPathToPartitionInfo().put(taskTmpDir, new partitionDesc(tt_desc, null));
+ cplan.getAliasToWork().put(streamDesc, op);
+ setKeyAndValueDesc(cplan, op);
+
+ // TODO: Allocate work to remove the temporary files and make that
+ // dependent on the redTask
+ if (reducer.getClass() == JoinOperator.class)
+ cplan.setNeedsTagging(true);
+
+ currTopOp = null;
+ String currAliasId = null;
+
+ opProcCtx.setCurrTopOp(currTopOp);
+ opProcCtx.setCurrAliasId(currAliasId);
+ opProcCtx.setCurrTask(currTask);
+ }
}
Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/DefaultRuleDispatcher.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/DefaultRuleDispatcher.java?rev=726784&r1=726783&r2=726784&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/DefaultRuleDispatcher.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/DefaultRuleDispatcher.java Mon Dec 15 11:47:21 2008
@@ -40,13 +40,17 @@
private Map<Rule, OperatorProcessor> opProcRules;
private OperatorProcessorContext opProcCtx;
-
+ private OperatorProcessor defaultProc;
+
/**
* constructor
* @param defaultProc default processor to be fired if no rule matches
* @param opp operator processor that handles actual processing of the node
* @param opProcCtx operator processor context, which is opaque to the dispatcher
*/
- public DefaultRuleDispatcher(Map<Rule, OperatorProcessor> opp, OperatorProcessorContext opProcCtx) {
+ public DefaultRuleDispatcher(OperatorProcessor defaultProc,
+ Map<Rule, OperatorProcessor> opp, OperatorProcessorContext opProcCtx) {
+ this.defaultProc = defaultProc;
this.opProcRules = opp;
this.opProcCtx = opProcCtx;
}
@@ -66,14 +70,18 @@
int minCost = Integer.MAX_VALUE;
for (Rule r : opProcRules.keySet()) {
int cost = r.cost(opStack);
- if (cost <= minCost) {
+ if ((cost >= 0) && (cost <= minCost)) {
minCost = cost;
rule = r;
}
}
- assert rule != null;
- OperatorProcessor proc = opProcRules.get(rule);
+ OperatorProcessor proc;
+
+ if (rule == null)
+ proc = defaultProc;
+ else
+ proc = opProcRules.get(rule);
// If the processor has registered a process method for the particular operator, invoke it.
// Otherwise implement the generic function, which would definitely be implemented
Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/RuleRegExp.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/RuleRegExp.java?rev=726784&r1=726783&r2=726784&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/RuleRegExp.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/RuleRegExp.java Mon Dec 15 11:47:21 2008
@@ -37,22 +37,6 @@
private Pattern pattern;
/**
- *
- * @param stack the operators encountered so far
- * @return operators names in the stack in the form of a string
- **/
- private String getOpNameString(Stack<Operator<? extends Serializable>> stack) {
- String name = new String();
- Iterator<Operator<? extends Serializable>> iter = stack.iterator();
- while (iter.hasNext()) {
- Operator<? extends Serializable> op = iter.next();
- name = name.concat(op.getOperatorName());
- }
-
- return name;
- }
-
- /**
* The rule specified by the regular expression. Note that, the regular expression is specified in terms of operator
* name. For eg: TS.*RS -> means TableScan operator followed by anything any number of times followed by ReduceSink
* @param ruleName name of the rule
@@ -71,12 +55,16 @@
* @throws SemanticException
*/
public int cost(Stack<Operator<? extends Serializable>> stack) throws SemanticException {
- String opStackName = getOpNameString(stack);
- Matcher m = pattern.matcher(opStackName);
- if (m.matches())
- return m.group().length();
-
- return Integer.MAX_VALUE;
+ int numElems = stack.size();
+ String name = new String();
+ for (int pos = numElems - 1; pos >= 0; pos--) {
+ name = stack.get(pos).getOperatorName().concat(name);
+ Matcher m = pattern.matcher(name);
+ if (m.matches())
+ return m.group().length();
+ }
+
+ return -1;
}
/**
Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java?rev=726784&r1=726783&r2=726784&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java Mon Dec 15 11:47:21 2008
@@ -3030,15 +3030,14 @@
// create a walker which walks the tree in a DFS manner while maintaining the operator stack. The dispatcher
// generates the plan from the operator tree
Map<Rule, OperatorProcessor> opRules = new LinkedHashMap<Rule, OperatorProcessor>();
- opRules.put(new RuleRegExp(new String("R0"), ".*"), new GenMROperator());
opRules.put(new RuleRegExp(new String("R1"), "TS"), new GenMRTableScan1());
opRules.put(new RuleRegExp(new String("R2"), "TS.*RS"), new GenMRRedSink1());
opRules.put(new RuleRegExp(new String("R3"), "RS.*RS"), new GenMRRedSink2());
- opRules.put(new RuleRegExp(new String("R4"), ".*FS"), new GenMRFileSink1());
+ opRules.put(new RuleRegExp(new String("R4"), "FS"), new GenMRFileSink1());
// The dispatcher fires the processor corresponding to the closest matching rule and passes the context along
- Dispatcher disp = new DefaultRuleDispatcher(opRules, procCtx);
-
+ Dispatcher disp = new DefaultRuleDispatcher(new GenMROperator(), opRules, procCtx);
+
OpGraphWalker ogw = new GenMapRedWalker(disp);
ogw.startWalking(this.topOps.values());
Added: hadoop/hive/trunk/ql/src/test/queries/clientpositive/join18.q
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/queries/clientpositive/join18.q?rev=726784&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/test/queries/clientpositive/join18.q (added)
+++ hadoop/hive/trunk/ql/src/test/queries/clientpositive/join18.q Mon Dec 15 11:47:21 2008
@@ -0,0 +1,24 @@
+EXPLAIN
+ SELECT a.key, a.value, b.key, b.value
+ FROM
+ (
+ SELECT src1.key as key, count(src1.value) AS value FROM src src1 group by src1.key
+ ) a
+ FULL OUTER JOIN
+ (
+ SELECT src2.key as key, count(distinct(src2.value)) AS value
+ FROM src1 src2 group by src2.key
+ ) b
+ ON (a.key = b.key);
+
+ SELECT a.key, a.value, b.key, b.value
+ FROM
+ (
+ SELECT src1.key as key, count(src1.value) AS value FROM src src1 group by src1.key
+ ) a
+ FULL OUTER JOIN
+ (
+ SELECT src2.key as key, count(distinct(src2.value)) AS value
+ FROM src1 src2 group by src2.key
+ ) b
+ ON (a.key = b.key);
Added: hadoop/hive/trunk/ql/src/test/results/clientpositive/join18.q.out
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/results/clientpositive/join18.q.out?rev=726784&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/test/results/clientpositive/join18.q.out (added)
+++ hadoop/hive/trunk/ql/src/test/results/clientpositive/join18.q.out Mon Dec 15 11:47:21 2008
@@ -0,0 +1,542 @@
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_FULLOUTERJOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF src src1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_COLREF src1 key) key) (TOK_SELEXPR (TOK_FUNCTION count (TOK_COLREF src1 value)) value)) (TOK_GROUPBY (TOK_COLREF src1 key)))) a) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF src1 src2)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_COLREF src2 key) key) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_COLREF src2 value)) value)) (TOK_GROUPBY (TOK_COLREF src2 key)))) b) (= (TOK_COLREF a key) (TOK_COLREF b key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_COLREF a key)) (TOK_SELEXPR (TOK_COLREF a value)) (TOK_SELEXPR (TOK_COLREF b key)) (TOK_SELEXPR (TOK_COLREF b value)))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-3 depends on stages: Stage-2, Stage-5
+ Stage-4 is a root stage
+ Stage-5 depends on stages: Stage-4
+ Stage-3 depends on stages: Stage-2, Stage-5
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ b:src2
+ Reduce Output Operator
+ key expressions:
+ expr: key
+ type: string
+ expr: value
+ type: string
+ sort order: ++
+ Map-reduce partition columns:
+ expr: key
+ type: string
+ expr: value
+ type: string
+ tag: -1
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations:
+ expr: count(DISTINCT KEY.1)
+ keys:
+ expr: KEY.0
+ type: string
+ mode: partial1
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.mapred.SequenceFileOutputFormat
+ name: binary_table
+
+ Stage: Stage-2
+ Map Reduce
+ Alias -> Map Operator Tree:
+ /tmp/hive-njain/73868771/373350713.10002
+ Reduce Output Operator
+ key expressions:
+ expr: 0
+ type: string
+ sort order: +
+ Map-reduce partition columns:
+ expr: 0
+ type: string
+ tag: -1
+ value expressions:
+ expr: 1
+ type: bigint
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations:
+ expr: count(VALUE.0)
+ keys:
+ expr: KEY.0
+ type: string
+ mode: unknown
+ Select Operator
+ expressions:
+ expr: 0
+ type: string
+ expr: 1
+ type: bigint
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.mapred.SequenceFileOutputFormat
+ name: binary_table
+
+ Stage: Stage-3
+ Map Reduce
+ Alias -> Map Operator Tree:
+ $INTNAME1
+ Reduce Output Operator
+ key expressions:
+ expr: 0
+ type: string
+ sort order: +
+ Map-reduce partition columns:
+ expr: 0
+ type: string
+ tag: 0
+ value expressions:
+ expr: 0
+ type: string
+ expr: 1
+ type: bigint
+ Join Operator
+ condition map:
+ Outer Join 0 to 1
+ condition expressions:
+ 0 {VALUE.0} {VALUE.1}
+ 1 {VALUE.0} {VALUE.1}
+ Select Operator
+ expressions:
+ expr: 0
+ type: string
+ expr: 1
+ type: bigint
+ expr: 2
+ type: string
+ expr: 3
+ type: bigint
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat
+ $INTNAME
+ Reduce Output Operator
+ key expressions:
+ expr: 0
+ type: string
+ sort order: +
+ Map-reduce partition columns:
+ expr: 0
+ type: string
+ tag: 1
+ value expressions:
+ expr: 0
+ type: string
+ expr: 1
+ type: bigint
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Outer Join 0 to 1
+ condition expressions:
+ 0 {VALUE.0} {VALUE.1}
+ 1 {VALUE.0} {VALUE.1}
+ Select Operator
+ expressions:
+ expr: 0
+ type: string
+ expr: 1
+ type: bigint
+ expr: 2
+ type: string
+ expr: 3
+ type: bigint
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat
+
+ Stage: Stage-4
+ Map Reduce
+ Alias -> Map Operator Tree:
+ a:src1
+ Reduce Output Operator
+ key expressions:
+ expr: key
+ type: string
+ sort order: +
+ Map-reduce partition columns:
+ expr: rand()
+ type: double
+ tag: -1
+ value expressions:
+ expr: value
+ type: string
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations:
+ expr: count(VALUE.0)
+ keys:
+ expr: KEY.0
+ type: string
+ mode: partial1
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.mapred.SequenceFileOutputFormat
+ name: binary_table
+
+ Stage: Stage-5
+ Map Reduce
+ Alias -> Map Operator Tree:
+ /tmp/hive-njain/73868771/373350713.10004
+ Reduce Output Operator
+ key expressions:
+ expr: 0
+ type: string
+ sort order: +
+ Map-reduce partition columns:
+ expr: 0
+ type: string
+ tag: -1
+ value expressions:
+ expr: 1
+ type: bigint
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations:
+ expr: count(VALUE.0)
+ keys:
+ expr: KEY.0
+ type: string
+ mode: unknown
+ Select Operator
+ expressions:
+ expr: 0
+ type: string
+ expr: 1
+ type: bigint
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.mapred.SequenceFileOutputFormat
+ name: binary_table
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+
+
+NULL NULL 6
+0 3 NULL NULL
+10 1 NULL NULL
+100 2 NULL NULL
+103 2 NULL NULL
+104 2 NULL NULL
+105 1 NULL NULL
+11 1 NULL NULL
+111 1 NULL NULL
+113 2 NULL NULL
+114 1 NULL NULL
+116 1 NULL NULL
+118 2 NULL NULL
+119 3 NULL NULL
+12 2 NULL NULL
+120 2 NULL NULL
+125 2 NULL NULL
+126 1 NULL NULL
+128 3 128 0
+129 2 NULL NULL
+131 1 NULL NULL
+133 1 NULL NULL
+134 2 NULL NULL
+136 1 NULL NULL
+137 2 NULL NULL
+138 4 NULL NULL
+143 1 NULL NULL
+145 1 NULL NULL
+146 2 146 1
+149 2 NULL NULL
+15 2 NULL NULL
+150 1 150 1
+152 2 NULL NULL
+153 1 NULL NULL
+155 1 NULL NULL
+156 1 NULL NULL
+157 1 NULL NULL
+158 1 NULL NULL
+160 1 NULL NULL
+162 1 NULL NULL
+163 1 NULL NULL
+164 2 NULL NULL
+165 2 NULL NULL
+166 1 NULL NULL
+167 3 NULL NULL
+168 1 NULL NULL
+169 4 NULL NULL
+17 1 NULL NULL
+170 1 NULL NULL
+172 2 NULL NULL
+174 2 NULL NULL
+175 2 NULL NULL
+176 2 NULL NULL
+177 1 NULL NULL
+178 1 NULL NULL
+179 2 NULL NULL
+18 2 NULL NULL
+180 1 NULL NULL
+181 1 NULL NULL
+183 1 NULL NULL
+186 1 NULL NULL
+187 3 NULL NULL
+189 1 NULL NULL
+19 1 NULL NULL
+190 1 NULL NULL
+191 2 NULL NULL
+192 1 NULL NULL
+193 3 NULL NULL
+194 1 NULL NULL
+195 2 NULL NULL
+196 1 NULL NULL
+197 2 NULL NULL
+199 3 NULL NULL
+2 1 NULL NULL
+20 1 NULL NULL
+200 2 NULL NULL
+201 1 NULL NULL
+202 1 NULL NULL
+203 2 NULL NULL
+205 2 NULL NULL
+207 2 NULL NULL
+208 3 NULL NULL
+209 2 NULL NULL
+213 2 213 1
+214 1 NULL NULL
+216 2 NULL NULL
+217 2 NULL NULL
+218 1 NULL NULL
+219 2 NULL NULL
+221 2 NULL NULL
+222 1 NULL NULL
+223 2 NULL NULL
+224 2 224 0
+226 1 NULL NULL
+228 1 NULL NULL
+229 2 NULL NULL
+230 5 NULL NULL
+233 2 NULL NULL
+235 1 NULL NULL
+237 2 NULL NULL
+238 2 238 1
+239 2 NULL NULL
+24 2 NULL NULL
+241 1 NULL NULL
+242 2 NULL NULL
+244 1 NULL NULL
+247 1 NULL NULL
+248 1 NULL NULL
+249 1 NULL NULL
+252 1 NULL NULL
+255 2 255 1
+256 2 NULL NULL
+257 1 NULL NULL
+258 1 NULL NULL
+26 2 NULL NULL
+260 1 NULL NULL
+262 1 NULL NULL
+263 1 NULL NULL
+265 2 NULL NULL
+266 1 NULL NULL
+27 1 NULL NULL
+272 2 NULL NULL
+273 3 273 1
+274 1 NULL NULL
+275 1 NULL NULL
+277 4 NULL NULL
+278 2 278 1
+28 1 NULL NULL
+280 2 NULL NULL
+281 2 NULL NULL
+282 2 NULL NULL
+283 1 NULL NULL
+284 1 NULL NULL
+285 1 NULL NULL
+286 1 NULL NULL
+287 1 NULL NULL
+288 2 NULL NULL
+289 1 NULL NULL
+291 1 NULL NULL
+292 1 NULL NULL
+296 1 NULL NULL
+298 3 NULL NULL
+30 1 NULL NULL
+302 1 NULL NULL
+305 1 NULL NULL
+306 1 NULL NULL
+307 2 NULL NULL
+308 1 NULL NULL
+309 2 NULL NULL
+310 1 NULL NULL
+311 3 311 1
+315 1 NULL NULL
+316 3 NULL NULL
+317 2 NULL NULL
+318 3 NULL NULL
+321 2 NULL NULL
+322 2 NULL NULL
+323 1 NULL NULL
+325 2 NULL NULL
+327 3 NULL NULL
+33 1 NULL NULL
+331 2 NULL NULL
+332 1 NULL NULL
+333 2 NULL NULL
+335 1 NULL NULL
+336 1 NULL NULL
+338 1 NULL NULL
+339 1 NULL NULL
+34 1 NULL NULL
+341 1 NULL NULL
+342 2 NULL NULL
+344 2 NULL NULL
+345 1 NULL NULL
+348 5 NULL NULL
+35 3 NULL NULL
+351 1 NULL NULL
+353 2 NULL NULL
+356 1 NULL NULL
+360 1 NULL NULL
+362 1 NULL NULL
+364 1 NULL NULL
+365 1 NULL NULL
+366 1 NULL NULL
+367 2 NULL NULL
+368 1 NULL NULL
+369 3 369 0
+37 2 NULL NULL
+373 1 NULL NULL
+374 1 NULL NULL
+375 1 NULL NULL
+377 1 NULL NULL
+378 1 NULL NULL
+379 1 NULL NULL
+382 2 NULL NULL
+384 3 NULL NULL
+386 1 NULL NULL
+389 1 NULL NULL
+392 1 NULL NULL
+393 1 NULL NULL
+394 1 NULL NULL
+395 2 NULL NULL
+396 3 NULL NULL
+397 2 NULL NULL
+399 2 NULL NULL
+4 1 NULL NULL
+400 1 NULL NULL
+401 5 401 1
+402 1 NULL NULL
+403 3 NULL NULL
+404 2 NULL NULL
+406 4 406 1
+407 1 NULL NULL
+409 3 NULL NULL
+41 1 NULL NULL
+411 1 NULL NULL
+413 2 NULL NULL
+414 2 NULL NULL
+417 3 NULL NULL
+418 1 NULL NULL
+419 1 NULL NULL
+42 2 NULL NULL
+421 1 NULL NULL
+424 2 NULL NULL
+427 1 NULL NULL
+429 2 NULL NULL
+43 1 NULL NULL
+430 3 NULL NULL
+431 3 NULL NULL
+432 1 NULL NULL
+435 1 NULL NULL
+436 1 NULL NULL
+437 1 NULL NULL
+438 3 NULL NULL
+439 2 NULL NULL
+44 1 NULL NULL
+443 1 NULL NULL
+444 1 NULL NULL
+446 1 NULL NULL
+448 1 NULL NULL
+449 1 NULL NULL
+452 1 NULL NULL
+453 1 NULL NULL
+454 3 NULL NULL
+455 1 NULL NULL
+457 1 NULL NULL
+458 2 NULL NULL
+459 2 NULL NULL
+460 1 NULL NULL
+462 2 NULL NULL
+463 2 NULL NULL
+466 3 NULL NULL
+467 1 NULL NULL
+468 4 NULL NULL
+469 5 NULL NULL
+47 1 NULL NULL
+470 1 NULL NULL
+472 1 NULL NULL
+475 1 NULL NULL
+477 1 NULL NULL
+478 2 NULL NULL
+479 1 NULL NULL
+480 3 NULL NULL
+481 1 NULL NULL
+482 1 NULL NULL
+483 1 NULL NULL
+484 1 NULL NULL
+485 1 NULL NULL
+487 1 NULL NULL
+489 4 NULL NULL
+490 1 NULL NULL
+491 1 NULL NULL
+492 2 NULL NULL
+493 1 NULL NULL
+494 1 NULL NULL
+495 1 NULL NULL
+496 1 NULL NULL
+497 1 NULL NULL
+498 3 NULL NULL
+5 3 NULL NULL
+51 2 NULL NULL
+53 1 NULL NULL
+54 1 NULL NULL
+57 1 NULL NULL
+58 2 NULL NULL
+64 1 NULL NULL
+65 1 NULL NULL
+66 1 66 1
+67 2 NULL NULL
+69 1 NULL NULL
+70 3 NULL NULL
+72 2 NULL NULL
+74 1 NULL NULL
+76 2 NULL NULL
+77 1 NULL NULL
+78 1 NULL NULL
+8 1 NULL NULL
+80 1 NULL NULL
+82 1 NULL NULL
+83 2 NULL NULL
+84 2 NULL NULL
+85 1 NULL NULL
+86 1 NULL NULL
+87 1 NULL NULL
+9 1 NULL NULL
+90 3 NULL NULL
+92 1 NULL NULL
+95 2 NULL NULL
+96 1 NULL NULL
+97 2 NULL NULL
+98 2 98 1