Posted to commits@hive.apache.org by na...@apache.org on 2011/05/11 22:52:17 UTC
svn commit: r1102085 [1/3] - in /hive/trunk:
common/src/java/org/apache/hadoop/hive/conf/ conf/
ql/src/java/org/apache/hadoop/hive/ql/parse/
ql/src/test/queries/clientpositive/ ql/src/test/results/clientpositive/
Author: namit
Date: Wed May 11 20:52:16 2011
New Revision: 1102085
URL: http://svn.apache.org/viewvc?rev=1102085&view=rev
Log:
HIVE-2056 Generate single MR job for multi groupby query
if hive.multigroupby.singlemr is enabled.
(Amareshwari Sriramadasu via namit)
Added:
hive/trunk/ql/src/test/queries/clientpositive/multigroupby_singlemr.q
hive/trunk/ql/src/test/results/clientpositive/multigroupby_singlemr.q.out
Modified:
hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
hive/trunk/conf/hive-default.xml
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
hive/trunk/ql/src/test/queries/clientpositive/groupby10.q
hive/trunk/ql/src/test/queries/clientpositive/groupby8.q
hive/trunk/ql/src/test/queries/clientpositive/groupby8_noskew.q
hive/trunk/ql/src/test/queries/clientpositive/groupby9.q
hive/trunk/ql/src/test/results/clientpositive/groupby10.q.out
hive/trunk/ql/src/test/results/clientpositive/groupby8.q.out
hive/trunk/ql/src/test/results/clientpositive/groupby9.q.out
Modified: hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
URL: http://svn.apache.org/viewvc/hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java?rev=1102085&r1=1102084&r2=1102085&view=diff
==============================================================================
--- hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java (original)
+++ hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java Wed May 11 20:52:16 2011
@@ -274,6 +274,7 @@ public class HiveConf extends Configurat
HIVEMAPJOINFOLLOWEDBYMAPAGGRHASHMEMORY("hive.mapjoin.followby.map.aggr.hash.percentmemory", (float) 0.3),
HIVEMAPAGGRMEMORYTHRESHOLD("hive.map.aggr.hash.force.flush.memory.threshold", (float) 0.9),
HIVEMAPAGGRHASHMINREDUCTION("hive.map.aggr.hash.min.reduction", (float) 0.5),
+ HIVEMULTIGROUPBYSINGLEMR("hive.multigroupby.singlemr", false),
// for hive udtf operator
HIVEUDTFAUTOPROGRESS("hive.udtf.auto.progress", false),
Modified: hive/trunk/conf/hive-default.xml
URL: http://svn.apache.org/viewvc/hive/trunk/conf/hive-default.xml?rev=1102085&r1=1102084&r2=1102085&view=diff
==============================================================================
--- hive/trunk/conf/hive-default.xml (original)
+++ hive/trunk/conf/hive-default.xml Wed May 11 20:52:16 2011
@@ -362,6 +362,13 @@
</property>
<property>
+ <name>hive.multigroupby.singlemr</name>
+ <value>false</value>
+  <description>Whether to optimize a multi group by query to generate a single
+  M/R job plan. If the multi group by query has common group by keys, it will
+  be optimized to generate a single M/R job.</description>
+</property>
+<property>
<name>hive.join.emit.interval</name>
<value>1000</value>
<description>How many rows in the right-most join operand Hive should buffer before emitting the join result. </description>
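
As a hedged illustration of the new flag, a multi-insert query of the shape
below is what this optimization targets. Table T and the output tables are
hypothetical placeholders, not part of this commit's test suite:

set hive.multigroupby.singlemr=true;

-- Both inserts group on the same key (c1), so with the flag enabled the
-- planner can compile them into a single map-reduce job instead of one
-- job per insert.
FROM T
INSERT OVERWRITE TABLE out1 SELECT T.c1, COUNT(T.c2) GROUP BY T.c1
INSERT OVERWRITE TABLE out2 SELECT T.c1, SUM(T.c3) GROUP BY T.c1;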
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java?rev=1102085&r1=1102084&r2=1102085&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java Wed May 11 20:52:16 2011
@@ -73,7 +73,6 @@ import org.apache.hadoop.hive.ql.hooks.W
import org.apache.hadoop.hive.ql.io.CombineHiveInputFormat;
import org.apache.hadoop.hive.ql.io.HiveFileFormatUtils;
import org.apache.hadoop.hive.ql.io.HiveOutputFormat;
-import org.apache.hadoop.hive.ql.io.ReworkMapredInputFormat;
import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker;
import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher;
import org.apache.hadoop.hive.ql.lib.Dispatcher;
@@ -166,7 +165,6 @@ import org.apache.hadoop.hive.serde2.typ
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
import org.apache.hadoop.mapred.InputFormat;
-import org.apache.hadoop.util.ReflectionUtils;
/**
* Implementation of the semantic analyzer.
@@ -3116,6 +3114,40 @@ public class SemanticAnalyzer extends Ba
}
/**
+   * Generate a Multi Group-By plan using a single map-reduce job.
+   *
+   * @param dest the destination clause name
+   * @param qb the query block
+   * @param input the input operator
+   * @return the GroupByOperator generated for the given destination
+   * @throws SemanticException
+   *
+   * Generate a Group-By plan using a single map-reduce job, if there is a
+   * common group by key set. Spray by the common group by key set and
+   * compute aggregates in the reduce. The aggregation evaluation
+   * functions are as follows:
+ *
+ * Partitioning Key: common group by key set
+ *
+ * Sorting Key: group by keys, distinct keys
+ *
+ * Reducer: iterate/terminate (mode = COMPLETE)
+ *
+ */
+ private Operator<?> genGroupByPlan1MRMultiGroupBy(String dest, QB qb,
+ Operator<?> input) throws SemanticException {
+
+ QBParseInfo parseInfo = qb.getParseInfo();
+
+ // ////// Generate GroupbyOperator
+ Operator<?> groupByOperatorInfo = genGroupByPlanGroupByOperator(parseInfo,
+ dest, input, GroupByDesc.Mode.COMPLETE, null);
+
+ return groupByOperatorInfo;
+ }
+
+ /**
* Generate a Group-By plan using 2 map-reduce jobs (5 operators will be
* inserted):
*
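
The Partitioning Key / Sorting Key / Reducer scheme documented on
genGroupByPlan1MRMultiGroupBy above can be read against a concrete query.
This is an illustrative sketch only (hypothetical table T with columns k
and v), mirroring the shape of the test added to groupby10.q:

FROM T
INSERT OVERWRITE TABLE out1 SELECT T.k, COUNT(DISTINCT T.v) GROUP BY T.k
INSERT OVERWRITE TABLE out2 SELECT T.k, SUM(DISTINCT T.v) GROUP BY T.k;
-- Partitioning key: k     (the common group by key set, so all rows for a
--                          given k reach the same reducer)
-- Sorting key:      k, v  (group by keys first, then the distinct exprs)
-- Reducer:          one GroupByOperator per insert, mode = COMPLETE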
@@ -5446,27 +5478,238 @@ public class SemanticAnalyzer extends Ba
return rsOp;
}
+ // see if there are any distinct expressions
+ private boolean distinctExprsExists(QB qb) {
+ QBParseInfo qbp = qb.getParseInfo();
+
+ TreeSet<String> ks = new TreeSet<String>();
+ ks.addAll(qbp.getClauseNames());
+
+ for (String dest : ks) {
+ List<ASTNode> list = qbp.getDistinctFuncExprsForClause(dest);
+ if (!list.isEmpty()) {
+ return true;
+ }
+ }
+ return false;
+ }
+
+ // return the common group by key set.
+ // Null if there are no common group by keys.
+ private List<ASTNode> getCommonGroupbyKeys(QB qb, Operator input) {
+ RowResolver inputRR = opParseCtx.get(input).getRowResolver();
+ QBParseInfo qbp = qb.getParseInfo();
+
+ Set<String> ks = qbp.getClauseNames();
+ // Go over all the destination tables
+ if (ks.size() <= 1) {
+ return null;
+ }
+
+ List<ASTNode> oldList = null;
+
+ for (String dest : ks) {
+ // If a filter is present, common processing is not possible
+ if (qbp.getWhrForClause(dest) != null) {
+ return null;
+ }
+
+ // if one of the sub-queries does not involve an aggregation, common
+ // processing is not possible
+ List<ASTNode> list = getGroupByForClause(qbp, dest);
+ if (list.isEmpty()) {
+ return null;
+ }
+ if (oldList == null) {
+ oldList = new ArrayList<ASTNode>();
+ oldList.addAll(list);
+ } else {
+ int pos = 0;
+ for (pos = 0; pos < oldList.size(); pos++) {
+ if (pos < list.size()) {
+ if (!oldList.get(pos).toStringTree().equals(list.get(pos).toStringTree())) {
+ break;
+ }
+ } else {
+ break;
+ }
+ }
+ oldList = oldList.subList(0, pos);
+ }
+ if (oldList.isEmpty()) {
+ return null;
+ }
+ }
+ return oldList;
+ }
+
+ /**
+   * Generates the reduce sink for a multi-group-by query with a non-null
+   * common groupby key set.
+   *
+   * All groupby keys and distinct exprs are added to the reduce keys, and
+   * rows are partitioned on the common groupby key set.
+   *
+   * @param qb the query block
+   * @param input the input operator
+   * @return the ReduceSinkOperator
+   * @throws SemanticException
+ */
+ private Operator createCommonReduceSink1(QB qb, Operator input)
+ throws SemanticException {
+ // Go over all the tables and get common groupby key
+ List<ASTNode> cmonGbyExprs = getCommonGroupbyKeys(qb, input);
+
+ QBParseInfo qbp = qb.getParseInfo();
+ TreeSet<String> ks = new TreeSet<String>();
+ ks.addAll(qbp.getClauseNames());
+
+ // Pass the entire row
+ RowResolver inputRR = opParseCtx.get(input).getRowResolver();
+ RowResolver reduceSinkOutputRowResolver = new RowResolver();
+ reduceSinkOutputRowResolver.setIsExprResolver(true);
+ ArrayList<ExprNodeDesc> reduceKeys = new ArrayList<ExprNodeDesc>();
+ ArrayList<ExprNodeDesc> reducePartKeys = new ArrayList<ExprNodeDesc>();
+ ArrayList<ExprNodeDesc> reduceValues = new ArrayList<ExprNodeDesc>();
+ Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
+ List<String> outputColumnNames = new ArrayList<String>();
+ for (String dest : ks) {
+ List<ASTNode> grpByExprs = getGroupByForClause(qbp, dest);
+ for (int i = 0; i < grpByExprs.size(); ++i) {
+ ASTNode grpbyExpr = grpByExprs.get(i);
+
+ if (reduceSinkOutputRowResolver.getExpression(grpbyExpr) == null) {
+ ExprNodeDesc grpByExprNode = genExprNodeDesc(grpbyExpr, inputRR);
+ reduceKeys.add(grpByExprNode);
+ String field = Utilities.ReduceField.KEY.toString() + "."
+ + getColumnInternalName(reduceKeys.size() - 1);
+ ColumnInfo colInfo = new ColumnInfo(field, reduceKeys.get(
+ reduceKeys.size() - 1).getTypeInfo(), "", false);
+ reduceSinkOutputRowResolver.putExpression(grpbyExpr, colInfo);
+ outputColumnNames.add(getColumnInternalName(reduceKeys.size() - 1));
+ colExprMap.put(colInfo.getInternalName(), grpByExprNode);
+ }
+ }
+ }
+ // Add distinct group-by exprs to reduceKeys
+ List<ASTNode> distExprs = getCommonDistinctExprs(qb, input);
+ if (distExprs != null) {
+ for (ASTNode distn : distExprs) {
+ if (reduceSinkOutputRowResolver.getExpression(distn) == null) {
+ ExprNodeDesc distExpr = genExprNodeDesc(distn, inputRR);
+ reduceKeys.add(distExpr);
+ String field = Utilities.ReduceField.KEY.toString() + "."
+ + getColumnInternalName(reduceKeys.size() - 1);
+ ColumnInfo colInfo = new ColumnInfo(field, reduceKeys.get(
+ reduceKeys.size() - 1).getTypeInfo(), "", false);
+ reduceSinkOutputRowResolver.putExpression(distn, colInfo);
+ outputColumnNames.add(getColumnInternalName(reduceKeys.size() - 1));
+ colExprMap.put(colInfo.getInternalName(), distExpr);
+ }
+ }
+ }
+ // Add common groupby keys to partition keys
+ for (ASTNode gby : cmonGbyExprs) {
+ ExprNodeDesc distExpr = genExprNodeDesc(gby, inputRR);
+ reducePartKeys.add(distExpr);
+ }
+
+ // Go over all the aggregations
+ for (String dest : ks) {
+
+ // For each aggregation
+ HashMap<String, ASTNode> aggregationTrees = qbp
+ .getAggregationExprsForClause(dest);
+ assert (aggregationTrees != null);
+
+ for (Map.Entry<String, ASTNode> entry : aggregationTrees.entrySet()) {
+ ASTNode value = entry.getValue();
+ value.getChild(0).getText();
+
+ // 0 is the function name
+ for (int i = 1; i < value.getChildCount(); i++) {
+ ASTNode paraExpr = (ASTNode) value.getChild(i);
+
+ if (reduceSinkOutputRowResolver.getExpression(paraExpr) == null) {
+ ExprNodeDesc paraExprNode = genExprNodeDesc(paraExpr, inputRR);
+ reduceValues.add(paraExprNode);
+ String field = Utilities.ReduceField.VALUE.toString() + "."
+ + getColumnInternalName(reduceValues.size() - 1);
+ ColumnInfo colInfo = new ColumnInfo(field, reduceValues.get(
+ reduceValues.size() - 1).getTypeInfo(), "", false);
+ reduceSinkOutputRowResolver.putExpression(paraExpr, colInfo);
+ outputColumnNames
+ .add(getColumnInternalName(reduceValues.size() - 1));
+ }
+ }
+ }
+ }
+ StringBuilder order = new StringBuilder();
+ for (int i = 0; i < reduceKeys.size(); i++) {
+ order.append("+");
+ }
+
+ ReduceSinkOperator rsOp = (ReduceSinkOperator) putOpInsertMap(
+ OperatorFactory.getAndMakeChild(PlanUtils.getReduceSinkDesc(
+ reduceKeys, reduceValues,
+ outputColumnNames, true, -1,
+ reducePartKeys, order.toString(), -1),
+ new RowSchema(reduceSinkOutputRowResolver.getColumnInfos()), input),
+ reduceSinkOutputRowResolver);
+ rsOp.setColumnExprMap(colExprMap);
+ return rsOp;
+ }
+
@SuppressWarnings("nls")
private Operator genBodyPlan(QB qb, Operator input) throws SemanticException {
-
QBParseInfo qbp = qb.getParseInfo();
TreeSet<String> ks = new TreeSet<String>(qbp.getClauseNames());
-
// For multi-group by with the same distinct, we ignore all user hints
// currently. It doesn't matter whether the user has asked to do
// map-side aggregation or not. Map-side aggregation is turned off
- boolean optimizeMultiGroupBy = (getCommonDistinctExprs(qb, input) != null);
+ List<ASTNode> commonDistinctExprs = getCommonDistinctExprs(qb, input);
+ List<ASTNode> commonGbyKeys = getCommonGroupbyKeys(qb, input);
+ LOG.warn("Common Gby keys:" + commonGbyKeys);
+ boolean optimizeMultiGroupBy = commonDistinctExprs != null;
+    // Generate a single MR job for a multi-groupby query if the query has a
+    // non-null common groupby key set and zero or one common distinct expression.
+ boolean singlemrMultiGroupBy =
+ conf.getBoolVar(HiveConf.ConfVars.HIVEMULTIGROUPBYSINGLEMR)
+ && commonGbyKeys != null && !commonGbyKeys.isEmpty() &&
+ (!distinctExprsExists(qb) || commonDistinctExprs != null);
+
Operator curr = input;
// If there are multiple group-bys, map-side aggregation is turned off,
- // there are no filters
- // and there is a single distinct, optimize that. Spray initially by the
+    // and there are no filters.
+    // If there is a common groupby key set, spray by the common groupby key set
+    // and generate a single MR job.
+ if (singlemrMultiGroupBy) {
+ curr = createCommonReduceSink1(qb, input);
+
+ RowResolver currRR = opParseCtx.get(curr).getRowResolver();
+ // create a forward operator
+ input = putOpInsertMap(OperatorFactory.getAndMakeChild(new ForwardDesc(),
+ new RowSchema(currRR.getColumnInfos()), curr), currRR);
+
+ for (String dest : ks) {
+ curr = input;
+ curr = genGroupByPlan1MRMultiGroupBy(dest, qb, curr);
+ curr = genSelectPlan(dest, qb, curr);
+ Integer limit = qbp.getDestLimit(dest);
+ if (limit != null) {
+ curr = genLimitMapRedPlan(dest, qb, curr, limit.intValue(), true);
+ qb.getParseInfo().setOuterQueryLimit(limit.intValue());
+ }
+ curr = genFileSinkPlan(dest, qb, curr);
+ }
+ }
+ // and if there is a single distinct, optimize that. Spray initially by the
// distinct key,
// no computation at the mapper. Have multiple group by operators at the
// reducer - and then
// proceed
- if (optimizeMultiGroupBy) {
+ else if (optimizeMultiGroupBy) {
curr = createCommonReduceSink(qb, input);
RowResolver currRR = opParseCtx.get(curr).getRowResolver();
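
To make the eligibility checks above concrete (getCommonGroupbyKeys computing
a positional common prefix of the group by key lists, and the
singlemrMultiGroupBy condition in genBodyPlan), here is a sketch of query
shapes that do and do not qualify. Tables and columns are hypothetical:

-- Qualifies: the key lists share the non-empty common prefix (c1, c2).
FROM T
INSERT OVERWRITE TABLE o1 SELECT T.c1, T.c2, COUNT(T.c3) GROUP BY T.c1, T.c2
INSERT OVERWRITE TABLE o2 SELECT T.c1, T.c2, T.c3, COUNT(T.c4) GROUP BY T.c1, T.c2, T.c3;

-- Does not qualify: no common prefix, so getCommonGroupbyKeys returns null
-- and each insert keeps its own map-reduce job.
FROM T
INSERT OVERWRITE TABLE o1 SELECT T.c1, COUNT(T.c2) GROUP BY T.c1
INSERT OVERWRITE TABLE o2 SELECT T.c2, COUNT(T.c1) GROUP BY T.c2;

-- Also disqualifying: a WHERE clause on any insert, an insert with no
-- aggregation, or distinct expressions that are not common to every insert.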
@@ -7176,7 +7419,7 @@ public class SemanticAnalyzer extends Ba
"Table " + tbl.getTableName()));
}
}
-
+
boolean reworkMapredWork = HiveConf.getBoolVar(this.conf, HiveConf.ConfVars.HIVE_REWORK_MAPREDWORK);
// validate all tasks
Modified: hive/trunk/ql/src/test/queries/clientpositive/groupby10.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/groupby10.q?rev=1102085&r1=1102084&r2=1102085&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/groupby10.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/groupby10.q Wed May 11 20:52:16 2011
@@ -1,10 +1,6 @@
set hive.map.aggr=false;
set hive.groupby.skewindata=true;
-
-
-
-
CREATE TABLE dest1(key INT, val1 INT, val2 INT);
CREATE TABLE dest2(key INT, val1 INT, val2 INT);
@@ -23,6 +19,16 @@ INSERT OVERWRITE TABLE dest2 SELECT INPU
SELECT * from dest1;
SELECT * from dest2;
+set hive.multigroupby.singlemr=true;
+EXPLAIN
+FROM INPUT
+INSERT OVERWRITE TABLE dest1 SELECT INPUT.key, count(substr(INPUT.value,5)), count(distinct substr(INPUT.value,5)) GROUP BY INPUT.key
+INSERT OVERWRITE TABLE dest2 SELECT INPUT.key, sum(substr(INPUT.value,5)), sum(distinct substr(INPUT.value,5)) GROUP BY INPUT.key;
+FROM INPUT
+INSERT OVERWRITE TABLE dest1 SELECT INPUT.key, count(substr(INPUT.value,5)), count(distinct substr(INPUT.value,5)) GROUP BY INPUT.key
+INSERT OVERWRITE TABLE dest2 SELECT INPUT.key, sum(substr(INPUT.value,5)), sum(distinct substr(INPUT.value,5)) GROUP BY INPUT.key;
+SELECT * from dest1;
+SELECT * from dest2;
Modified: hive/trunk/ql/src/test/queries/clientpositive/groupby8.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/groupby8.q?rev=1102085&r1=1102084&r2=1102085&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/groupby8.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/groupby8.q Wed May 11 20:52:16 2011
@@ -16,3 +16,16 @@ INSERT OVERWRITE TABLE DEST2 SELECT SRC.
SELECT DEST1.* FROM DEST1;
SELECT DEST2.* FROM DEST2;
+set hive.multigroupby.singlemr=true;
+
+EXPLAIN
+FROM SRC
+INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key
+INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key;
+
+FROM SRC
+INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key
+INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key;
+
+SELECT DEST1.* FROM DEST1;
+SELECT DEST2.* FROM DEST2;
Modified: hive/trunk/ql/src/test/queries/clientpositive/groupby8_noskew.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/groupby8_noskew.q?rev=1102085&r1=1102084&r2=1102085&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/groupby8_noskew.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/groupby8_noskew.q Wed May 11 20:52:16 2011
@@ -17,4 +17,3 @@ INSERT OVERWRITE TABLE DEST2 SELECT SRC.
SELECT DEST1.* FROM DEST1;
SELECT DEST2.* FROM DEST2;
-
Modified: hive/trunk/ql/src/test/queries/clientpositive/groupby9.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/groupby9.q?rev=1102085&r1=1102084&r2=1102085&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/groupby9.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/groupby9.q Wed May 11 20:52:16 2011
@@ -1,6 +1,4 @@
-
-
CREATE TABLE DEST1(key INT, value STRING) STORED AS TEXTFILE;
CREATE TABLE DEST2(key INT, val1 STRING, val2 STRING) STORED AS TEXTFILE;
@@ -16,5 +14,54 @@ INSERT OVERWRITE TABLE DEST2 SELECT SRC.
SELECT DEST1.* FROM DEST1;
SELECT DEST2.* FROM DEST2;
+EXPLAIN
+FROM SRC
+INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key
+INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, SRC.value, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.value, SRC.key;
+
+FROM SRC
+INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key
+INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, SRC.value, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.value, SRC.key;
+
+SELECT DEST1.* FROM DEST1;
+SELECT DEST2.* FROM DEST2;
+
+set hive.multigroupby.singlemr=true;
+
+EXPLAIN
+FROM SRC
+INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key
+INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, SRC.value, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key, SRC.value;
+
+FROM SRC
+INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key
+INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, SRC.value, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key, SRC.value;
+
+SELECT DEST1.* FROM DEST1;
+SELECT DEST2.* FROM DEST2;
+
+EXPLAIN
+FROM SRC
+INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(SUBSTR(SRC.value,5)) GROUP BY SRC.key
+INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, SRC.value, COUNT(SUBSTR(SRC.value,5)) GROUP BY SRC.key, SRC.value;
+
+FROM SRC
+INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(SUBSTR(SRC.value,5)) GROUP BY SRC.key
+INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, SRC.value, COUNT(SUBSTR(SRC.value,5)) GROUP BY SRC.key, SRC.value;
+
+SELECT DEST1.* FROM DEST1;
+SELECT DEST2.* FROM DEST2;
+
+EXPLAIN
+FROM SRC
+INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key
+INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, SRC.value, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.value, SRC.key;
+
+FROM SRC
+INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key
+INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, SRC.value, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.value, SRC.key;
+
+SELECT DEST1.* FROM DEST1;
+SELECT DEST2.* FROM DEST2;
Added: hive/trunk/ql/src/test/queries/clientpositive/multigroupby_singlemr.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/multigroupby_singlemr.q?rev=1102085&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/multigroupby_singlemr.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/multigroupby_singlemr.q Wed May 11 20:52:16 2011
@@ -0,0 +1,35 @@
+set hive.multigroupby.singlemr=true;
+
+CREATE TABLE TBL(C1 INT, C2 INT, C3 INT, C4 INT);
+
+CREATE TABLE DEST1(d1 INT, d2 INT) STORED AS TEXTFILE;
+CREATE TABLE DEST2(d1 INT, d2 INT, d3 INT) STORED AS TEXTFILE;
+CREATE TABLE DEST3(d1 INT, d2 INT, d3 INT, d4 INT) STORED AS TEXTFILE;
+CREATE TABLE DEST4(d1 INT, d2 INT, d3 INT, d4 INT) STORED AS TEXTFILE;
+
+EXPLAIN
+FROM TBL
+INSERT OVERWRITE TABLE DEST1 SELECT TBL.C1, COUNT(TBL.C2) GROUP BY TBL.C1
+INSERT OVERWRITE TABLE DEST2 SELECT TBL.C1, TBL.C2, COUNT(TBL.C3) GROUP BY TBL.C1, TBL.C2;
+
+EXPLAIN
+FROM TBL
+INSERT OVERWRITE TABLE DEST1 SELECT TBL.C1, COUNT(TBL.C2) GROUP BY TBL.C1
+INSERT OVERWRITE TABLE DEST2 SELECT TBL.C1, TBL.C2, COUNT(TBL.C3) GROUP BY TBL.C2, TBL.C1;
+
+EXPLAIN
+FROM TBL
+INSERT OVERWRITE TABLE DEST3 SELECT TBL.C1, TBL.C2, TBL.C3, COUNT(TBL.C4) GROUP BY TBL.C1, TBL.C2, TBL.C3
+INSERT OVERWRITE TABLE DEST2 SELECT TBL.C1, TBL.C2, COUNT(TBL.C3) GROUP BY TBL.C1, TBL.C2;
+
+EXPLAIN
+FROM TBL
+INSERT OVERWRITE TABLE DEST3 SELECT TBL.C1, TBL.C2, TBL.C3, COUNT(TBL.C4) GROUP BY TBL.C1, TBL.C2, TBL.C3
+INSERT OVERWRITE TABLE DEST4 SELECT TBL.C1, TBL.C2, TBL.C3, COUNT(TBL.C4) GROUP BY TBL.C1, TBL.C3, TBL.C2;
+
+
+EXPLAIN
+FROM TBL
+INSERT OVERWRITE TABLE DEST3 SELECT TBL.C1, TBL.C2, TBL.C3, COUNT(TBL.C4) GROUP BY TBL.C1, TBL.C2, TBL.C3
+INSERT OVERWRITE TABLE DEST2 SELECT TBL.C1, TBL.C2, COUNT(TBL.C3) GROUP BY TBL.C1, TBL.C2
+INSERT OVERWRITE TABLE DEST1 SELECT TBL.C1, COUNT(TBL.C2) GROUP BY TBL.C1;
Modified: hive/trunk/ql/src/test/results/clientpositive/groupby10.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/groupby10.q.out?rev=1102085&r1=1102084&r2=1102085&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/groupby10.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/groupby10.q.out Wed May 11 20:52:16 2011
@@ -98,7 +98,7 @@ STAGE PLANS:
Stage: Stage-3
Map Reduce
Alias -> Map Operator Tree:
- file:/var/folders/67/67R3POPtF90VG63KSmCbcU++F0U/-Tmp-/krishnak/hive_2011-03-20_23-10-37_844_2784636939771236613/-mr-10004
+ file:/var/folders/uc/ucuNeMAVGQGzy3459D8z2+++Z0Q/-Tmp-/amarsri/hive_2011-04-07_03-21-31_713_3968255414192405782/-mr-10004
Reduce Output Operator
key expressions:
expr: _col0
@@ -167,7 +167,7 @@ STAGE PLANS:
Stage: Stage-5
Map Reduce
Alias -> Map Operator Tree:
- file:/var/folders/67/67R3POPtF90VG63KSmCbcU++F0U/-Tmp-/krishnak/hive_2011-03-20_23-10-37_844_2784636939771236613/-mr-10005
+ file:/var/folders/uc/ucuNeMAVGQGzy3459D8z2+++Z0Q/-Tmp-/amarsri/hive_2011-04-07_03-21-31_713_3968255414192405782/-mr-10005
Reduce Output Operator
key expressions:
expr: _col0
@@ -257,11 +257,11 @@ POSTHOOK: Lineage: dest2.val2 EXPRESSION
PREHOOK: query: SELECT * from dest1
PREHOOK: type: QUERY
PREHOOK: Input: default@dest1
-PREHOOK: Output: file:/var/folders/67/67R3POPtF90VG63KSmCbcU++F0U/-Tmp-/krishnak/hive_2011-03-20_23-11-13_750_3405393267317215329/-mr-10000
+PREHOOK: Output: file:/var/folders/uc/ucuNeMAVGQGzy3459D8z2+++Z0Q/-Tmp-/amarsri/hive_2011-04-07_03-22-02_703_8932159178555555798/-mr-10000
POSTHOOK: query: SELECT * from dest1
POSTHOOK: type: QUERY
POSTHOOK: Input: default@dest1
-POSTHOOK: Output: file:/var/folders/67/67R3POPtF90VG63KSmCbcU++F0U/-Tmp-/krishnak/hive_2011-03-20_23-11-13_750_3405393267317215329/-mr-10000
+POSTHOOK: Output: file:/var/folders/uc/ucuNeMAVGQGzy3459D8z2+++Z0Q/-Tmp-/amarsri/hive_2011-04-07_03-22-02_703_8932159178555555798/-mr-10000
POSTHOOK: Lineage: dest1.key SIMPLE [(input)input.FieldSchema(name:key, type:int, comment:null), ]
POSTHOOK: Lineage: dest1.val1 EXPRESSION [(input)input.FieldSchema(name:value, type:string, comment:null), ]
POSTHOOK: Lineage: dest1.val2 EXPRESSION [(input)input.FieldSchema(name:value, type:string, comment:null), ]
@@ -291,11 +291,11 @@ POSTHOOK: Lineage: dest2.val2 EXPRESSION
PREHOOK: query: SELECT * from dest2
PREHOOK: type: QUERY
PREHOOK: Input: default@dest2
-PREHOOK: Output: file:/var/folders/67/67R3POPtF90VG63KSmCbcU++F0U/-Tmp-/krishnak/hive_2011-03-20_23-11-14_211_4036618129486956421/-mr-10000
+PREHOOK: Output: file:/var/folders/uc/ucuNeMAVGQGzy3459D8z2+++Z0Q/-Tmp-/amarsri/hive_2011-04-07_03-22-03_016_8742949299415464174/-mr-10000
POSTHOOK: query: SELECT * from dest2
POSTHOOK: type: QUERY
POSTHOOK: Input: default@dest2
-POSTHOOK: Output: file:/var/folders/67/67R3POPtF90VG63KSmCbcU++F0U/-Tmp-/krishnak/hive_2011-03-20_23-11-14_211_4036618129486956421/-mr-10000
+POSTHOOK: Output: file:/var/folders/uc/ucuNeMAVGQGzy3459D8z2+++Z0Q/-Tmp-/amarsri/hive_2011-04-07_03-22-03_016_8742949299415464174/-mr-10000
POSTHOOK: Lineage: dest1.key SIMPLE [(input)input.FieldSchema(name:key, type:int, comment:null), ]
POSTHOOK: Lineage: dest1.val1 EXPRESSION [(input)input.FieldSchema(name:value, type:string, comment:null), ]
POSTHOOK: Lineage: dest1.val2 EXPRESSION [(input)input.FieldSchema(name:value, type:string, comment:null), ]
@@ -322,3 +322,255 @@ POSTHOOK: Lineage: dest2.val2 EXPRESSION
401 401 401
409 409 409
484 484 484
+PREHOOK: query: EXPLAIN
+FROM INPUT
+INSERT OVERWRITE TABLE dest1 SELECT INPUT.key, count(substr(INPUT.value,5)), count(distinct substr(INPUT.value,5)) GROUP BY INPUT.key
+INSERT OVERWRITE TABLE dest2 SELECT INPUT.key, sum(substr(INPUT.value,5)), sum(distinct substr(INPUT.value,5)) GROUP BY INPUT.key
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+FROM INPUT
+INSERT OVERWRITE TABLE dest1 SELECT INPUT.key, count(substr(INPUT.value,5)), count(distinct substr(INPUT.value,5)) GROUP BY INPUT.key
+INSERT OVERWRITE TABLE dest2 SELECT INPUT.key, sum(substr(INPUT.value,5)), sum(distinct substr(INPUT.value,5)) GROUP BY INPUT.key
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: dest1.key SIMPLE [(input)input.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: dest1.val1 EXPRESSION [(input)input.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: dest1.val2 EXPRESSION [(input)input.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: dest2.key SIMPLE [(input)input.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: dest2.val1 EXPRESSION [(input)input.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: dest2.val2 EXPRESSION [(input)input.FieldSchema(name:value, type:string, comment:null), ]
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME INPUT))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest1))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL INPUT) key)) (TOK_SELEXPR (TOK_FUNCTION count (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL INPUT) value) 5))) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL INPUT) value) 5)))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL INPUT) key))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest2))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL INPUT) key)) (TOK_SELEXPR (TOK_FUNCTION sum (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL INPUT) value) 5))) (TOK_SELEXPR (TOK_FUNCTIONDI sum (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL INPUT) value) 5)))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL INPUT) key))))
+
+STAGE DEPENDENCIES:
+ Stage-2 is a root stage
+ Stage-0 depends on stages: Stage-2
+ Stage-3 depends on stages: Stage-0
+ Stage-1 depends on stages: Stage-2
+ Stage-4 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-2
+ Map Reduce
+ Alias -> Map Operator Tree:
+ input
+ TableScan
+ alias: input
+ Reduce Output Operator
+ key expressions:
+ expr: key
+ type: int
+ expr: substr(value, 5)
+ type: string
+ sort order: ++
+ Map-reduce partition columns:
+ expr: key
+ type: int
+ tag: -1
+ Reduce Operator Tree:
+ Forward
+ Group By Operator
+ aggregations:
+ expr: count(KEY._col1)
+ expr: count(DISTINCT KEY._col1)
+ bucketGroup: false
+ keys:
+ expr: KEY._col0
+ type: int
+ mode: complete
+ outputColumnNames: _col0, _col1, _col2
+ Select Operator
+ expressions:
+ expr: _col0
+ type: int
+ expr: _col1
+ type: bigint
+ expr: _col2
+ type: bigint
+ outputColumnNames: _col0, _col1, _col2
+ Select Operator
+ expressions:
+ expr: _col0
+ type: int
+ expr: UDFToInteger(_col1)
+ type: int
+ expr: UDFToInteger(_col2)
+ type: int
+ outputColumnNames: _col0, _col1, _col2
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.dest1
+ Group By Operator
+ aggregations:
+ expr: sum(KEY._col1)
+ expr: sum(DISTINCT KEY._col1)
+ bucketGroup: false
+ keys:
+ expr: KEY._col0
+ type: int
+ mode: complete
+ outputColumnNames: _col0, _col1, _col2
+ Select Operator
+ expressions:
+ expr: _col0
+ type: int
+ expr: _col1
+ type: double
+ expr: _col2
+ type: double
+ outputColumnNames: _col0, _col1, _col2
+ Select Operator
+ expressions:
+ expr: _col0
+ type: int
+ expr: UDFToInteger(_col1)
+ type: int
+ expr: UDFToInteger(_col2)
+ type: int
+ outputColumnNames: _col0, _col1, _col2
+ File Output Operator
+ compressed: false
+ GlobalTableId: 2
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.dest2
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.dest1
+
+ Stage: Stage-3
+ Stats-Aggr Operator
+
+ Stage: Stage-1
+ Move Operator
+ tables:
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.dest2
+
+ Stage: Stage-4
+ Stats-Aggr Operator
+
+
+PREHOOK: query: FROM INPUT
+INSERT OVERWRITE TABLE dest1 SELECT INPUT.key, count(substr(INPUT.value,5)), count(distinct substr(INPUT.value,5)) GROUP BY INPUT.key
+INSERT OVERWRITE TABLE dest2 SELECT INPUT.key, sum(substr(INPUT.value,5)), sum(distinct substr(INPUT.value,5)) GROUP BY INPUT.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@input
+PREHOOK: Output: default@dest1
+PREHOOK: Output: default@dest2
+POSTHOOK: query: FROM INPUT
+INSERT OVERWRITE TABLE dest1 SELECT INPUT.key, count(substr(INPUT.value,5)), count(distinct substr(INPUT.value,5)) GROUP BY INPUT.key
+INSERT OVERWRITE TABLE dest2 SELECT INPUT.key, sum(substr(INPUT.value,5)), sum(distinct substr(INPUT.value,5)) GROUP BY INPUT.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@input
+POSTHOOK: Output: default@dest1
+POSTHOOK: Output: default@dest2
+POSTHOOK: Lineage: dest1.key SIMPLE [(input)input.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: dest1.key SIMPLE [(input)input.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: dest1.val1 EXPRESSION [(input)input.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: dest1.val1 EXPRESSION [(input)input.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: dest1.val2 EXPRESSION [(input)input.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: dest1.val2 EXPRESSION [(input)input.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: dest2.key SIMPLE [(input)input.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: dest2.key SIMPLE [(input)input.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: dest2.val1 EXPRESSION [(input)input.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: dest2.val1 EXPRESSION [(input)input.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: dest2.val2 EXPRESSION [(input)input.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: dest2.val2 EXPRESSION [(input)input.FieldSchema(name:value, type:string, comment:null), ]
+PREHOOK: query: SELECT * from dest1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest1
+PREHOOK: Output: file:/var/folders/uc/ucuNeMAVGQGzy3459D8z2+++Z0Q/-Tmp-/amarsri/hive_2011-04-07_03-22-17_401_1443701336089053996/-mr-10000
+POSTHOOK: query: SELECT * from dest1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dest1
+POSTHOOK: Output: file:/var/folders/uc/ucuNeMAVGQGzy3459D8z2+++Z0Q/-Tmp-/amarsri/hive_2011-04-07_03-22-17_401_1443701336089053996/-mr-10000
+POSTHOOK: Lineage: dest1.key SIMPLE [(input)input.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: dest1.key SIMPLE [(input)input.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: dest1.val1 EXPRESSION [(input)input.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: dest1.val1 EXPRESSION [(input)input.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: dest1.val2 EXPRESSION [(input)input.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: dest1.val2 EXPRESSION [(input)input.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: dest2.key SIMPLE [(input)input.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: dest2.key SIMPLE [(input)input.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: dest2.val1 EXPRESSION [(input)input.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: dest2.val1 EXPRESSION [(input)input.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: dest2.val2 EXPRESSION [(input)input.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: dest2.val2 EXPRESSION [(input)input.FieldSchema(name:value, type:string, comment:null), ]
+27 1 1
+66 1 1
+86 1 1
+98 1 1
+128 1 1
+150 1 1
+165 1 1
+193 1 1
+213 3 2
+224 1 1
+238 3 3
+255 1 1
+265 1 1
+273 1 1
+278 1 1
+311 1 1
+369 1 1
+401 1 1
+409 1 1
+484 1 1
+PREHOOK: query: SELECT * from dest2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest2
+PREHOOK: Output: file:/var/folders/uc/ucuNeMAVGQGzy3459D8z2+++Z0Q/-Tmp-/amarsri/hive_2011-04-07_03-22-17_833_8350215655391374047/-mr-10000
+POSTHOOK: query: SELECT * from dest2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dest2
+POSTHOOK: Output: file:/var/folders/uc/ucuNeMAVGQGzy3459D8z2+++Z0Q/-Tmp-/amarsri/hive_2011-04-07_03-22-17_833_8350215655391374047/-mr-10000
+POSTHOOK: Lineage: dest1.key SIMPLE [(input)input.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: dest1.key SIMPLE [(input)input.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: dest1.val1 EXPRESSION [(input)input.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: dest1.val1 EXPRESSION [(input)input.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: dest1.val2 EXPRESSION [(input)input.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: dest1.val2 EXPRESSION [(input)input.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: dest2.key SIMPLE [(input)input.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: dest2.key SIMPLE [(input)input.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: dest2.val1 EXPRESSION [(input)input.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: dest2.val1 EXPRESSION [(input)input.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: dest2.val2 EXPRESSION [(input)input.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: dest2.val2 EXPRESSION [(input)input.FieldSchema(name:value, type:string, comment:null), ]
+27 27 27
+66 66 66
+86 86 86
+98 98 98
+128 128 128
+150 150 150
+165 165 165
+193 193 193
+213 640 427
+224 224 224
+238 717 717
+255 255 255
+265 265 265
+273 273 273
+278 278 278
+311 311 311
+369 369 369
+401 401 401
+409 409 409
+484 484 484
Modified: hive/trunk/ql/src/test/results/clientpositive/groupby8.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/groupby8.q.out?rev=1102085&r1=1102084&r2=1102085&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/groupby8.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/groupby8.q.out Wed May 11 20:52:16 2011
@@ -85,7 +85,7 @@ STAGE PLANS:
Stage: Stage-3
Map Reduce
Alias -> Map Operator Tree:
- file:/tmp/sdong/hive_2011-02-10_01-53-11_510_7043405308119088869/-mr-10004
+ file:/var/folders/uc/ucuNeMAVGQGzy3459D8z2+++Z0Q/-Tmp-/amarsri/hive_2011-04-07_05-05-02_088_4501856484124672195/-mr-10004
Reduce Output Operator
key expressions:
expr: _col0
@@ -147,7 +147,7 @@ STAGE PLANS:
Stage: Stage-5
Map Reduce
Alias -> Map Operator Tree:
- file:/tmp/sdong/hive_2011-02-10_01-53-11_510_7043405308119088869/-mr-10005
+ file:/var/folders/uc/ucuNeMAVGQGzy3459D8z2+++Z0Q/-Tmp-/amarsri/hive_2011-04-07_05-05-02_088_4501856484124672195/-mr-10005
Reduce Output Operator
key expressions:
expr: _col0
@@ -228,11 +228,11 @@ POSTHOOK: Lineage: dest2.value EXPRESSIO
PREHOOK: query: SELECT DEST1.* FROM DEST1
PREHOOK: type: QUERY
PREHOOK: Input: default@dest1
-PREHOOK: Output: file:/tmp/sdong/hive_2011-02-10_01-53-23_916_4383646730333791609/-mr-10000
+PREHOOK: Output: file:/var/folders/uc/ucuNeMAVGQGzy3459D8z2+++Z0Q/-Tmp-/amarsri/hive_2011-04-07_05-05-39_081_3655532117088145987/-mr-10000
POSTHOOK: query: SELECT DEST1.* FROM DEST1
POSTHOOK: type: QUERY
POSTHOOK: Input: default@dest1
-POSTHOOK: Output: file:/tmp/sdong/hive_2011-02-10_01-53-23_916_4383646730333791609/-mr-10000
+POSTHOOK: Output: file:/var/folders/uc/ucuNeMAVGQGzy3459D8z2+++Z0Q/-Tmp-/amarsri/hive_2011-04-07_05-05-39_081_3655532117088145987/-mr-10000
POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
@@ -549,11 +549,11 @@ POSTHOOK: Lineage: dest2.value EXPRESSIO
PREHOOK: query: SELECT DEST2.* FROM DEST2
PREHOOK: type: QUERY
PREHOOK: Input: default@dest2
-PREHOOK: Output: file:/tmp/sdong/hive_2011-02-10_01-53-24_183_544446786218122627/-mr-10000
+PREHOOK: Output: file:/var/folders/uc/ucuNeMAVGQGzy3459D8z2+++Z0Q/-Tmp-/amarsri/hive_2011-04-07_05-05-39_463_2118074606301355578/-mr-10000
POSTHOOK: query: SELECT DEST2.* FROM DEST2
POSTHOOK: type: QUERY
POSTHOOK: Input: default@dest2
-POSTHOOK: Output: file:/tmp/sdong/hive_2011-02-10_01-53-24_183_544446786218122627/-mr-10000
+POSTHOOK: Output: file:/var/folders/uc/ucuNeMAVGQGzy3459D8z2+++Z0Q/-Tmp-/amarsri/hive_2011-04-07_05-05-39_463_2118074606301355578/-mr-10000
POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
@@ -867,3 +867,809 @@ POSTHOOK: Lineage: dest2.value EXPRESSIO
96 1
97 1
98 1
+PREHOOK: query: EXPLAIN
+FROM SRC
+INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key
+INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+FROM SRC
+INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key
+INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: dest2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME SRC))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME DEST1))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL SRC) key)) (TOK_SELEXPR (TOK_FUNCTIONDI COUNT (TOK_FUNCTION SUBSTR (. (TOK_TABLE_OR_COL SRC) value) 5)))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL SRC) key))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME DEST2))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL SRC) key)) (TOK_SELEXPR (TOK_FUNCTIONDI COUNT (TOK_FUNCTION SUBSTR (. (TOK_TABLE_OR_COL SRC) value) 5)))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL SRC) key))))
+
+STAGE DEPENDENCIES:
+ Stage-2 is a root stage
+ Stage-0 depends on stages: Stage-2
+ Stage-3 depends on stages: Stage-0
+ Stage-1 depends on stages: Stage-2
+ Stage-4 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-2
+ Map Reduce
+ Alias -> Map Operator Tree:
+ src
+ TableScan
+ alias: src
+ Reduce Output Operator
+ key expressions:
+ expr: key
+ type: string
+ expr: substr(value, 5)
+ type: string
+ sort order: ++
+ Map-reduce partition columns:
+ expr: key
+ type: string
+ tag: -1
+ Reduce Operator Tree:
+ Forward
+ Group By Operator
+ aggregations:
+ expr: count(DISTINCT KEY._col1)
+ bucketGroup: false
+ keys:
+ expr: KEY._col0
+ type: string
+ mode: complete
+ outputColumnNames: _col0, _col1
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: bigint
+ outputColumnNames: _col0, _col1
+ Select Operator
+ expressions:
+ expr: UDFToInteger(_col0)
+ type: int
+ expr: _col1
+ type: bigint
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.dest1
+ Group By Operator
+ aggregations:
+ expr: count(DISTINCT KEY._col1)
+ bucketGroup: false
+ keys:
+ expr: KEY._col0
+ type: string
+ mode: complete
+ outputColumnNames: _col0, _col1
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: bigint
+ outputColumnNames: _col0, _col1
+ Select Operator
+ expressions:
+ expr: UDFToInteger(_col0)
+ type: int
+ expr: _col1
+ type: bigint
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 2
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.dest2
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.dest1
+
+ Stage: Stage-3
+ Stats-Aggr Operator
+
+ Stage: Stage-1
+ Move Operator
+ tables:
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.dest2
+
+ Stage: Stage-4
+ Stats-Aggr Operator
+
+
+PREHOOK: query: FROM SRC
+INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key
+INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@dest1
+PREHOOK: Output: default@dest2
+POSTHOOK: query: FROM SRC
+INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key
+INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@dest1
+POSTHOOK: Output: default@dest2
+POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: dest2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: SELECT DEST1.* FROM DEST1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest1
+PREHOOK: Output: file:/var/folders/uc/ucuNeMAVGQGzy3459D8z2+++Z0Q/-Tmp-/amarsri/hive_2011-04-07_05-05-55_122_5746569605626089398/-mr-10000
+POSTHOOK: query: SELECT DEST1.* FROM DEST1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dest1
+POSTHOOK: Output: file:/var/folders/uc/ucuNeMAVGQGzy3459D8z2+++Z0Q/-Tmp-/amarsri/hive_2011-04-07_05-05-55_122_5746569605626089398/-mr-10000
+POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: dest2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+0 1
+10 1
+100 1
+103 1
+104 1
+105 1
+11 1
+111 1
+113 1
+114 1
+116 1
+118 1
+119 1
+12 1
+120 1
+125 1
+126 1
+128 1
+129 1
+131 1
+133 1
+134 1
+136 1
+137 1
+138 1
+143 1
+145 1
+146 1
+149 1
+15 1
+150 1
+152 1
+153 1
+155 1
+156 1
+157 1
+158 1
+160 1
+162 1
+163 1
+164 1
+165 1
+166 1
+167 1
+168 1
+169 1
+17 1
+170 1
+172 1
+174 1
+175 1
+176 1
+177 1
+178 1
+179 1
+18 1
+180 1
+181 1
+183 1
+186 1
+187 1
+189 1
+19 1
+190 1
+191 1
+192 1
+193 1
+194 1
+195 1
+196 1
+197 1
+199 1
+2 1
+20 1
+200 1
+201 1
+202 1
+203 1
+205 1
+207 1
+208 1
+209 1
+213 1
+214 1
+216 1
+217 1
+218 1
+219 1
+221 1
+222 1
+223 1
+224 1
+226 1
+228 1
+229 1
+230 1
+233 1
+235 1
+237 1
+238 1
+239 1
+24 1
+241 1
+242 1
+244 1
+247 1
+248 1
+249 1
+252 1
+255 1
+256 1
+257 1
+258 1
+26 1
+260 1
+262 1
+263 1
+265 1
+266 1
+27 1
+272 1
+273 1
+274 1
+275 1
+277 1
+278 1
+28 1
+280 1
+281 1
+282 1
+283 1
+284 1
+285 1
+286 1
+287 1
+288 1
+289 1
+291 1
+292 1
+296 1
+298 1
+30 1
+302 1
+305 1
+306 1
+307 1
+308 1
+309 1
+310 1
+311 1
+315 1
+316 1
+317 1
+318 1
+321 1
+322 1
+323 1
+325 1
+327 1
+33 1
+331 1
+332 1
+333 1
+335 1
+336 1
+338 1
+339 1
+34 1
+341 1
+342 1
+344 1
+345 1
+348 1
+35 1
+351 1
+353 1
+356 1
+360 1
+362 1
+364 1
+365 1
+366 1
+367 1
+368 1
+369 1
+37 1
+373 1
+374 1
+375 1
+377 1
+378 1
+379 1
+382 1
+384 1
+386 1
+389 1
+392 1
+393 1
+394 1
+395 1
+396 1
+397 1
+399 1
+4 1
+400 1
+401 1
+402 1
+403 1
+404 1
+406 1
+407 1
+409 1
+41 1
+411 1
+413 1
+414 1
+417 1
+418 1
+419 1
+42 1
+421 1
+424 1
+427 1
+429 1
+43 1
+430 1
+431 1
+432 1
+435 1
+436 1
+437 1
+438 1
+439 1
+44 1
+443 1
+444 1
+446 1
+448 1
+449 1
+452 1
+453 1
+454 1
+455 1
+457 1
+458 1
+459 1
+460 1
+462 1
+463 1
+466 1
+467 1
+468 1
+469 1
+47 1
+470 1
+472 1
+475 1
+477 1
+478 1
+479 1
+480 1
+481 1
+482 1
+483 1
+484 1
+485 1
+487 1
+489 1
+490 1
+491 1
+492 1
+493 1
+494 1
+495 1
+496 1
+497 1
+498 1
+5 1
+51 1
+53 1
+54 1
+57 1
+58 1
+64 1
+65 1
+66 1
+67 1
+69 1
+70 1
+72 1
+74 1
+76 1
+77 1
+78 1
+8 1
+80 1
+82 1
+83 1
+84 1
+85 1
+86 1
+87 1
+9 1
+90 1
+92 1
+95 1
+96 1
+97 1
+98 1
+PREHOOK: query: SELECT DEST2.* FROM DEST2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest2
+PREHOOK: Output: file:/var/folders/uc/ucuNeMAVGQGzy3459D8z2+++Z0Q/-Tmp-/amarsri/hive_2011-04-07_05-05-55_531_3816962637700897051/-mr-10000
+POSTHOOK: query: SELECT DEST2.* FROM DEST2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dest2
+POSTHOOK: Output: file:/var/folders/uc/ucuNeMAVGQGzy3459D8z2+++Z0Q/-Tmp-/amarsri/hive_2011-04-07_05-05-55_531_3816962637700897051/-mr-10000
+POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: dest2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+0 1
+10 1
+100 1
+103 1
+104 1
+105 1
+11 1
+111 1
+113 1
+114 1
+116 1
+118 1
+119 1
+12 1
+120 1
+125 1
+126 1
+128 1
+129 1
+131 1
+133 1
+134 1
+136 1
+137 1
+138 1
+143 1
+145 1
+146 1
+149 1
+15 1
+150 1
+152 1
+153 1
+155 1
+156 1
+157 1
+158 1
+160 1
+162 1
+163 1
+164 1
+165 1
+166 1
+167 1
+168 1
+169 1
+17 1
+170 1
+172 1
+174 1
+175 1
+176 1
+177 1
+178 1
+179 1
+18 1
+180 1
+181 1
+183 1
+186 1
+187 1
+189 1
+19 1
+190 1
+191 1
+192 1
+193 1
+194 1
+195 1
+196 1
+197 1
+199 1
+2 1
+20 1
+200 1
+201 1
+202 1
+203 1
+205 1
+207 1
+208 1
+209 1
+213 1
+214 1
+216 1
+217 1
+218 1
+219 1
+221 1
+222 1
+223 1
+224 1
+226 1
+228 1
+229 1
+230 1
+233 1
+235 1
+237 1
+238 1
+239 1
+24 1
+241 1
+242 1
+244 1
+247 1
+248 1
+249 1
+252 1
+255 1
+256 1
+257 1
+258 1
+26 1
+260 1
+262 1
+263 1
+265 1
+266 1
+27 1
+272 1
+273 1
+274 1
+275 1
+277 1
+278 1
+28 1
+280 1
+281 1
+282 1
+283 1
+284 1
+285 1
+286 1
+287 1
+288 1
+289 1
+291 1
+292 1
+296 1
+298 1
+30 1
+302 1
+305 1
+306 1
+307 1
+308 1
+309 1
+310 1
+311 1
+315 1
+316 1
+317 1
+318 1
+321 1
+322 1
+323 1
+325 1
+327 1
+33 1
+331 1
+332 1
+333 1
+335 1
+336 1
+338 1
+339 1
+34 1
+341 1
+342 1
+344 1
+345 1
+348 1
+35 1
+351 1
+353 1
+356 1
+360 1
+362 1
+364 1
+365 1
+366 1
+367 1
+368 1
+369 1
+37 1
+373 1
+374 1
+375 1
+377 1
+378 1
+379 1
+382 1
+384 1
+386 1
+389 1
+392 1
+393 1
+394 1
+395 1
+396 1
+397 1
+399 1
+4 1
+400 1
+401 1
+402 1
+403 1
+404 1
+406 1
+407 1
+409 1
+41 1
+411 1
+413 1
+414 1
+417 1
+418 1
+419 1
+42 1
+421 1
+424 1
+427 1
+429 1
+43 1
+430 1
+431 1
+432 1
+435 1
+436 1
+437 1
+438 1
+439 1
+44 1
+443 1
+444 1
+446 1
+448 1
+449 1
+452 1
+453 1
+454 1
+455 1
+457 1
+458 1
+459 1
+460 1
+462 1
+463 1
+466 1
+467 1
+468 1
+469 1
+47 1
+470 1
+472 1
+475 1
+477 1
+478 1
+479 1
+480 1
+481 1
+482 1
+483 1
+484 1
+485 1
+487 1
+489 1
+490 1
+491 1
+492 1
+493 1
+494 1
+495 1
+496 1
+497 1
+498 1
+5 1
+51 1
+53 1
+54 1
+57 1
+58 1
+64 1
+65 1
+66 1
+67 1
+69 1
+70 1
+72 1
+74 1
+76 1
+77 1
+78 1
+8 1
+80 1
+82 1
+83 1
+84 1
+85 1
+86 1
+87 1
+9 1
+90 1
+92 1
+95 1
+96 1
+97 1
+98 1