You are viewing a plain text version of this content; the canonical hyperlink is available in the original HTML version of this message.
Posted to commits@hive.apache.org by ha...@apache.org on 2013/06/30 18:35:29 UTC
svn commit: r1498148 - in /hive/trunk/ql/src:
java/org/apache/hadoop/hive/ql/parse/ test/queries/clientpositive/
test/results/clientpositive/
Author: hashutosh
Date: Sun Jun 30 16:35:29 2013
New Revision: 1498148
URL: http://svn.apache.org/r1498148
Log:
HIVE-4692 : Constant agg parameters will be replaced by ExprNodeColumnDesc with single-sourced multi-gby cases (Navis via Ashutosh Chauhan)
Added:
hive/trunk/ql/src/test/queries/clientpositive/multi_insert_gby2.q
hive/trunk/ql/src/test/results/clientpositive/multi_insert_gby2.q.out
Modified:
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
hive/trunk/ql/src/test/results/clientpositive/groupby_multi_single_reducer.q.out
hive/trunk/ql/src/test/results/clientpositive/union31.q.out
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java?rev=1498148&r1=1498147&r2=1498148&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java Sun Jun 30 16:35:29 2013
@@ -2902,11 +2902,11 @@ public class SemanticAnalyzer extends Ba
*/
@SuppressWarnings("nls")
private Operator genGroupByPlanGroupByOperator(QBParseInfo parseInfo,
- String dest, Operator reduceSinkOperatorInfo, GroupByDesc.Mode mode,
+ String dest, Operator input, ReduceSinkOperator rs, GroupByDesc.Mode mode,
Map<String, GenericUDAFEvaluator> genericUDAFEvaluators)
throws SemanticException {
RowResolver groupByInputRowResolver = opParseCtx
- .get(reduceSinkOperatorInfo).getRowResolver();
+ .get(input).getRowResolver();
RowResolver groupByOutputRowResolver = new RowResolver();
groupByOutputRowResolver.setIsExprResolver(true);
ArrayList<ExprNodeDesc> groupByKeys = new ArrayList<ExprNodeDesc>();
@@ -2937,15 +2937,11 @@ public class SemanticAnalyzer extends Ba
// get the last colName for the reduce KEY
// it represents the column name corresponding to distinct aggr, if any
String lastKeyColName = null;
- List<ExprNodeDesc> reduceValues = null;
- if (reduceSinkOperatorInfo.getConf() instanceof ReduceSinkDesc) {
- List<String> inputKeyCols = ((ReduceSinkDesc)
- reduceSinkOperatorInfo.getConf()).getOutputKeyColumnNames();
- if (inputKeyCols.size() > 0) {
- lastKeyColName = inputKeyCols.get(inputKeyCols.size() - 1);
- }
- reduceValues = ((ReduceSinkDesc) reduceSinkOperatorInfo.getConf()).getValueCols();
+ List<String> inputKeyCols = ((ReduceSinkDesc) rs.getConf()).getOutputKeyColumnNames();
+ if (inputKeyCols.size() > 0) {
+ lastKeyColName = inputKeyCols.get(inputKeyCols.size() - 1);
}
+ List<ExprNodeDesc> reduceValues = ((ReduceSinkDesc) rs.getConf()).getValueCols();
int numDistinctUDFs = 0;
for (Map.Entry<String, ASTNode> entry : aggregationTrees.entrySet()) {
ASTNode value = entry.getValue();
@@ -3022,7 +3018,7 @@ public class SemanticAnalyzer extends Ba
new GroupByDesc(mode, outputColumnNames, groupByKeys, aggregations,
false, groupByMemoryUsage, memoryThreshold, null, false, 0, numDistinctUDFs > 0),
new RowSchema(groupByOutputRowResolver.getColumnInfos()),
- reduceSinkOperatorInfo), groupByOutputRowResolver);
+ input), groupByOutputRowResolver);
op.setColumnExprMap(colExprMap);
return op;
}
@@ -3490,7 +3486,7 @@ public class SemanticAnalyzer extends Ba
* @throws SemanticException
*/
@SuppressWarnings("nls")
- private Operator genGroupByPlanReduceSinkOperator(QB qb,
+ private ReduceSinkOperator genGroupByPlanReduceSinkOperator(QB qb,
String dest,
Operator inputOperatorInfo,
List<ASTNode> grpByExprs,
@@ -3680,7 +3676,7 @@ public class SemanticAnalyzer extends Ba
}
@SuppressWarnings("nls")
- private Operator genCommonGroupByPlanReduceSinkOperator(QB qb, List<String> dests,
+ private ReduceSinkOperator genCommonGroupByPlanReduceSinkOperator(QB qb, List<String> dests,
Operator inputOperatorInfo) throws SemanticException {
RowResolver reduceSinkInputRowResolver = opParseCtx.get(inputOperatorInfo)
@@ -4027,7 +4023,7 @@ public class SemanticAnalyzer extends Ba
}
// ////// 1. Generate ReduceSinkOperator
- Operator reduceSinkOperatorInfo =
+ ReduceSinkOperator reduceSinkOperatorInfo =
genGroupByPlanReduceSinkOperator(qb,
dest,
input,
@@ -4040,7 +4036,7 @@ public class SemanticAnalyzer extends Ba
// ////// 2. Generate GroupbyOperator
Operator groupByOperatorInfo = genGroupByPlanGroupByOperator(parseInfo,
- dest, reduceSinkOperatorInfo, GroupByDesc.Mode.COMPLETE, null);
+ dest, reduceSinkOperatorInfo, reduceSinkOperatorInfo, GroupByDesc.Mode.COMPLETE, null);
return groupByOperatorInfo;
}
@@ -4110,7 +4106,8 @@ public class SemanticAnalyzer extends Ba
Operator select = insertSelectAllPlanForGroupBy(selectInput);
// Generate ReduceSinkOperator
- Operator reduceSinkOperatorInfo = genCommonGroupByPlanReduceSinkOperator(qb, dests, select);
+ ReduceSinkOperator reduceSinkOperatorInfo =
+ genCommonGroupByPlanReduceSinkOperator(qb, dests, select);
// It is assumed throughout the code that a reducer has a single child, add a
// ForwardOperator so that we can add multiple filter/group by operators as children
@@ -4130,7 +4127,7 @@ public class SemanticAnalyzer extends Ba
// Generate GroupbyOperator
Operator groupByOperatorInfo = genGroupByPlanGroupByOperator(parseInfo,
- dest, curr, GroupByDesc.Mode.COMPLETE, null);
+ dest, curr, reduceSinkOperatorInfo, GroupByDesc.Mode.COMPLETE, null);
curr = genPostGroupByBodyPlan(groupByOperatorInfo, dest, qb);
}
@@ -4272,7 +4269,7 @@ public class SemanticAnalyzer extends Ba
// DISTINCT
// operator. We set the numPartitionColumns to -1 for this purpose. This is
// captured by WritableComparableHiveObject.hashCode() function.
- Operator reduceSinkOperatorInfo =
+ ReduceSinkOperator reduceSinkOperatorInfo =
genGroupByPlanReduceSinkOperator(qb,
dest,
input,
@@ -4287,7 +4284,7 @@ public class SemanticAnalyzer extends Ba
Map<String, GenericUDAFEvaluator> genericUDAFEvaluators =
new LinkedHashMap<String, GenericUDAFEvaluator>();
GroupByOperator groupByOperatorInfo = (GroupByOperator) genGroupByPlanGroupByOperator(
- parseInfo, dest, reduceSinkOperatorInfo, GroupByDesc.Mode.PARTIAL1,
+ parseInfo, dest, reduceSinkOperatorInfo, reduceSinkOperatorInfo, GroupByDesc.Mode.PARTIAL1,
genericUDAFEvaluators);
int numReducers = -1;
Added: hive/trunk/ql/src/test/queries/clientpositive/multi_insert_gby2.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/multi_insert_gby2.q?rev=1498148&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/multi_insert_gby2.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/multi_insert_gby2.q Sun Jun 30 16:35:29 2013
@@ -0,0 +1,19 @@
+--HIVE-3699 Multiple insert overwrite into multiple tables query stores same results in all tables
+create table e1 (count int);
+create table e2 (percentile double);
+
+explain
+FROM (select key, cast(key as double) as value from src order by key) a
+INSERT OVERWRITE TABLE e1
+ SELECT COUNT(*)
+INSERT OVERWRITE TABLE e2
+ SELECT percentile_approx(value, 0.5);
+
+FROM (select key, cast(key as double) as value from src order by key) a
+INSERT OVERWRITE TABLE e1
+ SELECT COUNT(*)
+INSERT OVERWRITE TABLE e2
+ SELECT percentile_approx(value, 0.5);
+
+select * from e1;
+select * from e2;
Modified: hive/trunk/ql/src/test/results/clientpositive/groupby_multi_single_reducer.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/groupby_multi_single_reducer.q.out?rev=1498148&r1=1498147&r2=1498148&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/groupby_multi_single_reducer.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/groupby_multi_single_reducer.q.out Sun Jun 30 16:35:29 2013
@@ -83,7 +83,7 @@ STAGE PLANS:
type: boolean
Group By Operator
aggregations:
- expr: count(DISTINCT KEY._col1:1._col0)
+ expr: count(DISTINCT KEY._col1:0._col0)
expr: sum(KEY._col1:1._col0)
expr: sum(DISTINCT KEY._col1:1._col0)
expr: count(VALUE._col0)
@@ -120,7 +120,7 @@ STAGE PLANS:
type: boolean
Group By Operator
aggregations:
- expr: count(DISTINCT KEY._col1:1._col0)
+ expr: count(DISTINCT KEY._col1:0._col0)
expr: sum(KEY._col1:1._col0)
expr: sum(DISTINCT KEY._col1:1._col0)
expr: count(VALUE._col0)
@@ -153,7 +153,7 @@ STAGE PLANS:
name: default.dest_g3
Group By Operator
aggregations:
- expr: count(DISTINCT KEY._col1:1._col0)
+ expr: count(DISTINCT KEY._col1:0._col0)
expr: sum(KEY._col1:1._col0)
expr: sum(DISTINCT KEY._col1:1._col0)
expr: count(VALUE._col0)
@@ -445,7 +445,7 @@ STAGE PLANS:
type: boolean
Group By Operator
aggregations:
- expr: count(DISTINCT KEY._col1:1._col0)
+ expr: count(DISTINCT KEY._col1:0._col0)
expr: sum(KEY._col1:1._col0)
expr: sum(DISTINCT KEY._col1:1._col0)
expr: count(VALUE._col0)
@@ -482,7 +482,7 @@ STAGE PLANS:
type: boolean
Group By Operator
aggregations:
- expr: count(DISTINCT KEY._col1:1._col0)
+ expr: count(DISTINCT KEY._col1:0._col0)
expr: sum(KEY._col1:1._col0)
expr: sum(DISTINCT KEY._col1:1._col0)
expr: count(VALUE._col0)
@@ -515,7 +515,7 @@ STAGE PLANS:
name: default.dest_g3
Group By Operator
aggregations:
- expr: count(DISTINCT KEY._col1:1._col0)
+ expr: count(DISTINCT KEY._col1:0._col0)
expr: sum(KEY._col1:1._col0)
expr: sum(DISTINCT KEY._col1:1._col0)
expr: count(VALUE._col0)
Added: hive/trunk/ql/src/test/results/clientpositive/multi_insert_gby2.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/multi_insert_gby2.q.out?rev=1498148&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/multi_insert_gby2.q.out (added)
+++ hive/trunk/ql/src/test/results/clientpositive/multi_insert_gby2.q.out Sun Jun 30 16:35:29 2013
@@ -0,0 +1,198 @@
+PREHOOK: query: --HIVE-3699 Multiple insert overwrite into multiple tables query stores same results in all tables
+create table e1 (count int)
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: --HIVE-3699 Multiple insert overwrite into multiple tables query stores same results in all tables
+create table e1 (count int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@e1
+PREHOOK: query: create table e2 (percentile double)
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: create table e2 (percentile double)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@e2
+PREHOOK: query: explain
+FROM (select key, cast(key as double) as value from src order by key) a
+INSERT OVERWRITE TABLE e1
+ SELECT COUNT(*)
+INSERT OVERWRITE TABLE e2
+ SELECT percentile_approx(value, 0.5)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+FROM (select key, cast(key as double) as value from src order by key) a
+INSERT OVERWRITE TABLE e1
+ SELECT COUNT(*)
+INSERT OVERWRITE TABLE e2
+ SELECT percentile_approx(value, 0.5)
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION TOK_DOUBLE (TOK_TABLE_OR_COL key)) value)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key))))) a)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME e1))) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR COUNT)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME e2))) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION percentile_approx (TOK_TABLE_OR_COL value) 0.5)))))
+
+STAGE DEPENDENCIES:
+ Stage-2 is a root stage
+ Stage-3 depends on stages: Stage-2
+ Stage-0 depends on stages: Stage-3
+ Stage-4 depends on stages: Stage-0
+ Stage-1 depends on stages: Stage-3
+ Stage-5 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-2
+ Map Reduce
+ Alias -> Map Operator Tree:
+ a:src
+ TableScan
+ alias: src
+ Select Operator
+ expressions:
+ expr: key
+ type: string
+ expr: UDFToDouble(key)
+ type: double
+ outputColumnNames: _col0, _col1
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: string
+ sort order: +
+ tag: -1
+ value expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: double
+ Reduce Operator Tree:
+ Extract
+ Select Operator
+ expressions:
+ expr: _col1
+ type: double
+ outputColumnNames: _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+
+ Stage: Stage-3
+ Map Reduce
+ Alias -> Map Operator Tree:
+#### A masked pattern was here ####
+ Reduce Output Operator
+ sort order:
+ tag: -1
+ value expressions:
+ expr: _col1
+ type: double
+ expr: 0.5
+ type: double
+ Reduce Operator Tree:
+ Forward
+ Group By Operator
+ aggregations:
+ expr: count()
+ bucketGroup: false
+ mode: complete
+ outputColumnNames: _col0
+ Select Operator
+ expressions:
+ expr: UDFToInteger(_col0)
+ type: int
+ outputColumnNames: _col0
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.e1
+ Group By Operator
+ aggregations:
+ expr: percentile_approx(VALUE._col0, 0.5)
+ bucketGroup: false
+ mode: complete
+ outputColumnNames: _col0
+ Select Operator
+ expressions:
+ expr: _col0
+ type: double
+ outputColumnNames: _col0
+ File Output Operator
+ compressed: false
+ GlobalTableId: 2
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.e2
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.e1
+
+ Stage: Stage-4
+ Stats-Aggr Operator
+
+ Stage: Stage-1
+ Move Operator
+ tables:
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.e2
+
+ Stage: Stage-5
+ Stats-Aggr Operator
+
+
+PREHOOK: query: FROM (select key, cast(key as double) as value from src order by key) a
+INSERT OVERWRITE TABLE e1
+ SELECT COUNT(*)
+INSERT OVERWRITE TABLE e2
+ SELECT percentile_approx(value, 0.5)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@e1
+PREHOOK: Output: default@e2
+POSTHOOK: query: FROM (select key, cast(key as double) as value from src order by key) a
+INSERT OVERWRITE TABLE e1
+ SELECT COUNT(*)
+INSERT OVERWRITE TABLE e2
+ SELECT percentile_approx(value, 0.5)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@e1
+POSTHOOK: Output: default@e2
+POSTHOOK: Lineage: e1.count EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: e2.percentile EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+PREHOOK: query: select * from e1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@e1
+#### A masked pattern was here ####
+POSTHOOK: query: select * from e1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@e1
+#### A masked pattern was here ####
+POSTHOOK: Lineage: e1.count EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: e2.percentile EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+500
+PREHOOK: query: select * from e2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@e2
+#### A masked pattern was here ####
+POSTHOOK: query: select * from e2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@e2
+#### A masked pattern was here ####
+POSTHOOK: Lineage: e1.count EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: e2.percentile EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+255.5
Modified: hive/trunk/ql/src/test/results/clientpositive/union31.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/union31.q.out?rev=1498148&r1=1498147&r2=1498148&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/union31.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/union31.q.out Sun Jun 30 16:35:29 2013
@@ -965,7 +965,7 @@ STAGE PLANS:
Forward
Group By Operator
aggregations:
- expr: count(VALUE._col0)
+ expr: count(1)
bucketGroup: false
keys:
expr: KEY._col0
@@ -989,7 +989,7 @@ STAGE PLANS:
name: default.t7
Group By Operator
aggregations:
- expr: count(VALUE._col0)
+ expr: count(1)
bucketGroup: false
keys:
expr: KEY._col0