You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ha...@apache.org on 2013/09/03 21:31:33 UTC
svn commit: r1519805 - in /hive/trunk/ql/src:
java/org/apache/hadoop/hive/ql/optimizer/correlation/
test/results/clientpositive/
Author: hashutosh
Date: Tue Sep 3 19:31:33 2013
New Revision: 1519805
URL: http://svn.apache.org/r1519805
Log:
HIVE-5149 : ReduceSinkDeDuplication can pick the wrong partitioning columns (Yin Huai via Ashutosh Chauhan)
Modified:
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/ReduceSinkDeDuplication.java
hive/trunk/ql/src/test/results/clientpositive/groupby2.q.out
hive/trunk/ql/src/test/results/clientpositive/groupby2_map_skew.q.out
hive/trunk/ql/src/test/results/clientpositive/groupby_cube1.q.out
hive/trunk/ql/src/test/results/clientpositive/groupby_rollup1.q.out
hive/trunk/ql/src/test/results/clientpositive/reduce_deduplicate_extended.q.out
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/ReduceSinkDeDuplication.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/ReduceSinkDeDuplication.java?rev=1519805&r1=1519804&r2=1519805&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/ReduceSinkDeDuplication.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/ReduceSinkDeDuplication.java Tue Sep 3 19:31:33 2013
@@ -27,6 +27,7 @@ import java.util.List;
import java.util.Map;
import java.util.Stack;
+import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.exec.ExtractOperator;
import org.apache.hadoop.hive.ql.exec.GroupByOperator;
import org.apache.hadoop.hive.ql.exec.JoinOperator;
@@ -230,8 +231,13 @@ public class ReduceSinkDeDuplication imp
}
/**
- * Current RSDedup remove/replace child RS. So always copies
+ * Currently, RSDedup removes/replaces the child RS. For key columns,
+ * sorting order, and the number of reducers, copy
* more specific part of configurations of child RS to that of parent RS.
+ * For partitioning columns, if both child RS and parent RS have been assigned
+ * partitioning columns, we will choose the more general partitioning columns.
+ * If parent RS has not been assigned any partitioning column, we will use
+ * the partitioning columns (if any) of the child RS.
*/
protected boolean merge(ReduceSinkOperator cRS, ReduceSinkOperator pRS, int minReducer)
throws SemanticException {
@@ -239,20 +245,57 @@ public class ReduceSinkDeDuplication imp
if (result == null) {
return false;
}
+
if (result[0] > 0) {
- ArrayList<ExprNodeDesc> childKCs = cRS.getConf().getKeyCols();
+ // The sorting columns of the child RS are more specific than
+ // those of the parent RS. Assign sorting columns of the child RS
+ // to the parent RS.
+ List<ExprNodeDesc> childKCs = cRS.getConf().getKeyCols();
pRS.getConf().setKeyCols(ExprNodeDescUtils.backtrack(childKCs, cRS, pRS));
}
- if (result[1] > 0) {
- ArrayList<ExprNodeDesc> childPCs = cRS.getConf().getPartitionCols();
- pRS.getConf().setPartitionCols(ExprNodeDescUtils.backtrack(childPCs, cRS, pRS));
+
+ if (result[1] < 0) {
+ // The partitioning columns of the parent RS are more specific than
+ // those of the child RS.
+ List<ExprNodeDesc> childPCs = cRS.getConf().getPartitionCols();
+ if (childPCs != null && !childPCs.isEmpty()) {
+ // If partitioning columns of the child RS are assigned,
+ // assign these to the partitioning columns of the parent RS.
+ pRS.getConf().setPartitionCols(ExprNodeDescUtils.backtrack(childPCs, cRS, pRS));
+ }
+ } else if (result[1] > 0) {
+ // The partitioning columns of the child RS are more specific than
+ // those of the parent RS.
+ List<ExprNodeDesc> parentPCs = pRS.getConf().getPartitionCols();
+ if (parentPCs == null || parentPCs.isEmpty()) {
+ // If partitioning columns of the parent RS are not assigned,
+ // assign partitioning columns of the child RS to the parent RS.
+ ArrayList<ExprNodeDesc> childPCs = cRS.getConf().getPartitionCols();
+ pRS.getConf().setPartitionCols(ExprNodeDescUtils.backtrack(childPCs, cRS, pRS));
+ }
}
+
if (result[2] > 0) {
+ // The sorting order of the child RS is more specific than
+ // that of the parent RS. Assign the sorting order of the child RS
+ // to the parent RS.
+ if (result[0] <= 0) {
+ // The sorting columns of the child RS are not more specific than those
+ // of the parent RS, but the sorting order of the child RS is more
+ // specific than that of the parent RS, so they do not match.
+ throw new SemanticException("Sorting columns and order don't match. " +
+ "Try set " + HiveConf.ConfVars.HIVEOPTREDUCEDEDUPLICATION + "=false;");
+ }
pRS.getConf().setOrder(cRS.getConf().getOrder());
}
+
if (result[3] > 0) {
+ // The number of reducers of the child RS is more specific than
+ // that of the parent RS. Assign the number of reducers of the child RS
+ // to the parent RS.
pRS.getConf().setNumReducers(cRS.getConf().getNumReducers());
}
+
return true;
}
Modified: hive/trunk/ql/src/test/results/clientpositive/groupby2.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/groupby2.q.out?rev=1519805&r1=1519804&r2=1519805&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/groupby2.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/groupby2.q.out Tue Sep 3 19:31:33 2013
@@ -43,8 +43,6 @@ STAGE PLANS:
Map-reduce partition columns:
expr: substr(key, 1, 1)
type: string
- expr: substr(value, 5)
- type: string
tag: -1
Reduce Operator Tree:
Group By Operator
Modified: hive/trunk/ql/src/test/results/clientpositive/groupby2_map_skew.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/groupby2_map_skew.q.out?rev=1519805&r1=1519804&r2=1519805&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/groupby2_map_skew.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/groupby2_map_skew.q.out Tue Sep 3 19:31:33 2013
@@ -55,8 +55,6 @@ STAGE PLANS:
Map-reduce partition columns:
expr: _col0
type: string
- expr: _col1
- type: string
tag: -1
value expressions:
expr: _col2
Modified: hive/trunk/ql/src/test/results/clientpositive/groupby_cube1.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/groupby_cube1.q.out?rev=1519805&r1=1519804&r2=1519805&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/groupby_cube1.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/groupby_cube1.q.out Tue Sep 3 19:31:33 2013
@@ -452,10 +452,6 @@ STAGE PLANS:
Map-reduce partition columns:
expr: _col0
type: string
- expr: _col1
- type: string
- expr: _col2
- type: string
tag: -1
value expressions:
expr: _col3
Modified: hive/trunk/ql/src/test/results/clientpositive/groupby_rollup1.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/groupby_rollup1.q.out?rev=1519805&r1=1519804&r2=1519805&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/groupby_rollup1.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/groupby_rollup1.q.out Tue Sep 3 19:31:33 2013
@@ -440,10 +440,6 @@ STAGE PLANS:
Map-reduce partition columns:
expr: _col0
type: string
- expr: _col1
- type: string
- expr: _col2
- type: string
tag: -1
value expressions:
expr: _col3
Modified: hive/trunk/ql/src/test/results/clientpositive/reduce_deduplicate_extended.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/reduce_deduplicate_extended.q.out?rev=1519805&r1=1519804&r2=1519805&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/reduce_deduplicate_extended.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/reduce_deduplicate_extended.q.out Tue Sep 3 19:31:33 2013
@@ -608,8 +608,6 @@ STAGE PLANS:
Map-reduce partition columns:
expr: _col0
type: string
- expr: _col1
- type: string
tag: -1
Reduce Operator Tree:
Group By Operator
@@ -2874,8 +2872,6 @@ STAGE PLANS:
Map-reduce partition columns:
expr: key
type: string
- expr: value
- type: string
tag: -1
Reduce Operator Tree:
Group By Operator