You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ha...@apache.org on 2013/09/03 21:31:33 UTC

svn commit: r1519805 - in /hive/trunk/ql/src: java/org/apache/hadoop/hive/ql/optimizer/correlation/ test/results/clientpositive/

Author: hashutosh
Date: Tue Sep  3 19:31:33 2013
New Revision: 1519805

URL: http://svn.apache.org/r1519805
Log:
HIVE-5149 : ReduceSinkDeDuplication can pick the wrong partitioning columns (Yin Huai via Ashutosh Chauhan)

Modified:
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/ReduceSinkDeDuplication.java
    hive/trunk/ql/src/test/results/clientpositive/groupby2.q.out
    hive/trunk/ql/src/test/results/clientpositive/groupby2_map_skew.q.out
    hive/trunk/ql/src/test/results/clientpositive/groupby_cube1.q.out
    hive/trunk/ql/src/test/results/clientpositive/groupby_rollup1.q.out
    hive/trunk/ql/src/test/results/clientpositive/reduce_deduplicate_extended.q.out

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/ReduceSinkDeDuplication.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/ReduceSinkDeDuplication.java?rev=1519805&r1=1519804&r2=1519805&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/ReduceSinkDeDuplication.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/ReduceSinkDeDuplication.java Tue Sep  3 19:31:33 2013
@@ -27,6 +27,7 @@ import java.util.List;
 import java.util.Map;
 import java.util.Stack;
 
+import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.ql.exec.ExtractOperator;
 import org.apache.hadoop.hive.ql.exec.GroupByOperator;
 import org.apache.hadoop.hive.ql.exec.JoinOperator;
@@ -230,8 +231,13 @@ public class ReduceSinkDeDuplication imp
     }
 
     /**
-     * Current RSDedup remove/replace child RS. So always copies
+     * The current RSDedup removes/replaces the child RS. For key columns,
+     * sorting order, and the number of reducers, copy the
      * more specific part of configurations of child RS to that of parent RS.
+     * For partitioning columns, if both child RS and parent RS have been assigned
+     * partitioning columns, we will choose the more general partitioning columns.
+     * If the parent RS has not been assigned any partitioning columns, we will
+     * use the partitioning columns (if any) of the child RS.
      */
     protected boolean merge(ReduceSinkOperator cRS, ReduceSinkOperator pRS, int minReducer)
         throws SemanticException {
@@ -239,20 +245,57 @@ public class ReduceSinkDeDuplication imp
       if (result == null) {
         return false;
       }
+
       if (result[0] > 0) {
-        ArrayList<ExprNodeDesc> childKCs = cRS.getConf().getKeyCols();
+        // The sorting columns of the child RS are more specific than
+        // those of the parent RS. Assign sorting columns of the child RS
+        // to the parent RS.
+        List<ExprNodeDesc> childKCs = cRS.getConf().getKeyCols();
         pRS.getConf().setKeyCols(ExprNodeDescUtils.backtrack(childKCs, cRS, pRS));
       }
-      if (result[1] > 0) {
-        ArrayList<ExprNodeDesc> childPCs = cRS.getConf().getPartitionCols();
-        pRS.getConf().setPartitionCols(ExprNodeDescUtils.backtrack(childPCs, cRS, pRS));
+
+      if (result[1] < 0) {
+        // The partitioning columns of the parent RS are more specific than
+        // those of the child RS.
+        List<ExprNodeDesc> childPCs = cRS.getConf().getPartitionCols();
+        if (childPCs != null && !childPCs.isEmpty()) {
+          // If partitioning columns of the child RS are assigned,
+          // assign these to the partitioning columns of the parent RS.
+          pRS.getConf().setPartitionCols(ExprNodeDescUtils.backtrack(childPCs, cRS, pRS));
+        }
+      } else if (result[1] > 0) {
+        // The partitioning columns of the child RS are more specific than
+        // those of the parent RS.
+        List<ExprNodeDesc> parentPCs = pRS.getConf().getPartitionCols();
+        if (parentPCs == null || parentPCs.isEmpty()) {
+          // If partitioning columns of the parent RS are not assigned,
+          // assign partitioning columns of the child RS to the parent RS.
+          ArrayList<ExprNodeDesc> childPCs = cRS.getConf().getPartitionCols();
+          pRS.getConf().setPartitionCols(ExprNodeDescUtils.backtrack(childPCs, cRS, pRS));
+        }
       }
+
       if (result[2] > 0) {
+        // The sorting order of the child RS is more specific than
+        // that of the parent RS. Assign the sorting order of the child RS
+        // to the parent RS.
+        if (result[0] <= 0) {
+          // Sorting columns of the child RS are not more specific than those of
+          // the parent RS, but the sorting order of the child RS is more specific
+          // than that of the parent RS.
+          throw new SemanticException("Sorting columns and order don't match. " +
+              "Try set " + HiveConf.ConfVars.HIVEOPTREDUCEDEDUPLICATION + "=false;");
+        }
         pRS.getConf().setOrder(cRS.getConf().getOrder());
       }
+
       if (result[3] > 0) {
+        // The number of reducers of the child RS is more specific than
+        // that of the parent RS. Assign the number of reducers of the child RS
+        // to the parent RS.
         pRS.getConf().setNumReducers(cRS.getConf().getNumReducers());
       }
+
       return true;
     }
 

Modified: hive/trunk/ql/src/test/results/clientpositive/groupby2.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/groupby2.q.out?rev=1519805&r1=1519804&r2=1519805&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/groupby2.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/groupby2.q.out Tue Sep  3 19:31:33 2013
@@ -43,8 +43,6 @@ STAGE PLANS:
                 Map-reduce partition columns:
                       expr: substr(key, 1, 1)
                       type: string
-                      expr: substr(value, 5)
-                      type: string
                 tag: -1
       Reduce Operator Tree:
         Group By Operator

Modified: hive/trunk/ql/src/test/results/clientpositive/groupby2_map_skew.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/groupby2_map_skew.q.out?rev=1519805&r1=1519804&r2=1519805&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/groupby2_map_skew.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/groupby2_map_skew.q.out Tue Sep  3 19:31:33 2013
@@ -55,8 +55,6 @@ STAGE PLANS:
                   Map-reduce partition columns:
                         expr: _col0
                         type: string
-                        expr: _col1
-                        type: string
                   tag: -1
                   value expressions:
                         expr: _col2

Modified: hive/trunk/ql/src/test/results/clientpositive/groupby_cube1.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/groupby_cube1.q.out?rev=1519805&r1=1519804&r2=1519805&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/groupby_cube1.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/groupby_cube1.q.out Tue Sep  3 19:31:33 2013
@@ -452,10 +452,6 @@ STAGE PLANS:
                   Map-reduce partition columns:
                         expr: _col0
                         type: string
-                        expr: _col1
-                        type: string
-                        expr: _col2
-                        type: string
                   tag: -1
                   value expressions:
                         expr: _col3

Modified: hive/trunk/ql/src/test/results/clientpositive/groupby_rollup1.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/groupby_rollup1.q.out?rev=1519805&r1=1519804&r2=1519805&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/groupby_rollup1.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/groupby_rollup1.q.out Tue Sep  3 19:31:33 2013
@@ -440,10 +440,6 @@ STAGE PLANS:
                   Map-reduce partition columns:
                         expr: _col0
                         type: string
-                        expr: _col1
-                        type: string
-                        expr: _col2
-                        type: string
                   tag: -1
                   value expressions:
                         expr: _col3

Modified: hive/trunk/ql/src/test/results/clientpositive/reduce_deduplicate_extended.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/reduce_deduplicate_extended.q.out?rev=1519805&r1=1519804&r2=1519805&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/reduce_deduplicate_extended.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/reduce_deduplicate_extended.q.out Tue Sep  3 19:31:33 2013
@@ -608,8 +608,6 @@ STAGE PLANS:
                   Map-reduce partition columns:
                         expr: _col0
                         type: string
-                        expr: _col1
-                        type: string
                   tag: -1
       Reduce Operator Tree:
         Group By Operator
@@ -2874,8 +2872,6 @@ STAGE PLANS:
                 Map-reduce partition columns:
                       expr: key
                       type: string
-                      expr: value
-                      type: string
                 tag: -1
       Reduce Operator Tree:
         Group By Operator