Posted to commits@hive.apache.org by na...@apache.org on 2009/04/20 21:42:38 UTC

svn commit: r766829 - in /hadoop/hive/branches/branch-0.3: CHANGES.txt ql/src/java/org/apache/hadoop/hive/ql/exec/ReduceSinkOperator.java

Author: namit
Date: Mon Apr 20 19:42:38 2009
New Revision: 766829

URL: http://svn.apache.org/viewvc?rev=766829&view=rev
Log:
HIVE-432. Fix "SORT BY" using only one reducer.
(Zheng Shao via njain)
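
Background: when a query has SORT BY but no DISTRIBUTE BY, partitionEval is
empty, so the partition-hash loop in ReduceSinkOperator never runs and
keyHashCode stays 0. Every row therefore maps to the same partition and a
single reducer receives all the data. The change below draws the hash from
a Random seeded with a constant instead, spreading rows uniformly across
reducers; a standalone sketch of the effect follows the diff.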


Modified:
    hadoop/hive/branches/branch-0.3/CHANGES.txt
    hadoop/hive/branches/branch-0.3/ql/src/java/org/apache/hadoop/hive/ql/exec/ReduceSinkOperator.java

Modified: hadoop/hive/branches/branch-0.3/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/hive/branches/branch-0.3/CHANGES.txt?rev=766829&r1=766828&r2=766829&view=diff
==============================================================================
--- hadoop/hive/branches/branch-0.3/CHANGES.txt (original)
+++ hadoop/hive/branches/branch-0.3/CHANGES.txt Mon Apr 20 19:42:38 2009
@@ -155,6 +155,9 @@
     HIVE-404. Fix ordering in "SELECT * FROM t SORT BY col1 LIMIT 100" when
     query is a outer-most query.  (Namit Jain via zshao)
 
+    HIVE-432. Fix "SORT BY" using only one reducer.
+    (Zheng Shao via njain)
+
 Release 0.2.0 - Unreleased
 
   INCOMPATIBLE CHANGES

Modified: hadoop/hive/branches/branch-0.3/ql/src/java/org/apache/hadoop/hive/ql/exec/ReduceSinkOperator.java
URL: http://svn.apache.org/viewvc/hadoop/hive/branches/branch-0.3/ql/src/java/org/apache/hadoop/hive/ql/exec/ReduceSinkOperator.java?rev=766829&r1=766828&r2=766829&view=diff
==============================================================================
--- hadoop/hive/branches/branch-0.3/ql/src/java/org/apache/hadoop/hive/ql/exec/ReduceSinkOperator.java (original)
+++ hadoop/hive/branches/branch-0.3/ql/src/java/org/apache/hadoop/hive/ql/exec/ReduceSinkOperator.java Mon Apr 20 19:42:38 2009
@@ -21,6 +21,7 @@
 import java.io.IOException;
 import java.io.Serializable;
 import java.util.ArrayList;
+import java.util.Random;
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hive.ql.io.HiveKey;
@@ -119,6 +120,8 @@
   transient ArrayList<ObjectInspector> keyFieldsObjectInspectors = new ArrayList<ObjectInspector>();
   transient ArrayList<ObjectInspector> valueFieldsObjectInspectors = new ArrayList<ObjectInspector>();
   
+  transient Random random;
+  
   public void process(Object row, ObjectInspector rowInspector) throws HiveException {
     try {
       // Evaluate the keys
@@ -162,10 +165,21 @@
       }
       // Set the HashCode
       int keyHashCode = 0;
-      for(ExprNodeEvaluator e: partitionEval) {
-        e.evaluate(row, rowInspector, tempInspectableObject);
-        keyHashCode = keyHashCode * 31 
-          + (tempInspectableObject.o == null ? 0 : tempInspectableObject.o.hashCode());
+      if (partitionEval.length == 0) {
+        // If no partition cols, just distribute the data uniformly to provide better
+        // load balance.  If the requirement is to have a single reducer, we should set
+        // the number of reducers to 1.
+        // Use a constant seed to make the code deterministic.
+        if (random == null) {
+          random = new Random(12345);
+        }
+        keyHashCode = random.nextInt();
+      } else {
+        for(ExprNodeEvaluator e: partitionEval) {
+          e.evaluate(row, rowInspector, tempInspectableObject);
+          keyHashCode = keyHashCode * 31 
+            + (tempInspectableObject.o == null ? 0 : tempInspectableObject.o.hashCode());
+        }
       }
       keyWritable.setHashCode(keyHashCode);
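
For illustration, here is a minimal standalone sketch of the behavior
change, assuming a Hadoop-style partitioner that maps a key's hash to a
reducer as (hash & Integer.MAX_VALUE) % numReducers. The class and method
names are hypothetical, not part of Hive:

import java.util.Random;

// Hypothetical sketch (not Hive code): models how the key hash computed in
// ReduceSinkOperator.process() selects the target reducer.
public class SortByPartitioningSketch {

  // Hadoop's default HashPartitioner maps a hash to a partition like this;
  // masking off the sign bit keeps the modulo result non-negative.
  static int reducerFor(int keyHashCode, int numReducers) {
    return (keyHashCode & Integer.MAX_VALUE) % numReducers;
  }

  public static void main(String[] args) {
    int numReducers = 4;

    // Before the fix: with no partition columns the hash loop never runs,
    // keyHashCode stays 0, and every row goes to the same reducer.
    for (int row = 0; row < 5; row++) {
      System.out.println("old: row " + row + " -> reducer "
          + reducerFor(0, numReducers));
    }

    // After the fix: a Random with a constant seed spreads rows uniformly
    // over all reducers while keeping the sequence deterministic.
    Random random = new Random(12345);
    for (int row = 0; row < 5; row++) {
      System.out.println("new: row " + row + " -> reducer "
          + reducerFor(random.nextInt(), numReducers));
    }
  }
}

Since Hive's SORT BY only guarantees ordering within each reducer, spreading
the rows does not weaken its contract; as the new comment in the diff notes,
a query that needs a single globally sorted output should set the number of
reducers to 1.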