You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by na...@apache.org on 2009/04/20 21:42:38 UTC
svn commit: r766829 - in /hadoop/hive/branches/branch-0.3: CHANGES.txt
ql/src/java/org/apache/hadoop/hive/ql/exec/ReduceSinkOperator.java
Author: namit
Date: Mon Apr 20 19:42:38 2009
New Revision: 766829
URL: http://svn.apache.org/viewvc?rev=766829&view=rev
Log:
HIVE-432. Fix "SORT BY" using only one reducer.
(Zheng Shao via njain)
Modified:
hadoop/hive/branches/branch-0.3/CHANGES.txt
hadoop/hive/branches/branch-0.3/ql/src/java/org/apache/hadoop/hive/ql/exec/ReduceSinkOperator.java
Modified: hadoop/hive/branches/branch-0.3/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/hive/branches/branch-0.3/CHANGES.txt?rev=766829&r1=766828&r2=766829&view=diff
==============================================================================
--- hadoop/hive/branches/branch-0.3/CHANGES.txt (original)
+++ hadoop/hive/branches/branch-0.3/CHANGES.txt Mon Apr 20 19:42:38 2009
@@ -155,6 +155,9 @@
HIVE-404. Fix ordering in "SELECT * FROM t SORT BY col1 LIMIT 100" when
query is an outer-most query. (Namit Jain via zshao)
+ HIVE-432. Fix "SORT BY" using only one reducer.
+ (Zheng Shao via njain)
+
Release 0.2.0 - Unreleased
INCOMPATIBLE CHANGES
Modified: hadoop/hive/branches/branch-0.3/ql/src/java/org/apache/hadoop/hive/ql/exec/ReduceSinkOperator.java
URL: http://svn.apache.org/viewvc/hadoop/hive/branches/branch-0.3/ql/src/java/org/apache/hadoop/hive/ql/exec/ReduceSinkOperator.java?rev=766829&r1=766828&r2=766829&view=diff
==============================================================================
--- hadoop/hive/branches/branch-0.3/ql/src/java/org/apache/hadoop/hive/ql/exec/ReduceSinkOperator.java (original)
+++ hadoop/hive/branches/branch-0.3/ql/src/java/org/apache/hadoop/hive/ql/exec/ReduceSinkOperator.java Mon Apr 20 19:42:38 2009
@@ -21,6 +21,7 @@
import java.io.IOException;
import java.io.Serializable;
import java.util.ArrayList;
+import java.util.Random;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.ql.io.HiveKey;
@@ -119,6 +120,8 @@
transient ArrayList<ObjectInspector> keyFieldsObjectInspectors = new ArrayList<ObjectInspector>();
transient ArrayList<ObjectInspector> valueFieldsObjectInspectors = new ArrayList<ObjectInspector>();
+ transient Random random;
+
public void process(Object row, ObjectInspector rowInspector) throws HiveException {
try {
// Evaluate the keys
@@ -162,10 +165,21 @@
}
// Set the HashCode
int keyHashCode = 0;
- for(ExprNodeEvaluator e: partitionEval) {
- e.evaluate(row, rowInspector, tempInspectableObject);
- keyHashCode = keyHashCode * 31
- + (tempInspectableObject.o == null ? 0 : tempInspectableObject.o.hashCode());
+ if (partitionEval.length == 0) {
+ // If no partition cols, just distribute the data uniformly to provide better
+ // load balance. If the requirement is to have a single reducer, we should set
+ // the number of reducers to 1.
+ // Use a constant seed to make the code deterministic.
+ if (random == null) {
+ random = new Random(12345);
+ }
+ keyHashCode = random.nextInt();
+ } else {
+ for(ExprNodeEvaluator e: partitionEval) {
+ e.evaluate(row, rowInspector, tempInspectableObject);
+ keyHashCode = keyHashCode * 31
+ + (tempInspectableObject.o == null ? 0 : tempInspectableObject.o.hashCode());
+ }
}
keyWritable.setHashCode(keyHashCode);