You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by se...@apache.org on 2015/11/10 01:32:49 UTC
hive git commit: HIVE-12354 : MapJoin with double keys is slow on MR
(Sergey Shelukhin, reviewed by Prasanth Jayachandran)
Repository: hive
Updated Branches:
refs/heads/master 08e9d267c -> 41b60c444
HIVE-12354 : MapJoin with double keys is slow on MR (Sergey Shelukhin, reviewed by Prasanth Jayachandran)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/41b60c44
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/41b60c44
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/41b60c44
Branch: refs/heads/master
Commit: 41b60c44401d92787227b5cdf2a51c20d28a2bc4
Parents: 08e9d26
Author: Sergey Shelukhin <se...@apache.org>
Authored: Mon Nov 9 16:32:31 2015 -0800
Committer: Sergey Shelukhin <se...@apache.org>
Committed: Mon Nov 9 16:32:31 2015 -0800
----------------------------------------------------------------------
.../hadoop/hive/ql/exec/KeyWrapperFactory.java | 20 +--------------
.../ql/exec/persistence/MapJoinKeyObject.java | 6 ++---
.../objectinspector/ObjectInspectorUtils.java | 26 ++++++++++++++++++++
3 files changed, 29 insertions(+), 23 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/41b60c44/ql/src/java/org/apache/hadoop/hive/ql/exec/KeyWrapperFactory.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/KeyWrapperFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/KeyWrapperFactory.java
index 1c409a2..5154a5f 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/KeyWrapperFactory.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/KeyWrapperFactory.java
@@ -105,25 +105,7 @@ public class KeyWrapperFactory {
@Override
public void setHashKey() {
- if (keys == null) {
- hashcode = 0;
- } else {
- hashcode = 1;
- for (Object element : keys) {
- hashcode = 31 * hashcode;
- if(element != null) {
- if(element instanceof LazyDouble) {
- long v = Double.doubleToLongBits(((LazyDouble)element).getWritableObject().get());
- hashcode = hashcode + (int) (v ^ (v >>> 32));
- } else if (element instanceof DoubleWritable){
- long v = Double.doubleToLongBits(((DoubleWritable)element).get());
- hashcode = hashcode + (int) (v ^ (v >>> 32));
- } else {
- hashcode = hashcode + element.hashCode();
- }
- }
- }
- }
+ hashcode = ObjectInspectorUtils.writableArrayHashCode(keys);
}
@Override
http://git-wip-us.apache.org/repos/asf/hive/blob/41b60c44/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinKeyObject.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinKeyObject.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinKeyObject.java
index e1fd6d3..7592f9e 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinKeyObject.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinKeyObject.java
@@ -78,11 +78,9 @@ public class MapJoinKeyObject extends MapJoinKey {
@Override
public int hashCode() {
- final int prime = 31;
- int result = 1;
- result = prime * result + Arrays.hashCode(key);
- return result;
+ return ObjectInspectorUtils.writableArrayHashCode(key);
}
+
@Override
public boolean equals(Object obj) {
if (this == obj)
http://git-wip-us.apache.org/repos/asf/hive/blob/41b60c44/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java
----------------------------------------------------------------------
diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java b/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java
index 56597a2..7a13eb0 100644
--- a/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java
+++ b/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java
@@ -38,6 +38,7 @@ import org.apache.hadoop.hive.serde2.io.HiveIntervalDayTimeWritable;
import org.apache.hadoop.hive.serde2.io.HiveIntervalYearMonthWritable;
import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable;
import org.apache.hadoop.hive.serde2.io.TimestampWritable;
+import org.apache.hadoop.hive.serde2.lazy.LazyDouble;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.ObjectInspectorOptions;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.AbstractPrimitiveWritableObjectInspector;
@@ -77,6 +78,7 @@ import org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampObjectIn
import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableStringObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.util.StringUtils;
@@ -104,6 +106,30 @@ public final class ObjectInspectorUtils {
}
/**
+ * Calculates the hash code for an array of Objects that contains Writables. This is used
+ * to work around the buggy Hadoop DoubleWritable hashCode implementation. This should only be
+ * used for process-local hash codes; don't replace stored hash codes, e.g. those for bucketing.
+ */
+ public static int writableArrayHashCode(Object[] keys) {
+ if (keys == null) return 0;
+ int hashcode = 1;
+ for (Object element : keys) {
+ hashcode = 31 * hashcode;
+ if (element == null) continue;
+ if (element instanceof LazyDouble) {
+ long v = Double.doubleToLongBits(((LazyDouble)element).getWritableObject().get());
+ hashcode = hashcode + (int) (v ^ (v >>> 32));
+ } else if (element instanceof DoubleWritable){
+ long v = Double.doubleToLongBits(((DoubleWritable)element).get());
+ hashcode = hashcode + (int) (v ^ (v >>> 32));
+ } else {
+ hashcode = hashcode + element.hashCode();
+ }
+ }
+ return hashcode;
+ }
+
+ /**
* Ensures that an ObjectInspector is Writable.
*/
public static ObjectInspector getWritableObjectInspector(ObjectInspector oi) {