You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@drill.apache.org by jn...@apache.org on 2014/11/08 01:03:06 UTC

[05/16] incubator-drill git commit: DRILL-1525: Modify hash functions to use XXHash algorithm

DRILL-1525: Modify hash functions to use XXHash algorithm


Project: http://git-wip-us.apache.org/repos/asf/incubator-drill/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-drill/commit/2e07c62f
Tree: http://git-wip-us.apache.org/repos/asf/incubator-drill/tree/2e07c62f
Diff: http://git-wip-us.apache.org/repos/asf/incubator-drill/diff/2e07c62f

Branch: refs/heads/master
Commit: 2e07c62f6bd9db240f745b18716b17fecd88d6ef
Parents: 3b8dd3b
Author: Mehant <me...@gmail.com>
Authored: Thu Aug 28 12:31:32 2014 -0700
Committer: Jinfeng Ni <jn...@maprtech.com>
Committed: Fri Nov 7 10:50:55 2014 -0800

----------------------------------------------------------------------
 .../drill/exec/expr/fn/impl/HashFunctions.java  |  69 ++++----
 .../apache/drill/exec/expr/fn/impl/XXHash.java  | 173 +++++++++++++++++++
 2 files changed, 207 insertions(+), 35 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/2e07c62f/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/HashFunctions.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/HashFunctions.java b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/HashFunctions.java
index 7f6d8a5..dd8ac88 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/HashFunctions.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/HashFunctions.java
@@ -69,7 +69,7 @@ public class HashFunctions {
       if (in.isSet == 0) {
         out.value = 0;
       } else {
-        out.value = com.google.common.hash.Hashing.murmur3_128().hashInt(Float.floatToIntBits(in.value)).asInt();
+        out.value = org.apache.drill.exec.expr.fn.impl.XXHash.hash32(in.value, 0);
       }
     }
   }
@@ -84,7 +84,7 @@ public class HashFunctions {
     }
 
     public void eval() {
-      out.value = com.google.common.hash.Hashing.murmur3_128().hashInt(Float.floatToIntBits(in.value)).asInt();
+      out.value = org.apache.drill.exec.expr.fn.impl.XXHash.hash32(in.value, 0);
     }
   }
 
@@ -101,7 +101,7 @@ public class HashFunctions {
       if (in.isSet == 0) {
         out.value = 0;
       } else {
-        out.value = com.google.common.hash.Hashing.murmur3_128().hashLong(Double.doubleToLongBits(in.value)).asInt();
+        out.value = org.apache.drill.exec.expr.fn.impl.XXHash.hash32(in.value, 0);
       }
     }
   }
@@ -116,7 +116,7 @@ public class HashFunctions {
     }
 
     public void eval() {
-      out.value = com.google.common.hash.Hashing.murmur3_128().hashLong(Double.doubleToLongBits(in.value)).asInt();
+      out.value = org.apache.drill.exec.expr.fn.impl.XXHash.hash32(in.value, 0);
     }
   }
 
@@ -133,7 +133,7 @@ public class HashFunctions {
       if (in.isSet == 0) {
         out.value = 0;
       } else {
-        out.value = org.apache.drill.exec.expr.fn.impl.HashHelper.hash(in.buffer.nioBuffer(in.start, in.end - in.start), 0);
+        out.value = org.apache.drill.exec.expr.fn.impl.XXHash.hash32(in.start, in.end, in.buffer);
       }
     }
   }
@@ -151,7 +151,7 @@ public class HashFunctions {
       if (in.isSet == 0) {
         out.value = 0;
       } else {
-        out.value = org.apache.drill.exec.expr.fn.impl.HashHelper.hash(in.buffer.nioBuffer(in.start, in.end - in.start), 0);
+        out.value = org.apache.drill.exec.expr.fn.impl.XXHash.hash32(in.start, in.end, in.buffer);
       }
     }
   }
@@ -169,7 +169,7 @@ public class HashFunctions {
       if (in.isSet == 0) {
         out.value = 0;
       } else {
-        out.value = org.apache.drill.exec.expr.fn.impl.HashHelper.hash(in.buffer.nioBuffer(in.start, in.end - in.start), 0);
+        out.value = org.apache.drill.exec.expr.fn.impl.XXHash.hash32(in.start, in.end, in.buffer);
       }
     }
   }
@@ -184,11 +184,11 @@ public class HashFunctions {
     }
 
     public void eval() {
-      // TODO: implement hash function for other types
       if (in.isSet == 0) {
         out.value = 0;
-      } else {
-        out.value = com.google.common.hash.Hashing.murmur3_128().hashLong(in.value).asInt();
+      }
+      else {
+        out.value = org.apache.drill.exec.expr.fn.impl.XXHash.hash32(in.value, 0);
       }
     }
   }
@@ -202,11 +202,11 @@ public class HashFunctions {
     }
 
     public void eval() {
-      // TODO: implement hash function for other types
       if (in.isSet == 0) {
         out.value = 0;
-      } else {
-        out.value = com.google.common.hash.Hashing.murmur3_128().hashInt(in.value).asInt();
+      }
+      else {
+        out.value = org.apache.drill.exec.expr.fn.impl.XXHash.hash32(in.value, 0);
       }
     }
   }
@@ -221,7 +221,7 @@ public class HashFunctions {
     }
 
     public void eval() {
-      out.value = org.apache.drill.exec.expr.fn.impl.HashHelper.hash(in.buffer.nioBuffer(in.start, in.end - in.start), 0);
+      out.value = org.apache.drill.exec.expr.fn.impl.XXHash.hash32(in.start, in.end, in.buffer);
     }
   }
 
@@ -235,7 +235,7 @@ public class HashFunctions {
     }
 
     public void eval() {
-      out.value = org.apache.drill.exec.expr.fn.impl.HashHelper.hash(in.buffer.nioBuffer(in.start, in.end - in.start), 0);
+      out.value = org.apache.drill.exec.expr.fn.impl.XXHash.hash32(in.start, in.end, in.buffer);
     }
   }
 
@@ -249,7 +249,7 @@ public class HashFunctions {
     }
 
     public void eval() {
-      out.value = org.apache.drill.exec.expr.fn.impl.HashHelper.hash(in.buffer.nioBuffer(in.start, in.end - in.start), 0);
+      out.value = org.apache.drill.exec.expr.fn.impl.XXHash.hash32(in.start, in.end, in.buffer);
     }
   }
 
@@ -263,8 +263,7 @@ public class HashFunctions {
     }
 
     public void eval() {
-      // TODO: implement hash function for other types
-      out.value = com.google.common.hash.Hashing.murmur3_128().hashLong(in.value).asInt();
+      out.value = org.apache.drill.exec.expr.fn.impl.XXHash.hash32(in.value, 0);
     }
   }
 
@@ -278,7 +277,7 @@ public class HashFunctions {
 
     public void eval() {
       // TODO: implement hash function for other types
-      out.value = com.google.common.hash.Hashing.murmur3_128().hashInt(in.value).asInt();
+      out.value = org.apache.drill.exec.expr.fn.impl.XXHash.hash32(in.value, 0);
     }
   }
   @FunctionTemplate(name = "hash", scope = FunctionScope.SIMPLE, nulls = FunctionTemplate.NullHandling.INTERNAL)
@@ -290,7 +289,7 @@ public class HashFunctions {
     }
 
     public void eval() {
-      out.value = com.google.common.hash.Hashing.murmur3_128().hashLong(in.value).asInt();
+      out.value = org.apache.drill.exec.expr.fn.impl.XXHash.hash32(in.value, 0);
     }
   }
 
@@ -306,7 +305,7 @@ public class HashFunctions {
       if (in.isSet == 0) {
         out.value = 0;
       } else {
-        out.value = com.google.common.hash.Hashing.murmur3_128().hashLong(in.value).asInt();
+        out.value = org.apache.drill.exec.expr.fn.impl.XXHash.hash32(in.value, 0);
       }
     }
   }
@@ -320,7 +319,7 @@ public class HashFunctions {
     }
 
     public void eval() {
-      out.value = com.google.common.hash.Hashing.murmur3_128().hashLong(in.value).asInt();
+      out.value = org.apache.drill.exec.expr.fn.impl.XXHash.hash32(in.value, 0);
     }
   }
 
@@ -336,7 +335,7 @@ public class HashFunctions {
       if (in.isSet == 0) {
         out.value = 0;
       } else {
-        out.value = com.google.common.hash.Hashing.murmur3_128().hashLong(in.value).asInt();
+        out.value = org.apache.drill.exec.expr.fn.impl.XXHash.hash32(in.value, 0);
       }
     }
   }
@@ -350,7 +349,7 @@ public class HashFunctions {
     }
 
     public void eval() {
-      out.value = com.google.common.hash.Hashing.murmur3_128().hashInt(in.value).asInt();
+      out.value = org.apache.drill.exec.expr.fn.impl.XXHash.hash32(in.value, 0);
     }
   }
 
@@ -366,7 +365,7 @@ public class HashFunctions {
       if (in.isSet == 0) {
         out.value = 0;
       } else {
-        out.value = com.google.common.hash.Hashing.murmur3_128().hashInt(in.value).asInt();
+        out.value = org.apache.drill.exec.expr.fn.impl.XXHash.hash32(in.value, 0);
       }
     }
   }
@@ -380,7 +379,7 @@ public class HashFunctions {
     }
 
     public void eval() {
-      out.value = com.google.common.hash.Hashing.murmur3_128().hashLong(in.value ^ in.index).asInt();
+      out.value = org.apache.drill.exec.expr.fn.impl.XXHash.hash32(in.value ^ in.index, 0);
     }
   }
 
@@ -396,7 +395,7 @@ public class HashFunctions {
       if (in.isSet == 0) {
         out.value = 0;
       } else {
-        out.value = com.google.common.hash.Hashing.murmur3_128().hashLong(in.value ^ in.index).asInt();
+        out.value = org.apache.drill.exec.expr.fn.impl.XXHash.hash32(in.value ^ in.index, 0);
       }
     }
   }
@@ -410,7 +409,7 @@ public class HashFunctions {
     }
 
     public void eval() {
-      out.value = com.google.common.hash.Hashing.murmur3_128().hashInt(in.value).asInt();
+      out.value = org.apache.drill.exec.expr.fn.impl.XXHash.hash32(in.value, 0);
     }
   }
 
@@ -426,7 +425,7 @@ public class HashFunctions {
       if (in.isSet == 0) {
         out.value = 0;
       } else {
-        out.value = com.google.common.hash.Hashing.murmur3_128().hashInt(in.value).asInt();
+        out.value = org.apache.drill.exec.expr.fn.impl.XXHash.hash32(in.value, 0);
       }
     }
   }
@@ -440,7 +439,7 @@ public class HashFunctions {
     }
 
     public void eval() {
-      out.value = com.google.common.hash.Hashing.murmur3_128().hashLong(in.value).asInt();
+      out.value = org.apache.drill.exec.expr.fn.impl.XXHash.hash32(in.value, 0);
     }
   }
 
@@ -456,7 +455,7 @@ public class HashFunctions {
       if (in.isSet == 0) {
         out.value = 0;
       } else {
-        out.value = com.google.common.hash.Hashing.murmur3_128().hashLong(in.value).asInt();
+        out.value = org.apache.drill.exec.expr.fn.impl.XXHash.hash32(in.value, 0);
       }
     }
   }
@@ -475,7 +474,7 @@ public class HashFunctions {
       for (int i = 0; i < in.nDecimalDigits; i++) {
         xor = xor ^ Decimal28SparseHolder.getInteger(i, in.start, in.buffer);
       }
-      out.value = com.google.common.hash.Hashing.murmur3_128().hashInt(xor).asInt();
+      out.value = org.apache.drill.exec.expr.fn.impl.XXHash.hash32(xor, 0);
     }
   }
 
@@ -495,7 +494,7 @@ public class HashFunctions {
         for (int i = 0; i < in.nDecimalDigits; i++) {
           xor = xor ^ NullableDecimal28SparseHolder.getInteger(i, in.start, in.buffer);
         }
-        out.value = com.google.common.hash.Hashing.murmur3_128().hashInt(xor).asInt();
+        out.value = org.apache.drill.exec.expr.fn.impl.XXHash.hash32(xor, 0);
       }
     }
   }
@@ -514,7 +513,7 @@ public class HashFunctions {
       for (int i = 0; i < in.nDecimalDigits; i++) {
         xor = xor ^ Decimal38SparseHolder.getInteger(i, in.start, in.buffer);
       }
-      out.value = com.google.common.hash.Hashing.murmur3_128().hashInt(xor).asInt();
+      out.value = org.apache.drill.exec.expr.fn.impl.XXHash.hash32(xor, 0);
     }
   }
 
@@ -534,7 +533,7 @@ public class HashFunctions {
         for (int i = 0; i < in.nDecimalDigits; i++) {
           xor = xor ^ NullableDecimal38SparseHolder.getInteger(i, in.start, in.buffer);
         }
-        out.value = com.google.common.hash.Hashing.murmur3_128().hashInt(xor).asInt();
+        out.value = org.apache.drill.exec.expr.fn.impl.XXHash.hash32(xor, 0);
       }
     }
   }

http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/2e07c62f/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/XXHash.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/XXHash.java b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/XXHash.java
new file mode 100644
index 0000000..a8a6484
--- /dev/null
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/XXHash.java
@@ -0,0 +1,173 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.expr.fn.impl;
+
+import io.netty.buffer.DrillBuf;
+import io.netty.util.internal.PlatformDependent;
+
+import com.google.common.primitives.UnsignedLongs;
+
+public final class XXHash {
+  static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(XXHash.class);
+
+  static final long PRIME64_1 = UnsignedLongs.decode("11400714785074694791");
+  static final long PRIME64_2 = UnsignedLongs.decode("14029467366897019727");
+  static final long PRIME64_3 = UnsignedLongs.decode("1609587929392839161");
+  static final long PRIME64_4 = UnsignedLongs.decode("9650029242287828579");
+  static final long PRIME64_5 = UnsignedLongs.decode("2870177450012600261");
+
+  public static long hash64(long start, long bEnd, long seed) {
+    long len = bEnd - start;
+    long h64;
+    long p = start;
+
+    // for long strings (greater than 32 bytes)
+    if (len >= 32) {
+      final long limit = bEnd - 32;
+      long v1 = seed + PRIME64_1 + PRIME64_2;
+      long v2 = seed + PRIME64_2;
+      long v3 = seed + 0;
+      long v4 = seed - PRIME64_1;
+
+      do {
+        v1 += PlatformDependent.getLong(p) * PRIME64_2;
+        p = p + 8;
+        v1 = Long.rotateLeft(v1, 31);
+        v1 *= PRIME64_1;
+
+        v2 += PlatformDependent.getLong(p) * PRIME64_2;
+        p = p + 8;
+        v2 = Long.rotateLeft(v2, 31);
+        v2 *= PRIME64_1;
+
+        v3 += PlatformDependent.getLong(p) * PRIME64_2;
+        p = p + 8;
+        v3 = Long.rotateLeft(v3, 31);
+        v3 *= PRIME64_1;
+
+        v4 += PlatformDependent.getLong(p) * PRIME64_2;
+        p = p + 8;
+        v4 = Long.rotateLeft(v4, 31);
+        v4 *= PRIME64_1;
+      } while (p <= limit);
+
+      h64 = Long.rotateLeft(v1, 1) + Long.rotateLeft(v2, 7) + Long.rotateLeft(v3, 12) + Long.rotateLeft(v4, 18);
+
+      v1 *= PRIME64_2;
+      v1 = Long.rotateLeft(v1, 31);
+      v1 *= PRIME64_1;
+      h64 ^= v1;
+
+      h64 = h64 * PRIME64_1 + PRIME64_4;
+
+      v2 *= PRIME64_2;
+      v2 = Long.rotateLeft(v2, 31);
+      v2 *= PRIME64_1;
+      h64 ^= v2;
+
+      h64 = h64 * PRIME64_1 + PRIME64_4;
+
+      v3 *= PRIME64_2;
+      v3 = Long.rotateLeft(v3, 31);
+      v3 *= PRIME64_1;
+      h64 ^= v3;
+
+      h64 = h64 * PRIME64_1 + PRIME64_4;
+
+      v4 *= PRIME64_2;
+      v4 = Long.rotateLeft(v4, 31);
+      v4 *= PRIME64_1;
+      h64 ^= v4;
+
+      h64 = h64 * PRIME64_1 + PRIME64_4;
+    } else {
+      h64 = seed + PRIME64_5;
+    }
+
+    h64 += len;
+
+    while (p + 8 <= bEnd) {
+      long k1 = PlatformDependent.getLong(p);
+      k1 *= PRIME64_2;
+      k1 = Long.rotateLeft(k1, 31);
+      k1 *= PRIME64_1;
+      h64 ^= k1;
+      h64 = Long.rotateLeft(h64, 27) * PRIME64_1 + PRIME64_4;
+      p += 8;
+    }
+
+    if (p + 4 <= bEnd) {
+      h64 ^= PlatformDependent.getInt(p) * PRIME64_1;
+      h64 = Long.rotateLeft(h64, 23) * PRIME64_2 + PRIME64_3;
+      p += 4;
+    }
+    while (p < bEnd) {
+      h64 ^= PlatformDependent.getByte(p) * PRIME64_5;
+      h64 = Long.rotateLeft(h64, 11) * PRIME64_1;
+      p++;
+    }
+
+    return applyFinalHashComputation(h64);
+  }
+
+  public static int hash32(int start, int end, DrillBuf buffer){
+    long s = buffer.memoryAddress() + start;
+    long e = buffer.memoryAddress() + end;
+    return hash32(s, e, 0);
+  }
+
+  public static int hash32(int val, int seed){
+    long h64 = seed + PRIME64_5;
+    h64 += 4; // add length (4 bytes) to hash value
+    h64 ^= val * PRIME64_1;
+    h64 = Long.rotateLeft(h64, 23) * PRIME64_2 + PRIME64_3;
+    return (int) applyFinalHashComputation(h64);
+  }
+
+  public static int hash32(float val, int seed){
+    return hash32(Float.floatToIntBits(val), seed);
+  }
+
+  public static int hash32(double val, int seed){
+    return hash32(Double.doubleToLongBits(val), seed);
+  }
+
+  public static int hash32(long val, int seed){
+    long h64 = seed + PRIME64_5;
+    h64 += 8; // add length (8 bytes) to hash value
+    long k1 = val* PRIME64_2;
+    k1 = Long.rotateLeft(k1, 31);
+    k1 *= PRIME64_1;
+    h64 ^= k1;
+    h64 = Long.rotateLeft(h64, 27) * PRIME64_1 + PRIME64_4;
+    return (int) applyFinalHashComputation(h64);
+  }
+
+  public static int hash32(long start, long bEnd, long seed){
+    return (int) hash64(start, bEnd, seed);
+  }
+
+  private static long applyFinalHashComputation(long h64) {
+    h64 ^= h64 >> 33;
+    h64 *= PRIME64_2;
+    h64 ^= h64 >> 29;
+    h64 *= PRIME64_3;
+    h64 ^= h64 >> 32;
+    return h64;
+  }
+}