You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@drill.apache.org by jn...@apache.org on 2014/11/08 01:03:06 UTC
[05/16] incubator-drill git commit: DRILL-1525: Modify hash functions
to use XXHash algorithm
DRILL-1525: Modify hash functions to use XXHash algorithm
Project: http://git-wip-us.apache.org/repos/asf/incubator-drill/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-drill/commit/2e07c62f
Tree: http://git-wip-us.apache.org/repos/asf/incubator-drill/tree/2e07c62f
Diff: http://git-wip-us.apache.org/repos/asf/incubator-drill/diff/2e07c62f
Branch: refs/heads/master
Commit: 2e07c62f6bd9db240f745b18716b17fecd88d6ef
Parents: 3b8dd3b
Author: Mehant <me...@gmail.com>
Authored: Thu Aug 28 12:31:32 2014 -0700
Committer: Jinfeng Ni <jn...@maprtech.com>
Committed: Fri Nov 7 10:50:55 2014 -0800
----------------------------------------------------------------------
.../drill/exec/expr/fn/impl/HashFunctions.java | 69 ++++----
.../apache/drill/exec/expr/fn/impl/XXHash.java | 173 +++++++++++++++++++
2 files changed, 207 insertions(+), 35 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/2e07c62f/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/HashFunctions.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/HashFunctions.java b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/HashFunctions.java
index 7f6d8a5..dd8ac88 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/HashFunctions.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/HashFunctions.java
@@ -69,7 +69,7 @@ public class HashFunctions {
if (in.isSet == 0) {
out.value = 0;
} else {
- out.value = com.google.common.hash.Hashing.murmur3_128().hashInt(Float.floatToIntBits(in.value)).asInt();
+ out.value = org.apache.drill.exec.expr.fn.impl.XXHash.hash32(in.value, 0);
}
}
}
@@ -84,7 +84,7 @@ public class HashFunctions {
}
public void eval() {
- out.value = com.google.common.hash.Hashing.murmur3_128().hashInt(Float.floatToIntBits(in.value)).asInt();
+ out.value = org.apache.drill.exec.expr.fn.impl.XXHash.hash32(in.value, 0);
}
}
@@ -101,7 +101,7 @@ public class HashFunctions {
if (in.isSet == 0) {
out.value = 0;
} else {
- out.value = com.google.common.hash.Hashing.murmur3_128().hashLong(Double.doubleToLongBits(in.value)).asInt();
+ out.value = org.apache.drill.exec.expr.fn.impl.XXHash.hash32(in.value, 0);
}
}
}
@@ -116,7 +116,7 @@ public class HashFunctions {
}
public void eval() {
- out.value = com.google.common.hash.Hashing.murmur3_128().hashLong(Double.doubleToLongBits(in.value)).asInt();
+ out.value = org.apache.drill.exec.expr.fn.impl.XXHash.hash32(in.value, 0);
}
}
@@ -133,7 +133,7 @@ public class HashFunctions {
if (in.isSet == 0) {
out.value = 0;
} else {
- out.value = org.apache.drill.exec.expr.fn.impl.HashHelper.hash(in.buffer.nioBuffer(in.start, in.end - in.start), 0);
+ out.value = org.apache.drill.exec.expr.fn.impl.XXHash.hash32(in.start, in.end, in.buffer);
}
}
}
@@ -151,7 +151,7 @@ public class HashFunctions {
if (in.isSet == 0) {
out.value = 0;
} else {
- out.value = org.apache.drill.exec.expr.fn.impl.HashHelper.hash(in.buffer.nioBuffer(in.start, in.end - in.start), 0);
+ out.value = org.apache.drill.exec.expr.fn.impl.XXHash.hash32(in.start, in.end, in.buffer);
}
}
}
@@ -169,7 +169,7 @@ public class HashFunctions {
if (in.isSet == 0) {
out.value = 0;
} else {
- out.value = org.apache.drill.exec.expr.fn.impl.HashHelper.hash(in.buffer.nioBuffer(in.start, in.end - in.start), 0);
+ out.value = org.apache.drill.exec.expr.fn.impl.XXHash.hash32(in.start, in.end, in.buffer);
}
}
}
@@ -184,11 +184,11 @@ public class HashFunctions {
}
public void eval() {
- // TODO: implement hash function for other types
if (in.isSet == 0) {
out.value = 0;
- } else {
- out.value = com.google.common.hash.Hashing.murmur3_128().hashLong(in.value).asInt();
+ }
+ else {
+ out.value = org.apache.drill.exec.expr.fn.impl.XXHash.hash32(in.value, 0);
}
}
}
@@ -202,11 +202,11 @@ public class HashFunctions {
}
public void eval() {
- // TODO: implement hash function for other types
if (in.isSet == 0) {
out.value = 0;
- } else {
- out.value = com.google.common.hash.Hashing.murmur3_128().hashInt(in.value).asInt();
+ }
+ else {
+ out.value = org.apache.drill.exec.expr.fn.impl.XXHash.hash32(in.value, 0);
}
}
}
@@ -221,7 +221,7 @@ public class HashFunctions {
}
public void eval() {
- out.value = org.apache.drill.exec.expr.fn.impl.HashHelper.hash(in.buffer.nioBuffer(in.start, in.end - in.start), 0);
+ out.value = org.apache.drill.exec.expr.fn.impl.XXHash.hash32(in.start, in.end, in.buffer);
}
}
@@ -235,7 +235,7 @@ public class HashFunctions {
}
public void eval() {
- out.value = org.apache.drill.exec.expr.fn.impl.HashHelper.hash(in.buffer.nioBuffer(in.start, in.end - in.start), 0);
+ out.value = org.apache.drill.exec.expr.fn.impl.XXHash.hash32(in.start, in.end, in.buffer);
}
}
@@ -249,7 +249,7 @@ public class HashFunctions {
}
public void eval() {
- out.value = org.apache.drill.exec.expr.fn.impl.HashHelper.hash(in.buffer.nioBuffer(in.start, in.end - in.start), 0);
+ out.value = org.apache.drill.exec.expr.fn.impl.XXHash.hash32(in.start, in.end, in.buffer);
}
}
@@ -263,8 +263,7 @@ public class HashFunctions {
}
public void eval() {
- // TODO: implement hash function for other types
- out.value = com.google.common.hash.Hashing.murmur3_128().hashLong(in.value).asInt();
+ out.value = org.apache.drill.exec.expr.fn.impl.XXHash.hash32(in.value, 0);
}
}
@@ -278,7 +277,7 @@ public class HashFunctions {
public void eval() {
// TODO: implement hash function for other types
- out.value = com.google.common.hash.Hashing.murmur3_128().hashInt(in.value).asInt();
+ out.value = org.apache.drill.exec.expr.fn.impl.XXHash.hash32(in.value, 0);
}
}
@FunctionTemplate(name = "hash", scope = FunctionScope.SIMPLE, nulls = FunctionTemplate.NullHandling.INTERNAL)
@@ -290,7 +289,7 @@ public class HashFunctions {
}
public void eval() {
- out.value = com.google.common.hash.Hashing.murmur3_128().hashLong(in.value).asInt();
+ out.value = org.apache.drill.exec.expr.fn.impl.XXHash.hash32(in.value, 0);
}
}
@@ -306,7 +305,7 @@ public class HashFunctions {
if (in.isSet == 0) {
out.value = 0;
} else {
- out.value = com.google.common.hash.Hashing.murmur3_128().hashLong(in.value).asInt();
+ out.value = org.apache.drill.exec.expr.fn.impl.XXHash.hash32(in.value, 0);
}
}
}
@@ -320,7 +319,7 @@ public class HashFunctions {
}
public void eval() {
- out.value = com.google.common.hash.Hashing.murmur3_128().hashLong(in.value).asInt();
+ out.value = org.apache.drill.exec.expr.fn.impl.XXHash.hash32(in.value, 0);
}
}
@@ -336,7 +335,7 @@ public class HashFunctions {
if (in.isSet == 0) {
out.value = 0;
} else {
- out.value = com.google.common.hash.Hashing.murmur3_128().hashLong(in.value).asInt();
+ out.value = org.apache.drill.exec.expr.fn.impl.XXHash.hash32(in.value, 0);
}
}
}
@@ -350,7 +349,7 @@ public class HashFunctions {
}
public void eval() {
- out.value = com.google.common.hash.Hashing.murmur3_128().hashInt(in.value).asInt();
+ out.value = org.apache.drill.exec.expr.fn.impl.XXHash.hash32(in.value, 0);
}
}
@@ -366,7 +365,7 @@ public class HashFunctions {
if (in.isSet == 0) {
out.value = 0;
} else {
- out.value = com.google.common.hash.Hashing.murmur3_128().hashInt(in.value).asInt();
+ out.value = org.apache.drill.exec.expr.fn.impl.XXHash.hash32(in.value, 0);
}
}
}
@@ -380,7 +379,7 @@ public class HashFunctions {
}
public void eval() {
- out.value = com.google.common.hash.Hashing.murmur3_128().hashLong(in.value ^ in.index).asInt();
+ out.value = org.apache.drill.exec.expr.fn.impl.XXHash.hash32(in.value ^ in.index, 0);
}
}
@@ -396,7 +395,7 @@ public class HashFunctions {
if (in.isSet == 0) {
out.value = 0;
} else {
- out.value = com.google.common.hash.Hashing.murmur3_128().hashLong(in.value ^ in.index).asInt();
+ out.value = org.apache.drill.exec.expr.fn.impl.XXHash.hash32(in.value ^ in.index, 0);
}
}
}
@@ -410,7 +409,7 @@ public class HashFunctions {
}
public void eval() {
- out.value = com.google.common.hash.Hashing.murmur3_128().hashInt(in.value).asInt();
+ out.value = org.apache.drill.exec.expr.fn.impl.XXHash.hash32(in.value, 0);
}
}
@@ -426,7 +425,7 @@ public class HashFunctions {
if (in.isSet == 0) {
out.value = 0;
} else {
- out.value = com.google.common.hash.Hashing.murmur3_128().hashInt(in.value).asInt();
+ out.value = org.apache.drill.exec.expr.fn.impl.XXHash.hash32(in.value, 0);
}
}
}
@@ -440,7 +439,7 @@ public class HashFunctions {
}
public void eval() {
- out.value = com.google.common.hash.Hashing.murmur3_128().hashLong(in.value).asInt();
+ out.value = org.apache.drill.exec.expr.fn.impl.XXHash.hash32(in.value, 0);
}
}
@@ -456,7 +455,7 @@ public class HashFunctions {
if (in.isSet == 0) {
out.value = 0;
} else {
- out.value = com.google.common.hash.Hashing.murmur3_128().hashLong(in.value).asInt();
+ out.value = org.apache.drill.exec.expr.fn.impl.XXHash.hash32(in.value, 0);
}
}
}
@@ -475,7 +474,7 @@ public class HashFunctions {
for (int i = 0; i < in.nDecimalDigits; i++) {
xor = xor ^ Decimal28SparseHolder.getInteger(i, in.start, in.buffer);
}
- out.value = com.google.common.hash.Hashing.murmur3_128().hashInt(xor).asInt();
+ out.value = org.apache.drill.exec.expr.fn.impl.XXHash.hash32(xor, 0);
}
}
@@ -495,7 +494,7 @@ public class HashFunctions {
for (int i = 0; i < in.nDecimalDigits; i++) {
xor = xor ^ NullableDecimal28SparseHolder.getInteger(i, in.start, in.buffer);
}
- out.value = com.google.common.hash.Hashing.murmur3_128().hashInt(xor).asInt();
+ out.value = org.apache.drill.exec.expr.fn.impl.XXHash.hash32(xor, 0);
}
}
}
@@ -514,7 +513,7 @@ public class HashFunctions {
for (int i = 0; i < in.nDecimalDigits; i++) {
xor = xor ^ Decimal38SparseHolder.getInteger(i, in.start, in.buffer);
}
- out.value = com.google.common.hash.Hashing.murmur3_128().hashInt(xor).asInt();
+ out.value = org.apache.drill.exec.expr.fn.impl.XXHash.hash32(xor, 0);
}
}
@@ -534,7 +533,7 @@ public class HashFunctions {
for (int i = 0; i < in.nDecimalDigits; i++) {
xor = xor ^ NullableDecimal38SparseHolder.getInteger(i, in.start, in.buffer);
}
- out.value = com.google.common.hash.Hashing.murmur3_128().hashInt(xor).asInt();
+ out.value = org.apache.drill.exec.expr.fn.impl.XXHash.hash32(xor, 0);
}
}
}
http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/2e07c62f/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/XXHash.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/XXHash.java b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/XXHash.java
new file mode 100644
index 0000000..a8a6484
--- /dev/null
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/XXHash.java
@@ -0,0 +1,173 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.expr.fn.impl;
+
+import io.netty.buffer.DrillBuf;
+import io.netty.util.internal.PlatformDependent;
+
+import com.google.common.primitives.UnsignedLongs;
+
+public final class XXHash {
+ static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(XXHash.class);
+
+ static final long PRIME64_1 = UnsignedLongs.decode("11400714785074694791");
+ static final long PRIME64_2 = UnsignedLongs.decode("14029467366897019727");
+ static final long PRIME64_3 = UnsignedLongs.decode("1609587929392839161");
+ static final long PRIME64_4 = UnsignedLongs.decode("9650029242287828579");
+ static final long PRIME64_5 = UnsignedLongs.decode("2870177450012600261");
+
+ public static long hash64(long start, long bEnd, long seed) {
+ long len = bEnd - start;
+ long h64;
+ long p = start;
+
+ // for long strings (greater than 32 bytes)
+ if (len >= 32) {
+ final long limit = bEnd - 32;
+ long v1 = seed + PRIME64_1 + PRIME64_2;
+ long v2 = seed + PRIME64_2;
+ long v3 = seed + 0;
+ long v4 = seed - PRIME64_1;
+
+ do {
+ v1 += PlatformDependent.getLong(p) * PRIME64_2;
+ p = p + 8;
+ v1 = Long.rotateLeft(v1, 31);
+ v1 *= PRIME64_1;
+
+ v2 += PlatformDependent.getLong(p) * PRIME64_2;
+ p = p + 8;
+ v2 = Long.rotateLeft(v2, 31);
+ v2 *= PRIME64_1;
+
+ v3 += PlatformDependent.getLong(p) * PRIME64_2;
+ p = p + 8;
+ v3 = Long.rotateLeft(v3, 31);
+ v3 *= PRIME64_1;
+
+ v4 += PlatformDependent.getLong(p) * PRIME64_2;
+ p = p + 8;
+ v4 = Long.rotateLeft(v4, 31);
+ v4 *= PRIME64_1;
+ } while (p <= limit);
+
+ h64 = Long.rotateLeft(v1, 1) + Long.rotateLeft(v2, 7) + Long.rotateLeft(v3, 12) + Long.rotateLeft(v4, 18);
+
+ v1 *= PRIME64_2;
+ v1 = Long.rotateLeft(v1, 31);
+ v1 *= PRIME64_1;
+ h64 ^= v1;
+
+ h64 = h64 * PRIME64_1 + PRIME64_4;
+
+ v2 *= PRIME64_2;
+ v2 = Long.rotateLeft(v2, 31);
+ v2 *= PRIME64_1;
+ h64 ^= v2;
+
+ h64 = h64 * PRIME64_1 + PRIME64_4;
+
+ v3 *= PRIME64_2;
+ v3 = Long.rotateLeft(v3, 31);
+ v3 *= PRIME64_1;
+ h64 ^= v3;
+
+ h64 = h64 * PRIME64_1 + PRIME64_4;
+
+ v4 *= PRIME64_2;
+ v4 = Long.rotateLeft(v4, 31);
+ v4 *= PRIME64_1;
+ h64 ^= v4;
+
+ h64 = h64 * PRIME64_1 + PRIME64_4;
+ } else {
+ h64 = seed + PRIME64_5;
+ }
+
+ h64 += len;
+
+ while (p + 8 <= bEnd) {
+ long k1 = PlatformDependent.getLong(p);
+ k1 *= PRIME64_2;
+ k1 = Long.rotateLeft(k1, 31);
+ k1 *= PRIME64_1;
+ h64 ^= k1;
+ h64 = Long.rotateLeft(h64, 27) * PRIME64_1 + PRIME64_4;
+ p += 8;
+ }
+
+ if (p + 4 <= bEnd) {
+ h64 ^= PlatformDependent.getInt(p) * PRIME64_1;
+ h64 = Long.rotateLeft(h64, 23) * PRIME64_2 + PRIME64_3;
+ p += 4;
+ }
+ while (p < bEnd) {
+ h64 ^= PlatformDependent.getByte(p) * PRIME64_5;
+ h64 = Long.rotateLeft(h64, 11) * PRIME64_1;
+ p++;
+ }
+
+ return applyFinalHashComputation(h64);
+ }
+
+ public static int hash32(int start, int end, DrillBuf buffer){
+ long s = buffer.memoryAddress() + start;
+ long e = buffer.memoryAddress() + end;
+ return hash32(s, e, 0);
+ }
+
+ public static int hash32(int val, int seed){
+ long h64 = seed + PRIME64_5;
+ h64 += 4; // add length (4 bytes) to hash value
+ h64 ^= val * PRIME64_1;
+ h64 = Long.rotateLeft(h64, 23) * PRIME64_2 + PRIME64_3;
+ return (int) applyFinalHashComputation(h64);
+ }
+
+ public static int hash32(float val, int seed){
+ return hash32(Float.floatToIntBits(val), seed);
+ }
+
+ public static int hash32(double val, int seed){
+ return hash32(Double.doubleToLongBits(val), seed);
+ }
+
+ public static int hash32(long val, int seed){
+ long h64 = seed + PRIME64_5;
+ h64 += 8; // add length (8 bytes) to hash value
+ long k1 = val* PRIME64_2;
+ k1 = Long.rotateLeft(k1, 31);
+ k1 *= PRIME64_1;
+ h64 ^= k1;
+ h64 = Long.rotateLeft(h64, 27) * PRIME64_1 + PRIME64_4;
+ return (int) applyFinalHashComputation(h64);
+ }
+
+ public static int hash32(long start, long bEnd, long seed){
+ return (int) hash64(start, bEnd, seed);
+ }
+
+ private static long applyFinalHashComputation(long h64) {
+ h64 ^= h64 >> 33;
+ h64 *= PRIME64_2;
+ h64 ^= h64 >> 29;
+ h64 *= PRIME64_3;
+ h64 ^= h64 >> 32;
+ return h64;
+ }
+}