You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by li...@apache.org on 2014/02/12 20:18:29 UTC
svn commit: r1567724 - in
/hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/util:
Hash.java MurmurHash3.java
Author: liyin
Date: Wed Feb 12 19:18:29 2014
New Revision: 1567724
URL: http://svn.apache.org/r1567724
Log:
[HBASE-10496] Add MurmurHash3
Author: gauravm
Summary:
I found that the hashing algorithm that we use is the 32-bit MurmurHash version 2.
It seems there is a newer hash function in the same family, which according to the authors is about 25-30% faster. (https://code.google.com/p/smhasher/source/browse/wiki/MurmurHash3.wiki)
I ran a benchmark to verify this in https://phabricator.fb.com/P5616093. The results are as follows:
Murmur Hash 3 time taken: 17.022
Murmur Hash 3 encoding speed: 1434.265215603337 MB/s
Murmur Hash 2 time taken: 20.446
Murmur Hash 2 encoding speed: 1194.0752469920767 MB/s
I will write another benchmark to verify the collision resistance of MurmurHash3 when I get some bandwidth, and check how it improves/worsens the performance of ByteBloomFilter.
This has also been ported to open-source HBase in HBASE-9631.
Test Plan: Unit Tests
Reviewers: liyintang, adela, aaiyer, manukranthk, fan, daviddeng, weichen
Reviewed By: manukranthk
CC: hbase-eng@
Differential Revision: https://phabricator.fb.com/D1168344
Task ID: 3696318
Added:
hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/util/MurmurHash3.java
Modified:
hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/util/Hash.java
Modified: hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/util/Hash.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/util/Hash.java?rev=1567724&r1=1567723&r2=1567724&view=diff
==============================================================================
--- hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/util/Hash.java (original)
+++ hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/util/Hash.java Wed Feb 12 19:18:29 2014
@@ -32,6 +32,8 @@ public abstract class Hash {
public static final int JENKINS_HASH = 0;
/** Constant to denote {@link MurmurHash}. */
public static final int MURMUR_HASH = 1;
+ /** Constant to denote {@link MurmurHash3}. */
+ public static final int MURMUR_HASH3 = 2;
/**
* This utility method converts String representation of hash function name
@@ -45,6 +47,8 @@ public abstract class Hash {
return JENKINS_HASH;
} else if ("murmur".equalsIgnoreCase(name)) {
return MURMUR_HASH;
+ } else if ("murmur3".equalsIgnoreCase(name)) {
+ return MURMUR_HASH3;
} else {
return INVALID_HASH;
}
@@ -68,12 +72,14 @@ public abstract class Hash {
*/
public static Hash getInstance(int type) {
switch(type) {
- case JENKINS_HASH:
- return JenkinsHash.getInstance();
- case MURMUR_HASH:
- return MurmurHash.getInstance();
- default:
- return null;
+ case JENKINS_HASH:
+ return JenkinsHash.getInstance();
+ case MURMUR_HASH:
+ return MurmurHash.getInstance();
+ case MURMUR_HASH3:
+ return MurmurHash3.getInstance();
+ default:
+ return null;
}
}
Added: hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/util/MurmurHash3.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/util/MurmurHash3.java?rev=1567724&view=auto
==============================================================================
--- hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/util/MurmurHash3.java (added)
+++ hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/util/MurmurHash3.java Wed Feb 12 19:18:29 2014
@@ -0,0 +1,99 @@
+/**
+ * Copyright 2014 The Apache Software Foundation
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.util;
+
+/**
+ * This is a very fast, non-cryptographic hash suitable for general hash-based
+ * lookup. See https://code.google.com/p/smhasher/wiki/MurmurHash3 for more
+ * details.
+ *
+ * This implementation in Java was copied from the Mahout repository:
+ * https://svn.apache.org/repos/asf/mahout/tags/mahout-0.7/math/src/main/java/org/apache/mahout/math/MurmurHash3.java
+ *
+ * In simple benchmarks, we found the 32-bit MurmurHash3 to be about 25% in
+ * hashing than the 32-bit MurmurHash2. Which is in sync with what the authors
+ * claim in
+ * https://code.google.com/p/smhasher/source/browse/wiki/MurmurHash3.wiki
+ *
+ * TODO
+ * @gauravm: Test the collision resistance properties of MurmurHash3
+ */
+public class MurmurHash3 extends Hash {
+ private static MurmurHash3 _instance = new MurmurHash3();
+
+ public static Hash getInstance() {
+ return _instance;
+ }
+
+ @Override
+ public int hash(byte[] data, int offset, int len, int seed) {
+ int c1 = 0xcc9e2d51;
+ int c2 = 0x1b873593;
+
+ int h1 = seed;
+ int roundedEnd = offset + (len & 0xfffffffc); // round down to 4 byte block
+
+ for (int i = offset; i < roundedEnd; i += 4) {
+ // little endian load order
+ int k1 = (data[i] & 0xff) | ((data[i + 1] & 0xff) << 8) |
+ ((data[i + 2] & 0xff) << 16) | (data[i + 3] << 24);
+ k1 *= c1;
+ k1 = (k1 << 15) | (k1 >>> 17); // ROTL32(k1,15);
+ k1 *= c2;
+
+ h1 ^= k1;
+ h1 = (h1 << 13) | (h1 >>> 19); // ROTL32(h1,13);
+ h1 = h1 * 5 + 0xe6546b64;
+ }
+
+ // tail
+ int k1 = 0;
+
+ switch(len & 0x03) {
+ case 3:
+ k1 = (data[roundedEnd + 2] & 0xff) << 16;
+ // fallthrough
+ case 2:
+ k1 |= (data[roundedEnd + 1] & 0xff) << 8;
+ // fallthrough
+ case 1:
+ k1 |= data[roundedEnd] & 0xff;
+ k1 *= c1;
+ k1 = (k1 << 15) | (k1 >>> 17); // ROTL32(k1,15);
+ k1 *= c2;
+ h1 ^= k1;
+ default:
+ }
+
+ // finalization
+ h1 ^= len;
+
+ // fmix(h1);
+ h1 ^= h1 >>> 16;
+ h1 *= 0x85ebca6b;
+ h1 ^= h1 >>> 13;
+ h1 *= 0xc2b2ae35;
+ h1 ^= h1 >>> 16;
+
+ return h1;
+ }
+
+}