You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by li...@apache.org on 2014/02/12 20:18:29 UTC

svn commit: r1567724 - in /hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/util: Hash.java MurmurHash3.java

Author: liyin
Date: Wed Feb 12 19:18:29 2014
New Revision: 1567724

URL: http://svn.apache.org/r1567724
Log:
[HBASE-10496] Add MurmurHash3

Author: gauravm

Summary:
I found that the hashing algorithm that we use is the 32-bit MurmurHash version 2.

It seems there is a newer hash function in the same family, which according to the authors is about 25-30% faster. (https://code.google.com/p/smhasher/source/browse/wiki/MurmurHash3.wiki)

I ran a benchmark to verify this in https://phabricator.fb.com/P5616093. The results are as follows:
Murmur Hash 3 time taken: 17.022
Murmur Hash 3 encoding speed: 1434.265215603337 MB/s

Murmur Hash 2 time taken: 20.446
Murmur Hash 2 encoding speed: 1194.0752469920767 MB/s

I will write another benchmark to verify the collision resistance of MurmurHash3 when I get some bandwidth, and check how it improves/worsens the performance of ByteBloomFilter.

This has also been ported to open-source HBase in HBASE-9631.

Test Plan: Unit Tests

Reviewers: liyintang, adela, aaiyer, manukranthk, fan, daviddeng, weichen

Reviewed By: manukranthk

CC: hbase-eng@

Differential Revision: https://phabricator.fb.com/D1168344

Task ID: 3696318

Added:
    hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/util/MurmurHash3.java
Modified:
    hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/util/Hash.java

Modified: hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/util/Hash.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/util/Hash.java?rev=1567724&r1=1567723&r2=1567724&view=diff
==============================================================================
--- hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/util/Hash.java (original)
+++ hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/util/Hash.java Wed Feb 12 19:18:29 2014
@@ -32,6 +32,8 @@ public abstract class Hash {
   public static final int JENKINS_HASH = 0;
   /** Constant to denote {@link MurmurHash}. */
   public static final int MURMUR_HASH  = 1;
+  /** Constant to denote {@link MurmurHash3}. */
+  public static final int MURMUR_HASH3 = 2;
 
   /**
    * This utility method converts String representation of hash function name
@@ -45,6 +47,8 @@ public abstract class Hash {
       return JENKINS_HASH;
     } else if ("murmur".equalsIgnoreCase(name)) {
       return MURMUR_HASH;
+    } else if ("murmur3".equalsIgnoreCase(name)) {
+      return MURMUR_HASH3;
     } else {
       return INVALID_HASH;
     }
@@ -68,12 +72,14 @@ public abstract class Hash {
    */
   public static Hash getInstance(int type) {
     switch(type) {
-    case JENKINS_HASH:
-      return JenkinsHash.getInstance();
-    case MURMUR_HASH:
-      return MurmurHash.getInstance();
-    default:
-      return null;
+      case JENKINS_HASH:
+        return JenkinsHash.getInstance();
+      case MURMUR_HASH:
+        return MurmurHash.getInstance();
+      case MURMUR_HASH3:
+        return MurmurHash3.getInstance();
+      default:
+        return null;
     }
   }
 

Added: hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/util/MurmurHash3.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/util/MurmurHash3.java?rev=1567724&view=auto
==============================================================================
--- hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/util/MurmurHash3.java (added)
+++ hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/util/MurmurHash3.java Wed Feb 12 19:18:29 2014
@@ -0,0 +1,99 @@
+/**
+ * Copyright 2014 The Apache Software Foundation
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.util;
+
+/**
+ * This is a very fast, non-cryptographic hash suitable for general hash-based
+ * lookup.  See https://code.google.com/p/smhasher/wiki/MurmurHash3 for more
+ * details.
+ *
+ * This implementation in Java was copied from the Mahout repository:
+ * https://svn.apache.org/repos/asf/mahout/tags/mahout-0.7/math/src/main/java/org/apache/mahout/math/MurmurHash3.java
+ *
+ * In simple benchmarks, we found the 32-bit MurmurHash3 to be about 25% in
+ * hashing than the 32-bit MurmurHash2. Which is in sync with what the authors
+ * claim in
+ * https://code.google.com/p/smhasher/source/browse/wiki/MurmurHash3.wiki
+ *
+ * TODO
+ * @gauravm: Test the collision resistance properties of MurmurHash3
+ */
+public class MurmurHash3 extends Hash {
+  private static MurmurHash3 _instance = new MurmurHash3();
+
+  public static Hash getInstance() {
+    return _instance;
+  }
+
+  @Override
+  public int hash(byte[] data, int offset, int len, int seed) {
+    int c1 = 0xcc9e2d51;
+    int c2 = 0x1b873593;
+
+    int h1 = seed;
+    int roundedEnd = offset + (len & 0xfffffffc);  // round down to 4 byte block
+
+    for (int i = offset; i < roundedEnd; i += 4) {
+      // little endian load order
+      int k1 = (data[i] & 0xff) | ((data[i + 1] & 0xff) << 8) |
+        ((data[i + 2] & 0xff) << 16) | (data[i + 3] << 24);
+      k1 *= c1;
+      k1 = (k1 << 15) | (k1 >>> 17);  // ROTL32(k1,15);
+      k1 *= c2;
+
+      h1 ^= k1;
+      h1 = (h1 << 13) | (h1 >>> 19);  // ROTL32(h1,13);
+      h1 = h1 * 5 + 0xe6546b64;
+    }
+
+    // tail
+    int k1 = 0;
+
+    switch(len & 0x03) {
+      case 3:
+        k1 = (data[roundedEnd + 2] & 0xff) << 16;
+        // fallthrough
+      case 2:
+        k1 |= (data[roundedEnd + 1] & 0xff) << 8;
+        // fallthrough
+      case 1:
+        k1 |= data[roundedEnd] & 0xff;
+        k1 *= c1;
+        k1 = (k1 << 15) | (k1 >>> 17);  // ROTL32(k1,15);
+        k1 *= c2;
+        h1 ^= k1;
+      default:
+    }
+
+    // finalization
+    h1 ^= len;
+
+    // fmix(h1);
+    h1 ^= h1 >>> 16;
+    h1 *= 0x85ebca6b;
+    h1 ^= h1 >>> 13;
+    h1 *= 0xc2b2ae35;
+    h1 ^= h1 >>> 16;
+
+    return h1;
+  }
+
+}