You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by li...@apache.org on 2014/04/02 22:49:20 UTC

svn commit: r1584166 - in /hbase/branches/0.89-fb/src: main/java/org/apache/hadoop/hbase/ main/java/org/apache/hadoop/hbase/regionserver/ main/java/org/apache/hadoop/hbase/util/ test/java/org/apache/hadoop/hbase/util/

Author: liyin
Date: Wed Apr  2 20:49:19 2014
New Revision: 1584166

URL: http://svn.apache.org/r1584166
Log:
[HBASE-10759] Testing the performance of Guava comparator.

Author: manukranthk

Summary: Adding guava comparator into the Bytes.compareTo function which is very fundamental to HBase compare functions and comparators.

Test Plan: Added a unit test which suggests that the guava comparator give slight improvement.

Reviewers: rshroff, liyintang, daviddeng

Reviewed By: daviddeng

CC: hbase-eng@, platform-diffs@lists

Differential Revision: https://phabricator.fb.com/D785376

Task ID: 2268161

Modified:
    hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/HConstants.java
    hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
    hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/util/Bytes.java
    hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/util/TestBytes.java

Modified: hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/HConstants.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/HConstants.java?rev=1584166&r1=1584165&r2=1584166&view=diff
==============================================================================
--- hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/HConstants.java (original)
+++ hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/HConstants.java Wed Apr  2 20:49:19 2014
@@ -1062,6 +1062,10 @@ public final class HConstants {
       "hbase.client.use.conf.from.server";
   public static final boolean DEFAULT_USE_CONF_FROM_SERVER = true;
 
+  public static final String USE_GUAVA_BYTES_COMPARISION =
+      "hbase.regionserver.use.guava.bytes.comparision";
+  public static boolean DEFAULT_USE_GUAVA_BYTES_COMPARISION = false;
+
   private HConstants() {
     // Can't be instantiated with this constructor.
   }

Modified: hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java?rev=1584166&r1=1584165&r2=1584166&view=diff
==============================================================================
--- hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java (original)
+++ hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java Wed Apr  2 20:49:19 2014
@@ -517,6 +517,11 @@ public class HRegionServer implements HR
     int maxPreloadThreads =
         conf.getInt(HConstants.MAX_PRELOAD_THREAD_COUNT, HConstants.DEFAULT_MAX_PRELOAD_THREAD_COUNT);
     PreloadThreadPool.constructPreloaderThreadPool(corePreloadThreads, maxPreloadThreads);
+
+    // Configure use of Guava bytes comparator.
+    Bytes.useGuavaBytesComparision = conf.getBoolean(
+        HConstants.USE_GUAVA_BYTES_COMPARISION,
+        HConstants.DEFAULT_USE_GUAVA_BYTES_COMPARISION);
   }
 
   /**

Modified: hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/util/Bytes.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/util/Bytes.java?rev=1584166&r1=1584165&r2=1584166&view=diff
==============================================================================
--- hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/util/Bytes.java (original)
+++ hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/util/Bytes.java Wed Apr  2 20:49:19 2014
@@ -23,8 +23,12 @@ import java.io.DataInput;
 import java.io.DataOutput;
 import java.io.IOException;
 import java.io.UnsupportedEncodingException;
+import java.lang.reflect.Field;
 import java.math.BigInteger;
 import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.security.AccessController;
+import java.security.PrivilegedAction;
 import java.util.Arrays;
 import java.util.Comparator;
 import java.util.Iterator;
@@ -41,6 +45,8 @@ import org.apache.thrift.protocol.TProto
 import org.apache.thrift.transport.TMemoryBuffer;
 import org.apache.thrift.transport.TMemoryInputTransport;
 
+import sun.misc.Unsafe;
+
 import com.facebook.nifty.header.protocol.TFacebookCompactProtocol;
 import com.facebook.swift.codec.ThriftCodec;
 
@@ -970,6 +976,9 @@ public class Bytes {
     return compareTo(left, 0, left.length, right, 0, right.length);
   }
 
+  public static boolean useGuavaBytesComparision =
+      HConstants.DEFAULT_USE_GUAVA_BYTES_COMPARISION;
+
   /**
    * Lexographically compare two arrays.
    *
@@ -983,17 +992,218 @@ public class Bytes {
    */
   public static int compareTo(byte[] buffer1, int offset1, int length1,
       byte[] buffer2, int offset2, int length2) {
-    // Bring WritableComparator code local
-    int end1 = offset1 + length1;
-    int end2 = offset2 + length2;
-    for (int i = offset1, j = offset2; i < end1 && j < end2; i++, j++) {
-      int a = (buffer1[i] & 0xff);
-      int b = (buffer2[j] & 0xff);
-      if (a != b) {
-        return a - b;
+    if (!useGuavaBytesComparision || (length1 < LONG_BYTES_X_2)
+        || (length2 < LONG_BYTES_X_2)) {
+      // Bring WritableComparator code local
+      int end1 = offset1 + length1;
+      int end2 = offset2 + length2;
+      for (int i = offset1, j = offset2; i < end1 && j < end2; i++, j++) {
+        int a = (buffer1[i] & 0xff);
+        int b = (buffer2[j] & 0xff);
+        if (a != b) {
+          return a - b;
+        }
+      }
+      return length1 - length2;
+    } else {
+      return LexicographicalComparerHolder.BEST_COMPARER.compareTo(
+          buffer1, offset1, length1, buffer2, offset2, length2);
+    }
+  }
+
+  /**
+   * The number of bytes required to represent a primitive {@code long}
+   * value.
+   */
+  public static final int LONG_BYTES = Long.SIZE / Byte.SIZE;
+  public static final int LONG_BYTES_X_2 = LONG_BYTES * 2;
+
+  interface Comparer<T> {
+    abstract public int compareTo(T buffer1, int offset1, int length1,
+        T buffer2, int offset2, int length2);
+  }
+
+  static Comparer<byte[]> lexicographicalComparerJavaImpl() {
+    return LexicographicalComparerHolder.PureJavaComparer.INSTANCE;
+  }
+
+  /**
+   * Provides a lexicographical comparer implementation; either a Java
+   * implementation or a faster implementation based on {@link Unsafe}.
+   *
+   * <p>Uses reflection to gracefully fall back to the Java implementation if
+   * {@code Unsafe} isn't available.
+   */
+  static class LexicographicalComparerHolder {
+    static final String UNSAFE_COMPARER_NAME =
+        LexicographicalComparerHolder.class.getName() + "$UnsafeComparer";
+
+    static final Comparer<byte[]> BEST_COMPARER = getBestComparer();
+    /**
+     * Returns the Unsafe-using Comparer, or falls back to the pure-Java
+     * implementation if unable to do so.
+     */
+    static Comparer<byte[]> getBestComparer() {
+      try {
+        Class<?> theClass = Class.forName(UNSAFE_COMPARER_NAME);
+
+        // yes, UnsafeComparer does implement Comparer<byte[]>
+        @SuppressWarnings("unchecked")
+        Comparer<byte[]> comparer =
+          (Comparer<byte[]>) theClass.getEnumConstants()[0];
+        return comparer;
+      } catch (Throwable t) { // ensure we really catch *everything*
+        return lexicographicalComparerJavaImpl();
+      }
+    }
+
+    enum PureJavaComparer implements Comparer<byte[]> {
+      INSTANCE;
+
+      @Override
+      public int compareTo(byte[] buffer1, int offset1, int length1,
+          byte[] buffer2, int offset2, int length2) {
+        // Short circuit equal case
+        if (buffer1 == buffer2 &&
+            offset1 == offset2 &&
+            length1 == length2) {
+          return 0;
+        }
+        // Bring WritableComparator code local
+        int end1 = offset1 + length1;
+        int end2 = offset2 + length2;
+        for (int i = offset1, j = offset2; i < end1 && j < end2; i++, j++) {
+          int a = (buffer1[i] & 0xff);
+          int b = (buffer2[j] & 0xff);
+          if (a != b) {
+            return a - b;
+          }
+        }
+        return length1 - length2;
+      }
+    }
+
+    enum UnsafeComparer implements Comparer<byte[]> {
+      INSTANCE;
+
+      static final Unsafe theUnsafe;
+
+      /** The offset to the first element in a byte array. */
+      static final int BYTE_ARRAY_BASE_OFFSET;
+
+      static {
+        theUnsafe = (Unsafe) AccessController.doPrivileged(
+            new PrivilegedAction<Object>() {
+              @Override
+              public Object run() {
+                try {
+                  Field f = Unsafe.class.getDeclaredField("theUnsafe");
+                  f.setAccessible(true);
+                  return f.get(null);
+                } catch (NoSuchFieldException e) {
+                  // It doesn't matter what we throw;
+                  // it's swallowed in getBestComparer().
+                  throw new Error();
+                } catch (IllegalAccessException e) {
+                  throw new Error();
+                }
+              }
+            });
+
+        BYTE_ARRAY_BASE_OFFSET = theUnsafe.arrayBaseOffset(byte[].class);
+
+        // sanity check - this should never fail
+        if (theUnsafe.arrayIndexScale(byte[].class) != 1) {
+          throw new AssertionError();
+        }
+      }
+
+      static final boolean littleEndian =
+        ByteOrder.nativeOrder().equals(ByteOrder.LITTLE_ENDIAN);
+
+      /**
+       * Returns true if x1 is less than x2, when both values are treated as
+       * unsigned.
+       */
+      static boolean lessThanUnsigned(long x1, long x2) {
+        return (x1 + Long.MIN_VALUE) < (x2 + Long.MIN_VALUE);
+      }
+
+      /**
+       * Lexicographically compare two arrays.
+       *
+       * @param buffer1 left operand
+       * @param buffer2 right operand
+       * @param offset1 Where to start comparing in the left buffer
+       * @param offset2 Where to start comparing in the right buffer
+       * @param length1 How much to compare from the left buffer
+       * @param length2 How much to compare from the right buffer
+       * @return 0 if equal, < 0 if left is less than right, etc.
+       */
+      @Override
+      public int compareTo(byte[] buffer1, int offset1, int length1,
+          byte[] buffer2, int offset2, int length2) {
+        // Short circuit equal case
+        if (buffer1 == buffer2 &&
+            offset1 == offset2 &&
+            length1 == length2) {
+          return 0;
+        }
+        int minLength = Math.min(length1, length2);
+        int minWords = minLength / LONG_BYTES;
+        int offset1Adj = offset1 + BYTE_ARRAY_BASE_OFFSET;
+        int offset2Adj = offset2 + BYTE_ARRAY_BASE_OFFSET;
+
+        /*
+         * Compare 8 bytes at a time. Benchmarking shows comparing 8 bytes at a
+         * time is no slower than comparing 4 bytes at a time even on 32-bit.
+         * On the other hand, it is substantially faster on 64-bit.
+         */
+        for (int i = 0; i < minWords * LONG_BYTES; i += LONG_BYTES) {
+          long lw = theUnsafe.getLong(buffer1, offset1Adj + (long) i);
+          long rw = theUnsafe.getLong(buffer2, offset2Adj + (long) i);
+          long diff = lw ^ rw;
+
+          if (diff != 0) {
+            if (!littleEndian) {
+              return lessThanUnsigned(lw, rw) ? -1 : 1;
+            }
+
+            // Use binary search
+            int n = 0;
+            int y;
+            int x = (int) diff;
+            if (x == 0) {
+              x = (int) (diff >>> 32);
+              n = 32;
+            }
+
+            y = x << 16;
+            if (y == 0) {
+              n += 16;
+            } else {
+              x = y;
+            }
+
+            y = x << 8;
+            if (y == 0) {
+              n += 8;
+            }
+            return (int) (((lw >>> n) & 0xFFL) - ((rw >>> n) & 0xFFL));
+          }
+        }
+
+        // The epilogue to cover the last (minLength % 8) elements.
+        for (int i = minWords * LONG_BYTES; i < minLength; i++) {
+          int a = (buffer1[offset1 + i] & 0xff);
+          int b = (buffer2[offset2 + i] & 0xff);
+          if (a != b) {
+            return a - b;
+          }
+        }
+        return length1 - length2;
       }
     }
-    return length1 - length2;
   }
 
   /**

Modified: hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/util/TestBytes.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/util/TestBytes.java?rev=1584166&r1=1584165&r2=1584166&view=diff
==============================================================================
--- hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/util/TestBytes.java (original)
+++ hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/util/TestBytes.java Wed Apr  2 20:49:19 2014
@@ -24,7 +24,10 @@ import java.io.ByteArrayOutputStream;
 import java.io.DataInputStream;
 import java.io.DataOutputStream;
 import java.io.IOException;
+import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.List;
+import java.util.Random;
 
 import junit.framework.TestCase;
 
@@ -289,4 +292,80 @@ public class TestBytes extends TestCase 
     Assert.assertArrayEquals("appendToTail(a, 0, 6)",
         Bytes.appendToTail(a, 2, (byte) 6), new byte[] { 1, 2, 6, 6 });
   }
+
+  /**
+   * The rows in a given region follow the following pattern:
+   * [PREFIX BYTES][ID BYTES]
+   *
+   * @param numRows : number of rows to return in the list.
+   * @param prefixLength : length of common prefix.
+   * @param length : length of the rest of the row.
+   * @return : List of rows generated
+   */
+  private List<byte[]> getRowsRandom(int numRows, int prefixLength, int length) {
+    Random r = new Random();
+    byte[] prefixBytes = new byte[prefixLength];
+    r.nextBytes(prefixBytes);
+    List<byte[]> list = new ArrayList<byte[]>();
+    for (int i=0; i<numRows; i++) {
+      byte[] b = new byte[length];
+      r.nextBytes(b);
+      byte[] finalBytes = new byte[prefixLength + length];
+      Bytes.putBytes(finalBytes, 0, prefixBytes, 0, prefixLength);
+      Bytes.putBytes(finalBytes, prefixLength, b, 0, b.length);
+      list.add(finalBytes);
+    }
+    return list;
+  }
+
+  public void testComparatorPerfRandom() {
+    // The rows in a given region follow the following pattern:
+    // [PREFIX BYTES][ID BYTES]
+    // With long prefixes, the comparison using guava is faster.
+    // With fewer common bytes, the guava comparison is slower.
+    for (int PREFIX = 50; PREFIX >= 0; PREFIX -= 5) {
+      int ID = 100;
+      int numRows = 10000;
+      List<byte[]> list = getRowsRandom(numRows, PREFIX, ID);
+
+      // Correctness
+      for (int i=0; i<numRows; i++) {
+        for (int j=0; j<numRows; j++) {
+          Bytes.useGuavaBytesComparision = true;
+          int bg = Bytes.compareTo(list.get(i), list.get(j));
+          Bytes.useGuavaBytesComparision = false;
+          int bs = Bytes.compareTo(list.get(i), list.get(j));
+          assertTrue(bg == bs);
+        }
+      }
+
+      // Comparing the Bytes
+      boolean[] bools = new boolean[]{true, false};
+      long[] timeNs = new long[2];
+
+      for (int idx = 0; idx < 2; idx++) {
+        Bytes.useGuavaBytesComparision = bools[idx];
+        long st = System.nanoTime();
+        for (int i=0; i<numRows; i++) {
+          for (int j=0; j<numRows; j++) {
+            Bytes.compareTo(list.get(i), list.get(j));
+          }
+        }
+        long en = System.nanoTime();
+        timeNs[idx] += (en - st);
+      }
+      double gain = (timeNs[1] - timeNs[0]) / ((double)timeNs[1]);
+      System.out.println("Prefix : " + PREFIX + ", gain : " + gain * 100 + " ");
+      if (PREFIX > 20) {
+        assertTrue(gain > 0.1);
+        assertTrue(timeNs[1] > timeNs[0]);
+      } else if (PREFIX < 10) {
+        assertTrue(gain < -0.1);
+        if (PREFIX == 0) {
+          assertTrue(gain < -0.5);
+        }
+        assertTrue(timeNs[1] < timeNs[0]);
+      }
+    }
+  }
 }