You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by an...@apache.org on 2015/06/29 19:21:01 UTC

hbase git commit: HBASE-12345 Unsafe based ByteBuffer Comparator.

Repository: hbase
Updated Branches:
  refs/heads/master 7b92d8c06 -> 3eee73028


HBASE-12345 Unsafe based ByteBuffer Comparator.


Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/3eee7302
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/3eee7302
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/3eee7302

Branch: refs/heads/master
Commit: 3eee7302847b55291cc87820d96e603f5e4d7612
Parents: 7b92d8c
Author: anoopsjohn <an...@gmail.com>
Authored: Mon Jun 29 22:50:33 2015 +0530
Committer: anoopsjohn <an...@gmail.com>
Committed: Mon Jun 29 22:50:33 2015 +0530

----------------------------------------------------------------------
 .../hadoop/hbase/util/ByteBufferUtils.java      | 95 ++++++++++++++++++--
 .../apache/hadoop/hbase/util/UnsafeAccess.java  | 89 +++++++++++-------
 2 files changed, 146 insertions(+), 38 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hbase/blob/3eee7302/hbase-common/src/main/java/org/apache/hadoop/hbase/util/ByteBufferUtils.java
----------------------------------------------------------------------
diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/ByteBufferUtils.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/ByteBufferUtils.java
index 6636679..33e5cc6 100644
--- a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/ByteBufferUtils.java
+++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/ByteBufferUtils.java
@@ -492,13 +492,12 @@ public final class ByteBufferUtils {
     return output;
   }
 
-  public static int compareTo(ByteBuffer buf1, int o1, int len1, ByteBuffer buf2, int o2, int len2) {
-    if (buf1.hasArray() && buf2.hasArray()) {
-      return Bytes.compareTo(buf1.array(), buf1.arrayOffset() + o1, len1, buf2.array(),
-          buf2.arrayOffset() + o2, len2);
+  public static int compareTo(ByteBuffer buf1, int o1, int l1, ByteBuffer buf2, int o2, int l2) {
+    if (UnsafeAccess.isAvailable()) {
+      return compareToUnsafe(buf1, o1, l1, buf2, o2, l2);
     }
-    int end1 = o1 + len1;
-    int end2 = o2 + len2;
+    int end1 = o1 + l1;
+    int end2 = o2 + l2;
     for (int i = o1, j = o2; i < end1 && j < end2; i++, j++) {
       int a = buf1.get(i) & 0xFF;
       int b = buf2.get(j) & 0xFF;
@@ -506,7 +505,89 @@ public final class ByteBufferUtils {
         return a - b;
       }
     }
-    return len1 - len2;
+    return l1 - l2;
+  }
+
+  static int compareToUnsafe(ByteBuffer buf1, int o1, int l1, ByteBuffer buf2, int o2, int l2) {
+    final int minLength = Math.min(l1, l2);
+    final int minWords = minLength / Bytes.SIZEOF_LONG;
+
+    /*
+     * Compare 8 bytes at a time. Benchmarking shows comparing 8 bytes at a time is no slower than
+     * comparing 4 bytes at a time even on 32-bit. On the other hand, it is substantially faster on
+     * 64-bit.
+     */
+    int j = minWords << 3; // Same as minWords * SIZEOF_LONG
+    for (int i = 0; i < j; i += Bytes.SIZEOF_LONG) {
+      long lw = UnsafeAccess.getAsLong(buf1, o1 + i);
+      long rw = UnsafeAccess.getAsLong(buf2, o2 + i);
+      long diff = lw ^ rw;
+      if (diff != 0) {
+        return lessThanUnsignedLong(lw, rw) ? -1 : 1;
+      }
+    }
+    int offset = j;
+
+    if (minLength - offset >= Bytes.SIZEOF_INT) {
+      int il = UnsafeAccess.getAsInt(buf1, o1 + offset);
+      int ir = UnsafeAccess.getAsInt(buf2, o2 + offset);
+      if (il != ir) {
+        return lessThanUnsignedInt(il, ir) ? -1 : 1;
+      }
+      offset += Bytes.SIZEOF_INT;
+    }
+    if (minLength - offset >= Bytes.SIZEOF_SHORT) {
+      short sl = UnsafeAccess.getAsShort(buf1, o1 + offset);
+      short sr = UnsafeAccess.getAsShort(buf2, o2 + offset);
+      if (sl != sr) {
+        return lessThanUnsignedShort(sl, sr) ? -1 : 1;
+      }
+      offset += Bytes.SIZEOF_SHORT;
+    }
+    if (minLength - offset == 1) {
+      int a = (buf1.get(o1 + offset) & 0xff);
+      int b = (buf2.get(o2 + offset) & 0xff);
+      if (a != b) {
+        return a - b;
+      }
+    }
+    return l1 - l2;
+  }
+
+  /*
+   * Both values are passed exactly as read by Unsafe. On a little-endian platform they must be
+   * converted to their big-endian form before comparing, because all writes are big-endian.
+   */
+  private static boolean lessThanUnsignedLong(long x1, long x2) {
+    if (UnsafeAccess.littleEndian) {
+      x1 = Long.reverseBytes(x1);
+      x2 = Long.reverseBytes(x2);
+    }
+    return (x1 + Long.MIN_VALUE) < (x2 + Long.MIN_VALUE);
+  }
+
+  /*
+   * Both values are passed exactly as read by Unsafe. On a little-endian platform they must be
+   * converted to their big-endian form before comparing, because all writes are big-endian.
+   */
+  private static boolean lessThanUnsignedInt(int x1, int x2) {
+    if (UnsafeAccess.littleEndian) {
+      x1 = Integer.reverseBytes(x1);
+      x2 = Integer.reverseBytes(x2);
+    }
+    return (x1 & 0xffffffffL) < (x2 & 0xffffffffL);
+  }
+
+  /*
+   * Both values are passed exactly as read by Unsafe. On a little-endian platform they must be
+   * converted to their big-endian form before comparing, because all writes are big-endian.
+   */
+  private static boolean lessThanUnsignedShort(short x1, short x2) {
+    if (UnsafeAccess.littleEndian) {
+      x1 = Short.reverseBytes(x1);
+      x2 = Short.reverseBytes(x2);
+    }
+    return (x1 & 0xffff) < (x2 & 0xffff);
   }
 
   /**

http://git-wip-us.apache.org/repos/asf/hbase/blob/3eee7302/hbase-common/src/main/java/org/apache/hadoop/hbase/util/UnsafeAccess.java
----------------------------------------------------------------------
diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/UnsafeAccess.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/UnsafeAccess.java
index 51dd643..deb9a1a 100644
--- a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/UnsafeAccess.java
+++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/UnsafeAccess.java
@@ -78,7 +78,7 @@ public final class UnsafeAccess {
 
   // APIs to read primitive data from a byte[] using Unsafe way
   /**
-   * Converts a byte array to a short value.
+   * Converts a byte array to a short value considering it was written in big-endian format.
    * @param bytes byte array
    * @param offset offset into array
    * @return the short value
@@ -92,7 +92,7 @@ public final class UnsafeAccess {
   }
 
   /**
-   * Converts a byte array to an int value.
+   * Converts a byte array to an int value considering it was written in big-endian format.
    * @param bytes byte array
    * @param offset offset into array
    * @return the int value
@@ -106,7 +106,7 @@ public final class UnsafeAccess {
   }
 
   /**
-   * Converts a byte array to a long value.
+   * Converts a byte array to a long value considering it was written in big-endian format.
    * @param bytes byte array
    * @param offset offset into array
    * @return the long value
@@ -121,7 +121,7 @@ public final class UnsafeAccess {
 
   // APIs to write primitive data to a byte[] using Unsafe way
   /**
-   * Put a short value out to the specified byte array position.
+   * Put a short value out to the specified byte array position in big-endian format.
    * @param bytes the byte array
    * @param offset position in the array
    * @param val short to write out
@@ -136,7 +136,7 @@ public final class UnsafeAccess {
   }
 
   /**
-   * Put an int value out to the specified byte array position.
+   * Put an int value out to the specified byte array position in big-endian format.
    * @param bytes the byte array
    * @param offset position in the array
    * @param val int to write out
@@ -151,7 +151,7 @@ public final class UnsafeAccess {
   }
 
   /**
-   * Put a long value out to the specified byte array position.
+   * Put a long value out to the specified byte array position in big-endian format.
    * @param bytes the byte array
    * @param offset position in the array
    * @param val long to write out
@@ -167,60 +167,87 @@ public final class UnsafeAccess {
 
   // APIs to read primitive data from a ByteBuffer using Unsafe way
   /**
-   * Reads a short value at the given buffer's offset.
+   * Reads a short value at the given buffer's offset considering it was written in big-endian
+   * format.
+   *
    * @param buf
    * @param offset
    * @return short value at offset
    */
   public static short toShort(ByteBuffer buf, int offset) {
-    short ret;
+    if (littleEndian) {
+      return Short.reverseBytes(getAsShort(buf, offset));
+    }
+    return getAsShort(buf, offset);
+  }
+
+  /**
+   * Reads the bytes at the given offset as a short value, as is (no byte swapping is done).
+   * @param buf
+   * @param offset
+   * @return short value at offset
+   */
+  static short getAsShort(ByteBuffer buf, int offset) {
     if (buf.isDirect()) {
-      ret = theUnsafe.getShort(((DirectBuffer) buf).address() + offset);
-    } else {
-      ret = theUnsafe.getShort(buf.array(), BYTE_ARRAY_BASE_OFFSET + buf.arrayOffset() + offset);
+      return theUnsafe.getShort(((DirectBuffer) buf).address() + offset);
     }
+    return theUnsafe.getShort(buf.array(), BYTE_ARRAY_BASE_OFFSET + buf.arrayOffset() + offset);
+  }
+
+  /**
+   * Reads an int value at the given buffer's offset considering it was written in big-endian
+   * format.
+   *
+   * @param buf
+   * @param offset
+   * @return int value at offset
+   */
+  public static int toInt(ByteBuffer buf, int offset) {
     if (littleEndian) {
-      return Short.reverseBytes(ret);
+      return Integer.reverseBytes(getAsInt(buf, offset));
     }
-    return ret;
+    return getAsInt(buf, offset);
   }
 
   /**
-   * Reads an int value at the given buffer's offset.
+   * Reads the bytes at the given offset as an int value, as is (no byte swapping is done).
    * @param buf
    * @param offset
    * @return int value at offset
    */
-  public static int toInt(ByteBuffer buf, int offset) {
-    int ret;
+  static int getAsInt(ByteBuffer buf, int offset) {
     if (buf.isDirect()) {
-      ret = theUnsafe.getInt(((DirectBuffer) buf).address() + offset);
-    } else {
-      ret = theUnsafe.getInt(buf.array(), BYTE_ARRAY_BASE_OFFSET + buf.arrayOffset() + offset);
+      return theUnsafe.getInt(((DirectBuffer) buf).address() + offset);
     }
+    return theUnsafe.getInt(buf.array(), BYTE_ARRAY_BASE_OFFSET + buf.arrayOffset() + offset);
+  }
+
+  /**
+   * Reads a long value at the given buffer's offset considering it was written in big-endian
+   * format.
+   *
+   * @param buf
+   * @param offset
+   * @return long value at offset
+   */
+  public static long toLong(ByteBuffer buf, int offset) {
     if (littleEndian) {
-      return Integer.reverseBytes(ret);
+      return Long.reverseBytes(getAsLong(buf, offset));
     }
-    return ret;
+    return getAsLong(buf, offset);
   }
 
   /**
-   * Reads a long value at the given buffer's offset.
+   * Reads the bytes at the given offset as a long value, as is (no byte swapping is done).
    * @param buf
    * @param offset
    * @return long value at offset
    */
-  public static long toLong(ByteBuffer buf, int offset) {
-    long ret;
+  static long getAsLong(ByteBuffer buf, int offset) {
     if (buf.isDirect()) {
-      ret = theUnsafe.getLong(((DirectBuffer) buf).address() + offset);
-    } else {
-      ret = theUnsafe.getLong(buf.array(), BYTE_ARRAY_BASE_OFFSET + buf.arrayOffset() + offset);
-    }
-    if (littleEndian) {
-      return Long.reverseBytes(ret);
+      return theUnsafe.getLong(((DirectBuffer) buf).address() + offset);
     }
-    return ret;
+    return theUnsafe.getLong(buf.array(), BYTE_ARRAY_BASE_OFFSET + buf.arrayOffset() + offset);
   }
 
   // APIs to copy data. This will be direct memory location copy and will be much faster