You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by la...@apache.org on 2017/04/27 20:21:05 UTC
hbase git commit: HBASE-17877 Improve HBase's byte[] comparator.
Repository: hbase
Updated Branches:
refs/heads/master 880db3eee -> b81e00f5e
HBASE-17877 Improve HBase's byte[] comparator.
Signed-off-by: Lars Hofhansl <la...@apache.org>
Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/b81e00f5
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/b81e00f5
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/b81e00f5
Branch: refs/heads/master
Commit: b81e00f5eabe8d99fd77d74f60e3754add8205da
Parents: 880db3e
Author: Vikas Vishwakarma <vi...@gmail.com>
Authored: Thu Apr 27 13:21:07 2017 -0700
Committer: Lars Hofhansl <la...@apache.org>
Committed: Thu Apr 27 13:21:07 2017 -0700
----------------------------------------------------------------------
NOTICE.txt | 4 +-
.../hadoop/hbase/util/ByteBufferUtils.java | 55 +++++++++-----------
.../org/apache/hadoop/hbase/util/Bytes.java | 55 +++++++++-----------
3 files changed, 54 insertions(+), 60 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hbase/blob/b81e00f5/NOTICE.txt
----------------------------------------------------------------------
diff --git a/NOTICE.txt b/NOTICE.txt
index fb16a28..71efa52 100644
--- a/NOTICE.txt
+++ b/NOTICE.txt
@@ -38,8 +38,10 @@ Copyright Jan Kova\u0159�k
Licensed under the Apache License v2.0 as a part of the Bootstrap project.
--
-This product includes portions of the Guava project v14, specifically
+This product includes portions of the Guava project v14 and v21, specifically
'hbase-common/src/main/java/org/apache/hadoop/hbase/io/LimitInputStream.java'
+'hbase-common/src/main/java/org/apache/hadoop/hbase/util/Bytes.java'
+'hbase-common/src/main/java/org/apache/hadoop/hbase/util/ByteBufferUtils.java'
Copyright (C) 2007 The Guava Authors
http://git-wip-us.apache.org/repos/asf/hbase/blob/b81e00f5/hbase-common/src/main/java/org/apache/hadoop/hbase/util/ByteBufferUtils.java
----------------------------------------------------------------------
diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/ByteBufferUtils.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/ByteBufferUtils.java
index 34a4e02..6dac300 100644
--- a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/ByteBufferUtils.java
+++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/ByteBufferUtils.java
@@ -701,46 +701,43 @@ public final class ByteBufferUtils {
}
static int compareToUnsafe(Object obj1, long o1, int l1, Object obj2, long o2, int l2) {
+ final int stride = 8;
final int minLength = Math.min(l1, l2);
- final int minWords = minLength / Bytes.SIZEOF_LONG;
+ int strideLimit = minLength & ~(stride - 1);
+ int i;
/*
* Compare 8 bytes at a time. Benchmarking shows comparing 8 bytes at a time is no slower than
* comparing 4 bytes at a time even on 32-bit. On the other hand, it is substantially faster on
* 64-bit.
*/
- int j = minWords << 3; // Same as minWords * SIZEOF_LONG
- for (int i = 0; i < j; i += Bytes.SIZEOF_LONG) {
- long lw = UnsafeAccess.theUnsafe.getLong(obj1, o1 + i);
- long rw = UnsafeAccess.theUnsafe.getLong(obj2, o2 + i);
- long diff = lw ^ rw;
- if (diff != 0) {
- return lessThanUnsignedLong(lw, rw) ? -1 : 1;
+ for (i = 0; i < strideLimit; i += stride) {
+ long lw = UnsafeAccess.theUnsafe.getLong(obj1, o1 + (long) i);
+ long rw = UnsafeAccess.theUnsafe.getLong(obj2, o2 + (long) i);
+ if (lw != rw) {
+ if (!UnsafeAccess.littleEndian) {
+ return ((lw + Long.MIN_VALUE) < (rw + Long.MIN_VALUE)) ? -1 : 1;
+ }
+
+ /*
+ * We want to compare only the first index where left[index] != right[index]. This
+ * corresponds to the least significant nonzero byte in lw ^ rw, since lw and rw are
+ * little-endian. Long.numberOfTrailingZeros(diff) tells us the least significant
+ * nonzero bit, and zeroing out the first three bits of L.nTZ gives us the shift to get
+ * that least significant nonzero byte. This comparison logic is based on UnsignedBytes
+ * from guava v21
+ */
+ int n = Long.numberOfTrailingZeros(lw ^ rw) & ~0x7;
+ return ((int) ((lw >>> n) & 0xFF)) - ((int) ((rw >>> n) & 0xFF));
}
}
- int offset = j;
- if (minLength - offset >= Bytes.SIZEOF_INT) {
- int il = UnsafeAccess.theUnsafe.getInt(obj1, o1 + offset);
- int ir = UnsafeAccess.theUnsafe.getInt(obj2, o2 + offset);
+ // The epilogue to cover the last (minLength % stride) elements.
+ for (; i < minLength; i++) {
+ int il = (UnsafeAccess.theUnsafe.getByte(obj1, o1 + i) & 0xFF);
+ int ir = (UnsafeAccess.theUnsafe.getByte(obj2, o2 + i) & 0xFF);
if (il != ir) {
- return lessThanUnsignedInt(il, ir) ? -1 : 1;
- }
- offset += Bytes.SIZEOF_INT;
- }
- if (minLength - offset >= Bytes.SIZEOF_SHORT) {
- short sl = UnsafeAccess.theUnsafe.getShort(obj1, o1 + offset);
- short sr = UnsafeAccess.theUnsafe.getShort(obj2, o2 + offset);
- if (sl != sr) {
- return lessThanUnsignedShort(sl, sr) ? -1 : 1;
- }
- offset += Bytes.SIZEOF_SHORT;
- }
- if (minLength - offset == 1) {
- int a = (UnsafeAccess.theUnsafe.getByte(obj1, o1 + offset) & 0xff);
- int b = (UnsafeAccess.theUnsafe.getByte(obj2, o2 + offset) & 0xff);
- if (a != b) {
- return a - b;
+ return il - ir;
}
}
return l1 - l2;
http://git-wip-us.apache.org/repos/asf/hbase/blob/b81e00f5/hbase-common/src/main/java/org/apache/hadoop/hbase/util/Bytes.java
----------------------------------------------------------------------
diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/Bytes.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/Bytes.java
index 704d97f..a481d27 100644
--- a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/Bytes.java
+++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/Bytes.java
@@ -1575,47 +1575,42 @@ public class Bytes implements Comparable<Bytes> {
length1 == length2) {
return 0;
}
+ final int stride = 8;
final int minLength = Math.min(length1, length2);
- final int minWords = minLength / SIZEOF_LONG;
+ int strideLimit = minLength & ~(stride - 1);
final long offset1Adj = offset1 + UnsafeAccess.BYTE_ARRAY_BASE_OFFSET;
final long offset2Adj = offset2 + UnsafeAccess.BYTE_ARRAY_BASE_OFFSET;
+ int i;
/*
- * Compare 8 bytes at a time. Benchmarking shows comparing 8 bytes at a
- * time is no slower than comparing 4 bytes at a time even on 32-bit.
- * On the other hand, it is substantially faster on 64-bit.
+ * Compare 8 bytes at a time. Benchmarking on x86 shows a stride of 8 bytes is no slower
+ * than 4 bytes even on 32-bit. On the other hand, it is substantially faster on 64-bit.
*/
- // This is the end offset of long parts.
- int j = minWords << 3; // Same as minWords * SIZEOF_LONG
- for (int i = 0; i < j; i += SIZEOF_LONG) {
+ for (i = 0; i < strideLimit; i += stride) {
long lw = theUnsafe.getLong(buffer1, offset1Adj + (long) i);
long rw = theUnsafe.getLong(buffer2, offset2Adj + (long) i);
- long diff = lw ^ rw;
- if (diff != 0) {
- return lessThanUnsignedLong(lw, rw) ? -1 : 1;
+ if (lw != rw) {
+ if(!UnsafeAccess.littleEndian) {
+ return ((lw + Long.MIN_VALUE) < (rw + Long.MIN_VALUE)) ? -1 : 1;
+ }
+
+ /*
+ * We want to compare only the first index where left[index] != right[index]. This
+ * corresponds to the least significant nonzero byte in lw ^ rw, since lw and rw are
+ * little-endian. Long.numberOfTrailingZeros(diff) tells us the least significant
+ * nonzero bit, and zeroing out the first three bits of L.nTZ gives us the shift to get
+ * that least significant nonzero byte. This comparison logic is based on UnsignedBytes
+ * comparator from guava v21
+ */
+ int n = Long.numberOfTrailingZeros(lw ^ rw) & ~0x7;
+ return ((int) ((lw >>> n) & 0xFF)) - ((int) ((rw >>> n) & 0xFF));
}
}
- int offset = j;
- if (minLength - offset >= SIZEOF_INT) {
- int il = theUnsafe.getInt(buffer1, offset1Adj + offset);
- int ir = theUnsafe.getInt(buffer2, offset2Adj + offset);
- if (il != ir) {
- return lessThanUnsignedInt(il, ir) ? -1: 1;
- }
- offset += SIZEOF_INT;
- }
- if (minLength - offset >= SIZEOF_SHORT) {
- short sl = theUnsafe.getShort(buffer1, offset1Adj + offset);
- short sr = theUnsafe.getShort(buffer2, offset2Adj + offset);
- if (sl != sr) {
- return lessThanUnsignedShort(sl, sr) ? -1: 1;
- }
- offset += SIZEOF_SHORT;
- }
- if (minLength - offset == 1) {
- int a = (buffer1[(int)(offset1 + offset)] & 0xff);
- int b = (buffer2[(int)(offset2 + offset)] & 0xff);
+ // The epilogue to cover the last (minLength % stride) elements.
+ for (; i < minLength; i++) {
+ int a = (buffer1[offset1 + i] & 0xFF);
+ int b = (buffer2[offset2 + i] & 0xFF);
if (a != b) {
return a - b;
}