You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by xy...@apache.org on 2018/07/02 20:32:35 UTC
[18/45] hadoop git commit: HADOOP-14313. Replace/improve Hadoop's
byte[] comparator. Contributed by Vikas Vishwakarma.
HADOOP-14313. Replace/improve Hadoop's byte[] comparator. Contributed by Vikas Vishwakarma.
Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/ddbff7c8
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/ddbff7c8
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/ddbff7c8
Branch: refs/heads/HDDS-4
Commit: ddbff7c8d3f1851e5c5fa9bc33637e859d7d8ccf
Parents: 2b2399d
Author: Akira Ajisaka <aa...@apache.org>
Authored: Thu Jun 28 14:58:40 2018 +0900
Committer: Akira Ajisaka <aa...@apache.org>
Committed: Thu Jun 28 14:58:40 2018 +0900
----------------------------------------------------------------------
NOTICE.txt | 8 ++++
.../apache/hadoop/io/FastByteComparisons.java | 44 ++++++++------------
2 files changed, 25 insertions(+), 27 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hadoop/blob/ddbff7c8/NOTICE.txt
----------------------------------------------------------------------
diff --git a/NOTICE.txt b/NOTICE.txt
index 95a670d..a53f13c 100644
--- a/NOTICE.txt
+++ b/NOTICE.txt
@@ -196,6 +196,14 @@ by Google Inc, which can be obtained at:
* HOMEPAGE:
* http://code.google.com/p/snappy/
+This product contains a modified portion of UnsignedBytes LexicographicalComparator
+from Guava v21 project by Google Inc, which can be obtained at:
+
+ * LICENSE:
+ * license/COPYING (Apache License 2.0)
+ * HOMEPAGE:
+ * https://github.com/google/guava
+
This product optionally depends on 'JBoss Marshalling', an alternative Java
serialization API, which can be obtained at:
http://git-wip-us.apache.org/repos/asf/hadoop/blob/ddbff7c8/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/FastByteComparisons.java
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/FastByteComparisons.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/FastByteComparisons.java
index a2903f8..5af6602 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/FastByteComparisons.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/FastByteComparisons.java
@@ -26,7 +26,6 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import sun.misc.Unsafe;
-import com.google.common.primitives.Longs;
import com.google.common.primitives.UnsignedBytes;
/**
@@ -195,52 +194,43 @@ abstract class FastByteComparisons {
length1 == length2) {
return 0;
}
+ final int stride = 8;
int minLength = Math.min(length1, length2);
- int minWords = minLength / Longs.BYTES;
+ int strideLimit = minLength & ~(stride - 1);
int offset1Adj = offset1 + BYTE_ARRAY_BASE_OFFSET;
int offset2Adj = offset2 + BYTE_ARRAY_BASE_OFFSET;
+ int i;
/*
* Compare 8 bytes at a time. Benchmarking shows comparing 8 bytes at a
* time is no slower than comparing 4 bytes at a time even on 32-bit.
* On the other hand, it is substantially faster on 64-bit.
*/
- for (int i = 0; i < minWords * Longs.BYTES; i += Longs.BYTES) {
+ for (i = 0; i < strideLimit; i += stride) {
long lw = theUnsafe.getLong(buffer1, offset1Adj + (long) i);
long rw = theUnsafe.getLong(buffer2, offset2Adj + (long) i);
- long diff = lw ^ rw;
- if (diff != 0) {
+ if (lw != rw) {
if (!littleEndian) {
return lessThanUnsigned(lw, rw) ? -1 : 1;
}
- // Use binary search
- int n = 0;
- int y;
- int x = (int) diff;
- if (x == 0) {
- x = (int) (diff >>> 32);
- n = 32;
- }
-
- y = x << 16;
- if (y == 0) {
- n += 16;
- } else {
- x = y;
- }
-
- y = x << 8;
- if (y == 0) {
- n += 8;
- }
- return (int) (((lw >>> n) & 0xFFL) - ((rw >>> n) & 0xFFL));
+ /*
+ * We want to compare only the first index where left[index] !=
+ * right[index]. This corresponds to the least significant nonzero
+ * byte in lw ^ rw, since lw and rw are little-endian.
+ * Long.numberOfTrailingZeros(diff) tells us the least significant
+ * nonzero bit, and zeroing out the first three bits of L.nTZ gives
+ * us the shift to get that least significant nonzero byte. This
+ * comparison logic is based on UnsignedBytes from Guava v21
+ */
+ int n = Long.numberOfTrailingZeros(lw ^ rw) & ~0x7;
+ return ((int) ((lw >>> n) & 0xFF)) - ((int) ((rw >>> n) & 0xFF));
}
}
// The epilogue to cover the last (minLength % 8) elements.
- for (int i = minWords * Longs.BYTES; i < minLength; i++) {
+ for (; i < minLength; i++) {
int result = UnsignedBytes.compare(
buffer1[offset1 + i],
buffer2[offset2 + i]);
---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org