You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by zh...@apache.org on 2021/12/27 16:05:27 UTC
[hbase] branch branch-2 updated: HBASE-26566 Optimize encodeNumeric in OrderedBytes (#3940)

This is an automated email from the ASF dual-hosted git repository.

zhangduo pushed a commit to branch branch-2
in repository https://gitbox.apache.org/repos/asf/hbase.git


The following commit(s) were added to refs/heads/branch-2 by this push:
     new c28a266  HBASE-26566 Optimize encodeNumeric in OrderedBytes (#3940)
c28a266 is described below

commit c28a266613b05687e1e460dbd455af08e9775396
Author: Yutong Xiao <yu...@gmail.com>
AuthorDate: Mon Dec 27 23:57:30 2021 +0800

    HBASE-26566 Optimize encodeNumeric in OrderedBytes (#3940)
    
    Signed-off-by: Reid Chan <re...@apache.org>
---
 .../org/apache/hadoop/hbase/util/OrderedBytes.java | 68 +++++++++++++---------
 .../apache/hadoop/hbase/util/TestOrderedBytes.java | 15 ++++-
 2 files changed, 53 insertions(+), 30 deletions(-)

diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/OrderedBytes.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/OrderedBytes.java
index db85f9f..c82d6ee 100644
--- a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/OrderedBytes.java
+++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/OrderedBytes.java
@@ -309,10 +309,6 @@ public class OrderedBytes {
 
   public static final Charset UTF8 = Charset.forName("UTF-8");
   private static final byte TERM = 0x00;
-  private static final BigDecimal E8 = BigDecimal.valueOf(1e8);
-  private static final BigDecimal E32 = BigDecimal.valueOf(1e32);
-  private static final BigDecimal EN2 = BigDecimal.valueOf(1e-2);
-  private static final BigDecimal EN10 = BigDecimal.valueOf(1e-10);
 
   /**
    * Max precision guaranteed to fit into a {@code long}.
@@ -637,7 +633,7 @@ public class OrderedBytes {
     byte[] a = dst.getBytes();
     boolean isNeg = val.signum() == -1;
     final int offset = dst.getOffset(), start = dst.getPosition();
-    int e = 0, d, startM;
+    int e = 0, startM;
 
     if (isNeg) { /* Small negative number: 0x14, -E, ~M */
       dst.put(NEG_SMALL);
@@ -646,21 +642,17 @@ public class OrderedBytes {
     }
 
     // normalize abs(val) to determine E
-    while (abs.compareTo(EN10) < 0) { abs = abs.movePointRight(8); e += 4; }
-    while (abs.compareTo(EN2) < 0) { abs = abs.movePointRight(2); e++; }
+    int zerosBeforeFirstNonZero = abs.scale() - abs.precision();
+    int lengthToMoveRight = zerosBeforeFirstNonZero % 2 ==
+      0 ? zerosBeforeFirstNonZero : zerosBeforeFirstNonZero - 1;
+    e = lengthToMoveRight / 2;
+    abs = abs.movePointRight(lengthToMoveRight);
 
     putVaruint64(dst, e, !isNeg); // encode appropriate E value.
 
     // encode M by peeling off centimal digits, encoding x as 2x+1
     startM = dst.getPosition();
-    // TODO: 18 is an arbitrary encoding limit. Reevaluate once we have a better handling of
-    // numeric scale.
-    for (int i = 0; i < 18 && abs.compareTo(BigDecimal.ZERO) != 0; i++) {
-      abs = abs.movePointRight(2);
-      d = abs.intValue();
-      dst.put((byte) ((2 * d + 1) & 0xff));
-      abs = abs.subtract(BigDecimal.valueOf(d));
-    }
+    encodeToCentimal(dst, abs);
     // terminal digit should be 2x
     a[offset + dst.getPosition() - 1] = (byte) (a[offset + dst.getPosition() - 1] & 0xfe);
     if (isNeg) {
@@ -712,7 +704,7 @@ public class OrderedBytes {
     byte[] a = dst.getBytes();
     boolean isNeg = val.signum() == -1;
     final int start = dst.getPosition(), offset = dst.getOffset();
-    int e = 0, d, startM;
+    int e = 0, startM;
 
     if (isNeg) { /* Large negative number: 0x08, ~E, ~M */
       dst.put(NEG_LARGE);
@@ -721,9 +713,10 @@ public class OrderedBytes {
     }
 
     // normalize abs(val) to determine E
-    while (abs.compareTo(E32) >= 0 && e <= 350) { abs = abs.movePointLeft(32); e +=16; }
-    while (abs.compareTo(E8) >= 0 && e <= 350) { abs = abs.movePointLeft(8); e+= 4; }
-    while (abs.compareTo(BigDecimal.ONE) >= 0 && e <= 350) { abs = abs.movePointLeft(2); e++; }
+    int integerDigits = abs.precision() - abs.scale();
+    int lengthToMoveLeft = integerDigits % 2 == 0 ? integerDigits : integerDigits + 1;
+    e = lengthToMoveLeft / 2;
+    abs = abs.movePointLeft(lengthToMoveLeft);
 
     // encode appropriate header byte and/or E value.
     if (e > 10) { /* large number, write out {~,}E */
@@ -738,14 +731,7 @@ public class OrderedBytes {
 
     // encode M by peeling off centimal digits, encoding x as 2x+1
     startM = dst.getPosition();
-    // TODO: 18 is an arbitrary encoding limit. Reevaluate once we have a better handling of
-    // numeric scale.
-    for (int i = 0; i < 18 && abs.compareTo(BigDecimal.ZERO) != 0; i++) {
-      abs = abs.movePointRight(2);
-      d = abs.intValue();
-      dst.put((byte) (2 * d + 1));
-      abs = abs.subtract(BigDecimal.valueOf(d));
-    }
+    encodeToCentimal(dst, abs);
     // terminal digit should be 2x
     a[offset + dst.getPosition() - 1] = (byte) (a[offset + dst.getPosition() - 1] & 0xfe);
     if (isNeg) {
@@ -756,6 +742,32 @@ public class OrderedBytes {
   }
 
   /**
+   * Encode a value val in [0.01, 1.0) into Centimals.
+   * Util function for {@link this.encodeNumericLarge()} and {@link this.encodeNumericSmall()}
+   * @param dst The destination to which encoded digits are written.
+   * @param val A BigDecimal after the normalization. The value must be in [0.01, 1.0).
+   */
+  private static void encodeToCentimal(PositionedByteRange dst, BigDecimal val) {
+    // The input value val must be in [0.01, 1.0)
+    String stringOfAbs = val.stripTrailingZeros().toPlainString();
+    String value = stringOfAbs.substring(stringOfAbs.indexOf('.') + 1);
+    int d;
+
+    // If the first float digit is 0, we will encode one digit more than MAX_PRECISION
+    // We encode at most MAX_PRECISION significant digits into centimals,
+    // because the input value, has been already normalized.
+    int maxPrecision = value.charAt(0) == '0' ? MAX_PRECISION + 1 : MAX_PRECISION;
+    maxPrecision = Math.min(maxPrecision, value.length());
+    for (int i = 0; i < maxPrecision; i += 2) {
+      d = (value.charAt(i) - '0') * 10;
+      if (i + 1 < maxPrecision) {
+        d += (value.charAt(i + 1) - '0');
+      }
+      dst.put((byte) (2 * d + 1));
+    }
+  }
+
+  /**
    * Encode a numerical value using the variable-length encoding.
    * @param dst The destination to which encoded digits are written.
    * @param val The value to encode.
@@ -795,6 +807,8 @@ public class OrderedBytes {
 
   /**
    * Encode a numerical value using the variable-length encoding.
+   * If the number of significant digits of the value exceeds the
+   * {@link OrderedBytes#MAX_PRECISION}, the exceeding part will be lost.
    * @param dst The destination to which encoded digits are written.
    * @param val The value to encode.
    * @param ord The {@link Order} to respect while encoding {@code val}.
diff --git a/hbase-common/src/test/java/org/apache/hadoop/hbase/util/TestOrderedBytes.java b/hbase-common/src/test/java/org/apache/hadoop/hbase/util/TestOrderedBytes.java
index c8e0381..943038f 100644
--- a/hbase-common/src/test/java/org/apache/hadoop/hbase/util/TestOrderedBytes.java
+++ b/hbase-common/src/test/java/org/apache/hadoop/hbase/util/TestOrderedBytes.java
@@ -70,9 +70,14 @@ public class TestOrderedBytes {
   static final BigDecimal[] BD_VALS =
     { null, BigDecimal.valueOf(Long.MAX_VALUE), BigDecimal.valueOf(Long.MIN_VALUE),
       BigDecimal.valueOf(Double.MAX_VALUE), BigDecimal.valueOf(Double.MIN_VALUE),
-      BigDecimal.valueOf(Long.MAX_VALUE).multiply(BigDecimal.valueOf(100)) };
+      BigDecimal.valueOf(Long.MAX_VALUE).multiply(BigDecimal.valueOf(100)),
+      BigDecimal.valueOf(Long.MAX_VALUE).pow(64),
+      BigDecimal.valueOf(Long.MAX_VALUE).pow(64).negate(),
+      new BigDecimal("0." + String.join("", Collections.nCopies(500, "123"))),
+      new BigDecimal("-0." + String.join("", Collections.nCopies(500, "123")))
+    };
   static final int[] BD_LENGTHS =
-    { 1, 11, 11, 11, 4, 12 };
+    { 1, 11, 11, 11, 4, 12, 19, 19, 18, 18 };
 
   /*
    * This is the smallest difference between two doubles in D_VALS
@@ -335,7 +340,11 @@ public class TestOrderedBytes {
         if (null == BD_VALS[i]) {
           assertEquals(BD_VALS[i], decoded);
         } else {
-          assertEquals("Deserialization failed.", 0, BD_VALS[i].compareTo(decoded));
+          // The num will be rounded to a specific precision in the encoding phase.
+          // So that big value will lose precision here. Need to add a normalization here to
+          // make the test pass.
+          assertEquals("Deserialization failed.", 0,
+            OrderedBytes.normalize(BD_VALS[i]).compareTo(decoded));
         }
         assertEquals("Did not consume enough bytes.", BD_LENGTHS[i], buf1.getPosition() - 1);
       }