You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by mm...@apache.org on 2017/08/10 19:18:11 UTC

hive git commit: HIVE-17235: Add ORC Decimal64 Serialization/Deserialization (Part 1) (Matt McCline, reviewed by Gopal Vijayaraghavan, Sergey Shelukhin, and Owen O'Malley)

Repository: hive
Updated Branches:
  refs/heads/master 7860e7628 -> 637123a77


HIVE-17235: Add ORC Decimal64 Serialization/Deserialization (Part 1) (Matt McCline, reviewed by Gopal Vijayaraghavan, Sergey Shelukhin, and Owen O'Malley)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/637123a7
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/637123a7
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/637123a7

Branch: refs/heads/master
Commit: 637123a7783dadf0a2cf7c02217364d9dc4214c7
Parents: 7860e76
Author: Matt McCline <mm...@hortonworks.com>
Authored: Thu Aug 10 14:18:05 2017 -0500
Committer: Matt McCline <mm...@hortonworks.com>
Committed: Thu Aug 10 14:18:05 2017 -0500

----------------------------------------------------------------------
 .../hive/common/type/FastHiveDecimal.java       | 18 +++++
 .../hive/common/type/FastHiveDecimalImpl.java   | 71 ++++++++++++++++++-
 .../hive/serde2/io/HiveDecimalWritable.java     | 58 ++++++++++++++++
 .../hive/common/type/HiveDecimalTestBase.java   | 10 +++
 .../hive/common/type/TestHiveDecimal.java       | 73 ++++++++++++++++++++
 5 files changed, 229 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/637123a7/storage-api/src/java/org/apache/hadoop/hive/common/type/FastHiveDecimal.java
----------------------------------------------------------------------
diff --git a/storage-api/src/java/org/apache/hadoop/hive/common/type/FastHiveDecimal.java b/storage-api/src/java/org/apache/hadoop/hive/common/type/FastHiveDecimal.java
index 7fa9fdf..4484ed2 100644
--- a/storage-api/src/java/org/apache/hadoop/hive/common/type/FastHiveDecimal.java
+++ b/storage-api/src/java/org/apache/hadoop/hive/common/type/FastHiveDecimal.java
@@ -273,6 +273,24 @@ public class FastHiveDecimal {
             scratchLongs);
   }
 
+  /*
+   * Sets this decimal from a decimal64 long (at most 18 decimal digits); no bounds checking.
+   */
+  protected void fastDeserialize64(long decimalLong, int scale) {
+    FastHiveDecimalImpl.fastDeserialize64(
+        decimalLong, scale, this);  // the static implementation writes its result into this
+  }
+
+  /*
+   * Returns this decimal as a decimal64 long expressed at the requested scale (max 18 digits).
+   */
+  protected long fastSerialize64(int scale) {
+    return
+        FastHiveDecimalImpl.fastSerialize64(
+            scale,
+            fastSignum, fast1, fast0, fastScale);  // fast2 is not passed to the implementation
+  }
+
   // The fastBigIntegerBytes method returns 3 56 bit (7 byte) words and a possible sign byte.
   // However, the fastBigIntegerBytes can take on trailing zeroes -- so make it larger.
   protected static final int FAST_SCRATCH_BUFFER_LEN_BIG_INTEGER_BYTES = 1 + 48;

http://git-wip-us.apache.org/repos/asf/hive/blob/637123a7/storage-api/src/java/org/apache/hadoop/hive/common/type/FastHiveDecimalImpl.java
----------------------------------------------------------------------
diff --git a/storage-api/src/java/org/apache/hadoop/hive/common/type/FastHiveDecimalImpl.java b/storage-api/src/java/org/apache/hadoop/hive/common/type/FastHiveDecimalImpl.java
index ef9cbcf..84ff1c6 100644
--- a/storage-api/src/java/org/apache/hadoop/hive/common/type/FastHiveDecimalImpl.java
+++ b/storage-api/src/java/org/apache/hadoop/hive/common/type/FastHiveDecimalImpl.java
@@ -136,7 +136,9 @@ public class FastHiveDecimalImpl extends FastHiveDecimal {
     10000000000000L,
     100000000000000L,
     1000000000000000L,
-    10000000000000000L    // 16
+    10000000000000000L,   // 16
+    100000000000000000L,
+    1000000000000000000L, // 18
   };
 
   public static final int MAX_DECIMAL_DIGITS = 38;
@@ -154,6 +156,9 @@ public class FastHiveDecimalImpl extends FastHiveDecimal {
   private static final long MAX_LONGWORD_DECIMAL = powerOfTenTable[LONGWORD_DECIMAL_DIGITS] - 1;
   private static final long MULTIPLER_LONGWORD_DECIMAL = powerOfTenTable[LONGWORD_DECIMAL_DIGITS];
 
+  public static final int DECIMAL64_DECIMAL_DIGITS = 18;
+  public static final long MAX_ABS_DECIMAL64 = 999999999999999999L;  // 18 9's -- quite reliable!
+
   private static final int TWO_X_LONGWORD_DECIMAL_DIGITS = 2 * LONGWORD_DECIMAL_DIGITS;
   private static final int THREE_X_LONGWORD_DECIMAL_DIGITS = 3 * LONGWORD_DECIMAL_DIGITS;
   private static final int FOUR_X_LONGWORD_DECIMAL_DIGITS = 4 * LONGWORD_DECIMAL_DIGITS;
@@ -2138,6 +2143,70 @@ public class FastHiveDecimalImpl extends FastHiveDecimal {
     throw new RuntimeException("Unexpected");
   }
 
+  public static long getDecimal64AbsMax(int precision) {  // largest magnitude: 10^precision - 1
+    return powerOfTenTable[precision] - 1;  // unchecked index; the table stops at 10^18
+  }
+
+  /*
+   * Deserializes a decimal64 long (unscaled value plus scale, at most 18 decimal digits) into
+   * fastResult, trimming trailing zeroes below the decimal point along the way.
+   * NOTE: Major assumption: the input decimal64 has already been bounds checked and at least
+   * has a precision <= DECIMAL64_DECIMAL_DIGITS.  We do not bounds check here for better
+   * performance.
+   */
+  public static void fastDeserialize64(
+      final long inputDecimal64Long, final int inputScale,
+      FastHiveDecimal fastResult) {
+    // Zero resets the result; otherwise record the sign and continue with the magnitude.
+    long decimal64Long;
+    if (inputDecimal64Long == 0) {
+      fastResult.fastReset();
+      return;
+    } else if (inputDecimal64Long > 0) {
+      fastResult.fastSignum = 1;
+      decimal64Long = inputDecimal64Long;
+    } else {
+      fastResult.fastSignum = -1;
+      decimal64Long = -inputDecimal64Long;  // would overflow for Long.MIN_VALUE; see NOTE above
+    }
+
+    // Trim trailing zeroes -- but only below the decimal point.
+    int trimScale = inputScale;
+    while (trimScale > 0 && decimal64Long % 10 == 0) {
+      decimal64Long /= 10;
+      trimScale--;
+    }
+    // Magnitude goes into fast1/fast0 (quotient/remainder by the longword multiplier).
+    fastResult.fast2 = 0;
+    fastResult.fast1 = decimal64Long / MULTIPLER_LONGWORD_DECIMAL;
+    fastResult.fast0 = decimal64Long % MULTIPLER_LONGWORD_DECIMAL;
+
+    fastResult.fastScale = trimScale;
+    // Integer digits = raw precision less the scale, floored at zero for pure fractions.
+    fastResult.fastIntegerDigitCount =
+        Math.max(0, fastRawPrecision(fastResult) - fastResult.fastScale);
+  }
+
+  /*
+   * Serializes a fast decimal as a decimal64 long at the requested scale (max 18 decimal digits).
+   * The scale must be >= fastScale; a smaller one makes the power-of-ten table index negative.
+   * NOTE: Major assumption: the fast decimal has already been bounds checked and at least
+   * has a precision <= DECIMAL64_DECIMAL_DIGITS.  We do not bounds check here for better
+   * performance.
+   */
+  public static long fastSerialize64(
+      int scale,
+      int fastSignum, long fast1, long fast0, int fastScale) {
+    // Rebuild the magnitude from fast1/fast0, then scale it up by 10^(scale - fastScale).
+    if (fastSignum == 0) {
+      return 0;
+    } else if (fastSignum == 1) {
+      return (fast1 * MULTIPLER_LONGWORD_DECIMAL + fast0) * powerOfTenTable[scale - fastScale];
+    } else {
+      return -(fast1 * MULTIPLER_LONGWORD_DECIMAL + fast0) * powerOfTenTable[scale - fastScale];
+    }
+  }
+
   //************************************************************************************************
   // Emulate BigInteger deserialization used by LazyBinary and others.
 

http://git-wip-us.apache.org/repos/asf/hive/blob/637123a7/storage-api/src/java/org/apache/hadoop/hive/serde2/io/HiveDecimalWritable.java
----------------------------------------------------------------------
diff --git a/storage-api/src/java/org/apache/hadoop/hive/serde2/io/HiveDecimalWritable.java b/storage-api/src/java/org/apache/hadoop/hive/serde2/io/HiveDecimalWritable.java
index ffbe31a..617fb99 100644
--- a/storage-api/src/java/org/apache/hadoop/hive/serde2/io/HiveDecimalWritable.java
+++ b/storage-api/src/java/org/apache/hadoop/hive/serde2/io/HiveDecimalWritable.java
@@ -488,6 +488,64 @@ public final class HiveDecimalWritable extends FastHiveDecimal
             scratchLongs);
   }
 
+  /*
+   * Maximum number of decimal digits in a decimal64 long.
+   */
+  @HiveDecimalWritableVersionV2
+  public static final int DECIMAL64_DECIMAL_DIGITS = FastHiveDecimalImpl.DECIMAL64_DECIMAL_DIGITS;
+
+  /*
+   * Tests whether a precision fits within a decimal64 (64-bit signed long with <= 18 decimal
+   * digits).  Only the upper bound is checked, so zero or negative precisions also return true.
+   */
+  @HiveDecimalWritableVersionV2
+  public static boolean isPrecisionDecimal64(int precision) {
+    return (precision <= DECIMAL64_DECIMAL_DIGITS);
+  }
+
+  /*
+   * Returns the maximum absolute decimal64 value for a precision (not range checked here).
+   */
+  @HiveDecimalWritableVersionV2
+  public static long getDecimal64AbsMax(int precision) {
+    return FastHiveDecimalImpl.getDecimal64AbsMax(precision);
+  }
+
+  /*
+   * Deserializes 64-bit decimals up to the maximum 64-bit precision (18 decimal digits).
+   *
+   * NOTE: Major assumption: the input decimal64 has already been bounds checked and at least
+   * has a precision <= DECIMAL64_DECIMAL_DIGITS.  We do not bounds check here for better
+   * performance.  You can bounds check beforehand with:
+   *     Math.abs(decimal64Long) <= getDecimal64AbsMax(precision)
+   */
+  @HiveDecimalWritableVersionV2
+  public void deserialize64(
+      long decimal64Long, int scale) {
+    fastDeserialize64(decimal64Long, scale);
+    isSet = true;  // the writable now holds a value
+  }
+
+  /*
+   * Serializes decimal64 up to the maximum 64-bit precision (18 decimal digits).
+   *
+   * NOTE: Major assumption: the fast decimal has already been bounds checked and at least
+   * has a precision <= DECIMAL64_DECIMAL_DIGITS.  We do not bounds check here for better
+   * performance.
+   */
+  @HiveDecimalWritableVersionV2
+  public long serialize64(int scale) {
+    return fastSerialize64(scale);  // isSet is not consulted on this path
+  }
+
+  @HiveDecimalWritableVersionV2
+  public boolean isValid() {  // false when unset; otherwise the fastIsValid verdict
+    if (!isSet) {
+      return false;
+    }
+    return FastHiveDecimalImpl.fastIsValid(this);
+  }
+
   /**
    * Returns the length of the decimal converted to bytes.
    * Call bigIntegerBytesBuffer() to get a reference to the converted bytes.

http://git-wip-us.apache.org/repos/asf/hive/blob/637123a7/storage-api/src/test/org/apache/hadoop/hive/common/type/HiveDecimalTestBase.java
----------------------------------------------------------------------
diff --git a/storage-api/src/test/org/apache/hadoop/hive/common/type/HiveDecimalTestBase.java b/storage-api/src/test/org/apache/hadoop/hive/common/type/HiveDecimalTestBase.java
index 553c456..9550b94 100644
--- a/storage-api/src/test/org/apache/hadoop/hive/common/type/HiveDecimalTestBase.java
+++ b/storage-api/src/test/org/apache/hadoop/hive/common/type/HiveDecimalTestBase.java
@@ -305,8 +305,14 @@ public class HiveDecimalTestBase {
     "-9999999999999999",
     "10000000000000000",                           // 10^16
     "-10000000000000000",
+    "99999999999999999",                            // 10^17 - 1
+    "-99999999999999999",
     "100000000000000000",
     "-100000000000000000",
+    "999999999999999999",                            // 10^18 - 1
+    "-999999999999999999",
+    "123456789012345678",
+    "-123456789012345678",
     "1000000000000000000",
     "-1000000000000000000",
     "9223372036854775807",                         // Long.MAX_VALUE
@@ -369,6 +375,10 @@ public class HiveDecimalTestBase {
     "-0.9999999999999999",
     "0.00000000000000001",                         // 10^-16
     "-0.00000000000000001",
+    "0.99999999999999999",
+    "-0.99999999999999999",
+    "0.999999999999999999",                        // 1 - 10^-18
+    "-0.999999999999999999",
     "0.00000000000000000000000000000001",          // 10^-31
     "-0.00000000000000000000000000000001",
     "0.99999999999999999999999999999999",          // 10^-32 + 1

http://git-wip-us.apache.org/repos/asf/hive/blob/637123a7/storage-api/src/test/org/apache/hadoop/hive/common/type/TestHiveDecimal.java
----------------------------------------------------------------------
diff --git a/storage-api/src/test/org/apache/hadoop/hive/common/type/TestHiveDecimal.java b/storage-api/src/test/org/apache/hadoop/hive/common/type/TestHiveDecimal.java
index f8a36e5..d11f41c 100644
--- a/storage-api/src/test/org/apache/hadoop/hive/common/type/TestHiveDecimal.java
+++ b/storage-api/src/test/org/apache/hadoop/hive/common/type/TestHiveDecimal.java
@@ -3394,6 +3394,79 @@ public class TestHiveDecimal extends HiveDecimalTestBase {
     }
   }
 
+  //------------------------------------------------------------------------------------------------
+
+  @Test
+  public void testRandomDecimal64() {
+    Random r = new Random(2497);  // fixed seed, so every run sees the same sequence
+    for (BigDecimalFlavor bigDecimalFlavor : BigDecimalFlavor.values()) {
+      doTestRandomDecimal64(r, standardAlphabet, bigDecimalFlavor);
+    }
+    for (BigDecimalFlavor bigDecimalFlavor : BigDecimalFlavor.values()) {
+      for (String sparseAlphabet : sparseAlphabets) {  // same check, once per sparse alphabet
+        doTestRandomDecimal64(r, sparseAlphabet, bigDecimalFlavor);
+      }
+    }
+  }
+
+  private void doTestRandomDecimal64(Random r, String digitAlphabet, BigDecimalFlavor bigDecimalFlavor) {
+    // Runs POUND_FACTOR round-trip checks, each on a fresh value from randHiveBigDecimal.
+    for (int i = 0; i < POUND_FACTOR; i++) {
+      BigDecimal bigDecimal = randHiveBigDecimal(r, digitAlphabet, bigDecimalFlavor);
+
+      doTestDecimal64(r, bigDecimal);
+    }
+  }
+
+  @Test
+  public void testDecimal64Special() {
+    Random r = new Random(198);  // only consumed when a value truncates to zero and is redrawn
+    for (BigDecimal bigDecimal : specialBigDecimals) {
+      int precision = Math.min(bigDecimal.precision(), HiveDecimalWritable.DECIMAL64_DECIMAL_DIGITS);
+      int scale = Math.min(bigDecimal.scale(), precision);  // keep scale <= precision
+      doTestDecimal64(r, bigDecimal, precision, scale);
+    }
+  }
+
+  private void doTestDecimal64(Random r, BigDecimal inputBigDecimal) {
+    final int precision = 1 + r.nextInt(HiveDecimalWritable.DECIMAL64_DECIMAL_DIGITS);
+    assertTrue(HiveDecimalWritable.isPrecisionDecimal64(precision));
+    final int scale = r.nextInt(precision + 1);
+    // Here precision is in [1, DECIMAL64_DECIMAL_DIGITS] and scale is in [0, precision].
+    doTestDecimal64(r, inputBigDecimal, precision, scale);
+  }
+
+  private void doTestDecimal64(Random r, BigDecimal inputBigDecimal, int precision, int scale) {
+    // Round-trips one value through serialize64/deserialize64 at the given precision and scale.
+    BigDecimal bigDecimal = inputBigDecimal;
+    // Squeeze a non-zero input into (precision, scale); redraw whenever it truncates to zero.
+    if (!bigDecimal.equals(BigDecimal.ZERO)) {
+      while (true) {
+        bigDecimal = bigDecimal.remainder(BigDecimal.valueOf(10).pow(precision - scale));
+        bigDecimal = bigDecimal.setScale(scale, BigDecimal.ROUND_DOWN);
+        if (!bigDecimal.unscaledValue().equals(BigInteger.ZERO)) {
+          break;
+        }
+        bigDecimal = randHiveBigDecimalNormalRange(r, standardAlphabet);
+      }
+    }
+
+    HiveDecimal dec = HiveDecimal.create(bigDecimal);
+    assertTrue(dec != null);
+    dec.validate();
+
+    HiveDecimalWritable decWritable = new HiveDecimalWritable(dec);
+    // Compare the magnitude: a signed comparison lets every negative value pass the bound.
+    final long decimal64Long = decWritable.serialize64(scale);
+    assertTrue(Math.abs(decimal64Long) <= HiveDecimalWritable.getDecimal64AbsMax(precision));
+    HiveDecimalWritable resultWritable = new HiveDecimalWritable(0);
+    resultWritable.deserialize64(decimal64Long, scale);
+
+    assertEquals(dec, resultWritable.getHiveDecimal());
+  }
+
+  //------------------------------------------------------------------------------------------------
+
   public static String displayBytes(byte[] bytes, int start, int length) {
     StringBuilder sb = new StringBuilder();
     for (int i = start; i < start + length; i++) {