You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by mm...@apache.org on 2017/08/10 19:18:11 UTC
hive git commit: HIVE-17235: Add ORC Decimal64
Serialization/Deserialization (Part 1) (Matt McCline,
reviewed by Gopal Vijayaraghavan, Sergey Shelukhin, and Owen O'Malley)
Repository: hive
Updated Branches:
refs/heads/master 7860e7628 -> 637123a77
HIVE-17235: Add ORC Decimal64 Serialization/Deserialization (Part 1) (Matt McCline, reviewed by Gopal Vijayaraghavan, Sergey Shelukhin, and Owen O'Malley)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/637123a7
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/637123a7
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/637123a7
Branch: refs/heads/master
Commit: 637123a7783dadf0a2cf7c02217364d9dc4214c7
Parents: 7860e76
Author: Matt McCline <mm...@hortonworks.com>
Authored: Thu Aug 10 14:18:05 2017 -0500
Committer: Matt McCline <mm...@hortonworks.com>
Committed: Thu Aug 10 14:18:05 2017 -0500
----------------------------------------------------------------------
.../hive/common/type/FastHiveDecimal.java | 18 +++++
.../hive/common/type/FastHiveDecimalImpl.java | 71 ++++++++++++++++++-
.../hive/serde2/io/HiveDecimalWritable.java | 58 ++++++++++++++++
.../hive/common/type/HiveDecimalTestBase.java | 10 +++
.../hive/common/type/TestHiveDecimal.java | 73 ++++++++++++++++++++
5 files changed, 229 insertions(+), 1 deletion(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/637123a7/storage-api/src/java/org/apache/hadoop/hive/common/type/FastHiveDecimal.java
----------------------------------------------------------------------
diff --git a/storage-api/src/java/org/apache/hadoop/hive/common/type/FastHiveDecimal.java b/storage-api/src/java/org/apache/hadoop/hive/common/type/FastHiveDecimal.java
index 7fa9fdf..4484ed2 100644
--- a/storage-api/src/java/org/apache/hadoop/hive/common/type/FastHiveDecimal.java
+++ b/storage-api/src/java/org/apache/hadoop/hive/common/type/FastHiveDecimal.java
@@ -273,6 +273,24 @@ public class FastHiveDecimal {
scratchLongs);
}
+ /*
+  * Sets this decimal from a decimal64: an unscaled 64-bit long paired with a scale.
+  * Handles values up to the maximum 64-bit precision (18 decimal digits) and hands the
+  * actual work to FastHiveDecimalImpl.fastDeserialize64, with this object as the result.
+  */
+ protected void fastDeserialize64(long decimalLong, int scale) {
+   FastHiveDecimalImpl.fastDeserialize64(decimalLong, scale, this);
+ }
+
+ /*
+ * Serializes decimal64 up to the maximum 64-bit precision (18 decimal digits).
+ */
+ protected long fastSerialize64(int scale) {
+ return
+ FastHiveDecimalImpl.fastSerialize64(
+ scale,
+ fastSignum, fast1, fast0, fastScale);
+ }
+
// The fastBigIntegerBytes method returns 3 56 bit (7 byte) words and a possible sign byte.
// However, the fastBigIntegerBytes can take on trailing zeroes -- so make it larger.
protected static final int FAST_SCRATCH_BUFFER_LEN_BIG_INTEGER_BYTES = 1 + 48;
http://git-wip-us.apache.org/repos/asf/hive/blob/637123a7/storage-api/src/java/org/apache/hadoop/hive/common/type/FastHiveDecimalImpl.java
----------------------------------------------------------------------
diff --git a/storage-api/src/java/org/apache/hadoop/hive/common/type/FastHiveDecimalImpl.java b/storage-api/src/java/org/apache/hadoop/hive/common/type/FastHiveDecimalImpl.java
index ef9cbcf..84ff1c6 100644
--- a/storage-api/src/java/org/apache/hadoop/hive/common/type/FastHiveDecimalImpl.java
+++ b/storage-api/src/java/org/apache/hadoop/hive/common/type/FastHiveDecimalImpl.java
@@ -136,7 +136,9 @@ public class FastHiveDecimalImpl extends FastHiveDecimal {
10000000000000L,
100000000000000L,
1000000000000000L,
- 10000000000000000L // 16
+ 10000000000000000L, // 16
+ 100000000000000000L,
+ 1000000000000000000L, // 18
};
public static final int MAX_DECIMAL_DIGITS = 38;
@@ -154,6 +156,9 @@ public class FastHiveDecimalImpl extends FastHiveDecimal {
private static final long MAX_LONGWORD_DECIMAL = powerOfTenTable[LONGWORD_DECIMAL_DIGITS] - 1;
private static final long MULTIPLER_LONGWORD_DECIMAL = powerOfTenTable[LONGWORD_DECIMAL_DIGITS];
+ // Maximum number of decimal digits a decimal64 (a decimal carried in one 64-bit long) may have.
+ public static final int DECIMAL64_DECIMAL_DIGITS = 18;
+ // Largest magnitude a decimal64 may have.
+ public static final long MAX_ABS_DECIMAL64 = 999999999999999999L; // 18 9's, i.e. 10^18 - 1
+
private static final int TWO_X_LONGWORD_DECIMAL_DIGITS = 2 * LONGWORD_DECIMAL_DIGITS;
private static final int THREE_X_LONGWORD_DECIMAL_DIGITS = 3 * LONGWORD_DECIMAL_DIGITS;
private static final int FOUR_X_LONGWORD_DECIMAL_DIGITS = 4 * LONGWORD_DECIMAL_DIGITS;
@@ -2138,6 +2143,70 @@ public class FastHiveDecimalImpl extends FastHiveDecimal {
throw new RuntimeException("Unexpected");
}
+ /*
+  * Returns the largest absolute decimal64 value for a precision: 10^precision - 1.
+  * The precision is used directly as an index into powerOfTenTable; no range check is done.
+  */
+ public static long getDecimal64AbsMax(int precision) {
+   final long powerOfTen = powerOfTenTable[precision];
+   return powerOfTen - 1;
+ }
+
+ /*
+  * Deserializes 64-bit decimals up to the maximum 64-bit precision (18 decimal digits).
+  *
+  * NOTE: Major assumption: the input decimal64 has already been bounds checked and at least
+  * has a precision <= DECIMAL64_DECIMAL_DIGITS. We do not bounds check here for better
+  * performance.
+  */
+ public static void fastDeserialize64(
+     final long inputDecimal64Long, final int inputScale,
+     FastHiveDecimal fastResult) {
+
+   // Zero has a single representation: a fully reset result.
+   if (inputDecimal64Long == 0) {
+     fastResult.fastReset();
+     return;
+   }
+
+   // Record the sign on the result and continue with the magnitude only.
+   final boolean isNegative = inputDecimal64Long < 0;
+   fastResult.fastSignum = isNegative ? -1 : 1;
+   long magnitude = isNegative ? -inputDecimal64Long : inputDecimal64Long;
+
+   // Trim trailing zeroes -- but only below the decimal point (stop once the scale reaches 0).
+   int scale = inputScale;
+   for (; scale > 0 && magnitude % 10 == 0; scale--) {
+     magnitude /= 10;
+   }
+
+   // Split the magnitude across the two low longwords; the high longword is always zeroed.
+   fastResult.fast0 = magnitude % MULTIPLER_LONGWORD_DECIMAL;
+   fastResult.fast1 = magnitude / MULTIPLER_LONGWORD_DECIMAL;
+   fastResult.fast2 = 0;
+   fastResult.fastScale = scale;
+
+   // Computed last: fastRawPrecision is given fastResult, whose words and scale are set above.
+   fastResult.fastIntegerDigitCount =
+       Math.max(0, fastRawPrecision(fastResult) - fastResult.fastScale);
+ }
+
+ /*
+  * Serializes decimal64 up to the maximum 64-bit precision (18 decimal digits).
+  *
+  * NOTE: Major assumption: the fast decimal has already been bounds checked and at least
+  * has a precision <= DECIMAL64_DECIMAL_DIGITS. We do not bounds check here for better
+  * performance. Likewise (scale - fastScale) is used as a powerOfTenTable index unchecked.
+  */
+ public static long fastSerialize64(
+     int scale,
+     int fastSignum, long fast1, long fast0, int fastScale) {
+
+   if (fastSignum == 0) {
+     return 0;
+   }
+
+   // Recombine the two longwords, apply the sign, then scale up to the requested scale.
+   final long unscaled = fast1 * MULTIPLER_LONGWORD_DECIMAL + fast0;
+   final long signedUnscaled = (fastSignum == 1) ? unscaled : -unscaled;
+   return signedUnscaled * powerOfTenTable[scale - fastScale];
+ }
+
//************************************************************************************************
// Emulate BigInteger deserialization used by LazyBinary and others.
http://git-wip-us.apache.org/repos/asf/hive/blob/637123a7/storage-api/src/java/org/apache/hadoop/hive/serde2/io/HiveDecimalWritable.java
----------------------------------------------------------------------
diff --git a/storage-api/src/java/org/apache/hadoop/hive/serde2/io/HiveDecimalWritable.java b/storage-api/src/java/org/apache/hadoop/hive/serde2/io/HiveDecimalWritable.java
index ffbe31a..617fb99 100644
--- a/storage-api/src/java/org/apache/hadoop/hive/serde2/io/HiveDecimalWritable.java
+++ b/storage-api/src/java/org/apache/hadoop/hive/serde2/io/HiveDecimalWritable.java
@@ -488,6 +488,64 @@ public final class HiveDecimalWritable extends FastHiveDecimal
scratchLongs);
}
+ /*
+ * Maximum number of decimal digits in a decimal64 long.
+ * Mirrors FastHiveDecimalImpl.DECIMAL64_DECIMAL_DIGITS.
+ */
+ @HiveDecimalWritableVersionV2
+ public static final int DECIMAL64_DECIMAL_DIGITS = FastHiveDecimalImpl.DECIMAL64_DECIMAL_DIGITS;
+
+ /*
+  * Tells whether a precision fits within a decimal64 (a 64-bit signed long holding at most
+  * DECIMAL64_DECIMAL_DIGITS decimal digits). Only the upper bound is tested.
+  */
+ @HiveDecimalWritableVersionV2
+ public static boolean isPrecisionDecimal64(int precision) {
+   return DECIMAL64_DECIMAL_DIGITS >= precision;
+ }
+
+ /*
+  * Returns the maximum absolute decimal64 value for a precision, as computed by
+  * FastHiveDecimalImpl.getDecimal64AbsMax.
+  */
+ @HiveDecimalWritableVersionV2
+ public static long getDecimal64AbsMax(int precision) {
+   final long absMax = FastHiveDecimalImpl.getDecimal64AbsMax(precision);
+   return absMax;
+ }
+
+ /*
+ * Deserializes 64-bit decimals up to the maximum 64-bit precision (18 decimal digits).
+ *
+ * NOTE: Major assumption: the input decimal64 has already been bounds checked and a least
+ * has a precision <= DECIMAL64_DECIMAL_DIGITS. We do not bounds check here for better
+ * performance. You can bounds check beforehand with:
+ * Math.abs(decimal64Long) <= getDecimal64AbsMax(precision)
+ */
+ @HiveDecimalWritableVersionV2
+ public void deserialize64(
+ long decimal64Long, int scale) {
+ fastDeserialize64(decimal64Long, scale);
+ isSet = true;
+ }
+
+ /*
+  * Serializes this writable as a decimal64 long at the given scale, up to the maximum 64-bit
+  * precision (18 decimal digits).
+  *
+  * NOTE: Major assumption: the fast decimal has already been bounds checked and at least
+  * has a precision <= DECIMAL64_DECIMAL_DIGITS. We do not bounds check here for better
+  * performance.
+  */
+ @HiveDecimalWritableVersionV2
+ public long serialize64(int scale) {
+   final long decimal64Long = fastSerialize64(scale);
+   return decimal64Long;
+ }
+
+ /*
+  * Returns true only when this writable is set and FastHiveDecimalImpl.fastIsValid accepts
+  * it; an unset writable is reported as invalid without consulting fastIsValid.
+  */
+ @HiveDecimalWritableVersionV2
+ public boolean isValid() {
+   return isSet && FastHiveDecimalImpl.fastIsValid(this);
+ }
+
/**
* Returns the length of the decimal converted to bytes.
* Call bigIntegerBytesBuffer() to get a reference to the converted bytes.
http://git-wip-us.apache.org/repos/asf/hive/blob/637123a7/storage-api/src/test/org/apache/hadoop/hive/common/type/HiveDecimalTestBase.java
----------------------------------------------------------------------
diff --git a/storage-api/src/test/org/apache/hadoop/hive/common/type/HiveDecimalTestBase.java b/storage-api/src/test/org/apache/hadoop/hive/common/type/HiveDecimalTestBase.java
index 553c456..9550b94 100644
--- a/storage-api/src/test/org/apache/hadoop/hive/common/type/HiveDecimalTestBase.java
+++ b/storage-api/src/test/org/apache/hadoop/hive/common/type/HiveDecimalTestBase.java
@@ -305,8 +305,14 @@ public class HiveDecimalTestBase {
"-9999999999999999",
"10000000000000000", // 10^16
"-10000000000000000",
+ "99999999999999999", // 10^17 - 1
+ "-99999999999999999",
"100000000000000000",
"-100000000000000000",
+ "999999999999999999", // 10^18 - 1
+ "-999999999999999999",
+ "123456789012345678",
+ "-123456789012345678",
"1000000000000000000",
"-1000000000000000000",
"9223372036854775807", // Long.MAX_VALUE
@@ -369,6 +375,10 @@ public class HiveDecimalTestBase {
"-0.9999999999999999",
"0.00000000000000001", // 10^-16
"-0.00000000000000001",
+ "0.99999999999999999",
+ "-0.99999999999999999",
+ "0.999999999999999999", // 1 - 10^-18
+ "-0.999999999999999999",
"0.00000000000000000000000000000001", // 10^-31
"-0.00000000000000000000000000000001",
"0.99999999999999999999999999999999", // 10^-32 + 1
http://git-wip-us.apache.org/repos/asf/hive/blob/637123a7/storage-api/src/test/org/apache/hadoop/hive/common/type/TestHiveDecimal.java
----------------------------------------------------------------------
diff --git a/storage-api/src/test/org/apache/hadoop/hive/common/type/TestHiveDecimal.java b/storage-api/src/test/org/apache/hadoop/hive/common/type/TestHiveDecimal.java
index f8a36e5..d11f41c 100644
--- a/storage-api/src/test/org/apache/hadoop/hive/common/type/TestHiveDecimal.java
+++ b/storage-api/src/test/org/apache/hadoop/hive/common/type/TestHiveDecimal.java
@@ -3394,6 +3394,79 @@ public class TestHiveDecimal extends HiveDecimalTestBase {
}
}
+ //------------------------------------------------------------------------------------------------
+
+ /*
+  * Runs the randomized decimal64 round-trip test for every BigDecimalFlavor, first with the
+  * standard digit alphabet and then with each sparse alphabet, from one fixed-seed Random.
+  */
+ @Test
+ public void testRandomDecimal64() {
+   final Random random = new Random(2497);
+
+   // Pass 1: standard alphabet, every flavor.
+   for (BigDecimalFlavor flavor : BigDecimalFlavor.values()) {
+     doTestRandomDecimal64(random, standardAlphabet, flavor);
+   }
+
+   // Pass 2: every flavor combined with every sparse alphabet.
+   for (BigDecimalFlavor flavor : BigDecimalFlavor.values()) {
+     for (String alphabet : sparseAlphabets) {
+       doTestRandomDecimal64(random, alphabet, flavor);
+     }
+   }
+ }
+
+ private void doTestRandomDecimal64(Random r, String digitAlphabet, BigDecimalFlavor bigDecimalFlavor) {
+
+ for (int i = 0; i < POUND_FACTOR; i++) {
+ BigDecimal bigDecimal = randHiveBigDecimal(r, digitAlphabet, bigDecimalFlavor);
+
+ doTestDecimal64(r, bigDecimal);
+ }
+ }
+
+ /*
+  * Round-trips every special BigDecimal through decimal64, using the value's own precision
+  * capped at DECIMAL64_DECIMAL_DIGITS and its own scale capped at that precision.
+  */
+ @Test
+ public void testDecimal64Special() {
+   final Random random = new Random(198);
+   for (BigDecimal special : specialBigDecimals) {
+     final int cappedPrecision =
+         Math.min(special.precision(), HiveDecimalWritable.DECIMAL64_DECIMAL_DIGITS);
+     final int cappedScale = Math.min(special.scale(), cappedPrecision);
+     doTestDecimal64(random, special, cappedPrecision, cappedScale);
+   }
+ }
+
+ /*
+  * Picks a random precision in [1, DECIMAL64_DECIMAL_DIGITS] and a random scale in
+  * [0, precision], then runs the decimal64 round-trip test with them.
+  */
+ private void doTestDecimal64(Random r, BigDecimal inputBigDecimal) {
+   final int randomPrecision = r.nextInt(HiveDecimalWritable.DECIMAL64_DECIMAL_DIGITS) + 1;
+   assertTrue(HiveDecimalWritable.isPrecisionDecimal64(randomPrecision));
+
+   final int randomScale = r.nextInt(randomPrecision + 1);
+   doTestDecimal64(r, inputBigDecimal, randomPrecision, randomScale);
+ }
+
+ /*
+  * Round-trips one value through serialize64/deserialize64 at the given precision and scale.
+  *
+  * A non-zero input is first reduced to fit: only (precision - scale) integer digits are kept
+  * and the fraction is truncated to scale digits. If nothing is left, a fresh random value is
+  * drawn and reduced the same way until a non-zero value results.
+  */
+ private void doTestDecimal64(Random r, BigDecimal inputBigDecimal, int precision, int scale) {
+
+   BigDecimal bigDecimal = inputBigDecimal;
+
+   // NOTE(review): BigDecimal.equals also compares scale, so a zero such as 0.0 is not equal
+   // to BigDecimal.ZERO and is replaced by a random value in the loop -- confirm intended.
+   if (!bigDecimal.equals(BigDecimal.ZERO)) {
+     while (true) {
+       bigDecimal = bigDecimal.remainder(BigDecimal.valueOf(10).pow(precision - scale));
+       bigDecimal = bigDecimal.setScale(scale, BigDecimal.ROUND_DOWN);
+       if (!bigDecimal.unscaledValue().equals(BigInteger.ZERO)) {
+         break;
+       }
+       bigDecimal = randHiveBigDecimalNormalRange(r, standardAlphabet);
+     }
+   }
+
+   HiveDecimal dec = HiveDecimal.create(bigDecimal);
+   assertTrue(dec != null);
+   dec.validate();
+
+   HiveDecimalWritable decWritable = new HiveDecimalWritable(dec);
+
+   final long decimal64Long = decWritable.serialize64(scale);
+   // Bounds check the magnitude: a signed comparison would pass for every negative value.
+   assertTrue(Math.abs(decimal64Long) <= HiveDecimalWritable.getDecimal64AbsMax(precision));
+   HiveDecimalWritable resultWritable = new HiveDecimalWritable(0);
+   resultWritable.deserialize64(decimal64Long, scale);
+
+   assertEquals(dec, resultWritable.getHiveDecimal());
+ }
+
+ //------------------------------------------------------------------------------------------------
+
public static String displayBytes(byte[] bytes, int start, int length) {
StringBuilder sb = new StringBuilder();
for (int i = start; i < start + length; i++) {