You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by mm...@apache.org on 2016/12/22 08:32:34 UTC
[05/10] hive git commit: HIVE-15335: Fast Decimal (Matt McCline,
reviewed by Sergey Shelukhin, Prasanth Jayachandran, Owen O'Malley)
http://git-wip-us.apache.org/repos/asf/hive/blob/4ba713cc/storage-api/src/java/org/apache/hadoop/hive/common/type/FastHiveDecimalImpl.java
----------------------------------------------------------------------
diff --git a/storage-api/src/java/org/apache/hadoop/hive/common/type/FastHiveDecimalImpl.java b/storage-api/src/java/org/apache/hadoop/hive/common/type/FastHiveDecimalImpl.java
new file mode 100644
index 0000000..a4fed5d
--- /dev/null
+++ b/storage-api/src/java/org/apache/hadoop/hive/common/type/FastHiveDecimalImpl.java
@@ -0,0 +1,9149 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.common.type;
+
+import java.util.Arrays;
+import java.io.EOFException;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.math.BigDecimal;
+import java.math.BigInteger;
+import java.math.RoundingMode;
+
+import org.apache.commons.lang.StringUtils;
+
+/**
+ * This class is a companion to the FastHiveDecimal class that separates the essential of code
+ * out of FastHiveDecimal into static methods in this class so that they can be used directly
+ * by vectorization to implement decimals by storing the fast0, fast1, and fast2 longs and
+ * the fastSignum, fastScale, etc ints in the DecimalColumnVector class.
+ */
+public class FastHiveDecimalImpl extends FastHiveDecimal {
+
+ /**
+ * Representation of fast decimals.
+ *
+ * We use 3 long words to store the 38 digits of fast decimals and and 3 integers for sign,
+ * integer digit count, and scale.
+ *
+ * The lower and middle long words store 16 decimal digits each; the high long word has
+ * 6 decimal digits; total 38 decimal digits.
+ *
+ * We do not try and represent fast decimal value as an unsigned 128 bit binary number in 2 longs.
+ * There are several important reasons for this.
+ *
+ * The effort to represent an unsigned 128 integer in 2 Java signed longs is very difficult,
+ * error prone, hard to debug, and not worth the effort.
+ *
+ * The focus here is on reusing memory (i.e. with HiveDecimalWritable) as often as possible.
+ * Reusing memory is good for grouping of fast decimal objects and related objects in CPU cache
+ * lines for fast memory access and eliminating the cost of allocating temporary objects and
+ * reducing the global cost of garbage collection.
+ *
+ * In other words, we are focused on avoiding the poor performance of Java general immutable
+ * objects.
+ *
+ * Reducing memory size or being concerned about the memory size of using 3 longs vs. 2 longs
+ * for 128 unsigned bits is not the focus here.
+ *
+ * Besides focusing on reusing memory, storing a limited number (16) decimal digits in the longs
+ * rather than compacting the value into all binary bits of 2 longs has a surprising benefit.
+ *
+ * One big part of implementing decimals turns out to be manipulating decimal digits.
+ *
+ * For example, rounding a decimal involves trimming off lower digits or clearing lower digits.
+ * Since radix 10 digits cannot be masked with binary masks, we use division and multiplication
+ * using powers of 10. We can easily manipulate the decimal digits in a long word using simple
+ * integer multiplication / division without doing emulated 128 binary bit multiplication /
+ * division (e.g. the defunct Decimal128 class).
+ *
+ * For example, say we want to scale (round) down the fraction digits of a decimal.
+ *
+ * final long divideFactor = powerOfTenTable[scaleDown];
+ * final long multiplyFactor = powerOfTenTable[LONGWORD_DECIMAL_DIGITS - scaleDown];
+ *
+ * result0 =
+ * fast0 / divideFactor
+ * + ((fast1 % divideFactor) * multiplyFactor);
+ * result1 =
+ * fast1 / divideFactor
+ * + ((fast2 % divideFactor) * multiplyFactor);
+ * result2 =
+ * fast2 / divideFactor;
+ *
+ * It also turns out to do addition and subtraction of decimals with different scales can involve
+ * overlap using more than 3 long words. Manipulating extra words is a natural extension of
+ * the existing techniques.
+ *
+ * Why is the decimal digits representation easier to debug? You can see the decimal digits in
+ * the 3 long words and do not have to convert binary words to decimal to see the value.
+ *
+ * 16 decimal digits for a long was choose so that an int can have 1/2 or 8 decimal digits during
+ * multiplication of int half words so intermediate multiplication results do not overflow a long.
+ * And, so addition overflow is well below the sign bit of a long.
+ */
+
+ // Code Sections:
+ // Initialize (fastSetFrom*).
+ // Take Integer or Fractional Portion.
+ // Binary to Decimal Conversion.
+ // Decimal to Binary Conversion.r
+ // Emulate SerializationUtils Deserialization used by ORC.
+ // Emulate SerializationUtils Serialization used by ORC.
+ // Emulate BigInteger Deserialization used by LazyBinary and others.
+ // Emulate BigInteger Serialization used by LazyBinary and others.
+ // Decimal to Integer Conversion.
+ // Decimal to Non-Integer Conversion.
+ // Decimal Comparison.
+ // Decimal Rounding.
+ // Decimal Scale Up/Down.
+ // Decimal Precision / Trailing Zeroes.
+ // Decimal Addition / Subtraction.
+ // Decimal Multiply.
+ // Decimal Division / Remainder.
+ // Decimal String Formatting.
+ // Decimal Validation.
+ // Decimal Debugging.
+
+ private static final long[] powerOfTenTable = {
+ 1L, // 0
+ 10L,
+ 100L,
+ 1000L,
+ 10000L,
+ 100000L,
+ 1000000L,
+ 10000000L,
+ 100000000L, // 8
+ 1000000000L,
+ 10000000000L,
+ 100000000000L,
+ 1000000000000L,
+ 10000000000000L,
+ 100000000000000L,
+ 1000000000000000L,
+ 10000000000000000L // 16
+ };
+
+ public static final int MAX_DECIMAL_DIGITS = 38;
+
+ /**
+ * Int: 8 decimal digits. An even number and 1/2 of MAX_LONGWORD_DECIMAL.
+ */
+ private static final int INTWORD_DECIMAL_DIGITS = 8;
+ private static final int MAX_INTWORD_DECIMAL = (int) powerOfTenTable[INTWORD_DECIMAL_DIGITS] - 1;
+ private static final int MULTIPLER_INTWORD_DECIMAL = (int) powerOfTenTable[INTWORD_DECIMAL_DIGITS];
+
+ /**
+ * Long: 16 decimal digits. An even number and twice MAX_INTWORD_DECIMAL.
+ */
+ private static final int LONGWORD_DECIMAL_DIGITS = 16;
+ private static final long MAX_LONGWORD_DECIMAL = powerOfTenTable[LONGWORD_DECIMAL_DIGITS] - 1;
+ private static final long MULTIPLER_LONGWORD_DECIMAL = powerOfTenTable[LONGWORD_DECIMAL_DIGITS];
+
+ private static final int TWO_X_LONGWORD_DECIMAL_DIGITS = 2 * LONGWORD_DECIMAL_DIGITS;
+ private static final int THREE_X_LONGWORD_DECIMAL_DIGITS = 3 * LONGWORD_DECIMAL_DIGITS;
+ private static final int FOUR_X_LONGWORD_DECIMAL_DIGITS = 4 * LONGWORD_DECIMAL_DIGITS;
+
+ // 38 decimal maximum - 32 digits in 2 lower longs (6 digits here).
+ private static final int HIGHWORD_DECIMAL_DIGITS = MAX_DECIMAL_DIGITS - TWO_X_LONGWORD_DECIMAL_DIGITS;
+ private static final long MAX_HIGHWORD_DECIMAL =
+ powerOfTenTable[HIGHWORD_DECIMAL_DIGITS] - 1;
+
+ private static long HIGHWORD_DIVIDE_FACTOR = powerOfTenTable[LONGWORD_DECIMAL_DIGITS - HIGHWORD_DECIMAL_DIGITS];
+ private static long HIGHWORD_MULTIPLY_FACTOR = powerOfTenTable[HIGHWORD_DECIMAL_DIGITS];
+
+ // 38 * 2 or 76 full decimal maximum - (64 + 8) digits in 4 lower longs (4 digits here).
+ private static final long FULL_MAX_HIGHWORD_DECIMAL =
+ powerOfTenTable[MAX_DECIMAL_DIGITS * 2 - (FOUR_X_LONGWORD_DECIMAL_DIGITS + INTWORD_DECIMAL_DIGITS)] - 1;
+
+ /**
+ * BigInteger constants.
+ */
+
+ private static final BigInteger BIG_INTEGER_TWO = BigInteger.valueOf(2);
+ private static final BigInteger BIG_INTEGER_FIVE = BigInteger.valueOf(5);
+ private static final BigInteger BIG_INTEGER_TEN = BigInteger.valueOf(10);
+
+ public static final BigInteger BIG_INTEGER_MAX_DECIMAL =
+ BIG_INTEGER_TEN.pow(MAX_DECIMAL_DIGITS).subtract(BigInteger.ONE);
+
+ private static final BigInteger BIG_INTEGER_MAX_LONGWORD_DECIMAL =
+ BigInteger.valueOf(MAX_LONGWORD_DECIMAL);
+
+ private static final BigInteger BIG_INTEGER_LONGWORD_MULTIPLIER =
+ BigInteger.ONE.add(BIG_INTEGER_MAX_LONGWORD_DECIMAL);
+ private static final BigInteger BIG_INTEGER_LONGWORD_MULTIPLIER_2X =
+ BIG_INTEGER_LONGWORD_MULTIPLIER.multiply(BIG_INTEGER_LONGWORD_MULTIPLIER);
+ private static final BigInteger BIG_INTEGER_LONGWORD_MULTIPLIER_3X =
+ BIG_INTEGER_LONGWORD_MULTIPLIER_2X.multiply(BIG_INTEGER_LONGWORD_MULTIPLIER);
+ private static final BigInteger BIG_INTEGER_LONGWORD_MULTIPLIER_4X =
+ BIG_INTEGER_LONGWORD_MULTIPLIER_3X.multiply(BIG_INTEGER_LONGWORD_MULTIPLIER);
+
+ private static final BigInteger BIG_INTEGER_MAX_HIGHWORD_DECIMAL =
+ BigInteger.valueOf(MAX_HIGHWORD_DECIMAL);
+ private static final BigInteger BIG_INTEGER_HIGHWORD_MULTIPLIER =
+ BigInteger.ONE.add(BIG_INTEGER_MAX_HIGHWORD_DECIMAL);
+
+ // UTF-8 byte constants used by string/UTF-8 bytes to decimal and decimal to String/UTF-8 byte
+ // conversion.
+
+ // There is only one blank in UTF-8.
+ private final static byte BYTE_BLANK = (byte) ' ';
+
+ private final static byte BYTE_DIGIT_ZERO = (byte) '0';
+ private final static byte BYTE_DIGIT_NINE = (byte) '9';
+
+ // Decimal point.
+ private final static byte BYTE_DOT = (byte) '.';
+
+ // Sign.
+ private final static byte BYTE_MINUS = (byte) '-';
+ private final static byte BYTE_PLUS = (byte) '+';
+
+ // Exponent E or e.
+ private final static byte BYTE_EXPONENT_LOWER = (byte) 'e';
+ private final static byte BYTE_EXPONENT_UPPER = (byte) 'E';
+
+ //************************************************************************************************
+ // Initialize (fastSetFrom*).
+
+ /*
+ * All of the fastSetFrom* methods require the caller to pass a fastResult parameter has been
+ * reset for better performance.
+ */
+
+ private static void doRaiseSetFromBytesInvalid(
+ byte[] bytes, int offset, int length,
+ FastHiveDecimal fastResult) {
+ final int end = offset + length;
+ throw new RuntimeException(
+ "Invalid fast decimal \"" +
+ new String(bytes, offset, end) + "\"" +
+ " fastSignum " + fastResult.fastSignum + " fast0 " + fastResult.fast0 + " fast1 " + fastResult.fast1 + " fast2 " + fastResult.fast2 +
+ " fastIntegerDigitCount " + fastResult.fastIntegerDigitCount +" fastScale " + fastResult.fastScale +
+ " stack trace: " + getStackTraceAsSingleLine(Thread.currentThread().getStackTrace()));
+ }
+
+ /**
+ * Scan a byte array slice for a decimal number in UTF-8 bytes.
+ *
+ * Syntax:
+ * [+|-][integerPortion][.[fractionalDigits]][{E|e}[+|-]exponent]
+ * // Where at least one integer or fractional
+ * // digit is required...
+ *
+ * We handle too many fractional digits by doing rounding ROUND_HALF_UP.
+ *
+ * NOTE: The fastSetFromBytes method requires the caller to pass a fastResult parameter has been
+ * reset for better performance.
+ *
+ * @param fastResult True if the byte array slice was successfully converted to a decimal.
+ * @return
+ */
+ public static boolean fastSetFromBytes(byte[] bytes, int offset, int length, boolean trimBlanks,
+ FastHiveDecimal fastResult) {
+
+ final int bytesLength = bytes.length;
+
+ if (offset < 0 || offset >= bytesLength) {
+ return false;
+ }
+ final int end = offset + length;
+ if (end <= offset || end > bytesLength) {
+ return false;
+ }
+
+ // We start here with at least one byte.
+ int index = offset;
+
+ if (trimBlanks) {
+ while (bytes[index] == BYTE_BLANK) {
+ if (++index >= end) {
+ return false;
+ }
+ }
+ }
+
+ // Started with a few ideas from BigDecimal(char[] in, int offset, int len) constructor...
+ // But soon became very fast decimal specific.
+
+ boolean isNegative = false;
+ if (bytes[index] == BYTE_MINUS) {
+ isNegative = true;
+ if (++index >= end) {
+ return false;
+ }
+ } else if (bytes[index] == BYTE_PLUS) {
+ if (++index >= end) {
+ return false;
+ }
+ }
+
+ int precision = 0;
+
+ // We fill starting with highest digit in highest longword (HIGHWORD_DECIMAL_DIGITS) and
+ // move down. At end will will shift everything down if necessary.
+
+ int longWordIndex = 0; // Where 0 is the highest longword; 1 is middle longword, etc.
+
+ int digitNum = HIGHWORD_DECIMAL_DIGITS;
+ long multiplier = powerOfTenTable[HIGHWORD_DECIMAL_DIGITS - 1];
+
+ int digitValue = 0;
+ long longWord = 0;
+
+ long fast0 = 0;
+ long fast1 = 0;
+ long fast2 = 0;
+
+ byte work;
+
+ // Parse integer portion.
+
+ boolean haveInteger = false;
+ while (true) {
+ work = bytes[index];
+ if (work < BYTE_DIGIT_ZERO || work > BYTE_DIGIT_NINE) {
+ break;
+ }
+ haveInteger = true;
+ if (precision == 0 && work == BYTE_DIGIT_ZERO) {
+ // Ignore leading zeroes.
+ if (++index >= end) {
+ break;
+ }
+ continue;
+ }
+ digitValue = work - BYTE_DIGIT_ZERO;
+ if (digitNum == 0) {
+
+ // Integer parsing move to next lower longword.
+
+ // Save previous longword.
+ if (longWordIndex == 0) {
+ fast2 = longWord;
+ } else if (longWordIndex == 1) {
+ fast1 = longWord;
+ } else if (longWordIndex == 2) {
+
+ // We have filled HiveDecimal.MAX_PRECISION digits and have no more room in our limit precision
+ // fast decimal.
+ return false;
+ }
+ longWordIndex++;
+
+ // The middle and lowest longwords highest digit number is LONGWORD_DECIMAL_DIGITS.
+ digitNum = LONGWORD_DECIMAL_DIGITS;
+ multiplier = powerOfTenTable[LONGWORD_DECIMAL_DIGITS - 1];
+ longWord = 0;
+ }
+ longWord += digitValue * multiplier;
+ multiplier /= 10;
+ digitNum--;
+ precision++;
+ if (++index >= end) {
+ break;
+ }
+ }
+
+ // At this point we may have parsed an integer.
+
+ // Try to eat a dot now since it could be the end. We remember if we saw a dot so we can
+ // do error checking later and detect just a dot.
+ boolean sawDot = false;
+ if (index < end && bytes[index] == BYTE_DOT) {
+ sawDot = true;
+ index++;
+ }
+
+ // Try to eat trailing blank padding.
+ if (trimBlanks && index < end && bytes[index] == BYTE_BLANK) {
+ index++;
+ while (index < end && bytes[index] == BYTE_BLANK) {
+ index++;
+ }
+ if (index < end) {
+ // Junk after trailing blank padding.
+ return false;
+ }
+ // Otherwise, fall through and process the what we saw before possible trailing blanks.
+ }
+
+ // Any more input?
+ if (index >= end) {
+
+ // We hit the end after getting optional integer and optional dot and optional blank padding.
+
+ if (!haveInteger) {
+ return false;
+ }
+
+ if (precision == 0) {
+
+ // We just had leading zeroes (and possibly a dot and trailing blanks).
+ // Value is 0.
+ return true;
+ }
+ // Save last longword.
+ if (longWordIndex == 0) {
+ fast2 = longWord;
+ } else if (longWordIndex == 1) {
+ fast1 = longWord;
+ } else {
+ fast0 = longWord;
+ }
+ fastResult.fastSignum = (isNegative ? -1 : 1);
+ fastResult.fastIntegerDigitCount = precision;
+ fastResult.fastScale = 0;
+ final int scaleDown = HiveDecimal.MAX_PRECISION - precision;
+ if (scaleDown > 0) {
+ doFastScaleDown(fast0, fast1, fast2, scaleDown, fastResult);
+ } else {
+ fastResult.fast0 = fast0;
+ fastResult.fast1 = fast1;
+ fastResult.fast2 = fast2;
+ }
+ return true;
+ }
+
+ // We have more input but did we start with something valid?
+ if (!haveInteger && !sawDot) {
+
+ // Must have one of those at this point.
+ return false;
+ }
+
+ int integerDigitCount = precision;
+
+ int nonTrailingZeroScale = 0;
+ boolean roundingNecessary = false;
+ if (sawDot) {
+
+ // Parse fraction portion.
+
+ while (true) {
+ work = bytes[index];
+ if (work < BYTE_DIGIT_ZERO || work > BYTE_DIGIT_NINE) {
+ if (!haveInteger) {
+
+ // Naked dot.
+ return false;
+ }
+ break;
+ }
+ digitValue = work - BYTE_DIGIT_ZERO;
+ if (digitNum == 0) {
+
+ // Fraction digit parsing move to next lower longword.
+
+ // Save previous longword.
+ if (longWordIndex == 0) {
+ fast2 = longWord;
+ } else if (longWordIndex == 1) {
+ fast1 = longWord;
+ } else if (longWordIndex == 2) {
+
+ // We have filled HiveDecimal.MAX_PRECISION digits and have no more room in our limit precision
+ // fast decimal. However, since we are processing fractional digits, we do rounding.
+ // away.
+ if (digitValue >= 5) {
+ roundingNecessary = true;
+ }
+
+ // Scan through any remaining digits...
+ while (++index < end) {
+ work = bytes[index];
+ if (work < BYTE_DIGIT_ZERO || work > BYTE_DIGIT_NINE) {
+ break;
+ }
+ }
+ break;
+ }
+ longWordIndex++;
+ digitNum = LONGWORD_DECIMAL_DIGITS;
+ multiplier = powerOfTenTable[digitNum - 1];
+ longWord = 0;
+ }
+ longWord += digitValue * multiplier;
+ multiplier /= 10;
+ digitNum--;
+ precision++;
+ if (digitValue != 0) {
+ nonTrailingZeroScale = precision - integerDigitCount;
+ }
+ if (++index >= end) {
+ break;
+ }
+ }
+ }
+
+ boolean haveExponent = false;
+ if (index < end &&
+ (bytes[index] == BYTE_EXPONENT_UPPER || bytes[index] == BYTE_EXPONENT_LOWER)) {
+ haveExponent = true;
+ index++;
+ if (index >= end) {
+ // More required.
+ return false;
+ }
+ }
+
+ // At this point we have a number. Save it in fastResult. Round it. If we have an exponent,
+ // we will do a power 10 operation on fastResult.
+
+ // Save last longword.
+ if (longWordIndex == 0) {
+ fast2 = longWord;
+ } else if (longWordIndex == 1) {
+ fast1 = longWord;
+ } else {
+ fast0 = longWord;
+ }
+
+ int trailingZeroesScale = precision - integerDigitCount;
+ if (integerDigitCount == 0 && nonTrailingZeroScale == 0) {
+ // Zero(es).
+ } else {
+ fastResult.fastSignum = (isNegative ? -1 : 1);
+ fastResult.fastIntegerDigitCount = integerDigitCount;
+ fastResult.fastScale = nonTrailingZeroScale;
+ final int trailingZeroCount = trailingZeroesScale - fastResult.fastScale;
+ final int scaleDown = HiveDecimal.MAX_PRECISION - precision + trailingZeroCount;
+ if (scaleDown > 0) {
+ doFastScaleDown(fast0, fast1, fast2, scaleDown, fastResult);
+ } else {
+ fastResult.fast0 = fast0;
+ fastResult.fast1 = fast1;
+ fastResult.fast2 = fast2;
+ }
+ }
+
+ if (roundingNecessary) {
+
+ if (fastResult.fastSignum == 0) {
+ fastResult.fastSignum = (isNegative ? -1 : 1);
+ fastResult.fast0 = 1;
+ fastResult.fastIntegerDigitCount = 0;
+ fastResult.fastScale = HiveDecimal.MAX_SCALE;
+ } else {
+ if (!fastAdd(
+ fastResult.fastSignum, fastResult.fast0, fastResult.fast1, fastResult.fast2,
+ fastResult.fastIntegerDigitCount, fastResult.fastScale,
+ fastResult.fastSignum, 1, 0, 0, 0, trailingZeroesScale,
+ fastResult)) {
+ return false;
+ }
+ }
+ }
+
+ if (!haveExponent) {
+
+ // Try to eat trailing blank padding.
+ if (trimBlanks && index < end && bytes[index] == BYTE_BLANK) {
+ index++;
+ while (index < end && bytes[index] == BYTE_BLANK) {
+ index++;
+ }
+ }
+ if (index < end) {
+ // Junk after trailing blank padding.
+ return false;
+ }
+ return true;
+ }
+
+ // At this point, we have seen the exponent letter E or e and have decimal information as:
+ // isNegative, precision, integerDigitCount, nonTrailingZeroScale, and
+ // fast0, fast1, fast2.
+ //
+ // After we determine the exponent, we will do appropriate scaling and fill in fastResult.
+
+ boolean isExponentNegative = false;
+ if (bytes[index] == BYTE_MINUS) {
+ isExponentNegative = true;
+ if (++index >= end) {
+ return false;
+ }
+ } else if (bytes[index] == BYTE_PLUS) {
+ if (++index >= end) {
+ return false;
+ }
+ }
+
+ long exponent = 0;
+ multiplier = 1;
+ while (true) {
+ work = bytes[index];
+ if (work < BYTE_DIGIT_ZERO || work > BYTE_DIGIT_NINE) {
+ break;
+ }
+ if (multiplier > 10) {
+ // Power of ten way beyond our precision/scale...
+ return false;
+ }
+ digitValue = work - BYTE_DIGIT_ZERO;
+ if (digitValue != 0 || exponent != 0) {
+ exponent = exponent * 10 + digitValue;
+ multiplier *= 10;
+ }
+ if (++index >= end) {
+ break;
+ }
+ }
+ if (isExponentNegative) {
+ exponent = -exponent;
+ }
+
+ // Try to eat trailing blank padding.
+ if (trimBlanks && index < end && bytes[index] == BYTE_BLANK) {
+ index++;
+ while (index < end && bytes[index] == BYTE_BLANK) {
+ index++;
+ }
+ }
+ if (index < end) {
+ // Junk after exponent.
+ return false;
+ }
+
+
+ if (integerDigitCount == 0 && nonTrailingZeroScale == 0) {
+ // Zero(es).
+ return true;
+ }
+
+ if (exponent == 0) {
+
+ // No effect since 10^0 = 1.
+
+ } else {
+
+ // We for these input with exponents, we have at this point an intermediate decimal,
+ // an exponent power, and a result:
+ //
+ // intermediate
+ // input decimal exponent result
+ // 701E+1 701 scale 0 +1 7010 scale 0
+ // 3E+4 3 scale 0 +4 3 scale 0
+ // 3.223E+9 3.223 scale 3 +9 3223000000 scale 0
+ // 0.009E+10 0.009 scale 4 +10 90000000 scale 0
+ // 0.3221E-2 0.3221 scale 4 -2 0.003221 scale 6
+ // 0.00223E-20 0.00223 scale 5 -20 0.0000000000000000000000223 scale 25
+ //
+
+ if (!fastScaleByPowerOfTen(
+ fastResult,
+ (int) exponent,
+ fastResult)) {
+ return false;
+ }
+ }
+
+ final int trailingZeroCount =
+ fastTrailingDecimalZeroCount(
+ fastResult.fast0, fastResult.fast1, fastResult.fast2,
+ fastResult.fastIntegerDigitCount, fastResult.fastScale);
+ if (trailingZeroCount > 0) {
+ doFastScaleDown(
+ fastResult,
+ trailingZeroCount,
+ fastResult);
+ fastResult.fastScale -= trailingZeroCount;
+ }
+
+ return true;
+ }
+
+ /**
+ * Scans a byte array slice for UNSIGNED RAW DIGITS ONLY in UTF-8 (ASCII) characters
+ * and forms a decimal from the digits and a sign and scale.
+ *
+ * Designed for BinarySortable serialization format that separates the sign and scale
+ * from the raw digits.
+ *
+ * NOTE: The fastSetFromDigitsOnlyBytesAndScale method requires the caller to pass a fastResult
+ * parameter has been reset for better performance.
+ *
+ * @return True if the sign, digits, and scale were successfully converted to a decimal.
+ */
+ public static boolean fastSetFromDigitsOnlyBytesAndScale(
+ boolean isNegative, byte[] bytes, int offset, int length, int scale,
+ FastHiveDecimal fastResult) {
+
+ final int bytesLength = bytes.length;
+
+ if (offset < 0 || offset >= bytesLength) {
+ return false;
+ }
+ final int end = offset + length;
+ if (end <= offset || end > bytesLength) {
+ return false;
+ }
+
+ // We start here with at least one byte.
+ int index = offset;
+
+ // A stripped down version of fastSetFromBytes.
+
+ int precision = 0;
+
+ // We fill starting with highest digit in highest longword (HIGHWORD_DECIMAL_DIGITS) and
+ // move down. At end will will shift everything down if necessary.
+
+ int longWordIndex = 0; // Where 0 is the highest longword; 1 is middle longword, etc.
+
+ int digitNum = HIGHWORD_DECIMAL_DIGITS;
+ long multiplier = powerOfTenTable[HIGHWORD_DECIMAL_DIGITS - 1];
+
+ int digitValue;
+ long longWord = 0;
+
+ long fast0 = 0;
+ long fast1 = 0;
+ long fast2 = 0;
+
+ byte work;
+
+ // Parse digits.
+
+ boolean haveInteger = false;
+ while (true) {
+ work = bytes[index];
+ if (work < BYTE_DIGIT_ZERO || work > BYTE_DIGIT_NINE) {
+ if (!haveInteger) {
+ return false;
+ }
+ break;
+ }
+ haveInteger = true;
+ if (precision == 0 && work == BYTE_DIGIT_ZERO) {
+ // Ignore leading zeroes.
+ if (++index >= end) {
+ break;
+ }
+ continue;
+ }
+ digitValue = work - BYTE_DIGIT_ZERO;
+ if (digitNum == 0) {
+
+ // Integer parsing move to next lower longword.
+
+ // Save previous longword.
+ if (longWordIndex == 0) {
+ fast2 = longWord;
+ } else if (longWordIndex == 1) {
+ fast1 = longWord;
+ } else if (longWordIndex == 2) {
+
+ // We have filled HiveDecimal.MAX_PRECISION digits and have no more room in our limit precision
+ // fast decimal.
+ return false;
+ }
+ longWordIndex++;
+
+ // The middle and lowest longwords highest digit number is LONGWORD_DECIMAL_DIGITS.
+ digitNum = LONGWORD_DECIMAL_DIGITS;
+ multiplier = powerOfTenTable[LONGWORD_DECIMAL_DIGITS - 1];
+ longWord = 0;
+ }
+ longWord += digitValue * multiplier;
+ multiplier /= 10;
+ digitNum--;
+ precision++;
+ if (++index >= end) {
+ break;
+ }
+ }
+
+ // Just an digits?
+ if (index < end) {
+ return false;
+ }
+
+ if (precision == 0) {
+ // We just had leading zeroes.
+ // Value is 0.
+ return true;
+ }
+
+ // Save last longword.
+ if (longWordIndex == 0) {
+ fast2 = longWord;
+ } else if (longWordIndex == 1) {
+ fast1 = longWord;
+ } else {
+ fast0 = longWord;
+ }
+ fastResult.fastSignum = (isNegative ? -1 : 1);
+ fastResult.fastIntegerDigitCount = Math.max(0, precision - scale);
+ fastResult.fastScale = scale;
+ final int scaleDown = HiveDecimal.MAX_PRECISION - precision;
+ if (scaleDown > 0) {
+ doFastScaleDown(fast0, fast1, fast2, scaleDown, fastResult);
+ } else {
+ fastResult.fast0 = fast0;
+ fastResult.fast1 = fast1;
+ fastResult.fast2 = fast2;
+ }
+ return true;
+
+ }
+
+ /**
+ * Scale down a BigInteger by a power of 10 and round off if necessary using ROUND_HALF_UP.
+ * @return The scaled and rounded BigInteger.
+ */
+ private static BigInteger doBigIntegerScaleDown(BigInteger unscaledValue, int scaleDown) {
+ BigInteger[] quotientAndRemainder = unscaledValue.divideAndRemainder(BigInteger.TEN.pow(scaleDown));
+ BigInteger quotient = quotientAndRemainder[0];
+ BigInteger round = quotientAndRemainder[1].divide(BigInteger.TEN.pow(scaleDown - 1));
+ if (round.compareTo(BIG_INTEGER_FIVE) >= 0) {
+ quotient = quotient.add(BigInteger.ONE);
+ }
+ return quotient;
+ }
+
+ /**
+ * Create a fast decimal from a BigDecimal.
+ *
+ * NOTE: The fastSetFromBigDecimal method requires the caller to pass a fastResult
+ * parameter has been reset for better performance.
+ *
+ * @return True if the BigDecimal could be converted to our decimal representation.
+ */
+ public static boolean fastSetFromBigDecimal(
+ BigDecimal bigDecimal, boolean allowRounding, FastHiveDecimal fastResult) {
+
+ // We trim the trailing zero fraction digits so we don't cause unnecessary precision
+ // overflow later.
+ if (bigDecimal.signum() == 0) {
+ if (bigDecimal.scale() != 0) {
+
+ // For some strange reason BigDecimal 0 can have a scale. We do not support that.
+ bigDecimal = BigDecimal.ZERO;
+ }
+ } else {
+ BigDecimal bigDecimalStripped = bigDecimal.stripTrailingZeros();
+ int stripTrailingZerosScale = bigDecimalStripped.scale();
+ // System.out.println("FAST_SET_FROM_BIG_DECIMAL bigDecimal " + bigDecimal);
+ // System.out.println("FAST_SET_FROM_BIG_DECIMAL bigDecimalStripped " + bigDecimalStripped);
+ // System.out.println("FAST_SET_FROM_BIG_DECIMAL stripTrailingZerosScale " + stripTrailingZerosScale);
+ if (stripTrailingZerosScale < 0) {
+
+ // The trailing zeroes extend into the integer part -- we only want to eliminate the
+ // fractional zero digits.
+
+ bigDecimal = bigDecimal.setScale(0);
+ } else {
+
+ // Ok, use result with some or all fractional digits stripped.
+
+ bigDecimal = bigDecimalStripped;
+ }
+ }
+ // System.out.println("FAST_SET_FROM_BIG_DECIMAL adjusted for zeroes/scale " + bigDecimal + " scale " + bigDecimal.scale());
+
+ BigInteger unscaledValue = bigDecimal.unscaledValue();
+ // System.out.println("FAST_SET_FROM_BIG_DECIMAL unscaledValue " + unscaledValue + " length " + unscaledValue.toString().length());
+
+ final int scale = bigDecimal.scale();
+ if (!allowRounding) {
+ if (scale < 0 || scale > HiveDecimal.MAX_SCALE) {
+ return false;
+ }
+ // The digits must fit without rounding.
+ if (!fastSetFromBigInteger(unscaledValue, fastResult)) {
+ return false;
+ }
+ if (fastResult.fastSignum != 0) {
+ fastResult.fastIntegerDigitCount = Math.max(0, fastResult.fastIntegerDigitCount - scale);
+ fastResult.fastScale = scale;
+ }
+ return true;
+ }
+ // This method will scale down and round to fit, if necessary.
+ if (!fastSetFromBigInteger(unscaledValue, scale, fastResult)) {
+ return false;
+ }
+ return true;
+ }
+
+ /**
+ * Scan a String for a decimal number in UTF-8 characters.
+ *
+ * NOTE: The fastSetFromString method requires the caller to pass a fastResult
+ * parameter has been reset for better performance.
+ *
+ * @return True if the String was successfully converted to a decimal.
+ */
+ public static boolean fastSetFromString(
+ String string, boolean trimBlanks, FastHiveDecimal result) {
+ byte[] bytes = string.getBytes();
+ return fastSetFromBytes(bytes, 0, bytes.length, trimBlanks, result);
+ }
+
+ /**
+ * Creates a scale 0 fast decimal from an int.
+ *
+ * NOTE: The fastSetFromString method requires the caller to pass a fastResult
+ * parameter has been reset for better performance.
+ *
+ */
+ public static void fastSetFromInt(int intValue, FastHiveDecimal fastResult) {
+ if (intValue == 0) {
+ // Zero special case.
+ return;
+ }
+ if (intValue > 0) {
+ fastResult.fastSignum = 1;
+ } else {
+ fastResult.fastSignum = -1;
+ intValue = Math.abs(intValue);
+ }
+ // 10 digit int is all in lowest 16 decimal digit longword.
+ // Since we are creating with scale 0, no fraction digits to zero trim.
+ fastResult.fast0 = intValue & 0xFFFFFFFFL;
+ fastResult.fastIntegerDigitCount =
+ fastLongWordPrecision(fastResult.fast0);
+ }
+
+ /**
+ * Creates a scale 0 fast decimal from a long.
+ *
+ * NOTE: The fastSetFromLong method requires the caller to pass a fastResult
+ * parameter has been reset for better performance.
+ *
+ */
+ public static void fastSetFromLong(
+ long longValue, FastHiveDecimal fastResult) {
+ if (longValue == 0) {
+ // Zero special case.
+ return;
+ }
+ // Handle minimum integer case that doesn't have abs().
+ if (longValue == Long.MIN_VALUE) {
+ // Split -9,223,372,036,854,775,808 into 16 digit middle and lowest longwords by hand.
+ fastResult.fastSignum = -1;
+ fastResult.fast1 = 922L;
+ fastResult.fast0 = 3372036854775808L;
+ fastResult.fastIntegerDigitCount = 19;
+ } else {
+ if (longValue > 0) {
+ fastResult.fastSignum = 1;
+ } else {
+ fastResult.fastSignum = -1;
+ longValue = Math.abs(longValue);
+ }
+ // Split into 16 digit middle and lowest longwords remainder / division.
+ fastResult.fast1 = longValue / MULTIPLER_LONGWORD_DECIMAL;
+ fastResult.fast0 = longValue % MULTIPLER_LONGWORD_DECIMAL;
+ if (fastResult.fast1 != 0) {
+ fastResult.fastIntegerDigitCount =
+ LONGWORD_DECIMAL_DIGITS + fastLongWordPrecision(fastResult.fast1);
+ } else {
+ fastResult.fastIntegerDigitCount =
+ fastLongWordPrecision(fastResult.fast0);
+ }
+ }
+ return;
+ }
+
+ /**
+ * Creates a fast decimal from a long with a specified scale.
+ *
+ * NOTE: The fastSetFromLongAndScale method requires the caller to pass a fastResult
+ * parameter has been reset for better performance.
+ *
+ */
+ public static boolean fastSetFromLongAndScale(
+ long longValue, int scale, FastHiveDecimal fastResult) {
+
+ if (scale < 0 || scale > HiveDecimal.MAX_SCALE) {
+ return false;
+ }
+
+ fastSetFromLong(longValue, fastResult);
+ if (scale == 0) {
+ return true;
+ }
+
+ if (!fastScaleByPowerOfTen(
+ fastResult,
+ -scale,
+ fastResult)) {
+ return false;
+ }
+ return true;
+ }
+
+ /**
+ * Creates fast decimal from a float.
+ *
+ * NOTE: The fastSetFromFloat method requires the caller to pass a fastResult
+ * parameter has been reset for better performance.
+ *
+ */
+ public static boolean fastSetFromFloat(
+ float floatValue, FastHiveDecimal fastResult) {
+
+ String floatString = Float.toString(floatValue);
+ return fastSetFromString(floatString, false, fastResult);
+
+ }
+
+ /**
+ * Creates fast decimal from a double.
+ *
+ * NOTE: The fastSetFromDouble method requires the caller to pass a fastResult
+ * parameter has been reset for better performance.
+ *
+ */
+ public static boolean fastSetFromDouble(
+ double doubleValue, FastHiveDecimal fastResult) {
+
+ String doubleString = Double.toString(doubleValue);
+ return fastSetFromString(doubleString, false, fastResult);
+
+ }
+
+ /**
+ * Creates a fast decimal from a BigInteger with scale 0.
+ *
+ * For efficiency, we assume that fastResult is fastReset. This method does not set the
+ * fastScale field.
+ *
+ * NOTE: The fastSetFromBigInteger method requires the caller to pass a fastResult
+ * parameter has been reset for better performance.
+ *
+ * @return Return true if the BigInteger value fit within HiveDecimal.MAX_PRECISION. Otherwise,
+ * false for overflow.
+ */
+ public static boolean fastSetFromBigInteger(
+ BigInteger bigInteger, FastHiveDecimal fastResult) {
+
+ final int signum = bigInteger.signum();
+ if (signum == 0) {
+ // Zero special case.
+ return true;
+ }
+ fastResult.fastSignum = signum;
+ if (signum == -1) {
+ bigInteger = bigInteger.negate();
+ }
+ if (bigInteger.compareTo(BIG_INTEGER_LONGWORD_MULTIPLIER) < 0) {
+
+ // Fits in one longword.
+ fastResult.fast0 = bigInteger.longValue();
+ if (fastResult.fast0 == 0) {
+ fastResult.fastSignum = 0;
+ } else {
+ fastResult.fastIntegerDigitCount = fastLongWordPrecision(fastResult.fast0);
+ }
+ return true;
+ }
+ BigInteger[] quotientAndRemainder =
+ bigInteger.divideAndRemainder(BIG_INTEGER_LONGWORD_MULTIPLIER);
+ fastResult.fast0 = quotientAndRemainder[1].longValue();
+ BigInteger quotient = quotientAndRemainder[0];
+ if (quotient.compareTo(BIG_INTEGER_LONGWORD_MULTIPLIER) < 0) {
+
+ // Fits in two longwords.
+ fastResult.fast1 = quotient.longValue();
+ if (fastResult.fast0 == 0 && fastResult.fast1 == 0) {
+ // The special case zero logic at the beginning should have caught this.
+ throw new RuntimeException("Unexpected");
+ } else {
+ fastResult.fastIntegerDigitCount =
+ LONGWORD_DECIMAL_DIGITS + fastLongWordPrecision(fastResult.fast1);
+ }
+ return true;
+ }
+
+ // Uses all 3 decimal longs.
+ quotientAndRemainder =
+ quotient.divideAndRemainder(BIG_INTEGER_LONGWORD_MULTIPLIER);
+ fastResult.fast1 = quotientAndRemainder[1].longValue();
+ quotient = quotientAndRemainder[0];
+ if (quotient.compareTo(BIG_INTEGER_HIGHWORD_MULTIPLIER) >= 0) {
+ // Overflow.
+ return false;
+ }
+ fastResult.fast2 = quotient.longValue();
+ if (fastResult.fast0 == 0 && fastResult.fast1 == 0 && fastResult.fast2 == 0) {
+ fastResult.fastSignum = 0;
+ } else {
+ fastResult.fastIntegerDigitCount =
+ TWO_X_LONGWORD_DECIMAL_DIGITS + fastHighWordPrecision(fastResult.fast2);
+ }
+ return true;
+ }
+
+ /**
+ * Creates a fast decimal from a BigInteger with a specified scale.
+ *
+ * NOTE: The fastSetFromBigInteger method requires the caller to pass a fastResult
+ * parameter has been reset for better performance.
+ *
+ * @return True if the BigInteger and scale were successfully converted to a decimal.
+ */
+ public static boolean fastSetFromBigInteger(
+ BigInteger bigInteger, int scale, FastHiveDecimal fastResult) {
+
+ if (scale < 0) {
+
+ // Multiply by 10^(-scale) to normalize. We do not use negative scale in our representation.
+ //
+ // Example:
+ // 4.172529E+20 has a negative scale -20 since scale is number of digits below the dot.
+ // 417252900000000000000 normalized as scale 0.
+ //
+ bigInteger = bigInteger.multiply(BIG_INTEGER_TEN.pow(-scale));
+ scale = 0;
+ }
+
+ int signum = bigInteger.signum();
+ if (signum == 0) {
+ // Zero.
+ return true;
+ } else if (signum == -1) {
+ // Normalize to positive.
+ bigInteger = bigInteger.negate();
+ }
+
+ // A slow way to get the number of decimal digits.
+ int precision = bigInteger.toString().length();
+
+ // System.out.println("FAST_SET_FROM_BIG_INTEGER adjusted bigInteger " + bigInteger + " precision " + precision);
+
+ int integerDigitCount = precision - scale;
+ // System.out.println("FAST_SET_FROM_BIG_INTEGER integerDigitCount " + integerDigitCount + " scale " + scale);
+ int maxScale;
+ if (integerDigitCount >= 0) {
+ if (integerDigitCount > HiveDecimal.MAX_PRECISION) {
+ return false;
+ }
+ maxScale = HiveDecimal.MAX_SCALE - integerDigitCount;
+ } else {
+ maxScale = HiveDecimal.MAX_SCALE;
+ }
+ // System.out.println("FAST_SET_FROM_BIG_INTEGER maxScale " + maxScale);
+
+ if (scale > maxScale) {
+
+ // A larger scale is ok -- we will knock off lower digits and round.
+
+ final int trimAwayCount = scale - maxScale;
+ // System.out.println("FAST_SET_FROM_BIG_INTEGER trimAwayCount " + trimAwayCount);
+ if (trimAwayCount > 1) {
+ // First, throw away digits below round digit.
+ BigInteger bigIntegerThrowAwayBelowRoundDigitDivisor = BIG_INTEGER_TEN.pow(trimAwayCount - 1);
+ bigInteger = bigInteger.divide(bigIntegerThrowAwayBelowRoundDigitDivisor);
+ }
+ // System.out.println("FAST_SET_FROM_BIG_INTEGER with round digit bigInteger " + bigInteger + " length " + bigInteger.toString().length());
+
+ BigInteger[] quotientAndRemainder = bigInteger.divideAndRemainder(BIG_INTEGER_TEN);
+ // System.out.println("FAST_SET_FROM_BIG_INTEGER quotientAndRemainder " + Arrays.toString(quotientAndRemainder));
+
+ BigInteger quotient = quotientAndRemainder[0];
+ if (quotientAndRemainder[1].intValue() >= 5) {
+ if (quotient.equals(BIG_INTEGER_MAX_DECIMAL)) {
+
+ // 38 9's digits.
+ // System.out.println("FAST_SET_FROM_BIG_INTEGER quotient is BIG_INTEGER_MAX_DECIMAL");
+
+ if (maxScale == 0) {
+ // No room above for rounding.
+ return false;
+ }
+
+ // System.out.println("FAST_SET_FROM_BIG_INTEGER reached here... scale " + scale + " maxScale " + maxScale);
+ // Rounding results in 10^N.
+ bigInteger = BIG_INTEGER_TEN.pow(integerDigitCount);
+ maxScale = 0;
+ } else {
+
+ // Round up.
+ bigInteger = quotient.add(BigInteger.ONE);
+ }
+ } else {
+
+ // No rounding.
+ bigInteger = quotient;
+ }
+ scale = maxScale;
+ }
+ if (!fastSetFromBigInteger(bigInteger, fastResult)) {
+ return false;
+ }
+
+ if (fastResult.fast0 == 0 && fastResult.fast1 == 0 && fastResult.fast2 == 0) {
+ fastResult.fastSignum = 0;
+ } else {
+ fastResult.fastSignum = signum;
+ fastResult.fastIntegerDigitCount = Math.max(0, fastResult.fastIntegerDigitCount - scale);
+ fastResult.fastScale = scale;
+
+ final int trailingZeroCount =
+ fastTrailingDecimalZeroCount(
+ fastResult.fast0, fastResult.fast1, fastResult.fast2,
+ fastResult.fastIntegerDigitCount, scale);
+ if (trailingZeroCount > 0) {
+ doFastScaleDown(
+ fastResult,
+ trailingZeroCount,
+ fastResult);
+ fastResult.fastScale -= trailingZeroCount;
+ }
+ }
+
+ return true;
+ }
+
+ //************************************************************************************************
+ // Take Integer or Fractional Portion.
+
+ /**
+ * Creates fast decimal from the fraction portion of a fast decimal.
+ *
+ * NOTE: The fastFractionPortion method requires the caller to pass a fastResult
+ * parameter has been reset for better performance.
+ */
+ public static void fastFractionPortion(
+ int fastSignum, long fast0, long fast1, long fast2,
+ int fastIntegerDigitCount, int fastScale,
+ FastHiveDecimal fastResult) {
+
+ if (fastSignum == 0 || fastScale == 0) {
+ fastResult.fastReset();
+ return;
+ }
+
+ // Clear integer portion; keep fraction.
+
+ // Adjust all longs using power 10 division/remainder.
+ long result0;
+ long result1;
+ long result2;
+ if (fastScale < LONGWORD_DECIMAL_DIGITS) {
+
+ // Part of lowest word survives.
+
+ final long clearFactor = powerOfTenTable[fastScale];
+
+ result0 = fast0 % clearFactor;
+ result1 = 0;
+ result2 = 0;
+
+ } else if (fastScale < TWO_X_LONGWORD_DECIMAL_DIGITS) {
+
+ // Throw away lowest word.
+
+ final int adjustedScaleDown = fastScale - LONGWORD_DECIMAL_DIGITS;
+
+ final long clearFactor = powerOfTenTable[adjustedScaleDown];
+
+ result0 = fast0;
+ result1 = fast1 % clearFactor;
+ result2 = 0;
+
+ } else {
+
+ // Throw away middle and lowest words.
+
+ final int adjustedScaleDown = fastScale - 2*LONGWORD_DECIMAL_DIGITS;
+
+ final long clearFactor = powerOfTenTable[adjustedScaleDown];
+
+ result0 = fast0;
+ result1 = fast1;
+ result2 = fast2 % clearFactor;
+
+ }
+ if (result0 == 0 && result1 == 0 && result2 == 0) {
+ fastResult.fastReset();
+ } else {
+ fastResult.fastSet(fastSignum, result0, result1, result2, /* fastIntegerDigitCount */ 0, fastScale);
+ }
+ }
+
+ /**
+ * Creates fast decimal from the integer portion.
+ *
+ * NOTE: The fastFractionPortion method requires the caller to pass a fastResult
+ * parameter has been reset for better performance.
+ */
+ public static void fastIntegerPortion(
+ int fastSignum, long fast0, long fast1, long fast2,
+ int fastIntegerDigitCount, int fastScale,
+ FastHiveDecimal fastResult) {
+
+ if (fastSignum == 0) {
+ fastResult.fastReset();
+ return;
+ }
+ if (fastScale == 0) {
+ fastResult.fastSet(fastSignum, fast0, fast1, fast2, fastIntegerDigitCount, fastScale);
+ }
+
+ // Scale down no rounding to clear fraction.
+ fastResult.fastSignum = fastSignum;
+ doFastScaleDown(
+ fast0, fast1, fast2,
+ fastScale,
+ fastResult);
+ fastResult.fastIntegerDigitCount = fastIntegerDigitCount;
+ fastResult.fastScale = 0;
+ }
+
+ //************************************************************************************************
+ // Binary to Decimal Conversion.
+
+ /**
+ * Convert 3 binary words of N bits each to a fast decimal (scale 0).
+ *
+ * The 3 binary words highWord, middleWord, and lowerWord form a large binary value:
+ *
+ * highWord * 2^(M+L) + middleWord * 2^L + lowerWord.
+ *
+ * Where L is the number of bits in the lower word; M is the number of bits in the middle word.
+ * We let L and M be different to support the SerializationUtil serialization where the lower
+ * word is 62 bits and the remaining words are 63 bits...
+ *
+ * The fast decimal middleWordMultiplier is 2^L.
+ * The fast decimal highWordMultiplier is 2^(M+L).
+ *
+ * @return True if the conversion of the 3 binary words to decimal was successful.
+ */
+ public static boolean doBinaryToDecimalConversion(
+ long lowerWord, long middleWord, long highWord,
+ FastHiveDecimal middleWordMultiplier,
+ FastHiveDecimal highWordMultiplier,
+ FastHiveDecimal fastResult) {
+
+ /*
+ * Challenge: How to do the math to get this raw binary back to our decimal form.
+ *
+ * Briefly, for the middle and upper binary words, convert the middle/upper word into a decimal
+ * long words and then multiply those by the binary word's power of 2.
+ *
+ * And, add the multiply results into the result decimal longwords.
+ *
+ */
+ long result0 =
+ lowerWord % MULTIPLER_LONGWORD_DECIMAL;
+ long result1 =
+ lowerWord / MULTIPLER_LONGWORD_DECIMAL;
+ long result2 = 0;
+
+ if (middleWord != 0 || highWord != 0) {
+
+ if (highWord == 0) {
+
+ // Form result from lower and middle words.
+
+ if (!fastMultiply5x5HalfWords(
+ middleWord % MULTIPLER_LONGWORD_DECIMAL,
+ middleWord / MULTIPLER_LONGWORD_DECIMAL,
+ 0,
+ middleWordMultiplier.fast0, middleWordMultiplier.fast1, middleWordMultiplier.fast2,
+ fastResult)) {
+ return false;
+ }
+
+ final long calc0 =
+ result0
+ + fastResult.fast0;
+ result0 =
+ calc0 % MULTIPLER_LONGWORD_DECIMAL;
+ final long calc1 =
+ calc0 / MULTIPLER_LONGWORD_DECIMAL
+ + result1
+ + fastResult.fast1;
+ result1 =
+ calc1 % MULTIPLER_LONGWORD_DECIMAL;
+ result2 =
+ calc1 / MULTIPLER_LONGWORD_DECIMAL
+ + fastResult.fast2;
+
+ } else if (middleWord == 0) {
+
+ // Form result from lower and high words.
+
+ if (!fastMultiply5x5HalfWords(
+ highWord % MULTIPLER_LONGWORD_DECIMAL,
+ highWord / MULTIPLER_LONGWORD_DECIMAL,
+ 0,
+ highWordMultiplier.fast0, highWordMultiplier.fast1, highWordMultiplier.fast2,
+ fastResult)) {
+ return false;
+ }
+
+ final long calc0 =
+ result0
+ + fastResult.fast0;
+ result0 =
+ calc0 % MULTIPLER_LONGWORD_DECIMAL;
+ final long calc1 =
+ calc0 / MULTIPLER_LONGWORD_DECIMAL
+ + result1
+ + fastResult.fast1;
+ result1 =
+ calc1 % MULTIPLER_LONGWORD_DECIMAL;
+ result2 =
+ calc1 / MULTIPLER_LONGWORD_DECIMAL
+ + fastResult.fast2;
+
+ } else {
+
+ // Form result from lower, middle, and middle words.
+
+ if (!fastMultiply5x5HalfWords(
+ middleWord % MULTIPLER_LONGWORD_DECIMAL,
+ middleWord / MULTIPLER_LONGWORD_DECIMAL,
+ 0,
+ middleWordMultiplier.fast0, middleWordMultiplier.fast1, middleWordMultiplier.fast2,
+ fastResult)) {
+ return false;
+ }
+
+ long middleResult0 = fastResult.fast0;
+ long middleResult1 = fastResult.fast1;
+ long middleResult2 = fastResult.fast2;
+
+ if (!fastMultiply5x5HalfWords(
+ highWord % MULTIPLER_LONGWORD_DECIMAL,
+ highWord / MULTIPLER_LONGWORD_DECIMAL,
+ 0,
+ highWordMultiplier.fast0, highWordMultiplier.fast1, highWordMultiplier.fast2,
+ fastResult)) {
+ return false;
+ }
+
+ long calc0 =
+ result0
+ + middleResult0
+ + fastResult.fast0;
+ result0 =
+ calc0 % MULTIPLER_LONGWORD_DECIMAL;
+ long calc1 =
+ calc0 / MULTIPLER_LONGWORD_DECIMAL
+ + result1
+ + middleResult1
+ + fastResult.fast1;
+ result1 =
+ calc1 % MULTIPLER_LONGWORD_DECIMAL;
+ result2 =
+ calc1 / MULTIPLER_LONGWORD_DECIMAL
+ + middleResult2
+ + fastResult.fast2;
+ }
+ }
+
+ // Let caller set negative sign if necessary.
+ if (result2 != 0) {
+ fastResult.fastIntegerDigitCount = TWO_X_LONGWORD_DECIMAL_DIGITS + fastHighWordPrecision(result2);
+ fastResult.fastSignum = 1;
+ } else if (result1 != 0) {
+ fastResult.fastIntegerDigitCount = LONGWORD_DECIMAL_DIGITS + fastHighWordPrecision(result1);
+ fastResult.fastSignum = 1;
+ } else if (result0 != 0) {
+ fastResult.fastIntegerDigitCount = fastHighWordPrecision(result0);
+ fastResult.fastSignum = 1;
+ } else {
+ fastResult.fastIntegerDigitCount = 0;
+ fastResult.fastSignum = 0;
+ }
+
+ fastResult.fast0 = result0;
+ fastResult.fast1 = result1;
+ fastResult.fast2 = result2;
+
+ return true;
+ }
+
+ //************************************************************************************************
+ // Decimal to Binary Conversion.
+
+ /**
+ * A helper method that produces a single binary word remainder from a fast decimal (and
+ * quotient).
+ *
+ * The fast decimal is longwords of 16 digits each and we need binary words of 2^N. Since
+ * we are in decimal form, we have do work to get to convert to binary form.
+ *
+ * We effectively need to produce on big binary value (i.e. greater than 64 bits since
+ * HiveDecimal needs 128 bits of binary which Java does not provide primitive support for)
+ * from the decimal long words and get the lower N binary bit remainder.
+ *
+ * We could try and do decimal division by 2^N to get the (integer) quotient, multiply the
+ * quotient by 2^N decimal, and finally do a decimal subtract that from the original decimal.
+ * The resulting decimal can be used to easily get the binary remainder.
+ *
+ * However, currently, we do not have fast decimal division.
+ *
+ * The "trick" we do here is to remember from your Algebra in school than multiplication and
+ * division are inverses of each other.
+ *
+ * So instead of doing decimal division by 2^N we multiply by the inverse: 2^-N.
+ *
+ * We produce 1 binary word (remainder) and a decimal quotient for the higher portion.
+ *
+ * So, the parameters are:
+ *
+ * The input decimal (dividendFast0, dividendFast1, and dividendFast2) that will produce a
+ * single binary word remainder and decimal quotient.
+ *
+ * The fast decimal inverse of 2^N = 2^-N (fastInverseConst).
+ *
+ * Where in the inverse multiplication result (quotientIntegerWordNum and
+ * quotientIntegerDigitNum) to find the quotient integer decimal portion.
+ *
+ * The fast decimal multiplier for converting the quotient integer to the larger number to
+ * subtract from the input decimal to get the remainder.
+ *
+ * And, the scratch longs where to store the result remainder word (index 3) and result quotient
+ * decimal longwords (indices 0 .. 2).
+ *
+ * @return True if the results were produced without overflow.
+ */
+ public static boolean doDecimalToBinaryDivisionRemainder(
+ long dividendFast0, long dividendFast1, long dividendFast2,
+ FastHiveDecimal fastInverseConst,
+ int quotientIntegerWordNum,
+ int quotientIntegerDigitNum,
+ FastHiveDecimal fastMultiplierConst,
+ long[] scratchLongs) {
+
+ // Multiply by inverse (2^-N) to do the 2^N division.
+ if (!fastMultiply5x6HalfWords(
+ dividendFast0, dividendFast1, dividendFast2,
+ fastInverseConst.fast0, fastInverseConst.fast1, fastInverseConst.fast2,
+ scratchLongs)) {
+ // Overflow.
+ return false;
+ }
+
+ final long divideFactor = powerOfTenTable[quotientIntegerDigitNum];
+ final long multiplyFactor = powerOfTenTable[LONGWORD_DECIMAL_DIGITS - quotientIntegerDigitNum];
+
+ // Extract the integer portion to get the quotient.
+ long quotientFast0 =
+ scratchLongs[quotientIntegerWordNum] / divideFactor
+ + ((scratchLongs[quotientIntegerWordNum + 1] % divideFactor) * multiplyFactor);
+ long quotientFast1 =
+ scratchLongs[quotientIntegerWordNum + 1] / divideFactor
+ + ((scratchLongs[quotientIntegerWordNum + 2] % divideFactor) * multiplyFactor);
+ long quotientFast2 =
+ scratchLongs[quotientIntegerWordNum + 2] / divideFactor;
+
+ // Multiply the integer quotient back out so we can subtract it from the original to get
+ // the remainder.
+ if (!fastMultiply5x6HalfWords(
+ quotientFast0, quotientFast1, quotientFast2,
+ fastMultiplierConst.fast0, fastMultiplierConst.fast1, fastMultiplierConst.fast2,
+ scratchLongs)) {
+ return false;
+ }
+
+ long quotientMultiplied0 = scratchLongs[0];
+ long quotientMultiplied1 = scratchLongs[1];
+ long quotientMultiplied2 = scratchLongs[2];
+
+ if (!doSubtractSameScaleNoUnderflow(
+ dividendFast0, dividendFast1, dividendFast2,
+ quotientMultiplied0, quotientMultiplied1, quotientMultiplied2,
+ scratchLongs)) {
+ // Underflow.
+ return false;
+ }
+
+ long remainderBinaryWord =
+ scratchLongs[1] * MULTIPLER_LONGWORD_DECIMAL
+ + scratchLongs[0];
+
+ // Pack the output into the scratch longs.
+ scratchLongs[0] = quotientFast0;
+ scratchLongs[1] = quotientFast1;
+ scratchLongs[2] = quotientFast2;
+
+ scratchLongs[3] = remainderBinaryWord;
+
+ return true;
+ }
+
+ /**
+ * Convert a fast decimal into 3 binary words of N bits each.
+ *
+ * The 3 binary words will form a large binary value that is the unsigned unscaled decimal value:
+ *
+ * highWord * 2^(M+L) + middleWord * 2^L + lowerWord.
+ *
+ * Where L is the number of bits in the lower word; M is the number of bits in the middle word.
+ * We let L and M be different to support the SerializationUtil serialization where the lower
+ * word is 62 bits and the remaining words are 63 bits...
+ *
+ * The fast decimal is longwords of 16 digits each and we need binary words of 2^N. Since
+ * we are in decimal form, we have do work to get to convert to binary form.
+ *
+ * See the comments for doDecimalToBinaryDivisionRemainder for details on the parameters.
+ *
+ * The lowerWord is produced by calling doDecimalToBinaryDivisionRemainder. The quotient from
+ * that is passed to doDecimalToBinaryDivisionRemainder to produce the middleWord. The final
+ * quotient is used to produce the highWord.
+ *
+ * @return True if the 3 binary words were produced without overflow. Overflow is not expected.
+ */
+ private static boolean doDecimalToBinaryConversion(
+ long fast0, long fast1, long fast2,
+ FastHiveDecimal fastInverseConst,
+ int quotientIntegerWordNum,
+ int quotientIntegerDigitNum,
+ FastHiveDecimal fastMultiplierConst,
+ long[] scratchLongs) {
+
+ long lowerBinaryWord;
+ long middleBinaryWord = 0;
+ long highBinaryWord = 0;
+
+ if (fastCompareTo(
+ 1,
+ fast0, fast1, fast2, 0,
+ 1,
+ fastMultiplierConst.fast0, fastMultiplierConst.fast1, fastMultiplierConst.fast2, 0) < 0) {
+
+ // Optimize: whole decimal fits in one binary word.
+
+ lowerBinaryWord =
+ fast1 * MULTIPLER_LONGWORD_DECIMAL
+ + fast0;
+
+ } else {
+
+ // Do division/remainder to get lower binary word; quotient will either be middle decimal
+ // or be both high and middle decimal that requires another division/remainder.
+
+ if (!doDecimalToBinaryDivisionRemainder(
+ fast0, fast1, fast2,
+ fastInverseConst,
+ quotientIntegerWordNum,
+ quotientIntegerDigitNum,
+ fastMultiplierConst,
+ scratchLongs)) {
+ // Overflow.
+ return false;
+ }
+
+ // Unpack the output.
+ long quotientFast0 = scratchLongs[0];
+ long quotientFast1 = scratchLongs[1];
+ long quotientFast2 = scratchLongs[2];
+
+ lowerBinaryWord = scratchLongs[3];
+
+ if (fastCompareTo(
+ 1,
+ quotientFast0, quotientFast1, quotientFast2, 0,
+ 1,
+ fastMultiplierConst.fast0, fastMultiplierConst.fast1, fastMultiplierConst.fast2, 0) < 0) {
+
+ // Optimize: whole decimal fits in two binary words.
+
+ middleBinaryWord =
+ quotientFast1 * MULTIPLER_LONGWORD_DECIMAL
+ + quotientFast0;
+
+ } else {
+ if (!doDecimalToBinaryDivisionRemainder(
+ quotientFast0, quotientFast1, quotientFast2,
+ fastInverseConst,
+ quotientIntegerWordNum,
+ quotientIntegerDigitNum,
+ fastMultiplierConst,
+ scratchLongs)) {
+ // Overflow.
+ return false;
+ }
+
+ highBinaryWord =
+ scratchLongs[1] * MULTIPLER_LONGWORD_DECIMAL
+ + scratchLongs[0];
+
+ middleBinaryWord = scratchLongs[3];
+
+ }
+ }
+
+ scratchLongs[0] = lowerBinaryWord;
+ scratchLongs[1] = middleBinaryWord;
+ scratchLongs[2] = highBinaryWord;
+
+ return true;
+ }
+
+ //************************************************************************************************
+ // Emulate SerializationUtils Deserialization used by ORC.
+
+ /*
+ * fastSerializationUtilsRead lower word is 62 bits (the lower bit is used as the sign and is
+ * removed). So, we need a multiplier 2^62
+ *
+ * 2^62 =
+ * 4611686018427387904 or
+ * 4,611,686,018,427,387,904 or
+ * 461,1686018427387904 (16 digit comma'd)
+ */
+ private static FastHiveDecimal FAST_HIVE_DECIMAL_TWO_POWER_62 =
+ new FastHiveDecimal(1, 1686018427387904L, 461L, 0, 19, 0);
+
+ /*
+ * fastSerializationUtilsRead middle word is 63 bits. So, we need a multiplier 2^63
+ *
+ * 2^63 =
+ * 9223372036854775808 (Long.MAX_VALUE) or
+ * 9,223,372,036,854,775,808 or
+ * 922,3372036854775808 (16 digit comma'd)
+ */
+ private static FastHiveDecimal FAST_HIVE_DECIMAL_TWO_POWER_63 =
+ new FastHiveDecimal(1, 3372036854775808L, 922L, 0, 19, 0);
+
+ /*
+ * fastSerializationUtilsRead high word multiplier:
+ *
+ * Multiply by 2^(62 + 63) -- 38 digits or 3 decimal words.
+ *
+ * (2^62)*(2^63) =
+ * 42535295865117307932921825928971026432 or
+ * (12345678901234567890123456789012345678)
+ * ( 1 2 3 )
+ * 42,535,295,865,117,307,932,921,825,928,971,026,432 or
+ * 425352,9586511730793292,1825928971026432 (16 digit comma'd)
+ */
+ private static FastHiveDecimal FAST_HIVE_DECIMAL_TWO_POWER_125 =
+ new FastHiveDecimal(1, 1825928971026432L, 9586511730793292L, 425352L, 38, 0);
+
+ /*
+ * Inverse of 2^63 = 2^-63. Please see comments for doDecimalToBinaryDivisionRemainder.
+ *
+ * Multiply by 1/2^63 = 1.08420217248550443400745280086994171142578125e-19 to divide by 2^63.
+ * As 16 digit comma'd 1084202172485,5044340074528008,6994171142578125
+ *
+ * Scale down: 63 = 44 fraction digits + 19 (negative exponent or number of zeros after dot).
+ *
+ * 3*16 (48) + 15 --> 63 down shift.
+ */
+ private static FastHiveDecimal FAST_HIVE_DECIMAL_TWO_POWER_63_INVERSE =
+ new FastHiveDecimal(1, 6994171142578125L, 5044340074528008L, 1084202172485L, 45, 0);
+
+ /*
+ * Where in the inverse multiplication result to find the quotient integer decimal portion.
+ *
+ * Please see comments for doDecimalToBinaryDivisionRemainder.
+ */
+ private static final int SERIALIZATION_UTILS_WRITE_QUOTIENT_INTEGER_WORD_NUM = 3;
+ private static final int SERIALIZATION_UTILS_WRITE_QUOTIENT_INTEGER_DIGIT_NUM = 15;
+
+ /**
+ * Deserialize data written in the format used by the SerializationUtils methods
+ * readBigInteger/writeBigInteger and create a decimal using the supplied scale.
+ *
+ * ORC uses those SerializationUtils methods for its serialization.
+ *
+ * A scratch bytes array is necessary to do the binary to decimal conversion for better
+ * performance. Pass a FAST_SCRATCH_BUFFER_LEN_SERIALIZATION_UTILS_READ byte array for
+ * scratchBytes.
+ *
+ * @return The deserialized decimal or null if the conversion failed.
+ */
+ public static boolean fastSerializationUtilsRead(InputStream inputStream, int scale,
+ byte[] scratchBytes,
+ FastHiveDecimal fastResult) throws IOException, EOFException {
+
+ // Following a suggestion from Gopal, quickly read in the bytes from the stream.
+ // CONSIDER: Have ORC read the whole input stream into a big byte array with one call to
+ // the read(byte[] b, int off, int len) method and then let this method read from the big
+ // byte array.
+ int readCount = 0;
+ int input;
+ do {
+ input = inputStream.read();
+ if (input == -1) {
+ throw new EOFException("Reading BigInteger past EOF from " + inputStream);
+ }
+ scratchBytes[readCount++] = (byte) input;
+ } while (input >= 0x80);
+
+ /*
+ * Determine the 3 binary words like what SerializationUtils.readBigInteger does.
+ */
+
+ long lowerWord63 = 0;
+ long middleWord63 = 0;
+ long highWord63 = 0;
+
+ long work = 0;
+ int offset = 0;
+ int readIndex = 0;
+ long b;
+ do {
+ b = scratchBytes[readIndex++];
+ work |= (0x7f & b) << (offset % 63);
+ offset += 7;
+ // if we've read 63 bits, roll them into the result
+ if (offset == 63) {
+ lowerWord63 = work;
+ work = 0;
+ } else if (offset % 63 == 0) {
+ if (offset == 126) {
+ middleWord63 = work;
+ } else if (offset == 189) {
+ highWord63 = work;
+ } else {
+ throw new EOFException("Reading more than 3 words of BigInteger");
+ }
+ work = 0;
+ }
+ } while (readIndex < readCount);
+
+ if (work != 0) {
+ if (offset < 63) {
+ lowerWord63 = work;
+ } else if (offset < 126) {
+ middleWord63 = work;
+ } else if (offset < 189) {
+ highWord63 =work;
+ } else {
+ throw new EOFException("Reading more than 3 words of BigInteger");
+ }
+ }
+
+ // Grab sign bit and shift it away.
+ boolean isNegative = ((lowerWord63 & 0x1) != 0);
+ lowerWord63 >>= 1;
+
+ /*
+ * Use common binary to decimal conversion method we share with fastSetFromBigIntegerBytes.
+ */
+ if (!doBinaryToDecimalConversion(
+ lowerWord63, middleWord63, highWord63,
+ FAST_HIVE_DECIMAL_TWO_POWER_62,
+ FAST_HIVE_DECIMAL_TWO_POWER_125, // 2^(62 + 63)
+ fastResult)) {
+ return false;
+ }
+
+ if (isNegative) {
+
+ // Adjust negative result, again doing what SerializationUtils.readBigInteger does.
+ if (!doAddSameScaleSameSign(
+ /* resultSignum */ 1,
+ fastResult.fast0, fastResult.fast1, fastResult.fast2,
+ 1, 0, 0,
+ fastResult)) {
+ return false;
+ }
+ }
+
+ if (fastResult.fast0 == 0 && fastResult.fast1 == 0 && fastResult.fast2 == 0) {
+ fastResult.fastSignum = 0;
+ } else {
+ fastResult.fastSignum = (isNegative ? -1 : 1);
+ final int rawPrecision = fastRawPrecision(fastResult);
+ fastResult.fastIntegerDigitCount = Math.max(0, rawPrecision - scale);
+ fastResult.fastScale = scale;
+
+ /*
+ * Just in case we deserialize a decimal with trailing zeroes...
+ */
+ final int resultTrailingZeroCount =
+ fastTrailingDecimalZeroCount(
+ fastResult.fast0, fastResult.fast1, fastResult.fast2,
+ fastResult.fastIntegerDigitCount, fastResult.fastScale);
+ if (resultTrailingZeroCount > 0) {
+ doFastScaleDown(
+ fastResult,
+ resultTrailingZeroCount,
+ fastResult);
+
+ fastResult.fastScale -= resultTrailingZeroCount;
+ }
+ }
+
+ return true;
+ }
+
+ //************************************************************************************************
+ // Emulate SerializationUtils Serialization used by ORC.
+
+ /**
+ * Write the value of this decimal just like SerializationUtils.writeBigInteger. It header
+ * comments are:
+ *
+ * Write the arbitrarily sized signed BigInteger in vint format.
+ *
+ * Signed integers are encoded using the low bit as the sign bit using zigzag
+ * encoding.
+ *
+ * Each byte uses the low 7 bits for data and the high bit for stop/continue.
+ *
+ * Bytes are stored LSB first.
+ *
+ * NOTE:
+ * SerializationUtils.writeBigInteger sometimes pads the result with extra zeroes due to
+ * BigInteger.bitLength -- we do not emulate that. SerializationUtils.readBigInteger will
+ * produce the same result for both.
+ *
+ * @return True if the decimal was successfully serialized into the output stream.
+ */
+ public static boolean fastSerializationUtilsWrite(OutputStream outputStream,
+ int fastSignum, long fast0, long fast1, long fast2,
+ int fastIntegerDigitCount, int fastScale,
+ long[] scratchLongs)
+ throws IOException {
+
+ boolean isNegative = (fastSignum == -1);
+
+ /*
+ * The sign is encoded as the least significant bit.
+ *
+ * We need to adjust our decimal before conversion to binary.
+ *
+ * Positive:
+ * Multiply by 2.
+ *
+ * Negative:
+ * Logic in SerializationUtils.writeBigInteger does a negate on the BigInteger. We
+ * do not have to since FastHiveDecimal stores the numbers unsigned in fast0, fast1,
+ * and fast2. We do need to subtract one though.
+ *
+ * And then multiply by 2 and add in the 1 sign bit.
+ *
+ * CONSIDER: This could be combined.
+ */
+ long adjust0;
+ long adjust1;
+ long adjust2;
+
+ if (isNegative) {
+
+ // Subtract 1.
+ long r0 = fast0 - 1;
+ long r1;
+ if (r0 < 0) {
+ adjust0 = r0 + MULTIPLER_LONGWORD_DECIMAL;
+ r1 = fast1 - 1;
+ } else {
+ adjust0 = r0;
+ r1 = fast1;
+ }
+ if (r1 < 0) {
+ adjust1 = r1 + MULTIPLER_LONGWORD_DECIMAL;
+ adjust2 = fast2 - 1;
+ } else {
+ adjust1 = r1;
+ adjust2 = fast2;
+ }
+ if (adjust2 < 0) {
+ return false;
+ }
+
+ // Now multiply by 2 and add 1 sign bit.
+ r0 = adjust0 * 2 + 1;
+ adjust0 =
+ r0 % MULTIPLER_LONGWORD_DECIMAL;
+ r1 =
+ adjust1 * 2
+ + r0 / MULTIPLER_LONGWORD_DECIMAL;
+ adjust1 =
+ r1 % MULTIPLER_LONGWORD_DECIMAL;
+ adjust2 =
+ adjust2 * 2
+ + r1 / MULTIPLER_LONGWORD_DECIMAL;
+
+ } else {
+
+ // Multiply by 2 to make room for 0 sign bit.
+ long r0 = fast0 * 2;
+ adjust0 =
+ r0 % MULTIPLER_LONGWORD_DECIMAL;
+ final long r1 =
+ fast1 * 2
+ + r0 / MULTIPLER_LONGWORD_DECIMAL;
+ adjust1 =
+ r1 % MULTIPLER_LONGWORD_DECIMAL;
+ adjust2 =
+ fast2 * 2
+ + r1 / MULTIPLER_LONGWORD_DECIMAL;
+
+ }
+
+ /*
+ * Use common decimal to binary conversion method we share with fastBigIntegerBytes.
+ */
+ if (!doDecimalToBinaryConversion(
+ adjust0, adjust1, adjust2,
+ FAST_HIVE_DECIMAL_TWO_POWER_63_INVERSE,
+ SERIALIZATION_UTILS_WRITE_QUOTIENT_INTEGER_WORD_NUM,
+ SERIALIZATION_UTILS_WRITE_QUOTIENT_INTEGER_DIGIT_NUM,
+ FAST_HIVE_DECIMAL_TWO_POWER_63,
+ scratchLongs)) {
+ // Overflow.
+ return false;
+ }
+
+ long lowerWord63 = scratchLongs[0];
+ long middleWord63 = scratchLongs[1];
+ long highWord63 = scratchLongs[2];
+
+ int wordCount;
+ if (highWord63 != 0) {
+ wordCount = 3;
+ } else if (middleWord63 != 0) {
+ wordCount = 2;
+ } else {
+ wordCount = 1;
+ }
+
+ // Write out the first 63 bits worth of data.
+ long lowBits = lowerWord63;
+ for(int i=0; i < 9; ++i) {
+ // If this is the last byte, leave the high bit off
+ if (wordCount == 1 && (lowBits & ~0x7f) == 0) {
+ outputStream.write((byte) lowBits);
+ return true;
+ } else {
+ outputStream.write((byte) (0x80 | (lowBits & 0x7f)));
+ lowBits >>>= 7;
+ }
+ }
+ if (wordCount <= 1) {
+ throw new RuntimeException("Expecting write word count > 1");
+ }
+
+ lowBits = middleWord63;
+ for(int i=0; i < 9; ++i) {
+ // If this is the last byte, leave the high bit off
+ if (wordCount == 2 && (lowBits & ~0x7f) == 0) {
+ outputStream.write((byte) lowBits);
+ return true;
+ } else {
+ outputStream.write((byte) (0x80 | (lowBits & 0x7f)));
+ lowBits >>>= 7;
+ }
+ }
+
+ lowBits = highWord63;
+ for(int i=0; i < 9; ++i) {
+ // If this is the last byte, leave the high bit off
+ if ((lowBits & ~0x7f) == 0) {
+ outputStream.write((byte) lowBits);
+ return true;
+ } else {
+ outputStream.write((byte) (0x80 | (lowBits & 0x7f)));
+ lowBits >>>= 7;
+ }
+ }
+
+ // Should not get here.
+ throw new RuntimeException("Unexpected");
+ }
+
+ //************************************************************************************************
+ // Emulate BigInteger deserialization used by LazyBinary and others.
+
+ /*
+ * fastSetFromBigIntegerBytes word size we choose is 56 bits to stay below the 64 bit sign bit:
+ * So, we need a multiplier 2^56
+ *
+ * 2^56 =
+ * 72057594037927936 or
+ * 72,057,594,037,927,936 or
+ * 7,2057594037927936 (16 digit comma'd)
+ */
+ private static FastHiveDecimal FAST_HIVE_DECIMAL_TWO_POWER_56 =
+ new FastHiveDecimal(1, 2057594037927936L, 7L, 0, 17, 0);
+
+ /*
+ * fastSetFromBigIntegerBytes high word multiplier is 2^(56 + 56)
+ *
+ * (2^56)*(2^56) =
+ * 5192296858534827628530496329220096 or
+ * (1234567890123456789012345678901234)
+ * ( 1 2 3 )
+ * 5,192,296,858,534,827,628,530,496,329,220,096 or
+ * 51,9229685853482762,8530496329220096 (16 digit comma'd)
+ */
+ private static FastHiveDecimal FAST_HIVE_DECIMAL_TWO_POWER_112 =
+ new FastHiveDecimal(1, 8530496329220096L, 9229685853482762L, 51L, 34, 0);
+
+ // Multiply by 1/2^56 or 1.387778780781445675529539585113525390625e-17 to divide by 2^56.
+ // As 16 digit comma'd 13877787,8078144567552953,9585113525390625
+ //
+ // Scale down: 56 = 39 fraction digits + 17 (negative exponent or number of zeros after dot).
+ //
+ // 3*16 (48) + 8 --> 56 down shift.
+ //
+ private static FastHiveDecimal FAST_HIVE_DECIMAL_TWO_POWER_56_INVERSE =
+ new FastHiveDecimal(1, 9585113525390625L, 8078144567552953L, 13877787L, 40, 0);
+
+ /*
+ * Where in the inverse multiplication result to find the quotient integer decimal portion.
+ *
+ * Please see comments for doDecimalToBinaryDivisionRemainder.
+ */
+ private static final int BIG_INTEGER_BYTES_QUOTIENT_INTEGER_WORD_NUM = 3;
+ private static final int BIG_INTEGER_BYTES_QUOTIENT_INTEGER_DIGIT_NUM = 8;
+
+ private static int INITIAL_SHIFT = 48; // 56 bits minus 1 byte.
+
+ // Long masks and values.
+ private static long LONG_56_BIT_MASK = 0xFFFFFFFFFFFFFFL;
+ private static long LONG_TWO_TO_56_POWER = LONG_56_BIT_MASK + 1L;
+ private static long LONG_BYTE_MASK = 0xFFL;
+ private static long LONG_BYTE_HIGH_BIT_MASK = 0x80L;
+
+ // Byte values.
+ private static byte BYTE_ALL_BITS = (byte) 0xFF;
+
+ /**
+ * Convert bytes in the format used by BigInteger's toByteArray format (and accepted by its
+ * constructor) into a decimal using the specified scale.
+ *
+ * Our bigIntegerBytes methods create bytes in this format, too.
+ *
+ * This method is designed for high performance and does not create an actual BigInteger during
+ * binary to decimal conversion.
+ *
+ * @return
+ */
+ public static boolean fastSetFromBigIntegerBytesAndScale(
+ byte[] bytes, int offset, int length, int scale,
+ FastHiveDecimal fastResult) {
+
+ final int bytesLength = bytes.length;
+
+ if (offset < 0 || offset >= bytesLength) {
+ return false;
+ }
+ final int end = offset + length;
+ if (end <= offset || end > bytesLength) {
+ return false;
+ }
+
+ final int startOffset = offset;
+
+ // Roughly based on BigInteger code.
+
+ boolean isNegative = (bytes[offset] < 0);
+ if (isNegative) {
+
+ // Find first non-sign (0xff) byte of input.
+ while (offset < end) {
+ if (bytes[offset] != -1) {
+ break;
+ }
+ offset++;
+ }
+ if (offset > end) {
+ return false;
+ }
+ } else {
+
+ // Strip leading zeroes -- although there shouldn't be any for a decimal.
+
+ while (offset < end && bytes[offset] == 0) {
+ offset++;
+ }
+ if (offset >= end) {
+ // Zero.
+ return true;
+ }
+ }
+
+ long lowerWord56 = 0;
+ long middleWord56 = 0;
+ long highWord56 = 0;
+
+ int reverseIndex = end;
+
+ long work;
+ int shift;
+
+ final int lowestCount = Math.min(reverseIndex - offset, 7);
+ shift = 0;
+ for (int i = 0; i < lowestCount; i++) {
+ work = bytes[--reverseIndex] & 0xFF;
+ lowerWord56 |= work << shift;
+ shift += 8;
+ }
+
+ if (reverseIndex <= offset) {
+ if (isNegative) {
+ lowerWord56 = ~lowerWord56 & ((1L << shift) - 1);
+ }
+ } else {
+
+ // Go on to middle word.
+
+ final int middleCount = Math.min(reverseIndex - offset, 7);
+ shift = 0;
+ for (int i = 0; i < middleCount; i++) {
+ work = bytes[--reverseIndex] & 0xFF;
+ middleWord56 |= work << shift;
+ shift += 8;
+ }
+ if (reverseIndex <= offset) {
+ if (isNegative) {
+ lowerWord56 = ~lowerWord56 & LONG_56_BIT_MASK;
+ middleWord56 = ~middleWord56 & ((1L << shift) - 1);
+ }
+ } else {
+
+ // Go on to high word.
+
+ final int highCount = Math.min(reverseIndex - offset, 7);
+ shift = 0;
+ for (int i = 0; i < highCount; i++) {
+ work = bytes[--reverseIndex] & 0xFF;
+ highWord56 |= work << shift;
+ shift += 8;
+ }
+ if (isNegative) {
+ // We only need to apply negation to all 3 words when there are 3 words, etc.
+ lowerWord56 = ~lowerWord56 & LONG_56_BIT_MASK;
+ middleWord56 = ~middleWord56 & LONG_56_BIT_MASK;
+ highWord56 = ~highWord56 & ((1L << shift) - 1);
+ }
+ }
+ }
+
+ if (!doBinaryToDecimalConversion(
+ lowerWord56, middleWord56, highWord56,
+ FAST_HIVE_DECIMAL_TWO_POWER_56,
+ FAST_HIVE_DECIMAL_TWO_POWER_112, // 2^(56 + 56)
+ fastResult)) {
+ // Overflow. Use slower alternate.
+ return doAlternateSetFromBigIntegerBytesAndScale(
+ bytes, startOffset, length, scale,
+ fastResult);
+ }
+
+ // System.out.println("fastSetFromBigIntegerBytesAndScale fast0 " + fastResult.fast0 + " fast1 " + fastResult.fast1 + " fast2 " + fastResult.fast2);
+ if (isNegative) {
+ if (!doAddSameScaleSameSign(
+ /* resultSignum */ 1,
+ fastResult.fast0, fastResult.fast1, fastResult.fast2,
+ 1, 0, 0,
+ fastResult)) {
+ // Overflow. Use slower alternate.
+ return doAlternateSetFromBigIntegerBytesAndScale(
+ bytes, startOffset, length, scale,
+ fastResult);
+ }
+ }
+
+ if (fastResult.fast0 == 0 && fastResult.fast1 == 0 && fastResult.fast2 == 0) {
+ fastResult.fastSignum = 0;
+ } else {
+ fastResult.fastSignum = (isNegative ? -1 : 1);
+ fastResult.fastScale = scale;
+ final int rawPrecision = fastRawPrecision(fastResult);
+ fastResult.fastIntegerDigitCount = Math.max(0, rawPrecision - scale);
+
+ /*
+ * Just in case we deserialize a decimal with trailing zeroes...
+ */
+ final int resultTrailingZeroCount =
+ fastTrailingDecimalZeroCount(
+ fastResult.fast0, fastResult.fast1, fastResult.fast2,
+ fastResult.fastIntegerDigitCount, fastResult.fastScale);
+ if (resultTrailingZeroCount > 0) {
+ doFastScaleDown(
+ fastResult,
+ resultTrailingZeroCount,
+ fastResult);
+
+ fastResult.fastScale -= resultTrailingZeroCount;
+ }
+ }
+
+ return true;
+ }
+
+ /**
+ * When fastSetFromBigIntegerBytesAndScale can handle the input because it is too large,
+ * we fall back to this.
+ */
+ private static boolean doAlternateSetFromBigIntegerBytesAndScale(
+ byte[] bytes, int offset, int length, int scale,
+ FastHiveDecimal fastResult) {
+
+ byte[] byteArray = Arrays.copyOfRange(bytes, offset, offset + length);
+
+ BigInteger bigInteger = new BigInteger(byteArray);
+ // System.out.println("doAlternateSetFromBigIntegerBytesAndScale bigInteger " + bigInteger);
+ BigDecimal bigDecimal = new BigDecimal(bigInteger, scale);
+ // System.out.println("doAlternateSetFromBigIntegerBytesAndScale bigDecimal " + bigDecimal);
+ fastResult.fastReset();
+ return fastSetFromBigDecimal(bigDecimal, true, fastResult);
+ }
+
+ //************************************************************************************************
+ // Emulate BigInteger serialization used by LazyBinary, Avro, Parquet, and possibly others.
+
+ public static int fastBigIntegerBytes(
+ final int fastSignum, long fast0, long fast1, long fast2,
+ int fastIntegerDigitCount, int fastScale,
+ int fastSerializeScale,
+ long[] scratchLongs, byte[] buffer) {
+ if (fastSerializeScale != -1) {
+ return
+ fastBigIntegerBytesScaled(
+ fastSignum, fast0, fast1, fast2,
+ fastIntegerDigitCount, fastScale,
+ fastSerializeScale,
+ scratchLongs, buffer);
+ } else {
+ return
+ fastBigIntegerBytesUnscaled(
+ fastSignum, fast0, fast1, fast2,
+ scratchLongs, buffer);
+ }
+ }
+
+ /**
+ * Return binary representation of this decimal's BigInteger equivalent unscaled value using
+ * the format that the BigInteger's toByteArray method returns (and the BigInteger constructor
+ * accepts).
+ *
+ * Used by LazyBinary, Avro, and Parquet serialization.
+ *
+ * Scratch objects necessary to do the decimal to binary conversion without actually creating a
+ * BigInteger object are passed for better performance.
+ *
+ * Allocate scratchLongs with SCRATCH_LONGS_LEN longs.
+ * And, allocate buffer with SCRATCH_BUFFER_LEN_BIG_INTEGER_BYTES bytes.
+ * @return The number of bytes used for the binary result in buffer. Otherwise, 0 if the
+ * conversion failed.
+ */
+ public static int fastBigIntegerBytesUnscaled(
+ final int fastSignum, long fast0, long fast1, long fast2,
+ long[] scratchLongs, byte[] buffer) {
+
+ /*
+ * Algorithm:
+ * 1) Convert decimal to three 56-bit words (three is enough for the decimal since we
+ * represent the decimal with trailing zeroes trimmed).
+ * 2) Skip leading zeroes in the words.
+ * 3) Once we find real data (i.e. a non-zero byte), add a sign byte to buffer if necessary.
+ * 4) Add bytes from the (rest of) 56-bit words.
+ * 5) Return byte count.
+ */
+
+ if (fastSignum == 0) {
+ buffer[0] = 0;
+ return 1;
+ }
+
+ boolean isNegative = (fastSignum == -1);
+
+ /*
+ * Use common conversion method we share with fastSerializationUtilsWrite.
+ */
+ if (!doDecimalToBinaryConversion(
+ fast0, fast1, fast2,
+ FAST_HIVE_DECIMAL_TWO_POWER_56_INVERSE,
+ BIG_INTEGER_BYTES_QUOTIENT_INTEGER_WORD_NUM,
+ BIG_INTEGER_BYTES_QUOTIENT_INTEGER_DIGIT_NUM,
+ FAST_HIVE_DECIMAL_TWO_POWER_56,
+ scratchLongs)) {
+ // Overflow. This is not expected.
+ return 0;
+ }
+
+ int byteIndex = 0;
+
+ long word0 = scratchLongs[0];
+ long word1 = scratchLongs[1];
+ long word2 = scratchLongs[2];
+
+ if (!isNegative) {
+
+ // Positive number.
+
+ long longWork = 0;
+
+ int shift = INITIAL_SHIFT;
+
+ if (word2 != 0L) {
+
+ // Skip leading zeroes in word2.
+
+ while (true) {
+ longWork = (word2 >> shift) & LONG_BYTE_MASK;
+ if (longWork != 0) {
+ break;
+ }
+ if (shift == 0) {
+ throw new RuntimeException("Unexpected #1");
+ }
+ shift -= Byte.SIZE;
+ }
+
+ // Now that we have found real data, emit sign byte if necessary.
+ if ((longWork & LONG_BYTE_HIGH_BIT_MASK) != 0) {
+ // Add sign byte since high bit is on.
+ buffer[byteIndex++] = (byte) 0;
+ }
+
+ // Emit the rest of word2
+ while (true) {
+ buffer[byteIndex++] = (byte) longWork;
+ if (shift == 0) {
+ break;
+ }
+ shift -= Byte.SIZE;
+ longWork = (word2 >> shift) & LONG_BYTE_MASK;
+ }
+
+ shift = INITIAL_SHIFT;
+ }
+
+ if (byteIndex == 0 && word1 == 0L) {
+
+ // Skip word1, also.
+
+ } else {
+
+ if (byteIndex == 0) {
+
+ // Skip leading zeroes in word1.
+
+ while (true) {
+ longWork = (word1 >> shift) & LONG_BYTE_MASK;
+ if (longWork != 0) {
+ break;
+ }
+ if (shift == 0) {
+ throw new RuntimeException("Unexpected #2");
+ }
+ shift -= Byte.SIZE;
+ }
+
+ // Now that we have found real data, emit sign byte if necessary.
+ if ((longWork & LONG_BYTE_HIGH_BIT_MASK) != 0) {
+ // Add sign byte since high bit is on.
+ buffer[byteIndex++] = (byte) 0;
+ }
+
+ } else {
+ longWork = (word1 >> shift) & LONG_BYTE_MASK;
+ }
+
+ // Emit the rest of word1
+
+ while (true) {
+ buffer[byteIndex++] = (byte) longWork;
+ if (shift == 0) {
+ break;
+ }
+ shift -= Byte.SIZE;
+ longWork = (word1 >> shift) & LONG_BYTE_MASK;
+ }
+
+ shift = INITIAL_SHIFT;
+ }
+
+ if (byteIndex == 0) {
+
+ // Skip leading zeroes in word0.
+
+ while (true) {
+ longWork = (word0 >> shift) & LONG_BYTE_MASK;
+ if (longWork != 0) {
+ break;
+ }
+ if (shift == 0) {
+
+ // All zeroes -- we should have handled this earlier.
+ throw new RuntimeException("Unexpected #3");
+ }
+ shift -= Byte.SIZE;
+ }
+
+ // Now that we have found real data, emit sign byte if necessary.
+ if ((longWork & LONG_BYTE_HIGH_BIT_MASK) != 0) {
+ // Add sign byte since high bit is on.
+ buffer[byteIndex++] = (byte) 0;
+ }
+
+ } else {
+ longWork = (word0 >> shift) & LONG_BYTE_MASK;
+ }
+
+ // Emit the rest of word0.
+ while (true) {
+ buffer[byteIndex++] = (byte) longWork;
+ if (shift == 0) {
+ break;
+ }
+ shift -= Byte.SIZE;
+ longWork = (word0 >> shift) & LONG_BYTE_MASK;
+ }
+
+ } else {
+
+ // Negative number.
+
+ // Subtract 1 for two's compliment adjustment.
+ word0--;
+ if (word0 < 0) {
+ word0 += LONG_TWO_TO_56_POWER;
+ word1--;
+ if (word1 < 0) {
+ word1 += LONG_TWO_TO_56_POWER;
+ word2--;
+ if (word2 < 0) {
+ // Underflow.
+ return 0;
+ }
+ }
+ }
+
+ long longWork = 0;
+
+ int shift = INITIAL_SHIFT;
+
+ if (word2 != 0L) {
+
+ // Skip leading zeroes in word2.
+
+ while (true) {
+ longWork = (word2 >> shift) & LONG_BYTE_MASK;
+ if (longWork != 0) {
+ break;
+ }
+ if (shift == 0) {
+ throw new RuntimeException("Unexpected #1");
+ }
+ shift -= Byte.SIZE;
+ }
+
+ // Now that we have found real data, emit sign byte if necessary and do negative fixup.
+
+ longWork = (~longWork & LONG_BYTE_MASK);
+ if (((longWork) & LONG_BYTE_HIGH_BIT_MASK) == 0) {
+ // Add sign byte since high bit is off.
+ buffer[byteIndex++] = BYTE_ALL_BITS;
+ }
+
+ // Invert words.
+ word2 = ~word2;
+ word1 = ~word1;
+ word0 = ~word0;
+
+ // Emit the rest of word2
+ while (true) {
+ buffer[byteIndex++] = (byte) longWork;
+ if (shift == 0) {
+ break;
+ }
+ shift -= Byte.SIZE;
+ longWork = (word2 >> shift) & LONG_BYTE_MASK;
+ }
+
+ shift = INITIAL_SHIFT;
+ }
+
+ if (byteIndex == 0 && word1 == 0L) {
+
+ // Skip word1, also.
+
+ } else {
+
+ if (byteIndex == 0) {
+
+ // Skip leading zeroes in word1.
+
+ while (true) {
+ longWork = (word1 >> shift) & LONG_BYTE_MASK;
+ if (longWork != 0) {
+ break;
+ }
+ if (shift == 0) {
+ throw new RuntimeException("Unexpected #2");
+ }
+ shift -= Byte.SIZE;
+ }
+
+ // Now that we have found real data, emit sign byte if necessary and do negative fixup.
+
+ longWork = (~longWork & LONG_BYTE_MASK);
+ if ((longWork & LONG_BYTE_HIGH_BIT_MASK) == 0) {
+ // Add sign byte since high bit is off.
+ buffer[byteIndex++] = BYTE_ALL_BITS;
+ }
+
+ // Invert words.
+ word1 = ~word1;
+ word0 = ~word0;
+
+ } else {
+ longWork = (word1 >> shift) & LONG_BYTE_MASK;
+ }
+
+ // Emit the rest of word1
+
+ while (true) {
+ buffer[byteIndex++] = (byte) longWork;
+ if (shift == 0) {
+ break;
+ }
+ shift -= Byte.SIZE;
+ longWork = (word1 >> shift) & LONG_BYTE_MASK;
+ }
+
+ shift = INITIAL_SHIFT;
+ }
+
+ if (byteIndex == 0) {
+
+ // Skip leading zeroes in word0.
+
+ while (true) {
+ longWork = (word0 >> shift) & LONG_BYTE_MASK;
+ if (longWork != 0) {
+ break;
+ }
+ if (shift == 0) {
+
+ // All zeroes.
+
+ // -1 special case. Unsigned magnitude 1 - two's compliment adjustment 1 = 0.
+ buffer[0] = BYTE_ALL_BITS;
+ return 1;
+ }
+ shift -= Byte.SIZE;
+ }
+
+ // Now that we have found real data, emit sign byte if necessary and do negative fixup.
+
+ longWork = (~longWork & LONG_BYTE_MASK);
+ if ((longWork & LONG_BYTE_HIGH_BIT_MASK) == 0) {
+ // Add sign byte since high bit is off.
+
<TRUNCATED>