You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ap...@apache.org on 2023/07/18 13:46:48 UTC

[arrow] branch main updated: GH-35942: [C++] Improve Decimal ToReal accuracy (#36667)

This is an automated email from the ASF dual-hosted git repository.

apitrou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new 245141e00a GH-35942: [C++] Improve Decimal ToReal accuracy (#36667)
245141e00a is described below

commit 245141e00a6fe2e72da701a72be6e72ad116154f
Author: Jin Shang <sh...@gmail.com>
AuthorDate: Tue Jul 18 21:46:42 2023 +0800

    GH-35942: [C++] Improve Decimal ToReal accuracy (#36667)
    
    
    
    ### Rationale for this change
    
    The current implementation of `Decimal::ToReal` can be naively represented as the following pseudocode:
    ```
    Real v = static_cast<Real>(decimal.as_int128/256())
    return v * (10.0**-scale)
    ```
    It stores the intermediate unscaled int128/256 value as a float/double. The unscaled int128/256 value can be very large when the decimal has a large scale, which causes precision issues such as in #36602.
    
    ### What changes are included in this PR?
    
    Avoid storing the unscaled large int as float if the representation is not precise, by spliting the decimal into integral and fractional parts and dealing with them separately. This algorithm guarantees that:
    1. If the decimal is an integer, the conversion is exact.
    2. If the number of fractional digits is <= RealTraits<Real>::kMantissaDigits (e.g. 8 for float and 16 for double), the conversion is within 1 ULP of the exact value. For example Decimal128::ToReal<float>(9999.999) falls into this category because the integer 9999999 is precisely representable by float, whereas 9999.9999 would be in the next category.
    3. Otherwise, the conversion is within 2^(-RealTraits<Real>::kMantissaDigits+1) (e.g. 2^-23 for float and 2^-52 for double) of the exact value.
    
    Here "exact value" means the closest representable value by Real.
    
    I believe this algorithm is good enough, because an"exact" algorithm would require iterative multiplication and subtraction of decimals to determain the binary representation of its fractional part. Yet the result would still almost always be inaccurate because float/double can only accurately represent powers of two. IMHO It's not worth it to spend that many expensive operations just to improve the result by one ULP.
    
    ### Are these changes tested?
    
    Yes.
    
    ### Are there any user-facing changes?
     No.
    
    * Closes: #35942
    
    Lead-authored-by: Jin Shang <sh...@gmail.com>
    Co-authored-by: Antoine Pitrou <pi...@free.fr>
    Signed-off-by: Antoine Pitrou <an...@python.org>
---
 cpp/src/arrow/compute/kernels/scalar_cast_test.cc |   3 +-
 cpp/src/arrow/util/basic_decimal.cc               |  10 ++
 cpp/src/arrow/util/basic_decimal.h                |   4 +
 cpp/src/arrow/util/decimal.cc                     |  58 ++++++++-
 cpp/src/arrow/util/decimal_internal.h             |   4 +
 cpp/src/arrow/util/decimal_test.cc                | 148 ++++++++++++++++------
 6 files changed, 183 insertions(+), 44 deletions(-)

diff --git a/cpp/src/arrow/compute/kernels/scalar_cast_test.cc b/cpp/src/arrow/compute/kernels/scalar_cast_test.cc
index 083a85eb34..1db06a7625 100644
--- a/cpp/src/arrow/compute/kernels/scalar_cast_test.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_cast_test.cc
@@ -1025,7 +1025,8 @@ TEST(Cast, DecimalToFloating) {
     }
   }
 
-  // Edge cases are tested for Decimal128::ToReal() and Decimal256::ToReal()
+  // Edge cases are tested for Decimal128::ToReal() and Decimal256::ToReal() in
+  // decimal_test.cc
 }
 
 TEST(Cast, DecimalToString) {
diff --git a/cpp/src/arrow/util/basic_decimal.cc b/cpp/src/arrow/util/basic_decimal.cc
index f2fd39d6f3..0835ab9074 100644
--- a/cpp/src/arrow/util/basic_decimal.cc
+++ b/cpp/src/arrow/util/basic_decimal.cc
@@ -969,6 +969,16 @@ bool BasicDecimal256::FitsInPrecision(int32_t precision) const {
   return BasicDecimal256::Abs(*this) < kDecimal256PowersOfTen[precision];
 }
 
+void BasicDecimal256::GetWholeAndFraction(int scale, BasicDecimal256* whole,
+                                          BasicDecimal256* fraction) const {
+  DCHECK_GE(scale, 0);
+  DCHECK_LE(scale, 76);
+
+  BasicDecimal256 multiplier(kDecimal256PowersOfTen[scale]);
+  auto s = Divide(multiplier, whole, fraction);
+  DCHECK_EQ(s, DecimalStatus::kSuccess);
+}
+
 const BasicDecimal256& BasicDecimal256::GetScaleMultiplier(int32_t scale) {
   DCHECK_GE(scale, 0);
   DCHECK_LE(scale, 76);
diff --git a/cpp/src/arrow/util/basic_decimal.h b/cpp/src/arrow/util/basic_decimal.h
index b263bb234a..d8a91ea76b 100644
--- a/cpp/src/arrow/util/basic_decimal.h
+++ b/cpp/src/arrow/util/basic_decimal.h
@@ -366,6 +366,10 @@ class ARROW_EXPORT BasicDecimal256 : public GenericBasicDecimal<BasicDecimal256,
   /// \brief Get the lowest bits of the two's complement representation of the number.
   uint64_t low_bits() const { return bit_util::little_endian::Make(array_)[0]; }
 
+  /// \brief separate the integer and fractional parts for the given scale.
+  void GetWholeAndFraction(int32_t scale, BasicDecimal256* whole,
+                           BasicDecimal256* fraction) const;
+
   /// \brief Scale multiplier for given scale value.
   static const BasicDecimal256& GetScaleMultiplier(int32_t scale);
   /// \brief Half-scale multiplier for given scale value.
diff --git a/cpp/src/arrow/util/decimal.cc b/cpp/src/arrow/util/decimal.cc
index 1f8447059f..704b6bb9d4 100644
--- a/cpp/src/arrow/util/decimal.cc
+++ b/cpp/src/arrow/util/decimal.cc
@@ -305,12 +305,39 @@ struct Decimal128RealConversion
   }
 
   template <typename Real>
-  static Real ToRealPositive(const Decimal128& decimal, int32_t scale) {
+  static Real ToRealPositiveNoSplit(const Decimal128& decimal, int32_t scale) {
     Real x = RealTraits<Real>::two_to_64(static_cast<Real>(decimal.high_bits()));
     x += static_cast<Real>(decimal.low_bits());
     x *= LargePowerOfTen<Real>(-scale);
     return x;
   }
+
+  /// An appoximate conversion from Decimal128 to Real that guarantees:
+  /// 1. If the decimal is an integer, the conversion is exact.
+  /// 2. If the number of fractional digits is <= RealTraits<Real>::kMantissaDigits (e.g.
+  ///    8 for float and 16 for double), the conversion is within 1 ULP of the exact
+  ///    value.
+  /// 3. Otherwise, the conversion is within 2^(-RealTraits<Real>::kMantissaDigits+1)
+  ///    (e.g. 2^-23 for float and 2^-52 for double) of the exact value.
+  /// Here "exact value" means the closest representable value by Real.
+  template <typename Real>
+  static Real ToRealPositive(const Decimal128& decimal, int32_t scale) {
+    if (scale <= 0 || (decimal.high_bits() == 0 &&
+                       decimal.low_bits() <= RealTraits<Real>::kMaxPreciseInteger)) {
+      // No need to split the decimal if it is already an integer (scale <= 0) or if it
+      // can be precisely represented by Real
+      return ToRealPositiveNoSplit<Real>(decimal, scale);
+    }
+
+    // Split decimal into whole and fractional parts to avoid precision loss
+    BasicDecimal128 whole_decimal, fraction_decimal;
+    decimal.GetWholeAndFraction(scale, &whole_decimal, &fraction_decimal);
+
+    Real whole = ToRealPositiveNoSplit<Real>(whole_decimal, 0);
+    Real fraction = ToRealPositiveNoSplit<Real>(fraction_decimal, scale);
+
+    return whole + fraction;
+  }
 };
 
 }  // namespace
@@ -967,7 +994,7 @@ struct Decimal256RealConversion
   }
 
   template <typename Real>
-  static Real ToRealPositive(const Decimal256& decimal, int32_t scale) {
+  static Real ToRealPositiveNoSplit(const Decimal256& decimal, int32_t scale) {
     DCHECK_GE(decimal, 0);
     Real x = 0;
     const auto parts_le = bit_util::little_endian::Make(decimal.native_endian_array());
@@ -978,6 +1005,33 @@ struct Decimal256RealConversion
     x *= LargePowerOfTen<Real>(-scale);
     return x;
   }
+
+  /// An appoximate conversion from Decimal256 to Real that guarantees:
+  /// 1. If the decimal is an integer, the conversion is exact.
+  /// 2. If the number of fractional digits is <= RealTraits<Real>::kMantissaDigits (e.g.
+  ///    8 for float and 16 for double), the conversion is within 1 ULP of the exact
+  ///    value.
+  /// 3. Otherwise, the conversion is within 2^(-RealTraits<Real>::kMantissaDigits+1)
+  ///    (e.g. 2^-23 for float and 2^-52 for double) of the exact value.
+  /// Here "exact value" means the closest representable value by Real.
+  template <typename Real>
+  static Real ToRealPositive(const Decimal256& decimal, int32_t scale) {
+    const auto parts_le = decimal.little_endian_array();
+    if (scale <= 0 || (parts_le[3] == 0 && parts_le[2] == 0 && parts_le[1] == 0 &&
+                       parts_le[0] < RealTraits<Real>::kMaxPreciseInteger)) {
+      // No need to split the decimal if it is already an integer (scale <= 0) or if it
+      // can be precisely represented by Real
+      return ToRealPositiveNoSplit<Real>(decimal, scale);
+    }
+
+    // Split the decimal into whole and fractional parts to avoid precision loss
+    BasicDecimal256 whole_decimal, fraction_decimal;
+    decimal.GetWholeAndFraction(scale, &whole_decimal, &fraction_decimal);
+
+    Real whole = ToRealPositiveNoSplit<Real>(whole_decimal, 0);
+    Real fraction = ToRealPositiveNoSplit<Real>(fraction_decimal, scale);
+    return whole + fraction;
+  }
 };
 
 }  // namespace
diff --git a/cpp/src/arrow/util/decimal_internal.h b/cpp/src/arrow/util/decimal_internal.h
index 041aac4ef8..51a7229ab6 100644
--- a/cpp/src/arrow/util/decimal_internal.h
+++ b/cpp/src/arrow/util/decimal_internal.h
@@ -451,6 +451,8 @@ struct RealTraits<float> {
   static constexpr int kMantissaBits = 24;
   // ceil(log10(2 ^ kMantissaBits))
   static constexpr int kMantissaDigits = 8;
+  // Integers between zero and kMaxPreciseInteger can be precisely represented
+  static constexpr uint64_t kMaxPreciseInteger = (1ULL << kMantissaBits) - 1;
 };
 
 template <>
@@ -464,6 +466,8 @@ struct RealTraits<double> {
   static constexpr int kMantissaBits = 53;
   // ceil(log10(2 ^ kMantissaBits))
   static constexpr int kMantissaDigits = 16;
+  // Integers between zero and kMaxPreciseInteger can be precisely represented
+  static constexpr uint64_t kMaxPreciseInteger = (1ULL << kMantissaBits) - 1;
 };
 
 template <typename DecimalType>
diff --git a/cpp/src/arrow/util/decimal_test.cc b/cpp/src/arrow/util/decimal_test.cc
index 1401750ce7..6376a9545a 100644
--- a/cpp/src/arrow/util/decimal_test.cc
+++ b/cpp/src/arrow/util/decimal_test.cc
@@ -1050,6 +1050,24 @@ void CheckDecimalToReal(const std::string& decimal_value, int32_t scale, Real ex
       << "Decimal value: " << decimal_value << " Scale: " << scale;
 }
 
+template <typename Decimal, typename Real>
+void CheckDecimalToRealWithinOneULP(const std::string& decimal_value, int32_t scale,
+                                    Real expected) {
+  Decimal dec(decimal_value);
+  auto result = dec.template ToReal<Real>(scale);
+  ASSERT_TRUE(result == expected || result == std::nextafter(expected, expected + 1) ||
+              result == std::nextafter(expected, expected - 1))
+      << "Decimal value: " << decimal_value << " Scale: " << scale;
+}
+
+template <typename Decimal, typename Real>
+void CheckDecimalToRealWithinEpsilon(const std::string& decimal_value, int32_t scale,
+                                     Real epsilon, Real expected) {
+  Decimal dec(decimal_value);
+  ASSERT_TRUE(std::abs(dec.template ToReal<Real>(scale) - expected) <= epsilon)
+      << "Decimal value: " << decimal_value << " Scale: " << scale;
+}
+
 template <typename Decimal>
 void CheckDecimalToRealApprox(const std::string& decimal_value, int32_t scale,
                               float expected) {
@@ -1110,59 +1128,79 @@ class TestDecimalToReal : public ::testing::Test {
       }
     }
   }
+};
 
-  // Test precision of conversions to float values
-  void TestPrecision() {
-    // 2**63 + 2**40 (exactly representable in a float's 24 bits of precision)
-    CheckDecimalToReal<Decimal, Real>("9223373136366403584", 0, 9.223373e+18f);
-    CheckDecimalToReal<Decimal, Real>("-9223373136366403584", 0, -9.223373e+18f);
-    // 2**64 + 2**41 (exactly representable in a float)
-    CheckDecimalToReal<Decimal, Real>("18446746272732807168", 0, 1.8446746e+19f);
-    CheckDecimalToReal<Decimal, Real>("-18446746272732807168", 0, -1.8446746e+19f);
-  }
+TYPED_TEST_SUITE(TestDecimalToReal, RealTypes);
+TYPED_TEST(TestDecimalToReal, TestSuccess) { this->TestSuccess(); }
+
+// Custom test for Decimal::ToReal<float>
+template <typename DecimalType>
+class TestDecimalToRealFloat : public TestDecimalToReal<std::pair<DecimalType, float>> {};
+TYPED_TEST_SUITE(TestDecimalToRealFloat, DecimalTypes);
 
-  // Test conversions with a range of scales
-  void TestLargeValues(int32_t max_scale) {
-    // Note that exact comparisons would succeed on some platforms (Linux, macOS).
-    // Nevertheless, power-of-ten factors are not all exactly representable
-    // in binary floating point.
-    for (int32_t scale = -max_scale; scale <= max_scale; scale++) {
+TYPED_TEST(TestDecimalToRealFloat, LargeValues) {
+  auto max_scale = TypeParam::kMaxScale;
+  // Note that exact comparisons would succeed on some platforms (Linux, macOS).
+  // Nevertheless, power-of-ten factors are not all exactly representable
+  // in binary floating point.
+  for (int32_t scale = -max_scale; scale <= max_scale; scale++) {
 #ifdef _WIN32
-      // MSVC gives pow(10.f, -45.f) == 0 even though 1e-45f is nonzero
-      if (scale == 45) continue;
+    // MSVC gives pow(10.f, -45.f) == 0 even though 1e-45f is nonzero
+    if (scale == 45) continue;
 #endif
-      CheckDecimalToRealApprox<Decimal>("1", scale, Pow10(-scale));
-    }
-    for (int32_t scale = -max_scale; scale <= max_scale - 2; scale++) {
+    CheckDecimalToRealApprox<TypeParam>("1", scale, this->Pow10(-scale));
+  }
+  for (int32_t scale = -max_scale; scale <= max_scale - 2; scale++) {
 #ifdef _WIN32
-      // MSVC gives pow(10.f, -45.f) == 0 even though 1e-45f is nonzero
-      if (scale == 45) continue;
+    // MSVC gives pow(10.f, -45.f) == 0 even though 1e-45f is nonzero
+    if (scale == 45) continue;
 #endif
-      const Real factor = static_cast<Real>(123);
-      CheckDecimalToRealApprox<Decimal>("123", scale, factor * Pow10(-scale));
-    }
+    const auto factor = static_cast<float>(123);
+    CheckDecimalToRealApprox<TypeParam>("123", scale, factor * this->Pow10(-scale));
   }
-};
-
-TYPED_TEST_SUITE(TestDecimalToReal, RealTypes);
-
-TYPED_TEST(TestDecimalToReal, TestSuccess) { this->TestSuccess(); }
+}
 
-// Custom test for Decimal128::ToReal<float>
-class TestDecimal128ToRealFloat : public TestDecimalToReal<std::pair<Decimal128, float>> {
-};
-TEST_F(TestDecimal128ToRealFloat, LargeValues) { TestLargeValues(/*max_scale=*/38); }
-TEST_F(TestDecimal128ToRealFloat, Precision) { this->TestPrecision(); }
-// Custom test for Decimal256::ToReal<float>
-class TestDecimal256ToRealFloat : public TestDecimalToReal<std::pair<Decimal256, float>> {
-};
-TEST_F(TestDecimal256ToRealFloat, LargeValues) { TestLargeValues(/*max_scale=*/76); }
-TEST_F(TestDecimal256ToRealFloat, Precision) { this->TestPrecision(); }
+TYPED_TEST(TestDecimalToRealFloat, Precision) {
+  // 2**63 + 2**40 (exactly representable in a float's 24 bits of precision)
+  CheckDecimalToReal<TypeParam, float>("9223373136366403584", 0, 9.223373e+18f);
+  CheckDecimalToReal<TypeParam, float>("-9223373136366403584", 0, -9.223373e+18f);
+  // 2**64 + 2**41 (exactly representable in a float)
+  CheckDecimalToReal<TypeParam, float>("18446746272732807168", 0, 1.8446746e+19f);
+  CheckDecimalToReal<TypeParam, float>("-18446746272732807168", 0, -1.8446746e+19f);
+
+  // Integers are always exact
+  auto scale = TypeParam::kMaxScale - 1;
+  std::string seven = "7.";
+  seven.append(scale, '0');  // pad with trailing zeros
+  CheckDecimalToReal<TypeParam, float>(seven, scale, 7.0f);
+  CheckDecimalToReal<TypeParam, float>("-" + seven, scale, -7.0f);
+
+  CheckDecimalToReal<TypeParam, float>("99999999999999999999.0000000000000000", 16,
+                                       99999999999999999999.0f);
+  CheckDecimalToReal<TypeParam, float>("-99999999999999999999.0000000000000000", 16,
+                                       -99999999999999999999.0f);
+
+  // Small fractions are within one ULP
+  CheckDecimalToRealWithinOneULP<TypeParam, float>("9999999.9", 1, 9999999.9f);
+  CheckDecimalToRealWithinOneULP<TypeParam, float>("-9999999.9", 1, -9999999.9f);
+  CheckDecimalToRealWithinOneULP<TypeParam, float>("9999999.999999", 6, 9999999.999999f);
+  CheckDecimalToRealWithinOneULP<TypeParam, float>("-9999999.999999", 6,
+                                                   -9999999.999999f);
+
+  // Large fractions are within 2^-23
+  constexpr float epsilon = 1.1920928955078125e-07f;  // 2^-23
+  CheckDecimalToRealWithinEpsilon<TypeParam, float>(
+      "112334829348925.99070703983306884765625", 23, epsilon,
+      112334829348925.99070703983306884765625f);
+  CheckDecimalToRealWithinEpsilon<TypeParam, float>(
+      "1.987748987892758765582589910934859345", 36, epsilon,
+      1.987748987892758765582589910934859345f);
+}
 
 // ToReal<double> tests are disabled on MinGW because of precision issues in results
 #ifndef __MINGW32__
 
-// Custom test for Decimal128::ToReal<double>
+// Custom test for Decimal::ToReal<double>
 template <typename DecimalType>
 class TestDecimalToRealDouble : public TestDecimalToReal<std::pair<DecimalType, double>> {
 };
@@ -1209,6 +1247,34 @@ TYPED_TEST(TestDecimalToRealDouble, Precision) {
                                         9.999999999999998e+47);
   CheckDecimalToReal<TypeParam, double>("-99999999999999978859343891977453174784", -10,
                                         -9.999999999999998e+47);
+  // Integers are always exact
+  auto scale = TypeParam::kMaxScale - 1;
+  std::string seven = "7.";
+  seven.append(scale, '0');
+  CheckDecimalToReal<TypeParam, double>(seven, scale, 7.0);
+  CheckDecimalToReal<TypeParam, double>("-" + seven, scale, -7.0);
+
+  CheckDecimalToReal<TypeParam, double>("99999999999999999999.0000000000000000", 16,
+                                        99999999999999999999.0);
+  CheckDecimalToReal<TypeParam, double>("-99999999999999999999.0000000000000000", 16,
+                                        -99999999999999999999.0);
+
+  // Small fractions are within one ULP
+  CheckDecimalToRealWithinOneULP<TypeParam, double>("9999999.9", 1, 9999999.9);
+  CheckDecimalToRealWithinOneULP<TypeParam, double>("-9999999.9", 1, -9999999.9);
+  CheckDecimalToRealWithinOneULP<TypeParam, double>("9999999.999999999999999", 15,
+                                                    9999999.999999999999999);
+  CheckDecimalToRealWithinOneULP<TypeParam, double>("-9999999.999999999999999", 15,
+                                                    -9999999.999999999999999);
+
+  // Large fractions are within 2^-52
+  constexpr double epsilon = 2.220446049250313080847263336181640625e-16;  // 2^-52
+  CheckDecimalToRealWithinEpsilon<TypeParam, double>(
+      "112334829348925.99070703983306884765625", 23, epsilon,
+      112334829348925.99070703983306884765625);
+  CheckDecimalToRealWithinEpsilon<TypeParam, double>(
+      "1.987748987892758765582589910934859345", 36, epsilon,
+      1.987748987892758765582589910934859345);
 }
 
 #endif  // __MINGW32__