You are viewing a plain text version of this content. The canonical link for it is here.
Posted to github@arrow.apache.org by GitBox <gi...@apache.org> on 2020/10/10 22:05:10 UTC

[GitHub] [arrow] Luminarys commented on a change in pull request #8344: Add BasicDecimal256 Multiplication Support (PR for decimal256 branch, not master)

Luminarys commented on a change in pull request #8344:
URL: https://github.com/apache/arrow/pull/8344#discussion_r502837080



##########
File path: cpp/src/arrow/util/basic_decimal.cc
##########
@@ -254,69 +252,148 @@ BasicDecimal128& BasicDecimal128::operator>>=(uint32_t bits) {
 
 namespace {
 
-// TODO: Remove this guard once it's used by BasicDecimal256
-#ifndef ARROW_USE_NATIVE_INT128
-// This method losslessly multiplies x and y into a 128 bit unsigned integer
-// whose high bits will be stored in hi and low bits in lo.
-void ExtendAndMultiplyUint64(uint64_t x, uint64_t y, uint64_t* hi, uint64_t* lo) {
-#ifdef ARROW_USE_NATIVE_INT128
-  const __uint128_t r = static_cast<__uint128_t>(x) * y;
-  *lo = r & kInt64Mask;
-  *hi = r >> 64;
-#else
-  // If we can't use a native fallback, perform multiplication
-  // by splitting up x and y into 32 bit high/low bit components,
+// Multiply two N bit word components into a 2*N bit result, with high bits
+// stored in hi and low bits in lo.
+template <typename Word>
+void ExtendAndMultiplyUint(Word x, Word y, Word* hi, Word* lo) {
+  // Perform multiplication on two N bit words x and y into a 2*N bit result
+  // by splitting up x and y into N/2 bit high/low bit components,
   // allowing us to represent the multiplication as
-  // x * y = x_lo * y_lo + x_hi * y_lo * 2^32 + y_hi * x_lo * 2^32
-  // + x_hi * y_hi * 2^64.
+  // x * y = x_lo * y_lo + x_hi * y_lo * 2^N/2 + y_hi * x_lo * 2^N/2
+  // + x_hi * y_hi * 2^N
   //
-  // Now, consider the final output as lo_lo || lo_hi || hi_lo || hi_hi.
+  // Now, consider the final output as lo_lo || lo_hi || hi_lo || hi_hi
   // Therefore,
   // lo_lo is (x_lo * y_lo)_lo,
   // lo_hi is ((x_lo * y_lo)_hi + (x_hi * y_lo)_lo + (x_lo * y_hi)_lo)_lo,
   // hi_lo is ((x_hi * y_hi)_lo + (x_hi * y_lo)_hi + (x_lo * y_hi)_hi)_hi,
   // hi_hi is (x_hi * y_hi)_hi
-  const uint64_t x_lo = x & kIntMask;
-  const uint64_t y_lo = y & kIntMask;
-  const uint64_t x_hi = x >> 32;
-  const uint64_t y_hi = y >> 32;
+  constexpr Word kHighBitShift = sizeof(Word) * 4;
+  constexpr Word kLowBitMask = (static_cast<Word>(1) << kHighBitShift) - 1;
 
-  const uint64_t t = x_lo * y_lo;
-  const uint64_t t_lo = t & kIntMask;
-  const uint64_t t_hi = t >> 32;
+  const Word x_lo = x & kLowBitMask;
+  const Word y_lo = y & kLowBitMask;
+  const Word x_hi = x >> kHighBitShift;
+  const Word y_hi = y >> kHighBitShift;
 
-  const uint64_t u = x_hi * y_lo + t_hi;
-  const uint64_t u_lo = u & kIntMask;
-  const uint64_t u_hi = u >> 32;
+  const Word t = x_lo * y_lo;
+  const Word t_lo = t & kLowBitMask;
+  const Word t_hi = t >> kHighBitShift;
 
-  const uint64_t v = x_lo * y_hi + u_lo;
-  const uint64_t v_hi = v >> 32;
+  const Word u = x_hi * y_lo + t_hi;
+  const Word u_lo = u & kLowBitMask;
+  const Word u_hi = u >> kHighBitShift;
+
+  const Word v = x_lo * y_hi + u_lo;
+  const Word v_hi = v >> kHighBitShift;
 
   *hi = x_hi * y_hi + u_hi + v_hi;
-  *lo = (v << 32) | t_lo;
-#endif
+  *lo = (v << kHighBitShift) + t_lo;
+}
+
+// Convenience wrapper type over 128 bit unsigned integers
+#ifdef ARROW_USE_NATIVE_INT128
+struct uint128_t {

Review comment:
       My main concern with moving and replacing the existing alias is that we'll now have to ensure that the type functions exactly how the native/boost type do, which is likely going to require a lot of unnecessary effort to provide extra functions. I'd rather keep this localized and lightweight until there's an actual need to reuse this (since it really is just a convenience wrapper here).

##########
File path: cpp/src/arrow/util/basic_decimal.cc
##########
@@ -254,69 +252,148 @@ BasicDecimal128& BasicDecimal128::operator>>=(uint32_t bits) {
 
 namespace {
 
-// TODO: Remove this guard once it's used by BasicDecimal256
-#ifndef ARROW_USE_NATIVE_INT128
-// This method losslessly multiplies x and y into a 128 bit unsigned integer
-// whose high bits will be stored in hi and low bits in lo.
-void ExtendAndMultiplyUint64(uint64_t x, uint64_t y, uint64_t* hi, uint64_t* lo) {
-#ifdef ARROW_USE_NATIVE_INT128
-  const __uint128_t r = static_cast<__uint128_t>(x) * y;
-  *lo = r & kInt64Mask;
-  *hi = r >> 64;
-#else
-  // If we can't use a native fallback, perform multiplication
-  // by splitting up x and y into 32 bit high/low bit components,
+// Multiply two N bit word components into a 2*N bit result, with high bits
+// stored in hi and low bits in lo.
+template <typename Word>
+void ExtendAndMultiplyUint(Word x, Word y, Word* hi, Word* lo) {
+  // Perform multiplication on two N bit words x and y into a 2*N bit result
+  // by splitting up x and y into N/2 bit high/low bit components,
   // allowing us to represent the multiplication as
-  // x * y = x_lo * y_lo + x_hi * y_lo * 2^32 + y_hi * x_lo * 2^32
-  // + x_hi * y_hi * 2^64.
+  // x * y = x_lo * y_lo + x_hi * y_lo * 2^N/2 + y_hi * x_lo * 2^N/2
+  // + x_hi * y_hi * 2^N
   //
-  // Now, consider the final output as lo_lo || lo_hi || hi_lo || hi_hi.
+  // Now, consider the final output as lo_lo || lo_hi || hi_lo || hi_hi
   // Therefore,
   // lo_lo is (x_lo * y_lo)_lo,
   // lo_hi is ((x_lo * y_lo)_hi + (x_hi * y_lo)_lo + (x_lo * y_hi)_lo)_lo,
   // hi_lo is ((x_hi * y_hi)_lo + (x_hi * y_lo)_hi + (x_lo * y_hi)_hi)_hi,
   // hi_hi is (x_hi * y_hi)_hi
-  const uint64_t x_lo = x & kIntMask;
-  const uint64_t y_lo = y & kIntMask;
-  const uint64_t x_hi = x >> 32;
-  const uint64_t y_hi = y >> 32;
+  constexpr Word kHighBitShift = sizeof(Word) * 4;
+  constexpr Word kLowBitMask = (static_cast<Word>(1) << kHighBitShift) - 1;
 
-  const uint64_t t = x_lo * y_lo;
-  const uint64_t t_lo = t & kIntMask;
-  const uint64_t t_hi = t >> 32;
+  const Word x_lo = x & kLowBitMask;
+  const Word y_lo = y & kLowBitMask;
+  const Word x_hi = x >> kHighBitShift;
+  const Word y_hi = y >> kHighBitShift;
 
-  const uint64_t u = x_hi * y_lo + t_hi;
-  const uint64_t u_lo = u & kIntMask;
-  const uint64_t u_hi = u >> 32;
+  const Word t = x_lo * y_lo;
+  const Word t_lo = t & kLowBitMask;
+  const Word t_hi = t >> kHighBitShift;
 
-  const uint64_t v = x_lo * y_hi + u_lo;
-  const uint64_t v_hi = v >> 32;
+  const Word u = x_hi * y_lo + t_hi;
+  const Word u_lo = u & kLowBitMask;
+  const Word u_hi = u >> kHighBitShift;
+
+  const Word v = x_lo * y_hi + u_lo;
+  const Word v_hi = v >> kHighBitShift;
 
   *hi = x_hi * y_hi + u_hi + v_hi;
-  *lo = (v << 32) | t_lo;
-#endif
+  *lo = (v << kHighBitShift) + t_lo;
+}
+
+// Convenience wrapper type over 128 bit unsigned integers
+#ifdef ARROW_USE_NATIVE_INT128
+struct uint128_t {
+  uint128_t() {}
+  uint128_t(uint64_t hi, uint64_t lo) : val_((static_cast<__uint128_t>(hi) << 64) | lo) {}
+  uint128_t(const BasicDecimal128& decimal) {
+    val_ = (static_cast<__uint128_t>(decimal.high_bits()) << 64) | decimal.low_bits();
+  }
+
+  uint64_t hi() { return val_ >> 64; }
+  uint64_t lo() { return val_ & kInt64Mask; }
+
+  uint128_t& operator+=(const uint128_t& other) {
+    val_ += other.val_;
+    return *this;
+  }
+
+  __uint128_t val_;
+};
+
+uint128_t operator*(const uint128_t& left, const uint128_t& right) {
+  uint128_t r;
+  r.val_ = left.val_ * right.val_;
+  return r;
+}
+#else
+struct uint128_t {
+  uint128_t() {}
+  uint128_t(uint64_t hi, uint64_t lo) : hi_(hi), lo_(lo) {}
+  uint128_t(const BasicDecimal128& decimal) {
+    hi_ = decimal.high_bits();
+    lo_ = decimal.low_bits();
+  }
+
+  uint64_t hi() const { return hi_; }
+  uint64_t lo() const { return lo_; }
+
+  uint128_t& operator+=(const uint128_t& other) {
+    // To deduce the carry bit, we perform "65 bit" addition on the low bits and
+    // seeing if the resulting high bit is 1. This is accomplished by shifting the
+    // low bits to the right by 1 (chopping off the lowest bit), then adding 1 if the
+    // result of adding the two chopped bits would have produced a carry.
+    uint64_t carry = (((lo_ & other.lo_) & 1) + (lo_ >> 1) + (other.lo_ >> 1)) >> 63;
+    hi_ += other.hi_ + carry;
+    lo_ += other.lo_;
+    return *this;
+  }
+
+  uint64_t hi_;
+  uint64_t lo_;
+};
+
+uint128_t operator*(const uint128_t& left, const uint128_t& right) {
+  uint128_t r;
+  ExtendAndMultiplyUint(left.lo_, right.lo_, &r.hi_, &r.lo_);
+  r.hi_ += (left.hi_ * right.lo_) + (left.lo_ * right.hi_);
+  return r;
 }
 #endif
 
-void MultiplyUint128(uint64_t x_hi, uint64_t x_lo, uint64_t y_hi, uint64_t y_lo,
-                     uint64_t* hi, uint64_t* lo) {
+void ExtendAndMultiplyUint128(uint128_t x, uint128_t y, uint128_t* hi, uint128_t* lo) {

Review comment:
       This method is now removed.

##########
File path: cpp/src/arrow/util/basic_decimal.cc
##########
@@ -254,69 +252,148 @@ BasicDecimal128& BasicDecimal128::operator>>=(uint32_t bits) {
 
 namespace {
 
-// TODO: Remove this guard once it's used by BasicDecimal256
-#ifndef ARROW_USE_NATIVE_INT128
-// This method losslessly multiplies x and y into a 128 bit unsigned integer
-// whose high bits will be stored in hi and low bits in lo.
-void ExtendAndMultiplyUint64(uint64_t x, uint64_t y, uint64_t* hi, uint64_t* lo) {
-#ifdef ARROW_USE_NATIVE_INT128
-  const __uint128_t r = static_cast<__uint128_t>(x) * y;
-  *lo = r & kInt64Mask;
-  *hi = r >> 64;
-#else
-  // If we can't use a native fallback, perform multiplication
-  // by splitting up x and y into 32 bit high/low bit components,
+// Multiply two N bit word components into a 2*N bit result, with high bits
+// stored in hi and low bits in lo.
+template <typename Word>
+void ExtendAndMultiplyUint(Word x, Word y, Word* hi, Word* lo) {

Review comment:
       Done. This saves a lot of code, though does take 60 ns for multiplication as opposed to 20 ns prior.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org