You are viewing a plain text version of this content. The canonical link for it is here.
Posted to github@arrow.apache.org by GitBox <gi...@apache.org> on 2020/10/27 21:00:50 UTC

[GitHub] [arrow] MingyuZhong commented on a change in pull request #8542: ARROW-10407: [C++] Add BasicDecimal256 Division Support

MingyuZhong commented on a change in pull request #8542:
URL: https://github.com/apache/arrow/pull/8542#discussion_r513015463



##########
File path: cpp/src/arrow/util/basic_decimal.cc
##########
@@ -490,49 +501,59 @@ static void FixDivisionSigns(BasicDecimal128* result, BasicDecimal128* remainder
   }
 }
 
-/// \brief Build a BasicDecimal128 from a list of ints.
+/// \brief Build a big endian array of uint64_t from a list of uint32_t.
+template <size_t N>
+static DecimalStatus BuildFromArray(std::array<uint64_t, N>* result_array,
+                                    uint32_t* array, int64_t length) {
+  for (int64_t i = length - 2 * N - 1; i >= 0; i--) {
+    if (array[i] != 0) {
+      return DecimalStatus::kOverflow;
+    }
+  }
+  int64_t next_index = length - 1;
+  for (int64_t i = N - 1; i >= 0; i--) {
+    uint64_t lower_bits = (next_index < 0) ? 0 : array[next_index--];
+    (*result_array)[i] =
+        (next_index < 0)
+            ? lower_bits
+            : ((static_cast<uint64_t>(array[next_index--]) << 32) + lower_bits);
+  }
+  return DecimalStatus::kSuccess;
+}
+
+/// \brief Build a BasicDecimal128 from a list of uint32_t.
 static DecimalStatus BuildFromArray(BasicDecimal128* value, uint32_t* array,
                                     int64_t length) {
-  switch (length) {
-    case 0:
-      *value = {static_cast<int64_t>(0)};
-      break;
-    case 1:
-      *value = {static_cast<int64_t>(array[0])};
-      break;
-    case 2:
-      *value = {static_cast<int64_t>(0),
-                (static_cast<uint64_t>(array[0]) << 32) + array[1]};
-      break;
-    case 3:
-      *value = {static_cast<int64_t>(array[0]),
-                (static_cast<uint64_t>(array[1]) << 32) + array[2]};
-      break;
-    case 4:
-      *value = {(static_cast<int64_t>(array[0]) << 32) + array[1],
-                (static_cast<uint64_t>(array[2]) << 32) + array[3]};
-      break;
-    case 5:
-      if (array[0] != 0) {
-        return DecimalStatus::kOverflow;
-      }
-      *value = {(static_cast<int64_t>(array[1]) << 32) + array[2],
-                (static_cast<uint64_t>(array[3]) << 32) + array[4]};
-      break;
-    default:
-      return DecimalStatus::kOverflow;
+  std::array<uint64_t, 2> result_array;
+  auto status = BuildFromArray(&result_array, array, length);
+  if (status != DecimalStatus::kSuccess) {
+    return status;
   }
+  *value = {static_cast<int64_t>(result_array[0]), result_array[1]};
+  return DecimalStatus::kSuccess;
+}
 
+/// \brief Build a BasicDecimal256 from a list of uint32_t.
+static DecimalStatus BuildFromArray(BasicDecimal256* value, uint32_t* array,
+                                    int64_t length) {
+  std::array<uint64_t, 4> result_array;
+  auto status = BuildFromArray(&result_array, array, length);
+  if (status != DecimalStatus::kSuccess) {
+    return status;
+  }
+  std::reverse(result_array.begin(), result_array.end());

Review comment:
       Cna you make BuildFromArray take little endian array so you don't need to reverse it?

##########
File path: cpp/src/arrow/util/basic_decimal.cc
##########
@@ -549,24 +570,28 @@ static DecimalStatus SingleDivide(const uint32_t* dividend, int64_t dividend_len
   return DecimalStatus::kSuccess;
 }
 
-DecimalStatus BasicDecimal128::Divide(const BasicDecimal128& divisor,
-                                      BasicDecimal128* result,
-                                      BasicDecimal128* remainder) const {
+/// \brief Do a division where the divisor fits into a single 32 bit value.
+template <class DecimalClass>
+static DecimalStatus DecimalDivide(const DecimalClass& dividend,
+                                   const DecimalClass& divisor, DecimalClass* result,
+                                   DecimalClass* remainder) {
+  static int64_t kDecimalArrayLength =
+      std::is_same<DecimalClass, BasicDecimal128>::value ? 4 : 8;

Review comment:
       Use sizeof(DecimalClass) / sizeof(uint64_t)?

##########
File path: cpp/src/arrow/util/basic_decimal.cc
##########
@@ -549,24 +570,28 @@ static DecimalStatus SingleDivide(const uint32_t* dividend, int64_t dividend_len
   return DecimalStatus::kSuccess;
 }
 
-DecimalStatus BasicDecimal128::Divide(const BasicDecimal128& divisor,
-                                      BasicDecimal128* result,
-                                      BasicDecimal128* remainder) const {
+/// \brief Do a division where the divisor fits into a single 32 bit value.
+template <class DecimalClass>
+static DecimalStatus DecimalDivide(const DecimalClass& dividend,
+                                   const DecimalClass& divisor, DecimalClass* result,
+                                   DecimalClass* remainder) {
+  static int64_t kDecimalArrayLength =

Review comment:
       Please use constexpr

##########
File path: cpp/src/arrow/util/basic_decimal.cc
##########
@@ -404,51 +432,33 @@ BasicDecimal128& BasicDecimal128::operator*=(const BasicDecimal128& right) {
 /// \result the output length of the array
 static int64_t FillInArray(const BasicDecimal128& value, uint32_t* array,
                            bool& was_negative) {
-  uint64_t high;
-  uint64_t low;
-  const int64_t highbits = value.high_bits();
-  const uint64_t lowbits = value.low_bits();
-
-  if (highbits < 0) {
-    low = ~lowbits + 1;
-    high = static_cast<uint64_t>(~highbits);
-    if (low == 0) {
-      ++high;
-    }
-    was_negative = true;
-  } else {
-    low = lowbits;
-    high = static_cast<uint64_t>(highbits);
-    was_negative = false;
-  }
-
-  if (high != 0) {
-    if (high > std::numeric_limits<uint32_t>::max()) {
-      array[0] = static_cast<uint32_t>(high >> 32);
-      array[1] = static_cast<uint32_t>(high);
-      array[2] = static_cast<uint32_t>(low >> 32);
-      array[3] = static_cast<uint32_t>(low);
-      return 4;
-    }
-
-    array[0] = static_cast<uint32_t>(high);
-    array[1] = static_cast<uint32_t>(low >> 32);
-    array[2] = static_cast<uint32_t>(low);
-    return 3;
-  }
-
-  if (low >= std::numeric_limits<uint32_t>::max()) {
-    array[0] = static_cast<uint32_t>(low >> 32);
-    array[1] = static_cast<uint32_t>(low);
-    return 2;
-  }
+  BasicDecimal128 abs_value = BasicDecimal128::Abs(value);
+  was_negative = value.Sign() < 0;

Review comment:
       value.high_bits() < 0?

##########
File path: cpp/src/arrow/util/decimal_benchmark.cc
##########
@@ -158,6 +158,7 @@ static void BinaryMathOp256(benchmark::State& state) {  // NOLINT non-const refe
   for (auto _ : state) {
     for (int x = 0; x < kValueSize; x += 5) {
       benchmark::DoNotOptimize(v1[x + 2] * v2[x + 2]);
+      benchmark::DoNotOptimize(v1[x + 3] / v2[x + 3]);

Review comment:
       Did you verify that your change didn't make Decimal128 slower? What's the performance metrics for Decimal256 before and after your change (please add it to the PR description)?

##########
File path: cpp/src/arrow/util/basic_decimal.cc
##########
@@ -395,6 +395,34 @@ BasicDecimal128& BasicDecimal128::operator*=(const BasicDecimal128& right) {
   return *this;
 }
 
+/// Expands the given big endian array of uint64_t into an array of uint32_t.
+/// The value of input array is expected to be positive. The result_array will
+/// remove leading zeros from the input array.
+/// \param value_array an big endian array to represent the value
+/// \param result_array an array of length N*2 to set with the value
+/// \result the output length of the array
+template <size_t N>
+static int64_t FillInArray(std::array<uint64_t, N>& value_array, uint32_t* result_array) {
+  int64_t next_index = 0;
+  for (size_t i = 0; i < N; i++) {
+    if (value_array[i] == 0) {
+      if (next_index != 0) {
+        result_array[next_index++] = 0;
+        result_array[next_index++] = 0;
+      }
+    } else if (value_array[i] <= std::numeric_limits<uint32_t>::max()) {

Review comment:
       Why do we need this special case?

##########
File path: cpp/src/arrow/util/basic_decimal.cc
##########
@@ -479,7 +489,8 @@ static void ShiftArrayRight(uint32_t* array, int64_t length, int64_t bits) {
 
 /// \brief Fix the signs of the result and remainder at the end of the division based on
 /// the signs of the dividend and divisor.
-static void FixDivisionSigns(BasicDecimal128* result, BasicDecimal128* remainder,
+template <class DecimalClass>
+static void FixDivisionSigns(DecimalClass* result, DecimalClass* remainder,

Review comment:
       I think this method should be declared as inline.

##########
File path: cpp/src/arrow/util/basic_decimal.cc
##########
@@ -395,6 +395,34 @@ BasicDecimal128& BasicDecimal128::operator*=(const BasicDecimal128& right) {
   return *this;
 }
 
+/// Expands the given big endian array of uint64_t into an array of uint32_t.
+/// The value of input array is expected to be positive. The result_array will
+/// remove leading zeros from the input array.
+/// \param value_array an big endian array to represent the value
+/// \param result_array an array of length N*2 to set with the value
+/// \result the output length of the array
+template <size_t N>
+static int64_t FillInArray(std::array<uint64_t, N>& value_array, uint32_t* result_array) {
+  int64_t next_index = 0;
+  for (size_t i = 0; i < N; i++) {
+    if (value_array[i] == 0) {

Review comment:
       For efficiency, once we find a non-zero element, the subsequent iterations don't need to check zero again. So the following would be better:
   ```
   for (size_t i = 0; i < N; i++) {
     if (value_array[i] != 0) {
       for (size_t j = i; j < N; j++ ) {
         ...
       }
       break;
     }
   }
   ```




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org