You are viewing a plain text version of this content. The canonical link for it is here.
Posted to github@arrow.apache.org by GitBox <gi...@apache.org> on 2021/07/06 17:21:17 UTC

[GitHub] [arrow] pitrou commented on a change in pull request #10643: ARROW-13242: [C++] Improve random generation of decimal arrays

pitrou commented on a change in pull request #10643:
URL: https://github.com/apache/arrow/pull/10643#discussion_r664743766



##########
File path: cpp/src/arrow/testing/random.cc
##########
@@ -229,34 +230,95 @@ std::shared_ptr<Array> RandomArrayGenerator::Float64(int64_t size, double min, d
 #undef PRIMITIVE_RAND_INTEGER_IMPL
 #undef PRIMITIVE_RAND_IMPL
 
-std::shared_ptr<Array> RandomArrayGenerator::Decimal128(std::shared_ptr<DataType> type,
-                                                        int64_t size,
-                                                        double null_probability) {
-  const auto& decimal_type = checked_cast<const Decimal128Type&>(*type);
-  const auto digits = decimal_type.precision();
-  if (digits > 18) {
-    // More than 18 digits + sign don't fit in a int64_t
-    ABORT_NOT_OK(
-        Status::NotImplemented("random decimal128 generation with precision > 18"));
-  }
+namespace {
 
-  // Generate logical values as integers, then convert them
-  const auto max = static_cast<int64_t>(std::llround(std::pow(10.0, digits)) - 1);
-  const auto int_array =
-      checked_pointer_cast<Int64Array>(Int64(size, -max, max, null_probability));
+// A generic generator for random decimal arrays
+template <typename DecimalType>
+struct DecimalGenerator {
+  using DecimalBuilderType = typename TypeTraits<DecimalType>::BuilderType;
+  using DecimalValue = typename DecimalBuilderType::ValueType;
+
+  std::shared_ptr<DataType> type_;
+  RandomArrayGenerator* rng_;
+
+  static uint64_t MaxDecimalInteger(int32_t digits) {
+    // Need to decrement *after* the cast to uint64_t because, while
+    // 10**x is exactly representable in a double for x <= 19,
+    // 10**x - 1 is not.
+    return static_cast<uint64_t>(std::ceil(std::pow(10.0, digits))) - 1;
+  }
+
+  std::shared_ptr<Array> MakeRandomArray(int64_t size, double null_probability) {
+    // 10**19 fits in a 64-bit unsigned integer
+    static constexpr int32_t kMaxDigitsInInteger = 19;
+    static constexpr int kNumIntegers = DecimalType::kByteWidth / 8;
+
+    static_assert(
+        kNumIntegers ==
+            (DecimalType::kMaxPrecision + kMaxDigitsInInteger - 1) / kMaxDigitsInInteger,
+        "inconsistent decimal metadata: kMaxPrecision doesn't match kByteWidth");
+
+    // First generate separate random values for individual components:
+    // boolean sign (including null-ness), and uint64 "digits" in big endian order.
+    const auto& decimal_type = checked_cast<const DecimalType&>(*type_);
+
+    const auto sign_array = checked_pointer_cast<BooleanArray>(
+        rng_->Boolean(size, /*true_probability=*/0.5, null_probability));
+    std::array<std::shared_ptr<UInt64Array>, kNumIntegers> digit_arrays;
+
+    auto remaining_digits = decimal_type.precision();
+    for (int i = kNumIntegers - 1; i >= 0; --i) {
+      const auto digits = std::min(kMaxDigitsInInteger, remaining_digits);
+      digit_arrays[i] = checked_pointer_cast<UInt64Array>(
+          rng_->UInt64(size, 0, MaxDecimalInteger(digits)));
+      DCHECK_EQ(digit_arrays[i]->null_count(), 0);
+      remaining_digits -= digits;
+    }
 
-  Decimal128Builder builder(type);
-  ABORT_NOT_OK(builder.Reserve(size));
-  for (int64_t i = 0; i < size; ++i) {
-    if (int_array->IsValid(i)) {
-      builder.UnsafeAppend(::arrow::Decimal128(int_array->Value(i)));
-    } else {
-      builder.UnsafeAppendNull();
+    // Second compute decimal values from the individual components,
+    // building up a decimal array.
+    DecimalBuilderType builder(type_);
+    ABORT_NOT_OK(builder.Reserve(size));
+
+    const DecimalValue kDigitsMultiplier =
+        DecimalValue::GetScaleMultiplier(kMaxDigitsInInteger);
+
+    for (int64_t i = 0; i < size; ++i) {
+      if (sign_array->IsValid(i)) {
+        DecimalValue dec_value{0};
+        for (int j = 0; j < kNumIntegers; ++j) {
+          dec_value =
+              dec_value * kDigitsMultiplier + DecimalValue(digit_arrays[j]->Value(i));

Review comment:
       `< 127` even ;-)




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org