Posted to github@arrow.apache.org by GitBox <gi...@apache.org> on 2021/06/04 14:20:52 UTC

[GitHub] [arrow] bkietz commented on a change in pull request #10364: ARROW-12074: [C++][Compute] Add scalar arithmetic kernels for decimal

bkietz commented on a change in pull request #10364:
URL: https://github.com/apache/arrow/pull/10364#discussion_r645570717



##########
File path: cpp/src/arrow/compute/kernels/codegen_internal.h
##########
@@ -537,6 +569,20 @@ struct OutputAdapter<Type, enable_if_base_binary<Type>> {
   }
 };
 
+template <typename Type>
+struct OutputAdapter<Type, enable_if_decimal<Type>> {
+  using T = typename TypeTraits<Type>::ScalarType::ValueType;
+  template <typename Generator>
+  static Status Write(KernelContext*, Datum* out, Generator&& generator) {
+    ArrayData* out_arr = out->mutable_array();
+    T* out_data = out_arr->GetMutableValues<T>(1);

Review comment:
       I don't think this is safe on big-endian architectures; see the definition of `endian_agnostic`.
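
       For illustration, a standalone sketch (not Arrow code and not part of the PR) of why a raw value-type store is layout-dependent: a multi-word integer leaves a different byte trail on little- and big-endian hosts, so writing the in-memory object straight into a fixed-layout output buffer is only correct when that buffer expects the host's layout.

    ```cpp
    // Standalone illustration only; none of this is from the PR or from Arrow itself.
    #include <cstdint>
    #include <cstring>
    #include <iostream>

    int main() {
      // Detect host byte order by inspecting the first byte of a known value.
      const uint32_t probe = 1;
      uint8_t first = 0;
      std::memcpy(&first, &probe, 1);
      const bool little_endian = (first == 1);

      // The same logical 64-bit value has a different in-memory byte order
      // depending on the host, which is exactly what a raw T* store exposes.
      const uint64_t word = 0x0102030405060708ULL;
      uint8_t bytes[sizeof(word)];
      std::memcpy(bytes, &word, sizeof(word));

      std::cout << (little_endian ? "little" : "big") << "-endian host, "
                << "first stored byte: 0x" << std::hex << int(bytes[0]) << "\n";
      // Prints 0x8 on little-endian hosts and 0x1 on big-endian hosts.
      return 0;
    }
    ```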

##########
File path: cpp/src/arrow/compute/kernels/scalar_arithmetic_test.cc
##########
@@ -1161,5 +1161,306 @@ TYPED_TEST(TestUnaryArithmeticFloating, AbsoluteValue) {
   }
 }
 
+class TestBinaryArithmeticDecimal : public TestBase {
+ protected:
+  struct Arg {
+    std::shared_ptr<DataType> type;
+    std::string value;
+  };
+
+  std::shared_ptr<DataType> GetOutType(const std::string& op,

Review comment:
       Instead of repeating this logic here, please write some tests exercising just the implicit casts. You can use the `CheckDispatchBest()` helper.
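
       As a rough sketch of what such a test might look like (assuming the `CheckDispatchBest(func_name, original_descrs, expected_descrs)` helper from compute's test_util; the concrete types below are illustrative, derived from the promotion rules quoted in this PR, not copied from it):

    ```cpp
    // Illustrative only: asserts how DispatchBest rewrites the argument types,
    // without executing the kernel.
    TEST(TestBinaryArithmeticDecimal, DispatchBestImplicitCasts) {
      // add: both operands are brought to the common (max) scale, so
      // decimal(5, 0) is rescaled to decimal(7, 2) while decimal(5, 2) is kept.
      CheckDispatchBest("add", {decimal128(5, 2), decimal128(5, 0)},
                        {decimal128(5, 2), decimal128(7, 2)});

      // multiply: no rescaling of the inputs is needed.
      CheckDispatchBest("multiply", {decimal128(5, 2), decimal128(5, 0)},
                        {decimal128(5, 2), decimal128(5, 0)});
    }
    ```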

##########
File path: cpp/src/arrow/compute/kernels/scalar_arithmetic.cc
##########
@@ -428,12 +524,69 @@ ArrayKernelExec ArithmeticExecFromOp(detail::GetTypeId get_id) {
   }
 }
 
+// calculate output precision/scale and args rescaling per operation type
+Result<std::shared_ptr<DataType>> GetDecimalBinaryOutput(
+    const std::string& op, const std::vector<ValueDescr>& values,
+    std::vector<std::shared_ptr<DataType>>* replaced = nullptr) {
+  const auto& left_type = checked_pointer_cast<DecimalType>(values[0].type);
+  const auto& right_type = checked_pointer_cast<DecimalType>(values[1].type);
+
+  const int32_t p1 = left_type->precision(), s1 = left_type->scale();
+  const int32_t p2 = right_type->precision(), s2 = right_type->scale();
+  if (s1 < 0 || s2 < 0) {
+    return Status::NotImplemented("Decimals with negative scales not supported");
+  }
+
+  int32_t out_prec, out_scale;
+  int32_t left_scaleup = 0, right_scaleup = 0;
+
+  // decimal upscaling behaviour references amazon redshift
+  // https://docs.aws.amazon.com/redshift/latest/dg/r_numeric_computations201.html
+  if (op.find("add") == 0 || op.find("subtract") == 0) {
+    out_scale = std::max(s1, s2);
+    out_prec = std::max(p1 - s1, p2 - s2) + 1 + out_scale;
+    left_scaleup = out_scale - s1;
+    right_scaleup = out_scale - s2;
+  } else if (op.find("multiply") == 0) {
+    out_scale = s1 + s2;
+    out_prec = p1 + p2 + 1;
+  } else if (op.find("divide") == 0) {
+    out_scale = std::max(4, s1 + p2 - s2 + 1);
+    out_prec = p1 - s1 + s2 + out_scale;  // >= p1 + p2 + 1
+    left_scaleup = out_prec - p1;
+  } else {
+    return Status::Invalid("Invalid decimal operation: ", op);
+  }
+
+  const auto id = left_type->id();

Review comment:
       If we add decimal128 + decimal256, shouldn't the output type be decimal256 rather than simply taking the LHS's width?
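
       One way to express that, sketched against the quoted code (illustrative only):

    ```cpp
    // Widen to decimal256 whenever either operand is decimal256, instead of
    // always taking the left-hand type's id.
    const Type::type id =
        (left_type->id() == Type::DECIMAL256 || right_type->id() == Type::DECIMAL256)
            ? Type::DECIMAL256
            : Type::DECIMAL128;
    ```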

##########
File path: cpp/src/arrow/compute/kernels/scalar_arithmetic.cc
##########
@@ -428,12 +524,69 @@ ArrayKernelExec ArithmeticExecFromOp(detail::GetTypeId get_id) {
   }
 }
 
+// calculate output precision/scale and args rescaling per operation type
+Result<std::shared_ptr<DataType>> GetDecimalBinaryOutput(
+    const std::string& op, const std::vector<ValueDescr>& values,
+    std::vector<std::shared_ptr<DataType>>* replaced = nullptr) {
+  const auto& left_type = checked_pointer_cast<DecimalType>(values[0].type);
+  const auto& right_type = checked_pointer_cast<DecimalType>(values[1].type);
+
+  const int32_t p1 = left_type->precision(), s1 = left_type->scale();
+  const int32_t p2 = right_type->precision(), s2 = right_type->scale();
+  if (s1 < 0 || s2 < 0) {
+    return Status::NotImplemented("Decimals with negative scales not supported");
+  }
+
+  int32_t out_prec, out_scale;
+  int32_t left_scaleup = 0, right_scaleup = 0;
+
+  // decimal upscaling behaviour references amazon redshift
+  // https://docs.aws.amazon.com/redshift/latest/dg/r_numeric_computations201.html
+  if (op.find("add") == 0 || op.find("subtract") == 0) {
+    out_scale = std::max(s1, s2);
+    out_prec = std::max(p1 - s1, p2 - s2) + 1 + out_scale;
+    left_scaleup = out_scale - s1;
+    right_scaleup = out_scale - s2;

Review comment:
       I think this could be the default case; it's what would be used by any comparison kernel, for example.

##########
File path: cpp/src/arrow/compute/kernels/scalar_arithmetic.cc
##########
@@ -451,8 +604,45 @@ struct ArithmeticFunction : ScalarFunction {
     if (auto kernel = DispatchExactImpl(this, *values)) return kernel;
     return arrow::compute::detail::NoMatchingKernel(this, *values);
   }
+
+  Result<const Kernel*> DispatchDecimal(std::vector<ValueDescr>* values) const {
+    if (values->size() == 2) {
+      std::vector<std::shared_ptr<DataType>> replaced;
+      RETURN_NOT_OK(GetDecimalBinaryOutput(name(), *values, &replaced));
+      (*values)[0].type = std::move(replaced[0]);
+      (*values)[1].type = std::move(replaced[1]);
+    }
+
+    using arrow::compute::detail::DispatchExactImpl;
+    if (auto kernel = DispatchExactImpl(this, *values)) return kernel;
+    return arrow::compute::detail::NoMatchingKernel(this, *values);
+  }
+};
+
+// resolve decimal operation output type
+struct DecimalBinaryOutputResolver {
+  std::string func_name;
+
+  DecimalBinaryOutputResolver(std::string func_name) : func_name(std::move(func_name)) {}
+
+  Result<ValueDescr> operator()(KernelContext*, const std::vector<ValueDescr>& args) {
+    ARROW_ASSIGN_OR_RAISE(auto out_type, GetDecimalBinaryOutput(func_name, args));
+    return ValueDescr(std::move(out_type));

Review comment:
       ```suggestion
       return ValueDescr(std::move(out_type), GetBroadcastShape(args));
   ```

##########
File path: docs/source/cpp/compute.rst
##########
@@ -286,11 +286,29 @@ an ``Invalid`` :class:`Status` when overflow is detected.
 +--------------------------+------------+--------------------+---------------------+
 | power_checked            | Binary     | Numeric            | Numeric             |
 +--------------------------+------------+--------------------+---------------------+
-| subtract                 | Binary     | Numeric            | Numeric             |
+| subtract                 | Binary     | Numeric            | Numeric (1)         |
 +--------------------------+------------+--------------------+---------------------+
-| subtract_checked         | Binary     | Numeric            | Numeric             |
+| subtract_checked         | Binary     | Numeric            | Numeric (1)         |
 +--------------------------+------------+--------------------+---------------------+
 
+* \(1) Precision and scale of computed DECIMAL results
+
++------------+---------------------------------------------+
+| Operation  | Result precision and scale                  |
++============+=============================================+
+| | add      | | scale = max(s1, s2)                       |
+| | subtract | | precision = max(p1-s1, p2-s2) + 1 + scale |
++------------+---------------------------------------------+
+| multiply   | | scale = s1 + s2                           |
+|            | | precision = p1 + p2 + 1                   |
++------------+---------------------------------------------+
+| divide     | | scale = max(4, s1 + p2 - s2 + 1)          |
+|            | | precision = p1 - s1 + s2 + scale          |

Review comment:
       ```suggestion
   | add        | scale = max(s1, s2)                         |
   | subtract   | precision = max(p1-s1, p2-s2) + 1 + scale   |
   +------------+---------------------------------------------+
   | multiply   | scale = s1 + s2                             |
   |            | precision = p1 + p2 + 1                     |
   +------------+---------------------------------------------+
   | divide     | scale = max(4, s1 + p2 - s2 + 1)            |
   |            | precision = p1 - s1 + s2 + scale            |
   ```
   
   Please mention that this is compatible with Redshift's decimal promotion rules.
   Additionally, please include an explanation of why this is reasonable.
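
       For example (illustrative, not from the PR): adding `99.9` as decimal(3, 1) to `9.99` as decimal(3, 2) yields `109.89`, which needs scale = max(1, 2) = 2 fractional digits and max(3-1, 3-2) + 1 = 3 integer digits to absorb the carry, i.e. precision = max(p1-s1, p2-s2) + 1 + scale = 5.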

##########
File path: cpp/src/arrow/compute/kernels/scalar_arithmetic.cc
##########
@@ -428,12 +524,69 @@ ArrayKernelExec ArithmeticExecFromOp(detail::GetTypeId get_id) {
   }
 }
 
+// calculate output precision/scale and args rescaling per operation type
+Result<std::shared_ptr<DataType>> GetDecimalBinaryOutput(
+    const std::string& op, const std::vector<ValueDescr>& values,
+    std::vector<std::shared_ptr<DataType>>* replaced = nullptr) {
+  const auto& left_type = checked_pointer_cast<DecimalType>(values[0].type);
+  const auto& right_type = checked_pointer_cast<DecimalType>(values[1].type);

Review comment:
       ```suggestion
     auto left_type = checked_cast<const DecimalType*>(values[0].type.get());
     auto right_type = checked_cast<const DecimalType*>(values[1].type.get());
   ```

##########
File path: cpp/src/arrow/compute/kernels/scalar_arithmetic.cc
##########
@@ -428,12 +524,69 @@ ArrayKernelExec ArithmeticExecFromOp(detail::GetTypeId get_id) {
   }
 }
 
+// calculate output precision/scale and args rescaling per operation type
+Result<std::shared_ptr<DataType>> GetDecimalBinaryOutput(
+    const std::string& op, const std::vector<ValueDescr>& values,
+    std::vector<std::shared_ptr<DataType>>* replaced = nullptr) {
+  const auto& left_type = checked_pointer_cast<DecimalType>(values[0].type);
+  const auto& right_type = checked_pointer_cast<DecimalType>(values[1].type);
+
+  const int32_t p1 = left_type->precision(), s1 = left_type->scale();
+  const int32_t p2 = right_type->precision(), s2 = right_type->scale();
+  if (s1 < 0 || s2 < 0) {
+    return Status::NotImplemented("Decimals with negative scales not supported");
+  }
+
+  int32_t out_prec, out_scale;
+  int32_t left_scaleup = 0, right_scaleup = 0;
+
+  // decimal upscaling behaviour references amazon redshift
+  // https://docs.aws.amazon.com/redshift/latest/dg/r_numeric_computations201.html
+  if (op.find("add") == 0 || op.find("subtract") == 0) {
+    out_scale = std::max(s1, s2);
+    out_prec = std::max(p1 - s1, p2 - s2) + 1 + out_scale;
+    left_scaleup = out_scale - s1;
+    right_scaleup = out_scale - s2;
+  } else if (op.find("multiply") == 0) {
+    out_scale = s1 + s2;
+    out_prec = p1 + p2 + 1;
+  } else if (op.find("divide") == 0) {
+    out_scale = std::max(4, s1 + p2 - s2 + 1);
+    out_prec = p1 - s1 + s2 + out_scale;  // >= p1 + p2 + 1
+    left_scaleup = out_prec - p1;
+  } else {
+    return Status::Invalid("Invalid decimal operation: ", op);
+  }
+
+  const auto id = left_type->id();
+  auto make = [id](int32_t precision, int32_t scale) {

Review comment:
       This seems reusable; could you expose it as `DecimalType::Make(width/id, precision, scale)`?
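
       A sketch of what such a helper could look like (the name and signature here are hypothetical, not an existing Arrow API at the time of this review):

    ```cpp
    // Hypothetical factory; Decimal128Type::Make / Decimal256Type::Make already
    // validate precision and scale and return Result<std::shared_ptr<DataType>>.
    static Result<std::shared_ptr<DataType>> DecimalTypeMake(Type::type id,
                                                             int32_t precision,
                                                             int32_t scale) {
      switch (id) {
        case Type::DECIMAL128:
          return Decimal128Type::Make(precision, scale);
        case Type::DECIMAL256:
          return Decimal256Type::Make(precision, scale);
        default:
          return Status::TypeError("expected a decimal type id");
      }
    }
    ```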

##########
File path: cpp/src/arrow/compute/kernels/scalar_arithmetic_test.cc
##########
@@ -1161,5 +1161,306 @@ TYPED_TEST(TestUnaryArithmeticFloating, AbsoluteValue) {
   }
 }
 
+class TestBinaryArithmeticDecimal : public TestBase {
+ protected:
+  struct Arg {
+    std::shared_ptr<DataType> type;
+    std::string value;
+  };
+
+  std::shared_ptr<DataType> GetOutType(const std::string& op,
+                                       const std::shared_ptr<DataType>& left_type,
+                                       const std::shared_ptr<DataType>& right_type) {
+    auto left_decimal_type = std::static_pointer_cast<DecimalType>(left_type);
+    auto right_decimal_type = std::static_pointer_cast<DecimalType>(right_type);
+
+    const int32_t p1 = left_decimal_type->precision(), s1 = left_decimal_type->scale();
+    const int32_t p2 = right_decimal_type->precision(), s2 = right_decimal_type->scale();
+
+    // https://docs.aws.amazon.com/redshift/latest/dg/r_numeric_computations201.html
+    int32_t precision, scale;
+    if (op == "add" || op == "subtract") {
+      scale = std::max(s1, s2);
+      precision = std::max(p1 - s1, p2 - s2) + 1 + scale;
+    } else if (op == "multiply") {
+      scale = s1 + s2;
+      precision = p1 + p2 + 1;
+    } else if (op == "divide") {
+      scale = std::max(4, s1 + p2 - s2 + 1);
+      precision = p1 - s1 + s2 + scale;
+    } else {
+      ABORT_NOT_OK(Status::Invalid("invalid binary operator: ", op));
+    }
+
+    std::shared_ptr<DataType> type;
+    if (left_type->id() == Type::DECIMAL128) {
+      ASSIGN_OR_ABORT(type, Decimal128Type::Make(precision, scale));
+    } else {
+      ASSIGN_OR_ABORT(type, Decimal256Type::Make(precision, scale));
+    }
+    return type;
+  }
+
+  std::shared_ptr<Scalar> MakeScalar(const std::shared_ptr<DataType>& type,
+                                     const std::string& str) {
+    std::shared_ptr<Scalar> scalar;
+    if (type->id() == Type::DECIMAL128) {
+      Decimal128 value;
+      int32_t dummy;
+      ABORT_NOT_OK(Decimal128::FromString(str, &value, &dummy));
+      ASSIGN_OR_ABORT(scalar, arrow::MakeScalar(type, value));
+    } else {
+      Decimal256 value;
+      int32_t dummy;
+      ABORT_NOT_OK(Decimal256::FromString(str, &value, &dummy));
+      ASSIGN_OR_ABORT(scalar, arrow::MakeScalar(type, value));
+    }
+    return scalar;
+  }
+
+  Datum ToDatum(const std::shared_ptr<DataType>& type, const std::string& value) {
+    if (value.find("[") == std::string::npos) {
+      return Datum(MakeScalar(type, value));
+    } else {
+      return Datum(ArrayFromJSON(type, value));
+    }
+  }
+
+  void Assert(const std::string& op, const Arg& left, const Arg& right,

Review comment:
       This is confusingly indirect. Could you instead call `CheckScalar` directly in the test cases?
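
       For instance, a hypothetical direct-style case (assuming a `CheckScalar(func_name, inputs, expected)` helper along the lines of the one in compute's test_util; the types and values below are illustrative, not taken from the PR):

    ```cpp
    TEST(TestBinaryArithmeticDecimal, Add) {
      // decimal(3, 1) + decimal(3, 2) -> decimal(5, 2) under the quoted rules.
      CheckScalar("add",
                  {ArrayFromJSON(decimal128(3, 1), R"(["1.2", "-3.4"])"),
                   ArrayFromJSON(decimal128(3, 2), R"(["0.05", "1.00"])")},
                  ArrayFromJSON(decimal128(5, 2), R"(["1.25", "-2.40"])"));
    }
    ```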

##########
File path: cpp/src/arrow/compute/kernels/scalar_arithmetic.cc
##########
@@ -428,12 +524,69 @@ ArrayKernelExec ArithmeticExecFromOp(detail::GetTypeId get_id) {
   }
 }
 
+// calculate output precision/scale and args rescaling per operation type
+Result<std::shared_ptr<DataType>> GetDecimalBinaryOutput(

Review comment:
       Please open a follow-up JIRA to make this public so it can be reused (for example, in the comparison functions).

##########
File path: cpp/src/arrow/compute/kernels/scalar_arithmetic.cc
##########
@@ -451,8 +604,45 @@ struct ArithmeticFunction : ScalarFunction {
     if (auto kernel = DispatchExactImpl(this, *values)) return kernel;
     return arrow::compute::detail::NoMatchingKernel(this, *values);
   }
+
+  Result<const Kernel*> DispatchDecimal(std::vector<ValueDescr>* values) const {
+    if (values->size() == 2) {
+      std::vector<std::shared_ptr<DataType>> replaced;
+      RETURN_NOT_OK(GetDecimalBinaryOutput(name(), *values, &replaced));
+      (*values)[0].type = std::move(replaced[0]);
+      (*values)[1].type = std::move(replaced[1]);
+    }
+
+    using arrow::compute::detail::DispatchExactImpl;
+    if (auto kernel = DispatchExactImpl(this, *values)) return kernel;
+    return arrow::compute::detail::NoMatchingKernel(this, *values);
+  }
+};
+
+// resolve decimal operation output type
+struct DecimalBinaryOutputResolver {
+  std::string func_name;
+
+  DecimalBinaryOutputResolver(std::string func_name) : func_name(std::move(func_name)) {}
+
+  Result<ValueDescr> operator()(KernelContext*, const std::vector<ValueDescr>& args) {
+    ARROW_ASSIGN_OR_RAISE(auto out_type, GetDecimalBinaryOutput(func_name, args));

Review comment:
       `GetDecimalBinaryOutput` can't be reused here, since the output resolver receives post-implicit-cast argument types. For example, for `decimal(13, 3) / decimal(3, 0)` we expect to cast the left operand to `decimal(20, 10)`, leave the right uncast, and output `decimal(20, 7)` (IIUC). So the output resolver needs to compute `decimal(20, 7)` given `decimal(20, 10)` and `decimal(3, 0)`.

##########
File path: cpp/src/arrow/compute/kernels/scalar_arithmetic.cc
##########
@@ -428,12 +524,69 @@ ArrayKernelExec ArithmeticExecFromOp(detail::GetTypeId get_id) {
   }
 }
 
+// calculate output precision/scale and args rescaling per operation type
+Result<std::shared_ptr<DataType>> GetDecimalBinaryOutput(
+    const std::string& op, const std::vector<ValueDescr>& values,
+    std::vector<std::shared_ptr<DataType>>* replaced = nullptr) {
+  const auto& left_type = checked_pointer_cast<DecimalType>(values[0].type);
+  const auto& right_type = checked_pointer_cast<DecimalType>(values[1].type);
+
+  const int32_t p1 = left_type->precision(), s1 = left_type->scale();
+  const int32_t p2 = right_type->precision(), s2 = right_type->scale();
+  if (s1 < 0 || s2 < 0) {
+    return Status::NotImplemented("Decimals with negative scales not supported");
+  }
+
+  int32_t out_prec, out_scale;
+  int32_t left_scaleup = 0, right_scaleup = 0;
+
+  // decimal upscaling behaviour references amazon redshift
+  // https://docs.aws.amazon.com/redshift/latest/dg/r_numeric_computations201.html
+  if (op.find("add") == 0 || op.find("subtract") == 0) {
+    out_scale = std::max(s1, s2);
+    out_prec = std::max(p1 - s1, p2 - s2) + 1 + out_scale;
+    left_scaleup = out_scale - s1;
+    right_scaleup = out_scale - s2;
+  } else if (op.find("multiply") == 0) {
+    out_scale = s1 + s2;
+    out_prec = p1 + p2 + 1;
+  } else if (op.find("divide") == 0) {
+    out_scale = std::max(4, s1 + p2 - s2 + 1);
+    out_prec = p1 - s1 + s2 + out_scale;  // >= p1 + p2 + 1
+    left_scaleup = out_prec - p1;
+  } else {
+    return Status::Invalid("Invalid decimal operation: ", op);
+  }
+
+  const auto id = left_type->id();
+  auto make = [id](int32_t precision, int32_t scale) {
+    if (id == Type::DECIMAL128) {
+      return Decimal128Type::Make(precision, scale);
+    } else {
+      return Decimal256Type::Make(precision, scale);
+    }
+  };
+
+  if (replaced) {
+    replaced->resize(2);
+    ARROW_ASSIGN_OR_RAISE((*replaced)[0], make(p1 + left_scaleup, s1 + left_scaleup));
+    ARROW_ASSIGN_OR_RAISE((*replaced)[1], make(p2 + right_scaleup, s2 + right_scaleup));
+  }
+
+  return make(out_prec, out_scale);
+}
+
 struct ArithmeticFunction : ScalarFunction {
   using ScalarFunction::ScalarFunction;
 
   Result<const Kernel*> DispatchBest(std::vector<ValueDescr>* values) const override {
     RETURN_NOT_OK(CheckArity(*values));
 
+    const auto type_id = (*values)[0].type->id();

Review comment:
       This will currently fail for decimal + float and float + decimal. Please add tests for these cases (`CheckDispatchBest` again).
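
       For example (the expected promotion to float64 below is an assumption for illustration, not something stated in the PR):

    ```cpp
    // Hypothetical CheckDispatchBest cases for mixed decimal/float arguments,
    // assuming the intended behaviour is to promote the decimal side to float64.
    CheckDispatchBest("add", {decimal128(5, 2), float64()}, {float64(), float64()});
    CheckDispatchBest("add", {float64(), decimal128(5, 2)}, {float64(), float64()});
    ```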

##########
File path: cpp/src/arrow/compute/kernels/scalar_arithmetic.cc
##########
@@ -428,12 +524,69 @@ ArrayKernelExec ArithmeticExecFromOp(detail::GetTypeId get_id) {
   }
 }
 
+// calculate output precision/scale and args rescaling per operation type
+Result<std::shared_ptr<DataType>> GetDecimalBinaryOutput(
+    const std::string& op, const std::vector<ValueDescr>& values,
+    std::vector<std::shared_ptr<DataType>>* replaced = nullptr) {
+  const auto& left_type = checked_pointer_cast<DecimalType>(values[0].type);
+  const auto& right_type = checked_pointer_cast<DecimalType>(values[1].type);
+
+  const int32_t p1 = left_type->precision(), s1 = left_type->scale();
+  const int32_t p2 = right_type->precision(), s2 = right_type->scale();
+  if (s1 < 0 || s2 < 0) {
+    return Status::NotImplemented("Decimals with negative scales not supported");
+  }
+
+  int32_t out_prec, out_scale;
+  int32_t left_scaleup = 0, right_scaleup = 0;
+
+  // decimal upscaling behaviour references amazon redshift
+  // https://docs.aws.amazon.com/redshift/latest/dg/r_numeric_computations201.html
+  if (op.find("add") == 0 || op.find("subtract") == 0) {
+    out_scale = std::max(s1, s2);
+    out_prec = std::max(p1 - s1, p2 - s2) + 1 + out_scale;
+    left_scaleup = out_scale - s1;
+    right_scaleup = out_scale - s2;

Review comment:
       Additionally, I think this could be defined for the varargs case (so it could be used in elementwise_min/elementwise_max/...).

##########
File path: cpp/src/arrow/compute/kernels/scalar_arithmetic_test.cc
##########
@@ -1161,5 +1161,306 @@ TYPED_TEST(TestUnaryArithmeticFloating, AbsoluteValue) {
   }
 }
 
+class TestBinaryArithmeticDecimal : public TestBase {
+ protected:
+  struct Arg {
+    std::shared_ptr<DataType> type;
+    std::string value;
+  };
+
+  std::shared_ptr<DataType> GetOutType(const std::string& op,

Review comment:
       Also, please make the expected output type explicit in the test cases below instead of computing it.



