You are viewing a plain text version of this content. The canonical link for it is here.
Posted to github@arrow.apache.org by GitBox <gi...@apache.org> on 2021/05/20 09:30:48 UTC

[GitHub] [arrow] cyb70289 opened a new pull request #10364: ARROW-12074: [C++][Compute] Add scalar arithmetic kernels for decimal

cyb70289 opened a new pull request #10364:
URL: https://github.com/apache/arrow/pull/10364


   TODOs:
   
   - [ ] Subtract, Divide
   - [ ] Overflow check
   - [ ] Consolidate tests


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [arrow] bkietz commented on a change in pull request #10364: ARROW-12074: [C++][Compute] Add scalar arithmetic kernels for decimal

Posted by GitBox <gi...@apache.org>.
bkietz commented on a change in pull request #10364:
URL: https://github.com/apache/arrow/pull/10364#discussion_r650004667



##########
File path: cpp/src/arrow/compute/kernels/scalar_arithmetic.cc
##########
@@ -428,12 +524,120 @@ ArrayKernelExec ArithmeticExecFromOp(detail::GetTypeId get_id) {
   }
 }
 
+Status CastBinaryDecimalArgs(const std::string& func_name,
+                             std::vector<ValueDescr>* values) {
+  auto& left_type = (*values)[0].type;
+  auto& right_type = (*values)[1].type;
+  DCHECK(is_decimal(left_type->id()) || is_decimal(right_type->id()));
+
+  // decimal + float = float
+  if (is_floating(left_type->id())) {
+    right_type = left_type;
+    return Status::OK();
+  } else if (is_floating(right_type->id())) {
+    left_type = right_type;
+    return Status::OK();
+  }
+
+  // precision, scale of left and right args
+  int32_t p1, s1, p2, s2;
+
+  // decimal + integer = decimal

Review comment:
       well that's embarrassing. Would you add a follow up JIRA for integer->decimal casts, please? 




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [arrow] bkietz commented on a change in pull request #10364: ARROW-12074: [C++][Compute] Add scalar arithmetic kernels for decimal

Posted by GitBox <gi...@apache.org>.
bkietz commented on a change in pull request #10364:
URL: https://github.com/apache/arrow/pull/10364#discussion_r651870110



##########
File path: cpp/src/arrow/compute/kernels/scalar_arithmetic_test.cc
##########
@@ -1148,5 +1148,326 @@ TYPED_TEST(TestUnaryArithmeticFloating, AbsoluteValue) {
   }
 }
 
+class TestBinaryArithmeticDecimal : public TestBase {
+ protected:
+  std::shared_ptr<Scalar> MakeScalar(const std::shared_ptr<DataType>& type,
+                                     const std::string& str) {

Review comment:
       Can you reuse ScalarFromJSON instead?

##########
File path: cpp/src/arrow/compute/kernels/scalar_arithmetic_test.cc
##########
@@ -1148,5 +1148,326 @@ TYPED_TEST(TestUnaryArithmeticFloating, AbsoluteValue) {
   }
 }
 
+class TestBinaryArithmeticDecimal : public TestBase {
+ protected:
+  std::shared_ptr<Scalar> MakeScalar(const std::shared_ptr<DataType>& type,
+                                     const std::string& str) {
+    std::shared_ptr<Scalar> scalar;
+    if (type->id() == Type::DECIMAL128) {
+      Decimal128 value;
+      int32_t dummy;
+      ABORT_NOT_OK(Decimal128::FromString(str, &value, &dummy));
+      ASSIGN_OR_ABORT(scalar, arrow::MakeScalar(type, value));
+    } else {
+      Decimal256 value;
+      int32_t dummy;
+      ABORT_NOT_OK(Decimal256::FromString(str, &value, &dummy));
+      ASSIGN_OR_ABORT(scalar, arrow::MakeScalar(type, value));
+    }
+    return scalar;
+  }
+};
+

Review comment:
       Please add some tests specifically of the implicit casting behavior, using CheckDispatchBest

##########
File path: cpp/src/arrow/compute/kernels/scalar_arithmetic.cc
##########
@@ -461,6 +706,8 @@ std::shared_ptr<ScalarFunction> MakeArithmeticFunction(std::string name,
     auto exec = ArithmeticExecFromOp<ScalarBinaryEqualTypes, Op>(ty);
     DCHECK_OK(func->AddKernel({ty, ty}, ty, exec));
   }
+  AddDecimalBinaryKernels<Op>(name, &func);

Review comment:
       Please ensure we're not adding decimal kernels for functions which don't support decimals yet:
   ```suggestion
     // TODO($FOLLOW_UP_JIRA) remove when decimal is supported for all arithmetic kernels
     if (name == "add" || name == "add_checked" || ...) {
       AddDecimalBinaryKernels<Op>(name, &func);
     }
   ```

##########
File path: cpp/src/arrow/compute/kernels/scalar_arithmetic.cc
##########
@@ -461,6 +706,8 @@ std::shared_ptr<ScalarFunction> MakeArithmeticFunction(std::string name,
     auto exec = ArithmeticExecFromOp<ScalarBinaryEqualTypes, Op>(ty);
     DCHECK_OK(func->AddKernel({ty, ty}, ty, exec));
   }
+  AddDecimalBinaryKernels<Op>(name, &func);

Review comment:
       Please open a follow up JIRA for supporting decimals in the other arithmetic functions as well

##########
File path: cpp/src/arrow/compute/kernels/scalar_arithmetic.cc
##########
@@ -451,8 +655,49 @@ struct ArithmeticFunction : ScalarFunction {
     if (auto kernel = DispatchExactImpl(this, *values)) return kernel;
     return arrow::compute::detail::NoMatchingKernel(this, *values);
   }
+
+  Status CheckDecimals(std::vector<ValueDescr>* values) const {
+    bool has_decimal = false;
+    for (const auto& value : *values) {
+      if (is_decimal(value.type->id())) {
+        has_decimal = true;
+        break;
+      }
+    }
+    if (!has_decimal) return Status::OK();
+
+    if (values->size() == 2) {
+      return CastBinaryDecimalArgs(name(), values);
+    }
+    return Status::OK();
+  }
+};
+
+// resolve decimal operation output type
+struct BinaryDecimalOutputResolver {
+  const std::string func_name;
+
+  explicit BinaryDecimalOutputResolver(std::string func_name)
+      : func_name(std::move(func_name)) {}
+
+  Result<ValueDescr> operator()(KernelContext*, const std::vector<ValueDescr>& args) {
+    ARROW_ASSIGN_OR_RAISE(auto type, ResolveBinaryDecimalOutput(func_name, args));
+    return ValueDescr(std::move(type), GetBroadcastShape(args));
+  }
 };
 
+template <typename Op>
+void AddDecimalBinaryKernels(const std::string& name,
+                             std::shared_ptr<ArithmeticFunction>* func) {
+  auto out_type = OutputType(BinaryDecimalOutputResolver(name));

Review comment:
       I think it'd be more straightforward to dispatch on name here:
   ```suggestion
     OutputType out_type;
     const std::string op = func_name.substr(0, func_name.find("_"));
     if (op == "add" || op == "subtract") {
       out_type = OutputType(ResolveDecimalAdditionOrSubtractionOutput);
     } else if (op == "multiply") {
       out_type = OutputType(ResolveDecimalMultiplicationOutput);
     } else if (op == "divide") {
       out_type = OutputType(ResolveDecimalDivisionOutput);
     } else {
       DCHECK(false);
     }
   ```




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [arrow] cyb70289 edited a comment on pull request #10364: ARROW-12074: [C++][Compute] Add scalar arithmetic kernels for decimal

Posted by GitBox <gi...@apache.org>.
cyb70289 edited a comment on pull request #10364:
URL: https://github.com/apache/arrow/pull/10364#issuecomment-854364569






-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [arrow] cyb70289 commented on pull request #10364: ARROW-12074: [C++][Compute] Add scalar arithmetic kernels for decimal

Posted by GitBox <gi...@apache.org>.
cyb70289 commented on pull request #10364:
URL: https://github.com/apache/arrow/pull/10364#issuecomment-852709410


   @bkietz , met with one problem, would like to hear your comments. Thanks.
   
   Decimal upscaling is operation dependent. E.g., `+,-` will upscale arg with small scale to align digit, `*` needn't scaling, `/` is more complicated.
   
   Implicit args casting happens before kernel is created. `DispatchBest` only knows arg types, no operation type (kernel dependent) is available. So we cannot figure out the "to be casted" arg type (new precision, scale).
   https://github.com/apache/arrow/blob/master/cpp/src/arrow/compute/function.cc#L175
   
   Maybe add another callback `kernel->explicit_cast()` and call it after or inside `DispatchBest`? Or one `ScalarFunction` struct (and DispatchBest) per binary decimal operation?


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [arrow] cyb70289 commented on a change in pull request #10364: ARROW-12074: [C++][Compute] Add scalar arithmetic kernels for decimal

Posted by GitBox <gi...@apache.org>.
cyb70289 commented on a change in pull request #10364:
URL: https://github.com/apache/arrow/pull/10364#discussion_r649053658



##########
File path: cpp/src/arrow/compute/kernels/scalar_arithmetic.cc
##########
@@ -428,12 +524,69 @@ ArrayKernelExec ArithmeticExecFromOp(detail::GetTypeId get_id) {
   }
 }
 
+// calculate output precision/scale and args rescaling per operation type
+Result<std::shared_ptr<DataType>> GetDecimalBinaryOutput(
+    const std::string& op, const std::vector<ValueDescr>& values,
+    std::vector<std::shared_ptr<DataType>>* replaced = nullptr) {
+  const auto& left_type = checked_pointer_cast<DecimalType>(values[0].type);
+  const auto& right_type = checked_pointer_cast<DecimalType>(values[1].type);
+
+  const int32_t p1 = left_type->precision(), s1 = left_type->scale();
+  const int32_t p2 = right_type->precision(), s2 = right_type->scale();
+  if (s1 < 0 || s2 < 0) {
+    return Status::NotImplemented("Decimals with negative scales not supported");
+  }
+
+  int32_t out_prec, out_scale;
+  int32_t left_scaleup = 0, right_scaleup = 0;
+
+  // decimal upscaling behaviour references amazon redshift
+  // https://docs.aws.amazon.com/redshift/latest/dg/r_numeric_computations201.html
+  if (op.find("add") == 0 || op.find("subtract") == 0) {
+    out_scale = std::max(s1, s2);
+    out_prec = std::max(p1 - s1, p2 - s2) + 1 + out_scale;
+    left_scaleup = out_scale - s1;
+    right_scaleup = out_scale - s2;

Review comment:
       Not done yet. Will follow up.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [arrow] cyb70289 commented on a change in pull request #10364: ARROW-12074: [C++][Compute] Add scalar arithmetic kernels for decimal

Posted by GitBox <gi...@apache.org>.
cyb70289 commented on a change in pull request #10364:
URL: https://github.com/apache/arrow/pull/10364#discussion_r645633404



##########
File path: cpp/src/arrow/compute/kernels/scalar_arithmetic.cc
##########
@@ -428,12 +524,69 @@ ArrayKernelExec ArithmeticExecFromOp(detail::GetTypeId get_id) {
   }
 }
 
+// calculate output precision/scale and args rescaling per operation type
+Result<std::shared_ptr<DataType>> GetDecimalBinaryOutput(
+    const std::string& op, const std::vector<ValueDescr>& values,
+    std::vector<std::shared_ptr<DataType>>* replaced = nullptr) {
+  const auto& left_type = checked_pointer_cast<DecimalType>(values[0].type);
+  const auto& right_type = checked_pointer_cast<DecimalType>(values[1].type);
+
+  const int32_t p1 = left_type->precision(), s1 = left_type->scale();
+  const int32_t p2 = right_type->precision(), s2 = right_type->scale();
+  if (s1 < 0 || s2 < 0) {
+    return Status::NotImplemented("Decimals with negative scales not supported");
+  }
+
+  int32_t out_prec, out_scale;
+  int32_t left_scaleup = 0, right_scaleup = 0;
+
+  // decimal upscaling behaviour references amazon redshift
+  // https://docs.aws.amazon.com/redshift/latest/dg/r_numeric_computations201.html
+  if (op.find("add") == 0 || op.find("subtract") == 0) {
+    out_scale = std::max(s1, s2);
+    out_prec = std::max(p1 - s1, p2 - s2) + 1 + out_scale;
+    left_scaleup = out_scale - s1;
+    right_scaleup = out_scale - s2;
+  } else if (op.find("multiply") == 0) {
+    out_scale = s1 + s2;
+    out_prec = p1 + p2 + 1;
+  } else if (op.find("divide") == 0) {
+    out_scale = std::max(4, s1 + p2 - s2 + 1);
+    out_prec = p1 - s1 + s2 + out_scale;  // >= p1 + p2 + 1
+    left_scaleup = out_prec - p1;
+  } else {
+    return Status::Invalid("Invalid decimal operation: ", op);
+  }
+
+  const auto id = left_type->id();

Review comment:
       I only considered same width decimal ops now.
   Will think about mixed witdth case.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [arrow] bkietz commented on a change in pull request #10364: ARROW-12074: [C++][Compute] Add scalar arithmetic kernels for decimal

Posted by GitBox <gi...@apache.org>.
bkietz commented on a change in pull request #10364:
URL: https://github.com/apache/arrow/pull/10364#discussion_r654597223



##########
File path: cpp/src/arrow/compute/kernels/scalar_arithmetic.cc
##########
@@ -461,6 +706,8 @@ std::shared_ptr<ScalarFunction> MakeArithmeticFunction(std::string name,
     auto exec = ArithmeticExecFromOp<ScalarBinaryEqualTypes, Op>(ty);
     DCHECK_OK(func->AddKernel({ty, ty}, ty, exec));
   }
+  AddDecimalBinaryKernels<Op>(name, &func);

Review comment:
       Thanks, that will work
   
   > I'm afraid some arithmetic kernels (e.g., power) may not support decimals?
   
   This is one topic which the follow up JIRA would give us a better place to discuss




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [arrow] bkietz commented on a change in pull request #10364: ARROW-12074: [C++][Compute] Add scalar arithmetic kernels for decimal

Posted by GitBox <gi...@apache.org>.
bkietz commented on a change in pull request #10364:
URL: https://github.com/apache/arrow/pull/10364#discussion_r646714303



##########
File path: docs/source/cpp/compute.rst
##########
@@ -286,11 +286,29 @@ an ``Invalid`` :class:`Status` when overflow is detected.
 +--------------------------+------------+--------------------+---------------------+
 | power_checked            | Binary     | Numeric            | Numeric             |
 +--------------------------+------------+--------------------+---------------------+
-| subtract                 | Binary     | Numeric            | Numeric             |
+| subtract                 | Binary     | Numeric            | Numeric (1)         |
 +--------------------------+------------+--------------------+---------------------+
-| subtract_checked         | Binary     | Numeric            | Numeric             |
+| subtract_checked         | Binary     | Numeric            | Numeric (1)         |
 +--------------------------+------------+--------------------+---------------------+
 
+* \(1) Precision and scale of computed DECIMAL results
+
++------------+---------------------------------------------+
+| Operation  | Result precision and scale                  |
++============+=============================================+
+| | add      | | scale = max(s1, s2)                       |
+| | subtract | | precision = max(p1-s1, p2-s2) + 1 + scale |
++------------+---------------------------------------------+
+| multiply   | | scale = s1 + s2                           |
+|            | | precision = p1 + p2 + 1                   |
++------------+---------------------------------------------+
+| divide     | | scale = max(4, s1 + p2 - s2 + 1)          |
+|            | | precision = p1 - s1 + s2 + scale          |

Review comment:
       Ah, I see. Thanks




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [arrow] cyb70289 commented on pull request #10364: ARROW-12074: [C++][Compute] Add scalar arithmetic kernels for decimal

Posted by GitBox <gi...@apache.org>.
cyb70289 commented on pull request #10364:
URL: https://github.com/apache/arrow/pull/10364#issuecomment-854364569


   Thanks @bkietz , it's almost done except one last catch.
   
   As the output type (precision, scale) is dependent on the inputs, I have a resolver object to calculate output type. The resolver is called with the *casted* input type, not the original type. It causes problem to division, as the output precision and scale should be calculated from original inputs. No trouble for add/subtract as the output precision/scale is the same for original and casted inputs (digit aligned). Multiply doesn't need cast.
   
   Does it make sense to pass both original input types and the casted types to the resolver [1][2]? We will have to update all existing custom resolver codes.
   Or there are better ways to handle this?
   
   [1] https://github.com/apache/arrow/blob/master/cpp/src/arrow/compute/function.cc#L196
   [2] https://github.com/apache/arrow/blob/master/cpp/src/arrow/compute/exec.cc#L495


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [arrow] bkietz commented on a change in pull request #10364: ARROW-12074: [C++][Compute] Add scalar arithmetic kernels for decimal

Posted by GitBox <gi...@apache.org>.
bkietz commented on a change in pull request #10364:
URL: https://github.com/apache/arrow/pull/10364#discussion_r645570717



##########
File path: cpp/src/arrow/compute/kernels/codegen_internal.h
##########
@@ -537,6 +569,20 @@ struct OutputAdapter<Type, enable_if_base_binary<Type>> {
   }
 };
 
+template <typename Type>
+struct OutputAdapter<Type, enable_if_decimal<Type>> {
+  using T = typename TypeTraits<Type>::ScalarType::ValueType;
+  template <typename Generator>
+  static Status Write(KernelContext*, Datum* out, Generator&& generator) {
+    ArrayData* out_arr = out->mutable_array();
+    T* out_data = out_arr->GetMutableValues<T>(1);

Review comment:
       I don't think this is safe for big endian architectures, see definition of `endian_agnostic`

##########
File path: cpp/src/arrow/compute/kernels/scalar_arithmetic_test.cc
##########
@@ -1161,5 +1161,306 @@ TYPED_TEST(TestUnaryArithmeticFloating, AbsoluteValue) {
   }
 }
 
+class TestBinaryArithmeticDecimal : public TestBase {
+ protected:
+  struct Arg {
+    std::shared_ptr<DataType> type;
+    std::string value;
+  };
+
+  std::shared_ptr<DataType> GetOutType(const std::string& op,

Review comment:
       Instead of repeating this logic here, please write some tests exercising just the implicit casts. You can use the `CheckDispatchBest()` helper

##########
File path: cpp/src/arrow/compute/kernels/scalar_arithmetic.cc
##########
@@ -428,12 +524,69 @@ ArrayKernelExec ArithmeticExecFromOp(detail::GetTypeId get_id) {
   }
 }
 
+// calculate output precision/scale and args rescaling per operation type
+Result<std::shared_ptr<DataType>> GetDecimalBinaryOutput(
+    const std::string& op, const std::vector<ValueDescr>& values,
+    std::vector<std::shared_ptr<DataType>>* replaced = nullptr) {
+  const auto& left_type = checked_pointer_cast<DecimalType>(values[0].type);
+  const auto& right_type = checked_pointer_cast<DecimalType>(values[1].type);
+
+  const int32_t p1 = left_type->precision(), s1 = left_type->scale();
+  const int32_t p2 = right_type->precision(), s2 = right_type->scale();
+  if (s1 < 0 || s2 < 0) {
+    return Status::NotImplemented("Decimals with negative scales not supported");
+  }
+
+  int32_t out_prec, out_scale;
+  int32_t left_scaleup = 0, right_scaleup = 0;
+
+  // decimal upscaling behaviour references amazon redshift
+  // https://docs.aws.amazon.com/redshift/latest/dg/r_numeric_computations201.html
+  if (op.find("add") == 0 || op.find("subtract") == 0) {
+    out_scale = std::max(s1, s2);
+    out_prec = std::max(p1 - s1, p2 - s2) + 1 + out_scale;
+    left_scaleup = out_scale - s1;
+    right_scaleup = out_scale - s2;
+  } else if (op.find("multiply") == 0) {
+    out_scale = s1 + s2;
+    out_prec = p1 + p2 + 1;
+  } else if (op.find("divide") == 0) {
+    out_scale = std::max(4, s1 + p2 - s2 + 1);
+    out_prec = p1 - s1 + s2 + out_scale;  // >= p1 + p2 + 1
+    left_scaleup = out_prec - p1;
+  } else {
+    return Status::Invalid("Invalid decimal operation: ", op);
+  }
+
+  const auto id = left_type->id();

Review comment:
       if we add decimal128 + decimal256, shouldn't the output type be decimal256 instead of simply taking the LHS' width?

##########
File path: cpp/src/arrow/compute/kernels/scalar_arithmetic.cc
##########
@@ -428,12 +524,69 @@ ArrayKernelExec ArithmeticExecFromOp(detail::GetTypeId get_id) {
   }
 }
 
+// calculate output precision/scale and args rescaling per operation type
+Result<std::shared_ptr<DataType>> GetDecimalBinaryOutput(
+    const std::string& op, const std::vector<ValueDescr>& values,
+    std::vector<std::shared_ptr<DataType>>* replaced = nullptr) {
+  const auto& left_type = checked_pointer_cast<DecimalType>(values[0].type);
+  const auto& right_type = checked_pointer_cast<DecimalType>(values[1].type);
+
+  const int32_t p1 = left_type->precision(), s1 = left_type->scale();
+  const int32_t p2 = right_type->precision(), s2 = right_type->scale();
+  if (s1 < 0 || s2 < 0) {
+    return Status::NotImplemented("Decimals with negative scales not supported");
+  }
+
+  int32_t out_prec, out_scale;
+  int32_t left_scaleup = 0, right_scaleup = 0;
+
+  // decimal upscaling behaviour references amazon redshift
+  // https://docs.aws.amazon.com/redshift/latest/dg/r_numeric_computations201.html
+  if (op.find("add") == 0 || op.find("subtract") == 0) {
+    out_scale = std::max(s1, s2);
+    out_prec = std::max(p1 - s1, p2 - s2) + 1 + out_scale;
+    left_scaleup = out_scale - s1;
+    right_scaleup = out_scale - s2;

Review comment:
       I think this could be the default case; it's what would be used for any comparison kernel, for example

##########
File path: cpp/src/arrow/compute/kernels/scalar_arithmetic.cc
##########
@@ -451,8 +604,45 @@ struct ArithmeticFunction : ScalarFunction {
     if (auto kernel = DispatchExactImpl(this, *values)) return kernel;
     return arrow::compute::detail::NoMatchingKernel(this, *values);
   }
+
+  Result<const Kernel*> DispatchDecimal(std::vector<ValueDescr>* values) const {
+    if (values->size() == 2) {
+      std::vector<std::shared_ptr<DataType>> replaced;
+      RETURN_NOT_OK(GetDecimalBinaryOutput(name(), *values, &replaced));
+      (*values)[0].type = std::move(replaced[0]);
+      (*values)[1].type = std::move(replaced[1]);
+    }
+
+    using arrow::compute::detail::DispatchExactImpl;
+    if (auto kernel = DispatchExactImpl(this, *values)) return kernel;
+    return arrow::compute::detail::NoMatchingKernel(this, *values);
+  }
+};
+
+// resolve decimal operation output type
+struct DecimalBinaryOutputResolver {
+  std::string func_name;
+
+  DecimalBinaryOutputResolver(std::string func_name) : func_name(std::move(func_name)) {}
+
+  Result<ValueDescr> operator()(KernelContext*, const std::vector<ValueDescr>& args) {
+    ARROW_ASSIGN_OR_RAISE(auto out_type, GetDecimalBinaryOutput(func_name, args));
+    return ValueDescr(std::move(out_type));

Review comment:
       ```suggestion
       return ValueDescr(std::move(out_type), GetBroadcastShape(args));
   ```

##########
File path: docs/source/cpp/compute.rst
##########
@@ -286,11 +286,29 @@ an ``Invalid`` :class:`Status` when overflow is detected.
 +--------------------------+------------+--------------------+---------------------+
 | power_checked            | Binary     | Numeric            | Numeric             |
 +--------------------------+------------+--------------------+---------------------+
-| subtract                 | Binary     | Numeric            | Numeric             |
+| subtract                 | Binary     | Numeric            | Numeric (1)         |
 +--------------------------+------------+--------------------+---------------------+
-| subtract_checked         | Binary     | Numeric            | Numeric             |
+| subtract_checked         | Binary     | Numeric            | Numeric (1)         |
 +--------------------------+------------+--------------------+---------------------+
 
+* \(1) Precision and scale of computed DECIMAL results
+
++------------+---------------------------------------------+
+| Operation  | Result precision and scale                  |
++============+=============================================+
+| | add      | | scale = max(s1, s2)                       |
+| | subtract | | precision = max(p1-s1, p2-s2) + 1 + scale |
++------------+---------------------------------------------+
+| multiply   | | scale = s1 + s2                           |
+|            | | precision = p1 + p2 + 1                   |
++------------+---------------------------------------------+
+| divide     | | scale = max(4, s1 + p2 - s2 + 1)          |
+|            | | precision = p1 - s1 + s2 + scale          |

Review comment:
       ```suggestion
   | add        | scale = max(s1, s2)                         |
   | subtract   | precision = max(p1-s1, p2-s2) + 1 + scale   |
   +------------+---------------------------------------------+
   | multiply   | scale = s1 + s2                             |
   |            | precision = p1 + p2 + 1                     |
   +------------+---------------------------------------------+
   | divide     | scale = max(4, s1 + p2 - s2 + 1)            |
   |            | precision = p1 - s1 + s2 + scale            |
   ```
   
   Please mention that this is compatible with Redshift's decimal promotion rules.
   Additionally, please include an explanation of why this is reasonable.

##########
File path: cpp/src/arrow/compute/kernels/scalar_arithmetic.cc
##########
@@ -428,12 +524,69 @@ ArrayKernelExec ArithmeticExecFromOp(detail::GetTypeId get_id) {
   }
 }
 
+// calculate output precision/scale and args rescaling per operation type
+Result<std::shared_ptr<DataType>> GetDecimalBinaryOutput(
+    const std::string& op, const std::vector<ValueDescr>& values,
+    std::vector<std::shared_ptr<DataType>>* replaced = nullptr) {
+  const auto& left_type = checked_pointer_cast<DecimalType>(values[0].type);
+  const auto& right_type = checked_pointer_cast<DecimalType>(values[1].type);

Review comment:
       ```suggestion
     auto left_type = checked_cast<const DecimalType*>(values[0].type.get());
     auto right_type = checked_cast<const DecimalType*>(values[1].type.get());
   ```

##########
File path: cpp/src/arrow/compute/kernels/scalar_arithmetic.cc
##########
@@ -428,12 +524,69 @@ ArrayKernelExec ArithmeticExecFromOp(detail::GetTypeId get_id) {
   }
 }
 
+// calculate output precision/scale and args rescaling per operation type
+Result<std::shared_ptr<DataType>> GetDecimalBinaryOutput(
+    const std::string& op, const std::vector<ValueDescr>& values,
+    std::vector<std::shared_ptr<DataType>>* replaced = nullptr) {
+  const auto& left_type = checked_pointer_cast<DecimalType>(values[0].type);
+  const auto& right_type = checked_pointer_cast<DecimalType>(values[1].type);
+
+  const int32_t p1 = left_type->precision(), s1 = left_type->scale();
+  const int32_t p2 = right_type->precision(), s2 = right_type->scale();
+  if (s1 < 0 || s2 < 0) {
+    return Status::NotImplemented("Decimals with negative scales not supported");
+  }
+
+  int32_t out_prec, out_scale;
+  int32_t left_scaleup = 0, right_scaleup = 0;
+
+  // decimal upscaling behaviour references amazon redshift
+  // https://docs.aws.amazon.com/redshift/latest/dg/r_numeric_computations201.html
+  if (op.find("add") == 0 || op.find("subtract") == 0) {
+    out_scale = std::max(s1, s2);
+    out_prec = std::max(p1 - s1, p2 - s2) + 1 + out_scale;
+    left_scaleup = out_scale - s1;
+    right_scaleup = out_scale - s2;
+  } else if (op.find("multiply") == 0) {
+    out_scale = s1 + s2;
+    out_prec = p1 + p2 + 1;
+  } else if (op.find("divide") == 0) {
+    out_scale = std::max(4, s1 + p2 - s2 + 1);
+    out_prec = p1 - s1 + s2 + out_scale;  // >= p1 + p2 + 1
+    left_scaleup = out_prec - p1;
+  } else {
+    return Status::Invalid("Invalid decimal operation: ", op);
+  }
+
+  const auto id = left_type->id();
+  auto make = [id](int32_t precision, int32_t scale) {

Review comment:
       This seems reusable, could you expose it as `DecimalType::Make(width/id, precision, scale)`?

##########
File path: cpp/src/arrow/compute/kernels/scalar_arithmetic_test.cc
##########
@@ -1161,5 +1161,306 @@ TYPED_TEST(TestUnaryArithmeticFloating, AbsoluteValue) {
   }
 }
 
+class TestBinaryArithmeticDecimal : public TestBase {
+ protected:
+  struct Arg {
+    std::shared_ptr<DataType> type;
+    std::string value;
+  };
+
+  std::shared_ptr<DataType> GetOutType(const std::string& op,
+                                       const std::shared_ptr<DataType>& left_type,
+                                       const std::shared_ptr<DataType>& right_type) {
+    auto left_decimal_type = std::static_pointer_cast<DecimalType>(left_type);
+    auto right_decimal_type = std::static_pointer_cast<DecimalType>(right_type);
+
+    const int32_t p1 = left_decimal_type->precision(), s1 = left_decimal_type->scale();
+    const int32_t p2 = right_decimal_type->precision(), s2 = right_decimal_type->scale();
+
+    // https://docs.aws.amazon.com/redshift/latest/dg/r_numeric_computations201.html
+    int32_t precision, scale;
+    if (op == "add" || op == "subtract") {
+      scale = std::max(s1, s2);
+      precision = std::max(p1 - s1, p2 - s2) + 1 + scale;
+    } else if (op == "multiply") {
+      scale = s1 + s2;
+      precision = p1 + p2 + 1;
+    } else if (op == "divide") {
+      scale = std::max(4, s1 + p2 - s2 + 1);
+      precision = p1 - s1 + s2 + scale;
+    } else {
+      ABORT_NOT_OK(Status::Invalid("invalid binary operator: ", op));
+    }
+
+    std::shared_ptr<DataType> type;
+    if (left_type->id() == Type::DECIMAL128) {
+      ASSIGN_OR_ABORT(type, Decimal128Type::Make(precision, scale));
+    } else {
+      ASSIGN_OR_ABORT(type, Decimal256Type::Make(precision, scale));
+    }
+    return type;
+  }
+
+  std::shared_ptr<Scalar> MakeScalar(const std::shared_ptr<DataType>& type,
+                                     const std::string& str) {
+    std::shared_ptr<Scalar> scalar;
+    if (type->id() == Type::DECIMAL128) {
+      Decimal128 value;
+      int32_t dummy;
+      ABORT_NOT_OK(Decimal128::FromString(str, &value, &dummy));
+      ASSIGN_OR_ABORT(scalar, arrow::MakeScalar(type, value));
+    } else {
+      Decimal256 value;
+      int32_t dummy;
+      ABORT_NOT_OK(Decimal256::FromString(str, &value, &dummy));
+      ASSIGN_OR_ABORT(scalar, arrow::MakeScalar(type, value));
+    }
+    return scalar;
+  }
+
+  Datum ToDatum(const std::shared_ptr<DataType>& type, const std::string& value) {
+    if (value.find("[") == std::string::npos) {
+      return Datum(MakeScalar(type, value));
+    } else {
+      return Datum(ArrayFromJSON(type, value));
+    }
+  }
+
+  void Assert(const std::string& op, const Arg& left, const Arg& right,

Review comment:
       This is confusingly indirect. Could you instead directly call CheckScalar in the test cases?

##########
File path: cpp/src/arrow/compute/kernels/scalar_arithmetic.cc
##########
@@ -428,12 +524,69 @@ ArrayKernelExec ArithmeticExecFromOp(detail::GetTypeId get_id) {
   }
 }
 
+// calculate output precision/scale and args rescaling per operation type
+Result<std::shared_ptr<DataType>> GetDecimalBinaryOutput(

Review comment:
       Please open a follow up JIRA to make this public so it can be reused (for example in the comparison functions)

##########
File path: cpp/src/arrow/compute/kernels/scalar_arithmetic.cc
##########
@@ -451,8 +604,45 @@ struct ArithmeticFunction : ScalarFunction {
     if (auto kernel = DispatchExactImpl(this, *values)) return kernel;
     return arrow::compute::detail::NoMatchingKernel(this, *values);
   }
+
+  Result<const Kernel*> DispatchDecimal(std::vector<ValueDescr>* values) const {
+    if (values->size() == 2) {
+      std::vector<std::shared_ptr<DataType>> replaced;
+      RETURN_NOT_OK(GetDecimalBinaryOutput(name(), *values, &replaced));
+      (*values)[0].type = std::move(replaced[0]);
+      (*values)[1].type = std::move(replaced[1]);
+    }
+
+    using arrow::compute::detail::DispatchExactImpl;
+    if (auto kernel = DispatchExactImpl(this, *values)) return kernel;
+    return arrow::compute::detail::NoMatchingKernel(this, *values);
+  }
+};
+
+// resolve decimal operation output type
+struct DecimalBinaryOutputResolver {
+  std::string func_name;
+
+  DecimalBinaryOutputResolver(std::string func_name) : func_name(std::move(func_name)) {}
+
+  Result<ValueDescr> operator()(KernelContext*, const std::vector<ValueDescr>& args) {
+    ARROW_ASSIGN_OR_RAISE(auto out_type, GetDecimalBinaryOutput(func_name, args));

Review comment:
       GetDecimalBinaryOutput can't be reused here since the output resolver receives post-implicit-cast argument types. For example, in the case of `decimal(13, 3) / decimal(3, 0)`: we expect to cast the left to `decimal(20, 10)`, leave the right uncasted, and output to `decimal(20, 7)` (IIUC). So the output resolver needs to compute `decimal(20, 7)` given `decimal(20, 10)` and `decimal(3, 0)`

##########
File path: cpp/src/arrow/compute/kernels/scalar_arithmetic.cc
##########
@@ -428,12 +524,69 @@ ArrayKernelExec ArithmeticExecFromOp(detail::GetTypeId get_id) {
   }
 }
 
+// calculate output precision/scale and args rescaling per operation type
+Result<std::shared_ptr<DataType>> GetDecimalBinaryOutput(
+    const std::string& op, const std::vector<ValueDescr>& values,
+    std::vector<std::shared_ptr<DataType>>* replaced = nullptr) {
+  const auto& left_type = checked_pointer_cast<DecimalType>(values[0].type);
+  const auto& right_type = checked_pointer_cast<DecimalType>(values[1].type);
+
+  const int32_t p1 = left_type->precision(), s1 = left_type->scale();
+  const int32_t p2 = right_type->precision(), s2 = right_type->scale();
+  if (s1 < 0 || s2 < 0) {
+    return Status::NotImplemented("Decimals with negative scales not supported");
+  }
+
+  int32_t out_prec, out_scale;
+  int32_t left_scaleup = 0, right_scaleup = 0;
+
+  // decimal upscaling behaviour references amazon redshift
+  // https://docs.aws.amazon.com/redshift/latest/dg/r_numeric_computations201.html
+  if (op.find("add") == 0 || op.find("subtract") == 0) {
+    out_scale = std::max(s1, s2);
+    out_prec = std::max(p1 - s1, p2 - s2) + 1 + out_scale;
+    left_scaleup = out_scale - s1;
+    right_scaleup = out_scale - s2;
+  } else if (op.find("multiply") == 0) {
+    out_scale = s1 + s2;
+    out_prec = p1 + p2 + 1;
+  } else if (op.find("divide") == 0) {
+    out_scale = std::max(4, s1 + p2 - s2 + 1);
+    out_prec = p1 - s1 + s2 + out_scale;  // >= p1 + p2 + 1
+    left_scaleup = out_prec - p1;
+  } else {
+    return Status::Invalid("Invalid decimal operation: ", op);
+  }
+
+  const auto id = left_type->id();
+  auto make = [id](int32_t precision, int32_t scale) {
+    if (id == Type::DECIMAL128) {
+      return Decimal128Type::Make(precision, scale);
+    } else {
+      return Decimal256Type::Make(precision, scale);
+    }
+  };
+
+  if (replaced) {
+    replaced->resize(2);
+    ARROW_ASSIGN_OR_RAISE((*replaced)[0], make(p1 + left_scaleup, s1 + left_scaleup));
+    ARROW_ASSIGN_OR_RAISE((*replaced)[1], make(p2 + right_scaleup, s2 + right_scaleup));
+  }
+
+  return make(out_prec, out_scale);
+}
+
 struct ArithmeticFunction : ScalarFunction {
   using ScalarFunction::ScalarFunction;
 
   Result<const Kernel*> DispatchBest(std::vector<ValueDescr>* values) const override {
     RETURN_NOT_OK(CheckArity(*values));
 
+    const auto type_id = (*values)[0].type->id();

Review comment:
       This will currently fail for decimal + float and float + decimal. Please add tests for these cases (`CheckDispatchBest` again)

##########
File path: cpp/src/arrow/compute/kernels/scalar_arithmetic.cc
##########
@@ -428,12 +524,69 @@ ArrayKernelExec ArithmeticExecFromOp(detail::GetTypeId get_id) {
   }
 }
 
+// calculate output precision/scale and args rescaling per operation type
+Result<std::shared_ptr<DataType>> GetDecimalBinaryOutput(
+    const std::string& op, const std::vector<ValueDescr>& values,
+    std::vector<std::shared_ptr<DataType>>* replaced = nullptr) {
+  const auto& left_type = checked_pointer_cast<DecimalType>(values[0].type);
+  const auto& right_type = checked_pointer_cast<DecimalType>(values[1].type);
+
+  const int32_t p1 = left_type->precision(), s1 = left_type->scale();
+  const int32_t p2 = right_type->precision(), s2 = right_type->scale();
+  if (s1 < 0 || s2 < 0) {
+    return Status::NotImplemented("Decimals with negative scales not supported");
+  }
+
+  int32_t out_prec, out_scale;
+  int32_t left_scaleup = 0, right_scaleup = 0;
+
+  // decimal upscaling behaviour references amazon redshift
+  // https://docs.aws.amazon.com/redshift/latest/dg/r_numeric_computations201.html
+  if (op.find("add") == 0 || op.find("subtract") == 0) {
+    out_scale = std::max(s1, s2);
+    out_prec = std::max(p1 - s1, p2 - s2) + 1 + out_scale;
+    left_scaleup = out_scale - s1;
+    right_scaleup = out_scale - s2;

Review comment:
       Additionally, I think this could be defined for the varargs case (so it could be used in elementwise_min/elementwise_max/...)

##########
File path: cpp/src/arrow/compute/kernels/scalar_arithmetic_test.cc
##########
@@ -1161,5 +1161,306 @@ TYPED_TEST(TestUnaryArithmeticFloating, AbsoluteValue) {
   }
 }
 
+class TestBinaryArithmeticDecimal : public TestBase {
+ protected:
+  struct Arg {
+    std::shared_ptr<DataType> type;
+    std::string value;
+  };
+
+  std::shared_ptr<DataType> GetOutType(const std::string& op,

Review comment:
       Also, please make the expected output type explicit instead of computed in the test cases below




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [arrow] cyb70289 commented on pull request #10364: ARROW-12074: [C++][Compute] Add scalar arithmetic kernels for decimal

Posted by GitBox <gi...@apache.org>.
cyb70289 commented on pull request #10364:
URL: https://github.com/apache/arrow/pull/10364#issuecomment-861509271


   Rebased


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [arrow] cyb70289 edited a comment on pull request #10364: ARROW-12074: [C++][Compute] Add scalar arithmetic kernels for decimal

Posted by GitBox <gi...@apache.org>.
cyb70289 edited a comment on pull request #10364:
URL: https://github.com/apache/arrow/pull/10364#issuecomment-854364569


   Thanks @bkietz , it's almost done except one last catch.
   
   As the output type (precision, scale) is dependent on the inputs, I have a resolver object to calculate output type. The resolver is called with the *casted* input type, not the original type. It causes problem to division, as the output precision and scale should be calculated from original inputs. No trouble for add/subtract as the output precision/scale is the same for original and casted inputs (digit aligned). Multiply doesn't need cast.
   
   Does it make sense to pass both original input types and the casted types to the resolver [1][2]? We will have to update all existing custom resolver codes.
   Maybe add a kernel flag to select passing original or casted input types to the resolver?
   Or there are better ways to handle this?
   
   [1] https://github.com/apache/arrow/blob/master/cpp/src/arrow/compute/function.cc#L196
   [2] https://github.com/apache/arrow/blob/master/cpp/src/arrow/compute/exec.cc#L495


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [arrow] cyb70289 commented on a change in pull request #10364: ARROW-12074: [C++][Compute] Add scalar arithmetic kernels for decimal

Posted by GitBox <gi...@apache.org>.
cyb70289 commented on a change in pull request #10364:
URL: https://github.com/apache/arrow/pull/10364#discussion_r645469007



##########
File path: cpp/src/arrow/compute/kernels/scalar_arithmetic.cc
##########
@@ -516,6 +520,141 @@ std::shared_ptr<ScalarFunction> MakeUnarySignedArithmeticFunctionNotNull(
   return func;
 }
 
+// Decimal arithmetics
+struct BinaryDecimal : public KernelState {
+  const std::shared_ptr<DecimalType> left_type, right_type;
+  std::shared_ptr<DataType> out_type;
+
+  explicit BinaryDecimal(const KernelInitArgs& args)
+      : left_type(checked_pointer_cast<DecimalType>(args.inputs[0].type)),
+        right_type(checked_pointer_cast<DecimalType>(args.inputs[1].type)) {
+    DCHECK_EQ(left_type->id(), right_type->id());
+  }
+
+  // create instance of derived class T
+  template <typename T>
+  static Result<std::unique_ptr<KernelState>> Make(const KernelInitArgs& args) {
+    auto op = ::arrow::internal::make_unique<T>(args);
+    if (op->left_type->scale() < 0 || op->right_type->scale() < 0) {
+      return Status::Invalid("Decimals with negative scales not supported");
+    }
+    RETURN_NOT_OK(op->Init(op->left_type->precision(), op->left_type->scale(),
+                           op->right_type->precision(), op->right_type->scale()));
+    return std::move(op);
+  }
+
+  // return error and stop kernel execution if output precision is out of bound
+  Status Init(int32_t out_prec, int32_t out_scale) {
+    if (left_type->id() == Type::DECIMAL128) {
+      ARROW_ASSIGN_OR_RAISE(out_type, Decimal128Type::Make(out_prec, out_scale));
+    } else {
+      ARROW_ASSIGN_OR_RAISE(out_type, Decimal256Type::Make(out_prec, out_scale));
+    }
+    return Status::OK();
+  }
+
+  Result<std::shared_ptr<DataType>> ResolveOutput(const std::vector<ValueDescr>&) const {
+    return out_type;
+  }
+};
+
+template <bool IsSubtract>
+struct AddOrSubtractDecimal : public BinaryDecimal {
+  using BinaryDecimal::BinaryDecimal;
+
+  int32_t left_scaleup, right_scaleup;
+
+  // called by kernel::init()
+  static Result<std::unique_ptr<KernelState>> Make(KernelContext*,
+                                                   const KernelInitArgs& args) {
+    return BinaryDecimal::Make<AddOrSubtractDecimal<IsSubtract>>(args);
+  }
+
+  // figure out output type and arg scaling, called by Make()
+  Status Init(int32_t p1, int32_t s1, int32_t p2, int32_t s2) {
+    const int32_t out_scale = std::max(s1, s2);
+    const int32_t out_prec = std::max(p1 - s1, p2 - s2) + 1 + out_scale;
+    left_scaleup = out_scale - s1;
+    right_scaleup = out_scale - s2;
+    return BinaryDecimal::Init(out_prec, out_scale);
+  }
+
+  // called by kerne::exec() for each value pair
+  // TODO(yibo): avoid repeat rescaling of scalar arg
+  template <typename T, typename Arg0, typename Arg1>
+  T Call(KernelContext*, Arg0 left, Arg1 right, Status*) const {
+    if (left_scaleup > 0) left = left.IncreaseScaleBy(left_scaleup);
+    if (right_scaleup > 0) right = right.IncreaseScaleBy(right_scaleup);
+    if (IsSubtract) right = -right;
+    return left + right;
+  }
+};
+
+using AddDecimal = AddOrSubtractDecimal</*IsSubtract=*/false>;
+using SubtractDecimal = AddOrSubtractDecimal</*IsSubtract=*/true>;
+
+struct MultiplyDecimal : public BinaryDecimal {
+  using BinaryDecimal::BinaryDecimal;
+
+  static Result<std::unique_ptr<KernelState>> Make(KernelContext*,
+                                                   const KernelInitArgs& args) {
+    return BinaryDecimal::Make<MultiplyDecimal>(args);
+  }
+
+  Status Init(int32_t p1, int32_t s1, int32_t p2, int32_t s2) {
+    return BinaryDecimal::Init(p1 + p2 + 1, s1 + s2);
+  }
+
+  template <typename T, typename Arg0, typename Arg1>
+  T Call(KernelContext*, Arg0 left, Arg1 right, Status*) const {
+    return left * right;
+  }
+};
+
+struct DivideDecimal : public BinaryDecimal {
+  using BinaryDecimal::BinaryDecimal;
+
+  int32_t left_scaleup;
+
+  static Result<std::unique_ptr<KernelState>> Make(KernelContext*,
+                                                   const KernelInitArgs& args) {
+    return BinaryDecimal::Make<DivideDecimal>(args);
+  }
+
+  Status Init(int32_t p1, int32_t s1, int32_t p2, int32_t s2) {
+    // https://docs.aws.amazon.com/redshift/latest/dg/r_numeric_computations201.html

Review comment:
       Added explanation to compute.rst




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [arrow] cyb70289 commented on a change in pull request #10364: ARROW-12074: [C++][Compute] Add scalar arithmetic kernels for decimal

Posted by GitBox <gi...@apache.org>.
cyb70289 commented on a change in pull request #10364:
URL: https://github.com/apache/arrow/pull/10364#discussion_r649056718



##########
File path: cpp/src/arrow/compute/kernels/scalar_arithmetic_test.cc
##########
@@ -1148,5 +1148,326 @@ TYPED_TEST(TestUnaryArithmeticFloating, AbsoluteValue) {
   }
 }
 
+class TestBinaryArithmeticDecimal : public TestBase {
+ protected:
+  std::shared_ptr<Scalar> MakeScalar(const std::shared_ptr<DataType>& type,
+                                     const std::string& str) {
+    std::shared_ptr<Scalar> scalar;
+    if (type->id() == Type::DECIMAL128) {
+      Decimal128 value;
+      int32_t dummy;
+      ABORT_NOT_OK(Decimal128::FromString(str, &value, &dummy));
+      ASSIGN_OR_ABORT(scalar, arrow::MakeScalar(type, value));
+    } else {
+      Decimal256 value;
+      int32_t dummy;
+      ABORT_NOT_OK(Decimal256::FromString(str, &value, &dummy));
+      ASSIGN_OR_ABORT(scalar, arrow::MakeScalar(type, value));
+    }
+    return scalar;
+  }
+};
+
+// reference result from bc (precsion=100, scale=40)
+TEST_F(TestBinaryArithmeticDecimal, AddSubtract) {
+  // array array, decimal128
+  {
+    auto left = ArrayFromJSON(decimal128(30, 3),
+                              R"([
+        "1.000",
+        "-123456789012345678901234567.890",
+        "98765432109876543210.987",
+        "-999999999999999999999999999.999"
+      ])");
+    auto right = ArrayFromJSON(decimal128(20, 9),
+                               R"([
+        "-1.000000000",
+        "12345678901.234567890",
+        "98765.432101234",
+        "-99999999999.999999999"
+      ])");
+    auto added = ArrayFromJSON(decimal128(37, 9),
+                               R"([
+      "0.000000000",
+      "-123456789012345666555555666.655432110",
+      "98765432109876641976.419101234",
+      "-1000000000000000099999999999.998999999"
+    ])");
+    auto subtracted = ArrayFromJSON(decimal128(37, 9),
+                                    R"([
+      "2.000000000",
+      "-123456789012345691246913469.124567890",
+      "98765432109876444445.554898766",
+      "-999999999999999899999999999.999000001"
+    ])");
+    CheckScalarBinary("add", left, right, added);
+    CheckScalarBinary("subtract", left, right, subtracted);
+  }
+
+  // array array, decimal256
+  {
+    auto left = ArrayFromJSON(decimal256(30, 20),
+                              R"([
+        "-1.00000000000000000001",
+        "1234567890.12345678900000000000",
+        "-9876543210.09876543210987654321",
+        "9999999999.99999999999999999999"
+      ])");
+    auto right = ArrayFromJSON(decimal256(30, 10),
+                               R"([
+        "1.0000000000",
+        "-1234567890.1234567890",
+        "6789.5432101234",
+        "99999999999999999999.9999999999"
+      ])");
+    auto added = ArrayFromJSON(decimal256(41, 20),
+                               R"([
+      "-0.00000000000000000001",
+      "0.00000000000000000000",
+      "-9876536420.55555530870987654321",
+      "100000000009999999999.99999999989999999999"
+    ])");
+    auto subtracted = ArrayFromJSON(decimal256(41, 20),
+                                    R"([
+      "-2.00000000000000000001",
+      "2469135780.24691357800000000000",
+      "-9876549999.64197555550987654321",
+      "-99999999989999999999.99999999990000000001"
+    ])");
+    CheckScalarBinary("add", left, right, added);
+    CheckScalarBinary("subtract", left, right, subtracted);
+  }
+
+  // scalar array
+  {
+    auto left = this->MakeScalar(decimal128(6, 1), "12345.6");
+    auto right = ArrayFromJSON(decimal128(10, 3),
+                               R"(["1.234", "1234.000", "-9876.543", "666.888"])");
+    auto added = ArrayFromJSON(decimal128(11, 3),
+                               R"(["12346.834", "13579.600", "2469.057", "13012.488"])");
+    auto left_sub_right = ArrayFromJSON(
+        decimal128(11, 3), R"(["12344.366", "11111.600", "22222.143", "11678.712"])");
+    auto right_sub_left = ArrayFromJSON(
+        decimal128(11, 3), R"(["-12344.366", "-11111.600", "-22222.143", "-11678.712"])");
+    CheckScalarBinary("add", left, right, added);
+    CheckScalarBinary("add", right, left, added);
+    CheckScalarBinary("subtract", left, right, left_sub_right);
+    CheckScalarBinary("subtract", right, left, right_sub_left);
+  }
+
+  // scalar scalar
+  {
+    auto left = this->MakeScalar(decimal256(3, 0), "666");
+    auto right = this->MakeScalar(decimal256(3, 0), "888");
+    auto added = this->MakeScalar(decimal256(4, 0), "1554");
+    auto subtracted = this->MakeScalar(decimal256(4, 0), "-222");
+    CheckScalarBinary("add", left, right, added);
+    CheckScalarBinary("subtract", left, right, subtracted);
+  }
+
+  // decimal128 decimal256

Review comment:
       test mixed size decimals




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [arrow] bkietz commented on pull request #10364: ARROW-12074: [C++][Compute] Add scalar arithmetic kernels for decimal

Posted by GitBox <gi...@apache.org>.
bkietz commented on pull request #10364:
URL: https://github.com/apache/arrow/pull/10364#issuecomment-853329894


   `DispatchBest` is aware of the operation; it has access to the function's name. You could write a `CommonDecimal()` function which returns differing scales/precisions for "add"/"subtract", "divide", and "multiply":
   
   ```c++
   if (auto type = CommonNumeric(*values)) {
     ReplaceTypes(type, values);
   } else if (auto type = CommonDecimal(name_, *values)) {
     ReplaceTypes(type, values);
   }
   ```


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [arrow] bkietz commented on a change in pull request #10364: ARROW-12074: [C++][Compute] Add scalar arithmetic kernels for decimal

Posted by GitBox <gi...@apache.org>.
bkietz commented on a change in pull request #10364:
URL: https://github.com/apache/arrow/pull/10364#discussion_r646620882



##########
File path: cpp/src/arrow/compute/kernels/scalar_arithmetic.cc
##########
@@ -451,8 +604,45 @@ struct ArithmeticFunction : ScalarFunction {
     if (auto kernel = DispatchExactImpl(this, *values)) return kernel;
     return arrow::compute::detail::NoMatchingKernel(this, *values);
   }
+
+  Result<const Kernel*> DispatchDecimal(std::vector<ValueDescr>* values) const {
+    if (values->size() == 2) {
+      std::vector<std::shared_ptr<DataType>> replaced;
+      RETURN_NOT_OK(GetDecimalBinaryOutput(name(), *values, &replaced));
+      (*values)[0].type = std::move(replaced[0]);
+      (*values)[1].type = std::move(replaced[1]);
+    }
+
+    using arrow::compute::detail::DispatchExactImpl;
+    if (auto kernel = DispatchExactImpl(this, *values)) return kernel;
+    return arrow::compute::detail::NoMatchingKernel(this, *values);
+  }
+};
+
+// resolve decimal operation output type
+struct DecimalBinaryOutputResolver {
+  std::string func_name;
+
+  DecimalBinaryOutputResolver(std::string func_name) : func_name(std::move(func_name)) {}
+
+  Result<ValueDescr> operator()(KernelContext*, const std::vector<ValueDescr>& args) {
+    ARROW_ASSIGN_OR_RAISE(auto out_type, GetDecimalBinaryOutput(func_name, args));

Review comment:
       Breaking the definitions out for efficient algebra:
   ![definitions](https://user-images.githubusercontent.com/1299904/121026941-90898f00-c774-11eb-9261-fd1558e7f8b6.png)
   <details>
   <pre>
   l_s, l_p \equiv& \text{scale, precision of left argument, before implicit cast} \\
   r_s, r_p \equiv& \text{scale, precision of right argument, before implicit cast} \\
   o_s, o_p \equiv& \text{scale, precision of output} \\
   \lambda_s, \lambda_p
   \equiv&
   \text{scale, precision of implicitly cast left argument} \\
   &\text{(passed to output type resolver)} \\
   \rho_s, \rho_p
   \equiv& \text{scale, precision of implicitly cast right argument}
   </pre>
   </details>
   
   For division, the output precision and scale are:
   ![division output precision, scale, scaleup](https://user-images.githubusercontent.com/1299904/121027164-be6ed380-c774-11eb-9267-bf82dfcf8f9c.png)
   <details>
   <pre>
   o_s =& max(4, l_s + (r_p - r_s) + 1) \\
   o_p  =& (l_p - l_s) + r_s + o_s \\
   \lambda_s =& l_s + (o_p - l_p) \\
   \lambda_p =& l_p + (o_p - l_p) \\
   \rho_s, \rho_p =& r_s, r_p
   </pre>
   </details>
   
   Since the post-implicit-cast types are the only types available to the output type resolver, we need to compute the output scale and precision from those implicitly cast types:
   ![solve for output from implicitly cast](https://user-images.githubusercontent.com/1299904/121028949-35589c00-c776-11eb-8395-b18cf1f58a98.png)
   <details>
   <pre>
   o_s &= \lambda_s - \rho_s &\Leftarrow
   \begin{bmatrix}
   \lambda_s
   &=& l_s + o_p - l_p \\
   &=& l_s + ((l_p - l_s) + r_s + o_s) - l_p \\
   &=& r_s + o_s
   \end{bmatrix} 
   \\
   o_p &= \lambda_p &\Leftarrow
   \begin{bmatrix}
   \lambda_p
   &= l_p + o_p - l_p\\
   &= o_p
   \end{bmatrix}
   </pre>
   </details>
   
   So in prose: the output precision is identical to the precision of the implicitly cast left argument and the output scale is the implicitly cast left argument's scale minus the right argument's scale. The output resolver can be written as:
   
   ```c++
   template <typename Op>
   void AddDecimalBinaryKernels(const std::string& name,
                                std::shared_ptr<ArithmeticFunction>* func) {
     OutputType out_type;
     if (name.find("divide") == 0) {
       out_type = [](KernelContext*, const std::vector<ValueDescr>& args) -> Result<ValueDescr> {
         // validation...
         auto left = checked_cast<const DecimalType*>(args[0].type.get());
         auto right = checked_cast<const DecimalType*>(args[1].type.get());
         auto precision = left.precision();
         auto scale = left.scale() - right.scale();
         auto type_id = // DECIMAL128 if both are 128 otherwise DECIMAL256
         auto out_type = DecimalType::Make(type_id, precision, scale);
         return ValueDescr(std::move(out_type), GetBroadcastShape(args));
       };
     }
   ```
   
   Similar resolvers can be written for the other operations. Does that make sense?




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [arrow] github-actions[bot] commented on pull request #10364: ARROW-12074: [C++][Compute] Add scalar arithmetic kernels for decimal

Posted by GitBox <gi...@apache.org>.
github-actions[bot] commented on pull request #10364:
URL: https://github.com/apache/arrow/pull/10364#issuecomment-844912219


   https://issues.apache.org/jira/browse/ARROW-12074


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [arrow] cyb70289 commented on a change in pull request #10364: ARROW-12074: [C++][Compute] Add scalar arithmetic kernels for decimal

Posted by GitBox <gi...@apache.org>.
cyb70289 commented on a change in pull request #10364:
URL: https://github.com/apache/arrow/pull/10364#discussion_r645953547



##########
File path: cpp/src/arrow/compute/kernels/scalar_arithmetic.cc
##########
@@ -428,12 +524,69 @@ ArrayKernelExec ArithmeticExecFromOp(detail::GetTypeId get_id) {
   }
 }
 
+// calculate output precision/scale and args rescaling per operation type
+Result<std::shared_ptr<DataType>> GetDecimalBinaryOutput(
+    const std::string& op, const std::vector<ValueDescr>& values,
+    std::vector<std::shared_ptr<DataType>>* replaced = nullptr) {
+  const auto& left_type = checked_pointer_cast<DecimalType>(values[0].type);
+  const auto& right_type = checked_pointer_cast<DecimalType>(values[1].type);
+
+  const int32_t p1 = left_type->precision(), s1 = left_type->scale();
+  const int32_t p2 = right_type->precision(), s2 = right_type->scale();
+  if (s1 < 0 || s2 < 0) {
+    return Status::NotImplemented("Decimals with negative scales not supported");
+  }
+
+  int32_t out_prec, out_scale;
+  int32_t left_scaleup = 0, right_scaleup = 0;
+
+  // decimal upscaling behaviour references amazon redshift
+  // https://docs.aws.amazon.com/redshift/latest/dg/r_numeric_computations201.html
+  if (op.find("add") == 0 || op.find("subtract") == 0) {
+    out_scale = std::max(s1, s2);
+    out_prec = std::max(p1 - s1, p2 - s2) + 1 + out_scale;
+    left_scaleup = out_scale - s1;
+    right_scaleup = out_scale - s2;
+  } else if (op.find("multiply") == 0) {
+    out_scale = s1 + s2;
+    out_prec = p1 + p2 + 1;
+  } else if (op.find("divide") == 0) {
+    out_scale = std::max(4, s1 + p2 - s2 + 1);
+    out_prec = p1 - s1 + s2 + out_scale;  // >= p1 + p2 + 1
+    left_scaleup = out_prec - p1;
+  } else {
+    return Status::Invalid("Invalid decimal operation: ", op);
+  }
+
+  const auto id = left_type->id();
+  auto make = [id](int32_t precision, int32_t scale) {
+    if (id == Type::DECIMAL128) {
+      return Decimal128Type::Make(precision, scale);
+    } else {
+      return Decimal256Type::Make(precision, scale);
+    }
+  };
+
+  if (replaced) {
+    replaced->resize(2);
+    ARROW_ASSIGN_OR_RAISE((*replaced)[0], make(p1 + left_scaleup, s1 + left_scaleup));
+    ARROW_ASSIGN_OR_RAISE((*replaced)[1], make(p2 + right_scaleup, s2 + right_scaleup));
+  }
+
+  return make(out_prec, out_scale);
+}
+
 struct ArithmeticFunction : ScalarFunction {
   using ScalarFunction::ScalarFunction;
 
   Result<const Kernel*> DispatchBest(std::vector<ValueDescr>* values) const override {
     RETURN_NOT_OK(CheckArity(*values));
 
+    const auto type_id = (*values)[0].type->id();

Review comment:
       Looks it's straightforward to implement. Will add `decimal + float` operation in this PR.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [arrow] cyb70289 commented on a change in pull request #10364: ARROW-12074: [C++][Compute] Add scalar arithmetic kernels for decimal

Posted by GitBox <gi...@apache.org>.
cyb70289 commented on a change in pull request #10364:
URL: https://github.com/apache/arrow/pull/10364#discussion_r645468186



##########
File path: cpp/src/arrow/compute/kernels/test_util.h
##########
@@ -113,6 +113,9 @@ void CheckScalarBinary(std::string func_name, std::shared_ptr<Array> left_input,
                        std::shared_ptr<Array> expected,
                        const FunctionOptions* options = nullptr);
 
+void CheckScalarGeneral(std::string func_name, const std::vector<Datum>& inputs,

Review comment:
       Done




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [arrow] cyb70289 edited a comment on pull request #10364: ARROW-12074: [C++][Compute] Add scalar arithmetic kernels for decimal

Posted by GitBox <gi...@apache.org>.
cyb70289 edited a comment on pull request #10364:
URL: https://github.com/apache/arrow/pull/10364#issuecomment-854364569


   Thanks @bkietz , it's almost done except one last catch.
   
   As the output type (precision, scale) is dependent on the inputs, I have a resolver object to calculate output type. The resolver is called with the *casted* input type, not the original type. It causes problem to division, as the output precision and scale should be calculated from original inputs. No trouble for add/subtract as the output precision/scale is the same for original and casted inputs (digit aligned). Multiply doesn't need cast.
   
   Does it make sense to pass both original input types and the casted types to the resolver [1][2]? We will have to update all existing custom resolver codes.
   Maybe add a kernel flag to select original or casted input?
   Or there are better ways to handle this?
   
   [1] https://github.com/apache/arrow/blob/master/cpp/src/arrow/compute/function.cc#L196
   [2] https://github.com/apache/arrow/blob/master/cpp/src/arrow/compute/exec.cc#L495


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [arrow] cyb70289 commented on a change in pull request #10364: ARROW-12074: [C++][Compute] Add scalar arithmetic kernels for decimal

Posted by GitBox <gi...@apache.org>.
cyb70289 commented on a change in pull request #10364:
URL: https://github.com/apache/arrow/pull/10364#discussion_r645468811



##########
File path: cpp/src/arrow/compute/kernels/scalar_arithmetic_test.cc
##########
@@ -1161,5 +1161,312 @@ TYPED_TEST(TestUnaryArithmeticFloating, AbsoluteValue) {
   }
 }
 
+class TestBinaryArithmeticDecimal : public TestBase {
+ protected:
+  struct Arg {
+    std::shared_ptr<DataType> type;
+    std::string value;
+  };
+
+  std::shared_ptr<DataType> GetOutType(const std::string& op,
+                                       const std::shared_ptr<DataType>& left_type,
+                                       const std::shared_ptr<DataType>& right_type) {
+    auto left_decimal_type = std::static_pointer_cast<DecimalType>(left_type);
+    auto right_decimal_type = std::static_pointer_cast<DecimalType>(right_type);
+
+    const int32_t p1 = left_decimal_type->precision(), s1 = left_decimal_type->scale();
+    const int32_t p2 = right_decimal_type->precision(), s2 = right_decimal_type->scale();
+
+    // https://docs.aws.amazon.com/redshift/latest/dg/r_numeric_computations201.html
+    int32_t precision, scale;
+    if (op == "add" || op == "subtract") {
+      scale = std::max(s1, s2);
+      precision = std::max(p1 - s1, p2 - s2) + 1 + scale;
+    } else if (op == "multiply") {
+      scale = s1 + s2;
+      precision = p1 + p2 + 1;
+    } else if (op == "divide") {
+      scale = std::max(4, s1 + p2 - s2 + 1);
+      precision = p1 - s1 + s2 + scale;
+    } else {
+      ABORT_NOT_OK(Status::Invalid("invalid binary operator: ", op));
+    }
+
+    std::shared_ptr<DataType> type;
+    if (left_type->id() == Type::DECIMAL128) {
+      ASSIGN_OR_ABORT(type, Decimal128Type::Make(precision, scale));
+    } else {
+      ASSIGN_OR_ABORT(type, Decimal256Type::Make(precision, scale));
+    }
+    return type;
+  }
+
+  std::shared_ptr<Scalar> MakeScalar(const std::shared_ptr<DataType>& type,
+                                     const std::string& str) {
+    std::shared_ptr<Scalar> scalar;
+    if (type->id() == Type::DECIMAL128) {
+      Decimal128 value;
+      int32_t dummy;
+      ABORT_NOT_OK(Decimal128::FromString(str, &value, &dummy));
+      ASSIGN_OR_ABORT(scalar, arrow::MakeScalar(type, value));
+    } else {
+      Decimal256 value;
+      int32_t dummy;
+      ABORT_NOT_OK(Decimal256::FromString(str, &value, &dummy));
+      ASSIGN_OR_ABORT(scalar, arrow::MakeScalar(type, value));
+    }
+    return scalar;
+  }
+
+  Datum ToDatum(const std::shared_ptr<DataType>& type, const std::string& value) {
+    if (value.find("[") == std::string::npos) {
+      return Datum(MakeScalar(type, value));
+    } else {
+      return Datum(ArrayFromJSON(type, value));
+    }
+  }
+
+  void Assert(const std::string& op, const Arg& left, const Arg& right,
+              const std::string& expected) {
+    const Datum arg0 = ToDatum(left.type, left.value);
+    const Datum arg1 = ToDatum(right.type, right.value);
+
+    auto out_type = GetOutType(op, left.type, right.type);
+    CheckScalarGeneral(op, {arg0, arg1}, ToDatum(out_type, expected), &options_);
+
+    // commutative operations
+    if (op == "add" || op == "multiply") {
+      CheckScalarGeneral(op, {arg1, arg0}, ToDatum(out_type, expected), &options_);
+    }
+  }
+
+  void AssertFail(const std::string& op, const Arg& left, const Arg& right) {
+    const Datum arg0 = ToDatum(left.type, left.value);
+    const Datum arg1 = ToDatum(right.type, right.value);
+
+    ASSERT_NOT_OK(CallFunction(op, {arg0, arg1}, &options_));
+    if (op == "add" || op == "multiply") {
+      ASSERT_NOT_OK(CallFunction(op, {arg1, arg0}, &options_));
+    }
+  }
+
+  ArithmeticOptions options_ = ArithmeticOptions();
+};
+
+// reference result from bc (precsion=100, scale=40)
+TEST_F(TestBinaryArithmeticDecimal, AddSubtract) {
+  Arg left, right;
+  std::string added, subtracted;
+
+  // array array, decimal128
+  // clang-format off
+  left = {
+    decimal128(30, 3),
+    R"([
+      "1.000",
+      "-123456789012345678901234567.890",
+      "98765432109876543210.987",
+      "-999999999999999999999999999.999"
+    ])",
+  };
+  right = {
+    decimal128(20, 9),
+    R"([
+      "-1.000000000",
+      "12345678901.234567890",
+      "98765.432101234",
+      "-99999999999.999999999"
+    ])",
+  };
+  added = R"([
+    "0.000000000",
+    "-123456789012345666555555666.655432110",
+    "98765432109876641976.419101234",
+    "-1000000000000000099999999999.998999999"
+  ])";
+  subtracted = R"([
+    "2.000000000",
+    "-123456789012345691246913469.124567890",
+    "98765432109876444445.554898766",
+    "-999999999999999899999999999.999000001"
+  ])";
+  this->Assert("add", left, right, added);
+  this->Assert("subtract", left, right, subtracted);
+
+  // array array, decimal256
+  left = {
+    decimal256(30, 20),
+    R"([
+      "-1.00000000000000000001",
+      "1234567890.12345678900000000000",
+      "-9876543210.09876543210987654321",
+      "9999999999.99999999999999999999"
+    ])",
+  };
+  right = {
+    decimal256(30, 10),
+    R"([
+      "1.0000000000",
+      "-1234567890.1234567890",
+      "6789.5432101234",
+      "99999999999999999999.9999999999"
+    ])",
+  };
+  added = R"([
+    "-0.00000000000000000001",
+    "0.00000000000000000000",
+    "-9876536420.55555530870987654321",
+    "100000000009999999999.99999999989999999999"
+  ])";
+  subtracted = R"([
+    "-2.00000000000000000001",
+    "2469135780.24691357800000000000",
+    "-9876549999.64197555550987654321",
+    "-99999999989999999999.99999999990000000001"
+  ])";
+  this->Assert("add", left, right, added);
+  this->Assert("subtract", left, right, subtracted);
+  // clang-format on
+
+  // scalar array
+  left = {decimal128(6, 1), "12345.6"};
+  right = {decimal128(10, 3), R"(["1.234", "1234.000", "-9876.543", "666.888"])"};
+  added = R"(["12346.834", "13579.600", "2469.057", "13012.488"])";
+  subtracted = R"(["12344.366", "11111.600", "22222.143", "11678.712"])";
+  this->Assert("add", left, right, added);
+  this->Assert("subtract", left, right, subtracted);
+  // right - left
+  subtracted = R"(["-12344.366", "-11111.600", "-22222.143", "-11678.712"])";
+  this->Assert("subtract", right, left, subtracted);
+
+  // scalar scalar
+  left = {decimal256(3, 0), "666"};
+  right = {decimal256(3, 0), "888"};
+  this->Assert("add", left, right, "1554");
+  this->Assert("subtract", left, right, "-222");
+
+  // failed case: result *maybe* overflow
+  left = {decimal128(21, 20), "0.12345678901234567890"};
+  right = {decimal128(21, 1), "1.0"};
+  this->AssertFail("add", left, right);
+  this->AssertFail("subtract", left, right);
+
+  left = {decimal256(75, 0), "0"};
+  right = {decimal256(2, 1), "0.0"};
+  this->AssertFail("add", left, right);
+  this->AssertFail("subtract", left, right);
+}
+
+TEST_F(TestBinaryArithmeticDecimal, Multiply) {
+  Arg left, right;
+  std::string expected;
+
+  // array array
+  // clang-format off
+  left = {
+    decimal128(20, 10),
+    R"([
+      "1234567890.1234567890",
+      "-0.0000000001",
+      "-9999999999.9999999999"
+    ])",
+  };
+  right = {
+    decimal128(13, 3),
+    R"([
+      "1234567890.123",
+      "0.001",
+      "-9999999999.999"
+    ])",
+  };
+  expected = R"([
+    "1524157875323319737.9870903950470",
+    "-0.0000000000001",
+    "99999999999989999999.0000000000001"
+  ])";
+  this->Assert("multiply", left, right, expected);
+
+  left = {
+    decimal256(30, 3),
+    R"([
+      "123456789012345678901234567.890",
+      "0.000"
+    ])",
+  };
+  right = {
+    decimal256(20, 9),
+    R"([
+      "-12345678901.234567890",
+      "99999999999.999999999"
+    ])",
+  };
+  expected = R"([
+    "-1524157875323883675034293577501905199.875019052100",
+    "0.000000000000"
+  ])";
+  this->Assert("multiply", left, right, expected);
+  // clang-format on
+
+  // scalar array
+  left = {decimal128(3, 2), "3.14"};
+  right = {decimal128(1, 0), R"(["1", "2", "3", "4", "5"])"};
+  expected = R"(["3.14", "6.28", "9.42", "12.56", "15.70"])";
+  this->Assert("multiply", left, right, expected);
+
+  // scalar scalar
+  left = {decimal128(1, 0), "1"};
+  right = {decimal128(1, 0), "1"};
+  this->Assert("multiply", left, right, "1");
+
+  // failed case: result *maybe* overflow
+  left = {decimal128(20, 0), "1"};
+  right = {decimal128(18, 1), "1.0"};
+  this->AssertFail("multiply", left, right);
+}
+
+TEST_F(TestBinaryArithmeticDecimal, Divide) {
+  Arg left, right;
+  std::string expected;
+
+  // array array
+  // clang-format off
+  left = {decimal128(13, 3), R"(["1234567890.123", "0.001"])"};
+  right = {decimal128(3, 0), R"(["-987", "999"])"};
+  // scale = 7
+  expected = R"(["-1250828.6627386", "0.0000010"])";
+  this->Assert("divide", left, right, expected);
+
+  left = {decimal256(20, 10), R"(["1234567890.1234567890", "9999999999.9999999999"])"};
+  right = {decimal256(13, 3), R"(["1234567890.123", "0.001"])"};
+  // scale = 21
+  expected = R"(["1.000000000000369999093", "9999999999999.999999900000000000000"])";
+  this->Assert("divide", left, right, expected);
+  // clang-format on

Review comment:
       Done




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [arrow] bkietz commented on a change in pull request #10364: ARROW-12074: [C++][Compute] Add scalar arithmetic kernels for decimal

Posted by GitBox <gi...@apache.org>.
bkietz commented on a change in pull request #10364:
URL: https://github.com/apache/arrow/pull/10364#discussion_r646620882



##########
File path: cpp/src/arrow/compute/kernels/scalar_arithmetic.cc
##########
@@ -451,8 +604,45 @@ struct ArithmeticFunction : ScalarFunction {
     if (auto kernel = DispatchExactImpl(this, *values)) return kernel;
     return arrow::compute::detail::NoMatchingKernel(this, *values);
   }
+
+  Result<const Kernel*> DispatchDecimal(std::vector<ValueDescr>* values) const {
+    if (values->size() == 2) {
+      std::vector<std::shared_ptr<DataType>> replaced;
+      RETURN_NOT_OK(GetDecimalBinaryOutput(name(), *values, &replaced));
+      (*values)[0].type = std::move(replaced[0]);
+      (*values)[1].type = std::move(replaced[1]);
+    }
+
+    using arrow::compute::detail::DispatchExactImpl;
+    if (auto kernel = DispatchExactImpl(this, *values)) return kernel;
+    return arrow::compute::detail::NoMatchingKernel(this, *values);
+  }
+};
+
+// resolve decimal operation output type
+struct DecimalBinaryOutputResolver {
+  std::string func_name;
+
+  DecimalBinaryOutputResolver(std::string func_name) : func_name(std::move(func_name)) {}
+
+  Result<ValueDescr> operator()(KernelContext*, const std::vector<ValueDescr>& args) {
+    ARROW_ASSIGN_OR_RAISE(auto out_type, GetDecimalBinaryOutput(func_name, args));

Review comment:
       Breaking the definitions out for efficient algebra:
   ![definitions](https://user-images.githubusercontent.com/1299904/121026941-90898f00-c774-11eb-9261-fd1558e7f8b6.png)
   <details>
   <pre>
   l_s, l_p \equiv& \text{scale, precision of left argument, before implicit cast} \\
   r_s, r_p \equiv& \text{scale, precision of right argument, before implicit cast} \\
   o_s, o_p \equiv& \text{scale, precision of output} \\
   \lambda_s, \lambda_p
   \equiv&
   \text{scale, precision of implicitly cast left argument} \\
   &\text{(passed to output type resolver)} \\
   \rho_s, \rho_p
   \equiv& \text{scale, precision of implicitly cast right argument}
   </pre>
   </details>
   
   For division, the output precision and scale are:
   ![division output precision, scale, scaleup](https://user-images.githubusercontent.com/1299904/121027164-be6ed380-c774-11eb-9267-bf82dfcf8f9c.png)
   <details>
   <pre>
   o_s =& max(4, l_s + (r_p - r_s) + 1) \\
   o_p  =& (l_p - l_s) + r_s + o_s \\
   \lambda_s =& l_s + (o_p - l_p) \\
   \lambda_p =& l_p + (o_p - l_p) \\
   \rho_s, \rho_p =& r_s, r_p
   </pre>
   </details>
   
   Since the post-implicit-cast types are the only types available to the output type resolver, we need to compute the output scale and precision from those implicitly cast types:
   ![solve for output from implicitly cast](https://user-images.githubusercontent.com/1299904/121028949-35589c00-c776-11eb-8395-b18cf1f58a98.png)
   <details>
   <pre>
   o_s &= \lambda_s - \rho_s &\Leftarrow
   \begin{bmatrix}
   \lambda_s
   &=& l_s + o_p - l_p \\
   &=& l_s + ((l_p - l_s) + r_s + o_s) - l_p \\
   &=& r_s + o_s
   \end{bmatrix} 
   \\
   o_p &= \lambda_p &\Leftarrow
   \begin{bmatrix}
   \lambda_p
   &= l_p + o_p - l_p\\
   &= o_p
   \end{bmatrix}
   </pre>
   </details>
   
   So in prose: the output precision is identical to the precision of the implicitly cast left argument and the output scale is the implicitly cast left argument's scale minus the right argument's scale. The output resolver can be written as:
   
   ```c++
   template <typename Op>
   void AddDecimalBinaryKernels(const std::string& name,
                                std::shared_ptr<ArithmeticFunction>* func) {
     OutputType out_type;
     if (name.find("divide") == 0) {
       out_type = [](KernelContext*, const std::vector<ValueDescr>& args) -> Result<ValueDescr> {
         // validation...
         auto left = checked_cast<const DecimalType*>(args[0].type.get());
         auto right = checked_cast<const DecimalType*>(args[1].type.get());
         auto precision = left.precision();
         auto scale = left.scale() - right.scale();
         auto out_type = DecimalType::Make(left->id(), precision, scale);
         return ValueDescr(std::move(out_type), GetBroadcastShape(args));
       };
     }
   ```
   
   Similar resolvers can be written for the other operations. Does that make sense?




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [arrow] bkietz closed pull request #10364: ARROW-12074: [C++][Compute] Add scalar arithmetic kernels for decimal

Posted by GitBox <gi...@apache.org>.
bkietz closed pull request #10364:
URL: https://github.com/apache/arrow/pull/10364


   


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [arrow] bkietz commented on a change in pull request #10364: ARROW-12074: [C++][Compute] Add scalar arithmetic kernels for decimal

Posted by GitBox <gi...@apache.org>.
bkietz commented on a change in pull request #10364:
URL: https://github.com/apache/arrow/pull/10364#discussion_r641132241



##########
File path: cpp/src/arrow/compute/kernels/scalar_arithmetic.cc
##########
@@ -516,6 +520,141 @@ std::shared_ptr<ScalarFunction> MakeUnarySignedArithmeticFunctionNotNull(
   return func;
 }
 
+// Decimal arithmetics
+struct BinaryDecimal : public KernelState {
+  const std::shared_ptr<DecimalType> left_type, right_type;
+  std::shared_ptr<DataType> out_type;
+
+  explicit BinaryDecimal(const KernelInitArgs& args)
+      : left_type(checked_pointer_cast<DecimalType>(args.inputs[0].type)),
+        right_type(checked_pointer_cast<DecimalType>(args.inputs[1].type)) {
+    DCHECK_EQ(left_type->id(), right_type->id());
+  }
+
+  // create instance of derived class T
+  template <typename T>
+  static Result<std::unique_ptr<KernelState>> Make(const KernelInitArgs& args) {
+    auto op = ::arrow::internal::make_unique<T>(args);
+    if (op->left_type->scale() < 0 || op->right_type->scale() < 0) {
+      return Status::Invalid("Decimals with negative scales not supported");
+    }
+    RETURN_NOT_OK(op->Init(op->left_type->precision(), op->left_type->scale(),
+                           op->right_type->precision(), op->right_type->scale()));
+    return std::move(op);
+  }
+
+  // return error and stop kernel execution if output precision is out of bound
+  Status Init(int32_t out_prec, int32_t out_scale) {
+    if (left_type->id() == Type::DECIMAL128) {
+      ARROW_ASSIGN_OR_RAISE(out_type, Decimal128Type::Make(out_prec, out_scale));
+    } else {
+      ARROW_ASSIGN_OR_RAISE(out_type, Decimal256Type::Make(out_prec, out_scale));
+    }
+    return Status::OK();
+  }
+
+  Result<std::shared_ptr<DataType>> ResolveOutput(const std::vector<ValueDescr>&) const {
+    return out_type;
+  }
+};
+
+template <bool IsSubtract>
+struct AddOrSubtractDecimal : public BinaryDecimal {
+  using BinaryDecimal::BinaryDecimal;
+
+  int32_t left_scaleup, right_scaleup;
+
+  // called by kernel::init()
+  static Result<std::unique_ptr<KernelState>> Make(KernelContext*,
+                                                   const KernelInitArgs& args) {
+    return BinaryDecimal::Make<AddOrSubtractDecimal<IsSubtract>>(args);
+  }
+
+  // figure out output type and arg scaling, called by Make()
+  Status Init(int32_t p1, int32_t s1, int32_t p2, int32_t s2) {
+    const int32_t out_scale = std::max(s1, s2);
+    const int32_t out_prec = std::max(p1 - s1, p2 - s2) + 1 + out_scale;
+    left_scaleup = out_scale - s1;
+    right_scaleup = out_scale - s2;
+    return BinaryDecimal::Init(out_prec, out_scale);
+  }
+
+  // called by kerne::exec() for each value pair
+  // TODO(yibo): avoid repeat rescaling of scalar arg
+  template <typename T, typename Arg0, typename Arg1>
+  T Call(KernelContext*, Arg0 left, Arg1 right, Status*) const {
+    if (left_scaleup > 0) left = left.IncreaseScaleBy(left_scaleup);
+    if (right_scaleup > 0) right = right.IncreaseScaleBy(right_scaleup);
+    if (IsSubtract) right = -right;
+    return left + right;
+  }
+};
+
+using AddDecimal = AddOrSubtractDecimal</*IsSubtract=*/false>;
+using SubtractDecimal = AddOrSubtractDecimal</*IsSubtract=*/true>;
+
+struct MultiplyDecimal : public BinaryDecimal {
+  using BinaryDecimal::BinaryDecimal;
+
+  static Result<std::unique_ptr<KernelState>> Make(KernelContext*,
+                                                   const KernelInitArgs& args) {
+    return BinaryDecimal::Make<MultiplyDecimal>(args);
+  }
+
+  Status Init(int32_t p1, int32_t s1, int32_t p2, int32_t s2) {
+    return BinaryDecimal::Init(p1 + p2 + 1, s1 + s2);
+  }
+
+  template <typename T, typename Arg0, typename Arg1>
+  T Call(KernelContext*, Arg0 left, Arg1 right, Status*) const {
+    return left * right;
+  }
+};
+
+struct DivideDecimal : public BinaryDecimal {
+  using BinaryDecimal::BinaryDecimal;
+
+  int32_t left_scaleup;
+
+  static Result<std::unique_ptr<KernelState>> Make(KernelContext*,
+                                                   const KernelInitArgs& args) {
+    return BinaryDecimal::Make<DivideDecimal>(args);
+  }
+
+  Status Init(int32_t p1, int32_t s1, int32_t p2, int32_t s2) {
+    // https://docs.aws.amazon.com/redshift/latest/dg/r_numeric_computations201.html

Review comment:
       Please write out an explanation of the upscaling behavior either as a comment or in compute.rst

##########
File path: cpp/src/arrow/compute/kernels/test_util.h
##########
@@ -113,6 +113,9 @@ void CheckScalarBinary(std::string func_name, std::shared_ptr<Array> left_input,
                        std::shared_ptr<Array> expected,
                        const FunctionOptions* options = nullptr);
 
+void CheckScalarGeneral(std::string func_name, const std::vector<Datum>& inputs,

Review comment:
       Why not CheckScalar()?

##########
File path: cpp/src/arrow/compute/kernels/scalar_arithmetic_test.cc
##########
@@ -1161,5 +1161,312 @@ TYPED_TEST(TestUnaryArithmeticFloating, AbsoluteValue) {
   }
 }
 
+class TestBinaryArithmeticDecimal : public TestBase {
+ protected:
+  struct Arg {
+    std::shared_ptr<DataType> type;
+    std::string value;
+  };
+
+  std::shared_ptr<DataType> GetOutType(const std::string& op,
+                                       const std::shared_ptr<DataType>& left_type,
+                                       const std::shared_ptr<DataType>& right_type) {
+    auto left_decimal_type = std::static_pointer_cast<DecimalType>(left_type);
+    auto right_decimal_type = std::static_pointer_cast<DecimalType>(right_type);
+
+    const int32_t p1 = left_decimal_type->precision(), s1 = left_decimal_type->scale();
+    const int32_t p2 = right_decimal_type->precision(), s2 = right_decimal_type->scale();
+
+    // https://docs.aws.amazon.com/redshift/latest/dg/r_numeric_computations201.html
+    int32_t precision, scale;
+    if (op == "add" || op == "subtract") {
+      scale = std::max(s1, s2);
+      precision = std::max(p1 - s1, p2 - s2) + 1 + scale;
+    } else if (op == "multiply") {
+      scale = s1 + s2;
+      precision = p1 + p2 + 1;
+    } else if (op == "divide") {
+      scale = std::max(4, s1 + p2 - s2 + 1);
+      precision = p1 - s1 + s2 + scale;
+    } else {
+      ABORT_NOT_OK(Status::Invalid("invalid binary operator: ", op));
+    }
+
+    std::shared_ptr<DataType> type;
+    if (left_type->id() == Type::DECIMAL128) {
+      ASSIGN_OR_ABORT(type, Decimal128Type::Make(precision, scale));
+    } else {
+      ASSIGN_OR_ABORT(type, Decimal256Type::Make(precision, scale));
+    }
+    return type;
+  }
+
+  std::shared_ptr<Scalar> MakeScalar(const std::shared_ptr<DataType>& type,
+                                     const std::string& str) {
+    std::shared_ptr<Scalar> scalar;
+    if (type->id() == Type::DECIMAL128) {
+      Decimal128 value;
+      int32_t dummy;
+      ABORT_NOT_OK(Decimal128::FromString(str, &value, &dummy));
+      ASSIGN_OR_ABORT(scalar, arrow::MakeScalar(type, value));
+    } else {
+      Decimal256 value;
+      int32_t dummy;
+      ABORT_NOT_OK(Decimal256::FromString(str, &value, &dummy));
+      ASSIGN_OR_ABORT(scalar, arrow::MakeScalar(type, value));
+    }
+    return scalar;
+  }
+
+  Datum ToDatum(const std::shared_ptr<DataType>& type, const std::string& value) {
+    if (value.find("[") == std::string::npos) {
+      return Datum(MakeScalar(type, value));
+    } else {
+      return Datum(ArrayFromJSON(type, value));
+    }
+  }
+
+  void Assert(const std::string& op, const Arg& left, const Arg& right,
+              const std::string& expected) {
+    const Datum arg0 = ToDatum(left.type, left.value);
+    const Datum arg1 = ToDatum(right.type, right.value);
+
+    auto out_type = GetOutType(op, left.type, right.type);
+    CheckScalarGeneral(op, {arg0, arg1}, ToDatum(out_type, expected), &options_);
+
+    // commutative operations
+    if (op == "add" || op == "multiply") {
+      CheckScalarGeneral(op, {arg1, arg0}, ToDatum(out_type, expected), &options_);
+    }
+  }
+
+  void AssertFail(const std::string& op, const Arg& left, const Arg& right) {
+    const Datum arg0 = ToDatum(left.type, left.value);
+    const Datum arg1 = ToDatum(right.type, right.value);
+
+    ASSERT_NOT_OK(CallFunction(op, {arg0, arg1}, &options_));
+    if (op == "add" || op == "multiply") {
+      ASSERT_NOT_OK(CallFunction(op, {arg1, arg0}, &options_));
+    }
+  }
+
+  ArithmeticOptions options_ = ArithmeticOptions();
+};
+
+// reference result from bc (precsion=100, scale=40)
+TEST_F(TestBinaryArithmeticDecimal, AddSubtract) {
+  Arg left, right;
+  std::string added, subtracted;
+
+  // array array, decimal128
+  // clang-format off
+  left = {
+    decimal128(30, 3),
+    R"([
+      "1.000",
+      "-123456789012345678901234567.890",
+      "98765432109876543210.987",
+      "-999999999999999999999999999.999"
+    ])",
+  };
+  right = {
+    decimal128(20, 9),
+    R"([
+      "-1.000000000",
+      "12345678901.234567890",
+      "98765.432101234",
+      "-99999999999.999999999"
+    ])",
+  };
+  added = R"([
+    "0.000000000",
+    "-123456789012345666555555666.655432110",
+    "98765432109876641976.419101234",
+    "-1000000000000000099999999999.998999999"
+  ])";
+  subtracted = R"([
+    "2.000000000",
+    "-123456789012345691246913469.124567890",
+    "98765432109876444445.554898766",
+    "-999999999999999899999999999.999000001"
+  ])";
+  this->Assert("add", left, right, added);
+  this->Assert("subtract", left, right, subtracted);
+
+  // array array, decimal256
+  left = {
+    decimal256(30, 20),
+    R"([
+      "-1.00000000000000000001",
+      "1234567890.12345678900000000000",
+      "-9876543210.09876543210987654321",
+      "9999999999.99999999999999999999"
+    ])",
+  };
+  right = {
+    decimal256(30, 10),
+    R"([
+      "1.0000000000",
+      "-1234567890.1234567890",
+      "6789.5432101234",
+      "99999999999999999999.9999999999"
+    ])",
+  };
+  added = R"([
+    "-0.00000000000000000001",
+    "0.00000000000000000000",
+    "-9876536420.55555530870987654321",
+    "100000000009999999999.99999999989999999999"
+  ])";
+  subtracted = R"([
+    "-2.00000000000000000001",
+    "2469135780.24691357800000000000",
+    "-9876549999.64197555550987654321",
+    "-99999999989999999999.99999999990000000001"
+  ])";
+  this->Assert("add", left, right, added);
+  this->Assert("subtract", left, right, subtracted);
+  // clang-format on
+
+  // scalar array
+  left = {decimal128(6, 1), "12345.6"};
+  right = {decimal128(10, 3), R"(["1.234", "1234.000", "-9876.543", "666.888"])"};
+  added = R"(["12346.834", "13579.600", "2469.057", "13012.488"])";
+  subtracted = R"(["12344.366", "11111.600", "22222.143", "11678.712"])";
+  this->Assert("add", left, right, added);
+  this->Assert("subtract", left, right, subtracted);
+  // right - left
+  subtracted = R"(["-12344.366", "-11111.600", "-22222.143", "-11678.712"])";
+  this->Assert("subtract", right, left, subtracted);
+
+  // scalar scalar
+  left = {decimal256(3, 0), "666"};
+  right = {decimal256(3, 0), "888"};
+  this->Assert("add", left, right, "1554");
+  this->Assert("subtract", left, right, "-222");
+
+  // failed case: result *maybe* overflow
+  left = {decimal128(21, 20), "0.12345678901234567890"};
+  right = {decimal128(21, 1), "1.0"};
+  this->AssertFail("add", left, right);
+  this->AssertFail("subtract", left, right);
+
+  left = {decimal256(75, 0), "0"};
+  right = {decimal256(2, 1), "0.0"};
+  this->AssertFail("add", left, right);
+  this->AssertFail("subtract", left, right);
+}
+
+TEST_F(TestBinaryArithmeticDecimal, Multiply) {
+  Arg left, right;
+  std::string expected;
+
+  // array array
+  // clang-format off
+  left = {
+    decimal128(20, 10),
+    R"([
+      "1234567890.1234567890",
+      "-0.0000000001",
+      "-9999999999.9999999999"
+    ])",
+  };
+  right = {
+    decimal128(13, 3),
+    R"([
+      "1234567890.123",
+      "0.001",
+      "-9999999999.999"
+    ])",
+  };
+  expected = R"([
+    "1524157875323319737.9870903950470",
+    "-0.0000000000001",
+    "99999999999989999999.0000000000001"
+  ])";
+  this->Assert("multiply", left, right, expected);
+
+  left = {
+    decimal256(30, 3),
+    R"([
+      "123456789012345678901234567.890",
+      "0.000"
+    ])",
+  };
+  right = {
+    decimal256(20, 9),
+    R"([
+      "-12345678901.234567890",
+      "99999999999.999999999"
+    ])",
+  };
+  expected = R"([
+    "-1524157875323883675034293577501905199.875019052100",
+    "0.000000000000"
+  ])";
+  this->Assert("multiply", left, right, expected);
+  // clang-format on
+
+  // scalar array
+  left = {decimal128(3, 2), "3.14"};
+  right = {decimal128(1, 0), R"(["1", "2", "3", "4", "5"])"};
+  expected = R"(["3.14", "6.28", "9.42", "12.56", "15.70"])";
+  this->Assert("multiply", left, right, expected);
+
+  // scalar scalar
+  left = {decimal128(1, 0), "1"};
+  right = {decimal128(1, 0), "1"};
+  this->Assert("multiply", left, right, "1");
+
+  // failed case: result *maybe* overflow
+  left = {decimal128(20, 0), "1"};
+  right = {decimal128(18, 1), "1.0"};
+  this->AssertFail("multiply", left, right);
+}
+
+TEST_F(TestBinaryArithmeticDecimal, Divide) {
+  Arg left, right;
+  std::string expected;
+
+  // array array
+  // clang-format off
+  left = {decimal128(13, 3), R"(["1234567890.123", "0.001"])"};
+  right = {decimal128(3, 0), R"(["-987", "999"])"};
+  // scale = 7
+  expected = R"(["-1250828.6627386", "0.0000010"])";
+  this->Assert("divide", left, right, expected);
+
+  left = {decimal256(20, 10), R"(["1234567890.1234567890", "9999999999.9999999999"])"};
+  right = {decimal256(13, 3), R"(["1234567890.123", "0.001"])"};
+  // scale = 21
+  expected = R"(["1.000000000000369999093", "9999999999999.999999900000000000000"])";
+  this->Assert("divide", left, right, expected);
+  // clang-format on

Review comment:
       Please don't disable clang format so frequently




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [arrow] cyb70289 edited a comment on pull request #10364: ARROW-12074: [C++][Compute] Add scalar arithmetic kernels for decimal

Posted by GitBox <gi...@apache.org>.
cyb70289 edited a comment on pull request #10364:
URL: https://github.com/apache/arrow/pull/10364#issuecomment-854364569


   Thanks @bkietz , it's almost done except one last catch.
   
   As the output type (precision, scale) is dependent on the inputs, I have a resolver object to calculate output type. The resolver is called with the *casted* input type, not the original type. It causes problem to division, as the output precision and scale should be calculated from original inputs. No trouble for add/subtract as the output precision/scale is the same for original and casted inputs (digit aligned). Multiply doesn't need cast.
   
   Does it make sense to pass both original input types and the casted types to the resolver [1][2]? We will have to update all existing custom resolver codes.
   Maybe add a kernel flag to select original or casted input types?
   Or there are better ways to handle this?
   
   [1] https://github.com/apache/arrow/blob/master/cpp/src/arrow/compute/function.cc#L196
   [2] https://github.com/apache/arrow/blob/master/cpp/src/arrow/compute/exec.cc#L495


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [arrow] cyb70289 commented on a change in pull request #10364: ARROW-12074: [C++][Compute] Add scalar arithmetic kernels for decimal

Posted by GitBox <gi...@apache.org>.
cyb70289 commented on a change in pull request #10364:
URL: https://github.com/apache/arrow/pull/10364#discussion_r649056135



##########
File path: cpp/src/arrow/compute/kernels/scalar_arithmetic.cc
##########
@@ -428,12 +524,120 @@ ArrayKernelExec ArithmeticExecFromOp(detail::GetTypeId get_id) {
   }
 }
 
+Status CastBinaryDecimalArgs(const std::string& func_name,
+                             std::vector<ValueDescr>* values) {
+  auto& left_type = (*values)[0].type;
+  auto& right_type = (*values)[1].type;
+  DCHECK(is_decimal(left_type->id()) || is_decimal(right_type->id()));
+
+  // decimal + float = float
+  if (is_floating(left_type->id())) {
+    right_type = left_type;
+    return Status::OK();
+  } else if (is_floating(right_type->id())) {
+    left_type = right_type;
+    return Status::OK();
+  }
+
+  // precision, scale of left and right args
+  int32_t p1, s1, p2, s2;
+
+  // decimal + integer = decimal

Review comment:
       integer to decimal cast is not available, this feature is not tested yet.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [arrow] cyb70289 commented on a change in pull request #10364: ARROW-12074: [C++][Compute] Add scalar arithmetic kernels for decimal

Posted by GitBox <gi...@apache.org>.
cyb70289 commented on a change in pull request #10364:
URL: https://github.com/apache/arrow/pull/10364#discussion_r646669503



##########
File path: cpp/src/arrow/compute/kernels/scalar_arithmetic.cc
##########
@@ -451,8 +604,45 @@ struct ArithmeticFunction : ScalarFunction {
     if (auto kernel = DispatchExactImpl(this, *values)) return kernel;
     return arrow::compute::detail::NoMatchingKernel(this, *values);
   }
+
+  Result<const Kernel*> DispatchDecimal(std::vector<ValueDescr>* values) const {
+    if (values->size() == 2) {
+      std::vector<std::shared_ptr<DataType>> replaced;
+      RETURN_NOT_OK(GetDecimalBinaryOutput(name(), *values, &replaced));
+      (*values)[0].type = std::move(replaced[0]);
+      (*values)[1].type = std::move(replaced[1]);
+    }
+
+    using arrow::compute::detail::DispatchExactImpl;
+    if (auto kernel = DispatchExactImpl(this, *values)) return kernel;
+    return arrow::compute::detail::NoMatchingKernel(this, *values);
+  }
+};
+
+// resolve decimal operation output type
+struct DecimalBinaryOutputResolver {
+  std::string func_name;
+
+  DecimalBinaryOutputResolver(std::string func_name) : func_name(std::move(func_name)) {}
+
+  Result<ValueDescr> operator()(KernelContext*, const std::vector<ValueDescr>& args) {
+    ARROW_ASSIGN_OR_RAISE(auto out_type, GetDecimalBinaryOutput(func_name, args));

Review comment:
       Ah, it's cool. A bit stuck at using the same function for both casts.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [arrow] bkietz commented on a change in pull request #10364: ARROW-12074: [C++][Compute] Add scalar arithmetic kernels for decimal

Posted by GitBox <gi...@apache.org>.
bkietz commented on a change in pull request #10364:
URL: https://github.com/apache/arrow/pull/10364#discussion_r646711505



##########
File path: cpp/src/arrow/compute/kernels/scalar_arithmetic.cc
##########
@@ -428,12 +524,69 @@ ArrayKernelExec ArithmeticExecFromOp(detail::GetTypeId get_id) {
   }
 }
 
+// calculate output precision/scale and args rescaling per operation type
+Result<std::shared_ptr<DataType>> GetDecimalBinaryOutput(
+    const std::string& op, const std::vector<ValueDescr>& values,
+    std::vector<std::shared_ptr<DataType>>* replaced = nullptr) {
+  const auto& left_type = checked_pointer_cast<DecimalType>(values[0].type);
+  const auto& right_type = checked_pointer_cast<DecimalType>(values[1].type);
+
+  const int32_t p1 = left_type->precision(), s1 = left_type->scale();
+  const int32_t p2 = right_type->precision(), s2 = right_type->scale();
+  if (s1 < 0 || s2 < 0) {
+    return Status::NotImplemented("Decimals with negative scales not supported");
+  }
+
+  int32_t out_prec, out_scale;
+  int32_t left_scaleup = 0, right_scaleup = 0;
+
+  // decimal upscaling behaviour references amazon redshift
+  // https://docs.aws.amazon.com/redshift/latest/dg/r_numeric_computations201.html
+  if (op.find("add") == 0 || op.find("subtract") == 0) {
+    out_scale = std::max(s1, s2);
+    out_prec = std::max(p1 - s1, p2 - s2) + 1 + out_scale;
+    left_scaleup = out_scale - s1;
+    right_scaleup = out_scale - s2;
+  } else if (op.find("multiply") == 0) {
+    out_scale = s1 + s2;
+    out_prec = p1 + p2 + 1;
+  } else if (op.find("divide") == 0) {
+    out_scale = std::max(4, s1 + p2 - s2 + 1);
+    out_prec = p1 - s1 + s2 + out_scale;  // >= p1 + p2 + 1
+    left_scaleup = out_prec - p1;
+  } else {
+    return Status::Invalid("Invalid decimal operation: ", op);
+  }
+
+  const auto id = left_type->id();

Review comment:
       If you want to defer mixed width to a follow up that's acceptable, but DispatchDecimal should fail gracefully for that case




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [arrow] cyb70289 commented on a change in pull request #10364: ARROW-12074: [C++][Compute] Add scalar arithmetic kernels for decimal

Posted by GitBox <gi...@apache.org>.
cyb70289 commented on a change in pull request #10364:
URL: https://github.com/apache/arrow/pull/10364#discussion_r645625763



##########
File path: docs/source/cpp/compute.rst
##########
@@ -286,11 +286,29 @@ an ``Invalid`` :class:`Status` when overflow is detected.
 +--------------------------+------------+--------------------+---------------------+
 | power_checked            | Binary     | Numeric            | Numeric             |
 +--------------------------+------------+--------------------+---------------------+
-| subtract                 | Binary     | Numeric            | Numeric             |
+| subtract                 | Binary     | Numeric            | Numeric (1)         |
 +--------------------------+------------+--------------------+---------------------+
-| subtract_checked         | Binary     | Numeric            | Numeric             |
+| subtract_checked         | Binary     | Numeric            | Numeric (1)         |
 +--------------------------+------------+--------------------+---------------------+
 
+* \(1) Precision and scale of computed DECIMAL results
+
++------------+---------------------------------------------+
+| Operation  | Result precision and scale                  |
++============+=============================================+
+| | add      | | scale = max(s1, s2)                       |
+| | subtract | | precision = max(p1-s1, p2-s2) + 1 + scale |
++------------+---------------------------------------------+
+| multiply   | | scale = s1 + s2                           |
+|            | | precision = p1 + p2 + 1                   |
++------------+---------------------------------------------+
+| divide     | | scale = max(4, s1 + p2 - s2 + 1)          |
+|            | | precision = p1 - s1 + s2 + scale          |

Review comment:
       The extra `|` are added delibrately to show multiple lines in the table.
   Otherwise, the two lines in a table cell will be shown in one line in the browser.
   I checked the rst output in an [online rst editor](http://rst.ninjs.org/#Ky0tLS0tLS0tLS0tLSstLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0rCnwgT3BlcmF0aW9uICB8IFJlc3VsdCBwcmVjaXNpb24gYW5kIHNjYWxlICAgICAgICAgICAgICAgICAgfAorPT09PT09PT09PT09Kz09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PSsKfCB8IGFkZCAgICAgIHwgfCBzY2FsZSA9IG1heChzMSwgczIpICAgICAgICAgICAgICAgICAgICAgICB8CnwgfCBzdWJ0cmFjdCB8IHwgcHJlY2lzaW9uID0gbWF4KHAxLXMxLCBwMi1zMikgKyAxICsgc2NhbGUgfAorLS0tLS0tLS0tLS0tKy0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLSsKCgorLS0tLS0tLS0tLS0tKy0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLSsKfCBPcGVyYXRpb24gIHwgUmVzdWx0IHByZWNpc2lvbiBhbmQgc2NhbGUgICAgICAgICAgICAgICAgICB8Cis9PT09PT09PT09PT0rPT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09Kwp8IGFkZCAgICAgICAgfCBzY2FsZSA9IG1heChzMSwgczIpICAgICAgICAgICAgICAgICAgICAgICAgIHwKfCBzdWJ0cmFjdCAgIHwgcHJlY2lzaW9uID0gbWF4KHAxLXMxLCBwMi1zMikgKyAxICsgc2NhbGUgICB8CistLS0tLS0tLS0tLS0rLS0tLS
 0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tKw==).




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [arrow] cyb70289 commented on a change in pull request #10364: ARROW-12074: [C++][Compute] Add scalar arithmetic kernels for decimal

Posted by GitBox <gi...@apache.org>.
cyb70289 commented on a change in pull request #10364:
URL: https://github.com/apache/arrow/pull/10364#discussion_r645630243



##########
File path: cpp/src/arrow/compute/kernels/codegen_internal.h
##########
@@ -537,6 +569,20 @@ struct OutputAdapter<Type, enable_if_base_binary<Type>> {
   }
 };
 
+template <typename Type>
+struct OutputAdapter<Type, enable_if_decimal<Type>> {
+  using T = typename TypeTraits<Type>::ScalarType::ValueType;
+  template <typename Generator>
+  static Status Write(KernelContext*, Datum* out, Generator&& generator) {
+    ArrayData* out_arr = out->mutable_array();
+    T* out_data = out_arr->GetMutableValues<T>(1);

Review comment:
       I used `endian_agnostic` trick in `OutputArrayWriter`.
   Looks this function is safe (s390 ci did pass). Will double check.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [arrow] cyb70289 commented on pull request #10364: ARROW-12074: [C++][Compute] Add scalar arithmetic kernels for decimal

Posted by GitBox <gi...@apache.org>.
cyb70289 commented on pull request #10364:
URL: https://github.com/apache/arrow/pull/10364#issuecomment-854364569


   Thanks @bkietz , it's almost done except one last catch.
   
   As the output type (precision, scale) is dependent on the inputs, I have a resolver object to calculate output type. The resolver is called with the *casted* input type, not the original type. It causes problem to division, as the output precision and scale should be calculated from original inputs. No trouble for add/subtract as the output precision/scale is the same for original and casted inputs (digit aligned). Multiply doesn't need cast.
   
   Does it make sense to pass both original input types and the casted types to the resolver [1][2]? We will have to update all existing custom resolver codes.
   Or there are better ways to handle this?
   
   [1] https://github.com/apache/arrow/blob/master/cpp/src/arrow/compute/function.cc#L196
   [2] https://github.com/apache/arrow/blob/master/cpp/src/arrow/compute/exec.cc#L495


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [arrow] cyb70289 edited a comment on pull request #10364: ARROW-12074: [C++][Compute] Add scalar arithmetic kernels for decimal

Posted by GitBox <gi...@apache.org>.
cyb70289 edited a comment on pull request #10364:
URL: https://github.com/apache/arrow/pull/10364#issuecomment-852709410


   @bkietz , met with one problem, would like to hear your comments. Thanks.
   
   Decimal upscaling is operation dependent. E.g., `+,-` will upscale arg with smaller scale to align digit, `*` needn't scaling, `/` is more complicated.
   
   Implicit args casting happens before kernel is created. `DispatchBest` only knows arg types, no operation type (kernel dependent) is available. So we cannot figure out the "to be casted" arg type (new precision, scale).
   https://github.com/apache/arrow/blob/master/cpp/src/arrow/compute/function.cc#L175
   
   Maybe add another callback `kernel->explicit_cast()` and call it after or inside `DispatchBest`? Or  create different `ScalarFunction` struct (and DispatchBest) for each decimal operation?


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [arrow] cyb70289 edited a comment on pull request #10364: ARROW-12074: [C++][Compute] Add scalar arithmetic kernels for decimal

Posted by GitBox <gi...@apache.org>.
cyb70289 edited a comment on pull request #10364:
URL: https://github.com/apache/arrow/pull/10364#issuecomment-854364569


   Thanks @bkietz , it's almost done except one last catch.
   
   As the output type (precision, scale) is dependent on the inputs, I have a resolver object to calculate output type. The resolver is called with the *casted* input type, not the original type. It causes problem to division, as the output precision and scale should be calculated from original inputs. No trouble for add/subtract as the output precision/scale is the same for original and casted inputs (digit aligned). Multiply doesn't need cast.
   
   Does it make sense to pass both original input types and the casted types to the resolver [1][2]? We will have to update all existing custom resolver codes.
   Maybe add a kernel flag to select passing original or casted input types to the resolver?
   Or there are better ways to handle this?
   
   [1] https://github.com/apache/arrow/blob/master/cpp/src/arrow/compute/function.cc#L196
   [2] https://github.com/apache/arrow/blob/master/cpp/src/arrow/compute/exec.cc#L495
   
   **EDIT:** Pushed latest code to ease reviewing. Unit test fails due to this issue.


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [arrow] cyb70289 commented on a change in pull request #10364: ARROW-12074: [C++][Compute] Add scalar arithmetic kernels for decimal

Posted by GitBox <gi...@apache.org>.
cyb70289 commented on a change in pull request #10364:
URL: https://github.com/apache/arrow/pull/10364#discussion_r654172014



##########
File path: cpp/src/arrow/compute/kernels/scalar_arithmetic_test.cc
##########
@@ -1148,5 +1148,326 @@ TYPED_TEST(TestUnaryArithmeticFloating, AbsoluteValue) {
   }
 }
 
+class TestBinaryArithmeticDecimal : public TestBase {
+ protected:
+  std::shared_ptr<Scalar> MakeScalar(const std::shared_ptr<DataType>& type,
+                                     const std::string& str) {

Review comment:
       Yes

##########
File path: cpp/src/arrow/compute/kernels/scalar_arithmetic.cc
##########
@@ -461,6 +706,8 @@ std::shared_ptr<ScalarFunction> MakeArithmeticFunction(std::string name,
     auto exec = ArithmeticExecFromOp<ScalarBinaryEqualTypes, Op>(ty);
     DCHECK_OK(func->AddKernel({ty, ty}, ty, exec));
   }
+  AddDecimalBinaryKernels<Op>(name, &func);

Review comment:
       I moved `AddDecimalBinaryKernels` calls out of `MakeArithmeticFunction` and `MakeArithmeticFunctionNotNull`, and put it under each kernel supporting decimal operations.
   https://github.com/apache/arrow/pull/10364/files#diff-3eafd7246f6a8c699f10d46e3276852fe44b6853b5517ef10396e561730c09f4R840
   
   I'm afraid some arithmetic kernels (e.g., `power`) may not support decimals?

##########
File path: cpp/src/arrow/compute/kernels/scalar_arithmetic_test.cc
##########
@@ -1148,5 +1148,326 @@ TYPED_TEST(TestUnaryArithmeticFloating, AbsoluteValue) {
   }
 }
 
+class TestBinaryArithmeticDecimal : public TestBase {
+ protected:
+  std::shared_ptr<Scalar> MakeScalar(const std::shared_ptr<DataType>& type,
+                                     const std::string& str) {

Review comment:
       Done

##########
File path: cpp/src/arrow/compute/kernels/scalar_arithmetic_test.cc
##########
@@ -1148,5 +1148,326 @@ TYPED_TEST(TestUnaryArithmeticFloating, AbsoluteValue) {
   }
 }
 
+class TestBinaryArithmeticDecimal : public TestBase {
+ protected:
+  std::shared_ptr<Scalar> MakeScalar(const std::shared_ptr<DataType>& type,
+                                     const std::string& str) {
+    std::shared_ptr<Scalar> scalar;
+    if (type->id() == Type::DECIMAL128) {
+      Decimal128 value;
+      int32_t dummy;
+      ABORT_NOT_OK(Decimal128::FromString(str, &value, &dummy));
+      ASSIGN_OR_ABORT(scalar, arrow::MakeScalar(type, value));
+    } else {
+      Decimal256 value;
+      int32_t dummy;
+      ABORT_NOT_OK(Decimal256::FromString(str, &value, &dummy));
+      ASSIGN_OR_ABORT(scalar, arrow::MakeScalar(type, value));
+    }
+    return scalar;
+  }
+};
+

Review comment:
       Done

##########
File path: cpp/src/arrow/compute/kernels/scalar_arithmetic.cc
##########
@@ -451,8 +655,49 @@ struct ArithmeticFunction : ScalarFunction {
     if (auto kernel = DispatchExactImpl(this, *values)) return kernel;
     return arrow::compute::detail::NoMatchingKernel(this, *values);
   }
+
+  Status CheckDecimals(std::vector<ValueDescr>* values) const {
+    bool has_decimal = false;
+    for (const auto& value : *values) {
+      if (is_decimal(value.type->id())) {
+        has_decimal = true;
+        break;
+      }
+    }
+    if (!has_decimal) return Status::OK();
+
+    if (values->size() == 2) {
+      return CastBinaryDecimalArgs(name(), values);
+    }
+    return Status::OK();
+  }
+};
+
+// resolve decimal operation output type
+struct BinaryDecimalOutputResolver {
+  const std::string func_name;
+
+  explicit BinaryDecimalOutputResolver(std::string func_name)
+      : func_name(std::move(func_name)) {}
+
+  Result<ValueDescr> operator()(KernelContext*, const std::vector<ValueDescr>& args) {
+    ARROW_ASSIGN_OR_RAISE(auto type, ResolveBinaryDecimalOutput(func_name, args));
+    return ValueDescr(std::move(type), GetBroadcastShape(args));
+  }
 };
 
+template <typename Op>
+void AddDecimalBinaryKernels(const std::string& name,
+                             std::shared_ptr<ArithmeticFunction>* func) {
+  auto out_type = OutputType(BinaryDecimalOutputResolver(name));

Review comment:
       Done




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [arrow] cyb70289 commented on a change in pull request #10364: ARROW-12074: [C++][Compute] Add scalar arithmetic kernels for decimal

Posted by GitBox <gi...@apache.org>.
cyb70289 commented on a change in pull request #10364:
URL: https://github.com/apache/arrow/pull/10364#discussion_r645641543



##########
File path: cpp/src/arrow/compute/kernels/scalar_arithmetic.cc
##########
@@ -428,12 +524,69 @@ ArrayKernelExec ArithmeticExecFromOp(detail::GetTypeId get_id) {
   }
 }
 
+// calculate output precision/scale and args rescaling per operation type
+Result<std::shared_ptr<DataType>> GetDecimalBinaryOutput(
+    const std::string& op, const std::vector<ValueDescr>& values,
+    std::vector<std::shared_ptr<DataType>>* replaced = nullptr) {
+  const auto& left_type = checked_pointer_cast<DecimalType>(values[0].type);
+  const auto& right_type = checked_pointer_cast<DecimalType>(values[1].type);
+
+  const int32_t p1 = left_type->precision(), s1 = left_type->scale();
+  const int32_t p2 = right_type->precision(), s2 = right_type->scale();
+  if (s1 < 0 || s2 < 0) {
+    return Status::NotImplemented("Decimals with negative scales not supported");
+  }
+
+  int32_t out_prec, out_scale;
+  int32_t left_scaleup = 0, right_scaleup = 0;
+
+  // decimal upscaling behaviour references amazon redshift
+  // https://docs.aws.amazon.com/redshift/latest/dg/r_numeric_computations201.html
+  if (op.find("add") == 0 || op.find("subtract") == 0) {
+    out_scale = std::max(s1, s2);
+    out_prec = std::max(p1 - s1, p2 - s2) + 1 + out_scale;
+    left_scaleup = out_scale - s1;
+    right_scaleup = out_scale - s2;
+  } else if (op.find("multiply") == 0) {
+    out_scale = s1 + s2;
+    out_prec = p1 + p2 + 1;
+  } else if (op.find("divide") == 0) {
+    out_scale = std::max(4, s1 + p2 - s2 + 1);
+    out_prec = p1 - s1 + s2 + out_scale;  // >= p1 + p2 + 1
+    left_scaleup = out_prec - p1;
+  } else {
+    return Status::Invalid("Invalid decimal operation: ", op);
+  }
+
+  const auto id = left_type->id();
+  auto make = [id](int32_t precision, int32_t scale) {
+    if (id == Type::DECIMAL128) {
+      return Decimal128Type::Make(precision, scale);
+    } else {
+      return Decimal256Type::Make(precision, scale);
+    }
+  };
+
+  if (replaced) {
+    replaced->resize(2);
+    ARROW_ASSIGN_OR_RAISE((*replaced)[0], make(p1 + left_scaleup, s1 + left_scaleup));
+    ARROW_ASSIGN_OR_RAISE((*replaced)[1], make(p2 + right_scaleup, s2 + right_scaleup));
+  }
+
+  return make(out_prec, out_scale);
+}
+
 struct ArithmeticFunction : ScalarFunction {
   using ScalarFunction::ScalarFunction;
 
   Result<const Kernel*> DispatchBest(std::vector<ValueDescr>* values) const override {
     RETURN_NOT_OK(CheckArity(*values));
 
+    const auto type_id = (*values)[0].type->id();

Review comment:
       Do you mean operations like `DecimalType + DoubleType`? I didn't implement it now. Only decimal + decimal. Maybe in a followup task?




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [arrow] cyb70289 commented on a change in pull request #10364: ARROW-12074: [C++][Compute] Add scalar arithmetic kernels for decimal

Posted by GitBox <gi...@apache.org>.
cyb70289 commented on a change in pull request #10364:
URL: https://github.com/apache/arrow/pull/10364#discussion_r650350724



##########
File path: cpp/src/arrow/compute/kernels/scalar_arithmetic.cc
##########
@@ -428,12 +524,120 @@ ArrayKernelExec ArithmeticExecFromOp(detail::GetTypeId get_id) {
   }
 }
 
+Status CastBinaryDecimalArgs(const std::string& func_name,
+                             std::vector<ValueDescr>* values) {
+  auto& left_type = (*values)[0].type;
+  auto& right_type = (*values)[1].type;
+  DCHECK(is_decimal(left_type->id()) || is_decimal(right_type->id()));
+
+  // decimal + float = float
+  if (is_floating(left_type->id())) {
+    right_type = left_type;
+    return Status::OK();
+  } else if (is_floating(right_type->id())) {
+    left_type = right_type;
+    return Status::OK();
+  }
+
+  // precision, scale of left and right args
+  int32_t p1, s1, p2, s2;
+
+  // decimal + integer = decimal

Review comment:
       Sure. Created JIRA https://issues.apache.org/jira/browse/ARROW-13067.
   




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [arrow] cyb70289 commented on a change in pull request #10364: ARROW-12074: [C++][Compute] Add scalar arithmetic kernels for decimal

Posted by GitBox <gi...@apache.org>.
cyb70289 commented on a change in pull request #10364:
URL: https://github.com/apache/arrow/pull/10364#discussion_r645635893



##########
File path: cpp/src/arrow/compute/kernels/scalar_arithmetic.cc
##########
@@ -451,8 +604,45 @@ struct ArithmeticFunction : ScalarFunction {
     if (auto kernel = DispatchExactImpl(this, *values)) return kernel;
     return arrow::compute::detail::NoMatchingKernel(this, *values);
   }
+
+  Result<const Kernel*> DispatchDecimal(std::vector<ValueDescr>* values) const {
+    if (values->size() == 2) {
+      std::vector<std::shared_ptr<DataType>> replaced;
+      RETURN_NOT_OK(GetDecimalBinaryOutput(name(), *values, &replaced));
+      (*values)[0].type = std::move(replaced[0]);
+      (*values)[1].type = std::move(replaced[1]);
+    }
+
+    using arrow::compute::detail::DispatchExactImpl;
+    if (auto kernel = DispatchExactImpl(this, *values)) return kernel;
+    return arrow::compute::detail::NoMatchingKernel(this, *values);
+  }
+};
+
+// resolve decimal operation output type
+struct DecimalBinaryOutputResolver {
+  std::string func_name;
+
+  DecimalBinaryOutputResolver(std::string func_name) : func_name(std::move(func_name)) {}
+
+  Result<ValueDescr> operator()(KernelContext*, const std::vector<ValueDescr>& args) {
+    ARROW_ASSIGN_OR_RAISE(auto out_type, GetDecimalBinaryOutput(func_name, args));

Review comment:
       This is indeed the problem I'm trying to fix and would like to ask for comment.
   See my last comment at https://github.com/apache/arrow/pull/10364#issuecomment-854364569




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [arrow] cyb70289 commented on pull request #10364: ARROW-12074: [C++][Compute] Add scalar arithmetic kernels for decimal

Posted by GitBox <gi...@apache.org>.
cyb70289 commented on pull request #10364:
URL: https://github.com/apache/arrow/pull/10364#issuecomment-851806478


   > Please extract the decimal upscaling from the addition kernel into an implicit cast. This will simplify the addition kernel to stateless addition (IIUC) and give callers control over how to handle upscaling. 
   
   Thanks @bkietz , casting args (rescaling decimal inputs) implicitly before exec looks much better than my current implementation. Will try the approach.


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [arrow] cyb70289 commented on a change in pull request #10364: ARROW-12074: [C++][Compute] Add scalar arithmetic kernels for decimal

Posted by GitBox <gi...@apache.org>.
cyb70289 commented on a change in pull request #10364:
URL: https://github.com/apache/arrow/pull/10364#discussion_r649057551



##########
File path: cpp/src/arrow/compute/kernels/scalar_arithmetic_test.cc
##########
@@ -1148,5 +1148,326 @@ TYPED_TEST(TestUnaryArithmeticFloating, AbsoluteValue) {
   }
 }
 
+class TestBinaryArithmeticDecimal : public TestBase {
+ protected:
+  std::shared_ptr<Scalar> MakeScalar(const std::shared_ptr<DataType>& type,
+                                     const std::string& str) {
+    std::shared_ptr<Scalar> scalar;
+    if (type->id() == Type::DECIMAL128) {
+      Decimal128 value;
+      int32_t dummy;
+      ABORT_NOT_OK(Decimal128::FromString(str, &value, &dummy));
+      ASSIGN_OR_ABORT(scalar, arrow::MakeScalar(type, value));
+    } else {
+      Decimal256 value;
+      int32_t dummy;
+      ABORT_NOT_OK(Decimal256::FromString(str, &value, &dummy));
+      ASSIGN_OR_ABORT(scalar, arrow::MakeScalar(type, value));
+    }
+    return scalar;
+  }
+};
+
+// reference result from bc (precsion=100, scale=40)
+TEST_F(TestBinaryArithmeticDecimal, AddSubtract) {
+  // array array, decimal128
+  {
+    auto left = ArrayFromJSON(decimal128(30, 3),
+                              R"([
+        "1.000",
+        "-123456789012345678901234567.890",
+        "98765432109876543210.987",
+        "-999999999999999999999999999.999"
+      ])");
+    auto right = ArrayFromJSON(decimal128(20, 9),
+                               R"([
+        "-1.000000000",
+        "12345678901.234567890",
+        "98765.432101234",
+        "-99999999999.999999999"
+      ])");
+    auto added = ArrayFromJSON(decimal128(37, 9),
+                               R"([
+      "0.000000000",
+      "-123456789012345666555555666.655432110",
+      "98765432109876641976.419101234",
+      "-1000000000000000099999999999.998999999"
+    ])");
+    auto subtracted = ArrayFromJSON(decimal128(37, 9),
+                                    R"([
+      "2.000000000",
+      "-123456789012345691246913469.124567890",
+      "98765432109876444445.554898766",
+      "-999999999999999899999999999.999000001"
+    ])");
+    CheckScalarBinary("add", left, right, added);
+    CheckScalarBinary("subtract", left, right, subtracted);
+  }
+
+  // array array, decimal256
+  {
+    auto left = ArrayFromJSON(decimal256(30, 20),
+                              R"([
+        "-1.00000000000000000001",
+        "1234567890.12345678900000000000",
+        "-9876543210.09876543210987654321",
+        "9999999999.99999999999999999999"
+      ])");
+    auto right = ArrayFromJSON(decimal256(30, 10),
+                               R"([
+        "1.0000000000",
+        "-1234567890.1234567890",
+        "6789.5432101234",
+        "99999999999999999999.9999999999"
+      ])");
+    auto added = ArrayFromJSON(decimal256(41, 20),
+                               R"([
+      "-0.00000000000000000001",
+      "0.00000000000000000000",
+      "-9876536420.55555530870987654321",
+      "100000000009999999999.99999999989999999999"
+    ])");
+    auto subtracted = ArrayFromJSON(decimal256(41, 20),
+                                    R"([
+      "-2.00000000000000000001",
+      "2469135780.24691357800000000000",
+      "-9876549999.64197555550987654321",
+      "-99999999989999999999.99999999990000000001"
+    ])");
+    CheckScalarBinary("add", left, right, added);
+    CheckScalarBinary("subtract", left, right, subtracted);
+  }
+
+  // scalar array
+  {
+    auto left = this->MakeScalar(decimal128(6, 1), "12345.6");
+    auto right = ArrayFromJSON(decimal128(10, 3),
+                               R"(["1.234", "1234.000", "-9876.543", "666.888"])");
+    auto added = ArrayFromJSON(decimal128(11, 3),
+                               R"(["12346.834", "13579.600", "2469.057", "13012.488"])");
+    auto left_sub_right = ArrayFromJSON(
+        decimal128(11, 3), R"(["12344.366", "11111.600", "22222.143", "11678.712"])");
+    auto right_sub_left = ArrayFromJSON(
+        decimal128(11, 3), R"(["-12344.366", "-11111.600", "-22222.143", "-11678.712"])");
+    CheckScalarBinary("add", left, right, added);
+    CheckScalarBinary("add", right, left, added);
+    CheckScalarBinary("subtract", left, right, left_sub_right);
+    CheckScalarBinary("subtract", right, left, right_sub_left);
+  }
+
+  // scalar scalar
+  {
+    auto left = this->MakeScalar(decimal256(3, 0), "666");
+    auto right = this->MakeScalar(decimal256(3, 0), "888");
+    auto added = this->MakeScalar(decimal256(4, 0), "1554");
+    auto subtracted = this->MakeScalar(decimal256(4, 0), "-222");
+    CheckScalarBinary("add", left, right, added);
+    CheckScalarBinary("subtract", left, right, subtracted);
+  }
+
+  // decimal128 decimal256
+  {
+    auto left = this->MakeScalar(decimal128(3, 0), "666");
+    auto right = this->MakeScalar(decimal256(3, 0), "888");
+    auto added = this->MakeScalar(decimal256(4, 0), "1554");
+    CheckScalarBinary("add", left, right, added);
+    CheckScalarBinary("add", right, left, added);
+  }
+
+  // decimal float

Review comment:
       test decimal + float, float + decimal




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org