You are viewing a plain text version of this content. The canonical link for it is here.
Posted to github@arrow.apache.org by GitBox <gi...@apache.org> on 2021/04/14 16:38:37 UTC

[GitHub] [arrow] bkietz commented on a change in pull request #10016: ARROW-11950: [C++][Compute] Add unary negative kernel

bkietz commented on a change in pull request #10016:
URL: https://github.com/apache/arrow/pull/10016#discussion_r613356580



##########
File path: cpp/src/arrow/compute/kernels/codegen_internal.h
##########
@@ -739,6 +739,10 @@ struct ScalarUnaryNotNull {
   }
 };
 
+// A kernel exec generator for unary kernels
+template <typename OutType, typename ArgType, typename Op>
+using ScalarUnaryType = ScalarUnary<OutType, ArgType, Op>;

Review comment:
       This alias doesn't add anything, please revert it.

##########
File path: cpp/src/arrow/compute/kernels/scalar_arithmetic.cc
##########
@@ -233,6 +235,43 @@ struct DivideChecked {
   }
 };
 
+struct Negate {
+  template <typename T, typename Arg0>
+  // NOTE [EPM]: Discuss on 0 vs. -0.
+  static constexpr enable_if_floating_point<T> Call(KernelContext*, Arg0 arg) {
+    return -arg;
+  }
+
+  // NOTE [EPM]: How to handle unsigned integers?
+  //  * Promote to signed?
+  //  * State that unsigned numbers are not supported (i.e., undefined behavior)?
+  //  * Use C++ integral conversions (e.g., Negate(-128) = -128)?
+  //    * https://timsong-cpp.github.io/cppwp/n4659/conv.integral
+  template <typename T, typename Arg0>
+  static constexpr enable_if_integer<T> Call(KernelContext*, Arg0 arg) {
+    return -arg;
+  }
+};
+
+struct NegateChecked {
+  template <typename T, typename Arg0>
+  static enable_if_integer<T> Call(KernelContext* ctx, Arg0 arg) {

Review comment:
       ```suggestion
     static enable_if_signed_integer<T> Call(KernelContext* ctx, Arg0 arg) {
   ```

##########
File path: cpp/src/arrow/compute/kernels/scalar_arithmetic_test.cc
##########
@@ -709,5 +709,76 @@ TEST(TestBinaryArithmetic, AddWithImplicitCastsUint64EdgeCase) {
                                      ArrayFromJSON(uint64(), "[18446744073709551615]")}));
 }
 
+TEST(TestUnaryArithmeticSigned, Negate) {
+  for (const auto& ty : internal::SignedIntTypes()) {
+    // No input
+    CheckScalarUnary("negate", ArrayFromJSON(ty, "[]"), ArrayFromJSON(ty, "[]"));
+    // Null input
+    CheckScalarUnary("negate", ArrayFromJSON(ty, "[null]"), ArrayFromJSON(ty, "[null]"));
+    // Zeros as inputs
+    CheckScalarUnary("negate", ArrayFromJSON(ty, "[0, 0, -0]"), ArrayFromJSON(ty, "[0, -0, 0]"));

Review comment:
       -0 is not distinct from 0 for integral types
   ```suggestion
       CheckScalarUnary("negate", ArrayFromJSON(ty, "[0, 0, 0]"), ArrayFromJSON(ty, "[0, 0, 0]"));
   ```

##########
File path: cpp/src/arrow/compute/kernels/scalar_arithmetic.cc
##########
@@ -233,6 +235,43 @@ struct DivideChecked {
   }
 };
 
+struct Negate {
+  template <typename T, typename Arg0>
+  // NOTE [EPM]: Discuss on 0 vs. -0.
+  static constexpr enable_if_floating_point<T> Call(KernelContext*, Arg0 arg) {
+    return -arg;
+  }
+
+  // NOTE [EPM]: How to handle unsigned integers?
+  //  * Promote to signed?
+  //  * State that unsigned numbers are not supported (i.e., undefined behavior)?
+  //  * Use C++ integral conversions (e.g., Negate(-128) = -128)?
+  //    * https://timsong-cpp.github.io/cppwp/n4659/conv.integral
+  template <typename T, typename Arg0>
+  static constexpr enable_if_integer<T> Call(KernelContext*, Arg0 arg) {

Review comment:
       ```suggestion
     static constexpr enable_if_signed_integer<T> Call(KernelContext*, Arg0 arg) {
   ```

##########
File path: cpp/src/arrow/compute/kernels/scalar_arithmetic.cc
##########
@@ -309,6 +348,21 @@ std::shared_ptr<ScalarFunction> MakeArithmeticFunctionNotNull(std::string name,
   return func;
 }
 
+template <typename Op>
+std::shared_ptr<ScalarFunction> MakeScalarArithmeticFunction(std::string name,
+                                                             const FunctionDoc* doc) {
+  auto func = std::make_shared<ScalarFunction>(name, Arity::Unary(), doc);
+  // 8-bit signed integer
+  // ArrayKernelExec exec = ScalarUnary<Int8Type, Int8Type, Op>::Exec;
+  // DCHECK_OK(func->AddKernel({int8()}, int8(), exec));
+
+  for (const auto& ty : NumericTypes()) {
+    auto exec = NumericEqualTypesBinary<ScalarUnaryType, Op>(ty);

Review comment:
       Also, note that although `ScalarUnary` is acceptable for "negate", "negate_checked" will need `ScalarUnaryNotNull` to ensure that we don't generate spurious errors when looking at values "under" null bits

##########
File path: cpp/src/arrow/compute/kernels/scalar_arithmetic.cc
##########
@@ -309,6 +348,21 @@ std::shared_ptr<ScalarFunction> MakeArithmeticFunctionNotNull(std::string name,
   return func;
 }
 
+template <typename Op>
+std::shared_ptr<ScalarFunction> MakeScalarArithmeticFunction(std::string name,
+                                                             const FunctionDoc* doc) {
+  auto func = std::make_shared<ScalarFunction>(name, Arity::Unary(), doc);
+  // 8-bit signed integer
+  // ArrayKernelExec exec = ScalarUnary<Int8Type, Int8Type, Op>::Exec;
+  // DCHECK_OK(func->AddKernel({int8()}, int8(), exec));
+
+  for (const auto& ty : NumericTypes()) {
+    auto exec = NumericEqualTypesBinary<ScalarUnaryType, Op>(ty);

Review comment:
       It's fortunate that NumericEqualTypesBinary can be reused here, but we should rename it to avoid confusion. Maybe `ArithmeticExecFromOp`?
   ```suggestion
       auto exec = ArithmeticExecFromOp<ScalarUnary, Op>(ty);
   ```

##########
File path: cpp/src/arrow/compute/kernels/scalar_arithmetic.cc
##########
@@ -233,6 +235,43 @@ struct DivideChecked {
   }
 };
 
+struct Negate {
+  template <typename T, typename Arg0>
+  // NOTE [EPM]: Discuss on 0 vs. -0.
+  static constexpr enable_if_floating_point<T> Call(KernelContext*, Arg0 arg) {
+    return -arg;
+  }
+
+  // NOTE [EPM]: How to handle unsigned integers?
+  //  * Promote to signed?

Review comment:
       I think promotion to signed is the correct way to handle this. Only kernels for signed integer types will be included, and when negating an unsigned integer an implicit cast to the next largest signed integer must be performed first.

##########
File path: cpp/src/arrow/compute/kernels/scalar_arithmetic_test.cc
##########
@@ -709,5 +709,76 @@ TEST(TestBinaryArithmetic, AddWithImplicitCastsUint64EdgeCase) {
                                      ArrayFromJSON(uint64(), "[18446744073709551615]")}));
 }
 
+TEST(TestUnaryArithmeticSigned, Negate) {
+  for (const auto& ty : internal::SignedIntTypes()) {
+    // No input
+    CheckScalarUnary("negate", ArrayFromJSON(ty, "[]"), ArrayFromJSON(ty, "[]"));
+    // Null input
+    CheckScalarUnary("negate", ArrayFromJSON(ty, "[null]"), ArrayFromJSON(ty, "[null]"));
+    // Zeros as inputs
+    CheckScalarUnary("negate", ArrayFromJSON(ty, "[0, 0, -0]"), ArrayFromJSON(ty, "[0, -0, 0]"));
+    // Positive inputs
+    CheckScalarUnary("negate", ArrayFromJSON(ty, "[1, 10, 100]"), ArrayFromJSON(ty, "[-1, -10, -100]"));
+    // Negative inputs
+    CheckScalarUnary("negate", ArrayFromJSON(ty, "[-1, -10, -100]"), ArrayFromJSON(ty, "[1, 10, 100]"));
+  }
+}
+
+TEST(TestUnaryArithmeticSignedMinMax, Negate) {
+  // NOTE [EPM]: Can these tests be done by iterating types?
+
+  // Min input
+  // Out-of-bounds after operation (C++ 2's complement wrap around, architecture dependent)
+  auto int8_min = std::numeric_limits<int8_t>::min();
+  CheckScalarUnary("negate", MakeScalar(int8_min), MakeScalar(int8_min));
+  auto int16_min = std::numeric_limits<int16_t>::min();
+  CheckScalarUnary("negate", MakeScalar(int16_min), MakeScalar(int16_min));
+  auto int32_min = std::numeric_limits<int32_t>::min();
+  CheckScalarUnary("negate", MakeScalar(int32_min), MakeScalar(int32_min));
+  auto int64_min = std::numeric_limits<int64_t>::min();
+  CheckScalarUnary("negate", MakeScalar(int64_min), MakeScalar(int64_min));
+
+  // Max input
+  // NOTE [EPM]: Why do these fail? The expected result is promoted to int32.
+  // auto int8_max = std::numeric_limits<int8_t>::max();
+  // CheckScalarUnary("negate", MakeScalar(int8_max), MakeScalar(-int8_max));
+  // auto int16_max = std::numeric_limits<int16_t>::max();
+  // CheckScalarUnary("negate", MakeScalar(int16_max), MakeScalar(-int16_max));

Review comment:
       MakeScalar decides the DataType of the scalar based on its argument type, and `decltype(-int8_max)` is 32 bit signed integer. Adding an explicit cast to 8 bit should fix it 
   ```suggestion
     auto int8_max = std::numeric_limits<int8_t>::max();
     CheckScalarUnary("negate", MakeScalar(int8_max), MakeScalar(static_cast<int8_t>(-int8_max)));
     auto int16_max = std::numeric_limits<int16_t>::max();
     CheckScalarUnary("negate", MakeScalar(int16_max), MakeScalar(static_cast<int16_t>(-int16_max)));
   ```

##########
File path: cpp/src/arrow/compute/kernels/scalar_arithmetic.cc
##########
@@ -233,6 +235,43 @@ struct DivideChecked {
   }
 };
 
+struct Negate {
+  template <typename T, typename Arg0>
+  // NOTE [EPM]: Discuss on 0 vs. -0.

Review comment:
       I would say that it's not the negate kernel's responsibility to coerce -0 to 0.
   For follow up work: it might be useful to have another kernel which normalizes floating point values by replacing NaNs with nulls, ensuring only positive 0s, etc
   ```suggestion
   ```

##########
File path: cpp/src/arrow/compute/kernels/scalar_arithmetic.cc
##########
@@ -309,6 +348,21 @@ std::shared_ptr<ScalarFunction> MakeArithmeticFunctionNotNull(std::string name,
   return func;
 }
 
+template <typename Op>
+std::shared_ptr<ScalarFunction> MakeScalarArithmeticFunction(std::string name,
+                                                             const FunctionDoc* doc) {
+  auto func = std::make_shared<ScalarFunction>(name, Arity::Unary(), doc);
+  // 8-bit signed integer
+  // ArrayKernelExec exec = ScalarUnary<Int8Type, Int8Type, Op>::Exec;
+  // DCHECK_OK(func->AddKernel({int8()}, int8(), exec));

Review comment:
       ```suggestion
   ```

##########
File path: cpp/src/arrow/compute/kernels/scalar_arithmetic.cc
##########
@@ -309,6 +348,21 @@ std::shared_ptr<ScalarFunction> MakeArithmeticFunctionNotNull(std::string name,
   return func;
 }
 
+template <typename Op>
+std::shared_ptr<ScalarFunction> MakeScalarArithmeticFunction(std::string name,

Review comment:
       ```suggestion
   std::shared_ptr<ScalarFunction> MakeUnaryArithmeticFunction(std::string name,
   ```

##########
File path: cpp/src/arrow/compute/kernels/scalar_arithmetic.cc
##########
@@ -309,6 +348,21 @@ std::shared_ptr<ScalarFunction> MakeArithmeticFunctionNotNull(std::string name,
   return func;
 }
 

Review comment:
       Insertion of implicit casts is accomplished by overriding Function::DispatchBest. For example, to ensure that unsigned types are supported by casting to a compatible unsigned type, use:
   
   ```suggestion
   struct UnaryArithmeticFunction : ScalarFunction {
     using ScalarFunction::ScalarFunction;
     Result<const Kernel*> DispatchBest(std::vector<ValueDescr>* values) const override {
       RETURN_NOT_OK(CheckArity(*values));
       
       using arrow::compute::detail::DispatchExactImpl;
       
       if (auto kernel = DispatchExactImpl(this, *values)) return kernel;
       
       EnsureDictionaryDecoded(values);
       if (auto type = CommonNumeric({values->at(0), int8()})) {
         ReplaceTypes(type, values);
       }
    
       if (auto kernel = DispatchExactImpl(this, *values)) return kernel;
       return arrow::compute::detail::NoMatchingKernel(this, *values);
     }
   };
   ```
   
   (UnaryScalarFunction will replace ScalarFunction below in `auto func = std::make_shared<ScalarFunction>(name, Arity::Unary(), doc);`)

##########
File path: cpp/src/arrow/compute/kernels/scalar_arithmetic.cc
##########
@@ -233,6 +235,43 @@ struct DivideChecked {
   }
 };
 
+struct Negate {
+  template <typename T, typename Arg0>
+  // NOTE [EPM]: Discuss on 0 vs. -0.
+  static constexpr enable_if_floating_point<T> Call(KernelContext*, Arg0 arg) {
+    return -arg;
+  }
+
+  // NOTE [EPM]: How to handle unsigned integers?
+  //  * Promote to signed?
+  //  * State that unsigned numbers are not supported (i.e., undefined behavior)?
+  //  * Use C++ integral conversions (e.g., Negate(-128) = -128)?
+  //    * https://timsong-cpp.github.io/cppwp/n4659/conv.integral
+  template <typename T, typename Arg0>
+  static constexpr enable_if_integer<T> Call(KernelContext*, Arg0 arg) {
+    return -arg;
+  }
+};
+
+struct NegateChecked {
+  template <typename T, typename Arg0>
+  static enable_if_integer<T> Call(KernelContext* ctx, Arg0 arg) {
+    static_assert(std::is_same<T, Arg0>::value, "");
+    T result = 0;
+    // NOTE [EPM]: Check this edge case of overflow. What are we trying to check here?
+    if (ARROW_PREDICT_FALSE(SubtractWithOverflow(0, arg, &result))) {
+      ctx->SetStatus(Status::Invalid("overflow"));
+    }
+    return result;

Review comment:
       ```suggestion
       if (arg == std::numeric_limits<T>::min()) {
         // two's complement can represent a negative number which has no corresponding positive,
         // for example int8_t(-128) cannot be negated since 128 is not respresentable in int8_t
         ctx->SetStatus(Status::Invalid("overflow"));
         return 0;
       }
       return -arg;
   ```

##########
File path: cpp/src/arrow/compute/kernels/scalar_arithmetic.cc
##########
@@ -233,6 +235,43 @@ struct DivideChecked {
   }
 };
 
+struct Negate {
+  template <typename T, typename Arg0>
+  // NOTE [EPM]: Discuss on 0 vs. -0.
+  static constexpr enable_if_floating_point<T> Call(KernelContext*, Arg0 arg) {
+    return -arg;
+  }
+
+  // NOTE [EPM]: How to handle unsigned integers?
+  //  * Promote to signed?
+  //  * State that unsigned numbers are not supported (i.e., undefined behavior)?
+  //  * Use C++ integral conversions (e.g., Negate(-128) = -128)?
+  //    * https://timsong-cpp.github.io/cppwp/n4659/conv.integral
+  template <typename T, typename Arg0>
+  static constexpr enable_if_integer<T> Call(KernelContext*, Arg0 arg) {
+    return -arg;

Review comment:
       To avoid undefined behavior warnings in the overflow case, use bit ops instead of `-`
   ```suggestion
       return static_cast<T>(to_unsigned(~arg) + 1);
   ```

##########
File path: cpp/src/arrow/compute/kernels/scalar_arithmetic.cc
##########
@@ -233,6 +235,43 @@ struct DivideChecked {
   }
 };
 
+struct Negate {
+  template <typename T, typename Arg0>
+  // NOTE [EPM]: Discuss on 0 vs. -0.
+  static constexpr enable_if_floating_point<T> Call(KernelContext*, Arg0 arg) {
+    return -arg;
+  }
+
+  // NOTE [EPM]: How to handle unsigned integers?
+  //  * Promote to signed?
+  //  * State that unsigned numbers are not supported (i.e., undefined behavior)?
+  //  * Use C++ integral conversions (e.g., Negate(-128) = -128)?

Review comment:
       To be clear, this is overflow behavior which is a separate concern from handling unsigned integers.
   
   To match the behavior of the other arithmetic kernels, negation of 8 bit -128 should wrap around to -128 as you describe.

##########
File path: cpp/src/arrow/compute/kernels/scalar_arithmetic_test.cc
##########
@@ -709,5 +709,76 @@ TEST(TestBinaryArithmetic, AddWithImplicitCastsUint64EdgeCase) {
                                      ArrayFromJSON(uint64(), "[18446744073709551615]")}));
 }
 
+TEST(TestUnaryArithmeticSigned, Negate) {
+  for (const auto& ty : internal::SignedIntTypes()) {
+    // No input
+    CheckScalarUnary("negate", ArrayFromJSON(ty, "[]"), ArrayFromJSON(ty, "[]"));
+    // Null input
+    CheckScalarUnary("negate", ArrayFromJSON(ty, "[null]"), ArrayFromJSON(ty, "[null]"));
+    // Zeros as inputs
+    CheckScalarUnary("negate", ArrayFromJSON(ty, "[0, 0, -0]"), ArrayFromJSON(ty, "[0, -0, 0]"));
+    // Positive inputs
+    CheckScalarUnary("negate", ArrayFromJSON(ty, "[1, 10, 100]"), ArrayFromJSON(ty, "[-1, -10, -100]"));
+    // Negative inputs
+    CheckScalarUnary("negate", ArrayFromJSON(ty, "[-1, -10, -100]"), ArrayFromJSON(ty, "[1, 10, 100]"));
+  }
+}
+
+TEST(TestUnaryArithmeticSignedMinMax, Negate) {
+  // NOTE [EPM]: Can these tests be done by iterating types?
+
+  // Min input
+  // Out-of-bounds after operation (C++ 2's complement wrap around, architecture dependent)
+  auto int8_min = std::numeric_limits<int8_t>::min();
+  CheckScalarUnary("negate", MakeScalar(int8_min), MakeScalar(int8_min));
+  auto int16_min = std::numeric_limits<int16_t>::min();
+  CheckScalarUnary("negate", MakeScalar(int16_min), MakeScalar(int16_min));
+  auto int32_min = std::numeric_limits<int32_t>::min();
+  CheckScalarUnary("negate", MakeScalar(int32_min), MakeScalar(int32_min));
+  auto int64_min = std::numeric_limits<int64_t>::min();
+  CheckScalarUnary("negate", MakeScalar(int64_min), MakeScalar(int64_min));
+
+  // Max input
+  // NOTE [EPM]: Why do these fail? The expected result is promoted to int32.
+  // auto int8_max = std::numeric_limits<int8_t>::max();
+  // CheckScalarUnary("negate", MakeScalar(int8_max), MakeScalar(-int8_max));
+  // auto int16_max = std::numeric_limits<int16_t>::max();
+  // CheckScalarUnary("negate", MakeScalar(int16_max), MakeScalar(-int16_max));
+  auto int32_max = std::numeric_limits<int32_t>::max();
+  CheckScalarUnary("negate", MakeScalar(int32_max), MakeScalar(-int32_max));
+  auto int64_max = std::numeric_limits<int64_t>::max();
+  CheckScalarUnary("negate", MakeScalar(int64_max), MakeScalar(-int64_max));
+}
+
+TEST(TestUnaryArithmeticUnsigned, Negate) {
+  for (const auto& ty : internal::UnsignedIntTypes()) {
+    // No input
+    CheckScalarUnary("negate", ArrayFromJSON(ty, "[]"), ArrayFromJSON(ty, "[]"));
+    // Null input
+    CheckScalarUnary("negate", ArrayFromJSON(ty, "[null]"), ArrayFromJSON(ty, "[null]"));
+    // Zeros as inputs
+    CheckScalarUnary("negate", ArrayFromJSON(ty, "[0]"), ArrayFromJSON(ty, "[0]"));
+  }

Review comment:
       Please flesh these out

##########
File path: cpp/src/arrow/compute/kernels/scalar_arithmetic.cc
##########
@@ -309,6 +348,21 @@ std::shared_ptr<ScalarFunction> MakeArithmeticFunctionNotNull(std::string name,
   return func;
 }
 
+template <typename Op>
+std::shared_ptr<ScalarFunction> MakeScalarArithmeticFunction(std::string name,
+                                                             const FunctionDoc* doc) {
+  auto func = std::make_shared<ScalarFunction>(name, Arity::Unary(), doc);
+  // 8-bit signed integer
+  // ArrayKernelExec exec = ScalarUnary<Int8Type, Int8Type, Op>::Exec;
+  // DCHECK_OK(func->AddKernel({int8()}, int8(), exec));
+
+  for (const auto& ty : NumericTypes()) {

Review comment:
       This will generate kernels for the unsigned integer types as well.
   ```suggestion
     for (const auto& ty : FlattenVectors({SignedIntTypes(), FloatingPointTypes()})) {
   ```

##########
File path: cpp/src/arrow/compute/kernels/scalar_arithmetic.cc
##########
@@ -321,14 +375,14 @@ const FunctionDoc add_checked_doc{
      "doesn't fail on overflow, use function \"add\"."),
     {"x", "y"}};
 
-const FunctionDoc sub_doc{"Substract the arguments element-wise",
+const FunctionDoc sub_doc{"Subtract the arguments element-wise",

Review comment:
       :+1: 




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org