You are viewing a plain text version of this content. The canonical link for it is here.
Posted to github@arrow.apache.org by GitBox <gi...@apache.org> on 2022/01/26 15:20:58 UTC

[GitHub] [arrow] pitrou commented on a change in pull request #12270: ARROW-15218: [C++] Add decimal support to the indices_nonzero compute function

pitrou commented on a change in pull request #12270:
URL: https://github.com/apache/arrow/pull/12270#discussion_r792731592



##########
File path: cpp/src/arrow/compute/kernels/vector_selection.cc
##########
@@ -2392,6 +2392,29 @@ struct NonZeroVisitor {
 
     return Status::OK();
   }
+
+  template <typename Type>
+  enable_if_t<is_decimal128_type<Type>::value || is_decimal256_type<Type>::value, Status>
+  Visit(const Type&) {

Review comment:
       This is morally the same code as the previous override above, can we merge them in a single method declaration?

##########
File path: cpp/src/arrow/compute/kernels/vector_selection.cc
##########
@@ -2426,25 +2449,33 @@ std::shared_ptr<VectorFunction> MakeIndicesNonZeroFunction(std::string name,
                                                            const FunctionDoc* doc) {
   auto func = std::make_shared<VectorFunction>(name, Arity::Unary(), doc);
 
-  for (const auto& ty : NumericTypes()) {
-    VectorKernel kernel;
-    kernel.exec = IndicesNonZeroExec;
-    kernel.null_handling = NullHandling::OUTPUT_NOT_NULL;
-    kernel.mem_allocation = MemAllocation::NO_PREALLOCATE;
-    kernel.output_chunked = false;
-    kernel.can_execute_chunkwise = false;
-    kernel.signature = KernelSignature::Make({InputType(ty->id())}, uint64());
-    DCHECK_OK(func->AddKernel(kernel));
-  }
-
-  VectorKernel boolkernel;
-  boolkernel.exec = IndicesNonZeroExec;
-  boolkernel.null_handling = NullHandling::OUTPUT_NOT_NULL;
-  boolkernel.mem_allocation = MemAllocation::NO_PREALLOCATE;
-  boolkernel.output_chunked = false;
-  boolkernel.can_execute_chunkwise = false;
-  boolkernel.signature = KernelSignature::Make({boolean()}, uint64());
-  DCHECK_OK(func->AddKernel(boolkernel));
+  VectorKernel kernel;
+  kernel.null_handling = NullHandling::OUTPUT_NOT_NULL;
+  kernel.mem_allocation = MemAllocation::NO_PREALLOCATE;
+  kernel.output_chunked = false;
+  kernel.can_execute_chunkwise = false;
+
+  auto AddKernels = [&](const std::vector<std::shared_ptr<DataType>>& types) {
+    for (const std::shared_ptr<DataType>& ty : types) {
+      kernel.signature = KernelSignature::Make({InputType::Array(ty)}, uint64());
+      kernel.exec = IndicesNonZeroExec;

Review comment:
       This line can probably be moved out of this lambda?

##########
File path: cpp/src/arrow/compute/kernels/vector_selection_test.cc
##########
@@ -2365,38 +2365,91 @@ TEST_F(TestDropNullKernelWithTable, DropNullTableWithSlices) {
   });
 }
 
-TEST(TestIndicesNonZero, IndicesNonZero) {
+template <typename Type>
+class TestIndicesNonZero : public ::testing::Test {};
+
+TYPED_TEST_SUITE(TestIndicesNonZero, NumericArrowTypes);
+TYPED_TEST(TestIndicesNonZero, IndicesNonZero) {
   Datum actual;
   std::shared_ptr<Array> result;
+  auto type = TypeTraits<TypeParam>::type_singleton();
 
-  ASSERT_OK_AND_ASSIGN(
-      actual,
-      CallFunction("indices_nonzero", {ArrayFromJSON(uint32(), "[null, 50, 0, 10]")}));
+  ASSERT_OK_AND_ASSIGN(actual, CallFunction("indices_nonzero",
+                                            {ArrayFromJSON(type, "[null, 50, 0, 10]")}));
   result = actual.make_array();
-  AssertArraysEqual(*result, *ArrayFromJSON(uint64(), "[1, 3]"));
+  AssertArraysEqual(*ArrayFromJSON(uint64(), "[1, 3]"), *result);
+
+  // empty
+  ASSERT_OK_AND_ASSIGN(actual,
+                       CallFunction("indices_nonzero", {ArrayFromJSON(type, "[]")}));
+  result = actual.make_array();
+  AssertArraysEqual(*ArrayFromJSON(uint64(), "[]"), *result);
+
+  // chunked
+  ChunkedArray chunked_arr(
+      {ArrayFromJSON(type, "[1, 0, 3]"), ArrayFromJSON(type, "[4, 0, 6]")});
+  ASSERT_OK_AND_ASSIGN(
+      actual, CallFunction("indices_nonzero", {static_cast<Datum>(chunked_arr)}));

Review comment:
       I think `{chunked_arr}` would simply work and it would convert to a Datum implicitly.

##########
File path: cpp/src/arrow/compute/kernels/vector_selection.cc
##########
@@ -2426,25 +2449,33 @@ std::shared_ptr<VectorFunction> MakeIndicesNonZeroFunction(std::string name,
                                                            const FunctionDoc* doc) {
   auto func = std::make_shared<VectorFunction>(name, Arity::Unary(), doc);
 
-  for (const auto& ty : NumericTypes()) {
-    VectorKernel kernel;
-    kernel.exec = IndicesNonZeroExec;
-    kernel.null_handling = NullHandling::OUTPUT_NOT_NULL;
-    kernel.mem_allocation = MemAllocation::NO_PREALLOCATE;
-    kernel.output_chunked = false;
-    kernel.can_execute_chunkwise = false;
-    kernel.signature = KernelSignature::Make({InputType(ty->id())}, uint64());
-    DCHECK_OK(func->AddKernel(kernel));
-  }
-
-  VectorKernel boolkernel;
-  boolkernel.exec = IndicesNonZeroExec;
-  boolkernel.null_handling = NullHandling::OUTPUT_NOT_NULL;
-  boolkernel.mem_allocation = MemAllocation::NO_PREALLOCATE;
-  boolkernel.output_chunked = false;
-  boolkernel.can_execute_chunkwise = false;
-  boolkernel.signature = KernelSignature::Make({boolean()}, uint64());
-  DCHECK_OK(func->AddKernel(boolkernel));
+  VectorKernel kernel;
+  kernel.null_handling = NullHandling::OUTPUT_NOT_NULL;
+  kernel.mem_allocation = MemAllocation::NO_PREALLOCATE;
+  kernel.output_chunked = false;
+  kernel.can_execute_chunkwise = false;
+
+  auto AddKernels = [&](const std::vector<std::shared_ptr<DataType>>& types) {
+    for (const std::shared_ptr<DataType>& ty : types) {
+      kernel.signature = KernelSignature::Make({InputType::Array(ty)}, uint64());
+      kernel.exec = IndicesNonZeroExec;
+      DCHECK_OK(func->AddKernel(kernel));
+    }
+  };
+
+  AddKernels(NumericTypes());
+  AddKernels({boolean()});
+
+  VectorKernel deckernel;

Review comment:
       Why not simply reuse the `kernel` variable already defined?

##########
File path: cpp/src/arrow/compute/kernels/vector_selection_test.cc
##########
@@ -2365,38 +2365,91 @@ TEST_F(TestDropNullKernelWithTable, DropNullTableWithSlices) {
   });
 }
 
-TEST(TestIndicesNonZero, IndicesNonZero) {
+template <typename Type>
+class TestIndicesNonZero : public ::testing::Test {};
+
+TYPED_TEST_SUITE(TestIndicesNonZero, NumericArrowTypes);
+TYPED_TEST(TestIndicesNonZero, IndicesNonZero) {
   Datum actual;
   std::shared_ptr<Array> result;
+  auto type = TypeTraits<TypeParam>::type_singleton();
 
-  ASSERT_OK_AND_ASSIGN(
-      actual,
-      CallFunction("indices_nonzero", {ArrayFromJSON(uint32(), "[null, 50, 0, 10]")}));
+  ASSERT_OK_AND_ASSIGN(actual, CallFunction("indices_nonzero",
+                                            {ArrayFromJSON(type, "[null, 50, 0, 10]")}));
   result = actual.make_array();
-  AssertArraysEqual(*result, *ArrayFromJSON(uint64(), "[1, 3]"));
+  AssertArraysEqual(*ArrayFromJSON(uint64(), "[1, 3]"), *result);
+
+  // empty
+  ASSERT_OK_AND_ASSIGN(actual,
+                       CallFunction("indices_nonzero", {ArrayFromJSON(type, "[]")}));
+  result = actual.make_array();
+  AssertArraysEqual(*ArrayFromJSON(uint64(), "[]"), *result);
+
+  // chunked
+  ChunkedArray chunked_arr(
+      {ArrayFromJSON(type, "[1, 0, 3]"), ArrayFromJSON(type, "[4, 0, 6]")});
+  ASSERT_OK_AND_ASSIGN(
+      actual, CallFunction("indices_nonzero", {static_cast<Datum>(chunked_arr)}));
+  AssertArraysEqual(*ArrayFromJSON(uint64(), "[0, 2, 3, 5]"), *actual.make_array());
+
+  // empty chunked
+  ChunkedArray chunked_arr_empty({ArrayFromJSON(type, "[1, 0, 3]"),
+                                  ArrayFromJSON(type, "[]"),
+                                  ArrayFromJSON(type, "[4, 0, 6]")});
+  ASSERT_OK_AND_ASSIGN(
+      actual, CallFunction("indices_nonzero", {static_cast<Datum>(chunked_arr_empty)}));
+  AssertArraysEqual(*ArrayFromJSON(uint64(), "[0, 2, 3, 5]"), *actual.make_array());
+}
 
+TEST(TestIndicesNonZero, IndicesNonZeroBoolean) {
+  Datum actual;
+  std::shared_ptr<Array> result;
+
+  // boool
   ASSERT_OK_AND_ASSIGN(
       actual, CallFunction("indices_nonzero",
                            {ArrayFromJSON(boolean(), "[null, true, false, true]")}));
   result = actual.make_array();
   AssertArraysEqual(*result, *ArrayFromJSON(uint64(), "[1, 3]"));
+}
 
-  ASSERT_OK_AND_ASSIGN(actual,
-                       CallFunction("indices_nonzero",
-                                    {ArrayFromJSON(float64(), "[null, 1.3, 0.0, 5.0]")}));
+TEST(TestIndicesNonZero, IndicesNonZeroDecimal) {
+  Datum actual;
+  std::shared_ptr<Array> result;
+
+  ASSERT_OK_AND_ASSIGN(
+      actual, CallFunction(
+                  "indices_nonzero",
+                  {DecimalArrayFromJSON(decimal128(2, -2), R"(["12E2",null,"0","0"])")}));
   result = actual.make_array();
-  AssertArraysEqual(*result, *ArrayFromJSON(uint64(), "[1, 3]"));
+  AssertArraysEqual(*ArrayFromJSON(uint64(), "[0]"), *result, true);

Review comment:
       Please make sure that arguments are intelligible:
   
   ```suggestion
     AssertArraysEqual(*ArrayFromJSON(uint64(), "[0]"), *result, /*verbose=*/true);
   ```

##########
File path: cpp/src/arrow/compute/kernels/vector_selection.cc
##########
@@ -2392,6 +2392,29 @@ struct NonZeroVisitor {
 
     return Status::OK();
   }
+
+  template <typename Type>
+  enable_if_t<is_decimal128_type<Type>::value || is_decimal256_type<Type>::value, Status>

Review comment:
       Can use `is_decimal_type` for more future-proof code.

##########
File path: cpp/src/arrow/compute/kernels/vector_selection_test.cc
##########
@@ -2365,38 +2365,91 @@ TEST_F(TestDropNullKernelWithTable, DropNullTableWithSlices) {
   });
 }
 
-TEST(TestIndicesNonZero, IndicesNonZero) {
+template <typename Type>
+class TestIndicesNonZero : public ::testing::Test {};
+
+TYPED_TEST_SUITE(TestIndicesNonZero, NumericArrowTypes);
+TYPED_TEST(TestIndicesNonZero, IndicesNonZero) {
   Datum actual;
   std::shared_ptr<Array> result;
+  auto type = TypeTraits<TypeParam>::type_singleton();
 
-  ASSERT_OK_AND_ASSIGN(
-      actual,
-      CallFunction("indices_nonzero", {ArrayFromJSON(uint32(), "[null, 50, 0, 10]")}));
+  ASSERT_OK_AND_ASSIGN(actual, CallFunction("indices_nonzero",
+                                            {ArrayFromJSON(type, "[null, 50, 0, 10]")}));
   result = actual.make_array();
-  AssertArraysEqual(*result, *ArrayFromJSON(uint64(), "[1, 3]"));
+  AssertArraysEqual(*ArrayFromJSON(uint64(), "[1, 3]"), *result);
+
+  // empty
+  ASSERT_OK_AND_ASSIGN(actual,
+                       CallFunction("indices_nonzero", {ArrayFromJSON(type, "[]")}));
+  result = actual.make_array();
+  AssertArraysEqual(*ArrayFromJSON(uint64(), "[]"), *result);
+
+  // chunked
+  ChunkedArray chunked_arr(
+      {ArrayFromJSON(type, "[1, 0, 3]"), ArrayFromJSON(type, "[4, 0, 6]")});
+  ASSERT_OK_AND_ASSIGN(
+      actual, CallFunction("indices_nonzero", {static_cast<Datum>(chunked_arr)}));
+  AssertArraysEqual(*ArrayFromJSON(uint64(), "[0, 2, 3, 5]"), *actual.make_array());
+
+  // empty chunked
+  ChunkedArray chunked_arr_empty({ArrayFromJSON(type, "[1, 0, 3]"),
+                                  ArrayFromJSON(type, "[]"),
+                                  ArrayFromJSON(type, "[4, 0, 6]")});
+  ASSERT_OK_AND_ASSIGN(
+      actual, CallFunction("indices_nonzero", {static_cast<Datum>(chunked_arr_empty)}));
+  AssertArraysEqual(*ArrayFromJSON(uint64(), "[0, 2, 3, 5]"), *actual.make_array());
+}
 
+TEST(TestIndicesNonZero, IndicesNonZeroBoolean) {
+  Datum actual;
+  std::shared_ptr<Array> result;
+
+  // boool
   ASSERT_OK_AND_ASSIGN(
       actual, CallFunction("indices_nonzero",
                            {ArrayFromJSON(boolean(), "[null, true, false, true]")}));
   result = actual.make_array();
   AssertArraysEqual(*result, *ArrayFromJSON(uint64(), "[1, 3]"));
+}
 
-  ASSERT_OK_AND_ASSIGN(actual,
-                       CallFunction("indices_nonzero",
-                                    {ArrayFromJSON(float64(), "[null, 1.3, 0.0, 5.0]")}));
+TEST(TestIndicesNonZero, IndicesNonZeroDecimal) {
+  Datum actual;
+  std::shared_ptr<Array> result;
+
+  ASSERT_OK_AND_ASSIGN(
+      actual, CallFunction(
+                  "indices_nonzero",
+                  {DecimalArrayFromJSON(decimal128(2, -2), R"(["12E2",null,"0","0"])")}));
   result = actual.make_array();
-  AssertArraysEqual(*result, *ArrayFromJSON(uint64(), "[1, 3]"));
+  AssertArraysEqual(*ArrayFromJSON(uint64(), "[0]"), *result, true);
 
-  ASSERT_OK_AND_ASSIGN(actual,
-                       CallFunction("indices_nonzero", {ArrayFromJSON(float64(), "[]")}));
+  ASSERT_OK_AND_ASSIGN(
+      actual,
+      CallFunction(
+          "indices_nonzero",
+          {DecimalArrayFromJSON(
+              decimal128(6, 9),
+              R"(["765483.999999999","0.000000000",null,"-987645.000000001"])")}));
   result = actual.make_array();
-  AssertArraysEqual(*result, *ArrayFromJSON(uint64(), "[]"));
+  AssertArraysEqual(*ArrayFromJSON(uint64(), "[0, 3]"), *result, true);
 
-  ChunkedArray chunkedarr(
-      {ArrayFromJSON(uint32(), "[1, 0, 3]"), ArrayFromJSON(uint32(), "[4, 0, 6]")});
-  ASSERT_OK_AND_ASSIGN(actual,
-                       CallFunction("indices_nonzero", {static_cast<Datum>(chunkedarr)}));
-  AssertArraysEqual(*actual.make_array(), *ArrayFromJSON(uint64(), "[0, 2, 3, 5]"));
+  ASSERT_OK_AND_ASSIGN(

Review comment:
       Can we avoid copy-pasting? Example solution (untested):
   ```c++
     for (const auto& decimal_factory : {decimal128, decimal256}) {
   
       ASSERT_OK_AND_ASSIGN(
           actual, CallFunction(
                       "indices_nonzero",
                       {DecimalArrayFromJSON(decimal_factory(2, -2), R"(["12E2",null,"0","0"])")}));
       // etc.
   ```

##########
File path: cpp/src/arrow/compute/kernels/vector_selection_test.cc
##########
@@ -2365,38 +2365,91 @@ TEST_F(TestDropNullKernelWithTable, DropNullTableWithSlices) {
   });
 }
 
-TEST(TestIndicesNonZero, IndicesNonZero) {
+template <typename Type>
+class TestIndicesNonZero : public ::testing::Test {};
+
+TYPED_TEST_SUITE(TestIndicesNonZero, NumericArrowTypes);

Review comment:
       This will compile a distinct version of the tests for each tested type, but this doesn't seem necessary.
   How about instead:
   ```c++
   TEST(TestIndicesNonZero, IndicesNonZero) {
     Datum actual;
     std::shared_ptr<Array> result;
     for (const auto& type : NumericTypes()) {
       ARROW_SCOPED_TRACE("Input type = ", type->ToString());
       ASSERT_OK_AND_ASSIGN(actual,  // etc.
   ```




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org