You are viewing a plain text version of this content. The canonical link for it is here.
Posted to github@arrow.apache.org by GitBox <gi...@apache.org> on 2021/12/22 11:30:08 UTC

[GitHub] [arrow] pitrou commented on a change in pull request #11886: ARROW-13035: [C++] indices_nonzero compute function

pitrou commented on a change in pull request #11886:
URL: https://github.com/apache/arrow/pull/11886#discussion_r773815517



##########
File path: cpp/src/arrow/compute/kernels/scalar_validity.cc
##########
@@ -307,6 +379,11 @@ const FunctionDoc is_nan_doc("Return true if NaN",
                              ("For each input value, emit true iff the value is NaN."),
                              {"values"});
 
+const FunctionDoc indices_nonzero_doc{
+    "Compare to zero or false",
+    ("Return the indices of the array containing non false or zero values."),

Review comment:
       Add a sentence stating that nulls are skipped?

##########
File path: cpp/src/arrow/compute/kernels/scalar_validity.cc
##########
@@ -307,6 +379,11 @@ const FunctionDoc is_nan_doc("Return true if NaN",
                              ("For each input value, emit true iff the value is NaN."),
                              {"values"});
 
+const FunctionDoc indices_nonzero_doc{
+    "Compare to zero or false",

Review comment:
       The summary line is a bit vague, how about just using the sentence below?

##########
File path: cpp/src/arrow/compute/kernels/scalar_validity_test.cc
##########
@@ -241,5 +241,28 @@ TYPED_TEST(TestFloatingPointValidityKernels, IsInf) { this->TestIsInf(); }
 
 TYPED_TEST(TestFloatingPointValidityKernels, IsNan) { this->TestIsNan(); }
 
+TEST(TestValidityKernels, NonZero) {
+  Datum actual;
+  std::shared_ptr<Array> result;
+
+  ASSERT_OK_AND_ASSIGN(
+      actual,
+      CallFunction("indices_nonzero", {ArrayFromJSON(uint32(), "[null, 50, 0, 10]")}));
+  result = actual.make_array();
+  AssertArraysEqual(*result, *ArrayFromJSON(uint64(), "[1, 3]"));

Review comment:
       Add a test with an empty array as well and/or an empty output?

##########
File path: cpp/src/arrow/compute/kernels/scalar_validity.cc
##########
@@ -189,6 +194,73 @@ Status ConstBoolExec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
   return Status::OK();
 }
 
+struct NonZeroVisitor {
+  UInt64Builder* builder;
+  const ArrayData& array;
+
+  NonZeroVisitor(UInt64Builder* builder, const ArrayData& array)
+      : builder(builder), array(array) {}
+
+  Status Visit(const DataType& type) { return Status::NotImplemented(type.ToString()); }
+
+  template <typename Type>
+  enable_if_t<is_primitive_ctype<Type>::value, Status> Visit(const Type&) {
+    using T = typename GetViewType<Type>::T;
+    uint32_t index = 0;
+
+    return VisitArrayDataInline<Type>(
+        this->array,
+        [&](T v) {
+          if (v) {
+            this->builder->UnsafeAppend(index);
+          }
+          ++index;
+          return Status::OK();
+        },
+        [&]() {
+          ++index;
+          return Status::OK();
+        });
+  }
+};
+
+Status IndicesNonZeroExec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+  std::shared_ptr<ArrayData> array = batch[0].array();
+  UInt64Builder builder;
+
+  RETURN_NOT_OK(builder.Reserve(array->length));
+  NonZeroVisitor visitor(&builder, *array.get());
+  RETURN_NOT_OK(VisitTypeInline(*(array->type), &visitor));
+
+  std::shared_ptr<ArrayData> out_data;
+  RETURN_NOT_OK(builder.FinishInternal(&out_data));
+  out->value = std::move(out_data);
+  return Status::OK();
+}
+
+std::shared_ptr<ScalarFunction> MakeIndicesNonZeroFunction(std::string name,

Review comment:
       So, this is not a scalar function as the output doesn't have the same length as the input.
   (a scalar function is a function where each value in the output can be derived independently from the corresponding value in the input)
   
   This should be registered as a vector function.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org