You are viewing a plain text version of this content. The canonical link for it is here.
Posted to github@arrow.apache.org by GitBox <gi...@apache.org> on 2020/11/20 22:06:34 UTC

[GitHub] [arrow] bkietz opened a new pull request #8728: ARROW-10669: [C++][Compute] Support scalar arguments to Boolean compute functions

bkietz opened a new pull request #8728:
URL: https://github.com/apache/arrow/pull/8728


   - "and", "or", "xor", "and_kleene", and "or_kleene" gain support for scalar arguments.
   - Added new functions "and_not" and "and_not_kleene"
   - repaired and simplified some null propagation logic


----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [arrow] pitrou commented on a change in pull request #8728: ARROW-10669: [C++][Compute] Support scalar arguments to Boolean compute functions

Posted by GitBox <gi...@apache.org>.
pitrou commented on a change in pull request #8728:
URL: https://github.com/apache/arrow/pull/8728#discussion_r528714541



##########
File path: cpp/src/arrow/compute/exec.cc
##########
@@ -213,20 +214,33 @@ bool ExecBatchIterator::Next(ExecBatch* batch) {
 
 namespace {
 
-bool ArrayHasNulls(const ArrayData& data) {
-  // As discovered in ARROW-8863 (and not only for that reason)
-  // ArrayData::null_count can -1 even when buffers[0] is nullptr. So we check
-  // for both cases (nullptr means no nulls, or null_count already computed)
-  if (data.type->id() == Type::NA) {
-    return true;
-  } else if (data.buffers[0] == nullptr) {
-    return false;
-  } else {
+struct NullGeneralization {
+  enum type { NONE, ALL_VALID, ALL_NULL };

Review comment:
       Rather than "NONE", "UNKNOWN" (or "PERHAPS_NULL") sounds less confusing perhaps.

##########
File path: cpp/src/arrow/compute/kernels/scalar_boolean_test.cc
##########
@@ -27,185 +27,114 @@
 #include "arrow/compute/kernels/test_util.h"
 #include "arrow/testing/gtest_common.h"
 #include "arrow/testing/gtest_util.h"
+#include "arrow/util/checked_cast.h"
 
 namespace arrow {
+
+using internal::checked_pointer_cast;
+
 namespace compute {
 
-using BinaryKernelFunc =
-    std::function<Result<Datum>(const Datum&, const Datum&, ExecContext*)>;
-
-class TestBooleanKernel : public TestBase {
- public:
-  void TestArrayBinary(const BinaryKernelFunc& kernel, const std::shared_ptr<Array>& left,
-                       const std::shared_ptr<Array>& right,
-                       const std::shared_ptr<Array>& expected) {
-    ASSERT_OK_AND_ASSIGN(Datum result, kernel(left, right, &ctx_));
-    ASSERT_EQ(Datum::ARRAY, result.kind());
-    std::shared_ptr<Array> result_array = result.make_array();
-    ASSERT_OK(result_array->ValidateFull());
-    AssertArraysEqual(*expected, *result_array, /*verbose=*/true);
-
-    ASSERT_OK_AND_ASSIGN(result, kernel(right, left, &ctx_));
-    ASSERT_EQ(Datum::ARRAY, result.kind());
-    result_array = result.make_array();
-    ASSERT_OK(result_array->ValidateFull());
-    AssertArraysEqual(*expected, *result_array, /*verbose=*/true);
-  }
+void CheckBooleanScalarArrayBinary(std::string func_name, Datum array) {
+  for (std::shared_ptr<Scalar> scalar :
+       {std::make_shared<BooleanScalar>(), std::make_shared<BooleanScalar>(true),
+        std::make_shared<BooleanScalar>(false)}) {
+    ASSERT_OK_AND_ASSIGN(Datum actual, CallFunction(func_name, {Datum(scalar), array}));
 
-  void TestChunkedArrayBinary(const BinaryKernelFunc& kernel,
-                              const std::shared_ptr<ChunkedArray>& left,
-                              const std::shared_ptr<ChunkedArray>& right,
-                              const std::shared_ptr<ChunkedArray>& expected) {
-    ASSERT_OK_AND_ASSIGN(Datum result, kernel(left, right, &ctx_));
-    ASSERT_EQ(Datum::CHUNKED_ARRAY, result.kind());
-    std::shared_ptr<ChunkedArray> result_ca = result.chunked_array();
-    AssertChunkedEquivalent(*expected, *result_ca);
-
-    ASSERT_OK_AND_ASSIGN(result, kernel(right, left, &ctx_));
-    ASSERT_EQ(Datum::CHUNKED_ARRAY, result.kind());
-    result_ca = result.chunked_array();
-    AssertChunkedEquivalent(*expected, *result_ca);
-  }
+    ASSERT_OK_AND_ASSIGN(auto constant_array,
+                         MakeArrayFromScalar(*scalar, array.length()));
 
-  void TestBinaryKernel(const BinaryKernelFunc& kernel,
-                        const std::shared_ptr<Array>& left,
-                        const std::shared_ptr<Array>& right,
-                        const std::shared_ptr<Array>& expected) {
-    TestArrayBinary(kernel, left, right, expected);
-    TestArrayBinary(kernel, left->Slice(1), right->Slice(1), expected->Slice(1));
-
-    // ChunkedArray
-    auto cleft = std::make_shared<ChunkedArray>(ArrayVector{left, left->Slice(1)});
-    auto cright = std::make_shared<ChunkedArray>(ArrayVector{right, right->Slice(1)});
-    auto cexpected =
-        std::make_shared<ChunkedArray>(ArrayVector{expected, expected->Slice(1)});
-    TestChunkedArrayBinary(kernel, cleft, cright, cexpected);
-
-    // ChunkedArray with different chunks
-    cleft = std::make_shared<ChunkedArray>(ArrayVector{
-        left->Slice(0, 1), left->Slice(1), left->Slice(1, 1), left->Slice(2)});
-    TestChunkedArrayBinary(kernel, cleft, cright, cexpected);
-  }
+    ASSERT_OK_AND_ASSIGN(Datum expected,
+                         CallFunction(func_name, {Datum(constant_array), array}));
+    AssertDatumsEqual(expected, actual);
 
-  void TestBinaryKernelPropagate(const BinaryKernelFunc& kernel,
-                                 const std::vector<bool>& left,
-                                 const std::vector<bool>& right,
-                                 const std::vector<bool>& expected,
-                                 const std::vector<bool>& expected_nulls) {
-    auto type = boolean();
-    TestBinaryKernel(kernel, _MakeArray<BooleanType, bool>(type, left, {}),
-                     _MakeArray<BooleanType, bool>(type, right, {}),
-                     _MakeArray<BooleanType, bool>(type, expected, {}));
-
-    TestBinaryKernel(kernel, _MakeArray<BooleanType, bool>(type, left, left),
-                     _MakeArray<BooleanType, bool>(type, right, right),
-                     _MakeArray<BooleanType, bool>(type, expected, expected_nulls));
+    ASSERT_OK_AND_ASSIGN(actual, CallFunction(func_name, {array, Datum(scalar)}));
+    ASSERT_OK_AND_ASSIGN(expected,
+                         CallFunction(func_name, {array, Datum(constant_array)}));
+    AssertDatumsEqual(expected, actual);
   }
-
- protected:
-  ExecContext ctx_;
-};
-
-TEST_F(TestBooleanKernel, Invert) {
-  std::vector<bool> values1 = {true, false, true, false};
-  std::vector<bool> values2 = {false, true, false, true};
-
-  auto type = boolean();
-  auto a1 = _MakeArray<BooleanType, bool>(type, values1, {true, true, true, false});
-  auto a2 = _MakeArray<BooleanType, bool>(type, values2, {true, true, true, false});
-
-  // Plain array
-  ASSERT_OK_AND_ASSIGN(Datum result, Invert(a1));
-  ASSERT_EQ(Datum::ARRAY, result.kind());
-  ASSERT_ARRAYS_EQUAL(*a2, *result.make_array());
-
-  // Array with offset
-  ASSERT_OK_AND_ASSIGN(result, Invert(a1->Slice(1)));
-  ASSERT_EQ(Datum::ARRAY, result.kind());
-  ASSERT_ARRAYS_EQUAL(*a2->Slice(1), *result.make_array());
-
-  // ChunkedArray
-  std::vector<std::shared_ptr<Array>> ca1_arrs = {a1, a1->Slice(1)};
-  auto ca1 = std::make_shared<ChunkedArray>(ca1_arrs);
-  std::vector<std::shared_ptr<Array>> ca2_arrs = {a2, a2->Slice(1)};
-  auto ca2 = std::make_shared<ChunkedArray>(ca2_arrs);
-  ASSERT_OK_AND_ASSIGN(result, Invert(ca1));
-  ASSERT_EQ(Datum::CHUNKED_ARRAY, result.kind());
-  std::shared_ptr<ChunkedArray> result_ca = result.chunked_array();
-
-  // Contiguous preallocation, so a single output chunk even though there were
-  // two input chunks
-  ASSERT_EQ(1, result_ca->num_chunks());
-  AssertChunkedEquivalent(*ca2, *result_ca);
 }
 
-TEST_F(TestBooleanKernel, InvertEmptyArray) {
-  std::vector<std::shared_ptr<Buffer>> data_buffers(2);
-  Datum input;
-  input.value = ArrayData::Make(boolean(), 0 /* length */, std::move(data_buffers),
-                                0 /* null_count */);
-
-  ASSERT_OK_AND_ASSIGN(Datum result, Invert(input));
-  ASSERT_ARRAYS_EQUAL(*input.make_array(), *result.make_array());
+TEST(TestBooleanKernel, Invert) {
+  auto arr =
+      ArrayFromJSON(boolean(), "[true, false, true, null, false, true, false, null]");
+  auto expected =
+      ArrayFromJSON(boolean(), "[false, true, false, null, true, false, true, null]");
+  CheckScalarUnary("invert", arr, expected);
 }
 
-TEST_F(TestBooleanKernel, BinaryOpOnEmptyArray) {
-  auto type = boolean();
-  std::vector<std::shared_ptr<Buffer>> data_buffers(2);
-  Datum input;
-  input.value = ArrayData::Make(boolean(), 0 /* length */, std::move(data_buffers),
-                                0 /* null_count */);
-
-  ASSERT_OK_AND_ASSIGN(Datum result, And(input, input));
-  // Result should be empty as well.
-  ASSERT_ARRAYS_EQUAL(*input.make_array(), *result.make_array());
+TEST(TestBooleanKernel, And) {
+  auto left = ArrayFromJSON(boolean(), "    [true, true,  true, false, false, null]");
+  auto right = ArrayFromJSON(boolean(), "   [true, false, null, false, null,  null]");
+  auto expected = ArrayFromJSON(boolean(), "[true, false, null, false, null,  null]");
+  // CheckScalarBinary("and", left, right, expected);
+  CheckBooleanScalarArrayBinary("and", left);
 }
 
-TEST_F(TestBooleanKernel, And) {
-  std::vector<bool> values1 = {true, false, true, false, true, true};
-  std::vector<bool> values2 = {true, true, false, false, true, false};
-  std::vector<bool> values3 = {true, false, false, false, true, false};
-  TestBinaryKernelPropagate(And, values1, values2, values3, values3);
+TEST(TestBooleanKernel, Or) {
+  auto left = ArrayFromJSON(boolean(), "    [true, true,  true, false, false, null]");
+  auto right = ArrayFromJSON(boolean(), "   [true, false, null, false, null,  null]");
+  auto expected = ArrayFromJSON(boolean(), "[true, true,  null, false, null,  null]");
+  CheckScalarBinary("or", left, right, expected);
+  CheckBooleanScalarArrayBinary("or", left);
 }
 
-TEST_F(TestBooleanKernel, Or) {
-  std::vector<bool> values1 = {true, false, true, false, true, true};
-  std::vector<bool> values2 = {true, true, false, false, true, false};
-  std::vector<bool> values3 = {true, true, true, false, true, true};
-  std::vector<bool> values3_nulls = {true, false, false, false, true, false};
-  TestBinaryKernelPropagate(Or, values1, values2, values3, values3_nulls);
+TEST(TestBooleanKernel, Xor) {
+  auto left = ArrayFromJSON(boolean(), "    [true,  true,  true, false, false, null]");
+  auto right = ArrayFromJSON(boolean(), "   [true,  false, null, false, null,  null]");
+  auto expected = ArrayFromJSON(boolean(), "[false, true,  null, false, null,  null]");
+  CheckScalarBinary("xor", left, right, expected);
+  CheckBooleanScalarArrayBinary("xor", left);
 }
 
-TEST_F(TestBooleanKernel, Xor) {
-  std::vector<bool> values1 = {true, false, true, false, true, true};
-  std::vector<bool> values2 = {true, true, false, false, true, false};
-  std::vector<bool> values3 = {false, true, true, false, false, true};
-  std::vector<bool> values3_nulls = {true, false, false, false, true, false};
-  TestBinaryKernelPropagate(Xor, values1, values2, values3, values3_nulls);
+TEST(TestBooleanKernel, AndNot) {
+  auto left = ArrayFromJSON(
+      boolean(), "[true,  true,  true, false, false, false, null, null,  null]");
+  auto right = ArrayFromJSON(
+      boolean(), "[true,  false, null, true,  false, null,  true, false, null]");
+  auto expected = ArrayFromJSON(
+      boolean(), "[false, true,  null, false, false, null,  null, null,  null]");
+  CheckScalarBinary("and_not", left, right, expected);
+  CheckBooleanScalarArrayBinary("and_not", left);
 }
 
-TEST_F(TestBooleanKernel, KleeneAnd) {
+TEST(TestBooleanKernel, KleeneAnd) {
   auto left = ArrayFromJSON(boolean(), "    [true, true,  true, false, false, null]");
   auto right = ArrayFromJSON(boolean(), "   [true, false, null, false, null,  null]");
   auto expected = ArrayFromJSON(boolean(), "[true, false, null, false, false, null]");
-  TestBinaryKernel(KleeneAnd, left, right, expected);
+  CheckScalarBinary("and_kleene", left, right, expected);
+  CheckBooleanScalarArrayBinary("and_kleene", left);
 
   left = ArrayFromJSON(boolean(), "    [true, true,  false, null, null]");
   right = ArrayFromJSON(boolean(), "   [true, false, false, true, false]");
   expected = ArrayFromJSON(boolean(), "[true, false, false, null, false]");
-  TestBinaryKernel(KleeneAnd, left, right, expected);
+  CheckScalarBinary("and_kleene", left, right, expected);
+  CheckBooleanScalarArrayBinary("and_kleene", left);

Review comment:
       Are you running almost the same tests twice here?

##########
File path: cpp/src/arrow/compute/exec.cc
##########
@@ -272,79 +287,61 @@ class NullPropagator {
     return Status::OK();
   }
 
-  Result<bool> ShortCircuitIfAllNull() {
-    // An all-null value (scalar null or all-null array) gives us a short
-    // circuit opportunity
-    bool is_all_null = false;
-    std::shared_ptr<Buffer> all_null_bitmap;
+  Status AllNullShortCircuit() {
+    // OK, the output should be all null
+    output_->null_count = output_->length;
+
+    if (bitmap_preallocated_) {
+      BitUtil::SetBitsTo(bitmap_, output_->offset, output_->length, false);
+      return Status::OK();
+    }
 
     // Walk all the values with nulls instead of breaking on the first in case
     // we find a bitmap that can be reused in the non-preallocated case
-    for (const Datum* value : values_with_nulls_) {
-      if (value->type()->id() == Type::NA) {
-        // No bitmap
-        is_all_null = true;
-      } else if (value->kind() == Datum::ARRAY) {
-        const ArrayData& arr = *value->array();
-        if (arr.null_count.load() == arr.length) {
-          // Pluck the all null bitmap so we can set it in the output if it was
-          // not pre-allocated
-          all_null_bitmap = arr.buffers[0];
-          is_all_null = true;
-        }
-      } else {
-        // Scalar
-        is_all_null = !value->scalar()->is_valid;
+    for (const ArrayData* arr : arrays_with_nulls_) {
+      if (arr->null_count.load() == arr->length && arr->buffers[0] != nullptr) {
+        // Reuse this all null bitmap
+        output_->buffers[0] = arr->buffers[0];
+        return Status::OK();
       }
     }
-    if (!is_all_null) {
-      return false;
-    }
 
-    // OK, the output should be all null
-    output_->null_count = output_->length;
-
-    if (!bitmap_preallocated_ && all_null_bitmap) {
-      // If we did not pre-allocate memory, and we observed an all-null bitmap,
-      // then we can zero-copy it into the output
-      output_->buffers[0] = std::move(all_null_bitmap);
-    } else {
-      RETURN_NOT_OK(EnsureAllocated());
-      BitUtil::SetBitsTo(bitmap_, output_->offset, output_->length, false);
-    }
-    return true;
+    RETURN_NOT_OK(EnsureAllocated());
+    BitUtil::SetBitsTo(bitmap_, output_->offset, output_->length, false);
+    return Status::OK();
   }
 
   Status PropagateSingle() {
     // One array
-    const ArrayData& arr = *values_with_nulls_[0]->array();
+    const ArrayData& arr = *arrays_with_nulls_[0];
     const std::shared_ptr<Buffer>& arr_bitmap = arr.buffers[0];
 
     // Reuse the null count if it's known
     output_->null_count = arr.null_count.load();
 
     if (bitmap_preallocated_) {
       CopyBitmap(arr_bitmap->data(), arr.offset, arr.length, bitmap_, output_->offset);
+      return Status::OK();
+    }
+
+    // Two cases when memory was not pre-allocated:
+    //
+    // * Offset is zero: we reuse the bitmap as is
+    // * Offset is nonzero but a multiple of 8: we can slice the bitmap
+    // * Offset is not a multiple of 8: we must allocate and use CopyBitmap
+    //
+    // Keep in mind that output_->offset is not permitted to be nonzero when
+    // the bitmap is not preallocated, and that precondition is asserted
+    // higher in the call stack.
+    if (arr.offset == 0) {
+      output_->buffers[0] = arr_bitmap;
+    } else if (arr.offset % 8 == 0) {
+      output_->buffers[0] =
+          SliceBuffer(arr_bitmap, arr.offset / 8, BitUtil::BytesForBits(arr.length));

Review comment:
       Would it be useful to add a `SliceBitmap` helper to `bitmap_ops.h`?

##########
File path: cpp/src/arrow/compute/kernels/codegen_internal.h
##########
@@ -427,12 +427,28 @@ void SimpleUnary(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
 //
 // static void Call(KernelContext*, const ArrayData& arg0, const ArrayData& arg1,
 //                  ArrayData* out)
+// static void Call(KernelContext*, const ArrayData& arg0, const Scalar& arg1,
+//                  ArrayData* out)
+// static void Call(KernelContext*, const Scalar& arg0, const ArrayData& arg1,
+//                  ArrayData* out)
+// static void Call(KernelContext*, const Scalar& arg0, const Scalar& arg1,
+//                  Scalar* out)

Review comment:
       Mention that heterogenous implementation ((scalar, array) and (array, scalar)) must broadcast the scalar value?

##########
File path: cpp/src/arrow/compute/kernels/scalar_boolean_test.cc
##########
@@ -27,185 +27,114 @@
 #include "arrow/compute/kernels/test_util.h"
 #include "arrow/testing/gtest_common.h"
 #include "arrow/testing/gtest_util.h"
+#include "arrow/util/checked_cast.h"
 
 namespace arrow {
+
+using internal::checked_pointer_cast;
+
 namespace compute {
 
-using BinaryKernelFunc =
-    std::function<Result<Datum>(const Datum&, const Datum&, ExecContext*)>;
-
-class TestBooleanKernel : public TestBase {
- public:
-  void TestArrayBinary(const BinaryKernelFunc& kernel, const std::shared_ptr<Array>& left,
-                       const std::shared_ptr<Array>& right,
-                       const std::shared_ptr<Array>& expected) {
-    ASSERT_OK_AND_ASSIGN(Datum result, kernel(left, right, &ctx_));
-    ASSERT_EQ(Datum::ARRAY, result.kind());
-    std::shared_ptr<Array> result_array = result.make_array();
-    ASSERT_OK(result_array->ValidateFull());
-    AssertArraysEqual(*expected, *result_array, /*verbose=*/true);
-
-    ASSERT_OK_AND_ASSIGN(result, kernel(right, left, &ctx_));
-    ASSERT_EQ(Datum::ARRAY, result.kind());
-    result_array = result.make_array();
-    ASSERT_OK(result_array->ValidateFull());
-    AssertArraysEqual(*expected, *result_array, /*verbose=*/true);
-  }
+void CheckBooleanScalarArrayBinary(std::string func_name, Datum array) {
+  for (std::shared_ptr<Scalar> scalar :
+       {std::make_shared<BooleanScalar>(), std::make_shared<BooleanScalar>(true),
+        std::make_shared<BooleanScalar>(false)}) {
+    ASSERT_OK_AND_ASSIGN(Datum actual, CallFunction(func_name, {Datum(scalar), array}));
 
-  void TestChunkedArrayBinary(const BinaryKernelFunc& kernel,
-                              const std::shared_ptr<ChunkedArray>& left,
-                              const std::shared_ptr<ChunkedArray>& right,
-                              const std::shared_ptr<ChunkedArray>& expected) {
-    ASSERT_OK_AND_ASSIGN(Datum result, kernel(left, right, &ctx_));
-    ASSERT_EQ(Datum::CHUNKED_ARRAY, result.kind());
-    std::shared_ptr<ChunkedArray> result_ca = result.chunked_array();
-    AssertChunkedEquivalent(*expected, *result_ca);
-
-    ASSERT_OK_AND_ASSIGN(result, kernel(right, left, &ctx_));
-    ASSERT_EQ(Datum::CHUNKED_ARRAY, result.kind());
-    result_ca = result.chunked_array();
-    AssertChunkedEquivalent(*expected, *result_ca);
-  }
+    ASSERT_OK_AND_ASSIGN(auto constant_array,
+                         MakeArrayFromScalar(*scalar, array.length()));
 
-  void TestBinaryKernel(const BinaryKernelFunc& kernel,
-                        const std::shared_ptr<Array>& left,
-                        const std::shared_ptr<Array>& right,
-                        const std::shared_ptr<Array>& expected) {
-    TestArrayBinary(kernel, left, right, expected);
-    TestArrayBinary(kernel, left->Slice(1), right->Slice(1), expected->Slice(1));
-
-    // ChunkedArray
-    auto cleft = std::make_shared<ChunkedArray>(ArrayVector{left, left->Slice(1)});
-    auto cright = std::make_shared<ChunkedArray>(ArrayVector{right, right->Slice(1)});
-    auto cexpected =
-        std::make_shared<ChunkedArray>(ArrayVector{expected, expected->Slice(1)});
-    TestChunkedArrayBinary(kernel, cleft, cright, cexpected);
-
-    // ChunkedArray with different chunks
-    cleft = std::make_shared<ChunkedArray>(ArrayVector{
-        left->Slice(0, 1), left->Slice(1), left->Slice(1, 1), left->Slice(2)});
-    TestChunkedArrayBinary(kernel, cleft, cright, cexpected);
-  }
+    ASSERT_OK_AND_ASSIGN(Datum expected,
+                         CallFunction(func_name, {Datum(constant_array), array}));
+    AssertDatumsEqual(expected, actual);
 
-  void TestBinaryKernelPropagate(const BinaryKernelFunc& kernel,
-                                 const std::vector<bool>& left,
-                                 const std::vector<bool>& right,
-                                 const std::vector<bool>& expected,
-                                 const std::vector<bool>& expected_nulls) {
-    auto type = boolean();
-    TestBinaryKernel(kernel, _MakeArray<BooleanType, bool>(type, left, {}),
-                     _MakeArray<BooleanType, bool>(type, right, {}),
-                     _MakeArray<BooleanType, bool>(type, expected, {}));
-
-    TestBinaryKernel(kernel, _MakeArray<BooleanType, bool>(type, left, left),
-                     _MakeArray<BooleanType, bool>(type, right, right),
-                     _MakeArray<BooleanType, bool>(type, expected, expected_nulls));
+    ASSERT_OK_AND_ASSIGN(actual, CallFunction(func_name, {array, Datum(scalar)}));
+    ASSERT_OK_AND_ASSIGN(expected,
+                         CallFunction(func_name, {array, Datum(constant_array)}));
+    AssertDatumsEqual(expected, actual);
   }
-
- protected:
-  ExecContext ctx_;
-};
-
-TEST_F(TestBooleanKernel, Invert) {
-  std::vector<bool> values1 = {true, false, true, false};
-  std::vector<bool> values2 = {false, true, false, true};
-
-  auto type = boolean();
-  auto a1 = _MakeArray<BooleanType, bool>(type, values1, {true, true, true, false});
-  auto a2 = _MakeArray<BooleanType, bool>(type, values2, {true, true, true, false});
-
-  // Plain array
-  ASSERT_OK_AND_ASSIGN(Datum result, Invert(a1));
-  ASSERT_EQ(Datum::ARRAY, result.kind());
-  ASSERT_ARRAYS_EQUAL(*a2, *result.make_array());
-
-  // Array with offset
-  ASSERT_OK_AND_ASSIGN(result, Invert(a1->Slice(1)));
-  ASSERT_EQ(Datum::ARRAY, result.kind());
-  ASSERT_ARRAYS_EQUAL(*a2->Slice(1), *result.make_array());
-
-  // ChunkedArray
-  std::vector<std::shared_ptr<Array>> ca1_arrs = {a1, a1->Slice(1)};
-  auto ca1 = std::make_shared<ChunkedArray>(ca1_arrs);
-  std::vector<std::shared_ptr<Array>> ca2_arrs = {a2, a2->Slice(1)};
-  auto ca2 = std::make_shared<ChunkedArray>(ca2_arrs);
-  ASSERT_OK_AND_ASSIGN(result, Invert(ca1));
-  ASSERT_EQ(Datum::CHUNKED_ARRAY, result.kind());
-  std::shared_ptr<ChunkedArray> result_ca = result.chunked_array();
-
-  // Contiguous preallocation, so a single output chunk even though there were
-  // two input chunks
-  ASSERT_EQ(1, result_ca->num_chunks());
-  AssertChunkedEquivalent(*ca2, *result_ca);
 }
 
-TEST_F(TestBooleanKernel, InvertEmptyArray) {
-  std::vector<std::shared_ptr<Buffer>> data_buffers(2);
-  Datum input;
-  input.value = ArrayData::Make(boolean(), 0 /* length */, std::move(data_buffers),
-                                0 /* null_count */);
-
-  ASSERT_OK_AND_ASSIGN(Datum result, Invert(input));
-  ASSERT_ARRAYS_EQUAL(*input.make_array(), *result.make_array());
+TEST(TestBooleanKernel, Invert) {
+  auto arr =
+      ArrayFromJSON(boolean(), "[true, false, true, null, false, true, false, null]");
+  auto expected =
+      ArrayFromJSON(boolean(), "[false, true, false, null, true, false, true, null]");
+  CheckScalarUnary("invert", arr, expected);
 }
 
-TEST_F(TestBooleanKernel, BinaryOpOnEmptyArray) {
-  auto type = boolean();
-  std::vector<std::shared_ptr<Buffer>> data_buffers(2);
-  Datum input;
-  input.value = ArrayData::Make(boolean(), 0 /* length */, std::move(data_buffers),
-                                0 /* null_count */);
-
-  ASSERT_OK_AND_ASSIGN(Datum result, And(input, input));
-  // Result should be empty as well.
-  ASSERT_ARRAYS_EQUAL(*input.make_array(), *result.make_array());
+TEST(TestBooleanKernel, And) {
+  auto left = ArrayFromJSON(boolean(), "    [true, true,  true, false, false, null]");
+  auto right = ArrayFromJSON(boolean(), "   [true, false, null, false, null,  null]");
+  auto expected = ArrayFromJSON(boolean(), "[true, false, null, false, null,  null]");
+  // CheckScalarBinary("and", left, right, expected);

Review comment:
       Is there a reason this is commented out?

##########
File path: cpp/src/arrow/compute/kernels/scalar_boolean.cc
##########
@@ -61,40 +59,78 @@ void ComputeKleene(ComputeWord&& compute_word, KernelContext* ctx, const ArrayDa
     ++i;
   };
 
-  if (right.null_count == 0 || left.null_count == 0) {
-    if (left.null_count == 0) {
-      // ensure only bitmaps[RIGHT_VALID].buffer might be null
-      std::swap(bitmaps[LEFT_VALID], bitmaps[RIGHT_VALID]);
-      std::swap(bitmaps[LEFT_DATA], bitmaps[RIGHT_DATA]);
-    }
-    // override bitmaps[RIGHT_VALID] to make it safe for Visit()
+  if (right.null_count == 0) {
+    // bitmaps[RIGHT_VALID] might be null; override to make it safe for Visit()
     bitmaps[RIGHT_VALID] = bitmaps[RIGHT_DATA];
-
     Bitmap::VisitWords(bitmaps, [&](std::array<uint64_t, 4> words) {
       apply(words[LEFT_VALID], words[LEFT_DATA], ~uint64_t(0), words[RIGHT_DATA]);
     });
-  } else {
+    return;
+  }
+
+  if (left.null_count == 0) {
+    // bitmaps[LEFT_VALID] might be null; override to make it safe for Visit()
+    bitmaps[LEFT_VALID] = bitmaps[LEFT_DATA];
     Bitmap::VisitWords(bitmaps, [&](std::array<uint64_t, 4> words) {
-      apply(words[LEFT_VALID], words[LEFT_DATA], words[RIGHT_VALID], words[RIGHT_DATA]);
+      apply(~uint64_t(0), words[LEFT_DATA], words[RIGHT_VALID], words[RIGHT_DATA]);
     });
+    return;
   }
+
+  DCHECK(left.null_count != 0 || right.null_count != 0);
+  Bitmap::VisitWords(bitmaps, [&](std::array<uint64_t, 4> words) {
+    apply(words[LEFT_VALID], words[LEFT_DATA], words[RIGHT_VALID], words[RIGHT_DATA]);
+  });
+}
+
+inline BooleanScalar InvertScalar(const Scalar& in) {
+  return in.is_valid ? BooleanScalar(!checked_cast<const BooleanScalar&>(in).value)
+                     : BooleanScalar();
+}
+
+inline Bitmap GetBitmap(const ArrayData& arr, int index) {
+  return Bitmap{arr.buffers[index], arr.offset, arr.length};
 }
 
 struct Invert {
   static void Call(KernelContext* ctx, const Scalar& in, Scalar* out) {
-    if (in.is_valid) {
-      checked_cast<BooleanScalar*>(out)->value =
-          !checked_cast<const BooleanScalar&>(in).value;
-    }
+    *checked_cast<BooleanScalar*>(out) = InvertScalar(in);
   }
 
   static void Call(KernelContext* ctx, const ArrayData& in, ArrayData* out) {
-    ::arrow::internal::InvertBitmap(in.buffers[1]->data(), in.offset, in.length,
-                                    out->buffers[1]->mutable_data(), out->offset);
+    GetBitmap(*out, 1).CopyFromInverted(GetBitmap(in, 1));
   }
 };
 
-struct And {
+template <typename Op>
+struct Commutative {
+  static void Call(KernelContext* ctx, const Scalar& left, const ArrayData& right,
+                   ArrayData* out) {
+    Op::Call(ctx, right, left, out);
+  }
+};
+
+struct And : Commutative<And> {
+  using Commutative<And>::Call;
+
+  static void Call(KernelContext* ctx, const Scalar& left, const Scalar& right,
+                   Scalar* out) {
+    if (left.is_valid && right.is_valid) {
+      checked_cast<BooleanScalar*>(out)->value =
+          checked_cast<const BooleanScalar&>(left).value &&
+          checked_cast<const BooleanScalar&>(right).value;
+    }
+  }
+
+  static void Call(KernelContext* ctx, const ArrayData& left, const Scalar& right,
+                   ArrayData* out) {
+    if (!right.is_valid) return;  // all null case
+
+    return checked_cast<const BooleanScalar&>(right).value
+               ? GetBitmap(*out, 1).CopyFrom(GetBitmap(left, 1))

Review comment:
       Hmm... Perhaps we can avoid a copy in most cases and just re-use the bitmap buffer?
   (do we need a `BitmapSliceOrCopy` helper?)

##########
File path: cpp/src/arrow/compute/kernels/scalar_boolean.cc
##########
@@ -156,14 +307,94 @@ struct Xor {
   }
 };
 
+struct AndNot {
+  static void Call(KernelContext* ctx, const Scalar& left, const Scalar& right,
+                   Scalar* out) {
+    And::Call(ctx, left, InvertScalar(right), out);
+  }
+
+  static void Call(KernelContext* ctx, const Scalar& left, const ArrayData& right,
+                   ArrayData* out) {
+    if (!left.is_valid) return;  // all null case
+
+    return checked_cast<const BooleanScalar&>(left).value
+               ? GetBitmap(*out, 1).CopyFromInverted(GetBitmap(right, 1))
+               : GetBitmap(*out, 1).SetBitsTo(false);
+  }
+
+  static void Call(KernelContext* ctx, const ArrayData& left, const Scalar& right,
+                   ArrayData* out) {
+    And::Call(ctx, left, InvertScalar(right), out);
+  }
+
+  static void Call(KernelContext* ctx, const ArrayData& left, const ArrayData& right,
+                   ArrayData* out) {
+    ::arrow::internal::BitmapAndNot(left.buffers[1]->data(), left.offset,
+                                    right.buffers[1]->data(), right.offset, right.length,
+                                    out->offset, out->buffers[1]->mutable_data());
+  }
+};
+
+struct KleeneAndNot {
+  static void Call(KernelContext* ctx, const Scalar& left, const Scalar& right,
+                   Scalar* out) {
+    KleeneAnd::Call(ctx, left, InvertScalar(right), out);
+  }
+
+  static void Call(KernelContext* ctx, const Scalar& left, const ArrayData& right,
+                   ArrayData* out) {
+    bool left_true = left.is_valid && checked_cast<const BooleanScalar&>(left).value;
+    bool left_false = left.is_valid && !checked_cast<const BooleanScalar&>(left).value;
+
+    if (left_false) {
+      return GetBitmap(*out, 0).SetBitsTo(true),
+             GetBitmap(*out, 1).SetBitsTo(false);  // all false case
+    }
+
+    if (left_true) {
+      return GetBitmap(*out, 0).CopyFrom(GetBitmap(right, 0)),
+             GetBitmap(*out, 1).CopyFromInverted(GetBitmap(right, 1));
+    }
+
+    // scalar was null: out[i] is valid iff right[i] was true
+    ::arrow::internal::BitmapAnd(right.buffers[0]->data(), right.offset,
+                                 right.buffers[1]->data(), right.offset, right.length,
+                                 out->offset, out->buffers[0]->mutable_data());
+    ::arrow::internal::InvertBitmap(right.buffers[1]->data(), right.offset, right.length,
+                                    out->buffers[1]->mutable_data(), out->offset);
+  }
+
+  static void Call(KernelContext* ctx, const ArrayData& left, const Scalar& right,
+                   ArrayData* out) {
+    KleeneAnd::Call(ctx, left, InvertScalar(right), out);
+  }
+
+  static void Call(KernelContext* ctx, const ArrayData& left, const ArrayData& right,
+                   ArrayData* out) {
+    if (left.GetNullCount() == 0 && right.GetNullCount() == 0) {
+      GetBitmap(*out, 0).SetBitsTo(true);
+      return AndNot::Call(ctx, left, right, out);
+    }
+
+    static auto compute_word = [](uint64_t left_true, uint64_t left_false,
+                                  uint64_t right_true, uint64_t right_false,
+                                  uint64_t* out_valid, uint64_t* out_data) {
+      *out_data = left_true & right_false;
+      *out_valid = left_false | right_true | (left_true & right_false);

Review comment:
       I would expect `~right_false` to be involved here. Am I missing something? Otherwise, the tests should also be expanded to exercise this code path.

##########
File path: cpp/src/arrow/compute/kernels/scalar_boolean.cc
##########
@@ -129,13 +222,51 @@ struct Or {
   }
 };
 
-struct KleeneOr {
+struct KleeneOr : Commutative<KleeneOr> {
+  using Commutative<KleeneOr>::Call;
+
+  static void Call(KernelContext* ctx, const Scalar& left, const Scalar& right,
+                   Scalar* out) {
+    bool left_true = left.is_valid && checked_cast<const BooleanScalar&>(left).value;
+    bool left_false = left.is_valid && !checked_cast<const BooleanScalar&>(left).value;
+
+    bool right_true = right.is_valid && checked_cast<const BooleanScalar&>(right).value;
+    bool right_false = right.is_valid && !checked_cast<const BooleanScalar&>(right).value;
+
+    checked_cast<BooleanScalar*>(out)->value = left_true || right_true;
+    out->is_valid = left_true || right_true || (left_false && right_false);
+  }
+
+  static void Call(KernelContext* ctx, const ArrayData& left, const Scalar& right,
+                   ArrayData* out) {
+    bool right_true = right.is_valid && checked_cast<const BooleanScalar&>(right).value;
+    bool right_false = right.is_valid && !checked_cast<const BooleanScalar&>(right).value;
+
+    if (right_true) {
+      return GetBitmap(*out, 0).SetBitsTo(true),
+             GetBitmap(*out, 1).SetBitsTo(true);  // all true case
+    }
+
+    if (right_false) {
+      return GetBitmap(*out, 0).CopyFrom(GetBitmap(left, 0)),
+             GetBitmap(*out, 1).CopyFrom(GetBitmap(left, 1));
+    }
+
+    // scalar was null: out[i] is valid iff left[i] was true
+    ::arrow::internal::BitmapAnd(left.buffers[0]->data(), left.offset,
+                                 left.buffers[1]->data(), left.offset, left.length,
+                                 out->offset, out->buffers[0]->mutable_data());
+    ::arrow::internal::CopyBitmap(left.buffers[1]->data(), left.offset, left.length,
+                                  out->buffers[1]->mutable_data(), out->offset);
+  }
+
   static void Call(KernelContext* ctx, const ArrayData& left, const ArrayData& right,
                    ArrayData* out) {
     if (left.GetNullCount() == 0 && right.GetNullCount() == 0) {
-      BitUtil::SetBitsTo(out->buffers[0]->mutable_data(), out->offset, out->length, true);
+      GetBitmap(*out, 0).SetBitsTo(true);

Review comment:
       Should you make the same change in `KleeneAnd::Call(KernelContext*, const ArrayData&, const ArrayData&, ArrayData*)`?

##########
File path: cpp/src/arrow/compute/kernels/scalar_boolean.cc
##########
@@ -61,40 +59,78 @@ void ComputeKleene(ComputeWord&& compute_word, KernelContext* ctx, const ArrayDa
     ++i;
   };
 
-  if (right.null_count == 0 || left.null_count == 0) {
-    if (left.null_count == 0) {
-      // ensure only bitmaps[RIGHT_VALID].buffer might be null
-      std::swap(bitmaps[LEFT_VALID], bitmaps[RIGHT_VALID]);
-      std::swap(bitmaps[LEFT_DATA], bitmaps[RIGHT_DATA]);
-    }
-    // override bitmaps[RIGHT_VALID] to make it safe for Visit()
+  if (right.null_count == 0) {
+    // bitmaps[RIGHT_VALID] might be null; override to make it safe for Visit()
     bitmaps[RIGHT_VALID] = bitmaps[RIGHT_DATA];
-
     Bitmap::VisitWords(bitmaps, [&](std::array<uint64_t, 4> words) {
       apply(words[LEFT_VALID], words[LEFT_DATA], ~uint64_t(0), words[RIGHT_DATA]);
     });
-  } else {
+    return;
+  }
+
+  if (left.null_count == 0) {
+    // bitmaps[LEFT_VALID] might be null; override to make it safe for Visit()
+    bitmaps[LEFT_VALID] = bitmaps[LEFT_DATA];
     Bitmap::VisitWords(bitmaps, [&](std::array<uint64_t, 4> words) {
-      apply(words[LEFT_VALID], words[LEFT_DATA], words[RIGHT_VALID], words[RIGHT_DATA]);
+      apply(~uint64_t(0), words[LEFT_DATA], words[RIGHT_VALID], words[RIGHT_DATA]);
     });
+    return;
   }
+
+  DCHECK(left.null_count != 0 || right.null_count != 0);

Review comment:
       You mean `&&`?




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [arrow] bkietz commented on a change in pull request #8728: ARROW-10669: [C++][Compute] Support scalar arguments to Boolean compute functions

Posted by GitBox <gi...@apache.org>.
bkietz commented on a change in pull request #8728:
URL: https://github.com/apache/arrow/pull/8728#discussion_r528781337



##########
File path: cpp/src/arrow/compute/kernels/scalar_boolean.cc
##########
@@ -61,40 +59,78 @@ void ComputeKleene(ComputeWord&& compute_word, KernelContext* ctx, const ArrayDa
     ++i;
   };
 
-  if (right.null_count == 0 || left.null_count == 0) {
-    if (left.null_count == 0) {
-      // ensure only bitmaps[RIGHT_VALID].buffer might be null
-      std::swap(bitmaps[LEFT_VALID], bitmaps[RIGHT_VALID]);
-      std::swap(bitmaps[LEFT_DATA], bitmaps[RIGHT_DATA]);
-    }
-    // override bitmaps[RIGHT_VALID] to make it safe for Visit()
+  if (right.null_count == 0) {
+    // bitmaps[RIGHT_VALID] might be null; override to make it safe for Visit()
     bitmaps[RIGHT_VALID] = bitmaps[RIGHT_DATA];
-
     Bitmap::VisitWords(bitmaps, [&](std::array<uint64_t, 4> words) {
       apply(words[LEFT_VALID], words[LEFT_DATA], ~uint64_t(0), words[RIGHT_DATA]);
     });
-  } else {
+    return;
+  }
+
+  if (left.null_count == 0) {
+    // bitmaps[LEFT_VALID] might be null; override to make it safe for Visit()
+    bitmaps[LEFT_VALID] = bitmaps[LEFT_DATA];
     Bitmap::VisitWords(bitmaps, [&](std::array<uint64_t, 4> words) {
-      apply(words[LEFT_VALID], words[LEFT_DATA], words[RIGHT_VALID], words[RIGHT_DATA]);
+      apply(~uint64_t(0), words[LEFT_DATA], words[RIGHT_VALID], words[RIGHT_DATA]);
     });
+    return;
   }
+
+  DCHECK(left.null_count != 0 || right.null_count != 0);
+  Bitmap::VisitWords(bitmaps, [&](std::array<uint64_t, 4> words) {
+    apply(words[LEFT_VALID], words[LEFT_DATA], words[RIGHT_VALID], words[RIGHT_DATA]);
+  });
+}
+
+inline BooleanScalar InvertScalar(const Scalar& in) {
+  return in.is_valid ? BooleanScalar(!checked_cast<const BooleanScalar&>(in).value)
+                     : BooleanScalar();
+}
+
+inline Bitmap GetBitmap(const ArrayData& arr, int index) {
+  return Bitmap{arr.buffers[index], arr.offset, arr.length};
 }
 
 struct Invert {
   static void Call(KernelContext* ctx, const Scalar& in, Scalar* out) {
-    if (in.is_valid) {
-      checked_cast<BooleanScalar*>(out)->value =
-          !checked_cast<const BooleanScalar&>(in).value;
-    }
+    *checked_cast<BooleanScalar*>(out) = InvertScalar(in);
   }
 
   static void Call(KernelContext* ctx, const ArrayData& in, ArrayData* out) {
-    ::arrow::internal::InvertBitmap(in.buffers[1]->data(), in.offset, in.length,
-                                    out->buffers[1]->mutable_data(), out->offset);
+    GetBitmap(*out, 1).CopyFromInverted(GetBitmap(in, 1));
   }
 };
 
-struct And {
+template <typename Op>
+struct Commutative {
+  static void Call(KernelContext* ctx, const Scalar& left, const ArrayData& right,
+                   ArrayData* out) {
+    Op::Call(ctx, right, left, out);
+  }
+};
+
+struct And : Commutative<And> {
+  using Commutative<And>::Call;
+
+  static void Call(KernelContext* ctx, const Scalar& left, const Scalar& right,
+                   Scalar* out) {
+    if (left.is_valid && right.is_valid) {
+      checked_cast<BooleanScalar*>(out)->value =
+          checked_cast<const BooleanScalar&>(left).value &&
+          checked_cast<const BooleanScalar&>(right).value;
+    }
+  }
+
+  static void Call(KernelContext* ctx, const ArrayData& left, const Scalar& right,
+                   ArrayData* out) {
+    if (!right.is_valid) return;  // all null case
+
+    return checked_cast<const BooleanScalar&>(right).value
+               ? GetBitmap(*out, 1).CopyFrom(GetBitmap(left, 1))

Review comment:
       Reusing the bitmap like this will make it harder to refactor Kleene kernels to support writing into slices, which (I'm guessing) would be the greater performance win. In any case, it can wait till follow up




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [arrow] bkietz commented on a change in pull request #8728: ARROW-10669: [C++][Compute] Support scalar arguments to Boolean compute functions

Posted by GitBox <gi...@apache.org>.
bkietz commented on a change in pull request #8728:
URL: https://github.com/apache/arrow/pull/8728#discussion_r528793473



##########
File path: cpp/src/arrow/compute/kernels/scalar_boolean_test.cc
##########
@@ -27,185 +27,114 @@
 #include "arrow/compute/kernels/test_util.h"
 #include "arrow/testing/gtest_common.h"
 #include "arrow/testing/gtest_util.h"
+#include "arrow/util/checked_cast.h"
 
 namespace arrow {
+
+using internal::checked_pointer_cast;
+
 namespace compute {
 
-using BinaryKernelFunc =
-    std::function<Result<Datum>(const Datum&, const Datum&, ExecContext*)>;
-
-class TestBooleanKernel : public TestBase {
- public:
-  void TestArrayBinary(const BinaryKernelFunc& kernel, const std::shared_ptr<Array>& left,
-                       const std::shared_ptr<Array>& right,
-                       const std::shared_ptr<Array>& expected) {
-    ASSERT_OK_AND_ASSIGN(Datum result, kernel(left, right, &ctx_));
-    ASSERT_EQ(Datum::ARRAY, result.kind());
-    std::shared_ptr<Array> result_array = result.make_array();
-    ASSERT_OK(result_array->ValidateFull());
-    AssertArraysEqual(*expected, *result_array, /*verbose=*/true);
-
-    ASSERT_OK_AND_ASSIGN(result, kernel(right, left, &ctx_));
-    ASSERT_EQ(Datum::ARRAY, result.kind());
-    result_array = result.make_array();
-    ASSERT_OK(result_array->ValidateFull());
-    AssertArraysEqual(*expected, *result_array, /*verbose=*/true);
-  }
+void CheckBooleanScalarArrayBinary(std::string func_name, Datum array) {
+  for (std::shared_ptr<Scalar> scalar :
+       {std::make_shared<BooleanScalar>(), std::make_shared<BooleanScalar>(true),
+        std::make_shared<BooleanScalar>(false)}) {
+    ASSERT_OK_AND_ASSIGN(Datum actual, CallFunction(func_name, {Datum(scalar), array}));
 
-  void TestChunkedArrayBinary(const BinaryKernelFunc& kernel,
-                              const std::shared_ptr<ChunkedArray>& left,
-                              const std::shared_ptr<ChunkedArray>& right,
-                              const std::shared_ptr<ChunkedArray>& expected) {
-    ASSERT_OK_AND_ASSIGN(Datum result, kernel(left, right, &ctx_));
-    ASSERT_EQ(Datum::CHUNKED_ARRAY, result.kind());
-    std::shared_ptr<ChunkedArray> result_ca = result.chunked_array();
-    AssertChunkedEquivalent(*expected, *result_ca);
-
-    ASSERT_OK_AND_ASSIGN(result, kernel(right, left, &ctx_));
-    ASSERT_EQ(Datum::CHUNKED_ARRAY, result.kind());
-    result_ca = result.chunked_array();
-    AssertChunkedEquivalent(*expected, *result_ca);
-  }
+    ASSERT_OK_AND_ASSIGN(auto constant_array,
+                         MakeArrayFromScalar(*scalar, array.length()));
 
-  void TestBinaryKernel(const BinaryKernelFunc& kernel,
-                        const std::shared_ptr<Array>& left,
-                        const std::shared_ptr<Array>& right,
-                        const std::shared_ptr<Array>& expected) {
-    TestArrayBinary(kernel, left, right, expected);
-    TestArrayBinary(kernel, left->Slice(1), right->Slice(1), expected->Slice(1));
-
-    // ChunkedArray
-    auto cleft = std::make_shared<ChunkedArray>(ArrayVector{left, left->Slice(1)});
-    auto cright = std::make_shared<ChunkedArray>(ArrayVector{right, right->Slice(1)});
-    auto cexpected =
-        std::make_shared<ChunkedArray>(ArrayVector{expected, expected->Slice(1)});
-    TestChunkedArrayBinary(kernel, cleft, cright, cexpected);
-
-    // ChunkedArray with different chunks
-    cleft = std::make_shared<ChunkedArray>(ArrayVector{
-        left->Slice(0, 1), left->Slice(1), left->Slice(1, 1), left->Slice(2)});
-    TestChunkedArrayBinary(kernel, cleft, cright, cexpected);
-  }
+    ASSERT_OK_AND_ASSIGN(Datum expected,
+                         CallFunction(func_name, {Datum(constant_array), array}));
+    AssertDatumsEqual(expected, actual);
 
-  void TestBinaryKernelPropagate(const BinaryKernelFunc& kernel,
-                                 const std::vector<bool>& left,
-                                 const std::vector<bool>& right,
-                                 const std::vector<bool>& expected,
-                                 const std::vector<bool>& expected_nulls) {
-    auto type = boolean();
-    TestBinaryKernel(kernel, _MakeArray<BooleanType, bool>(type, left, {}),
-                     _MakeArray<BooleanType, bool>(type, right, {}),
-                     _MakeArray<BooleanType, bool>(type, expected, {}));
-
-    TestBinaryKernel(kernel, _MakeArray<BooleanType, bool>(type, left, left),
-                     _MakeArray<BooleanType, bool>(type, right, right),
-                     _MakeArray<BooleanType, bool>(type, expected, expected_nulls));
+    ASSERT_OK_AND_ASSIGN(actual, CallFunction(func_name, {array, Datum(scalar)}));
+    ASSERT_OK_AND_ASSIGN(expected,
+                         CallFunction(func_name, {array, Datum(constant_array)}));
+    AssertDatumsEqual(expected, actual);
   }
-
- protected:
-  ExecContext ctx_;
-};
-
-TEST_F(TestBooleanKernel, Invert) {
-  std::vector<bool> values1 = {true, false, true, false};
-  std::vector<bool> values2 = {false, true, false, true};
-
-  auto type = boolean();
-  auto a1 = _MakeArray<BooleanType, bool>(type, values1, {true, true, true, false});
-  auto a2 = _MakeArray<BooleanType, bool>(type, values2, {true, true, true, false});
-
-  // Plain array
-  ASSERT_OK_AND_ASSIGN(Datum result, Invert(a1));
-  ASSERT_EQ(Datum::ARRAY, result.kind());
-  ASSERT_ARRAYS_EQUAL(*a2, *result.make_array());
-
-  // Array with offset
-  ASSERT_OK_AND_ASSIGN(result, Invert(a1->Slice(1)));
-  ASSERT_EQ(Datum::ARRAY, result.kind());
-  ASSERT_ARRAYS_EQUAL(*a2->Slice(1), *result.make_array());
-
-  // ChunkedArray
-  std::vector<std::shared_ptr<Array>> ca1_arrs = {a1, a1->Slice(1)};
-  auto ca1 = std::make_shared<ChunkedArray>(ca1_arrs);
-  std::vector<std::shared_ptr<Array>> ca2_arrs = {a2, a2->Slice(1)};
-  auto ca2 = std::make_shared<ChunkedArray>(ca2_arrs);
-  ASSERT_OK_AND_ASSIGN(result, Invert(ca1));
-  ASSERT_EQ(Datum::CHUNKED_ARRAY, result.kind());
-  std::shared_ptr<ChunkedArray> result_ca = result.chunked_array();
-
-  // Contiguous preallocation, so a single output chunk even though there were
-  // two input chunks
-  ASSERT_EQ(1, result_ca->num_chunks());
-  AssertChunkedEquivalent(*ca2, *result_ca);
 }
 
-TEST_F(TestBooleanKernel, InvertEmptyArray) {
-  std::vector<std::shared_ptr<Buffer>> data_buffers(2);
-  Datum input;
-  input.value = ArrayData::Make(boolean(), 0 /* length */, std::move(data_buffers),
-                                0 /* null_count */);
-
-  ASSERT_OK_AND_ASSIGN(Datum result, Invert(input));
-  ASSERT_ARRAYS_EQUAL(*input.make_array(), *result.make_array());
+TEST(TestBooleanKernel, Invert) {
+  auto arr =
+      ArrayFromJSON(boolean(), "[true, false, true, null, false, true, false, null]");
+  auto expected =
+      ArrayFromJSON(boolean(), "[false, true, false, null, true, false, true, null]");
+  CheckScalarUnary("invert", arr, expected);
 }
 
-TEST_F(TestBooleanKernel, BinaryOpOnEmptyArray) {
-  auto type = boolean();
-  std::vector<std::shared_ptr<Buffer>> data_buffers(2);
-  Datum input;
-  input.value = ArrayData::Make(boolean(), 0 /* length */, std::move(data_buffers),
-                                0 /* null_count */);
-
-  ASSERT_OK_AND_ASSIGN(Datum result, And(input, input));
-  // Result should be empty as well.
-  ASSERT_ARRAYS_EQUAL(*input.make_array(), *result.make_array());
+TEST(TestBooleanKernel, And) {
+  auto left = ArrayFromJSON(boolean(), "    [true, true,  true, false, false, null]");
+  auto right = ArrayFromJSON(boolean(), "   [true, false, null, false, null,  null]");
+  auto expected = ArrayFromJSON(boolean(), "[true, false, null, false, null,  null]");
+  // CheckScalarBinary("and", left, right, expected);
+  CheckBooleanScalarArrayBinary("and", left);
 }
 
-TEST_F(TestBooleanKernel, And) {
-  std::vector<bool> values1 = {true, false, true, false, true, true};
-  std::vector<bool> values2 = {true, true, false, false, true, false};
-  std::vector<bool> values3 = {true, false, false, false, true, false};
-  TestBinaryKernelPropagate(And, values1, values2, values3, values3);
+TEST(TestBooleanKernel, Or) {
+  auto left = ArrayFromJSON(boolean(), "    [true, true,  true, false, false, null]");
+  auto right = ArrayFromJSON(boolean(), "   [true, false, null, false, null,  null]");
+  auto expected = ArrayFromJSON(boolean(), "[true, true,  null, false, null,  null]");
+  CheckScalarBinary("or", left, right, expected);
+  CheckBooleanScalarArrayBinary("or", left);
 }
 
-TEST_F(TestBooleanKernel, Or) {
-  std::vector<bool> values1 = {true, false, true, false, true, true};
-  std::vector<bool> values2 = {true, true, false, false, true, false};
-  std::vector<bool> values3 = {true, true, true, false, true, true};
-  std::vector<bool> values3_nulls = {true, false, false, false, true, false};
-  TestBinaryKernelPropagate(Or, values1, values2, values3, values3_nulls);
+TEST(TestBooleanKernel, Xor) {
+  auto left = ArrayFromJSON(boolean(), "    [true,  true,  true, false, false, null]");
+  auto right = ArrayFromJSON(boolean(), "   [true,  false, null, false, null,  null]");
+  auto expected = ArrayFromJSON(boolean(), "[false, true,  null, false, null,  null]");
+  CheckScalarBinary("xor", left, right, expected);
+  CheckBooleanScalarArrayBinary("xor", left);
 }
 
-TEST_F(TestBooleanKernel, Xor) {
-  std::vector<bool> values1 = {true, false, true, false, true, true};
-  std::vector<bool> values2 = {true, true, false, false, true, false};
-  std::vector<bool> values3 = {false, true, true, false, false, true};
-  std::vector<bool> values3_nulls = {true, false, false, false, true, false};
-  TestBinaryKernelPropagate(Xor, values1, values2, values3, values3_nulls);
+TEST(TestBooleanKernel, AndNot) {
+  auto left = ArrayFromJSON(
+      boolean(), "[true,  true,  true, false, false, false, null, null,  null]");
+  auto right = ArrayFromJSON(
+      boolean(), "[true,  false, null, true,  false, null,  true, false, null]");
+  auto expected = ArrayFromJSON(
+      boolean(), "[false, true,  null, false, false, null,  null, null,  null]");
+  CheckScalarBinary("and_not", left, right, expected);
+  CheckBooleanScalarArrayBinary("and_not", left);
 }
 
-TEST_F(TestBooleanKernel, KleeneAnd) {
+TEST(TestBooleanKernel, KleeneAnd) {
   auto left = ArrayFromJSON(boolean(), "    [true, true,  true, false, false, null]");
   auto right = ArrayFromJSON(boolean(), "   [true, false, null, false, null,  null]");
   auto expected = ArrayFromJSON(boolean(), "[true, false, null, false, false, null]");
-  TestBinaryKernel(KleeneAnd, left, right, expected);
+  CheckScalarBinary("and_kleene", left, right, expected);
+  CheckBooleanScalarArrayBinary("and_kleene", left);
 
   left = ArrayFromJSON(boolean(), "    [true, true,  false, null, null]");
   right = ArrayFromJSON(boolean(), "   [true, false, false, true, false]");
   expected = ArrayFromJSON(boolean(), "[true, false, false, null, false]");
-  TestBinaryKernel(KleeneAnd, left, right, expected);
+  CheckScalarBinary("and_kleene", left, right, expected);
+  CheckBooleanScalarArrayBinary("and_kleene", left);

Review comment:
       Ah, I see. Yes they're the same input values (the full truth table) just with a the different call signatures




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [arrow] bkietz commented on a change in pull request #8728: ARROW-10669: [C++][Compute] Support scalar arguments to Boolean compute functions

Posted by GitBox <gi...@apache.org>.
bkietz commented on a change in pull request #8728:
URL: https://github.com/apache/arrow/pull/8728#discussion_r528767690



##########
File path: cpp/src/arrow/compute/exec.cc
##########
@@ -213,20 +214,33 @@ bool ExecBatchIterator::Next(ExecBatch* batch) {
 
 namespace {
 
-bool ArrayHasNulls(const ArrayData& data) {
-  // As discovered in ARROW-8863 (and not only for that reason)
-  // ArrayData::null_count can -1 even when buffers[0] is nullptr. So we check
-  // for both cases (nullptr means no nulls, or null_count already computed)
-  if (data.type->id() == Type::NA) {
-    return true;
-  } else if (data.buffers[0] == nullptr) {
-    return false;
-  } else {
+struct NullGeneralization {
+  enum type { NONE, ALL_VALID, ALL_NULL };

Review comment:
       Alright




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [arrow] pitrou commented on a change in pull request #8728: ARROW-10669: [C++][Compute] Support scalar arguments to Boolean compute functions

Posted by GitBox <gi...@apache.org>.
pitrou commented on a change in pull request #8728:
URL: https://github.com/apache/arrow/pull/8728#discussion_r528777466



##########
File path: cpp/src/arrow/compute/kernels/scalar_boolean.cc
##########
@@ -156,14 +307,94 @@ struct Xor {
   }
 };
 
+struct AndNot {
+  static void Call(KernelContext* ctx, const Scalar& left, const Scalar& right,
+                   Scalar* out) {
+    And::Call(ctx, left, InvertScalar(right), out);
+  }
+
+  static void Call(KernelContext* ctx, const Scalar& left, const ArrayData& right,
+                   ArrayData* out) {
+    if (!left.is_valid) return;  // all null case
+
+    return checked_cast<const BooleanScalar&>(left).value
+               ? GetBitmap(*out, 1).CopyFromInverted(GetBitmap(right, 1))
+               : GetBitmap(*out, 1).SetBitsTo(false);
+  }
+
+  static void Call(KernelContext* ctx, const ArrayData& left, const Scalar& right,
+                   ArrayData* out) {
+    And::Call(ctx, left, InvertScalar(right), out);
+  }
+
+  static void Call(KernelContext* ctx, const ArrayData& left, const ArrayData& right,
+                   ArrayData* out) {
+    ::arrow::internal::BitmapAndNot(left.buffers[1]->data(), left.offset,
+                                    right.buffers[1]->data(), right.offset, right.length,
+                                    out->offset, out->buffers[1]->mutable_data());
+  }
+};
+
+struct KleeneAndNot {
+  static void Call(KernelContext* ctx, const Scalar& left, const Scalar& right,
+                   Scalar* out) {
+    KleeneAnd::Call(ctx, left, InvertScalar(right), out);
+  }
+
+  static void Call(KernelContext* ctx, const Scalar& left, const ArrayData& right,
+                   ArrayData* out) {
+    bool left_true = left.is_valid && checked_cast<const BooleanScalar&>(left).value;
+    bool left_false = left.is_valid && !checked_cast<const BooleanScalar&>(left).value;
+
+    if (left_false) {
+      return GetBitmap(*out, 0).SetBitsTo(true),
+             GetBitmap(*out, 1).SetBitsTo(false);  // all false case
+    }
+
+    if (left_true) {
+      return GetBitmap(*out, 0).CopyFrom(GetBitmap(right, 0)),
+             GetBitmap(*out, 1).CopyFromInverted(GetBitmap(right, 1));
+    }
+
+    // scalar was null: out[i] is valid iff right[i] was true
+    ::arrow::internal::BitmapAnd(right.buffers[0]->data(), right.offset,
+                                 right.buffers[1]->data(), right.offset, right.length,
+                                 out->offset, out->buffers[0]->mutable_data());
+    ::arrow::internal::InvertBitmap(right.buffers[1]->data(), right.offset, right.length,
+                                    out->buffers[1]->mutable_data(), out->offset);
+  }
+
+  static void Call(KernelContext* ctx, const ArrayData& left, const Scalar& right,
+                   ArrayData* out) {
+    KleeneAnd::Call(ctx, left, InvertScalar(right), out);
+  }
+
+  static void Call(KernelContext* ctx, const ArrayData& left, const ArrayData& right,
+                   ArrayData* out) {
+    if (left.GetNullCount() == 0 && right.GetNullCount() == 0) {
+      GetBitmap(*out, 0).SetBitsTo(true);
+      return AndNot::Call(ctx, left, right, out);
+    }
+
+    static auto compute_word = [](uint64_t left_true, uint64_t left_false,
+                                  uint64_t right_true, uint64_t right_false,
+                                  uint64_t* out_valid, uint64_t* out_data) {
+      *out_data = left_true & right_false;
+      *out_valid = left_false | right_true | (left_true & right_false);

Review comment:
       Wow, sorry, I was reading this badly (I use my eyes more and my brain less :-)).

##########
File path: cpp/src/arrow/compute/kernels/scalar_boolean.cc
##########
@@ -156,14 +307,94 @@ struct Xor {
   }
 };
 
+struct AndNot {
+  static void Call(KernelContext* ctx, const Scalar& left, const Scalar& right,
+                   Scalar* out) {
+    And::Call(ctx, left, InvertScalar(right), out);
+  }
+
+  static void Call(KernelContext* ctx, const Scalar& left, const ArrayData& right,
+                   ArrayData* out) {
+    if (!left.is_valid) return;  // all null case
+
+    return checked_cast<const BooleanScalar&>(left).value
+               ? GetBitmap(*out, 1).CopyFromInverted(GetBitmap(right, 1))
+               : GetBitmap(*out, 1).SetBitsTo(false);
+  }
+
+  static void Call(KernelContext* ctx, const ArrayData& left, const Scalar& right,
+                   ArrayData* out) {
+    And::Call(ctx, left, InvertScalar(right), out);
+  }
+
+  static void Call(KernelContext* ctx, const ArrayData& left, const ArrayData& right,
+                   ArrayData* out) {
+    ::arrow::internal::BitmapAndNot(left.buffers[1]->data(), left.offset,
+                                    right.buffers[1]->data(), right.offset, right.length,
+                                    out->offset, out->buffers[1]->mutable_data());
+  }
+};
+
+struct KleeneAndNot {
+  static void Call(KernelContext* ctx, const Scalar& left, const Scalar& right,
+                   Scalar* out) {
+    KleeneAnd::Call(ctx, left, InvertScalar(right), out);
+  }
+
+  static void Call(KernelContext* ctx, const Scalar& left, const ArrayData& right,
+                   ArrayData* out) {
+    bool left_true = left.is_valid && checked_cast<const BooleanScalar&>(left).value;
+    bool left_false = left.is_valid && !checked_cast<const BooleanScalar&>(left).value;
+
+    if (left_false) {
+      return GetBitmap(*out, 0).SetBitsTo(true),
+             GetBitmap(*out, 1).SetBitsTo(false);  // all false case
+    }
+
+    if (left_true) {
+      return GetBitmap(*out, 0).CopyFrom(GetBitmap(right, 0)),
+             GetBitmap(*out, 1).CopyFromInverted(GetBitmap(right, 1));
+    }
+
+    // scalar was null: out[i] is valid iff right[i] was true
+    ::arrow::internal::BitmapAnd(right.buffers[0]->data(), right.offset,
+                                 right.buffers[1]->data(), right.offset, right.length,
+                                 out->offset, out->buffers[0]->mutable_data());
+    ::arrow::internal::InvertBitmap(right.buffers[1]->data(), right.offset, right.length,
+                                    out->buffers[1]->mutable_data(), out->offset);
+  }
+
+  static void Call(KernelContext* ctx, const ArrayData& left, const Scalar& right,
+                   ArrayData* out) {
+    KleeneAnd::Call(ctx, left, InvertScalar(right), out);
+  }
+
+  static void Call(KernelContext* ctx, const ArrayData& left, const ArrayData& right,
+                   ArrayData* out) {
+    if (left.GetNullCount() == 0 && right.GetNullCount() == 0) {
+      GetBitmap(*out, 0).SetBitsTo(true);
+      return AndNot::Call(ctx, left, right, out);
+    }
+
+    static auto compute_word = [](uint64_t left_true, uint64_t left_false,
+                                  uint64_t right_true, uint64_t right_false,
+                                  uint64_t* out_valid, uint64_t* out_data) {
+      *out_data = left_true & right_false;
+      *out_valid = left_false | right_true | (left_true & right_false);

Review comment:
       Wow, sorry, I was reading this badly (I should use my eyes more and my brain less :-)).




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [arrow] bkietz commented on a change in pull request #8728: ARROW-10669: [C++][Compute] Support scalar arguments to Boolean compute functions

Posted by GitBox <gi...@apache.org>.
bkietz commented on a change in pull request #8728:
URL: https://github.com/apache/arrow/pull/8728#discussion_r528677163



##########
File path: cpp/src/arrow/compute/api_scalar.h
##########
@@ -253,6 +253,33 @@ Result<Datum> KleeneOr(const Datum& left, const Datum& right, ExecContext* ctx =
 ARROW_EXPORT
 Result<Datum> Xor(const Datum& left, const Datum& right, ExecContext* ctx = NULLPTR);
 
+/// \brief Element-wise AND NOT of two boolean datums which always propagates nulls
+/// (null and not true is null).
+///
+/// \param[in] left left operand
+/// \param[in] right right operand
+/// \param[in] ctx the function execution context, optional
+/// \return the resulting datum
+///
+/// \since 1.0.0

Review comment:
       Ah, thanks. Copy paste error




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [arrow] bkietz closed pull request #8728: ARROW-10669: [C++][Compute] Support scalar arguments to Boolean compute functions

Posted by GitBox <gi...@apache.org>.
bkietz closed pull request #8728:
URL: https://github.com/apache/arrow/pull/8728


   


----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [arrow] bkietz commented on a change in pull request #8728: ARROW-10669: [C++][Compute] Support scalar arguments to Boolean compute functions

Posted by GitBox <gi...@apache.org>.
bkietz commented on a change in pull request #8728:
URL: https://github.com/apache/arrow/pull/8728#discussion_r528766456



##########
File path: cpp/src/arrow/compute/kernels/scalar_boolean_test.cc
##########
@@ -27,185 +27,114 @@
 #include "arrow/compute/kernels/test_util.h"
 #include "arrow/testing/gtest_common.h"
 #include "arrow/testing/gtest_util.h"
+#include "arrow/util/checked_cast.h"
 
 namespace arrow {
+
+using internal::checked_pointer_cast;
+
 namespace compute {
 
-using BinaryKernelFunc =
-    std::function<Result<Datum>(const Datum&, const Datum&, ExecContext*)>;
-
-class TestBooleanKernel : public TestBase {
- public:
-  void TestArrayBinary(const BinaryKernelFunc& kernel, const std::shared_ptr<Array>& left,
-                       const std::shared_ptr<Array>& right,
-                       const std::shared_ptr<Array>& expected) {
-    ASSERT_OK_AND_ASSIGN(Datum result, kernel(left, right, &ctx_));
-    ASSERT_EQ(Datum::ARRAY, result.kind());
-    std::shared_ptr<Array> result_array = result.make_array();
-    ASSERT_OK(result_array->ValidateFull());
-    AssertArraysEqual(*expected, *result_array, /*verbose=*/true);
-
-    ASSERT_OK_AND_ASSIGN(result, kernel(right, left, &ctx_));
-    ASSERT_EQ(Datum::ARRAY, result.kind());
-    result_array = result.make_array();
-    ASSERT_OK(result_array->ValidateFull());
-    AssertArraysEqual(*expected, *result_array, /*verbose=*/true);
-  }
+void CheckBooleanScalarArrayBinary(std::string func_name, Datum array) {
+  for (std::shared_ptr<Scalar> scalar :
+       {std::make_shared<BooleanScalar>(), std::make_shared<BooleanScalar>(true),
+        std::make_shared<BooleanScalar>(false)}) {
+    ASSERT_OK_AND_ASSIGN(Datum actual, CallFunction(func_name, {Datum(scalar), array}));
 
-  void TestChunkedArrayBinary(const BinaryKernelFunc& kernel,
-                              const std::shared_ptr<ChunkedArray>& left,
-                              const std::shared_ptr<ChunkedArray>& right,
-                              const std::shared_ptr<ChunkedArray>& expected) {
-    ASSERT_OK_AND_ASSIGN(Datum result, kernel(left, right, &ctx_));
-    ASSERT_EQ(Datum::CHUNKED_ARRAY, result.kind());
-    std::shared_ptr<ChunkedArray> result_ca = result.chunked_array();
-    AssertChunkedEquivalent(*expected, *result_ca);
-
-    ASSERT_OK_AND_ASSIGN(result, kernel(right, left, &ctx_));
-    ASSERT_EQ(Datum::CHUNKED_ARRAY, result.kind());
-    result_ca = result.chunked_array();
-    AssertChunkedEquivalent(*expected, *result_ca);
-  }
+    ASSERT_OK_AND_ASSIGN(auto constant_array,
+                         MakeArrayFromScalar(*scalar, array.length()));
 
-  void TestBinaryKernel(const BinaryKernelFunc& kernel,
-                        const std::shared_ptr<Array>& left,
-                        const std::shared_ptr<Array>& right,
-                        const std::shared_ptr<Array>& expected) {
-    TestArrayBinary(kernel, left, right, expected);
-    TestArrayBinary(kernel, left->Slice(1), right->Slice(1), expected->Slice(1));
-
-    // ChunkedArray
-    auto cleft = std::make_shared<ChunkedArray>(ArrayVector{left, left->Slice(1)});
-    auto cright = std::make_shared<ChunkedArray>(ArrayVector{right, right->Slice(1)});
-    auto cexpected =
-        std::make_shared<ChunkedArray>(ArrayVector{expected, expected->Slice(1)});
-    TestChunkedArrayBinary(kernel, cleft, cright, cexpected);
-
-    // ChunkedArray with different chunks
-    cleft = std::make_shared<ChunkedArray>(ArrayVector{
-        left->Slice(0, 1), left->Slice(1), left->Slice(1, 1), left->Slice(2)});
-    TestChunkedArrayBinary(kernel, cleft, cright, cexpected);
-  }
+    ASSERT_OK_AND_ASSIGN(Datum expected,
+                         CallFunction(func_name, {Datum(constant_array), array}));
+    AssertDatumsEqual(expected, actual);
 
-  void TestBinaryKernelPropagate(const BinaryKernelFunc& kernel,
-                                 const std::vector<bool>& left,
-                                 const std::vector<bool>& right,
-                                 const std::vector<bool>& expected,
-                                 const std::vector<bool>& expected_nulls) {
-    auto type = boolean();
-    TestBinaryKernel(kernel, _MakeArray<BooleanType, bool>(type, left, {}),
-                     _MakeArray<BooleanType, bool>(type, right, {}),
-                     _MakeArray<BooleanType, bool>(type, expected, {}));
-
-    TestBinaryKernel(kernel, _MakeArray<BooleanType, bool>(type, left, left),
-                     _MakeArray<BooleanType, bool>(type, right, right),
-                     _MakeArray<BooleanType, bool>(type, expected, expected_nulls));
+    ASSERT_OK_AND_ASSIGN(actual, CallFunction(func_name, {array, Datum(scalar)}));
+    ASSERT_OK_AND_ASSIGN(expected,
+                         CallFunction(func_name, {array, Datum(constant_array)}));
+    AssertDatumsEqual(expected, actual);
   }
-
- protected:
-  ExecContext ctx_;
-};
-
-TEST_F(TestBooleanKernel, Invert) {
-  std::vector<bool> values1 = {true, false, true, false};
-  std::vector<bool> values2 = {false, true, false, true};
-
-  auto type = boolean();
-  auto a1 = _MakeArray<BooleanType, bool>(type, values1, {true, true, true, false});
-  auto a2 = _MakeArray<BooleanType, bool>(type, values2, {true, true, true, false});
-
-  // Plain array
-  ASSERT_OK_AND_ASSIGN(Datum result, Invert(a1));
-  ASSERT_EQ(Datum::ARRAY, result.kind());
-  ASSERT_ARRAYS_EQUAL(*a2, *result.make_array());
-
-  // Array with offset
-  ASSERT_OK_AND_ASSIGN(result, Invert(a1->Slice(1)));
-  ASSERT_EQ(Datum::ARRAY, result.kind());
-  ASSERT_ARRAYS_EQUAL(*a2->Slice(1), *result.make_array());
-
-  // ChunkedArray
-  std::vector<std::shared_ptr<Array>> ca1_arrs = {a1, a1->Slice(1)};
-  auto ca1 = std::make_shared<ChunkedArray>(ca1_arrs);
-  std::vector<std::shared_ptr<Array>> ca2_arrs = {a2, a2->Slice(1)};
-  auto ca2 = std::make_shared<ChunkedArray>(ca2_arrs);
-  ASSERT_OK_AND_ASSIGN(result, Invert(ca1));
-  ASSERT_EQ(Datum::CHUNKED_ARRAY, result.kind());
-  std::shared_ptr<ChunkedArray> result_ca = result.chunked_array();
-
-  // Contiguous preallocation, so a single output chunk even though there were
-  // two input chunks
-  ASSERT_EQ(1, result_ca->num_chunks());
-  AssertChunkedEquivalent(*ca2, *result_ca);
 }
 
-TEST_F(TestBooleanKernel, InvertEmptyArray) {
-  std::vector<std::shared_ptr<Buffer>> data_buffers(2);
-  Datum input;
-  input.value = ArrayData::Make(boolean(), 0 /* length */, std::move(data_buffers),
-                                0 /* null_count */);
-
-  ASSERT_OK_AND_ASSIGN(Datum result, Invert(input));
-  ASSERT_ARRAYS_EQUAL(*input.make_array(), *result.make_array());
+TEST(TestBooleanKernel, Invert) {
+  auto arr =
+      ArrayFromJSON(boolean(), "[true, false, true, null, false, true, false, null]");
+  auto expected =
+      ArrayFromJSON(boolean(), "[false, true, false, null, true, false, true, null]");
+  CheckScalarUnary("invert", arr, expected);
 }
 
-TEST_F(TestBooleanKernel, BinaryOpOnEmptyArray) {
-  auto type = boolean();
-  std::vector<std::shared_ptr<Buffer>> data_buffers(2);
-  Datum input;
-  input.value = ArrayData::Make(boolean(), 0 /* length */, std::move(data_buffers),
-                                0 /* null_count */);
-
-  ASSERT_OK_AND_ASSIGN(Datum result, And(input, input));
-  // Result should be empty as well.
-  ASSERT_ARRAYS_EQUAL(*input.make_array(), *result.make_array());
+TEST(TestBooleanKernel, And) {
+  auto left = ArrayFromJSON(boolean(), "    [true, true,  true, false, false, null]");
+  auto right = ArrayFromJSON(boolean(), "   [true, false, null, false, null,  null]");
+  auto expected = ArrayFromJSON(boolean(), "[true, false, null, false, null,  null]");
+  // CheckScalarBinary("and", left, right, expected);
+  CheckBooleanScalarArrayBinary("and", left);
 }
 
-TEST_F(TestBooleanKernel, And) {
-  std::vector<bool> values1 = {true, false, true, false, true, true};
-  std::vector<bool> values2 = {true, true, false, false, true, false};
-  std::vector<bool> values3 = {true, false, false, false, true, false};
-  TestBinaryKernelPropagate(And, values1, values2, values3, values3);
+TEST(TestBooleanKernel, Or) {
+  auto left = ArrayFromJSON(boolean(), "    [true, true,  true, false, false, null]");
+  auto right = ArrayFromJSON(boolean(), "   [true, false, null, false, null,  null]");
+  auto expected = ArrayFromJSON(boolean(), "[true, true,  null, false, null,  null]");
+  CheckScalarBinary("or", left, right, expected);
+  CheckBooleanScalarArrayBinary("or", left);
 }
 
-TEST_F(TestBooleanKernel, Or) {
-  std::vector<bool> values1 = {true, false, true, false, true, true};
-  std::vector<bool> values2 = {true, true, false, false, true, false};
-  std::vector<bool> values3 = {true, true, true, false, true, true};
-  std::vector<bool> values3_nulls = {true, false, false, false, true, false};
-  TestBinaryKernelPropagate(Or, values1, values2, values3, values3_nulls);
+TEST(TestBooleanKernel, Xor) {
+  auto left = ArrayFromJSON(boolean(), "    [true,  true,  true, false, false, null]");
+  auto right = ArrayFromJSON(boolean(), "   [true,  false, null, false, null,  null]");
+  auto expected = ArrayFromJSON(boolean(), "[false, true,  null, false, null,  null]");
+  CheckScalarBinary("xor", left, right, expected);
+  CheckBooleanScalarArrayBinary("xor", left);
 }
 
-TEST_F(TestBooleanKernel, Xor) {
-  std::vector<bool> values1 = {true, false, true, false, true, true};
-  std::vector<bool> values2 = {true, true, false, false, true, false};
-  std::vector<bool> values3 = {false, true, true, false, false, true};
-  std::vector<bool> values3_nulls = {true, false, false, false, true, false};
-  TestBinaryKernelPropagate(Xor, values1, values2, values3, values3_nulls);
+TEST(TestBooleanKernel, AndNot) {
+  auto left = ArrayFromJSON(
+      boolean(), "[true,  true,  true, false, false, false, null, null,  null]");
+  auto right = ArrayFromJSON(
+      boolean(), "[true,  false, null, true,  false, null,  true, false, null]");
+  auto expected = ArrayFromJSON(
+      boolean(), "[false, true,  null, false, false, null,  null, null,  null]");
+  CheckScalarBinary("and_not", left, right, expected);
+  CheckBooleanScalarArrayBinary("and_not", left);
 }
 
-TEST_F(TestBooleanKernel, KleeneAnd) {
+TEST(TestBooleanKernel, KleeneAnd) {
   auto left = ArrayFromJSON(boolean(), "    [true, true,  true, false, false, null]");
   auto right = ArrayFromJSON(boolean(), "   [true, false, null, false, null,  null]");
   auto expected = ArrayFromJSON(boolean(), "[true, false, null, false, false, null]");
-  TestBinaryKernel(KleeneAnd, left, right, expected);
+  CheckScalarBinary("and_kleene", left, right, expected);
+  CheckBooleanScalarArrayBinary("and_kleene", left);
 
   left = ArrayFromJSON(boolean(), "    [true, true,  false, null, null]");
   right = ArrayFromJSON(boolean(), "   [true, false, false, true, false]");
   expected = ArrayFromJSON(boolean(), "[true, false, false, null, false]");
-  TestBinaryKernel(KleeneAnd, left, right, expected);
+  CheckScalarBinary("and_kleene", left, right, expected);
+  CheckBooleanScalarArrayBinary("and_kleene", left);

Review comment:
       No, the second tests exclusively `array, scalar` and `scalar, array` which isn't covered by CheckScalarBinary




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [arrow] bkietz commented on a change in pull request #8728: ARROW-10669: [C++][Compute] Support scalar arguments to Boolean compute functions

Posted by GitBox <gi...@apache.org>.
bkietz commented on a change in pull request #8728:
URL: https://github.com/apache/arrow/pull/8728#discussion_r528776241



##########
File path: cpp/src/arrow/compute/kernels/scalar_boolean.cc
##########
@@ -156,14 +307,94 @@ struct Xor {
   }
 };
 
+struct AndNot {
+  static void Call(KernelContext* ctx, const Scalar& left, const Scalar& right,
+                   Scalar* out) {
+    And::Call(ctx, left, InvertScalar(right), out);
+  }
+
+  static void Call(KernelContext* ctx, const Scalar& left, const ArrayData& right,
+                   ArrayData* out) {
+    if (!left.is_valid) return;  // all null case
+
+    return checked_cast<const BooleanScalar&>(left).value
+               ? GetBitmap(*out, 1).CopyFromInverted(GetBitmap(right, 1))
+               : GetBitmap(*out, 1).SetBitsTo(false);
+  }
+
+  static void Call(KernelContext* ctx, const ArrayData& left, const Scalar& right,
+                   ArrayData* out) {
+    And::Call(ctx, left, InvertScalar(right), out);
+  }
+
+  static void Call(KernelContext* ctx, const ArrayData& left, const ArrayData& right,
+                   ArrayData* out) {
+    ::arrow::internal::BitmapAndNot(left.buffers[1]->data(), left.offset,
+                                    right.buffers[1]->data(), right.offset, right.length,
+                                    out->offset, out->buffers[1]->mutable_data());
+  }
+};
+
+struct KleeneAndNot {
+  static void Call(KernelContext* ctx, const Scalar& left, const Scalar& right,
+                   Scalar* out) {
+    KleeneAnd::Call(ctx, left, InvertScalar(right), out);
+  }
+
+  static void Call(KernelContext* ctx, const Scalar& left, const ArrayData& right,
+                   ArrayData* out) {
+    bool left_true = left.is_valid && checked_cast<const BooleanScalar&>(left).value;
+    bool left_false = left.is_valid && !checked_cast<const BooleanScalar&>(left).value;
+
+    if (left_false) {
+      return GetBitmap(*out, 0).SetBitsTo(true),
+             GetBitmap(*out, 1).SetBitsTo(false);  // all false case
+    }
+
+    if (left_true) {
+      return GetBitmap(*out, 0).CopyFrom(GetBitmap(right, 0)),
+             GetBitmap(*out, 1).CopyFromInverted(GetBitmap(right, 1));
+    }
+
+    // scalar was null: out[i] is valid iff right[i] was true
+    ::arrow::internal::BitmapAnd(right.buffers[0]->data(), right.offset,
+                                 right.buffers[1]->data(), right.offset, right.length,
+                                 out->offset, out->buffers[0]->mutable_data());
+    ::arrow::internal::InvertBitmap(right.buffers[1]->data(), right.offset, right.length,
+                                    out->buffers[1]->mutable_data(), out->offset);
+  }
+
+  static void Call(KernelContext* ctx, const ArrayData& left, const Scalar& right,
+                   ArrayData* out) {
+    KleeneAnd::Call(ctx, left, InvertScalar(right), out);
+  }
+
+  static void Call(KernelContext* ctx, const ArrayData& left, const ArrayData& right,
+                   ArrayData* out) {
+    if (left.GetNullCount() == 0 && right.GetNullCount() == 0) {
+      GetBitmap(*out, 0).SetBitsTo(true);
+      return AndNot::Call(ctx, left, right, out);
+    }
+
+    static auto compute_word = [](uint64_t left_true, uint64_t left_false,
+                                  uint64_t right_true, uint64_t right_false,
+                                  uint64_t* out_valid, uint64_t* out_data) {
+      *out_data = left_true & right_false;
+      *out_valid = left_false | right_true | (left_true & right_false);

Review comment:
       `right_false` is derived from both the validity and the value bitmaps; it's true iff a slot was valid&false. To unpack this expression a bit: `and_not_kleene(left, right)` is valid if
   - `left` is false, since `and_kleene(false, x)` is false (even if `x` is null)
   - `right` is true, since `and_kleene(x, not true)` is false (even if `x` is null)
   - `left` is true AND `right` is false, since `and_kleene(true, not false)` is true
   
   I believe the tests exercise the full truth table of and_not_kleene so if this still looks suspicious then I'd look there first.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [arrow] bkietz commented on a change in pull request #8728: ARROW-10669: [C++][Compute] Support scalar arguments to Boolean compute functions

Posted by GitBox <gi...@apache.org>.
bkietz commented on a change in pull request #8728:
URL: https://github.com/apache/arrow/pull/8728#discussion_r528768308



##########
File path: cpp/src/arrow/compute/exec.cc
##########
@@ -272,79 +287,61 @@ class NullPropagator {
     return Status::OK();
   }
 
-  Result<bool> ShortCircuitIfAllNull() {
-    // An all-null value (scalar null or all-null array) gives us a short
-    // circuit opportunity
-    bool is_all_null = false;
-    std::shared_ptr<Buffer> all_null_bitmap;
+  Status AllNullShortCircuit() {
+    // OK, the output should be all null
+    output_->null_count = output_->length;
+
+    if (bitmap_preallocated_) {
+      BitUtil::SetBitsTo(bitmap_, output_->offset, output_->length, false);
+      return Status::OK();
+    }
 
     // Walk all the values with nulls instead of breaking on the first in case
     // we find a bitmap that can be reused in the non-preallocated case
-    for (const Datum* value : values_with_nulls_) {
-      if (value->type()->id() == Type::NA) {
-        // No bitmap
-        is_all_null = true;
-      } else if (value->kind() == Datum::ARRAY) {
-        const ArrayData& arr = *value->array();
-        if (arr.null_count.load() == arr.length) {
-          // Pluck the all null bitmap so we can set it in the output if it was
-          // not pre-allocated
-          all_null_bitmap = arr.buffers[0];
-          is_all_null = true;
-        }
-      } else {
-        // Scalar
-        is_all_null = !value->scalar()->is_valid;
+    for (const ArrayData* arr : arrays_with_nulls_) {
+      if (arr->null_count.load() == arr->length && arr->buffers[0] != nullptr) {
+        // Reuse this all null bitmap
+        output_->buffers[0] = arr->buffers[0];
+        return Status::OK();
       }
     }
-    if (!is_all_null) {
-      return false;
-    }
 
-    // OK, the output should be all null
-    output_->null_count = output_->length;
-
-    if (!bitmap_preallocated_ && all_null_bitmap) {
-      // If we did not pre-allocate memory, and we observed an all-null bitmap,
-      // then we can zero-copy it into the output
-      output_->buffers[0] = std::move(all_null_bitmap);
-    } else {
-      RETURN_NOT_OK(EnsureAllocated());
-      BitUtil::SetBitsTo(bitmap_, output_->offset, output_->length, false);
-    }
-    return true;
+    RETURN_NOT_OK(EnsureAllocated());
+    BitUtil::SetBitsTo(bitmap_, output_->offset, output_->length, false);
+    return Status::OK();
   }
 
   Status PropagateSingle() {
     // One array
-    const ArrayData& arr = *values_with_nulls_[0]->array();
+    const ArrayData& arr = *arrays_with_nulls_[0];
     const std::shared_ptr<Buffer>& arr_bitmap = arr.buffers[0];
 
     // Reuse the null count if it's known
     output_->null_count = arr.null_count.load();
 
     if (bitmap_preallocated_) {
       CopyBitmap(arr_bitmap->data(), arr.offset, arr.length, bitmap_, output_->offset);
+      return Status::OK();
+    }
+
+    // Two cases when memory was not pre-allocated:
+    //
+    // * Offset is zero: we reuse the bitmap as is
+    // * Offset is nonzero but a multiple of 8: we can slice the bitmap
+    // * Offset is not a multiple of 8: we must allocate and use CopyBitmap
+    //
+    // Keep in mind that output_->offset is not permitted to be nonzero when
+    // the bitmap is not preallocated, and that precondition is asserted
+    // higher in the call stack.
+    if (arr.offset == 0) {
+      output_->buffers[0] = arr_bitmap;
+    } else if (arr.offset % 8 == 0) {
+      output_->buffers[0] =
+          SliceBuffer(arr_bitmap, arr.offset / 8, BitUtil::BytesForBits(arr.length));

Review comment:
       Sounds worthwhile but I'd prefer to do it in follow up




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [arrow] cyb70289 commented on a change in pull request #8728: ARROW-10669: [C++][Compute] Support scalar arguments to Boolean compute functions

Posted by GitBox <gi...@apache.org>.
cyb70289 commented on a change in pull request #8728:
URL: https://github.com/apache/arrow/pull/8728#discussion_r528486641



##########
File path: cpp/src/arrow/compute/api_scalar.h
##########
@@ -253,6 +253,33 @@ Result<Datum> KleeneOr(const Datum& left, const Datum& right, ExecContext* ctx =
 ARROW_EXPORT
 Result<Datum> Xor(const Datum& left, const Datum& right, ExecContext* ctx = NULLPTR);
 
+/// \brief Element-wise AND NOT of two boolean datums which always propagates nulls
+/// (null and not true is null).
+///
+/// \param[in] left left operand
+/// \param[in] right right operand
+/// \param[in] ctx the function execution context, optional
+/// \return the resulting datum
+///
+/// \since 1.0.0

Review comment:
       3.0.0? or 2.0.0?




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [arrow] github-actions[bot] commented on pull request #8728: ARROW-10669: [C++][Compute] Support scalar arguments to Boolean compute functions

Posted by GitBox <gi...@apache.org>.
github-actions[bot] commented on pull request #8728:
URL: https://github.com/apache/arrow/pull/8728#issuecomment-731442083


   https://issues.apache.org/jira/browse/ARROW-10669


----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [arrow] bkietz commented on a change in pull request #8728: ARROW-10669: [C++][Compute] Support scalar arguments to Boolean compute functions

Posted by GitBox <gi...@apache.org>.
bkietz commented on a change in pull request #8728:
URL: https://github.com/apache/arrow/pull/8728#discussion_r528767005



##########
File path: cpp/src/arrow/compute/kernels/scalar_boolean_test.cc
##########
@@ -27,185 +27,114 @@
 #include "arrow/compute/kernels/test_util.h"
 #include "arrow/testing/gtest_common.h"
 #include "arrow/testing/gtest_util.h"
+#include "arrow/util/checked_cast.h"
 
 namespace arrow {
+
+using internal::checked_pointer_cast;
+
 namespace compute {
 
-using BinaryKernelFunc =
-    std::function<Result<Datum>(const Datum&, const Datum&, ExecContext*)>;
-
-class TestBooleanKernel : public TestBase {
- public:
-  void TestArrayBinary(const BinaryKernelFunc& kernel, const std::shared_ptr<Array>& left,
-                       const std::shared_ptr<Array>& right,
-                       const std::shared_ptr<Array>& expected) {
-    ASSERT_OK_AND_ASSIGN(Datum result, kernel(left, right, &ctx_));
-    ASSERT_EQ(Datum::ARRAY, result.kind());
-    std::shared_ptr<Array> result_array = result.make_array();
-    ASSERT_OK(result_array->ValidateFull());
-    AssertArraysEqual(*expected, *result_array, /*verbose=*/true);
-
-    ASSERT_OK_AND_ASSIGN(result, kernel(right, left, &ctx_));
-    ASSERT_EQ(Datum::ARRAY, result.kind());
-    result_array = result.make_array();
-    ASSERT_OK(result_array->ValidateFull());
-    AssertArraysEqual(*expected, *result_array, /*verbose=*/true);
-  }
+void CheckBooleanScalarArrayBinary(std::string func_name, Datum array) {
+  for (std::shared_ptr<Scalar> scalar :
+       {std::make_shared<BooleanScalar>(), std::make_shared<BooleanScalar>(true),
+        std::make_shared<BooleanScalar>(false)}) {
+    ASSERT_OK_AND_ASSIGN(Datum actual, CallFunction(func_name, {Datum(scalar), array}));
 
-  void TestChunkedArrayBinary(const BinaryKernelFunc& kernel,
-                              const std::shared_ptr<ChunkedArray>& left,
-                              const std::shared_ptr<ChunkedArray>& right,
-                              const std::shared_ptr<ChunkedArray>& expected) {
-    ASSERT_OK_AND_ASSIGN(Datum result, kernel(left, right, &ctx_));
-    ASSERT_EQ(Datum::CHUNKED_ARRAY, result.kind());
-    std::shared_ptr<ChunkedArray> result_ca = result.chunked_array();
-    AssertChunkedEquivalent(*expected, *result_ca);
-
-    ASSERT_OK_AND_ASSIGN(result, kernel(right, left, &ctx_));
-    ASSERT_EQ(Datum::CHUNKED_ARRAY, result.kind());
-    result_ca = result.chunked_array();
-    AssertChunkedEquivalent(*expected, *result_ca);
-  }
+    ASSERT_OK_AND_ASSIGN(auto constant_array,
+                         MakeArrayFromScalar(*scalar, array.length()));
 
-  void TestBinaryKernel(const BinaryKernelFunc& kernel,
-                        const std::shared_ptr<Array>& left,
-                        const std::shared_ptr<Array>& right,
-                        const std::shared_ptr<Array>& expected) {
-    TestArrayBinary(kernel, left, right, expected);
-    TestArrayBinary(kernel, left->Slice(1), right->Slice(1), expected->Slice(1));
-
-    // ChunkedArray
-    auto cleft = std::make_shared<ChunkedArray>(ArrayVector{left, left->Slice(1)});
-    auto cright = std::make_shared<ChunkedArray>(ArrayVector{right, right->Slice(1)});
-    auto cexpected =
-        std::make_shared<ChunkedArray>(ArrayVector{expected, expected->Slice(1)});
-    TestChunkedArrayBinary(kernel, cleft, cright, cexpected);
-
-    // ChunkedArray with different chunks
-    cleft = std::make_shared<ChunkedArray>(ArrayVector{
-        left->Slice(0, 1), left->Slice(1), left->Slice(1, 1), left->Slice(2)});
-    TestChunkedArrayBinary(kernel, cleft, cright, cexpected);
-  }
+    ASSERT_OK_AND_ASSIGN(Datum expected,
+                         CallFunction(func_name, {Datum(constant_array), array}));
+    AssertDatumsEqual(expected, actual);
 
-  void TestBinaryKernelPropagate(const BinaryKernelFunc& kernel,
-                                 const std::vector<bool>& left,
-                                 const std::vector<bool>& right,
-                                 const std::vector<bool>& expected,
-                                 const std::vector<bool>& expected_nulls) {
-    auto type = boolean();
-    TestBinaryKernel(kernel, _MakeArray<BooleanType, bool>(type, left, {}),
-                     _MakeArray<BooleanType, bool>(type, right, {}),
-                     _MakeArray<BooleanType, bool>(type, expected, {}));
-
-    TestBinaryKernel(kernel, _MakeArray<BooleanType, bool>(type, left, left),
-                     _MakeArray<BooleanType, bool>(type, right, right),
-                     _MakeArray<BooleanType, bool>(type, expected, expected_nulls));
+    ASSERT_OK_AND_ASSIGN(actual, CallFunction(func_name, {array, Datum(scalar)}));
+    ASSERT_OK_AND_ASSIGN(expected,
+                         CallFunction(func_name, {array, Datum(constant_array)}));
+    AssertDatumsEqual(expected, actual);
   }
-
- protected:
-  ExecContext ctx_;
-};
-
-TEST_F(TestBooleanKernel, Invert) {
-  std::vector<bool> values1 = {true, false, true, false};
-  std::vector<bool> values2 = {false, true, false, true};
-
-  auto type = boolean();
-  auto a1 = _MakeArray<BooleanType, bool>(type, values1, {true, true, true, false});
-  auto a2 = _MakeArray<BooleanType, bool>(type, values2, {true, true, true, false});
-
-  // Plain array
-  ASSERT_OK_AND_ASSIGN(Datum result, Invert(a1));
-  ASSERT_EQ(Datum::ARRAY, result.kind());
-  ASSERT_ARRAYS_EQUAL(*a2, *result.make_array());
-
-  // Array with offset
-  ASSERT_OK_AND_ASSIGN(result, Invert(a1->Slice(1)));
-  ASSERT_EQ(Datum::ARRAY, result.kind());
-  ASSERT_ARRAYS_EQUAL(*a2->Slice(1), *result.make_array());
-
-  // ChunkedArray
-  std::vector<std::shared_ptr<Array>> ca1_arrs = {a1, a1->Slice(1)};
-  auto ca1 = std::make_shared<ChunkedArray>(ca1_arrs);
-  std::vector<std::shared_ptr<Array>> ca2_arrs = {a2, a2->Slice(1)};
-  auto ca2 = std::make_shared<ChunkedArray>(ca2_arrs);
-  ASSERT_OK_AND_ASSIGN(result, Invert(ca1));
-  ASSERT_EQ(Datum::CHUNKED_ARRAY, result.kind());
-  std::shared_ptr<ChunkedArray> result_ca = result.chunked_array();
-
-  // Contiguous preallocation, so a single output chunk even though there were
-  // two input chunks
-  ASSERT_EQ(1, result_ca->num_chunks());
-  AssertChunkedEquivalent(*ca2, *result_ca);
 }
 
-TEST_F(TestBooleanKernel, InvertEmptyArray) {
-  std::vector<std::shared_ptr<Buffer>> data_buffers(2);
-  Datum input;
-  input.value = ArrayData::Make(boolean(), 0 /* length */, std::move(data_buffers),
-                                0 /* null_count */);
-
-  ASSERT_OK_AND_ASSIGN(Datum result, Invert(input));
-  ASSERT_ARRAYS_EQUAL(*input.make_array(), *result.make_array());
+TEST(TestBooleanKernel, Invert) {
+  auto arr =
+      ArrayFromJSON(boolean(), "[true, false, true, null, false, true, false, null]");
+  auto expected =
+      ArrayFromJSON(boolean(), "[false, true, false, null, true, false, true, null]");
+  CheckScalarUnary("invert", arr, expected);
 }
 
-TEST_F(TestBooleanKernel, BinaryOpOnEmptyArray) {
-  auto type = boolean();
-  std::vector<std::shared_ptr<Buffer>> data_buffers(2);
-  Datum input;
-  input.value = ArrayData::Make(boolean(), 0 /* length */, std::move(data_buffers),
-                                0 /* null_count */);
-
-  ASSERT_OK_AND_ASSIGN(Datum result, And(input, input));
-  // Result should be empty as well.
-  ASSERT_ARRAYS_EQUAL(*input.make_array(), *result.make_array());
+TEST(TestBooleanKernel, And) {
+  auto left = ArrayFromJSON(boolean(), "    [true, true,  true, false, false, null]");
+  auto right = ArrayFromJSON(boolean(), "   [true, false, null, false, null,  null]");
+  auto expected = ArrayFromJSON(boolean(), "[true, false, null, false, null,  null]");
+  // CheckScalarBinary("and", left, right, expected);

Review comment:
       Not a good one; I was debugging the `array, scalar` case and wanted to skip this. Will uncomment




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [arrow] bkietz commented on a change in pull request #8728: ARROW-10669: [C++][Compute] Support scalar arguments to Boolean compute functions

Posted by GitBox <gi...@apache.org>.
bkietz commented on a change in pull request #8728:
URL: https://github.com/apache/arrow/pull/8728#discussion_r528767500



##########
File path: cpp/src/arrow/compute/kernels/codegen_internal.h
##########
@@ -427,12 +427,28 @@ void SimpleUnary(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
 //
 // static void Call(KernelContext*, const ArrayData& arg0, const ArrayData& arg1,
 //                  ArrayData* out)
+// static void Call(KernelContext*, const ArrayData& arg0, const Scalar& arg1,
+//                  ArrayData* out)
+// static void Call(KernelContext*, const Scalar& arg0, const ArrayData& arg1,
+//                  ArrayData* out)
+// static void Call(KernelContext*, const Scalar& arg0, const Scalar& arg1,
+//                  Scalar* out)

Review comment:
       That's implicit in the contract of ScalarFunctions though




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [arrow] pitrou commented on a change in pull request #8728: ARROW-10669: [C++][Compute] Support scalar arguments to Boolean compute functions

Posted by GitBox <gi...@apache.org>.
pitrou commented on a change in pull request #8728:
URL: https://github.com/apache/arrow/pull/8728#discussion_r528768746



##########
File path: cpp/src/arrow/compute/kernels/scalar_boolean_test.cc
##########
@@ -27,185 +27,114 @@
 #include "arrow/compute/kernels/test_util.h"
 #include "arrow/testing/gtest_common.h"
 #include "arrow/testing/gtest_util.h"
+#include "arrow/util/checked_cast.h"
 
 namespace arrow {
+
+using internal::checked_pointer_cast;
+
 namespace compute {
 
-using BinaryKernelFunc =
-    std::function<Result<Datum>(const Datum&, const Datum&, ExecContext*)>;
-
-class TestBooleanKernel : public TestBase {
- public:
-  void TestArrayBinary(const BinaryKernelFunc& kernel, const std::shared_ptr<Array>& left,
-                       const std::shared_ptr<Array>& right,
-                       const std::shared_ptr<Array>& expected) {
-    ASSERT_OK_AND_ASSIGN(Datum result, kernel(left, right, &ctx_));
-    ASSERT_EQ(Datum::ARRAY, result.kind());
-    std::shared_ptr<Array> result_array = result.make_array();
-    ASSERT_OK(result_array->ValidateFull());
-    AssertArraysEqual(*expected, *result_array, /*verbose=*/true);
-
-    ASSERT_OK_AND_ASSIGN(result, kernel(right, left, &ctx_));
-    ASSERT_EQ(Datum::ARRAY, result.kind());
-    result_array = result.make_array();
-    ASSERT_OK(result_array->ValidateFull());
-    AssertArraysEqual(*expected, *result_array, /*verbose=*/true);
-  }
+void CheckBooleanScalarArrayBinary(std::string func_name, Datum array) {
+  for (std::shared_ptr<Scalar> scalar :
+       {std::make_shared<BooleanScalar>(), std::make_shared<BooleanScalar>(true),
+        std::make_shared<BooleanScalar>(false)}) {
+    ASSERT_OK_AND_ASSIGN(Datum actual, CallFunction(func_name, {Datum(scalar), array}));
 
-  void TestChunkedArrayBinary(const BinaryKernelFunc& kernel,
-                              const std::shared_ptr<ChunkedArray>& left,
-                              const std::shared_ptr<ChunkedArray>& right,
-                              const std::shared_ptr<ChunkedArray>& expected) {
-    ASSERT_OK_AND_ASSIGN(Datum result, kernel(left, right, &ctx_));
-    ASSERT_EQ(Datum::CHUNKED_ARRAY, result.kind());
-    std::shared_ptr<ChunkedArray> result_ca = result.chunked_array();
-    AssertChunkedEquivalent(*expected, *result_ca);
-
-    ASSERT_OK_AND_ASSIGN(result, kernel(right, left, &ctx_));
-    ASSERT_EQ(Datum::CHUNKED_ARRAY, result.kind());
-    result_ca = result.chunked_array();
-    AssertChunkedEquivalent(*expected, *result_ca);
-  }
+    ASSERT_OK_AND_ASSIGN(auto constant_array,
+                         MakeArrayFromScalar(*scalar, array.length()));
 
-  void TestBinaryKernel(const BinaryKernelFunc& kernel,
-                        const std::shared_ptr<Array>& left,
-                        const std::shared_ptr<Array>& right,
-                        const std::shared_ptr<Array>& expected) {
-    TestArrayBinary(kernel, left, right, expected);
-    TestArrayBinary(kernel, left->Slice(1), right->Slice(1), expected->Slice(1));
-
-    // ChunkedArray
-    auto cleft = std::make_shared<ChunkedArray>(ArrayVector{left, left->Slice(1)});
-    auto cright = std::make_shared<ChunkedArray>(ArrayVector{right, right->Slice(1)});
-    auto cexpected =
-        std::make_shared<ChunkedArray>(ArrayVector{expected, expected->Slice(1)});
-    TestChunkedArrayBinary(kernel, cleft, cright, cexpected);
-
-    // ChunkedArray with different chunks
-    cleft = std::make_shared<ChunkedArray>(ArrayVector{
-        left->Slice(0, 1), left->Slice(1), left->Slice(1, 1), left->Slice(2)});
-    TestChunkedArrayBinary(kernel, cleft, cright, cexpected);
-  }
+    ASSERT_OK_AND_ASSIGN(Datum expected,
+                         CallFunction(func_name, {Datum(constant_array), array}));
+    AssertDatumsEqual(expected, actual);
 
-  void TestBinaryKernelPropagate(const BinaryKernelFunc& kernel,
-                                 const std::vector<bool>& left,
-                                 const std::vector<bool>& right,
-                                 const std::vector<bool>& expected,
-                                 const std::vector<bool>& expected_nulls) {
-    auto type = boolean();
-    TestBinaryKernel(kernel, _MakeArray<BooleanType, bool>(type, left, {}),
-                     _MakeArray<BooleanType, bool>(type, right, {}),
-                     _MakeArray<BooleanType, bool>(type, expected, {}));
-
-    TestBinaryKernel(kernel, _MakeArray<BooleanType, bool>(type, left, left),
-                     _MakeArray<BooleanType, bool>(type, right, right),
-                     _MakeArray<BooleanType, bool>(type, expected, expected_nulls));
+    ASSERT_OK_AND_ASSIGN(actual, CallFunction(func_name, {array, Datum(scalar)}));
+    ASSERT_OK_AND_ASSIGN(expected,
+                         CallFunction(func_name, {array, Datum(constant_array)}));
+    AssertDatumsEqual(expected, actual);
   }
-
- protected:
-  ExecContext ctx_;
-};
-
-TEST_F(TestBooleanKernel, Invert) {
-  std::vector<bool> values1 = {true, false, true, false};
-  std::vector<bool> values2 = {false, true, false, true};
-
-  auto type = boolean();
-  auto a1 = _MakeArray<BooleanType, bool>(type, values1, {true, true, true, false});
-  auto a2 = _MakeArray<BooleanType, bool>(type, values2, {true, true, true, false});
-
-  // Plain array
-  ASSERT_OK_AND_ASSIGN(Datum result, Invert(a1));
-  ASSERT_EQ(Datum::ARRAY, result.kind());
-  ASSERT_ARRAYS_EQUAL(*a2, *result.make_array());
-
-  // Array with offset
-  ASSERT_OK_AND_ASSIGN(result, Invert(a1->Slice(1)));
-  ASSERT_EQ(Datum::ARRAY, result.kind());
-  ASSERT_ARRAYS_EQUAL(*a2->Slice(1), *result.make_array());
-
-  // ChunkedArray
-  std::vector<std::shared_ptr<Array>> ca1_arrs = {a1, a1->Slice(1)};
-  auto ca1 = std::make_shared<ChunkedArray>(ca1_arrs);
-  std::vector<std::shared_ptr<Array>> ca2_arrs = {a2, a2->Slice(1)};
-  auto ca2 = std::make_shared<ChunkedArray>(ca2_arrs);
-  ASSERT_OK_AND_ASSIGN(result, Invert(ca1));
-  ASSERT_EQ(Datum::CHUNKED_ARRAY, result.kind());
-  std::shared_ptr<ChunkedArray> result_ca = result.chunked_array();
-
-  // Contiguous preallocation, so a single output chunk even though there were
-  // two input chunks
-  ASSERT_EQ(1, result_ca->num_chunks());
-  AssertChunkedEquivalent(*ca2, *result_ca);
 }
 
-TEST_F(TestBooleanKernel, InvertEmptyArray) {
-  std::vector<std::shared_ptr<Buffer>> data_buffers(2);
-  Datum input;
-  input.value = ArrayData::Make(boolean(), 0 /* length */, std::move(data_buffers),
-                                0 /* null_count */);
-
-  ASSERT_OK_AND_ASSIGN(Datum result, Invert(input));
-  ASSERT_ARRAYS_EQUAL(*input.make_array(), *result.make_array());
+TEST(TestBooleanKernel, Invert) {
+  auto arr =
+      ArrayFromJSON(boolean(), "[true, false, true, null, false, true, false, null]");
+  auto expected =
+      ArrayFromJSON(boolean(), "[false, true, false, null, true, false, true, null]");
+  CheckScalarUnary("invert", arr, expected);
 }
 
-TEST_F(TestBooleanKernel, BinaryOpOnEmptyArray) {
-  auto type = boolean();
-  std::vector<std::shared_ptr<Buffer>> data_buffers(2);
-  Datum input;
-  input.value = ArrayData::Make(boolean(), 0 /* length */, std::move(data_buffers),
-                                0 /* null_count */);
-
-  ASSERT_OK_AND_ASSIGN(Datum result, And(input, input));
-  // Result should be empty as well.
-  ASSERT_ARRAYS_EQUAL(*input.make_array(), *result.make_array());
+TEST(TestBooleanKernel, And) {
+  auto left = ArrayFromJSON(boolean(), "    [true, true,  true, false, false, null]");
+  auto right = ArrayFromJSON(boolean(), "   [true, false, null, false, null,  null]");
+  auto expected = ArrayFromJSON(boolean(), "[true, false, null, false, null,  null]");
+  // CheckScalarBinary("and", left, right, expected);
+  CheckBooleanScalarArrayBinary("and", left);
 }
 
-TEST_F(TestBooleanKernel, And) {
-  std::vector<bool> values1 = {true, false, true, false, true, true};
-  std::vector<bool> values2 = {true, true, false, false, true, false};
-  std::vector<bool> values3 = {true, false, false, false, true, false};
-  TestBinaryKernelPropagate(And, values1, values2, values3, values3);
+TEST(TestBooleanKernel, Or) {
+  auto left = ArrayFromJSON(boolean(), "    [true, true,  true, false, false, null]");
+  auto right = ArrayFromJSON(boolean(), "   [true, false, null, false, null,  null]");
+  auto expected = ArrayFromJSON(boolean(), "[true, true,  null, false, null,  null]");
+  CheckScalarBinary("or", left, right, expected);
+  CheckBooleanScalarArrayBinary("or", left);
 }
 
-TEST_F(TestBooleanKernel, Or) {
-  std::vector<bool> values1 = {true, false, true, false, true, true};
-  std::vector<bool> values2 = {true, true, false, false, true, false};
-  std::vector<bool> values3 = {true, true, true, false, true, true};
-  std::vector<bool> values3_nulls = {true, false, false, false, true, false};
-  TestBinaryKernelPropagate(Or, values1, values2, values3, values3_nulls);
+TEST(TestBooleanKernel, Xor) {
+  auto left = ArrayFromJSON(boolean(), "    [true,  true,  true, false, false, null]");
+  auto right = ArrayFromJSON(boolean(), "   [true,  false, null, false, null,  null]");
+  auto expected = ArrayFromJSON(boolean(), "[false, true,  null, false, null,  null]");
+  CheckScalarBinary("xor", left, right, expected);
+  CheckBooleanScalarArrayBinary("xor", left);
 }
 
-TEST_F(TestBooleanKernel, Xor) {
-  std::vector<bool> values1 = {true, false, true, false, true, true};
-  std::vector<bool> values2 = {true, true, false, false, true, false};
-  std::vector<bool> values3 = {false, true, true, false, false, true};
-  std::vector<bool> values3_nulls = {true, false, false, false, true, false};
-  TestBinaryKernelPropagate(Xor, values1, values2, values3, values3_nulls);
+TEST(TestBooleanKernel, AndNot) {
+  auto left = ArrayFromJSON(
+      boolean(), "[true,  true,  true, false, false, false, null, null,  null]");
+  auto right = ArrayFromJSON(
+      boolean(), "[true,  false, null, true,  false, null,  true, false, null]");
+  auto expected = ArrayFromJSON(
+      boolean(), "[false, true,  null, false, false, null,  null, null,  null]");
+  CheckScalarBinary("and_not", left, right, expected);
+  CheckBooleanScalarArrayBinary("and_not", left);
 }
 
-TEST_F(TestBooleanKernel, KleeneAnd) {
+TEST(TestBooleanKernel, KleeneAnd) {
   auto left = ArrayFromJSON(boolean(), "    [true, true,  true, false, false, null]");
   auto right = ArrayFromJSON(boolean(), "   [true, false, null, false, null,  null]");
   auto expected = ArrayFromJSON(boolean(), "[true, false, null, false, false, null]");
-  TestBinaryKernel(KleeneAnd, left, right, expected);
+  CheckScalarBinary("and_kleene", left, right, expected);
+  CheckBooleanScalarArrayBinary("and_kleene", left);
 
   left = ArrayFromJSON(boolean(), "    [true, true,  false, null, null]");
   right = ArrayFromJSON(boolean(), "   [true, false, false, true, false]");
   expected = ArrayFromJSON(boolean(), "[true, false, false, null, false]");
-  TestBinaryKernel(KleeneAnd, left, right, expected);
+  CheckScalarBinary("and_kleene", left, right, expected);
+  CheckBooleanScalarArrayBinary("and_kleene", left);

Review comment:
       No, I mean you test a set of left / right inputs above, and another set of inputs here. Is there a reason to them separately? (just trying to understand if there is a particular reason...)




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org