You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by uw...@apache.org on 2018/08/25 11:04:47 UTC

[arrow] branch master updated: ARROW-1563: [C++] Implement logical unary and binary kernels for boolean arrays

This is an automated email from the ASF dual-hosted git repository.

uwe pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 588c1fa  ARROW-1563: [C++] Implement logical unary and binary kernels for boolean arrays
588c1fa is described below

commit 588c1faadf67e444817bd3accd8e5766eb0cf89a
Author: Korn, Uwe <Uw...@blue-yonder.com>
AuthorDate: Sat Aug 25 13:04:39 2018 +0200

    ARROW-1563: [C++] Implement logical unary and binary kernels for boolean arrays
    
    Author: Korn, Uwe <Uw...@blue-yonder.com>
    
    Closes #2461 from xhochy/ARROW-1563 and squashes the following commits:
    
    da0a47a8 <Korn, Uwe> Final review comments
    b86c5bf9 <Korn, Uwe> Even less repetition
    941135f2 <Korn, Uwe> DRY and clang-format
    c6de1146 <Korn, Uwe> ARROW-1563: Add (x)or kernels
    f0d229c7 <Korn, Uwe> ARROW-1563: Add AND kernel
    5eab2fce <Korn, Uwe> ARROW-1563: Implement Invert kernel
---
 cpp/src/arrow/CMakeLists.txt                   |   1 +
 cpp/src/arrow/compute/compute-test.cc          | 177 +++++++++++++++++++++++++
 cpp/src/arrow/compute/kernel.h                 |  12 ++
 cpp/src/arrow/compute/kernels/CMakeLists.txt   |   1 +
 cpp/src/arrow/compute/kernels/boolean.cc       | 150 +++++++++++++++++++++
 cpp/src/arrow/compute/kernels/boolean.h        |  83 ++++++++++++
 cpp/src/arrow/compute/kernels/util-internal.cc |  80 +++++++++++
 cpp/src/arrow/compute/kernels/util-internal.h  |   6 +
 cpp/src/arrow/table.cc                         |   3 +
 cpp/src/arrow/table.h                          |   1 +
 cpp/src/arrow/util/bit-util-test.cc            | 125 +++++++++++++----
 cpp/src/arrow/util/bit-util.cc                 |  90 ++++++++++---
 cpp/src/arrow/util/bit-util.h                  |  23 ++++
 13 files changed, 703 insertions(+), 49 deletions(-)

diff --git a/cpp/src/arrow/CMakeLists.txt b/cpp/src/arrow/CMakeLists.txt
index d96580e..1f0a625 100644
--- a/cpp/src/arrow/CMakeLists.txt
+++ b/cpp/src/arrow/CMakeLists.txt
@@ -56,6 +56,7 @@ if (ARROW_COMPUTE)
   add_subdirectory(compute)
   set(ARROW_SRCS ${ARROW_SRCS}
     compute/context.cc
+    compute/kernels/boolean.cc
     compute/kernels/cast.cc
     compute/kernels/hash.cc
     compute/kernels/util-internal.cc
diff --git a/cpp/src/arrow/compute/compute-test.cc b/cpp/src/arrow/compute/compute-test.cc
index 269b9dd..39f5357 100644
--- a/cpp/src/arrow/compute/compute-test.cc
+++ b/cpp/src/arrow/compute/compute-test.cc
@@ -40,8 +40,10 @@
 
 #include "arrow/compute/context.h"
 #include "arrow/compute/kernel.h"
+#include "arrow/compute/kernels/boolean.h"
 #include "arrow/compute/kernels/cast.h"
 #include "arrow/compute/kernels/hash.h"
+#include "arrow/compute/kernels/util-internal.h"
 
 using std::shared_ptr;
 using std::vector;
@@ -1285,5 +1287,180 @@ TEST_F(TestHashKernel, ChunkedArrayInvoke) {
   ASSERT_TRUE(encoded_out.chunked_array()->Equals(*dict_carr));
 }
 
+struct KernelFunc {
+  virtual Status Call(FunctionContext* ctx, const Datum& left, const Datum& right,
+                      Datum* out) const = 0;
+
+  virtual ~KernelFunc() = default;
+};
+
+struct AndKernelFunc : KernelFunc {
+  Status Call(FunctionContext* ctx, const Datum& left, const Datum& right,
+              Datum* out) const override {
+    return And(ctx, left, right, out);
+  }
+};
+
+struct OrKernelFunc : KernelFunc {
+  Status Call(FunctionContext* ctx, const Datum& left, const Datum& right,
+              Datum* out) const override {
+    return Or(ctx, left, right, out);
+  }
+};
+
+struct XorKernelFunc : KernelFunc {
+  Status Call(FunctionContext* ctx, const Datum& left, const Datum& right,
+              Datum* out) const override {
+    return Xor(ctx, left, right, out);
+  }
+};
+
+class TestBooleanKernel : public ComputeFixture, public TestBase {
+ public:
+  void TestArrayBinary(const KernelFunc& kernel, const std::shared_ptr<Array>& left,
+                       const std::shared_ptr<Array>& right,
+                       const std::shared_ptr<Array>& expected) {
+    Datum result;
+    ASSERT_OK(kernel.Call(&this->ctx_, Datum(left), Datum(right), &result));
+    ASSERT_EQ(Datum::ARRAY, result.kind());
+    std::shared_ptr<Array> result_array = result.make_array();
+    ASSERT_TRUE(result_array->Equals(expected));
+  }
+
+  void TestChunkedArrayBinary(const KernelFunc& kernel,
+                              const std::shared_ptr<ChunkedArray>& left,
+                              const std::shared_ptr<ChunkedArray>& right,
+                              const std::shared_ptr<ChunkedArray>& expected) {
+    Datum result;
+    std::shared_ptr<Array> result_array;
+    ASSERT_OK(kernel.Call(&this->ctx_, Datum(left), Datum(right), &result));
+    ASSERT_EQ(Datum::CHUNKED_ARRAY, result.kind());
+    std::shared_ptr<ChunkedArray> result_ca = result.chunked_array();
+    ASSERT_TRUE(result_ca->Equals(expected));
+  }
+
+  void TestBinaryKernel(const KernelFunc& kernel, const std::vector<bool>& values1,
+                        const std::vector<bool>& values2,
+                        const std::vector<bool>& values3,
+                        const std::vector<bool>& values3_nulls) {
+    auto type = boolean();
+    auto a1 = _MakeArray<BooleanType, bool>(type, values1, {});
+    auto a2 = _MakeArray<BooleanType, bool>(type, values2, {});
+    auto a3 = _MakeArray<BooleanType, bool>(type, values3, {});
+    auto a1_nulls = _MakeArray<BooleanType, bool>(type, values1, values1);
+    auto a2_nulls = _MakeArray<BooleanType, bool>(type, values2, values2);
+    auto a3_nulls = _MakeArray<BooleanType, bool>(type, values3, values3_nulls);
+
+    TestArrayBinary(kernel, a1, a2, a3);
+    TestArrayBinary(kernel, a1_nulls, a2_nulls, a3_nulls);
+    TestArrayBinary(kernel, a1->Slice(1), a2->Slice(1), a3->Slice(1));
+    TestArrayBinary(kernel, a1_nulls->Slice(1), a2_nulls->Slice(1), a3_nulls->Slice(1));
+
+    // ChunkedArray
+    std::vector<std::shared_ptr<Array>> ca1_arrs = {a1, a1->Slice(1)};
+    auto ca1 = std::make_shared<ChunkedArray>(ca1_arrs);
+    std::vector<std::shared_ptr<Array>> ca2_arrs = {a2, a2->Slice(1)};
+    auto ca2 = std::make_shared<ChunkedArray>(ca2_arrs);
+    std::vector<std::shared_ptr<Array>> ca3_arrs = {a3, a3->Slice(1)};
+    auto ca3 = std::make_shared<ChunkedArray>(ca3_arrs);
+    TestChunkedArrayBinary(kernel, ca1, ca2, ca3);
+
+    // ChunkedArray with different chunks
+    std::vector<std::shared_ptr<Array>> ca4_arrs = {a1->Slice(0, 1), a1->Slice(1),
+                                                    a1->Slice(1, 1), a1->Slice(2)};
+    auto ca4 = std::make_shared<ChunkedArray>(ca4_arrs);
+    TestChunkedArrayBinary(kernel, ca4, ca2, ca3);
+  }
+};
+
+TEST_F(TestBooleanKernel, Invert) {
+  vector<bool> values1 = {true, false, true};
+  vector<bool> values2 = {false, true, false};
+
+  auto type = boolean();
+  auto a1 = _MakeArray<BooleanType, bool>(type, values1, {});
+  auto a2 = _MakeArray<BooleanType, bool>(type, values2, {});
+
+  // Plain array
+  Datum result;
+  ASSERT_OK(Invert(&this->ctx_, Datum(a1), &result));
+  ASSERT_EQ(Datum::ARRAY, result.kind());
+  std::shared_ptr<Array> result_array = result.make_array();
+  ASSERT_TRUE(result_array->Equals(a2));
+
+  // Array with offset
+  ASSERT_OK(Invert(&this->ctx_, Datum(a1->Slice(1)), &result));
+  ASSERT_EQ(Datum::ARRAY, result.kind());
+  result_array = result.make_array();
+  ASSERT_TRUE(result_array->Equals(a2->Slice(1)));
+
+  // ChunkedArray
+  std::vector<std::shared_ptr<Array>> ca1_arrs = {a1, a1->Slice(1)};
+  auto ca1 = std::make_shared<ChunkedArray>(ca1_arrs);
+  std::vector<std::shared_ptr<Array>> ca2_arrs = {a2, a2->Slice(1)};
+  auto ca2 = std::make_shared<ChunkedArray>(ca2_arrs);
+  ASSERT_OK(Invert(&this->ctx_, Datum(ca1), &result));
+  ASSERT_EQ(Datum::CHUNKED_ARRAY, result.kind());
+  std::shared_ptr<ChunkedArray> result_ca = result.chunked_array();
+  ASSERT_TRUE(result_ca->Equals(ca2));
+}
+
+TEST_F(TestBooleanKernel, And) {
+  AndKernelFunc kernel;
+  vector<bool> values1 = {true, false, true, false, true, true};
+  vector<bool> values2 = {true, true, false, false, true, false};
+  vector<bool> values3 = {true, false, false, false, true, false};
+  TestBinaryKernel(kernel, values1, values2, values3, values3);
+}
+
+TEST_F(TestBooleanKernel, Or) {
+  OrKernelFunc kernel;
+  vector<bool> values1 = {true, false, true, false, true, true};
+  vector<bool> values2 = {true, true, false, false, true, false};
+  vector<bool> values3 = {true, true, true, false, true, true};
+  vector<bool> values3_nulls = {true, false, false, false, true, false};
+  TestBinaryKernel(kernel, values1, values2, values3, values3_nulls);
+}
+
+TEST_F(TestBooleanKernel, Xor) {
+  XorKernelFunc kernel;
+  vector<bool> values1 = {true, false, true, false, true, true};
+  vector<bool> values2 = {true, true, false, false, true, false};
+  vector<bool> values3 = {false, true, true, false, false, true};
+  vector<bool> values3_nulls = {true, false, false, false, true, false};
+  TestBinaryKernel(kernel, values1, values2, values3, values3_nulls);
+}
+
+class TestInvokeBinaryKernel : public ComputeFixture, public TestBase {};
+
+class DummyBinaryKernel : public BinaryKernel {
+  Status Call(FunctionContext* ctx, const Datum& left, const Datum& right,
+              Datum* out) override {
+    return Status::OK();
+  }
+};
+
+TEST_F(TestInvokeBinaryKernel, Exceptions) {
+  DummyBinaryKernel kernel;
+  std::vector<Datum> outputs;
+  std::shared_ptr<Table> table;
+  vector<bool> values1 = {true, false, true};
+  vector<bool> values2 = {false, true, false};
+
+  auto type = boolean();
+  auto a1 = _MakeArray<BooleanType, bool>(type, values1, {});
+  auto a2 = _MakeArray<BooleanType, bool>(type, values2, {});
+
+  // Left is not an array-like
+  ASSERT_RAISES(Invalid, detail::InvokeBinaryArrayKernel(
+                             &this->ctx_, &kernel, Datum(table), Datum(a2), &outputs));
+  // Right is not an array-like
+  ASSERT_RAISES(Invalid, detail::InvokeBinaryArrayKernel(&this->ctx_, &kernel, Datum(a1),
+                                                         Datum(table), &outputs));
+  // Different sized inputs
+  ASSERT_RAISES(Invalid, detail::InvokeBinaryArrayKernel(&this->ctx_, &kernel, Datum(a1),
+                                                         Datum(a1->Slice(1)), &outputs));
+}
+
 }  // namespace compute
 }  // namespace arrow
diff --git a/cpp/src/arrow/compute/kernel.h b/cpp/src/arrow/compute/kernel.h
index 3691f4a..fd596f1 100644
--- a/cpp/src/arrow/compute/kernel.h
+++ b/cpp/src/arrow/compute/kernel.h
@@ -103,6 +103,10 @@ struct ARROW_EXPORT Datum {
     return util::get<std::shared_ptr<ArrayData>>(this->value);
   }
 
+  std::shared_ptr<Array> make_array() const {
+    return MakeArray(util::get<std::shared_ptr<ArrayData>>(this->value));
+  }
+
   std::shared_ptr<ChunkedArray> chunked_array() const {
     return util::get<std::shared_ptr<ChunkedArray>>(this->value);
   }
@@ -135,6 +139,14 @@ class ARROW_EXPORT UnaryKernel : public OpKernel {
   virtual Status Call(FunctionContext* ctx, const Datum& input, Datum* out) = 0;
 };
 
+/// \class BinaryKernel
+/// \brief An array-valued function of two input arguments
+class ARROW_EXPORT BinaryKernel : public OpKernel {
+ public:
+  virtual Status Call(FunctionContext* ctx, const Datum& left, const Datum& right,
+                      Datum* out) = 0;
+};
+
 }  // namespace compute
 }  // namespace arrow
 
diff --git a/cpp/src/arrow/compute/kernels/CMakeLists.txt b/cpp/src/arrow/compute/kernels/CMakeLists.txt
index 715e6c6..923c8c3 100644
--- a/cpp/src/arrow/compute/kernels/CMakeLists.txt
+++ b/cpp/src/arrow/compute/kernels/CMakeLists.txt
@@ -16,6 +16,7 @@
 # under the License.
 
 install(FILES
+  boolean.h
   cast.h
   hash.h
   DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/arrow/compute/kernels")
diff --git a/cpp/src/arrow/compute/kernels/boolean.cc b/cpp/src/arrow/compute/kernels/boolean.cc
new file mode 100644
index 0000000..64853be
--- /dev/null
+++ b/cpp/src/arrow/compute/kernels/boolean.cc
@@ -0,0 +1,150 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/compute/kernels/boolean.h"
+
+#include "arrow/compute/context.h"
+#include "arrow/compute/kernels/util-internal.h"
+#include "arrow/util/logging.h"
+
+#include <vector>
+
+namespace arrow {
+namespace compute {
+
+class InvertKernel : public UnaryKernel {
+  Status Call(FunctionContext* ctx, const Datum& input, Datum* out) override {
+    DCHECK_EQ(Datum::ARRAY, input.kind());
+
+    const ArrayData& in_data = *input.array();
+    ArrayData* result;
+
+    out->value = ArrayData::Make(boolean(), in_data.length);
+    result = out->array().get();
+
+    // Allocate or copy bitmap
+    result->null_count = in_data.null_count;
+    std::shared_ptr<Buffer> validity_bitmap = in_data.buffers[0];
+    if (in_data.offset != 0) {
+      RETURN_NOT_OK(CopyBitmap(ctx->memory_pool(), validity_bitmap->data(),
+                               in_data.offset, in_data.length, &validity_bitmap));
+    }
+    result->buffers.push_back(validity_bitmap);
+
+    // Allocate output data buffer
+    std::shared_ptr<Buffer> data_buffer;
+    RETURN_NOT_OK(InvertBitmap(ctx->memory_pool(), in_data.buffers[1]->data(),
+                               in_data.offset, in_data.length, &data_buffer));
+    result->buffers.push_back(data_buffer);
+
+    return Status::OK();
+  }
+};
+
+Status Invert(FunctionContext* ctx, const Datum& value, Datum* out) {
+  InvertKernel kernel;
+
+  std::vector<Datum> result;
+  RETURN_NOT_OK(detail::InvokeUnaryArrayKernel(ctx, &kernel, value, &result));
+
+  *out = detail::WrapDatumsLike(value, result);
+  return Status::OK();
+}
+
+class BinaryBooleanKernel : public BinaryKernel {
+  virtual Status Compute(FunctionContext* ctx, const ArrayData& left,
+                         const ArrayData& right, ArrayData* out) = 0;
+
+  Status Call(FunctionContext* ctx, const Datum& left, const Datum& right,
+              Datum* out) override {
+    DCHECK_EQ(Datum::ARRAY, right.kind());
+    DCHECK_EQ(Datum::ARRAY, left.kind());
+
+    const ArrayData& left_data = *left.array();
+    const ArrayData& right_data = *right.array();
+    ArrayData* result;
+    out->value = ArrayData::Make(boolean(), right_data.length);
+
+    result = out->array().get();
+
+    // If one of the arrays has a null value, the result will have a null.
+    std::shared_ptr<Buffer> validity_bitmap;
+    RETURN_NOT_OK(BitmapAnd(ctx->memory_pool(), left_data.buffers[0]->data(),
+                            left_data.offset, right_data.buffers[0]->data(),
+                            right_data.offset, right_data.length, 0, &validity_bitmap));
+    result->buffers.push_back(validity_bitmap);
+
+    result->null_count =
+        result->length - CountSetBits(validity_bitmap->data(), 0, result->length);
+
+    return Compute(ctx, left_data, right_data, result);
+  }
+};
+
+class AndKernel : public BinaryBooleanKernel {
+  Status Compute(FunctionContext* ctx, const ArrayData& left, const ArrayData& right,
+                 ArrayData* out) override {
+    std::shared_ptr<Buffer> data_bitmap;
+    RETURN_NOT_OK(BitmapAnd(ctx->memory_pool(), left.buffers[1]->data(), left.offset,
+                            right.buffers[1]->data(), right.offset, right.length, 0,
+                            &data_bitmap));
+    out->buffers.push_back(data_bitmap);
+    return Status::OK();
+  }
+};
+
+Status And(FunctionContext* ctx, const Datum& left, const Datum& right, Datum* out) {
+  AndKernel kernel;
+  return detail::InvokeBinaryArrayKernel(ctx, &kernel, left, right, out);
+}
+
+class OrKernel : public BinaryBooleanKernel {
+  Status Compute(FunctionContext* ctx, const ArrayData& left, const ArrayData& right,
+                 ArrayData* out) override {
+    std::shared_ptr<Buffer> data_bitmap;
+    RETURN_NOT_OK(BitmapOr(ctx->memory_pool(), left.buffers[1]->data(), left.offset,
+                           right.buffers[1]->data(), right.offset, right.length, 0,
+                           &data_bitmap));
+    out->buffers.push_back(data_bitmap);
+    return Status::OK();
+  }
+};
+
+Status Or(FunctionContext* ctx, const Datum& left, const Datum& right, Datum* out) {
+  OrKernel kernel;
+  return detail::InvokeBinaryArrayKernel(ctx, &kernel, left, right, out);
+}
+
+class XorKernel : public BinaryBooleanKernel {
+  Status Compute(FunctionContext* ctx, const ArrayData& left, const ArrayData& right,
+                 ArrayData* out) override {
+    std::shared_ptr<Buffer> data_bitmap;
+    RETURN_NOT_OK(BitmapXor(ctx->memory_pool(), left.buffers[1]->data(), left.offset,
+                            right.buffers[1]->data(), right.offset, right.length, 0,
+                            &data_bitmap));
+    out->buffers.push_back(data_bitmap);
+    return Status::OK();
+  }
+};
+
+Status Xor(FunctionContext* ctx, const Datum& left, const Datum& right, Datum* out) {
+  XorKernel kernel;
+  return detail::InvokeBinaryArrayKernel(ctx, &kernel, left, right, out);
+}
+
+}  // namespace compute
+}  // namespace arrow
diff --git a/cpp/src/arrow/compute/kernels/boolean.h b/cpp/src/arrow/compute/kernels/boolean.h
new file mode 100644
index 0000000..4c4072c
--- /dev/null
+++ b/cpp/src/arrow/compute/kernels/boolean.h
@@ -0,0 +1,83 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#ifndef ARROW_COMPUTE_KERNELS_BOOLEAN_H
+#define ARROW_COMPUTE_KERNELS_BOOLEAN_H
+
+#include <memory>
+
+#include "arrow/status.h"
+#include "arrow/util/visibility.h"
+
+#include "arrow/compute/kernel.h"
+
+namespace arrow {
+
+class Array;
+class ChunkedArray;
+class Column;
+class DataType;
+
+namespace compute {
+
+/// \brief Invert the values of a boolean datum
+/// \param[in] context the FunctionContext
+/// \param[in] value datum to invert
+/// \param[out] out resulting datum
+///
+/// \since 0.11.0
+/// \note API not yet finalized
+ARROW_EXPORT
+Status Invert(FunctionContext* context, const Datum& value, Datum* out);
+
+/// \brief Element-wise AND of two boolean datums
+/// \param[in] context the FunctionContext
+/// \param[in] left left operand (array)
+/// \param[in] right right operand (array)
+/// \param[out] out resulting datum
+///
+/// \since 0.11.0
+/// \note API not yet finalized
+ARROW_EXPORT
+Status And(FunctionContext* context, const Datum& left, const Datum& right, Datum* out);
+
+/// \brief Element-wise OR of two boolean datums
+/// \param[in] context the FunctionContext
+/// \param[in] left left operand (array)
+/// \param[in] right right operand (array)
+/// \param[out] out resulting datum
+///
+/// \since 0.11.0
+/// \note API not yet finalized
+ARROW_EXPORT
+Status Or(FunctionContext* context, const Datum& left, const Datum& right, Datum* out);
+
+/// \brief Element-wise XOR of two boolean datums
+/// \param[in] context the FunctionContext
+/// \param[in] left left operand (array)
+/// \param[in] right right operand (array)
+/// \param[out] out resulting datum
+///
+/// \since 0.11.0
+/// \note API not yet finalized
+ARROW_EXPORT
+Status Xor(FunctionContext* context, const Datum& left, const Datum& right, Datum* out);
+
+}  // namespace compute
+}  // namespace arrow
+
+#endif  // ARROW_COMPUTE_KERNELS_BOOLEAN_H
diff --git a/cpp/src/arrow/compute/kernels/util-internal.cc b/cpp/src/arrow/compute/kernels/util-internal.cc
index 0734365..c16e7aa 100644
--- a/cpp/src/arrow/compute/kernels/util-internal.cc
+++ b/cpp/src/arrow/compute/kernels/util-internal.cc
@@ -17,6 +17,7 @@
 
 #include "arrow/compute/kernels/util-internal.h"
 
+#include <algorithm>
 #include <memory>
 #include <vector>
 
@@ -50,6 +51,85 @@ Status InvokeUnaryArrayKernel(FunctionContext* ctx, UnaryKernel* kernel,
   return Status::OK();
 }
 
+Status InvokeBinaryArrayKernel(FunctionContext* ctx, BinaryKernel* kernel,
+                               const Datum& left, const Datum& right,
+                               std::vector<Datum>* outputs) {
+  int64_t left_length;
+  std::vector<std::shared_ptr<Array>> left_arrays;
+  if (left.kind() == Datum::ARRAY) {
+    left_length = left.array()->length;
+    left_arrays.push_back(left.make_array());
+  } else if (left.kind() == Datum::CHUNKED_ARRAY) {
+    left_length = left.chunked_array()->length();
+    left_arrays = left.chunked_array()->chunks();
+  } else {
+    return Status::Invalid("Left input Datum was not array-like");
+  }
+
+  int64_t right_length;
+  std::vector<std::shared_ptr<Array>> right_arrays;
+  if (right.kind() == Datum::ARRAY) {
+    right_length = right.array()->length;
+    right_arrays.push_back(right.make_array());
+  } else if (right.kind() == Datum::CHUNKED_ARRAY) {
+    right_length = right.chunked_array()->length();
+    right_arrays = right.chunked_array()->chunks();
+  } else {
+    return Status::Invalid("Right input Datum was not array-like");
+  }
+
+  if (right_length != left_length) {
+    return Status::Invalid("Right and left have different lengths");
+  }
+
+  // TODO: Remove duplication with ChunkedArray::Equals
+  int left_chunk_idx = 0;
+  int64_t left_start_idx = 0;
+  int right_chunk_idx = 0;
+  int64_t right_start_idx = 0;
+
+  int64_t elements_compared = 0;
+  while (elements_compared < left_length) {
+    const std::shared_ptr<Array> left_array = left_arrays[left_chunk_idx];
+    const std::shared_ptr<Array> right_array = right_arrays[right_chunk_idx];
+    int64_t common_length = std::min(left_array->length() - left_start_idx,
+                                     right_array->length() - right_start_idx);
+
+    std::shared_ptr<Array> left_op = left_array->Slice(left_start_idx, common_length);
+    std::shared_ptr<Array> right_op = right_array->Slice(right_start_idx, common_length);
+    Datum output;
+    RETURN_NOT_OK(kernel->Call(ctx, Datum(left_op), Datum(right_op), &output));
+    outputs->push_back(output);
+
+    elements_compared += common_length;
+
+    // If we have exhausted the current chunk, proceed to the next one individually.
+    if (left_start_idx + common_length == left_array->length()) {
+      left_chunk_idx++;
+      left_start_idx = 0;
+    } else {
+      left_start_idx += common_length;
+    }
+
+    if (right_start_idx + common_length == right_array->length()) {
+      right_chunk_idx++;
+      right_start_idx = 0;
+    } else {
+      right_start_idx += common_length;
+    }
+  }
+
+  return Status::OK();
+}
+
+Status InvokeBinaryArrayKernel(FunctionContext* ctx, BinaryKernel* kernel,
+                               const Datum& left, const Datum& right, Datum* output) {
+  std::vector<Datum> result;
+  RETURN_NOT_OK(InvokeBinaryArrayKernel(ctx, kernel, left, right, &result));
+  *output = detail::WrapDatumsLike(left, result);
+  return Status::OK();
+}
+
 Datum WrapArraysLike(const Datum& value,
                      const std::vector<std::shared_ptr<Array>>& arrays) {
   // Create right kind of datum
diff --git a/cpp/src/arrow/compute/kernels/util-internal.h b/cpp/src/arrow/compute/kernels/util-internal.h
index acfcb4b..8c4b869 100644
--- a/cpp/src/arrow/compute/kernels/util-internal.h
+++ b/cpp/src/arrow/compute/kernels/util-internal.h
@@ -52,6 +52,12 @@ namespace detail {
 Status InvokeUnaryArrayKernel(FunctionContext* ctx, UnaryKernel* kernel,
                               const Datum& value, std::vector<Datum>* outputs);
 
+Status InvokeBinaryArrayKernel(FunctionContext* ctx, BinaryKernel* kernel,
+                               const Datum& left, const Datum& right,
+                               std::vector<Datum>* outputs);
+Status InvokeBinaryArrayKernel(FunctionContext* ctx, BinaryKernel* kernel,
+                               const Datum& left, const Datum& right, Datum* output);
+
 Datum WrapArraysLike(const Datum& value,
                      const std::vector<std::shared_ptr<Array>>& arrays);
 
diff --git a/cpp/src/arrow/table.cc b/cpp/src/arrow/table.cc
index bb58503..9919085 100644
--- a/cpp/src/arrow/table.cc
+++ b/cpp/src/arrow/table.cc
@@ -191,6 +191,9 @@ Column::Column(const std::shared_ptr<Field>& field, const std::shared_ptr<Array>
 Column::Column(const std::string& name, const std::shared_ptr<Array>& data)
     : Column(::arrow::field(name, data->type()), data) {}
 
+Column::Column(const std::string& name, const std::shared_ptr<ChunkedArray>& data)
+    : Column(::arrow::field(name, data->type()), data) {}
+
 Column::Column(const std::shared_ptr<Field>& field,
                const std::shared_ptr<ChunkedArray>& data)
     : field_(field), data_(data) {}
diff --git a/cpp/src/arrow/table.h b/cpp/src/arrow/table.h
index 43c51a7..f80cf29 100644
--- a/cpp/src/arrow/table.h
+++ b/cpp/src/arrow/table.h
@@ -103,6 +103,7 @@ class ARROW_EXPORT Column {
 
   // Construct from name and array
   Column(const std::string& name, const std::shared_ptr<Array>& data);
+  Column(const std::string& name, const std::shared_ptr<ChunkedArray>& data);
 
   int64_t length() const { return data_->length(); }
 
diff --git a/cpp/src/arrow/util/bit-util-test.cc b/cpp/src/arrow/util/bit-util-test.cc
index adb09e1..48b4ff1 100644
--- a/cpp/src/arrow/util/bit-util-test.cc
+++ b/cpp/src/arrow/util/bit-util-test.cc
@@ -31,6 +31,7 @@
 
 #include "arrow/buffer.h"
 #include "arrow/memory_pool.h"
+#include "arrow/test-common.h"
 #include "arrow/test-util.h"
 #include "arrow/util/bit-stream-utils.h"
 #include "arrow/util/bit-util.h"
@@ -350,49 +351,117 @@ TYPED_TEST(TestGenerateBits, NormalOperation) {
   }
 }
 
-TEST(BitmapAnd, Aligned) {
-  std::shared_ptr<Buffer> left, right, out;
-  int64_t length;
+struct BitmapOperation {
+  virtual Status Call(MemoryPool* pool, const uint8_t* left, int64_t left_offset,
+                      const uint8_t* right, int64_t right_offset, int64_t length,
+                      int64_t out_offset, std::shared_ptr<Buffer>* out_buffer) const = 0;
 
-  for (int64_t left_offset : {0, 1, 3, 5, 7, 8, 13, 21, 38, 75, 120}) {
-    BitmapFromVector({0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1}, left_offset, &left,
-                     &length);
-    for (int64_t right_offset : {left_offset, left_offset + 8, left_offset + 40}) {
-      BitmapFromVector({0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0}, right_offset, &right,
-                       &length);
-      for (int64_t out_offset : {left_offset, left_offset + 16, left_offset + 24}) {
-        ASSERT_OK(BitmapAnd(default_memory_pool(), left->mutable_data(), left_offset,
+  virtual ~BitmapOperation() = default;
+};
+
+struct BitmapAndOp : public BitmapOperation {
+  Status Call(MemoryPool* pool, const uint8_t* left, int64_t left_offset,
+              const uint8_t* right, int64_t right_offset, int64_t length,
+              int64_t out_offset, std::shared_ptr<Buffer>* out_buffer) const override {
+    return BitmapAnd(pool, left, left_offset, right, right_offset, length, out_offset,
+                     out_buffer);
+  }
+};
+
+struct BitmapOrOp : public BitmapOperation {
+  Status Call(MemoryPool* pool, const uint8_t* left, int64_t left_offset,
+              const uint8_t* right, int64_t right_offset, int64_t length,
+              int64_t out_offset, std::shared_ptr<Buffer>* out_buffer) const override {
+    return BitmapOr(pool, left, left_offset, right, right_offset, length, out_offset,
+                    out_buffer);
+  }
+};
+
+struct BitmapXorOp : public BitmapOperation {
+  Status Call(MemoryPool* pool, const uint8_t* left, int64_t left_offset,
+              const uint8_t* right, int64_t right_offset, int64_t length,
+              int64_t out_offset, std::shared_ptr<Buffer>* out_buffer) const override {
+    return BitmapXor(pool, left, left_offset, right, right_offset, length, out_offset,
+                     out_buffer);
+  }
+};
+
+class BitmapOp : public TestBase {
+ public:
+  void TestAligned(const BitmapOperation& op, const std::vector<int>& left_bits,
+                   const std::vector<int>& right_bits,
+                   const std::vector<int>& result_bits) {
+    std::shared_ptr<Buffer> left, right, out;
+    int64_t length;
+
+    for (int64_t left_offset : {0, 1, 3, 5, 7, 8, 13, 21, 38, 75, 120}) {
+      BitmapFromVector(left_bits, left_offset, &left, &length);
+      for (int64_t right_offset : {left_offset, left_offset + 8, left_offset + 40}) {
+        BitmapFromVector(right_bits, right_offset, &right, &length);
+        for (int64_t out_offset : {left_offset, left_offset + 16, left_offset + 24}) {
+          ASSERT_OK(op.Call(default_memory_pool(), left->mutable_data(), left_offset,
                             right->mutable_data(), right_offset, length, out_offset,
                             &out));
-        auto reader = internal::BitmapReader(out->mutable_data(), out_offset, length);
-        ASSERT_READER_VALUES(reader, {0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0});
+          auto reader = internal::BitmapReader(out->mutable_data(), out_offset, length);
+          ASSERT_READER_VALUES(reader, result_bits);
+        }
       }
     }
   }
-}
 
-TEST(BitmapAnd, Unaligned) {
-  std::shared_ptr<Buffer> left, right, out;
-  int64_t length;
-  auto offset_values = {0, 1, 3, 5, 7, 8, 13, 21, 38, 75, 120};
+  void TestUnaligned(const BitmapOperation& op, const std::vector<int>& left_bits,
+                     const std::vector<int>& right_bits,
+                     const std::vector<int>& result_bits) {
+    std::shared_ptr<Buffer> left, right, out;
+    int64_t length;
+    auto offset_values = {0, 1, 3, 5, 7, 8, 13, 21, 38, 75, 120};
 
-  for (int64_t left_offset : offset_values) {
-    BitmapFromVector({0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1}, left_offset, &left,
-                     &length);
+    for (int64_t left_offset : offset_values) {
+      BitmapFromVector(left_bits, left_offset, &left, &length);
 
-    for (int64_t right_offset : offset_values) {
-      BitmapFromVector({0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0}, right_offset, &right,
-                       &length);
+      for (int64_t right_offset : offset_values) {
+        BitmapFromVector(right_bits, right_offset, &right, &length);
 
-      for (int64_t out_offset : offset_values) {
-        ASSERT_OK(BitmapAnd(default_memory_pool(), left->mutable_data(), left_offset,
+        for (int64_t out_offset : offset_values) {
+          ASSERT_OK(op.Call(default_memory_pool(), left->mutable_data(), left_offset,
                             right->mutable_data(), right_offset, length, out_offset,
                             &out));
-        auto reader = internal::BitmapReader(out->mutable_data(), out_offset, length);
-        ASSERT_READER_VALUES(reader, {0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0});
+          auto reader = internal::BitmapReader(out->mutable_data(), out_offset, length);
+          ASSERT_READER_VALUES(reader, result_bits);
+        }
       }
     }
   }
+};
+
+TEST_F(BitmapOp, And) {
+  BitmapAndOp op;
+  std::vector<int> left = {0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1};
+  std::vector<int> right = {0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0};
+  std::vector<int> result = {0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0};
+
+  TestAligned(op, left, right, result);
+  TestUnaligned(op, left, right, result);
+}
+
+TEST_F(BitmapOp, Or) {
+  BitmapOrOp op;
+  std::vector<int> left = {0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0};
+  std::vector<int> right = {0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0};
+  std::vector<int> result = {0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0};
+
+  TestAligned(op, left, right, result);
+  TestUnaligned(op, left, right, result);
+}
+
+TEST_F(BitmapOp, XorAligned) {
+  BitmapXorOp op;
+  std::vector<int> left = {0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1};
+  std::vector<int> right = {0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0};
+  std::vector<int> result = {0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1};
+
+  TestAligned(op, left, right, result);
+  TestUnaligned(op, left, right, result);
 }
 
 static inline int64_t SlowCountBits(const uint8_t* data, int64_t bit_offset,
diff --git a/cpp/src/arrow/util/bit-util.cc b/cpp/src/arrow/util/bit-util.cc
index 0d2929e..cd3e6d1 100644
--- a/cpp/src/arrow/util/bit-util.cc
+++ b/cpp/src/arrow/util/bit-util.cc
@@ -25,6 +25,7 @@
 
 #include <algorithm>
 #include <cstring>
+#include <functional>
 #include <vector>
 
 #include "arrow/buffer.h"
@@ -106,8 +107,9 @@ int64_t CountSetBits(const uint8_t* data, int64_t bit_offset, int64_t length) {
   return count;
 }
 
-Status CopyBitmap(MemoryPool* pool, const uint8_t* data, int64_t offset, int64_t length,
-                  std::shared_ptr<Buffer>* out) {
+template <bool invert_bits>
+Status TransferBitmap(MemoryPool* pool, const uint8_t* data, int64_t offset,
+                      int64_t length, std::shared_ptr<Buffer>* out) {
   std::shared_ptr<Buffer> buffer;
   RETURN_NOT_OK(AllocateEmptyBitmap(pool, length, &buffer));
   uint8_t* dest = buffer->mutable_data();
@@ -129,12 +131,22 @@ Status CopyBitmap(MemoryPool* pool, const uint8_t* data, int64_t offset, int64_t
     int64_t i = num_bytes - 1;
     while (i + 1 > 0) {
       uint8_t cur_byte = data[byte_offset + i];
-      dest[i] = static_cast<uint8_t>((cur_byte >> bit_offset) | carry);
+      if (invert_bits) {
+        dest[i] = static_cast<uint8_t>(~((cur_byte >> bit_offset) | carry));
+      } else {
+        dest[i] = static_cast<uint8_t>((cur_byte >> bit_offset) | carry);
+      }
       carry = (cur_byte & carry_mask) << carry_shift;
       --i;
     }
   } else {
-    std::memcpy(dest, data + byte_offset, static_cast<size_t>(num_bytes));
+    if (invert_bits) {
+      for (int64_t i = 0; i < num_bytes; i++) {
+        dest[i] = static_cast<uint8_t>(~(data[byte_offset + i]));
+      }
+    } else {
+      std::memcpy(dest, data + byte_offset, static_cast<size_t>(num_bytes));
+    }
   }
 
   for (int64_t i = length; i < length + bits_to_zero; ++i) {
@@ -146,6 +158,16 @@ Status CopyBitmap(MemoryPool* pool, const uint8_t* data, int64_t offset, int64_t
   return Status::OK();
 }
 
+Status CopyBitmap(MemoryPool* pool, const uint8_t* data, int64_t offset, int64_t length,
+                  std::shared_ptr<Buffer>* out) {
+  return TransferBitmap<false>(pool, data, offset, length, out);
+}
+
+Status InvertBitmap(MemoryPool* pool, const uint8_t* data, int64_t offset, int64_t length,
+                    std::shared_ptr<Buffer>* out) {
+  return TransferBitmap<true>(pool, data, offset, length, out);
+}
+
 bool BitmapEquals(const uint8_t* left, int64_t left_offset, const uint8_t* right,
                   int64_t right_offset, int64_t bit_length) {
   if (left_offset % 8 == 0 && right_offset % 8 == 0) {
@@ -176,9 +198,11 @@ bool BitmapEquals(const uint8_t* left, int64_t left_offset, const uint8_t* right
 
 namespace {
 
-void AlignedBitmapAnd(const uint8_t* left, int64_t left_offset, const uint8_t* right,
-                      int64_t right_offset, uint8_t* out, int64_t out_offset,
-                      int64_t length) {
+template <typename Op>
+void AlignedBitmapOp(const uint8_t* left, int64_t left_offset, const uint8_t* right,
+                     int64_t right_offset, uint8_t* out, int64_t out_offset,
+                     int64_t length) {
+  Op op;
   DCHECK_EQ(left_offset % 8, right_offset % 8);
   DCHECK_EQ(left_offset % 8, out_offset % 8);
 
@@ -187,18 +211,20 @@ void AlignedBitmapAnd(const uint8_t* left, int64_t left_offset, const uint8_t* r
   right += right_offset / 8;
   out += out_offset / 8;
   for (int64_t i = 0; i < nbytes; ++i) {
-    out[i] = left[i] & right[i];
+    out[i] = op(left[i], right[i]);
   }
 }
 
-void UnalignedBitmapAnd(const uint8_t* left, int64_t left_offset, const uint8_t* right,
-                        int64_t right_offset, uint8_t* out, int64_t out_offset,
-                        int64_t length) {
+template <typename Op>
+void UnalignedBitmapOp(const uint8_t* left, int64_t left_offset, const uint8_t* right,
+                       int64_t right_offset, uint8_t* out, int64_t out_offset,
+                       int64_t length) {
+  Op op;
   auto left_reader = internal::BitmapReader(left, left_offset, length);
   auto right_reader = internal::BitmapReader(right, right_offset, length);
   auto writer = internal::BitmapWriter(out, out_offset, length);
   for (int64_t i = 0; i < length; ++i) {
-    if (left_reader.IsSet() && right_reader.IsSet()) {
+    if (op(left_reader.IsSet(), right_reader.IsSet())) {
       writer.Set();
     }
     left_reader.Next();
@@ -208,24 +234,46 @@ void UnalignedBitmapAnd(const uint8_t* left, int64_t left_offset, const uint8_t*
   writer.Finish();
 }
 
-}  // namespace
-
-Status BitmapAnd(MemoryPool* pool, const uint8_t* left, int64_t left_offset,
-                 const uint8_t* right, int64_t right_offset, int64_t length,
-                 int64_t out_offset, std::shared_ptr<Buffer>* out_buffer) {
+template <typename BitOp, typename LogicalOp>
+Status BitmapOp(MemoryPool* pool, const uint8_t* left, int64_t left_offset,
+                const uint8_t* right, int64_t right_offset, int64_t length,
+                int64_t out_offset, std::shared_ptr<Buffer>* out_buffer) {
   if ((out_offset % 8 == left_offset % 8) && (out_offset % 8 == right_offset % 8)) {
     // Fast case: can use bytewise AND
     const int64_t phys_bits = length + out_offset;
     RETURN_NOT_OK(AllocateEmptyBitmap(pool, phys_bits, out_buffer));
-    AlignedBitmapAnd(left, left_offset, right, right_offset,
-                     (*out_buffer)->mutable_data(), out_offset, length);
+    AlignedBitmapOp<BitOp>(left, left_offset, right, right_offset,
+                           (*out_buffer)->mutable_data(), out_offset, length);
   } else {
     // Unaligned
     RETURN_NOT_OK(AllocateEmptyBitmap(pool, length + out_offset, out_buffer));
-    UnalignedBitmapAnd(left, left_offset, right, right_offset,
-                       (*out_buffer)->mutable_data(), out_offset, length);
+    UnalignedBitmapOp<LogicalOp>(left, left_offset, right, right_offset,
+                                 (*out_buffer)->mutable_data(), out_offset, length);
   }
   return Status::OK();
 }
 
+}  // namespace
+
+Status BitmapAnd(MemoryPool* pool, const uint8_t* left, int64_t left_offset,
+                 const uint8_t* right, int64_t right_offset, int64_t length,
+                 int64_t out_offset, std::shared_ptr<Buffer>* out_buffer) {
+  return BitmapOp<std::bit_and<uint8_t>, std::logical_and<bool>>(
+      pool, left, left_offset, right, right_offset, length, out_offset, out_buffer);
+}
+
+Status BitmapOr(MemoryPool* pool, const uint8_t* left, int64_t left_offset,
+                const uint8_t* right, int64_t right_offset, int64_t length,
+                int64_t out_offset, std::shared_ptr<Buffer>* out_buffer) {
+  return BitmapOp<std::bit_or<uint8_t>, std::logical_or<bool>>(
+      pool, left, left_offset, right, right_offset, length, out_offset, out_buffer);
+}
+
+Status BitmapXor(MemoryPool* pool, const uint8_t* left, int64_t left_offset,
+                 const uint8_t* right, int64_t right_offset, int64_t length,
+                 int64_t out_offset, std::shared_ptr<Buffer>* out_buffer) {
+  return BitmapOp<std::bit_xor<uint8_t>, std::bit_xor<bool>>(
+      pool, left, left_offset, right, right_offset, length, out_offset, out_buffer);
+}
+
 }  // namespace arrow
diff --git a/cpp/src/arrow/util/bit-util.h b/cpp/src/arrow/util/bit-util.h
index d36ea5d..4696a8c 100644
--- a/cpp/src/arrow/util/bit-util.h
+++ b/cpp/src/arrow/util/bit-util.h
@@ -570,6 +570,19 @@ ARROW_EXPORT
 Status CopyBitmap(MemoryPool* pool, const uint8_t* bitmap, int64_t offset, int64_t length,
                   std::shared_ptr<Buffer>* out);
 
+/// Invert a bit range of an existing bitmap
+///
+/// \param[in] pool memory pool to allocate memory from
+/// \param[in] bitmap source data
+/// \param[in] offset bit offset into the source data
+/// \param[in] length number of bits to invert
+/// \param[out] out the resulting inverted copy
+///
+/// \return Status message
+ARROW_EXPORT
+Status InvertBitmap(MemoryPool* pool, const uint8_t* bitmap, int64_t offset,
+                    int64_t length, std::shared_ptr<Buffer>* out);
+
 /// Compute the number of 1's in the given data array
 ///
 /// \param[in] data a packed LSB-ordered bitmap as a byte array
@@ -589,6 +602,16 @@ Status BitmapAnd(MemoryPool* pool, const uint8_t* left, int64_t left_offset,
                  const uint8_t* right, int64_t right_offset, int64_t length,
                  int64_t out_offset, std::shared_ptr<Buffer>* out_buffer);
 
+ARROW_EXPORT
+Status BitmapOr(MemoryPool* pool, const uint8_t* left, int64_t left_offset,
+                const uint8_t* right, int64_t right_offset, int64_t length,
+                int64_t out_offset, std::shared_ptr<Buffer>* out_buffer);
+
+ARROW_EXPORT
+Status BitmapXor(MemoryPool* pool, const uint8_t* left, int64_t left_offset,
+                 const uint8_t* right, int64_t right_offset, int64_t length,
+                 int64_t out_offset, std::shared_ptr<Buffer>* out_buffer);
+
 }  // namespace arrow
 
 #endif  // ARROW_UTIL_BIT_UTIL_H