You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ap...@apache.org on 2018/12/18 15:43:47 UTC
[arrow] branch master updated: ARROW-3387: [C++] Implement Binary to String cast

This is an automated email from the ASF dual-hosted git repository.

apitrou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new e832df3  ARROW-3387: [C++] Implement Binary to String cast
e832df3 is described below

commit e832df36c2d44d02273de851db3cfcd8c231f479
Author: François Saint-Jacques <fs...@gmail.com>
AuthorDate: Tue Dec 18 16:43:39 2018 +0100

    ARROW-3387: [C++] Implement Binary to String cast
    
    Author: François Saint-Jacques <fs...@gmail.com>
    
    Closes #3211 from fsaintjacques/ARROW-3387-cast-binary-to-string and squashes the following commits:
    
    16cdb8ef <François Saint-Jacques> ARROW-3387:  clang-format
    1949d377 <François Saint-Jacques> ARROW-3387:  Address review comments
    31092b9f <François Saint-Jacques> ARROW-3387:  Implement Binary to String cast
    f045d64f <François Saint-Jacques> ARROW-3387:  Partition compute-test.cc in separate files
    5358148e <François Saint-Jacques> ARROW-3387:  Rename CopyData to ZeroCopyData
---
 cpp/src/arrow/compute/compute-test.cc              | 1551 +-------------------
 cpp/src/arrow/compute/kernels/CMakeLists.txt       |    4 +
 cpp/src/arrow/compute/kernels/boolean-test.cc      |  157 ++
 .../{compute-test.cc => kernels/cast-test.cc}      |  510 +------
 cpp/src/arrow/compute/kernels/cast.cc              |  106 +-
 cpp/src/arrow/compute/kernels/cast.h               |    9 +-
 cpp/src/arrow/compute/kernels/hash-test.cc         |  344 +++++
 cpp/src/arrow/compute/kernels/util-internal.h      |    4 +-
 cpp/src/arrow/compute/test-util.h                  |   57 +
 cpp/src/arrow/util/utf8.h                          |    8 +
 10 files changed, 703 insertions(+), 2047 deletions(-)

diff --git a/cpp/src/arrow/compute/compute-test.cc b/cpp/src/arrow/compute/compute-test.cc
index e34a086..8129441 100644
--- a/cpp/src/arrow/compute/compute-test.cc
+++ b/cpp/src/arrow/compute/compute-test.cc
@@ -39,10 +39,8 @@
 
 #include "arrow/compute/context.h"
 #include "arrow/compute/kernel.h"
-#include "arrow/compute/kernels/boolean.h"
-#include "arrow/compute/kernels/cast.h"
-#include "arrow/compute/kernels/hash.h"
 #include "arrow/compute/kernels/util-internal.h"
+#include "arrow/compute/test-util.h"
 
 using std::shared_ptr;
 using std::vector;
@@ -50,26 +48,6 @@ using std::vector;
 namespace arrow {
 namespace compute {
 
-class ComputeFixture {
- public:
-  ComputeFixture() : ctx_(default_memory_pool()) {}
-
- protected:
-  FunctionContext ctx_;
-};
-
-template <typename Type, typename T>
-shared_ptr<Array> _MakeArray(const shared_ptr<DataType>& type, const vector<T>& values,
-                             const vector<bool>& is_valid) {
-  shared_ptr<Array> result;
-  if (is_valid.size() > 0) {
-    ArrayFromVector<Type, T>(type, is_valid, values, &result);
-  } else {
-    ArrayFromVector<Type, T>(type, values, &result);
-  }
-  return result;
-}
-
 // ----------------------------------------------------------------------
 // Datum
 
@@ -91,1533 +69,6 @@ TEST(TestDatum, ImplicitConstructors) {
   CheckImplicitConstructor<Table>(Datum::TABLE);
 }
 
-// ----------------------------------------------------------------------
-// Cast
-
-static void AssertBufferSame(const Array& left, const Array& right, int buffer_index) {
-  ASSERT_EQ(left.data()->buffers[buffer_index].get(),
-            right.data()->buffers[buffer_index].get());
-}
-
-class TestCast : public ComputeFixture, public TestBase {
- public:
-  void CheckPass(const Array& input, const Array& expected,
-                 const shared_ptr<DataType>& out_type, const CastOptions& options) {
-    shared_ptr<Array> result;
-    ASSERT_OK(Cast(&ctx_, input, out_type, options, &result));
-    ASSERT_ARRAYS_EQUAL(expected, *result);
-  }
-
-  template <typename InType, typename I_TYPE>
-  void CheckFails(const shared_ptr<DataType>& in_type, const vector<I_TYPE>& in_values,
-                  const vector<bool>& is_valid, const shared_ptr<DataType>& out_type,
-                  const CastOptions& options) {
-    shared_ptr<Array> input, result;
-    if (is_valid.size() > 0) {
-      ArrayFromVector<InType, I_TYPE>(in_type, is_valid, in_values, &input);
-    } else {
-      ArrayFromVector<InType, I_TYPE>(in_type, in_values, &input);
-    }
-    ASSERT_RAISES(Invalid, Cast(&ctx_, *input, out_type, options, &result));
-  }
-
-  void CheckZeroCopy(const Array& input, const shared_ptr<DataType>& out_type) {
-    shared_ptr<Array> result;
-    ASSERT_OK(Cast(&ctx_, input, out_type, {}, &result));
-    AssertBufferSame(input, *result, 0);
-    AssertBufferSame(input, *result, 1);
-  }
-
-  template <typename InType, typename I_TYPE, typename OutType, typename O_TYPE>
-  void CheckCase(const shared_ptr<DataType>& in_type, const vector<I_TYPE>& in_values,
-                 const vector<bool>& is_valid, const shared_ptr<DataType>& out_type,
-                 const vector<O_TYPE>& out_values, const CastOptions& options) {
-    DCHECK_EQ(in_values.size(), out_values.size());
-    shared_ptr<Array> input, expected;
-    if (is_valid.size() > 0) {
-      DCHECK_EQ(is_valid.size(), out_values.size());
-      ArrayFromVector<InType, I_TYPE>(in_type, is_valid, in_values, &input);
-      ArrayFromVector<OutType, O_TYPE>(out_type, is_valid, out_values, &expected);
-    } else {
-      ArrayFromVector<InType, I_TYPE>(in_type, in_values, &input);
-      ArrayFromVector<OutType, O_TYPE>(out_type, out_values, &expected);
-    }
-    CheckPass(*input, *expected, out_type, options);
-
-    // Check a sliced variant
-    if (input->length() > 1) {
-      CheckPass(*input->Slice(1), *expected->Slice(1), out_type, options);
-    }
-  }
-};
-
-TEST_F(TestCast, SameTypeZeroCopy) {
-  vector<bool> is_valid = {true, false, true, true, true};
-  vector<int32_t> v1 = {0, 1, 2, 3, 4};
-
-  shared_ptr<Array> arr;
-  ArrayFromVector<Int32Type, int32_t>(int32(), is_valid, v1, &arr);
-
-  shared_ptr<Array> result;
-  ASSERT_OK(Cast(&this->ctx_, *arr, int32(), {}, &result));
-
-  AssertBufferSame(*arr, *result, 0);
-  AssertBufferSame(*arr, *result, 1);
-}
-
-TEST_F(TestCast, ToBoolean) {
-  CastOptions options;
-
-  vector<bool> is_valid = {true, false, true, true, true};
-
-  // int8, should suffice for other integers
-  vector<int8_t> v1 = {0, 1, 127, -1, 0};
-  vector<bool> e1 = {false, true, true, true, false};
-  CheckCase<Int8Type, int8_t, BooleanType, bool>(int8(), v1, is_valid, boolean(), e1,
-                                                 options);
-
-  // floating point
-  vector<double> v2 = {1.0, 0, 0, -1.0, 5.0};
-  vector<bool> e2 = {true, false, false, true, true};
-  CheckCase<DoubleType, double, BooleanType, bool>(float64(), v2, is_valid, boolean(), e2,
-                                                   options);
-}
-
-TEST_F(TestCast, ToIntUpcast) {
-  CastOptions options;
-  options.allow_int_overflow = false;
-
-  vector<bool> is_valid = {true, false, true, true, true};
-
-  // int8 to int32
-  vector<int8_t> v1 = {0, 1, 127, -1, 0};
-  vector<int32_t> e1 = {0, 1, 127, -1, 0};
-  CheckCase<Int8Type, int8_t, Int32Type, int32_t>(int8(), v1, is_valid, int32(), e1,
-                                                  options);
-
-  // bool to int8
-  vector<bool> v2 = {false, true, false, true, true};
-  vector<int8_t> e2 = {0, 1, 0, 1, 1};
-  CheckCase<BooleanType, bool, Int8Type, int8_t>(boolean(), v2, is_valid, int8(), e2,
-                                                 options);
-
-  // uint8 to int16, no overflow/underrun
-  vector<uint8_t> v3 = {0, 100, 200, 255, 0};
-  vector<int16_t> e3 = {0, 100, 200, 255, 0};
-  CheckCase<UInt8Type, uint8_t, Int16Type, int16_t>(uint8(), v3, is_valid, int16(), e3,
-                                                    options);
-}
-
-TEST_F(TestCast, OverflowInNullSlot) {
-  CastOptions options;
-  options.allow_int_overflow = false;
-
-  vector<bool> is_valid = {true, false, true, true, true};
-
-  vector<int32_t> v11 = {0, 70000, 2000, 1000, 0};
-  vector<int16_t> e11 = {0, 0, 2000, 1000, 0};
-
-  shared_ptr<Array> expected;
-  ArrayFromVector<Int16Type, int16_t>(int16(), is_valid, e11, &expected);
-
-  auto buf = Buffer::Wrap(v11.data(), v11.size());
-  Int32Array tmp11(5, buf, expected->null_bitmap(), -1);
-
-  CheckPass(tmp11, *expected, int16(), options);
-}
-
-TEST_F(TestCast, ToIntDowncastSafe) {
-  CastOptions options;
-  options.allow_int_overflow = false;
-
-  vector<bool> is_valid = {true, false, true, true, true};
-
-  // int16 to uint8, no overflow/underrun
-  vector<int16_t> v1 = {0, 100, 200, 1, 2};
-  vector<uint8_t> e1 = {0, 100, 200, 1, 2};
-  CheckCase<Int16Type, int16_t, UInt8Type, uint8_t>(int16(), v1, is_valid, uint8(), e1,
-                                                    options);
-
-  // int16 to uint8, with overflow
-  vector<int16_t> v2 = {0, 100, 256, 0, 0};
-  CheckFails<Int16Type>(int16(), v2, is_valid, uint8(), options);
-
-  // underflow
-  vector<int16_t> v3 = {0, 100, -1, 0, 0};
-  CheckFails<Int16Type>(int16(), v3, is_valid, uint8(), options);
-
-  // int32 to int16, no overflow
-  vector<int32_t> v4 = {0, 1000, 2000, 1, 2};
-  vector<int16_t> e4 = {0, 1000, 2000, 1, 2};
-  CheckCase<Int32Type, int32_t, Int16Type, int16_t>(int32(), v4, is_valid, int16(), e4,
-                                                    options);
-
-  // int32 to int16, overflow
-  vector<int32_t> v5 = {0, 1000, 2000, 70000, 0};
-  CheckFails<Int32Type>(int32(), v5, is_valid, int16(), options);
-
-  // underflow
-  vector<int32_t> v6 = {0, 1000, 2000, -70000, 0};
-  CheckFails<Int32Type>(int32(), v6, is_valid, int16(), options);
-
-  vector<int32_t> v7 = {0, 1000, 2000, -70000, 0};
-  CheckFails<Int32Type>(int32(), v7, is_valid, uint8(), options);
-}
-
-template <typename O, typename I>
-std::vector<O> UnsafeVectorCast(const std::vector<I>& v) {
-  size_t n_elems = v.size();
-  std::vector<O> result(n_elems);
-
-  for (size_t i = 0; i < v.size(); i++) result[i] = static_cast<O>(v[i]);
-
-  return std::move(result);
-}
-
-TEST_F(TestCast, IntegerSignedToUnsigned) {
-  CastOptions options;
-  options.allow_int_overflow = false;
-
-  vector<bool> is_valid = {true, false, true, true, true};
-
-  vector<int32_t> v1 = {INT32_MIN, 100, -1, UINT16_MAX, INT32_MAX};
-
-  // Same width
-  CheckFails<Int32Type>(int32(), v1, is_valid, uint32(), options);
-  // Wider
-  CheckFails<Int32Type>(int32(), v1, is_valid, uint64(), options);
-  // Narrower
-  CheckFails<Int32Type>(int32(), v1, is_valid, uint16(), options);
-  // Fail because of overflow (instead of underflow).
-  vector<int32_t> over = {0, -11, 0, UINT16_MAX + 1, INT32_MAX};
-  CheckFails<Int32Type>(int32(), over, is_valid, uint16(), options);
-
-  options.allow_int_overflow = true;
-
-  CheckCase<Int32Type, int32_t, UInt32Type, uint32_t>(
-      int32(), v1, is_valid, uint32(), UnsafeVectorCast<uint32_t, int32_t>(v1), options);
-  CheckCase<Int32Type, int32_t, UInt64Type, uint64_t>(
-      int32(), v1, is_valid, uint64(), UnsafeVectorCast<uint64_t, int32_t>(v1), options);
-  CheckCase<Int32Type, int32_t, UInt16Type, uint16_t>(
-      int32(), v1, is_valid, uint16(), UnsafeVectorCast<uint16_t, int32_t>(v1), options);
-  CheckCase<Int32Type, int32_t, UInt16Type, uint16_t>(
-      int32(), over, is_valid, uint16(), UnsafeVectorCast<uint16_t, int32_t>(over),
-      options);
-}
-
-TEST_F(TestCast, IntegerUnsignedToSigned) {
-  CastOptions options;
-  options.allow_int_overflow = false;
-
-  vector<bool> is_valid = {true, true, true};
-
-  vector<uint32_t> v1 = {0, INT16_MAX + 1, UINT32_MAX};
-  vector<uint32_t> v2 = {0, INT16_MAX + 1, 2};
-  // Same width
-  CheckFails<UInt32Type>(uint32(), v1, is_valid, int32(), options);
-  // Narrower
-  CheckFails<UInt32Type>(uint32(), v1, is_valid, int16(), options);
-  CheckFails<UInt32Type>(uint32(), v2, is_valid, int16(), options);
-
-  options.allow_int_overflow = true;
-
-  CheckCase<UInt32Type, uint32_t, Int32Type, int32_t>(
-      uint32(), v1, is_valid, int32(), UnsafeVectorCast<int32_t, uint32_t>(v1), options);
-  CheckCase<UInt32Type, uint32_t, Int64Type, int64_t>(
-      uint32(), v1, is_valid, int64(), UnsafeVectorCast<int64_t, uint32_t>(v1), options);
-  CheckCase<UInt32Type, uint32_t, Int16Type, int16_t>(
-      uint32(), v1, is_valid, int16(), UnsafeVectorCast<int16_t, uint32_t>(v1), options);
-  CheckCase<UInt32Type, uint32_t, Int16Type, int16_t>(
-      uint32(), v2, is_valid, int16(), UnsafeVectorCast<int16_t, uint32_t>(v2), options);
-}
-
-TEST_F(TestCast, ToIntDowncastUnsafe) {
-  CastOptions options;
-  options.allow_int_overflow = true;
-
-  vector<bool> is_valid = {true, false, true, true, true};
-
-  // int16 to uint8, no overflow/underrun
-  vector<int16_t> v1 = {0, 100, 200, 1, 2};
-  vector<uint8_t> e1 = {0, 100, 200, 1, 2};
-  CheckCase<Int16Type, int16_t, UInt8Type, uint8_t>(int16(), v1, is_valid, uint8(), e1,
-                                                    options);
-
-  // int16 to uint8, with overflow
-  vector<int16_t> v2 = {0, 100, 256, 0, 0};
-  vector<uint8_t> e2 = {0, 100, 0, 0, 0};
-  CheckCase<Int16Type, int16_t, UInt8Type, uint8_t>(int16(), v2, is_valid, uint8(), e2,
-                                                    options);
-
-  // underflow
-  vector<int16_t> v3 = {0, 100, -1, 0, 0};
-  vector<uint8_t> e3 = {0, 100, 255, 0, 0};
-  CheckCase<Int16Type, int16_t, UInt8Type, uint8_t>(int16(), v3, is_valid, uint8(), e3,
-                                                    options);
-
-  // int32 to int16, no overflow
-  vector<int32_t> v4 = {0, 1000, 2000, 1, 2};
-  vector<int16_t> e4 = {0, 1000, 2000, 1, 2};
-  CheckCase<Int32Type, int32_t, Int16Type, int16_t>(int32(), v4, is_valid, int16(), e4,
-                                                    options);
-
-  // int32 to int16, overflow
-  // TODO(wesm): do we want to allow this? we could set to null
-  vector<int32_t> v5 = {0, 1000, 2000, 70000, 0};
-  vector<int16_t> e5 = {0, 1000, 2000, 4464, 0};
-  CheckCase<Int32Type, int32_t, Int16Type, int16_t>(int32(), v5, is_valid, int16(), e5,
-                                                    options);
-
-  // underflow
-  // TODO(wesm): do we want to allow this? we could set overflow to null
-  vector<int32_t> v6 = {0, 1000, 2000, -70000, 0};
-  vector<int16_t> e6 = {0, 1000, 2000, -4464, 0};
-  CheckCase<Int32Type, int32_t, Int16Type, int16_t>(int32(), v6, is_valid, int16(), e6,
-                                                    options);
-}
-
-TEST_F(TestCast, FloatingPointToInt) {
-  // which means allow_float_truncate == false
-  auto options = CastOptions::Safe();
-
-  vector<bool> is_valid = {true, false, true, true, true};
-  vector<bool> all_valid = {true, true, true, true, true};
-
-  // float32 to int32 no truncation
-  vector<float> v1 = {1.0, 0, 0.0, -1.0, 5.0};
-  vector<int32_t> e1 = {1, 0, 0, -1, 5};
-  CheckCase<FloatType, float, Int32Type, int32_t>(float32(), v1, is_valid, int32(), e1,
-                                                  options);
-  CheckCase<FloatType, float, Int32Type, int32_t>(float32(), v1, all_valid, int32(), e1,
-                                                  options);
-
-  // float64 to int32 no truncation
-  vector<double> v2 = {1.0, 0, 0.0, -1.0, 5.0};
-  vector<int32_t> e2 = {1, 0, 0, -1, 5};
-  CheckCase<DoubleType, double, Int32Type, int32_t>(float64(), v2, is_valid, int32(), e2,
-                                                    options);
-  CheckCase<DoubleType, double, Int32Type, int32_t>(float64(), v2, all_valid, int32(), e2,
-                                                    options);
-
-  // float64 to int64 no truncation
-  vector<double> v3 = {1.0, 0, 0.0, -1.0, 5.0};
-  vector<int64_t> e3 = {1, 0, 0, -1, 5};
-  CheckCase<DoubleType, double, Int64Type, int64_t>(float64(), v3, is_valid, int64(), e3,
-                                                    options);
-  CheckCase<DoubleType, double, Int64Type, int64_t>(float64(), v3, all_valid, int64(), e3,
-                                                    options);
-
-  // float64 to int32 truncate
-  vector<double> v4 = {1.5, 0, 0.5, -1.5, 5.5};
-  vector<int32_t> e4 = {1, 0, 0, -1, 5};
-
-  options.allow_float_truncate = false;
-  CheckFails<DoubleType>(float64(), v4, is_valid, int32(), options);
-  CheckFails<DoubleType>(float64(), v4, all_valid, int32(), options);
-
-  options.allow_float_truncate = true;
-  CheckCase<DoubleType, double, Int32Type, int32_t>(float64(), v4, is_valid, int32(), e4,
-                                                    options);
-  CheckCase<DoubleType, double, Int32Type, int32_t>(float64(), v4, all_valid, int32(), e4,
-                                                    options);
-
-  // float64 to int64 truncate
-  vector<double> v5 = {1.5, 0, 0.5, -1.5, 5.5};
-  vector<int64_t> e5 = {1, 0, 0, -1, 5};
-
-  options.allow_float_truncate = false;
-  CheckFails<DoubleType>(float64(), v5, is_valid, int64(), options);
-  CheckFails<DoubleType>(float64(), v5, all_valid, int64(), options);
-
-  options.allow_float_truncate = true;
-  CheckCase<DoubleType, double, Int64Type, int64_t>(float64(), v5, is_valid, int64(), e5,
-                                                    options);
-  CheckCase<DoubleType, double, Int64Type, int64_t>(float64(), v5, all_valid, int64(), e5,
-                                                    options);
-}
-
-TEST_F(TestCast, IntToFloatingPoint) {
-  auto options = CastOptions::Safe();
-
-  vector<bool> all_valid = {true, true, true, true, true};
-  vector<bool> all_invalid = {false, false, false, false, false};
-
-  vector<int64_t> v1 = {INT64_MIN, INT64_MIN + 1, 0, INT64_MAX - 1, INT64_MAX};
-  CheckFails<Int64Type>(int64(), v1, all_valid, float32(), options);
-
-  // While it's not safe to convert, all values are null.
-  CheckCase<Int64Type, int64_t, DoubleType, double>(int64(), v1, all_invalid, float64(),
-                                                    UnsafeVectorCast<double, int64_t>(v1),
-                                                    options);
-}
-
-TEST_F(TestCast, TimestampToTimestamp) {
-  CastOptions options;
-
-  auto CheckTimestampCast =
-      [this](const CastOptions& options, TimeUnit::type from_unit, TimeUnit::type to_unit,
-             const vector<int64_t>& from_values, const vector<int64_t>& to_values,
-             const vector<bool>& is_valid) {
-        CheckCase<TimestampType, int64_t, TimestampType, int64_t>(
-            timestamp(from_unit), from_values, is_valid, timestamp(to_unit), to_values,
-            options);
-      };
-
-  vector<bool> is_valid = {true, false, true, true, true};
-
-  // Multiply promotions
-  vector<int64_t> v1 = {0, 100, 200, 1, 2};
-  vector<int64_t> e1 = {0, 100000, 200000, 1000, 2000};
-  CheckTimestampCast(options, TimeUnit::SECOND, TimeUnit::MILLI, v1, e1, is_valid);
-
-  vector<int64_t> v2 = {0, 100, 200, 1, 2};
-  vector<int64_t> e2 = {0, 100000000L, 200000000L, 1000000, 2000000};
-  CheckTimestampCast(options, TimeUnit::SECOND, TimeUnit::MICRO, v2, e2, is_valid);
-
-  vector<int64_t> v3 = {0, 100, 200, 1, 2};
-  vector<int64_t> e3 = {0, 100000000000L, 200000000000L, 1000000000L, 2000000000L};
-  CheckTimestampCast(options, TimeUnit::SECOND, TimeUnit::NANO, v3, e3, is_valid);
-
-  vector<int64_t> v4 = {0, 100, 200, 1, 2};
-  vector<int64_t> e4 = {0, 100000, 200000, 1000, 2000};
-  CheckTimestampCast(options, TimeUnit::MILLI, TimeUnit::MICRO, v4, e4, is_valid);
-
-  vector<int64_t> v5 = {0, 100, 200, 1, 2};
-  vector<int64_t> e5 = {0, 100000000L, 200000000L, 1000000, 2000000};
-  CheckTimestampCast(options, TimeUnit::MILLI, TimeUnit::NANO, v5, e5, is_valid);
-
-  vector<int64_t> v6 = {0, 100, 200, 1, 2};
-  vector<int64_t> e6 = {0, 100000, 200000, 1000, 2000};
-  CheckTimestampCast(options, TimeUnit::MICRO, TimeUnit::NANO, v6, e6, is_valid);
-
-  // Zero copy
-  vector<int64_t> v7 = {0, 70000, 2000, 1000, 0};
-  shared_ptr<Array> arr;
-  ArrayFromVector<TimestampType, int64_t>(timestamp(TimeUnit::SECOND), is_valid, v7,
-                                          &arr);
-  CheckZeroCopy(*arr, timestamp(TimeUnit::SECOND));
-
-  // ARROW-1773, cast to integer
-  CheckZeroCopy(*arr, int64());
-
-  // Divide, truncate
-  vector<int64_t> v8 = {0, 100123, 200456, 1123, 2456};
-  vector<int64_t> e8 = {0, 100, 200, 1, 2};
-
-  options.allow_time_truncate = true;
-  CheckTimestampCast(options, TimeUnit::MILLI, TimeUnit::SECOND, v8, e8, is_valid);
-  CheckTimestampCast(options, TimeUnit::MICRO, TimeUnit::MILLI, v8, e8, is_valid);
-  CheckTimestampCast(options, TimeUnit::NANO, TimeUnit::MICRO, v8, e8, is_valid);
-
-  vector<int64_t> v9 = {0, 100123000, 200456000, 1123000, 2456000};
-  vector<int64_t> e9 = {0, 100, 200, 1, 2};
-  CheckTimestampCast(options, TimeUnit::MICRO, TimeUnit::SECOND, v9, e9, is_valid);
-  CheckTimestampCast(options, TimeUnit::NANO, TimeUnit::MILLI, v9, e9, is_valid);
-
-  vector<int64_t> v10 = {0, 100123000000L, 200456000000L, 1123000000L, 2456000000};
-  vector<int64_t> e10 = {0, 100, 200, 1, 2};
-  CheckTimestampCast(options, TimeUnit::NANO, TimeUnit::SECOND, v10, e10, is_valid);
-
-  // Disallow truncate, failures
-  options.allow_time_truncate = false;
-  CheckFails<TimestampType>(timestamp(TimeUnit::MILLI), v8, is_valid,
-                            timestamp(TimeUnit::SECOND), options);
-  CheckFails<TimestampType>(timestamp(TimeUnit::MICRO), v8, is_valid,
-                            timestamp(TimeUnit::MILLI), options);
-  CheckFails<TimestampType>(timestamp(TimeUnit::NANO), v8, is_valid,
-                            timestamp(TimeUnit::MICRO), options);
-  CheckFails<TimestampType>(timestamp(TimeUnit::MICRO), v9, is_valid,
-                            timestamp(TimeUnit::SECOND), options);
-  CheckFails<TimestampType>(timestamp(TimeUnit::NANO), v9, is_valid,
-                            timestamp(TimeUnit::MILLI), options);
-  CheckFails<TimestampType>(timestamp(TimeUnit::NANO), v10, is_valid,
-                            timestamp(TimeUnit::SECOND), options);
-}
-
-TEST_F(TestCast, TimestampToDate32_Date64) {
-  CastOptions options;
-
-  vector<bool> is_valid = {true, true, false};
-
-  // 2000-01-01, 2000-01-02, null
-  vector<int64_t> v_nano = {946684800000000000, 946771200000000000, 0};
-  vector<int64_t> v_micro = {946684800000000, 946771200000000, 0};
-  vector<int64_t> v_milli = {946684800000, 946771200000, 0};
-  vector<int64_t> v_second = {946684800, 946771200, 0};
-  vector<int32_t> v_day = {10957, 10958, 0};
-
-  // Simple conversions
-  CheckCase<TimestampType, int64_t, Date64Type, int64_t>(
-      timestamp(TimeUnit::NANO), v_nano, is_valid, date64(), v_milli, options);
-  CheckCase<TimestampType, int64_t, Date64Type, int64_t>(
-      timestamp(TimeUnit::MICRO), v_micro, is_valid, date64(), v_milli, options);
-  CheckCase<TimestampType, int64_t, Date64Type, int64_t>(
-      timestamp(TimeUnit::MILLI), v_milli, is_valid, date64(), v_milli, options);
-  CheckCase<TimestampType, int64_t, Date64Type, int64_t>(
-      timestamp(TimeUnit::SECOND), v_second, is_valid, date64(), v_milli, options);
-
-  CheckCase<TimestampType, int64_t, Date32Type, int32_t>(
-      timestamp(TimeUnit::NANO), v_nano, is_valid, date32(), v_day, options);
-  CheckCase<TimestampType, int64_t, Date32Type, int32_t>(
-      timestamp(TimeUnit::MICRO), v_micro, is_valid, date32(), v_day, options);
-  CheckCase<TimestampType, int64_t, Date32Type, int32_t>(
-      timestamp(TimeUnit::MILLI), v_milli, is_valid, date32(), v_day, options);
-  CheckCase<TimestampType, int64_t, Date32Type, int32_t>(
-      timestamp(TimeUnit::SECOND), v_second, is_valid, date32(), v_day, options);
-
-  // Disallow truncate, failures
-  vector<int64_t> v_nano_fail = {946684800000000001, 946771200000000001, 0};
-  vector<int64_t> v_micro_fail = {946684800000001, 946771200000001, 0};
-  vector<int64_t> v_milli_fail = {946684800001, 946771200001, 0};
-  vector<int64_t> v_second_fail = {946684801, 946771201, 0};
-
-  options.allow_time_truncate = false;
-  CheckFails<TimestampType>(timestamp(TimeUnit::NANO), v_nano_fail, is_valid, date64(),
-                            options);
-  CheckFails<TimestampType>(timestamp(TimeUnit::MICRO), v_micro_fail, is_valid, date64(),
-                            options);
-  CheckFails<TimestampType>(timestamp(TimeUnit::MILLI), v_milli_fail, is_valid, date64(),
-                            options);
-  CheckFails<TimestampType>(timestamp(TimeUnit::SECOND), v_second_fail, is_valid,
-                            date64(), options);
-
-  CheckFails<TimestampType>(timestamp(TimeUnit::NANO), v_nano_fail, is_valid, date32(),
-                            options);
-  CheckFails<TimestampType>(timestamp(TimeUnit::MICRO), v_micro_fail, is_valid, date32(),
-                            options);
-  CheckFails<TimestampType>(timestamp(TimeUnit::MILLI), v_milli_fail, is_valid, date32(),
-                            options);
-  CheckFails<TimestampType>(timestamp(TimeUnit::SECOND), v_second_fail, is_valid,
-                            date32(), options);
-
-  // Make sure that nulls are excluded from the truncation checks
-  vector<int64_t> v_second_nofail = {946684800, 946771200, 1};
-  CheckCase<TimestampType, int64_t, Date64Type, int64_t>(
-      timestamp(TimeUnit::SECOND), v_second_nofail, is_valid, date64(), v_milli, options);
-  CheckCase<TimestampType, int64_t, Date32Type, int32_t>(
-      timestamp(TimeUnit::SECOND), v_second_nofail, is_valid, date32(), v_day, options);
-}
-
-TEST_F(TestCast, TimeToCompatible) {
-  CastOptions options;
-
-  vector<bool> is_valid = {true, false, true, true, true};
-
-  // Multiply promotions
-  vector<int32_t> v1 = {0, 100, 200, 1, 2};
-  vector<int32_t> e1 = {0, 100000, 200000, 1000, 2000};
-  CheckCase<Time32Type, int32_t, Time32Type, int32_t>(
-      time32(TimeUnit::SECOND), v1, is_valid, time32(TimeUnit::MILLI), e1, options);
-
-  vector<int32_t> v2 = {0, 100, 200, 1, 2};
-  vector<int64_t> e2 = {0, 100000000L, 200000000L, 1000000, 2000000};
-  CheckCase<Time32Type, int32_t, Time64Type, int64_t>(
-      time32(TimeUnit::SECOND), v2, is_valid, time64(TimeUnit::MICRO), e2, options);
-
-  vector<int32_t> v3 = {0, 100, 200, 1, 2};
-  vector<int64_t> e3 = {0, 100000000000L, 200000000000L, 1000000000L, 2000000000L};
-  CheckCase<Time32Type, int32_t, Time64Type, int64_t>(
-      time32(TimeUnit::SECOND), v3, is_valid, time64(TimeUnit::NANO), e3, options);
-
-  vector<int32_t> v4 = {0, 100, 200, 1, 2};
-  vector<int64_t> e4 = {0, 100000, 200000, 1000, 2000};
-  CheckCase<Time32Type, int32_t, Time64Type, int64_t>(
-      time32(TimeUnit::MILLI), v4, is_valid, time64(TimeUnit::MICRO), e4, options);
-
-  vector<int32_t> v5 = {0, 100, 200, 1, 2};
-  vector<int64_t> e5 = {0, 100000000L, 200000000L, 1000000, 2000000};
-  CheckCase<Time32Type, int32_t, Time64Type, int64_t>(
-      time32(TimeUnit::MILLI), v5, is_valid, time64(TimeUnit::NANO), e5, options);
-
-  vector<int64_t> v6 = {0, 100, 200, 1, 2};
-  vector<int64_t> e6 = {0, 100000, 200000, 1000, 2000};
-  CheckCase<Time64Type, int64_t, Time64Type, int64_t>(
-      time64(TimeUnit::MICRO), v6, is_valid, time64(TimeUnit::NANO), e6, options);
-
-  // Zero copy
-  vector<int64_t> v7 = {0, 70000, 2000, 1000, 0};
-  shared_ptr<Array> arr;
-  ArrayFromVector<Time64Type, int64_t>(time64(TimeUnit::MICRO), is_valid, v7, &arr);
-  CheckZeroCopy(*arr, time64(TimeUnit::MICRO));
-
-  // ARROW-1773: cast to int64
-  CheckZeroCopy(*arr, int64());
-
-  vector<int32_t> v7_2 = {0, 70000, 2000, 1000, 0};
-  ArrayFromVector<Time32Type, int32_t>(time32(TimeUnit::SECOND), is_valid, v7_2, &arr);
-  CheckZeroCopy(*arr, time32(TimeUnit::SECOND));
-
-  // ARROW-1773: cast to int64
-  CheckZeroCopy(*arr, int32());
-
-  // Divide, truncate
-  vector<int32_t> v8 = {0, 100123, 200456, 1123, 2456};
-  vector<int32_t> e8 = {0, 100, 200, 1, 2};
-
-  options.allow_time_truncate = true;
-  CheckCase<Time32Type, int32_t, Time32Type, int32_t>(
-      time32(TimeUnit::MILLI), v8, is_valid, time32(TimeUnit::SECOND), e8, options);
-  CheckCase<Time64Type, int32_t, Time32Type, int32_t>(
-      time64(TimeUnit::MICRO), v8, is_valid, time32(TimeUnit::MILLI), e8, options);
-  CheckCase<Time64Type, int32_t, Time64Type, int32_t>(
-      time64(TimeUnit::NANO), v8, is_valid, time64(TimeUnit::MICRO), e8, options);
-
-  vector<int64_t> v9 = {0, 100123000, 200456000, 1123000, 2456000};
-  vector<int32_t> e9 = {0, 100, 200, 1, 2};
-  CheckCase<Time64Type, int64_t, Time32Type, int32_t>(
-      time64(TimeUnit::MICRO), v9, is_valid, time32(TimeUnit::SECOND), e9, options);
-  CheckCase<Time64Type, int64_t, Time32Type, int32_t>(
-      time64(TimeUnit::NANO), v9, is_valid, time32(TimeUnit::MILLI), e9, options);
-
-  vector<int64_t> v10 = {0, 100123000000L, 200456000000L, 1123000000L, 2456000000};
-  vector<int32_t> e10 = {0, 100, 200, 1, 2};
-  CheckCase<Time64Type, int64_t, Time32Type, int32_t>(
-      time64(TimeUnit::NANO), v10, is_valid, time32(TimeUnit::SECOND), e10, options);
-
-  // Disallow truncate, failures
-
-  options.allow_time_truncate = false;
-  CheckFails<Time32Type>(time32(TimeUnit::MILLI), v8, is_valid, time32(TimeUnit::SECOND),
-                         options);
-  CheckFails<Time64Type>(time64(TimeUnit::MICRO), v8, is_valid, time32(TimeUnit::MILLI),
-                         options);
-  CheckFails<Time64Type>(time64(TimeUnit::NANO), v8, is_valid, time64(TimeUnit::MICRO),
-                         options);
-  CheckFails<Time64Type>(time64(TimeUnit::MICRO), v9, is_valid, time32(TimeUnit::SECOND),
-                         options);
-  CheckFails<Time64Type>(time64(TimeUnit::NANO), v9, is_valid, time32(TimeUnit::MILLI),
-                         options);
-  CheckFails<Time64Type>(time64(TimeUnit::NANO), v10, is_valid, time32(TimeUnit::SECOND),
-                         options);
-}
-
-TEST_F(TestCast, PrimitiveZeroCopy) {
-  shared_ptr<Array> arr;
-
-  ArrayFromVector<UInt8Type, uint8_t>(uint8(), {1, 1, 1, 1}, {1, 2, 3, 4}, &arr);
-  CheckZeroCopy(*arr, uint8());
-  ArrayFromVector<Int8Type, int8_t>(int8(), {1, 1, 1, 1}, {1, 2, 3, 4}, &arr);
-  CheckZeroCopy(*arr, int8());
-
-  ArrayFromVector<UInt16Type, uint16_t>(uint16(), {1, 1, 1, 1}, {1, 2, 3, 4}, &arr);
-  CheckZeroCopy(*arr, uint16());
-  ArrayFromVector<Int16Type, int8_t>(int16(), {1, 1, 1, 1}, {1, 2, 3, 4}, &arr);
-  CheckZeroCopy(*arr, int16());
-
-  ArrayFromVector<UInt32Type, uint32_t>(uint32(), {1, 1, 1, 1}, {1, 2, 3, 4}, &arr);
-  CheckZeroCopy(*arr, uint32());
-  ArrayFromVector<Int32Type, int8_t>(int32(), {1, 1, 1, 1}, {1, 2, 3, 4}, &arr);
-  CheckZeroCopy(*arr, int32());
-
-  ArrayFromVector<UInt64Type, uint64_t>(uint64(), {1, 1, 1, 1}, {1, 2, 3, 4}, &arr);
-  CheckZeroCopy(*arr, uint64());
-  ArrayFromVector<Int64Type, int8_t>(int64(), {1, 1, 1, 1}, {1, 2, 3, 4}, &arr);
-  CheckZeroCopy(*arr, int64());
-
-  ArrayFromVector<FloatType, float>(float32(), {1, 1, 1, 1}, {1, 2, 3, 4}, &arr);
-  CheckZeroCopy(*arr, float32());
-
-  ArrayFromVector<DoubleType, double>(float64(), {1, 1, 1, 1}, {1, 2, 3, 4}, &arr);
-  CheckZeroCopy(*arr, float64());
-}
-
-TEST_F(TestCast, DateToCompatible) {
-  CastOptions options;
-
-  vector<bool> is_valid = {true, false, true, true, true};
-
-  constexpr int64_t F = 86400000;
-
-  // Multiply promotion
-  vector<int32_t> v1 = {0, 100, 200, 1, 2};
-  vector<int64_t> e1 = {0, 100 * F, 200 * F, F, 2 * F};
-  CheckCase<Date32Type, int32_t, Date64Type, int64_t>(date32(), v1, is_valid, date64(),
-                                                      e1, options);
-
-  // Zero copy
-  vector<int32_t> v2 = {0, 70000, 2000, 1000, 0};
-  vector<int64_t> v3 = {0, 70000, 2000, 1000, 0};
-  shared_ptr<Array> arr;
-  ArrayFromVector<Date32Type, int32_t>(date32(), is_valid, v2, &arr);
-  CheckZeroCopy(*arr, date32());
-
-  // ARROW-1773: zero copy cast to integer
-  CheckZeroCopy(*arr, int32());
-
-  ArrayFromVector<Date64Type, int64_t>(date64(), is_valid, v3, &arr);
-  CheckZeroCopy(*arr, date64());
-
-  // ARROW-1773: zero copy cast to integer
-  CheckZeroCopy(*arr, int64());
-
-  // Divide, truncate
-  vector<int64_t> v8 = {0, 100 * F + 123, 200 * F + 456, F + 123, 2 * F + 456};
-  vector<int32_t> e8 = {0, 100, 200, 1, 2};
-
-  options.allow_time_truncate = true;
-  CheckCase<Date64Type, int64_t, Date32Type, int32_t>(date64(), v8, is_valid, date32(),
-                                                      e8, options);
-
-  // Disallow truncate, failures
-  options.allow_time_truncate = false;
-  CheckFails<Date64Type>(date64(), v8, is_valid, date32(), options);
-}
-
-TEST_F(TestCast, ToDouble) {
-  CastOptions options;
-  vector<bool> is_valid = {true, false, true, true, true};
-
-  // int16 to double
-  vector<int16_t> v1 = {0, 100, 200, 1, 2};
-  vector<double> e1 = {0, 100, 200, 1, 2};
-  CheckCase<Int16Type, int16_t, DoubleType, double>(int16(), v1, is_valid, float64(), e1,
-                                                    options);
-
-  // float to double
-  vector<float> v2 = {0, 100, 200, 1, 2};
-  vector<double> e2 = {0, 100, 200, 1, 2};
-  CheckCase<FloatType, float, DoubleType, double>(float32(), v2, is_valid, float64(), e2,
-                                                  options);
-
-  // bool to double
-  vector<bool> v3 = {true, true, false, false, true};
-  vector<double> e3 = {1, 1, 0, 0, 1};
-  CheckCase<BooleanType, bool, DoubleType, double>(boolean(), v3, is_valid, float64(), e3,
-                                                   options);
-}
-
-TEST_F(TestCast, ChunkedArray) {
-  vector<int16_t> values1 = {0, 1, 2};
-  vector<int16_t> values2 = {3, 4, 5};
-
-  auto type = int16();
-  auto out_type = int64();
-
-  auto a1 = _MakeArray<Int16Type, int16_t>(type, values1, {});
-  auto a2 = _MakeArray<Int16Type, int16_t>(type, values2, {});
-
-  ArrayVector arrays = {a1, a2};
-  auto carr = std::make_shared<ChunkedArray>(arrays);
-
-  CastOptions options;
-
-  Datum out;
-  ASSERT_OK(Cast(&this->ctx_, carr, out_type, options, &out));
-  ASSERT_EQ(Datum::CHUNKED_ARRAY, out.kind());
-
-  auto out_carr = out.chunked_array();
-
-  vector<int64_t> ex_values1 = {0, 1, 2};
-  vector<int64_t> ex_values2 = {3, 4, 5};
-  auto a3 = _MakeArray<Int64Type, int64_t>(out_type, ex_values1, {});
-  auto a4 = _MakeArray<Int64Type, int64_t>(out_type, ex_values2, {});
-
-  ArrayVector ex_arrays = {a3, a4};
-  auto ex_carr = std::make_shared<ChunkedArray>(ex_arrays);
-
-  ASSERT_TRUE(out.chunked_array()->Equals(*ex_carr));
-}
-
-TEST_F(TestCast, UnsupportedTarget) {
-  vector<bool> is_valid = {true, false, true, true, true};
-  vector<int32_t> v1 = {0, 1, 2, 3, 4};
-
-  shared_ptr<Array> arr;
-  ArrayFromVector<Int32Type, int32_t>(int32(), is_valid, v1, &arr);
-
-  shared_ptr<Array> result;
-  ASSERT_RAISES(NotImplemented, Cast(&this->ctx_, *arr, utf8(), {}, &result));
-}
-
-TEST_F(TestCast, DateTimeZeroCopy) {
-  vector<bool> is_valid = {true, false, true, true, true};
-
-  vector<int32_t> v1 = {0, 70000, 2000, 1000, 0};
-  shared_ptr<Array> arr;
-  ArrayFromVector<Int32Type, int32_t>(int32(), is_valid, v1, &arr);
-
-  CheckZeroCopy(*arr, time32(TimeUnit::SECOND));
-  CheckZeroCopy(*arr, date32());
-
-  vector<int64_t> v2 = {0, 70000, 2000, 1000, 0};
-  ArrayFromVector<Int64Type, int64_t>(int64(), is_valid, v2, &arr);
-
-  CheckZeroCopy(*arr, time64(TimeUnit::MICRO));
-  CheckZeroCopy(*arr, date64());
-  CheckZeroCopy(*arr, timestamp(TimeUnit::NANO));
-}
-
-TEST_F(TestCast, FromNull) {
-  // Null casts to everything
-  const int length = 10;
-
-  NullArray arr(length);
-
-  shared_ptr<Array> result;
-  ASSERT_OK(Cast(&ctx_, arr, int32(), {}, &result));
-
-  ASSERT_EQ(length, result->length());
-  ASSERT_EQ(length, result->null_count());
-
-  // OK to look at bitmaps
-  ASSERT_ARRAYS_EQUAL(*result, *result);
-}
-
-TEST_F(TestCast, PreallocatedMemory) {
-  CastOptions options;
-  options.allow_int_overflow = false;
-
-  vector<bool> is_valid = {true, false, true, true, true};
-
-  const int64_t length = 5;
-
-  shared_ptr<Array> arr;
-  vector<int32_t> v1 = {0, 70000, 2000, 1000, 0};
-  vector<int64_t> e1 = {0, 70000, 2000, 1000, 0};
-  ArrayFromVector<Int32Type, int32_t>(int32(), is_valid, v1, &arr);
-
-  auto out_type = int64();
-
-  std::unique_ptr<UnaryKernel> kernel;
-  ASSERT_OK(GetCastFunction(*int32(), out_type, options, &kernel));
-
-  auto out_data = ArrayData::Make(out_type, length);
-
-  shared_ptr<Buffer> out_values;
-  ASSERT_OK(this->ctx_.Allocate(length * sizeof(int64_t), &out_values));
-
-  out_data->buffers.push_back(nullptr);
-  out_data->buffers.push_back(out_values);
-
-  Datum out(out_data);
-  ASSERT_OK(kernel->Call(&this->ctx_, arr, &out));
-
-  // Buffer address unchanged
-  ASSERT_EQ(out_values.get(), out_data->buffers[1].get());
-
-  shared_ptr<Array> result = MakeArray(out_data);
-  shared_ptr<Array> expected;
-  ArrayFromVector<Int64Type, int64_t>(int64(), is_valid, e1, &expected);
-
-  ASSERT_ARRAYS_EQUAL(*expected, *result);
-}
-
-template <typename InType, typename InT, typename OutType, typename OutT>
-void CheckOffsetOutputCase(FunctionContext* ctx, const std::shared_ptr<DataType>& in_type,
-                           const vector<InT>& in_values,
-                           const std::shared_ptr<DataType>& out_type,
-                           const vector<OutT>& out_values) {
-  using OutTraits = TypeTraits<OutType>;
-
-  CastOptions options;
-
-  const int64_t length = static_cast<int64_t>(in_values.size());
-
-  shared_ptr<Array> arr, expected;
-  ArrayFromVector<InType, InT>(in_type, in_values, &arr);
-  ArrayFromVector<OutType, OutT>(out_type, out_values, &expected);
-
-  shared_ptr<Buffer> out_buffer;
-  ASSERT_OK(ctx->Allocate(OutTraits::bytes_required(length), &out_buffer));
-
-  std::unique_ptr<UnaryKernel> kernel;
-  ASSERT_OK(GetCastFunction(*in_type, out_type, options, &kernel));
-
-  const int64_t first_half = length / 2;
-
-  auto out_data = ArrayData::Make(out_type, length, {nullptr, out_buffer});
-  auto out_second_data = out_data->Copy();
-  out_second_data->offset = first_half;
-
-  Datum out_first(out_data);
-  Datum out_second(out_second_data);
-
-  // Cast each bit
-  ASSERT_OK(kernel->Call(ctx, arr->Slice(0, first_half), &out_first));
-  ASSERT_OK(kernel->Call(ctx, arr->Slice(first_half), &out_second));
-
-  shared_ptr<Array> result = MakeArray(out_data);
-
-  ASSERT_ARRAYS_EQUAL(*expected, *result);
-}
-
-TEST_F(TestCast, OffsetOutputBuffer) {
-  // ARROW-1735
-  vector<int32_t> v1 = {0, 10000, 2000, 1000, 0};
-  vector<int64_t> e1 = {0, 10000, 2000, 1000, 0};
-
-  auto in_type = int32();
-  auto out_type = int64();
-  CheckOffsetOutputCase<Int32Type, int32_t, Int64Type, int64_t>(&this->ctx_, in_type, v1,
-                                                                out_type, e1);
-
-  vector<bool> e2 = {false, true, true, true, false};
-
-  out_type = boolean();
-  CheckOffsetOutputCase<Int32Type, int32_t, BooleanType, bool>(&this->ctx_, in_type, v1,
-                                                               boolean(), e2);
-
-  vector<int16_t> e3 = {0, 10000, 2000, 1000, 0};
-  CheckOffsetOutputCase<Int32Type, int32_t, Int16Type, int16_t>(&this->ctx_, in_type, v1,
-                                                                int16(), e3);
-}
-
-TEST_F(TestCast, StringToBoolean) {
-  CastOptions options;
-
-  vector<bool> is_valid = {true, false, true, true, true};
-
-  vector<std::string> v1 = {"False", "true", "true", "True", "false"};
-  vector<std::string> v2 = {"0", "1", "1", "1", "0"};
-  vector<bool> e = {false, true, true, true, false};
-  CheckCase<StringType, std::string, BooleanType, bool>(utf8(), v1, is_valid, boolean(),
-                                                        e, options);
-  CheckCase<StringType, std::string, BooleanType, bool>(utf8(), v2, is_valid, boolean(),
-                                                        e, options);
-}
-
-TEST_F(TestCast, StringToBooleanErrors) {
-  CastOptions options;
-
-  vector<bool> is_valid = {true};
-
-  CheckFails<StringType, std::string>(utf8(), {"false "}, is_valid, boolean(), options);
-  CheckFails<StringType, std::string>(utf8(), {"T"}, is_valid, boolean(), options);
-}
-
-TEST_F(TestCast, StringToNumber) {
-  CastOptions options;
-
-  vector<bool> is_valid = {true, false, true, true, true};
-
-  // string to int
-  vector<std::string> v_int = {"0", "1", "127", "-1", "0"};
-  vector<int8_t> e_int8 = {0, 1, 127, -1, 0};
-  vector<int16_t> e_int16 = {0, 1, 127, -1, 0};
-  vector<int32_t> e_int32 = {0, 1, 127, -1, 0};
-  vector<int64_t> e_int64 = {0, 1, 127, -1, 0};
-  CheckCase<StringType, std::string, Int8Type, int8_t>(utf8(), v_int, is_valid, int8(),
-                                                       e_int8, options);
-  CheckCase<StringType, std::string, Int16Type, int16_t>(utf8(), v_int, is_valid, int16(),
-                                                         e_int16, options);
-  CheckCase<StringType, std::string, Int32Type, int32_t>(utf8(), v_int, is_valid, int32(),
-                                                         e_int32, options);
-  CheckCase<StringType, std::string, Int64Type, int64_t>(utf8(), v_int, is_valid, int64(),
-                                                         e_int64, options);
-
-  v_int = {"2147483647", "0", "-2147483648", "0", "0"};
-  e_int32 = {2147483647, 0, -2147483648LL, 0, 0};
-  CheckCase<StringType, std::string, Int32Type, int32_t>(utf8(), v_int, is_valid, int32(),
-                                                         e_int32, options);
-  v_int = {"9223372036854775807", "0", "-9223372036854775808", "0", "0"};
-  e_int64 = {9223372036854775807LL, 0, (-9223372036854775807LL - 1), 0, 0};
-  CheckCase<StringType, std::string, Int64Type, int64_t>(utf8(), v_int, is_valid, int64(),
-                                                         e_int64, options);
-
-  // string to uint
-  vector<std::string> v_uint = {"0", "1", "127", "255", "0"};
-  vector<uint8_t> e_uint8 = {0, 1, 127, 255, 0};
-  vector<uint16_t> e_uint16 = {0, 1, 127, 255, 0};
-  vector<uint32_t> e_uint32 = {0, 1, 127, 255, 0};
-  vector<uint64_t> e_uint64 = {0, 1, 127, 255, 0};
-  CheckCase<StringType, std::string, UInt8Type, uint8_t>(utf8(), v_uint, is_valid,
-                                                         uint8(), e_uint8, options);
-  CheckCase<StringType, std::string, UInt16Type, uint16_t>(utf8(), v_uint, is_valid,
-                                                           uint16(), e_uint16, options);
-  CheckCase<StringType, std::string, UInt32Type, uint32_t>(utf8(), v_uint, is_valid,
-                                                           uint32(), e_uint32, options);
-  CheckCase<StringType, std::string, UInt64Type, uint64_t>(utf8(), v_uint, is_valid,
-                                                           uint64(), e_uint64, options);
-
-  v_uint = {"4294967295", "0", "0", "0", "0"};
-  e_uint32 = {4294967295, 0, 0, 0, 0};
-  CheckCase<StringType, std::string, UInt32Type, uint32_t>(utf8(), v_uint, is_valid,
-                                                           uint32(), e_uint32, options);
-  v_uint = {"18446744073709551615", "0", "0", "0", "0"};
-  e_uint64 = {18446744073709551615ULL, 0, 0, 0, 0};
-  CheckCase<StringType, std::string, UInt64Type, uint64_t>(utf8(), v_uint, is_valid,
-                                                           uint64(), e_uint64, options);
-
-  // string to float
-  vector<std::string> v_float = {"0.1", "1.2", "127.3", "200.4", "0.5"};
-  vector<float> e_float = {0.1f, 1.2f, 127.3f, 200.4f, 0.5f};
-  vector<double> e_double = {0.1, 1.2, 127.3, 200.4, 0.5};
-  CheckCase<StringType, std::string, FloatType, float>(utf8(), v_float, is_valid,
-                                                       float32(), e_float, options);
-  CheckCase<StringType, std::string, DoubleType, double>(utf8(), v_float, is_valid,
-                                                         float64(), e_double, options);
-
-  // Test that casting is locale-independent
-  auto global_locale = std::locale();
-  try {
-    // French locale uses the comma as decimal point
-    std::locale::global(std::locale("fr_FR.UTF-8"));
-  } catch (std::runtime_error&) {
-    // Locale unavailable, ignore
-  }
-  CheckCase<StringType, std::string, FloatType, float>(utf8(), v_float, is_valid,
-                                                       float32(), e_float, options);
-  CheckCase<StringType, std::string, DoubleType, double>(utf8(), v_float, is_valid,
-                                                         float64(), e_double, options);
-  std::locale::global(global_locale);
-}
-
-TEST_F(TestCast, StringToNumberErrors) {
-  CastOptions options;
-
-  vector<bool> is_valid = {true};
-
-  CheckFails<StringType, std::string>(utf8(), {"z"}, is_valid, int8(), options);
-  CheckFails<StringType, std::string>(utf8(), {"12 z"}, is_valid, int8(), options);
-  CheckFails<StringType, std::string>(utf8(), {"128"}, is_valid, int8(), options);
-  CheckFails<StringType, std::string>(utf8(), {"-129"}, is_valid, int8(), options);
-  CheckFails<StringType, std::string>(utf8(), {"0.5"}, is_valid, int8(), options);
-
-  CheckFails<StringType, std::string>(utf8(), {"256"}, is_valid, uint8(), options);
-  CheckFails<StringType, std::string>(utf8(), {"-1"}, is_valid, uint8(), options);
-
-  CheckFails<StringType, std::string>(utf8(), {"z"}, is_valid, float32(), options);
-}
-
-TEST_F(TestCast, StringToTimestamp) {
-  CastOptions options;
-
-  vector<bool> is_valid = {true, false, true};
-  vector<std::string> strings = {"1970-01-01", "xxx", "2000-02-29"};
-
-  auto type = timestamp(TimeUnit::SECOND);
-  vector<int64_t> e = {0, 0, 951782400};
-  CheckCase<StringType, std::string, TimestampType, int64_t>(utf8(), strings, is_valid,
-                                                             type, e, options);
-
-  type = timestamp(TimeUnit::MICRO);
-  e = {0, 0, 951782400000000LL};
-  CheckCase<StringType, std::string, TimestampType, int64_t>(utf8(), strings, is_valid,
-                                                             type, e, options);
-
-  // NOTE: timestamp parsing is tested comprehensively in parsing-util-test.cc
-}
-
-TEST_F(TestCast, StringToTimestampErrors) {
-  CastOptions options;
-
-  vector<bool> is_valid = {true};
-
-  for (auto unit : {TimeUnit::SECOND, TimeUnit::MILLI, TimeUnit::MICRO, TimeUnit::NANO}) {
-    auto type = timestamp(unit);
-    CheckFails<StringType, std::string>(utf8(), {""}, is_valid, type, options);
-    CheckFails<StringType, std::string>(utf8(), {"xxx"}, is_valid, type, options);
-  }
-}
-
-template <typename TestType>
-class TestDictionaryCast : public TestCast {};
-
-typedef ::testing::Types<NullType, UInt8Type, Int8Type, UInt16Type, Int16Type, Int32Type,
-                         UInt32Type, UInt64Type, Int64Type, FloatType, DoubleType,
-                         Date32Type, Date64Type, FixedSizeBinaryType, BinaryType>
-    TestTypes;
-
-TYPED_TEST_CASE(TestDictionaryCast, TestTypes);
-
-TYPED_TEST(TestDictionaryCast, Basic) {
-  CastOptions options;
-  shared_ptr<Array> plain_array =
-      TestBase::MakeRandomArray<typename TypeTraits<TypeParam>::ArrayType>(10, 2);
-
-  Datum out;
-  ASSERT_OK(DictionaryEncode(&this->ctx_, plain_array->data(), &out));
-
-  this->CheckPass(*MakeArray(out.array()), *plain_array, plain_array->type(), options);
-}
-
-TEST_F(TestCast, DictToNonDictNoNulls) {
-  vector<std::string> dict_values = {"foo", "bar", "baz"};
-  auto ex_dict = _MakeArray<StringType, std::string>(utf8(), dict_values, {});
-  auto dict_type = dictionary(int32(), ex_dict);
-
-  // Explicitly construct with nullptr for the null_bitmap_data
-  std::vector<int32_t> i1 = {1, 0, 1};
-  std::vector<int32_t> i2 = {2, 1, 0, 1};
-  auto c1 = std::make_shared<NumericArray<Int32Type>>(3, Buffer::Wrap(i1));
-  auto c2 = std::make_shared<NumericArray<Int32Type>>(4, Buffer::Wrap(i2));
-
-  ArrayVector dict_arrays = {std::make_shared<DictionaryArray>(dict_type, c1),
-                             std::make_shared<DictionaryArray>(dict_type, c2)};
-  auto dict_carr = std::make_shared<ChunkedArray>(dict_arrays);
-
-  Datum cast_input(dict_carr);
-  Datum cast_output;
-  // Ensure that casting works even when the null_bitmap_data array is a nullptr
-  ASSERT_OK(Cast(&this->ctx_, cast_input,
-                 static_cast<DictionaryType&>(*dict_type).dictionary()->type(),
-                 CastOptions(), &cast_output));
-  ASSERT_EQ(Datum::CHUNKED_ARRAY, cast_output.kind());
-
-  auto e1 = _MakeArray<StringType, std::string>(utf8(), {"bar", "foo", "bar"}, {});
-  auto e2 = _MakeArray<StringType, std::string>(utf8(), {"baz", "bar", "foo", "bar"}, {});
-
-  auto chunks = cast_output.chunked_array()->chunks();
-  ASSERT_EQ(chunks.size(), 2);
-  ASSERT_ARRAYS_EQUAL(*e1, *chunks[0]);
-  ASSERT_ARRAYS_EQUAL(*e2, *chunks[1]);
-}
-
-/*TYPED_TEST(TestDictionaryCast, Reverse) {
-  CastOptions options;
-  shared_ptr<Array> plain_array =
-      TestBase::MakeRandomArray<typename TypeTraits<TypeParam>::ArrayType>(10, 2);
-
-  shared_ptr<Array> dict_array;
-  ASSERT_OK(EncodeArrayToDictionary(*plain_array, this->pool_, &dict_array));
-
-  this->CheckPass(*plain_array, *dict_array, dict_array->type(), options);
-}*/
-
-TEST_F(TestCast, ListToList) {
-  CastOptions options;
-  std::shared_ptr<Array> offsets;
-
-  vector<int32_t> offsets_values = {0, 1, 2, 5, 7, 7, 8, 10};
-  std::vector<bool> offsets_is_valid = {true, true, true, true, false, true, true, true};
-  ArrayFromVector<Int32Type, int32_t>(offsets_is_valid, offsets_values, &offsets);
-
-  shared_ptr<Array> int32_plain_array =
-      TestBase::MakeRandomArray<typename TypeTraits<Int32Type>::ArrayType>(10, 2);
-  std::shared_ptr<Array> int32_list_array;
-  ASSERT_OK(
-      ListArray::FromArrays(*offsets, *int32_plain_array, pool_, &int32_list_array));
-
-  std::shared_ptr<Array> int64_plain_array;
-  ASSERT_OK(Cast(&this->ctx_, *int32_plain_array, int64(), options, &int64_plain_array));
-  std::shared_ptr<Array> int64_list_array;
-  ASSERT_OK(
-      ListArray::FromArrays(*offsets, *int64_plain_array, pool_, &int64_list_array));
-
-  std::shared_ptr<Array> float64_plain_array;
-  ASSERT_OK(
-      Cast(&this->ctx_, *int32_plain_array, float64(), options, &float64_plain_array));
-  std::shared_ptr<Array> float64_list_array;
-  ASSERT_OK(
-      ListArray::FromArrays(*offsets, *float64_plain_array, pool_, &float64_list_array));
-
-  CheckPass(*int32_list_array, *int64_list_array, int64_list_array->type(), options);
-  CheckPass(*int32_list_array, *float64_list_array, float64_list_array->type(), options);
-  CheckPass(*int64_list_array, *int32_list_array, int32_list_array->type(), options);
-  CheckPass(*int64_list_array, *float64_list_array, float64_list_array->type(), options);
-
-  options.allow_float_truncate = true;
-  CheckPass(*float64_list_array, *int32_list_array, int32_list_array->type(), options);
-  CheckPass(*float64_list_array, *int64_list_array, int64_list_array->type(), options);
-}
-
-// ----------------------------------------------------------------------
-// Dictionary tests
-
-template <typename Type, typename T>
-void CheckUnique(FunctionContext* ctx, const shared_ptr<DataType>& type,
-                 const vector<T>& in_values, const vector<bool>& in_is_valid,
-                 const vector<T>& out_values, const vector<bool>& out_is_valid) {
-  shared_ptr<Array> input = _MakeArray<Type, T>(type, in_values, in_is_valid);
-  shared_ptr<Array> expected = _MakeArray<Type, T>(type, out_values, out_is_valid);
-
-  shared_ptr<Array> result;
-  ASSERT_OK(Unique(ctx, input, &result));
-  ASSERT_ARRAYS_EQUAL(*expected, *result);
-}
-
-template <typename Type, typename T>
-void CheckDictEncode(FunctionContext* ctx, const shared_ptr<DataType>& type,
-                     const vector<T>& in_values, const vector<bool>& in_is_valid,
-                     const vector<T>& out_values, const vector<bool>& out_is_valid,
-                     const vector<int32_t>& out_indices) {
-  shared_ptr<Array> input = _MakeArray<Type, T>(type, in_values, in_is_valid);
-  shared_ptr<Array> ex_dict = _MakeArray<Type, T>(type, out_values, out_is_valid);
-  shared_ptr<Array> ex_indices =
-      _MakeArray<Int32Type, int32_t>(int32(), out_indices, in_is_valid);
-
-  DictionaryArray expected(dictionary(int32(), ex_dict), ex_indices);
-
-  Datum datum_out;
-  ASSERT_OK(DictionaryEncode(ctx, input, &datum_out));
-  shared_ptr<Array> result = MakeArray(datum_out.array());
-
-  ASSERT_ARRAYS_EQUAL(expected, *result);
-}
-
-class TestHashKernel : public ComputeFixture, public TestBase {};
-
-template <typename Type>
-class TestHashKernelPrimitive : public ComputeFixture, public TestBase {};
-
-typedef ::testing::Types<Int8Type, UInt8Type, Int16Type, UInt16Type, Int32Type,
-                         UInt32Type, Int64Type, UInt64Type, FloatType, DoubleType,
-                         Date32Type, Date64Type>
-    PrimitiveDictionaries;
-
-TYPED_TEST_CASE(TestHashKernelPrimitive, PrimitiveDictionaries);
-
-TYPED_TEST(TestHashKernelPrimitive, Unique) {
-  using T = typename TypeParam::c_type;
-  auto type = TypeTraits<TypeParam>::type_singleton();
-  CheckUnique<TypeParam, T>(&this->ctx_, type, {2, 1, 2, 1}, {true, false, true, true},
-                            {2, 1}, {});
-  CheckUnique<TypeParam, T>(&this->ctx_, type, {2, 1, 3, 1}, {false, false, true, true},
-                            {3, 1}, {});
-}
-
-TYPED_TEST(TestHashKernelPrimitive, DictEncode) {
-  using T = typename TypeParam::c_type;
-  auto type = TypeTraits<TypeParam>::type_singleton();
-  CheckDictEncode<TypeParam, T>(&this->ctx_, type, {2, 1, 2, 1, 2, 3},
-                                {true, false, true, true, true, true}, {2, 1, 3}, {},
-                                {0, 0, 0, 1, 0, 2});
-}
-
-TYPED_TEST(TestHashKernelPrimitive, PrimitiveResizeTable) {
-  using T = typename TypeParam::c_type;
-  // Skip this test for (u)int8
-  if (sizeof(Scalar) == 1) {
-    return;
-  }
-
-  const int64_t kTotalValues = 1000000;
-  const int64_t kRepeats = 5;
-
-  vector<T> values;
-  vector<T> uniques;
-  vector<int32_t> indices;
-  for (int64_t i = 0; i < kTotalValues * kRepeats; i++) {
-    const auto val = static_cast<T>(i % kTotalValues);
-    values.push_back(val);
-
-    if (i < kTotalValues) {
-      uniques.push_back(val);
-    }
-    indices.push_back(static_cast<int32_t>(i % kTotalValues));
-  }
-
-  auto type = TypeTraits<TypeParam>::type_singleton();
-  CheckUnique<TypeParam, T>(&this->ctx_, type, values, {}, uniques, {});
-
-  CheckDictEncode<TypeParam, T>(&this->ctx_, type, values, {}, uniques, {}, indices);
-}
-
-TEST_F(TestHashKernel, UniqueTimeTimestamp) {
-  CheckUnique<Time32Type, int32_t>(&this->ctx_, time32(TimeUnit::SECOND), {2, 1, 2, 1},
-                                   {true, false, true, true}, {2, 1}, {});
-
-  CheckUnique<Time64Type, int64_t>(&this->ctx_, time64(TimeUnit::NANO), {2, 1, 2, 1},
-                                   {true, false, true, true}, {2, 1}, {});
-
-  CheckUnique<TimestampType, int64_t>(&this->ctx_, timestamp(TimeUnit::NANO),
-                                      {2, 1, 2, 1}, {true, false, true, true}, {2, 1},
-                                      {});
-}
-
-TEST_F(TestHashKernel, UniqueBoolean) {
-  CheckUnique<BooleanType, bool>(&this->ctx_, boolean(), {true, true, false, true},
-                                 {true, false, true, true}, {true, false}, {});
-
-  CheckUnique<BooleanType, bool>(&this->ctx_, boolean(), {false, true, false, true},
-                                 {true, false, true, true}, {false, true}, {});
-
-  // No nulls
-  CheckUnique<BooleanType, bool>(&this->ctx_, boolean(), {true, true, false, true}, {},
-                                 {true, false}, {});
-
-  CheckUnique<BooleanType, bool>(&this->ctx_, boolean(), {false, true, false, true}, {},
-                                 {false, true}, {});
-}
-
-TEST_F(TestHashKernel, DictEncodeBoolean) {
-  CheckDictEncode<BooleanType, bool>(
-      &this->ctx_, boolean(), {true, true, false, true, false},
-      {true, false, true, true, true}, {true, false}, {}, {0, 0, 1, 0, 1});
-
-  CheckDictEncode<BooleanType, bool>(
-      &this->ctx_, boolean(), {false, true, false, true, false},
-      {true, false, true, true, true}, {false, true}, {}, {0, 0, 0, 1, 0});
-
-  // No nulls
-  CheckDictEncode<BooleanType, bool>(&this->ctx_, boolean(),
-                                     {true, true, false, true, false}, {}, {true, false},
-                                     {}, {0, 0, 1, 0, 1});
-
-  CheckDictEncode<BooleanType, bool>(&this->ctx_, boolean(),
-                                     {false, true, false, true, false}, {}, {false, true},
-                                     {}, {0, 1, 0, 1, 0});
-}
-
-TEST_F(TestHashKernel, UniqueBinary) {
-  CheckUnique<BinaryType, std::string>(&this->ctx_, binary(),
-                                       {"test", "", "test2", "test"},
-                                       {true, false, true, true}, {"test", "test2"}, {});
-
-  CheckUnique<StringType, std::string>(&this->ctx_, utf8(), {"test", "", "test2", "test"},
-                                       {true, false, true, true}, {"test", "test2"}, {});
-}
-
-TEST_F(TestHashKernel, DictEncodeBinary) {
-  CheckDictEncode<BinaryType, std::string>(
-      &this->ctx_, binary(), {"test", "", "test2", "test", "baz"},
-      {true, false, true, true, true}, {"test", "test2", "baz"}, {}, {0, 0, 1, 0, 2});
-
-  CheckDictEncode<StringType, std::string>(
-      &this->ctx_, utf8(), {"test", "", "test2", "test", "baz"},
-      {true, false, true, true, true}, {"test", "test2", "baz"}, {}, {0, 0, 1, 0, 2});
-}
-
-TEST_F(TestHashKernel, BinaryResizeTable) {
-  const int32_t kTotalValues = 10000;
-#if !defined(ARROW_VALGRIND)
-  const int32_t kRepeats = 10;
-#else
-  // Mitigate Valgrind's slowness
-  const int32_t kRepeats = 3;
-#endif
-
-  vector<std::string> values;
-  vector<std::string> uniques;
-  vector<int32_t> indices;
-  char buf[20] = "test";
-
-  for (int32_t i = 0; i < kTotalValues * kRepeats; i++) {
-    int32_t index = i % kTotalValues;
-
-    ASSERT_GE(snprintf(buf + 4, sizeof(buf) - 4, "%d", index), 0);
-    values.emplace_back(buf);
-
-    if (i < kTotalValues) {
-      uniques.push_back(values.back());
-    }
-    indices.push_back(index);
-  }
-
-  CheckUnique<BinaryType, std::string>(&this->ctx_, binary(), values, {}, uniques, {});
-  CheckDictEncode<BinaryType, std::string>(&this->ctx_, binary(), values, {}, uniques, {},
-                                           indices);
-
-  CheckUnique<StringType, std::string>(&this->ctx_, utf8(), values, {}, uniques, {});
-  CheckDictEncode<StringType, std::string>(&this->ctx_, utf8(), values, {}, uniques, {},
-                                           indices);
-}
-
-TEST_F(TestHashKernel, UniqueFixedSizeBinary) {
-  CheckUnique<FixedSizeBinaryType, std::string>(
-      &this->ctx_, fixed_size_binary(5), {"aaaaa", "", "bbbbb", "aaaaa"},
-      {true, false, true, true}, {"aaaaa", "bbbbb"}, {});
-}
-
-TEST_F(TestHashKernel, DictEncodeFixedSizeBinary) {
-  CheckDictEncode<FixedSizeBinaryType, std::string>(
-      &this->ctx_, fixed_size_binary(5), {"bbbbb", "", "bbbbb", "aaaaa", "ccccc"},
-      {true, false, true, true, true}, {"bbbbb", "aaaaa", "ccccc"}, {}, {0, 0, 0, 1, 2});
-}
-
-TEST_F(TestHashKernel, FixedSizeBinaryResizeTable) {
-  const int32_t kTotalValues = 10000;
-#if !defined(ARROW_VALGRIND)
-  const int32_t kRepeats = 10;
-#else
-  // Mitigate Valgrind's slowness
-  const int32_t kRepeats = 3;
-#endif
-
-  vector<std::string> values;
-  vector<std::string> uniques;
-  vector<int32_t> indices;
-  char buf[7] = "test..";
-
-  for (int32_t i = 0; i < kTotalValues * kRepeats; i++) {
-    int32_t index = i % kTotalValues;
-
-    buf[4] = static_cast<char>(index / 128);
-    buf[5] = static_cast<char>(index % 128);
-    values.emplace_back(buf, 6);
-
-    if (i < kTotalValues) {
-      uniques.push_back(values.back());
-    }
-    indices.push_back(index);
-  }
-
-  auto type = fixed_size_binary(6);
-  CheckUnique<FixedSizeBinaryType, std::string>(&this->ctx_, type, values, {}, uniques,
-                                                {});
-  CheckDictEncode<FixedSizeBinaryType, std::string>(&this->ctx_, type, values, {},
-                                                    uniques, {}, indices);
-}
-
-TEST_F(TestHashKernel, UniqueDecimal) {
-  vector<Decimal128> values{12, 12, 11, 12};
-  vector<Decimal128> expected{12, 11};
-
-  CheckUnique<Decimal128Type, Decimal128>(&this->ctx_, decimal(2, 0), values,
-                                          {true, false, true, true}, expected, {});
-}
-
-TEST_F(TestHashKernel, DictEncodeDecimal) {
-  vector<Decimal128> values{12, 12, 11, 12, 13};
-  vector<Decimal128> expected{12, 11, 13};
-
-  CheckDictEncode<Decimal128Type, Decimal128>(&this->ctx_, decimal(2, 0), values,
-                                              {true, false, true, true, true}, expected,
-                                              {}, {0, 0, 1, 0, 2});
-}
-
-TEST_F(TestHashKernel, ChunkedArrayInvoke) {
-  vector<std::string> values1 = {"foo", "bar", "foo"};
-  vector<std::string> values2 = {"bar", "baz", "quuux", "foo"};
-
-  auto type = utf8();
-  auto a1 = _MakeArray<StringType, std::string>(type, values1, {});
-  auto a2 = _MakeArray<StringType, std::string>(type, values2, {});
-
-  vector<std::string> dict_values = {"foo", "bar", "baz", "quuux"};
-  auto ex_dict = _MakeArray<StringType, std::string>(type, dict_values, {});
-
-  ArrayVector arrays = {a1, a2};
-  auto carr = std::make_shared<ChunkedArray>(arrays);
-
-  // Unique
-  shared_ptr<Array> result;
-  ASSERT_OK(Unique(&this->ctx_, carr, &result));
-  ASSERT_ARRAYS_EQUAL(*ex_dict, *result);
-
-  // Dictionary encode
-  auto dict_type = dictionary(int32(), ex_dict);
-
-  auto i1 = _MakeArray<Int32Type, int32_t>(int32(), {0, 1, 0}, {});
-  auto i2 = _MakeArray<Int32Type, int32_t>(int32(), {1, 2, 3, 0}, {});
-
-  ArrayVector dict_arrays = {std::make_shared<DictionaryArray>(dict_type, i1),
-                             std::make_shared<DictionaryArray>(dict_type, i2)};
-  auto dict_carr = std::make_shared<ChunkedArray>(dict_arrays);
-
-  Datum encoded_out;
-  ASSERT_OK(DictionaryEncode(&this->ctx_, carr, &encoded_out));
-  ASSERT_EQ(Datum::CHUNKED_ARRAY, encoded_out.kind());
-
-  AssertChunkedEqual(*dict_carr, *encoded_out.chunked_array());
-}
-
-using BinaryKernelFunc =
-    std::function<Status(FunctionContext*, const Datum&, const Datum&, Datum* out)>;
-
-class TestBooleanKernel : public ComputeFixture, public TestBase {
- public:
-  void TestArrayBinary(const BinaryKernelFunc& kernel, const std::shared_ptr<Array>& left,
-                       const std::shared_ptr<Array>& right,
-                       const std::shared_ptr<Array>& expected) {
-    Datum result;
-    ASSERT_OK(kernel(&this->ctx_, left, right, &result));
-    ASSERT_EQ(Datum::ARRAY, result.kind());
-    std::shared_ptr<Array> result_array = result.make_array();
-    ASSERT_TRUE(result_array->Equals(expected));
-  }
-
-  void TestChunkedArrayBinary(const BinaryKernelFunc& kernel,
-                              const std::shared_ptr<ChunkedArray>& left,
-                              const std::shared_ptr<ChunkedArray>& right,
-                              const std::shared_ptr<ChunkedArray>& expected) {
-    Datum result;
-    std::shared_ptr<Array> result_array;
-    ASSERT_OK(kernel(&this->ctx_, left, right, &result));
-    ASSERT_EQ(Datum::CHUNKED_ARRAY, result.kind());
-    std::shared_ptr<ChunkedArray> result_ca = result.chunked_array();
-    ASSERT_TRUE(result_ca->Equals(expected));
-  }
-
-  void TestBinaryKernel(const BinaryKernelFunc& kernel, const std::vector<bool>& values1,
-                        const std::vector<bool>& values2,
-                        const std::vector<bool>& values3,
-                        const std::vector<bool>& values3_nulls) {
-    auto type = boolean();
-    auto a1 = _MakeArray<BooleanType, bool>(type, values1, {});
-    auto a2 = _MakeArray<BooleanType, bool>(type, values2, {});
-    auto a3 = _MakeArray<BooleanType, bool>(type, values3, {});
-    auto a1_nulls = _MakeArray<BooleanType, bool>(type, values1, values1);
-    auto a2_nulls = _MakeArray<BooleanType, bool>(type, values2, values2);
-    auto a3_nulls = _MakeArray<BooleanType, bool>(type, values3, values3_nulls);
-
-    TestArrayBinary(kernel, a1, a2, a3);
-    TestArrayBinary(kernel, a1_nulls, a2_nulls, a3_nulls);
-    TestArrayBinary(kernel, a1->Slice(1), a2->Slice(1), a3->Slice(1));
-    TestArrayBinary(kernel, a1_nulls->Slice(1), a2_nulls->Slice(1), a3_nulls->Slice(1));
-
-    // ChunkedArray
-    std::vector<std::shared_ptr<Array>> ca1_arrs = {a1, a1->Slice(1)};
-    auto ca1 = std::make_shared<ChunkedArray>(ca1_arrs);
-    std::vector<std::shared_ptr<Array>> ca2_arrs = {a2, a2->Slice(1)};
-    auto ca2 = std::make_shared<ChunkedArray>(ca2_arrs);
-    std::vector<std::shared_ptr<Array>> ca3_arrs = {a3, a3->Slice(1)};
-    auto ca3 = std::make_shared<ChunkedArray>(ca3_arrs);
-    TestChunkedArrayBinary(kernel, ca1, ca2, ca3);
-
-    // ChunkedArray with different chunks
-    std::vector<std::shared_ptr<Array>> ca4_arrs = {a1->Slice(0, 1), a1->Slice(1),
-                                                    a1->Slice(1, 1), a1->Slice(2)};
-    auto ca4 = std::make_shared<ChunkedArray>(ca4_arrs);
-    TestChunkedArrayBinary(kernel, ca4, ca2, ca3);
-  }
-};
-
-TEST_F(TestBooleanKernel, Invert) {
-  vector<bool> values1 = {true, false, true};
-  vector<bool> values2 = {false, true, false};
-
-  auto type = boolean();
-  auto a1 = _MakeArray<BooleanType, bool>(type, values1, {});
-  auto a2 = _MakeArray<BooleanType, bool>(type, values2, {});
-
-  // Plain array
-  Datum result;
-  ASSERT_OK(Invert(&this->ctx_, a1, &result));
-  ASSERT_EQ(Datum::ARRAY, result.kind());
-  std::shared_ptr<Array> result_array = result.make_array();
-  ASSERT_TRUE(result_array->Equals(a2));
-
-  // Array with offset
-  ASSERT_OK(Invert(&this->ctx_, a1->Slice(1), &result));
-  ASSERT_EQ(Datum::ARRAY, result.kind());
-  result_array = result.make_array();
-  ASSERT_TRUE(result_array->Equals(a2->Slice(1)));
-
-  // ChunkedArray
-  std::vector<std::shared_ptr<Array>> ca1_arrs = {a1, a1->Slice(1)};
-  auto ca1 = std::make_shared<ChunkedArray>(ca1_arrs);
-  std::vector<std::shared_ptr<Array>> ca2_arrs = {a2, a2->Slice(1)};
-  auto ca2 = std::make_shared<ChunkedArray>(ca2_arrs);
-  ASSERT_OK(Invert(&this->ctx_, ca1, &result));
-  ASSERT_EQ(Datum::CHUNKED_ARRAY, result.kind());
-  std::shared_ptr<ChunkedArray> result_ca = result.chunked_array();
-  ASSERT_TRUE(result_ca->Equals(ca2));
-}
-
-TEST_F(TestBooleanKernel, And) {
-  vector<bool> values1 = {true, false, true, false, true, true};
-  vector<bool> values2 = {true, true, false, false, true, false};
-  vector<bool> values3 = {true, false, false, false, true, false};
-  TestBinaryKernel(And, values1, values2, values3, values3);
-}
-
-TEST_F(TestBooleanKernel, Or) {
-  vector<bool> values1 = {true, false, true, false, true, true};
-  vector<bool> values2 = {true, true, false, false, true, false};
-  vector<bool> values3 = {true, true, true, false, true, true};
-  vector<bool> values3_nulls = {true, false, false, false, true, false};
-  TestBinaryKernel(Or, values1, values2, values3, values3_nulls);
-}
-
-TEST_F(TestBooleanKernel, Xor) {
-  vector<bool> values1 = {true, false, true, false, true, true};
-  vector<bool> values2 = {true, true, false, false, true, false};
-  vector<bool> values3 = {false, true, true, false, false, true};
-  vector<bool> values3_nulls = {true, false, false, false, true, false};
-  TestBinaryKernel(Xor, values1, values2, values3, values3_nulls);
-}
-
 class TestInvokeBinaryKernel : public ComputeFixture, public TestBase {};
 
 class DummyBinaryKernel : public BinaryKernel {
diff --git a/cpp/src/arrow/compute/kernels/CMakeLists.txt b/cpp/src/arrow/compute/kernels/CMakeLists.txt
index a5a142b..4d508aa 100644
--- a/cpp/src/arrow/compute/kernels/CMakeLists.txt
+++ b/cpp/src/arrow/compute/kernels/CMakeLists.txt
@@ -16,3 +16,7 @@
 # under the License.
 
 ARROW_INSTALL_ALL_HEADERS("arrow/compute/kernels")
+
+ADD_ARROW_TEST(boolean-test PREFIX "arrow-compute")
+ADD_ARROW_TEST(cast-test PREFIX "arrow-compute")
+ADD_ARROW_TEST(hash-test PREFIX "arrow-compute")
diff --git a/cpp/src/arrow/compute/kernels/boolean-test.cc b/cpp/src/arrow/compute/kernels/boolean-test.cc
new file mode 100644
index 0000000..24b3c68
--- /dev/null
+++ b/cpp/src/arrow/compute/kernels/boolean-test.cc
@@ -0,0 +1,157 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <cstdint>
+#include <functional>
+#include <memory>
+#include <vector>
+
+#include <gtest/gtest.h>
+
+#include "arrow/test-common.h"
+#include "arrow/test-util.h"
+
+#include "arrow/compute/context.h"
+#include "arrow/compute/kernel.h"
+#include "arrow/compute/kernels/boolean.h"
+#include "arrow/compute/kernels/util-internal.h"
+#include "arrow/compute/test-util.h"
+
+using std::shared_ptr;
+using std::vector;
+
+namespace arrow {
+namespace compute {
+
+using BinaryKernelFunc =  // call signature shared by the binary kernels passed in below (And, Or, Xor)
+    std::function<Status(FunctionContext*, const Datum&, const Datum&, Datum* out)>;
+
+class TestBooleanKernel : public ComputeFixture, public TestBase {  // fixture with helpers for the boolean kernel tests
+ public:
+  void TestArrayBinary(const BinaryKernelFunc& kernel, const std::shared_ptr<Array>& left,  // Array x Array check
+                       const std::shared_ptr<Array>& right,
+                       const std::shared_ptr<Array>& expected) {
+    Datum result;
+    ASSERT_OK(kernel(&this->ctx_, left, right, &result));
+    ASSERT_EQ(Datum::ARRAY, result.kind());  // array inputs must yield an array result
+    std::shared_ptr<Array> result_array = result.make_array();
+    ASSERT_TRUE(result_array->Equals(expected));
+  }
+
+  void TestChunkedArrayBinary(const BinaryKernelFunc& kernel,  // ChunkedArray x ChunkedArray check
+                              const std::shared_ptr<ChunkedArray>& left,
+                              const std::shared_ptr<ChunkedArray>& right,
+                              const std::shared_ptr<ChunkedArray>& expected) {
+    Datum result;
+    std::shared_ptr<Array> result_array;  // NOTE(review): declared but never used in this helper
+    ASSERT_OK(kernel(&this->ctx_, left, right, &result));
+    ASSERT_EQ(Datum::CHUNKED_ARRAY, result.kind());  // chunked inputs must yield a chunked result
+    std::shared_ptr<ChunkedArray> result_ca = result.chunked_array();
+    ASSERT_TRUE(result_ca->Equals(expected));
+  }
+
+  void TestBinaryKernel(const BinaryKernelFunc& kernel, const std::vector<bool>& values1,  // values1: left input
+                        const std::vector<bool>& values2,  // right input
+                        const std::vector<bool>& values3,  // expected output values
+                        const std::vector<bool>& values3_nulls) {  // passed as the validity argument of the expected array
+    auto type = boolean();
+    auto a1 = _MakeArray<BooleanType, bool>(type, values1, {});  // empty validity argument
+    auto a2 = _MakeArray<BooleanType, bool>(type, values2, {});
+    auto a3 = _MakeArray<BooleanType, bool>(type, values3, {});
+    auto a1_nulls = _MakeArray<BooleanType, bool>(type, values1, values1);  // values1 is also passed as the validity argument
+    auto a2_nulls = _MakeArray<BooleanType, bool>(type, values2, values2);  // likewise for values2
+    auto a3_nulls = _MakeArray<BooleanType, bool>(type, values3, values3_nulls);
+
+    TestArrayBinary(kernel, a1, a2, a3);
+    TestArrayBinary(kernel, a1_nulls, a2_nulls, a3_nulls);
+    TestArrayBinary(kernel, a1->Slice(1), a2->Slice(1), a3->Slice(1));  // same checks on slices starting at index 1
+    TestArrayBinary(kernel, a1_nulls->Slice(1), a2_nulls->Slice(1), a3_nulls->Slice(1));
+
+    // ChunkedArray: each operand is the full array followed by its Slice(1)
+    std::vector<std::shared_ptr<Array>> ca1_arrs = {a1, a1->Slice(1)};
+    auto ca1 = std::make_shared<ChunkedArray>(ca1_arrs);
+    std::vector<std::shared_ptr<Array>> ca2_arrs = {a2, a2->Slice(1)};
+    auto ca2 = std::make_shared<ChunkedArray>(ca2_arrs);
+    std::vector<std::shared_ptr<Array>> ca3_arrs = {a3, a3->Slice(1)};
+    auto ca3 = std::make_shared<ChunkedArray>(ca3_arrs);
+    TestChunkedArrayBinary(kernel, ca1, ca2, ca3);
+
+    // ChunkedArray with different chunks: ca4 is built from four slices of a1, so its chunking differs from ca2's
+    std::vector<std::shared_ptr<Array>> ca4_arrs = {a1->Slice(0, 1), a1->Slice(1),
+                                                    a1->Slice(1, 1), a1->Slice(2)};
+    auto ca4 = std::make_shared<ChunkedArray>(ca4_arrs);
+    TestChunkedArrayBinary(kernel, ca4, ca2, ca3);  // expected result is still ca3
+  }
+};
+
+TEST_F(TestBooleanKernel, Invert) {  // checks Invert on a plain array, a sliced array and a chunked array
+  vector<bool> values1 = {true, false, true};
+  vector<bool> values2 = {false, true, false};  // element-wise negation of values1
+
+  auto type = boolean();
+  auto a1 = _MakeArray<BooleanType, bool>(type, values1, {});
+  auto a2 = _MakeArray<BooleanType, bool>(type, values2, {});
+
+  // Plain array: inverting a1 must give a2
+  Datum result;
+  ASSERT_OK(Invert(&this->ctx_, a1, &result));
+  ASSERT_EQ(Datum::ARRAY, result.kind());
+  std::shared_ptr<Array> result_array = result.make_array();
+  ASSERT_TRUE(result_array->Equals(a2));
+
+  // Array with offset: same check on slices starting at index 1
+  ASSERT_OK(Invert(&this->ctx_, a1->Slice(1), &result));
+  ASSERT_EQ(Datum::ARRAY, result.kind());
+  result_array = result.make_array();
+  ASSERT_TRUE(result_array->Equals(a2->Slice(1)));
+
+  // ChunkedArray: a chunked input must produce a chunked result equal to ca2
+  std::vector<std::shared_ptr<Array>> ca1_arrs = {a1, a1->Slice(1)};
+  auto ca1 = std::make_shared<ChunkedArray>(ca1_arrs);
+  std::vector<std::shared_ptr<Array>> ca2_arrs = {a2, a2->Slice(1)};
+  auto ca2 = std::make_shared<ChunkedArray>(ca2_arrs);
+  ASSERT_OK(Invert(&this->ctx_, ca1, &result));
+  ASSERT_EQ(Datum::CHUNKED_ARRAY, result.kind());
+  std::shared_ptr<ChunkedArray> result_ca = result.chunked_array();
+  ASSERT_TRUE(result_ca->Equals(ca2));
+}
+
+TEST_F(TestBooleanKernel, And) {
+  vector<bool> values1 = {true, false, true, false, true, true};
+  vector<bool> values2 = {true, true, false, false, true, false};
+  vector<bool> values3 = {true, false, false, false, true, false};  // values1 AND values2, element-wise
+  TestBinaryKernel(And, values1, values2, values3, values3);  // values3 is also passed as values3_nulls
+}
+
+TEST_F(TestBooleanKernel, Or) {
+  vector<bool> values1 = {true, false, true, false, true, true};
+  vector<bool> values2 = {true, true, false, false, true, false};
+  vector<bool> values3 = {true, true, true, false, true, true};  // values1 OR values2, element-wise
+  vector<bool> values3_nulls = {true, false, false, false, true, false};  // equals values1 AND values2, element-wise
+  TestBinaryKernel(Or, values1, values2, values3, values3_nulls);
+}
+
+TEST_F(TestBooleanKernel, Xor) {
+  vector<bool> values1 = {true, false, true, false, true, true};
+  vector<bool> values2 = {true, true, false, false, true, false};
+  vector<bool> values3 = {false, true, true, false, false, true};  // values1 XOR values2, element-wise
+  vector<bool> values3_nulls = {true, false, false, false, true, false};  // equals values1 AND values2, element-wise
+  TestBinaryKernel(Xor, values1, values2, values3, values3_nulls);
+}
+
+}  // namespace compute
+}  // namespace arrow
diff --git a/cpp/src/arrow/compute/compute-test.cc b/cpp/src/arrow/compute/kernels/cast-test.cc
similarity index 71%
copy from cpp/src/arrow/compute/compute-test.cc
copy to cpp/src/arrow/compute/kernels/cast-test.cc
index e34a086..4c39928 100644
--- a/cpp/src/arrow/compute/compute-test.cc
+++ b/cpp/src/arrow/compute/kernels/cast-test.cc
@@ -34,15 +34,16 @@
 #include "arrow/test-common.h"
 #include "arrow/test-util.h"
 #include "arrow/type.h"
+#include "arrow/type_fwd.h"
 #include "arrow/type_traits.h"
 #include "arrow/util/decimal.h"
 
 #include "arrow/compute/context.h"
 #include "arrow/compute/kernel.h"
-#include "arrow/compute/kernels/boolean.h"
 #include "arrow/compute/kernels/cast.h"
 #include "arrow/compute/kernels/hash.h"
 #include "arrow/compute/kernels/util-internal.h"
+#include "arrow/compute/test-util.h"
 
 using std::shared_ptr;
 using std::vector;
@@ -50,50 +51,6 @@ using std::vector;
 namespace arrow {
 namespace compute {
 
-class ComputeFixture {
- public:
-  ComputeFixture() : ctx_(default_memory_pool()) {}
-
- protected:
-  FunctionContext ctx_;
-};
-
-template <typename Type, typename T>
-shared_ptr<Array> _MakeArray(const shared_ptr<DataType>& type, const vector<T>& values,
-                             const vector<bool>& is_valid) {
-  shared_ptr<Array> result;
-  if (is_valid.size() > 0) {
-    ArrayFromVector<Type, T>(type, is_valid, values, &result);
-  } else {
-    ArrayFromVector<Type, T>(type, values, &result);
-  }
-  return result;
-}
-
-// ----------------------------------------------------------------------
-// Datum
-
-template <typename T>
-void CheckImplicitConstructor(enum Datum::type expected_kind) {
-  std::shared_ptr<T> value;
-  Datum datum = value;
-  ASSERT_EQ(expected_kind, datum.kind());
-}
-
-TEST(TestDatum, ImplicitConstructors) {
-  CheckImplicitConstructor<Array>(Datum::ARRAY);
-
-  // Instantiate from array subclass
-  CheckImplicitConstructor<BinaryArray>(Datum::ARRAY);
-
-  CheckImplicitConstructor<ChunkedArray>(Datum::CHUNKED_ARRAY);
-  CheckImplicitConstructor<RecordBatch>(Datum::RECORD_BATCH);
-  CheckImplicitConstructor<Table>(Datum::TABLE);
-}
-
-// ----------------------------------------------------------------------
-// Cast
-
 static void AssertBufferSame(const Array& left, const Array& right, int buffer_index) {
   ASSERT_EQ(left.data()->buffers[buffer_index].get(),
             right.data()->buffers[buffer_index].get());
@@ -1110,6 +1067,31 @@ TEST_F(TestCast, StringToTimestampErrors) {
   }
 }
 
+constexpr const char* kInvalidUtf8 = "\xa0\xa1";  // payload the test below relies on being rejected as invalid UTF-8
+
+TEST_F(TestCast, BinaryToString) {
+  CastOptions options;
+
+  // Validity masks: `all` marks every slot valid, `valid` nulls out only the last one
+  vector<bool> all = {1, 1, 1, 1, 1};
+  vector<bool> valid = {1, 1, 1, 1, 0};
+  vector<std::string> strings = {"Hi", "olá mundo", "你好世界", "", kInvalidUtf8};
+
+  std::shared_ptr<Array> array;
+
+  // Should accept when the invalid payload sits in a null slot.
+  ArrayFromVector<BinaryType, std::string>(binary(), valid, strings, &array);
+  CheckZeroCopy(*array, utf8());
+
+  // Should refuse due to invalid utf8 payload (every slot is valid here)
+  CheckFails<BinaryType, std::string>(binary(), strings, all, utf8(), options);
+
+  // Should accept due to option override; expected output is the unchanged strings
+  options.allow_invalid_utf8 = true;
+  CheckCase<BinaryType, std::string, StringType, std::string>(binary(), strings, all,
+                                                              utf8(), strings, options);
+}
+
 template <typename TestType>
 class TestDictionaryCast : public TestCast {};
 
@@ -1211,443 +1193,5 @@ TEST_F(TestCast, ListToList) {
   CheckPass(*float64_list_array, *int64_list_array, int64_list_array->type(), options);
 }
 
-// ----------------------------------------------------------------------
-// Dictionary tests
-
-template <typename Type, typename T>
-void CheckUnique(FunctionContext* ctx, const shared_ptr<DataType>& type,
-                 const vector<T>& in_values, const vector<bool>& in_is_valid,
-                 const vector<T>& out_values, const vector<bool>& out_is_valid) {
-  shared_ptr<Array> input = _MakeArray<Type, T>(type, in_values, in_is_valid);
-  shared_ptr<Array> expected = _MakeArray<Type, T>(type, out_values, out_is_valid);
-
-  shared_ptr<Array> result;
-  ASSERT_OK(Unique(ctx, input, &result));
-  ASSERT_ARRAYS_EQUAL(*expected, *result);
-}
-
-template <typename Type, typename T>
-void CheckDictEncode(FunctionContext* ctx, const shared_ptr<DataType>& type,
-                     const vector<T>& in_values, const vector<bool>& in_is_valid,
-                     const vector<T>& out_values, const vector<bool>& out_is_valid,
-                     const vector<int32_t>& out_indices) {
-  shared_ptr<Array> input = _MakeArray<Type, T>(type, in_values, in_is_valid);
-  shared_ptr<Array> ex_dict = _MakeArray<Type, T>(type, out_values, out_is_valid);
-  shared_ptr<Array> ex_indices =
-      _MakeArray<Int32Type, int32_t>(int32(), out_indices, in_is_valid);
-
-  DictionaryArray expected(dictionary(int32(), ex_dict), ex_indices);
-
-  Datum datum_out;
-  ASSERT_OK(DictionaryEncode(ctx, input, &datum_out));
-  shared_ptr<Array> result = MakeArray(datum_out.array());
-
-  ASSERT_ARRAYS_EQUAL(expected, *result);
-}
-
-class TestHashKernel : public ComputeFixture, public TestBase {};
-
-template <typename Type>
-class TestHashKernelPrimitive : public ComputeFixture, public TestBase {};
-
-typedef ::testing::Types<Int8Type, UInt8Type, Int16Type, UInt16Type, Int32Type,
-                         UInt32Type, Int64Type, UInt64Type, FloatType, DoubleType,
-                         Date32Type, Date64Type>
-    PrimitiveDictionaries;
-
-TYPED_TEST_CASE(TestHashKernelPrimitive, PrimitiveDictionaries);
-
-TYPED_TEST(TestHashKernelPrimitive, Unique) {
-  using T = typename TypeParam::c_type;
-  auto type = TypeTraits<TypeParam>::type_singleton();
-  CheckUnique<TypeParam, T>(&this->ctx_, type, {2, 1, 2, 1}, {true, false, true, true},
-                            {2, 1}, {});
-  CheckUnique<TypeParam, T>(&this->ctx_, type, {2, 1, 3, 1}, {false, false, true, true},
-                            {3, 1}, {});
-}
-
-TYPED_TEST(TestHashKernelPrimitive, DictEncode) {
-  using T = typename TypeParam::c_type;
-  auto type = TypeTraits<TypeParam>::type_singleton();
-  CheckDictEncode<TypeParam, T>(&this->ctx_, type, {2, 1, 2, 1, 2, 3},
-                                {true, false, true, true, true, true}, {2, 1, 3}, {},
-                                {0, 0, 0, 1, 0, 2});
-}
-
-TYPED_TEST(TestHashKernelPrimitive, PrimitiveResizeTable) {
-  using T = typename TypeParam::c_type;
-  // Skip this test for (u)int8
-  if (sizeof(Scalar) == 1) {
-    return;
-  }
-
-  const int64_t kTotalValues = 1000000;
-  const int64_t kRepeats = 5;
-
-  vector<T> values;
-  vector<T> uniques;
-  vector<int32_t> indices;
-  for (int64_t i = 0; i < kTotalValues * kRepeats; i++) {
-    const auto val = static_cast<T>(i % kTotalValues);
-    values.push_back(val);
-
-    if (i < kTotalValues) {
-      uniques.push_back(val);
-    }
-    indices.push_back(static_cast<int32_t>(i % kTotalValues));
-  }
-
-  auto type = TypeTraits<TypeParam>::type_singleton();
-  CheckUnique<TypeParam, T>(&this->ctx_, type, values, {}, uniques, {});
-
-  CheckDictEncode<TypeParam, T>(&this->ctx_, type, values, {}, uniques, {}, indices);
-}
-
-TEST_F(TestHashKernel, UniqueTimeTimestamp) {
-  CheckUnique<Time32Type, int32_t>(&this->ctx_, time32(TimeUnit::SECOND), {2, 1, 2, 1},
-                                   {true, false, true, true}, {2, 1}, {});
-
-  CheckUnique<Time64Type, int64_t>(&this->ctx_, time64(TimeUnit::NANO), {2, 1, 2, 1},
-                                   {true, false, true, true}, {2, 1}, {});
-
-  CheckUnique<TimestampType, int64_t>(&this->ctx_, timestamp(TimeUnit::NANO),
-                                      {2, 1, 2, 1}, {true, false, true, true}, {2, 1},
-                                      {});
-}
-
-TEST_F(TestHashKernel, UniqueBoolean) {
-  CheckUnique<BooleanType, bool>(&this->ctx_, boolean(), {true, true, false, true},
-                                 {true, false, true, true}, {true, false}, {});
-
-  CheckUnique<BooleanType, bool>(&this->ctx_, boolean(), {false, true, false, true},
-                                 {true, false, true, true}, {false, true}, {});
-
-  // No nulls
-  CheckUnique<BooleanType, bool>(&this->ctx_, boolean(), {true, true, false, true}, {},
-                                 {true, false}, {});
-
-  CheckUnique<BooleanType, bool>(&this->ctx_, boolean(), {false, true, false, true}, {},
-                                 {false, true}, {});
-}
-
-TEST_F(TestHashKernel, DictEncodeBoolean) {
-  CheckDictEncode<BooleanType, bool>(
-      &this->ctx_, boolean(), {true, true, false, true, false},
-      {true, false, true, true, true}, {true, false}, {}, {0, 0, 1, 0, 1});
-
-  CheckDictEncode<BooleanType, bool>(
-      &this->ctx_, boolean(), {false, true, false, true, false},
-      {true, false, true, true, true}, {false, true}, {}, {0, 0, 0, 1, 0});
-
-  // No nulls
-  CheckDictEncode<BooleanType, bool>(&this->ctx_, boolean(),
-                                     {true, true, false, true, false}, {}, {true, false},
-                                     {}, {0, 0, 1, 0, 1});
-
-  CheckDictEncode<BooleanType, bool>(&this->ctx_, boolean(),
-                                     {false, true, false, true, false}, {}, {false, true},
-                                     {}, {0, 1, 0, 1, 0});
-}
-
-TEST_F(TestHashKernel, UniqueBinary) {
-  CheckUnique<BinaryType, std::string>(&this->ctx_, binary(),
-                                       {"test", "", "test2", "test"},
-                                       {true, false, true, true}, {"test", "test2"}, {});
-
-  CheckUnique<StringType, std::string>(&this->ctx_, utf8(), {"test", "", "test2", "test"},
-                                       {true, false, true, true}, {"test", "test2"}, {});
-}
-
-TEST_F(TestHashKernel, DictEncodeBinary) {
-  CheckDictEncode<BinaryType, std::string>(
-      &this->ctx_, binary(), {"test", "", "test2", "test", "baz"},
-      {true, false, true, true, true}, {"test", "test2", "baz"}, {}, {0, 0, 1, 0, 2});
-
-  CheckDictEncode<StringType, std::string>(
-      &this->ctx_, utf8(), {"test", "", "test2", "test", "baz"},
-      {true, false, true, true, true}, {"test", "test2", "baz"}, {}, {0, 0, 1, 0, 2});
-}
-
-TEST_F(TestHashKernel, BinaryResizeTable) {
-  const int32_t kTotalValues = 10000;
-#if !defined(ARROW_VALGRIND)
-  const int32_t kRepeats = 10;
-#else
-  // Mitigate Valgrind's slowness
-  const int32_t kRepeats = 3;
-#endif
-
-  vector<std::string> values;
-  vector<std::string> uniques;
-  vector<int32_t> indices;
-  char buf[20] = "test";
-
-  for (int32_t i = 0; i < kTotalValues * kRepeats; i++) {
-    int32_t index = i % kTotalValues;
-
-    ASSERT_GE(snprintf(buf + 4, sizeof(buf) - 4, "%d", index), 0);
-    values.emplace_back(buf);
-
-    if (i < kTotalValues) {
-      uniques.push_back(values.back());
-    }
-    indices.push_back(index);
-  }
-
-  CheckUnique<BinaryType, std::string>(&this->ctx_, binary(), values, {}, uniques, {});
-  CheckDictEncode<BinaryType, std::string>(&this->ctx_, binary(), values, {}, uniques, {},
-                                           indices);
-
-  CheckUnique<StringType, std::string>(&this->ctx_, utf8(), values, {}, uniques, {});
-  CheckDictEncode<StringType, std::string>(&this->ctx_, utf8(), values, {}, uniques, {},
-                                           indices);
-}
-
-TEST_F(TestHashKernel, UniqueFixedSizeBinary) {
-  CheckUnique<FixedSizeBinaryType, std::string>(
-      &this->ctx_, fixed_size_binary(5), {"aaaaa", "", "bbbbb", "aaaaa"},
-      {true, false, true, true}, {"aaaaa", "bbbbb"}, {});
-}
-
-TEST_F(TestHashKernel, DictEncodeFixedSizeBinary) {
-  CheckDictEncode<FixedSizeBinaryType, std::string>(
-      &this->ctx_, fixed_size_binary(5), {"bbbbb", "", "bbbbb", "aaaaa", "ccccc"},
-      {true, false, true, true, true}, {"bbbbb", "aaaaa", "ccccc"}, {}, {0, 0, 0, 1, 2});
-}
-
-TEST_F(TestHashKernel, FixedSizeBinaryResizeTable) {
-  const int32_t kTotalValues = 10000;
-#if !defined(ARROW_VALGRIND)
-  const int32_t kRepeats = 10;
-#else
-  // Mitigate Valgrind's slowness
-  const int32_t kRepeats = 3;
-#endif
-
-  vector<std::string> values;
-  vector<std::string> uniques;
-  vector<int32_t> indices;
-  char buf[7] = "test..";
-
-  for (int32_t i = 0; i < kTotalValues * kRepeats; i++) {
-    int32_t index = i % kTotalValues;
-
-    buf[4] = static_cast<char>(index / 128);
-    buf[5] = static_cast<char>(index % 128);
-    values.emplace_back(buf, 6);
-
-    if (i < kTotalValues) {
-      uniques.push_back(values.back());
-    }
-    indices.push_back(index);
-  }
-
-  auto type = fixed_size_binary(6);
-  CheckUnique<FixedSizeBinaryType, std::string>(&this->ctx_, type, values, {}, uniques,
-                                                {});
-  CheckDictEncode<FixedSizeBinaryType, std::string>(&this->ctx_, type, values, {},
-                                                    uniques, {}, indices);
-}
-
-TEST_F(TestHashKernel, UniqueDecimal) {
-  vector<Decimal128> values{12, 12, 11, 12};
-  vector<Decimal128> expected{12, 11};
-
-  CheckUnique<Decimal128Type, Decimal128>(&this->ctx_, decimal(2, 0), values,
-                                          {true, false, true, true}, expected, {});
-}
-
-TEST_F(TestHashKernel, DictEncodeDecimal) {
-  vector<Decimal128> values{12, 12, 11, 12, 13};
-  vector<Decimal128> expected{12, 11, 13};
-
-  CheckDictEncode<Decimal128Type, Decimal128>(&this->ctx_, decimal(2, 0), values,
-                                              {true, false, true, true, true}, expected,
-                                              {}, {0, 0, 1, 0, 2});
-}
-
-TEST_F(TestHashKernel, ChunkedArrayInvoke) {
-  vector<std::string> values1 = {"foo", "bar", "foo"};
-  vector<std::string> values2 = {"bar", "baz", "quuux", "foo"};
-
-  auto type = utf8();
-  auto a1 = _MakeArray<StringType, std::string>(type, values1, {});
-  auto a2 = _MakeArray<StringType, std::string>(type, values2, {});
-
-  vector<std::string> dict_values = {"foo", "bar", "baz", "quuux"};
-  auto ex_dict = _MakeArray<StringType, std::string>(type, dict_values, {});
-
-  ArrayVector arrays = {a1, a2};
-  auto carr = std::make_shared<ChunkedArray>(arrays);
-
-  // Unique
-  shared_ptr<Array> result;
-  ASSERT_OK(Unique(&this->ctx_, carr, &result));
-  ASSERT_ARRAYS_EQUAL(*ex_dict, *result);
-
-  // Dictionary encode
-  auto dict_type = dictionary(int32(), ex_dict);
-
-  auto i1 = _MakeArray<Int32Type, int32_t>(int32(), {0, 1, 0}, {});
-  auto i2 = _MakeArray<Int32Type, int32_t>(int32(), {1, 2, 3, 0}, {});
-
-  ArrayVector dict_arrays = {std::make_shared<DictionaryArray>(dict_type, i1),
-                             std::make_shared<DictionaryArray>(dict_type, i2)};
-  auto dict_carr = std::make_shared<ChunkedArray>(dict_arrays);
-
-  Datum encoded_out;
-  ASSERT_OK(DictionaryEncode(&this->ctx_, carr, &encoded_out));
-  ASSERT_EQ(Datum::CHUNKED_ARRAY, encoded_out.kind());
-
-  AssertChunkedEqual(*dict_carr, *encoded_out.chunked_array());
-}
-
-using BinaryKernelFunc =
-    std::function<Status(FunctionContext*, const Datum&, const Datum&, Datum* out)>;
-
-class TestBooleanKernel : public ComputeFixture, public TestBase {
- public:
-  void TestArrayBinary(const BinaryKernelFunc& kernel, const std::shared_ptr<Array>& left,
-                       const std::shared_ptr<Array>& right,
-                       const std::shared_ptr<Array>& expected) {
-    Datum result;
-    ASSERT_OK(kernel(&this->ctx_, left, right, &result));
-    ASSERT_EQ(Datum::ARRAY, result.kind());
-    std::shared_ptr<Array> result_array = result.make_array();
-    ASSERT_TRUE(result_array->Equals(expected));
-  }
-
-  void TestChunkedArrayBinary(const BinaryKernelFunc& kernel,
-                              const std::shared_ptr<ChunkedArray>& left,
-                              const std::shared_ptr<ChunkedArray>& right,
-                              const std::shared_ptr<ChunkedArray>& expected) {
-    Datum result;
-    std::shared_ptr<Array> result_array;
-    ASSERT_OK(kernel(&this->ctx_, left, right, &result));
-    ASSERT_EQ(Datum::CHUNKED_ARRAY, result.kind());
-    std::shared_ptr<ChunkedArray> result_ca = result.chunked_array();
-    ASSERT_TRUE(result_ca->Equals(expected));
-  }
-
-  void TestBinaryKernel(const BinaryKernelFunc& kernel, const std::vector<bool>& values1,
-                        const std::vector<bool>& values2,
-                        const std::vector<bool>& values3,
-                        const std::vector<bool>& values3_nulls) {
-    auto type = boolean();
-    auto a1 = _MakeArray<BooleanType, bool>(type, values1, {});
-    auto a2 = _MakeArray<BooleanType, bool>(type, values2, {});
-    auto a3 = _MakeArray<BooleanType, bool>(type, values3, {});
-    auto a1_nulls = _MakeArray<BooleanType, bool>(type, values1, values1);
-    auto a2_nulls = _MakeArray<BooleanType, bool>(type, values2, values2);
-    auto a3_nulls = _MakeArray<BooleanType, bool>(type, values3, values3_nulls);
-
-    TestArrayBinary(kernel, a1, a2, a3);
-    TestArrayBinary(kernel, a1_nulls, a2_nulls, a3_nulls);
-    TestArrayBinary(kernel, a1->Slice(1), a2->Slice(1), a3->Slice(1));
-    TestArrayBinary(kernel, a1_nulls->Slice(1), a2_nulls->Slice(1), a3_nulls->Slice(1));
-
-    // ChunkedArray
-    std::vector<std::shared_ptr<Array>> ca1_arrs = {a1, a1->Slice(1)};
-    auto ca1 = std::make_shared<ChunkedArray>(ca1_arrs);
-    std::vector<std::shared_ptr<Array>> ca2_arrs = {a2, a2->Slice(1)};
-    auto ca2 = std::make_shared<ChunkedArray>(ca2_arrs);
-    std::vector<std::shared_ptr<Array>> ca3_arrs = {a3, a3->Slice(1)};
-    auto ca3 = std::make_shared<ChunkedArray>(ca3_arrs);
-    TestChunkedArrayBinary(kernel, ca1, ca2, ca3);
-
-    // ChunkedArray with different chunks
-    std::vector<std::shared_ptr<Array>> ca4_arrs = {a1->Slice(0, 1), a1->Slice(1),
-                                                    a1->Slice(1, 1), a1->Slice(2)};
-    auto ca4 = std::make_shared<ChunkedArray>(ca4_arrs);
-    TestChunkedArrayBinary(kernel, ca4, ca2, ca3);
-  }
-};
-
-TEST_F(TestBooleanKernel, Invert) {
-  vector<bool> values1 = {true, false, true};
-  vector<bool> values2 = {false, true, false};
-
-  auto type = boolean();
-  auto a1 = _MakeArray<BooleanType, bool>(type, values1, {});
-  auto a2 = _MakeArray<BooleanType, bool>(type, values2, {});
-
-  // Plain array
-  Datum result;
-  ASSERT_OK(Invert(&this->ctx_, a1, &result));
-  ASSERT_EQ(Datum::ARRAY, result.kind());
-  std::shared_ptr<Array> result_array = result.make_array();
-  ASSERT_TRUE(result_array->Equals(a2));
-
-  // Array with offset
-  ASSERT_OK(Invert(&this->ctx_, a1->Slice(1), &result));
-  ASSERT_EQ(Datum::ARRAY, result.kind());
-  result_array = result.make_array();
-  ASSERT_TRUE(result_array->Equals(a2->Slice(1)));
-
-  // ChunkedArray
-  std::vector<std::shared_ptr<Array>> ca1_arrs = {a1, a1->Slice(1)};
-  auto ca1 = std::make_shared<ChunkedArray>(ca1_arrs);
-  std::vector<std::shared_ptr<Array>> ca2_arrs = {a2, a2->Slice(1)};
-  auto ca2 = std::make_shared<ChunkedArray>(ca2_arrs);
-  ASSERT_OK(Invert(&this->ctx_, ca1, &result));
-  ASSERT_EQ(Datum::CHUNKED_ARRAY, result.kind());
-  std::shared_ptr<ChunkedArray> result_ca = result.chunked_array();
-  ASSERT_TRUE(result_ca->Equals(ca2));
-}
-
-TEST_F(TestBooleanKernel, And) {
-  vector<bool> values1 = {true, false, true, false, true, true};
-  vector<bool> values2 = {true, true, false, false, true, false};
-  vector<bool> values3 = {true, false, false, false, true, false};
-  TestBinaryKernel(And, values1, values2, values3, values3);
-}
-
-TEST_F(TestBooleanKernel, Or) {
-  vector<bool> values1 = {true, false, true, false, true, true};
-  vector<bool> values2 = {true, true, false, false, true, false};
-  vector<bool> values3 = {true, true, true, false, true, true};
-  vector<bool> values3_nulls = {true, false, false, false, true, false};
-  TestBinaryKernel(Or, values1, values2, values3, values3_nulls);
-}
-
-TEST_F(TestBooleanKernel, Xor) {
-  vector<bool> values1 = {true, false, true, false, true, true};
-  vector<bool> values2 = {true, true, false, false, true, false};
-  vector<bool> values3 = {false, true, true, false, false, true};
-  vector<bool> values3_nulls = {true, false, false, false, true, false};
-  TestBinaryKernel(Xor, values1, values2, values3, values3_nulls);
-}
-
-class TestInvokeBinaryKernel : public ComputeFixture, public TestBase {};
-
-class DummyBinaryKernel : public BinaryKernel {
-  Status Call(FunctionContext* ctx, const Datum& left, const Datum& right,
-              Datum* out) override {
-    return Status::OK();
-  }
-};
-
-TEST_F(TestInvokeBinaryKernel, Exceptions) {
-  DummyBinaryKernel kernel;
-  std::vector<Datum> outputs;
-  std::shared_ptr<Table> table;
-  vector<bool> values1 = {true, false, true};
-  vector<bool> values2 = {false, true, false};
-
-  auto type = boolean();
-  auto a1 = _MakeArray<BooleanType, bool>(type, values1, {});
-  auto a2 = _MakeArray<BooleanType, bool>(type, values2, {});
-
-  // Left is not an array-like
-  ASSERT_RAISES(Invalid, detail::InvokeBinaryArrayKernel(&this->ctx_, &kernel, table, a2,
-                                                         &outputs));
-  // Right is not an array-like
-  ASSERT_RAISES(Invalid, detail::InvokeBinaryArrayKernel(&this->ctx_, &kernel, a1, table,
-                                                         &outputs));
-  // Different sized inputs
-  ASSERT_RAISES(Invalid, detail::InvokeBinaryArrayKernel(&this->ctx_, &kernel, a1,
-                                                         a1->Slice(1), &outputs));
-}
-
 }  // namespace compute
 }  // namespace arrow
diff --git a/cpp/src/arrow/compute/kernels/cast.cc b/cpp/src/arrow/compute/kernels/cast.cc
index 4f7d7f8..b148486 100644
--- a/cpp/src/arrow/compute/kernels/cast.cc
+++ b/cpp/src/arrow/compute/kernels/cast.cc
@@ -37,6 +37,7 @@
 #include "arrow/util/logging.h"
 #include "arrow/util/macros.h"
 #include "arrow/util/parsing.h"  // IWYU pragma: keep
+#include "arrow/util/utf8.h"
 
 #include "arrow/compute/context.h"
 #include "arrow/compute/kernel.h"
@@ -77,6 +78,19 @@ namespace compute {
 
 constexpr int64_t kMillisecondsInDay = 86400000;
 
+template <typename O, typename I, typename Enable = void>
+struct is_binary_to_string {
+  static constexpr bool value = false;
+};
+
+template <typename O, typename I>
+struct is_binary_to_string<
+    O, I,
+    typename std::enable_if<std::is_same<BinaryType, I>::value &&
+                            std::is_base_of<StringType, O>::value>::type> {
+  static constexpr bool value = true;
+};
+
 // ----------------------------------------------------------------------
 // Zero copy casts
 
@@ -112,15 +126,30 @@ struct is_zero_copy_cast<
   static constexpr bool value = sizeof(O_T) == sizeof(I_T);
 };
 
+// Binary to String doesn't require copying, the payload only needs to be
+// validated.
+template <typename O, typename I>
+struct is_zero_copy_cast<
+    O, I,
+    typename std::enable_if<!std::is_same<I, O>::value &&
+                            is_binary_to_string<O, I>::value>::type> {
+  static constexpr bool value = true;
+};
+
 template <typename OutType, typename InType, typename Enable = void>
 struct CastFunctor {};
 
 // Indicated no computation required
+//
+// The case BinaryType -> StringType is special cased due to validation
+// requirements.
 template <typename O, typename I>
-struct CastFunctor<O, I, typename std::enable_if<is_zero_copy_cast<O, I>::value>::type> {
+struct CastFunctor<O, I,
+                   typename std::enable_if<is_zero_copy_cast<O, I>::value &&
+                                           !is_binary_to_string<O, I>::value>::type> {
   void operator()(FunctionContext* ctx, const CastOptions& options,
                   const ArrayData& input, ArrayData* output) {
-    CopyData(input, output);
+    ZeroCopyData(input, output);
   }
 };
 
@@ -532,7 +561,7 @@ struct CastFunctor<TimestampType, TimestampType> {
     const auto& out_type = checked_cast<const TimestampType&>(*output->type);
 
     if (in_type.unit() == out_type.unit()) {
-      CopyData(input, output);
+      ZeroCopyData(input, output);
       return;
     }
 
@@ -625,7 +654,7 @@ struct CastFunctor<O, I,
     const auto& out_type = checked_cast<const O&>(*output->type);
 
     if (in_type.unit() == out_type.unit()) {
-      CopyData(input, output);
+      ZeroCopyData(input, output);
       return;
     }
 
@@ -998,7 +1027,7 @@ struct CastFunctor<TimestampType, StringType> {
         continue;
       }
 
-      auto str = input_array.GetView(i);
+      const auto str = input_array.GetView(i);
       if (!converter(str.data(), str.length(), out_data)) {
         std::stringstream ss;
         ss << "Failed to cast String '" << str << "' into " << output->type->ToString();
@@ -1010,6 +1039,52 @@ struct CastFunctor<TimestampType, StringType> {
 };
 
 // ----------------------------------------------------------------------
+// Binary to String
+// Zero-copy, but unless options.allow_invalid_utf8 is set every non-null value
+// must be valid UTF-8; otherwise the context status is set to Invalid.
+template <typename I>
+struct CastFunctor<
+    StringType, I,
+    typename std::enable_if<is_binary_to_string<StringType, I>::value>::type> {
+  void operator()(FunctionContext* ctx, const CastOptions& options,
+                  const ArrayData& input, ArrayData* output) {
+    if (options.allow_invalid_utf8) {
+      ZeroCopyData(input, output);
+      return;
+    }
+
+    // Validation is required: only now pay for wrapping the input in an array.
+    BinaryArray binary(input.Copy());
+    util::InitializeUTF8();
+
+    if (binary.null_count() != 0) {
+      for (int64_t i = 0; i < input.length; i++) {
+        if (binary.IsNull(i)) {
+          continue;
+        }
+
+        const auto str = binary.GetView(i);
+        if (ARROW_PREDICT_FALSE(!arrow::util::ValidateUTF8(str))) {
+          ctx->SetStatus(Status::Invalid("Invalid UTF8 payload"));
+          return;
+        }
+      }
+
+    } else {
+      for (int64_t i = 0; i < input.length; i++) {
+        const auto str = binary.GetView(i);
+        if (ARROW_PREDICT_FALSE(!arrow::util::ValidateUTF8(str))) {
+          ctx->SetStatus(Status::Invalid("Invalid UTF8 payload"));
+          return;
+        }
+      }
+    }
+
+    ZeroCopyData(input, output);
+  }
+};
+
+// ----------------------------------------------------------------------
 
 typedef std::function<void(FunctionContext*, const CastOptions& options, const ArrayData&,
                            ArrayData*)>
@@ -1088,17 +1163,22 @@ class CastKernel : public UnaryKernel {
         out_type_(out_type) {}
 
   Status Call(FunctionContext* ctx, const Datum& input, Datum* out) override {
-    DCHECK_EQ(Datum::ARRAY, input.kind());
+    if (input.kind() != Datum::ARRAY)
+      return Status::NotImplemented("CastKernel only supports Datum::ARRAY input");
 
     const ArrayData& in_data = *input.array();
-    ArrayData* result;
 
-    if (out->kind() == Datum::NONE) {
-      out->value = ArrayData::Make(out_type_, in_data.length);
+    switch (out->kind()) {
+      case Datum::NONE:
+        out->value = ArrayData::Make(out_type_, in_data.length);
+        break;
+      case Datum::ARRAY:
+        break;
+      default:
+        return Status::NotImplemented("CastKernel only supports Datum::ARRAY output");
     }
 
-    result = out->array().get();
-
+    ArrayData* result = out->array().get();
     if (!is_zero_copy_) {
       RETURN_NOT_OK(
           AllocateIfNotPreallocated(ctx, in_data, can_pre_allocate_values_, result));
@@ -1187,6 +1267,8 @@ class CastKernel : public UnaryKernel {
   FN(TimestampType, Date64Type);     \
   FN(TimestampType, Int64Type);
 
+#define BINARY_CASES(FN, IN_TYPE) FN(BinaryType, StringType);
+
 #define STRING_CASES(FN, IN_TYPE) \
   FN(StringType, StringType);     \
   FN(StringType, BooleanType);    \
@@ -1259,6 +1341,7 @@ GET_CAST_FUNCTION(DATE64_CASES, Date64Type);
 GET_CAST_FUNCTION(TIME32_CASES, Time32Type);
 GET_CAST_FUNCTION(TIME64_CASES, Time64Type);
 GET_CAST_FUNCTION(TIMESTAMP_CASES, TimestampType);
+GET_CAST_FUNCTION(BINARY_CASES, BinaryType);
 GET_CAST_FUNCTION(STRING_CASES, StringType);
 GET_CAST_FUNCTION(DICTIONARY_CASES, DictionaryType);
 
@@ -1307,6 +1390,7 @@ Status GetCastFunction(const DataType& in_type, const std::shared_ptr<DataType>&
     CAST_FUNCTION_CASE(Time32Type);
     CAST_FUNCTION_CASE(Time64Type);
     CAST_FUNCTION_CASE(TimestampType);
+    CAST_FUNCTION_CASE(BinaryType);
     CAST_FUNCTION_CASE(StringType);
     CAST_FUNCTION_CASE(DictionaryType);
     case Type::LIST:
diff --git a/cpp/src/arrow/compute/kernels/cast.h b/cpp/src/arrow/compute/kernels/cast.h
index 65c70bf..8c42f07 100644
--- a/cpp/src/arrow/compute/kernels/cast.h
+++ b/cpp/src/arrow/compute/kernels/cast.h
@@ -38,12 +38,14 @@ struct ARROW_EXPORT CastOptions {
   CastOptions()
       : allow_int_overflow(false),
         allow_time_truncate(false),
-        allow_float_truncate(false) {}
+        allow_float_truncate(false),
+        allow_invalid_utf8(false) {}
 
   explicit CastOptions(bool safe)
       : allow_int_overflow(!safe),
         allow_time_truncate(!safe),
-        allow_float_truncate(!safe) {}
+        allow_float_truncate(!safe),
+        allow_invalid_utf8(!safe) {}
 
   static CastOptions Safe() { return CastOptions(true); }
 
@@ -52,6 +54,9 @@ struct ARROW_EXPORT CastOptions {
   bool allow_int_overflow;
   bool allow_time_truncate;
   bool allow_float_truncate;
+  // Indicate if conversions from Binary/FixedSizeBinary to string may skip
+  // validation of the utf8 payload (true = no validation is performed).
+  bool allow_invalid_utf8;
 };
 
 /// \since 0.7.0
diff --git a/cpp/src/arrow/compute/kernels/hash-test.cc b/cpp/src/arrow/compute/kernels/hash-test.cc
new file mode 100644
index 0000000..f20575f
--- /dev/null
+++ b/cpp/src/arrow/compute/kernels/hash-test.cc
@@ -0,0 +1,344 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <cstdint>
+#include <cstdio>
+#include <functional>
+#include <locale>
+#include <memory>
+#include <stdexcept>
+#include <string>
+#include <vector>
+
+#include <gtest/gtest.h>
+
+#include "arrow/array.h"
+#include "arrow/buffer.h"
+#include "arrow/memory_pool.h"
+#include "arrow/status.h"
+#include "arrow/table.h"
+#include "arrow/test-common.h"
+#include "arrow/test-util.h"
+#include "arrow/type.h"
+#include "arrow/type_traits.h"
+#include "arrow/util/decimal.h"
+
+#include "arrow/compute/context.h"
+#include "arrow/compute/kernel.h"
+#include "arrow/compute/kernels/hash.h"
+#include "arrow/compute/kernels/util-internal.h"
+#include "arrow/compute/test-util.h"
+
+using std::shared_ptr;
+using std::vector;
+
+namespace arrow {
+namespace compute {
+
+// ----------------------------------------------------------------------
+// Dictionary tests
+
+template <typename Type, typename T>
+void CheckUnique(FunctionContext* ctx, const shared_ptr<DataType>& type,
+                 const vector<T>& in_values, const vector<bool>& in_is_valid,
+                 const vector<T>& out_values, const vector<bool>& out_is_valid) {
+  shared_ptr<Array> input = _MakeArray<Type, T>(type, in_values, in_is_valid);
+  shared_ptr<Array> expected = _MakeArray<Type, T>(type, out_values, out_is_valid);
+
+  shared_ptr<Array> result;
+  ASSERT_OK(Unique(ctx, input, &result));
+  ASSERT_ARRAYS_EQUAL(*expected, *result);
+}
+
+template <typename Type, typename T>
+void CheckDictEncode(FunctionContext* ctx, const shared_ptr<DataType>& type,
+                     const vector<T>& in_values, const vector<bool>& in_is_valid,
+                     const vector<T>& out_values, const vector<bool>& out_is_valid,
+                     const vector<int32_t>& out_indices) {
+  shared_ptr<Array> input = _MakeArray<Type, T>(type, in_values, in_is_valid);
+  shared_ptr<Array> ex_dict = _MakeArray<Type, T>(type, out_values, out_is_valid);
+  shared_ptr<Array> ex_indices =
+      _MakeArray<Int32Type, int32_t>(int32(), out_indices, in_is_valid);
+
+  DictionaryArray expected(dictionary(int32(), ex_dict), ex_indices);
+
+  Datum datum_out;
+  ASSERT_OK(DictionaryEncode(ctx, input, &datum_out));
+  shared_ptr<Array> result = MakeArray(datum_out.array());
+
+  ASSERT_ARRAYS_EQUAL(expected, *result);
+}
+
+class TestHashKernel : public ComputeFixture, public TestBase {};
+
+template <typename Type>
+class TestHashKernelPrimitive : public ComputeFixture, public TestBase {};
+
+typedef ::testing::Types<Int8Type, UInt8Type, Int16Type, UInt16Type, Int32Type,
+                         UInt32Type, Int64Type, UInt64Type, FloatType, DoubleType,
+                         Date32Type, Date64Type>
+    PrimitiveDictionaries;
+
+TYPED_TEST_CASE(TestHashKernelPrimitive, PrimitiveDictionaries);
+
+TYPED_TEST(TestHashKernelPrimitive, Unique) {
+  using T = typename TypeParam::c_type;
+  auto type = TypeTraits<TypeParam>::type_singleton();
+  CheckUnique<TypeParam, T>(&this->ctx_, type, {2, 1, 2, 1}, {true, false, true, true},
+                            {2, 1}, {});
+  CheckUnique<TypeParam, T>(&this->ctx_, type, {2, 1, 3, 1}, {false, false, true, true},
+                            {3, 1}, {});
+}
+
+TYPED_TEST(TestHashKernelPrimitive, DictEncode) {
+  using T = typename TypeParam::c_type;
+  auto type = TypeTraits<TypeParam>::type_singleton();
+  CheckDictEncode<TypeParam, T>(&this->ctx_, type, {2, 1, 2, 1, 2, 3},
+                                {true, false, true, true, true, true}, {2, 1, 3}, {},
+                                {0, 0, 0, 1, 0, 2});
+}
+
+TYPED_TEST(TestHashKernelPrimitive, PrimitiveResizeTable) {
+  using T = typename TypeParam::c_type;
+  // Skip this test for 1-byte element types, i.e. (u)int8
+  if (sizeof(T) == 1) {
+    return;
+  }
+
+  const int64_t kTotalValues = 1000000;
+  const int64_t kRepeats = 5;
+
+  vector<T> values;
+  vector<T> uniques;
+  vector<int32_t> indices;
+  for (int64_t i = 0; i < kTotalValues * kRepeats; i++) {
+    const auto val = static_cast<T>(i % kTotalValues);
+    values.push_back(val);
+
+    if (i < kTotalValues) {
+      uniques.push_back(val);
+    }
+    indices.push_back(static_cast<int32_t>(i % kTotalValues));
+  }
+
+  auto type = TypeTraits<TypeParam>::type_singleton();
+  CheckUnique<TypeParam, T>(&this->ctx_, type, values, {}, uniques, {});
+
+  CheckDictEncode<TypeParam, T>(&this->ctx_, type, values, {}, uniques, {}, indices);
+}
+
+TEST_F(TestHashKernel, UniqueTimeTimestamp) {
+  CheckUnique<Time32Type, int32_t>(&this->ctx_, time32(TimeUnit::SECOND), {2, 1, 2, 1},
+                                   {true, false, true, true}, {2, 1}, {});
+
+  CheckUnique<Time64Type, int64_t>(&this->ctx_, time64(TimeUnit::NANO), {2, 1, 2, 1},
+                                   {true, false, true, true}, {2, 1}, {});
+
+  CheckUnique<TimestampType, int64_t>(&this->ctx_, timestamp(TimeUnit::NANO),
+                                      {2, 1, 2, 1}, {true, false, true, true}, {2, 1},
+                                      {});
+}
+
+TEST_F(TestHashKernel, UniqueBoolean) {
+  CheckUnique<BooleanType, bool>(&this->ctx_, boolean(), {true, true, false, true},
+                                 {true, false, true, true}, {true, false}, {});
+
+  CheckUnique<BooleanType, bool>(&this->ctx_, boolean(), {false, true, false, true},
+                                 {true, false, true, true}, {false, true}, {});
+
+  // No nulls
+  CheckUnique<BooleanType, bool>(&this->ctx_, boolean(), {true, true, false, true}, {},
+                                 {true, false}, {});
+
+  CheckUnique<BooleanType, bool>(&this->ctx_, boolean(), {false, true, false, true}, {},
+                                 {false, true}, {});
+}
+
+TEST_F(TestHashKernel, DictEncodeBoolean) {
+  CheckDictEncode<BooleanType, bool>(
+      &this->ctx_, boolean(), {true, true, false, true, false},
+      {true, false, true, true, true}, {true, false}, {}, {0, 0, 1, 0, 1});
+
+  CheckDictEncode<BooleanType, bool>(
+      &this->ctx_, boolean(), {false, true, false, true, false},
+      {true, false, true, true, true}, {false, true}, {}, {0, 0, 0, 1, 0});
+
+  // No nulls
+  CheckDictEncode<BooleanType, bool>(&this->ctx_, boolean(),
+                                     {true, true, false, true, false}, {}, {true, false},
+                                     {}, {0, 0, 1, 0, 1});
+
+  CheckDictEncode<BooleanType, bool>(&this->ctx_, boolean(),
+                                     {false, true, false, true, false}, {}, {false, true},
+                                     {}, {0, 1, 0, 1, 0});
+}
+
+TEST_F(TestHashKernel, UniqueBinary) {
+  CheckUnique<BinaryType, std::string>(&this->ctx_, binary(),
+                                       {"test", "", "test2", "test"},
+                                       {true, false, true, true}, {"test", "test2"}, {});
+
+  CheckUnique<StringType, std::string>(&this->ctx_, utf8(), {"test", "", "test2", "test"},
+                                       {true, false, true, true}, {"test", "test2"}, {});
+}
+
+TEST_F(TestHashKernel, DictEncodeBinary) {
+  CheckDictEncode<BinaryType, std::string>(
+      &this->ctx_, binary(), {"test", "", "test2", "test", "baz"},
+      {true, false, true, true, true}, {"test", "test2", "baz"}, {}, {0, 0, 1, 0, 2});
+
+  CheckDictEncode<StringType, std::string>(
+      &this->ctx_, utf8(), {"test", "", "test2", "test", "baz"},
+      {true, false, true, true, true}, {"test", "test2", "baz"}, {}, {0, 0, 1, 0, 2});
+}
+
+TEST_F(TestHashKernel, BinaryResizeTable) {
+  const int32_t kTotalValues = 10000;
+#if !defined(ARROW_VALGRIND)
+  const int32_t kRepeats = 10;
+#else
+  // Mitigate Valgrind's slowness
+  const int32_t kRepeats = 3;
+#endif
+
+  vector<std::string> values;
+  vector<std::string> uniques;
+  vector<int32_t> indices;
+  char buf[20] = "test";
+
+  for (int32_t i = 0; i < kTotalValues * kRepeats; i++) {
+    int32_t index = i % kTotalValues;
+
+    ASSERT_GE(snprintf(buf + 4, sizeof(buf) - 4, "%d", index), 0);
+    values.emplace_back(buf);
+
+    if (i < kTotalValues) {
+      uniques.push_back(values.back());
+    }
+    indices.push_back(index);
+  }
+
+  CheckUnique<BinaryType, std::string>(&this->ctx_, binary(), values, {}, uniques, {});
+  CheckDictEncode<BinaryType, std::string>(&this->ctx_, binary(), values, {}, uniques, {},
+                                           indices);
+
+  CheckUnique<StringType, std::string>(&this->ctx_, utf8(), values, {}, uniques, {});
+  CheckDictEncode<StringType, std::string>(&this->ctx_, utf8(), values, {}, uniques, {},
+                                           indices);
+}
+
+TEST_F(TestHashKernel, UniqueFixedSizeBinary) {
+  CheckUnique<FixedSizeBinaryType, std::string>(
+      &this->ctx_, fixed_size_binary(5), {"aaaaa", "", "bbbbb", "aaaaa"},
+      {true, false, true, true}, {"aaaaa", "bbbbb"}, {});
+}
+
+TEST_F(TestHashKernel, DictEncodeFixedSizeBinary) {
+  CheckDictEncode<FixedSizeBinaryType, std::string>(
+      &this->ctx_, fixed_size_binary(5), {"bbbbb", "", "bbbbb", "aaaaa", "ccccc"},
+      {true, false, true, true, true}, {"bbbbb", "aaaaa", "ccccc"}, {}, {0, 0, 0, 1, 2});
+}
+
+TEST_F(TestHashKernel, FixedSizeBinaryResizeTable) {
+  const int32_t kTotalValues = 10000;
+#if !defined(ARROW_VALGRIND)
+  const int32_t kRepeats = 10;
+#else
+  // Mitigate Valgrind's slowness
+  const int32_t kRepeats = 3;
+#endif
+
+  vector<std::string> values;
+  vector<std::string> uniques;
+  vector<int32_t> indices;
+  char buf[7] = "test..";
+
+  for (int32_t i = 0; i < kTotalValues * kRepeats; i++) {
+    int32_t index = i % kTotalValues;
+
+    buf[4] = static_cast<char>(index / 128);
+    buf[5] = static_cast<char>(index % 128);
+    values.emplace_back(buf, 6);
+
+    if (i < kTotalValues) {
+      uniques.push_back(values.back());
+    }
+    indices.push_back(index);
+  }
+
+  auto type = fixed_size_binary(6);
+  CheckUnique<FixedSizeBinaryType, std::string>(&this->ctx_, type, values, {}, uniques,
+                                                {});
+  CheckDictEncode<FixedSizeBinaryType, std::string>(&this->ctx_, type, values, {},
+                                                    uniques, {}, indices);
+}
+
+TEST_F(TestHashKernel, UniqueDecimal) {
+  vector<Decimal128> values{12, 12, 11, 12};
+  vector<Decimal128> expected{12, 11};
+
+  CheckUnique<Decimal128Type, Decimal128>(&this->ctx_, decimal(2, 0), values,
+                                          {true, false, true, true}, expected, {});
+}
+
+TEST_F(TestHashKernel, DictEncodeDecimal) {
+  vector<Decimal128> values{12, 12, 11, 12, 13};
+  vector<Decimal128> expected{12, 11, 13};
+
+  CheckDictEncode<Decimal128Type, Decimal128>(&this->ctx_, decimal(2, 0), values,
+                                              {true, false, true, true, true}, expected,
+                                              {}, {0, 0, 1, 0, 2});
+}
+
+TEST_F(TestHashKernel, ChunkedArrayInvoke) {
+  vector<std::string> values1 = {"foo", "bar", "foo"};
+  vector<std::string> values2 = {"bar", "baz", "quuux", "foo"};
+
+  auto type = utf8();
+  auto a1 = _MakeArray<StringType, std::string>(type, values1, {});
+  auto a2 = _MakeArray<StringType, std::string>(type, values2, {});
+
+  vector<std::string> dict_values = {"foo", "bar", "baz", "quuux"};
+  auto ex_dict = _MakeArray<StringType, std::string>(type, dict_values, {});
+
+  ArrayVector arrays = {a1, a2};
+  auto carr = std::make_shared<ChunkedArray>(arrays);
+
+  // Unique
+  shared_ptr<Array> result;
+  ASSERT_OK(Unique(&this->ctx_, carr, &result));
+  ASSERT_ARRAYS_EQUAL(*ex_dict, *result);
+
+  // Dictionary encode
+  auto dict_type = dictionary(int32(), ex_dict);
+
+  auto i1 = _MakeArray<Int32Type, int32_t>(int32(), {0, 1, 0}, {});
+  auto i2 = _MakeArray<Int32Type, int32_t>(int32(), {1, 2, 3, 0}, {});
+
+  ArrayVector dict_arrays = {std::make_shared<DictionaryArray>(dict_type, i1),
+                             std::make_shared<DictionaryArray>(dict_type, i2)};
+  auto dict_carr = std::make_shared<ChunkedArray>(dict_arrays);
+
+  Datum encoded_out;
+  ASSERT_OK(DictionaryEncode(&this->ctx_, carr, &encoded_out));
+  ASSERT_EQ(Datum::CHUNKED_ARRAY, encoded_out.kind());
+
+  AssertChunkedEqual(*dict_carr, *encoded_out.chunked_array());
+}
+
+}  // namespace compute
+}  // namespace arrow
diff --git a/cpp/src/arrow/compute/kernels/util-internal.h b/cpp/src/arrow/compute/kernels/util-internal.h
index 23ed4fd..d71e36d 100644
--- a/cpp/src/arrow/compute/kernels/util-internal.h
+++ b/cpp/src/arrow/compute/kernels/util-internal.h
@@ -32,7 +32,9 @@ namespace compute {
 
 class FunctionContext;
 
-static inline void CopyData(const ArrayData& input, ArrayData* output) {
+// \brief Make a copy of the buffers into a destination array without carrying
+// the type.
+static inline void ZeroCopyData(const ArrayData& input, ArrayData* output) {
   output->length = input.length;
   output->null_count = input.null_count;
   output->buffers = input.buffers;
diff --git a/cpp/src/arrow/compute/test-util.h b/cpp/src/arrow/compute/test-util.h
new file mode 100644
index 0000000..e2bda69
--- /dev/null
+++ b/cpp/src/arrow/compute/test-util.h
@@ -0,0 +1,57 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#ifndef ARROW_COMPUTE_TEST_UTIL_H
+#define ARROW_COMPUTE_TEST_UTIL_H
+
+#include <memory>
+#include <vector>
+
+#include "arrow/array.h"
+#include "arrow/memory_pool.h"
+#include "arrow/type.h"
+
+#include "arrow/compute/context.h"
+
+namespace arrow {
+namespace compute {
+
+class ComputeFixture {
+ public:
+  ComputeFixture() : ctx_(default_memory_pool()) {}
+
+ protected:
+  FunctionContext ctx_;
+};
+
+template <typename Type, typename T>
+std::shared_ptr<Array> _MakeArray(const std::shared_ptr<DataType>& type,
+                                  const std::vector<T>& values,
+                                  const std::vector<bool>& is_valid) {
+  std::shared_ptr<Array> array;
+  if (is_valid.empty()) {
+    ArrayFromVector<Type, T>(type, values, &array);
+  } else {
+    ArrayFromVector<Type, T>(type, is_valid, values, &array);
+  }
+  return array;
+}
+
+}  // namespace compute
+}  // namespace arrow
+
+#endif
diff --git a/cpp/src/arrow/util/utf8.h b/cpp/src/arrow/util/utf8.h
index f5a18be..072c218 100644
--- a/cpp/src/arrow/util/utf8.h
+++ b/cpp/src/arrow/util/utf8.h
@@ -24,6 +24,7 @@
 #include <memory>
 
 #include "arrow/util/macros.h"
+#include "arrow/util/string_view.h"
 #include "arrow/util/visibility.h"
 
 namespace arrow {
@@ -157,6 +158,13 @@ inline bool ValidateUTF8(const uint8_t* data, int64_t size) {
   return ARROW_PREDICT_TRUE(state == internal::kUTF8ValidateAccept);
 }
 
+inline bool ValidateUTF8(const util::string_view& str) {
+  const auto* bytes = reinterpret_cast<const uint8_t*>(str.data());
+  const auto num_bytes = static_cast<int64_t>(str.size());
+
+  return ValidateUTF8(bytes, num_bytes);
+}
+
 }  // namespace util
 }  // namespace arrow