You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2019/05/03 22:55:26 UTC

[arrow] branch master updated: ARROW-3767: [C++] Add cast from null to any other type

This is an automated email from the ASF dual-hosted git repository.

wesm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 982f341  ARROW-3767: [C++] Add cast from null to any other type
982f341 is described below

commit 982f341bc81e1e22d4b25f8cf00ef882a34766b6
Author: Antoine Pitrou <an...@python.org>
AuthorDate: Fri May 3 17:55:11 2019 -0500

    ARROW-3767: [C++] Add cast from null to any other type
    
    Author: Antoine Pitrou <an...@python.org>
    
    Closes #4196 from pitrou/ARROW-3767-cast-null-to-any and squashes the following commits:
    
    f4d269d0c <Antoine Pitrou> Fix list and struct cases
    0327b6033 <Antoine Pitrou> ARROW-3767:  Add cast from null to any other type
---
 cpp/src/arrow/array-binary-test.cc                 | 140 ++++++---------
 cpp/src/arrow/array-list-test.cc                   |  23 +++
 cpp/src/arrow/array.cc                             |  46 ++---
 cpp/src/arrow/array/builder_adaptive.h             |   4 +-
 cpp/src/arrow/array/builder_base.cc                |   6 +
 cpp/src/arrow/array/builder_base.h                 |  26 ++-
 cpp/src/arrow/array/builder_binary.cc              |   7 +
 cpp/src/arrow/array/builder_binary.h               |  20 ++-
 cpp/src/arrow/array/builder_dict.cc                |  15 ++
 cpp/src/arrow/array/builder_dict.h                 |   8 +-
 cpp/src/arrow/array/builder_nested.cc              |  25 ++-
 cpp/src/arrow/array/builder_nested.h               |  10 +-
 cpp/src/arrow/array/builder_primitive.h            |  12 +-
 cpp/src/arrow/array/builder_union.h                |  13 +-
 cpp/src/arrow/compute/kernels/cast-test.cc         | 196 +++++++++++----------
 cpp/src/arrow/compute/kernels/cast.cc              |  87 +++++++--
 .../kernels/generated/cast-codegen-internal.h      |  18 --
 cpp/src/arrow/compute/kernels/generated/codegen.py |   6 +-
 python/pyarrow/tests/test_array.py                 |  48 ++++-
 19 files changed, 444 insertions(+), 266 deletions(-)

diff --git a/cpp/src/arrow/array-binary-test.cc b/cpp/src/arrow/array-binary-test.cc
index daf859e..227f74b 100644
--- a/cpp/src/arrow/array-binary-test.cc
+++ b/cpp/src/arrow/array-binary-test.cc
@@ -34,6 +34,7 @@
 #include "arrow/type_traits.h"
 #include "arrow/util/bit-util.h"
 #include "arrow/util/checked_cast.h"
+#include "arrow/util/string_view.h"
 
 namespace arrow {
 
@@ -42,6 +43,30 @@ using internal::checked_cast;
 // ----------------------------------------------------------------------
 // String / Binary tests
 
+template <typename ArrayType>
+void CheckStringArray(const ArrayType& array, const std::vector<std::string>& strings,
+                      const std::vector<uint8_t>& is_valid, int repeats = 1) {
+  int64_t length = array.length();
+  int64_t base_length = static_cast<int64_t>(strings.size());
+  ASSERT_EQ(base_length, static_cast<int64_t>(is_valid.size()));
+  ASSERT_EQ(base_length * repeats, length);
+
+  int32_t value_pos = 0;
+  for (int i = 0; i < length; ++i) {
+    auto j = i % base_length;
+    if (is_valid[j]) {
+      ASSERT_FALSE(array.IsNull(i));
+      auto view = array.GetView(i);
+      ASSERT_EQ(value_pos, array.value_offset(i));
+      ASSERT_EQ(strings[j].size(), view.size());
+      ASSERT_EQ(util::string_view(strings[j]), view);
+      value_pos += static_cast<int32_t>(view.size());
+    } else {
+      ASSERT_TRUE(array.IsNull(i));
+    }
+  }
+}
+
 class TestStringArray : public ::testing::Test {
  public:
   void SetUp() {
@@ -210,14 +235,14 @@ class TestStringBuilder : public TestBuilder {
 
 TEST_F(TestStringBuilder, TestScalarAppend) {
   std::vector<std::string> strings = {"", "bb", "a", "", "ccc"};
-  std::vector<uint8_t> is_null = {0, 0, 0, 1, 0};
+  std::vector<uint8_t> is_valid = {1, 1, 1, 0, 1};
 
   int N = static_cast<int>(strings.size());
   int reps = 1000;
 
   for (int j = 0; j < reps; ++j) {
     for (int i = 0; i < N; ++i) {
-      if (is_null[i]) {
+      if (!is_valid[i]) {
         ASSERT_OK(builder_->AppendNull());
       } else {
         ASSERT_OK(builder_->Append(strings[i]));
@@ -230,21 +255,7 @@ TEST_F(TestStringBuilder, TestScalarAppend) {
   ASSERT_EQ(reps, result_->null_count());
   ASSERT_EQ(reps * 6, result_->value_data()->size());
 
-  int32_t length;
-  int32_t pos = 0;
-  for (int i = 0; i < N * reps; ++i) {
-    if (is_null[i % N]) {
-      ASSERT_TRUE(result_->IsNull(i));
-    } else {
-      ASSERT_FALSE(result_->IsNull(i));
-      result_->GetValue(i, &length);
-      ASSERT_EQ(pos, result_->value_offset(i));
-      ASSERT_EQ(static_cast<int>(strings[i % N].size()), length);
-      ASSERT_EQ(strings[i % N], result_->GetString(i));
-
-      pos += length;
-    }
-  }
+  CheckStringArray(*result_, strings, is_valid, reps);
 }
 
 TEST_F(TestStringBuilder, TestAppendVector) {
@@ -263,21 +274,7 @@ TEST_F(TestStringBuilder, TestAppendVector) {
   ASSERT_EQ(reps, result_->null_count());
   ASSERT_EQ(reps * 6, result_->value_data()->size());
 
-  int32_t length;
-  int32_t pos = 0;
-  for (int i = 0; i < N * reps; ++i) {
-    if (valid_bytes[i % N]) {
-      ASSERT_FALSE(result_->IsNull(i));
-      result_->GetValue(i, &length);
-      ASSERT_EQ(pos, result_->value_offset(i));
-      ASSERT_EQ(static_cast<int>(strings[i % N].size()), length);
-      ASSERT_EQ(strings[i % N], result_->GetString(i));
-
-      pos += length;
-    } else {
-      ASSERT_TRUE(result_->IsNull(i));
-    }
-  }
+  CheckStringArray(*result_, strings, valid_bytes, reps);
 }
 
 TEST_F(TestStringBuilder, TestAppendCStringsWithValidBytes) {
@@ -296,22 +293,7 @@ TEST_F(TestStringBuilder, TestAppendCStringsWithValidBytes) {
   ASSERT_EQ(reps * 3, result_->null_count());
   ASSERT_EQ(reps * 3, result_->value_data()->size());
 
-  int32_t length;
-  int32_t pos = 0;
-  for (int i = 0; i < N * reps; ++i) {
-    auto string = strings[i % N];
-    if (string && valid_bytes[i % N]) {
-      ASSERT_FALSE(result_->IsNull(i));
-      result_->GetValue(i, &length);
-      ASSERT_EQ(pos, result_->value_offset(i));
-      ASSERT_EQ(static_cast<int32_t>(strlen(string)), length);
-      ASSERT_EQ(strings[i % N], result_->GetString(i));
-
-      pos += length;
-    } else {
-      ASSERT_TRUE(result_->IsNull(i));
-    }
-  }
+  CheckStringArray(*result_, {"", "aaa", "", "", ""}, {0, 1, 0, 0, 1}, reps);
 }
 
 TEST_F(TestStringBuilder, TestAppendCStringsWithoutValidBytes) {
@@ -329,21 +311,7 @@ TEST_F(TestStringBuilder, TestAppendCStringsWithoutValidBytes) {
   ASSERT_EQ(reps, result_->null_count());
   ASSERT_EQ(reps * 6, result_->value_data()->size());
 
-  int32_t length;
-  int32_t pos = 0;
-  for (int i = 0; i < N * reps; ++i) {
-    if (strings[i % N]) {
-      ASSERT_FALSE(result_->IsNull(i));
-      result_->GetValue(i, &length);
-      ASSERT_EQ(pos, result_->value_offset(i));
-      ASSERT_EQ(static_cast<int32_t>(strlen(strings[i % N])), length);
-      ASSERT_EQ(strings[i % N], result_->GetString(i));
-
-      pos += length;
-    } else {
-      ASSERT_TRUE(result_->IsNull(i));
-    }
-  }
+  CheckStringArray(*result_, {"", "bb", "a", "", "ccc"}, {1, 1, 1, 0, 1}, reps);
 }
 
 TEST_F(TestStringBuilder, TestZeroLength) {
@@ -499,14 +467,14 @@ class TestBinaryBuilder : public TestBuilder {
 
 TEST_F(TestBinaryBuilder, TestScalarAppend) {
   std::vector<std::string> strings = {"", "bb", "a", "", "ccc"};
-  std::vector<uint8_t> is_null = {0, 0, 0, 1, 0};
+  std::vector<uint8_t> is_valid = {1, 1, 1, 0, 1};
 
   int N = static_cast<int>(strings.size());
   int reps = 10;
 
   for (int j = 0; j < reps; ++j) {
     for (int i = 0; i < N; ++i) {
-      if (is_null[i]) {
+      if (!is_valid[i]) {
         ASSERT_OK(builder_->AppendNull());
       } else {
         ASSERT_OK(builder_->Append(strings[i]));
@@ -519,22 +487,26 @@ TEST_F(TestBinaryBuilder, TestScalarAppend) {
   ASSERT_EQ(reps, result_->null_count());
   ASSERT_EQ(reps * 6, result_->value_data()->size());
 
-  int32_t length;
-  for (int i = 0; i < N * reps; ++i) {
-    if (is_null[i % N]) {
-      ASSERT_TRUE(result_->IsNull(i));
-    } else {
-      ASSERT_FALSE(result_->IsNull(i));
-      const uint8_t* vals = result_->GetValue(i, &length);
-      ASSERT_EQ(static_cast<int>(strings[i % N].size()), length);
-      ASSERT_EQ(0, std::memcmp(vals, strings[i % N].data(), length));
-    }
-  }
+  CheckStringArray(*result_, strings, is_valid, reps);
+}
+
+TEST_F(TestBinaryBuilder, TestAppendNulls) {
+  ASSERT_OK(builder_->Append("bow"));
+  ASSERT_OK(builder_->AppendNulls(3));
+  ASSERT_OK(builder_->Append("arrow"));
+  Done();
+  ASSERT_OK(ValidateArray(*result_));
+
+  ASSERT_EQ(5, result_->length());
+  ASSERT_EQ(3, result_->null_count());
+  ASSERT_EQ(8, result_->value_data()->size());
+
+  CheckStringArray(*result_, {"bow", "", "", "", "arrow"}, {1, 0, 0, 0, 1});
 }
 
 TEST_F(TestBinaryBuilder, TestScalarAppendUnsafe) {
   std::vector<std::string> strings = {"", "bb", "a", "", "ccc"};
-  std::vector<uint8_t> is_null = {0, 0, 0, 1, 0};
+  std::vector<uint8_t> is_valid = {1, 1, 1, 0, 1};
 
   int N = static_cast<int>(strings.size());
   int reps = 13;
@@ -546,7 +518,7 @@ TEST_F(TestBinaryBuilder, TestScalarAppendUnsafe) {
 
   for (int j = 0; j < reps; ++j) {
     for (int i = 0; i < N; ++i) {
-      if (is_null[i]) {
+      if (!is_valid[i]) {
         builder_->UnsafeAppendNull();
       } else {
         builder_->UnsafeAppend(strings[i]);
@@ -560,17 +532,7 @@ TEST_F(TestBinaryBuilder, TestScalarAppendUnsafe) {
   ASSERT_EQ(reps, result_->null_count());
   ASSERT_EQ(reps * total_length, result_->value_data()->size());
 
-  int32_t length;
-  for (int i = 0; i < N * reps; ++i) {
-    if (is_null[i % N]) {
-      ASSERT_TRUE(result_->IsNull(i));
-    } else {
-      ASSERT_FALSE(result_->IsNull(i));
-      const uint8_t* vals = result_->GetValue(i, &length);
-      ASSERT_EQ(static_cast<int>(strings[i % N].size()), length);
-      ASSERT_EQ(0, std::memcmp(vals, strings[i % N].data(), length));
-    }
-  }
+  CheckStringArray(*result_, strings, is_valid, reps);
 }
 
 TEST_F(TestBinaryBuilder, TestCapacityReserve) {
diff --git a/cpp/src/arrow/array-list-test.cc b/cpp/src/arrow/array-list-test.cc
index 16dc16a..0fb86ce 100644
--- a/cpp/src/arrow/array-list-test.cc
+++ b/cpp/src/arrow/array-list-test.cc
@@ -203,6 +203,29 @@ TEST_F(TestListArray, TestAppendNull) {
   ASSERT_NE(nullptr, values->data()->buffers[1]);
 }
 
+TEST_F(TestListArray, TestAppendNulls) {
+  ASSERT_OK(builder_->AppendNulls(3));
+
+  Done();
+
+  ASSERT_OK(ValidateArray(*result_));
+  ASSERT_EQ(result_->length(), 3);
+  ASSERT_EQ(result_->null_count(), 3);
+  ASSERT_TRUE(result_->IsNull(0));
+  ASSERT_TRUE(result_->IsNull(1));
+  ASSERT_TRUE(result_->IsNull(2));
+
+  ASSERT_EQ(0, result_->raw_value_offsets()[0]);
+  ASSERT_EQ(0, result_->value_offset(1));
+  ASSERT_EQ(0, result_->value_offset(2));
+  ASSERT_EQ(0, result_->value_offset(3));
+
+  auto values = result_->values();
+  ASSERT_EQ(0, values->length());
+  // Values buffer should be non-null
+  ASSERT_NE(nullptr, values->data()->buffers[1]);
+}
+
 void ValidateBasicListArray(const ListArray* result, const std::vector<int32_t>& values,
                             const std::vector<uint8_t>& is_valid) {
   ASSERT_OK(ValidateArray(*result));
diff --git a/cpp/src/arrow/array.cc b/cpp/src/arrow/array.cc
index 5956dd2..2346908 100644
--- a/cpp/src/arrow/array.cc
+++ b/cpp/src/arrow/array.cc
@@ -804,7 +804,7 @@ struct ValidateVisitor {
     if (array.data()->buffers.size() != 3) {
       return Status::Invalid("number of buffers was != 3");
     }
-    return Status::OK();
+    return ValidateOffsets(array);
   }
 
   Status Visit(const ListArray& array) {
@@ -836,25 +836,7 @@ struct ValidateVisitor {
       return Status::Invalid("Child array invalid: ", child_valid.ToString());
     }
 
-    int32_t prev_offset = array.value_offset(0);
-    if (prev_offset != 0) {
-      return Status::Invalid("The first offset wasn't zero");
-    }
-    for (int64_t i = 1; i <= array.length(); ++i) {
-      int32_t current_offset = array.value_offset(i);
-      if (array.IsNull(i - 1) && current_offset != prev_offset) {
-        return Status::Invalid("Offset invariant failure at: ", i,
-                               " inconsistent value_offsets for null slot",
-                               current_offset, "!=", prev_offset);
-      }
-      if (current_offset < prev_offset) {
-        return Status::Invalid("Offset invariant failure: ", i,
-                               " inconsistent offset for non-null slot: ", current_offset,
-                               "<", prev_offset);
-      }
-      prev_offset = current_offset;
-    }
-    return Status::OK();
+    return ValidateOffsets(array);
   }
 
   Status Visit(const StructArray& array) {
@@ -912,6 +894,30 @@ struct ValidateVisitor {
   }
 
   Status Visit(const ExtensionArray& array) { return ValidateArray(*array.storage()); }
+
+ protected:
+  template <typename ArrayType>
+  Status ValidateOffsets(ArrayType& array) {
+    int32_t prev_offset = array.value_offset(0);
+    if (array.offset() == 0 && prev_offset != 0) {
+      return Status::Invalid("The first offset wasn't zero");
+    }
+    for (int64_t i = 1; i <= array.length(); ++i) {
+      int32_t current_offset = array.value_offset(i);
+      if (array.IsNull(i - 1) && current_offset != prev_offset) {
+        return Status::Invalid("Offset invariant failure at: ", i,
+                               " inconsistent value_offsets for null slot",
+                               current_offset, "!=", prev_offset);
+      }
+      if (current_offset < prev_offset) {
+        return Status::Invalid("Offset invariant failure: ", i,
+                               " inconsistent offset for non-null slot: ", current_offset,
+                               "<", prev_offset);
+      }
+      prev_offset = current_offset;
+    }
+    return Status::OK();
+  }
 };
 
 }  // namespace internal
diff --git a/cpp/src/arrow/array/builder_adaptive.h b/cpp/src/arrow/array/builder_adaptive.h
index afbfca2..7f24109 100644
--- a/cpp/src/arrow/array/builder_adaptive.h
+++ b/cpp/src/arrow/array/builder_adaptive.h
@@ -31,7 +31,7 @@ class ARROW_EXPORT AdaptiveIntBuilderBase : public ArrayBuilder {
 
   /// \brief Append multiple nulls
   /// \param[in] length the number of nulls to append
-  Status AppendNulls(int64_t length) {
+  Status AppendNulls(int64_t length) final {
     ARROW_RETURN_NOT_OK(CommitPendingData());
     ARROW_RETURN_NOT_OK(Reserve(length));
     memset(data_->mutable_data() + length_ * int_size_, 0, int_size_ * length);
@@ -39,7 +39,7 @@ class ARROW_EXPORT AdaptiveIntBuilderBase : public ArrayBuilder {
     return Status::OK();
   }
 
-  Status AppendNull() {
+  Status AppendNull() final {
     pending_data_[pending_pos_] = 0;
     pending_valid_[pending_pos_] = 0;
     pending_has_nulls_ = true;
diff --git a/cpp/src/arrow/array/builder_base.cc b/cpp/src/arrow/array/builder_base.cc
index 75baedd..fb13a88 100644
--- a/cpp/src/arrow/array/builder_base.cc
+++ b/cpp/src/arrow/array/builder_base.cc
@@ -63,6 +63,12 @@ Status ArrayBuilder::AppendToBitmap(const uint8_t* valid_bytes, int64_t length)
   return Status::OK();
 }
 
+Status ArrayBuilder::AppendToBitmap(int64_t num_bits, bool value) {
+  RETURN_NOT_OK(Reserve(num_bits));
+  UnsafeAppendToBitmap(num_bits, value);
+  return Status::OK();
+}
+
 Status ArrayBuilder::Resize(int64_t capacity) {
   RETURN_NOT_OK(CheckCapacity(capacity, capacity_));
   capacity_ = capacity;
diff --git a/cpp/src/arrow/array/builder_base.h b/cpp/src/arrow/array/builder_base.h
index 21503ee..4f04866 100644
--- a/cpp/src/arrow/array/builder_base.h
+++ b/cpp/src/arrow/array/builder_base.h
@@ -18,25 +18,18 @@
 #pragma once
 
 #include <algorithm>  // IWYU pragma: keep
-#include <array>
-#include <cstddef>
 #include <cstdint>
 #include <cstring>
-#include <iterator>
 #include <limits>
 #include <memory>
-#include <string>
 #include <type_traits>
 #include <vector>
 
 #include "arrow/buffer-builder.h"
-#include "arrow/memory_pool.h"
 #include "arrow/status.h"
 #include "arrow/type.h"
 #include "arrow/type_traits.h"
-#include "arrow/util/bit-util.h"
 #include "arrow/util/macros.h"
-#include "arrow/util/string_view.h"
 #include "arrow/util/type_traits.h"
 #include "arrow/util/visibility.h"
 
@@ -44,6 +37,7 @@ namespace arrow {
 
 class Array;
 struct ArrayData;
+class MemoryPool;
 
 constexpr int64_t kMinBuilderCapacity = 1 << 5;
 constexpr int64_t kListMaximumElements = std::numeric_limits<int32_t>::max() - 1;
@@ -103,6 +97,9 @@ class ARROW_EXPORT ArrayBuilder {
   /// Reset the builder.
   virtual void Reset();
 
+  virtual Status AppendNull() = 0;
+  virtual Status AppendNulls(int64_t length) = 0;
+
   /// For cases where raw data was memcpy'd into the internal buffers, allows us
   /// to advance the length of the builder. It is your responsibility to use
   /// this function responsibly.
@@ -133,6 +130,9 @@ class ARROW_EXPORT ArrayBuilder {
   /// assume all of length bits are valid.
   Status AppendToBitmap(const uint8_t* valid_bytes, int64_t length);
 
+  /// Uniform append.  Append N times the same validity bit.
+  Status AppendToBitmap(int64_t num_bits, bool value);
+
   /// Set the next length bits to not null (i.e. valid).
   Status SetNotNull(int64_t length);
 
@@ -158,11 +158,21 @@ class ARROW_EXPORT ArrayBuilder {
     null_count_ = null_bitmap_builder_.false_count();
   }
 
+  // Append the same validity value a given number of times.
+  void UnsafeAppendToBitmap(const int64_t num_bits, bool value) {
+    if (value) {
+      UnsafeSetNotNull(num_bits);
+    } else {
+      UnsafeSetNull(num_bits);
+    }
+  }
+
   void UnsafeAppendToBitmap(const std::vector<bool>& is_valid);
 
-  // Set the next length bits to not null (i.e. valid).
+  // Set the next validity bits to not null (i.e. valid).
   void UnsafeSetNotNull(int64_t length);
 
+  // Set the next validity bits to null (i.e. invalid).
   void UnsafeSetNull(int64_t length);
 
   static Status TrimBuffer(const int64_t bytes_filled, ResizableBuffer* buffer);
diff --git a/cpp/src/arrow/array/builder_binary.cc b/cpp/src/arrow/array/builder_binary.cc
index 26c6cb4..4a8ea40 100644
--- a/cpp/src/arrow/array/builder_binary.cc
+++ b/cpp/src/arrow/array/builder_binary.cc
@@ -236,6 +236,13 @@ Status FixedSizeBinaryBuilder::AppendNull() {
   return Status::OK();
 }
 
+Status FixedSizeBinaryBuilder::AppendNulls(int64_t length) {
+  RETURN_NOT_OK(Reserve(length));
+  UnsafeAppendToBitmap(length, false);
+  byte_builder_.UnsafeAdvance(length * byte_width_);
+  return Status::OK();
+}
+
 void FixedSizeBinaryBuilder::Reset() {
   ArrayBuilder::Reset();
   byte_builder_.Reset();
diff --git a/cpp/src/arrow/array/builder_binary.h b/cpp/src/arrow/array/builder_binary.h
index 954f58e..c849572 100644
--- a/cpp/src/arrow/array/builder_binary.h
+++ b/cpp/src/arrow/array/builder_binary.h
@@ -55,7 +55,20 @@ class ARROW_EXPORT BinaryBuilder : public ArrayBuilder {
     return Status::OK();
   }
 
-  Status AppendNull() {
+  Status AppendNulls(int64_t length) final {
+    const int64_t num_bytes = value_data_builder_.length();
+    if (ARROW_PREDICT_FALSE(num_bytes > kBinaryMemoryLimit)) {
+      return AppendOverflow(num_bytes);
+    }
+    ARROW_RETURN_NOT_OK(Reserve(length));
+    for (int64_t i = 0; i < length; ++i) {
+      offsets_builder_.UnsafeAppend(static_cast<int32_t>(num_bytes));
+    }
+    UnsafeAppendToBitmap(length, false);
+    return Status::OK();
+  }
+
+  Status AppendNull() final {
     ARROW_RETURN_NOT_OK(AppendNextOffset());
     ARROW_RETURN_NOT_OK(Reserve(1));
     UnsafeAppendToBitmap(false);
@@ -215,7 +228,10 @@ class ARROW_EXPORT FixedSizeBinaryBuilder : public ArrayBuilder {
 
   Status AppendValues(const uint8_t* data, int64_t length,
                       const uint8_t* valid_bytes = NULLPTR);
-  Status AppendNull();
+
+  Status AppendNull() final;
+
+  Status AppendNulls(int64_t length) final;
 
   void UnsafeAppend(const uint8_t* value) {
     UnsafeAppendToBitmap(true);
diff --git a/cpp/src/arrow/array/builder_dict.cc b/cpp/src/arrow/array/builder_dict.cc
index b5fa0d6..2e43234 100644
--- a/cpp/src/arrow/array/builder_dict.cc
+++ b/cpp/src/arrow/array/builder_dict.cc
@@ -258,6 +258,14 @@ Status DictionaryBuilder<T>::AppendNull() {
   return values_builder_.AppendNull();
 }
 
+template <typename T>
+Status DictionaryBuilder<T>::AppendNulls(int64_t length) {
+  length_ += length;
+  null_count_ += length;
+
+  return values_builder_.AppendNulls(length);
+}
+
 Status DictionaryBuilder<NullType>::AppendNull() {
   length_ += 1;
   null_count_ += 1;
@@ -265,6 +273,13 @@ Status DictionaryBuilder<NullType>::AppendNull() {
   return values_builder_.AppendNull();
 }
 
+Status DictionaryBuilder<NullType>::AppendNulls(int64_t length) {
+  length_ += length;
+  null_count_ += length;
+
+  return values_builder_.AppendNulls(length);
+}
+
 template <typename T>
 Status DictionaryBuilder<T>::AppendArray(const Array& array) {
   using ArrayType = typename TypeTraits<T>::ArrayType;
diff --git a/cpp/src/arrow/array/builder_dict.h b/cpp/src/arrow/array/builder_dict.h
index 204d609..4d31cdb 100644
--- a/cpp/src/arrow/array/builder_dict.h
+++ b/cpp/src/arrow/array/builder_dict.h
@@ -95,7 +95,9 @@ class ARROW_EXPORT DictionaryBuilder : public ArrayBuilder {
   }
 
   /// \brief Append a scalar null value
-  Status AppendNull();
+  Status AppendNull() final;
+
+  Status AppendNulls(int64_t length) final;
 
   /// \brief Append a whole dense array to the builder
   Status AppendArray(const Array& array);
@@ -127,7 +129,9 @@ class ARROW_EXPORT DictionaryBuilder<NullType> : public ArrayBuilder {
   DictionaryBuilder(const std::shared_ptr<Array>& dictionary, MemoryPool* pool);
 
   /// \brief Append a scalar null value
-  Status AppendNull();
+  Status AppendNull() final;
+
+  Status AppendNulls(int64_t length) final;
 
   /// \brief Append a whole dense array to the builder
   Status AppendArray(const Array& array);
diff --git a/cpp/src/arrow/array/builder_nested.cc b/cpp/src/arrow/array/builder_nested.cc
index 4663771..b65394f 100644
--- a/cpp/src/arrow/array/builder_nested.cc
+++ b/cpp/src/arrow/array/builder_nested.cc
@@ -57,12 +57,18 @@ Status ListBuilder::AppendValues(const int32_t* offsets, int64_t length,
   return Status::OK();
 }
 
-Status ListBuilder::AppendNextOffset() {
+Status ListBuilder::CheckNextOffset() const {
   const int64_t num_values = value_builder_->length();
   ARROW_RETURN_IF(
       num_values > kListMaximumElements,
       Status::CapacityError("ListArray cannot contain more then 2^31 - 1 child elements,",
                             " have ", num_values));
+  return Status::OK();
+}
+
+Status ListBuilder::AppendNextOffset() {
+  RETURN_NOT_OK(CheckNextOffset());
+  const int64_t num_values = value_builder_->length();
   return offsets_builder_.Append(static_cast<int32_t>(num_values));
 }
 
@@ -72,6 +78,17 @@ Status ListBuilder::Append(bool is_valid) {
   return AppendNextOffset();
 }
 
+Status ListBuilder::AppendNulls(int64_t length) {
+  RETURN_NOT_OK(Reserve(length));
+  RETURN_NOT_OK(CheckNextOffset());
+  UnsafeAppendToBitmap(length, false);
+  const int64_t num_values = value_builder_->length();
+  for (int64_t i = 0; i < length; ++i) {
+    offsets_builder_.UnsafeAppend(static_cast<int32_t>(num_values));
+  }
+  return Status::OK();
+}
+
 Status ListBuilder::Resize(int64_t capacity) {
   DCHECK_LE(capacity, kListMaximumElements);
   RETURN_NOT_OK(CheckCapacity(capacity, capacity_));
@@ -141,6 +158,12 @@ void StructBuilder::Reset() {
   }
 }
 
+Status StructBuilder::AppendNulls(int64_t length) {
+  ARROW_RETURN_NOT_OK(Reserve(length));
+  UnsafeAppendToBitmap(length, false);
+  return Status::OK();
+}
+
 Status StructBuilder::FinishInternal(std::shared_ptr<ArrayData>* out) {
   std::shared_ptr<Buffer> null_bitmap;
   RETURN_NOT_OK(null_bitmap_builder_.Finish(&null_bitmap));
diff --git a/cpp/src/arrow/array/builder_nested.h b/cpp/src/arrow/array/builder_nested.h
index 19b0ad8..79e8c1b 100644
--- a/cpp/src/arrow/array/builder_nested.h
+++ b/cpp/src/arrow/array/builder_nested.h
@@ -65,7 +65,9 @@ class ARROW_EXPORT ListBuilder : public ArrayBuilder {
   /// value builder
   Status Append(bool is_valid = true);
 
-  Status AppendNull() { return Append(false); }
+  Status AppendNull() final { return Append(false); }
+
+  Status AppendNulls(int64_t length) final;
 
   ArrayBuilder* value_builder() const;
 
@@ -74,7 +76,9 @@ class ARROW_EXPORT ListBuilder : public ArrayBuilder {
   std::shared_ptr<ArrayBuilder> value_builder_;
   std::shared_ptr<Array> values_;
 
+  Status CheckNextOffset() const;
   Status AppendNextOffset();
+  Status AppendNextOffset(int64_t num_repeats);
 };
 
 // ----------------------------------------------------------------------
@@ -110,7 +114,9 @@ class ARROW_EXPORT StructBuilder : public ArrayBuilder {
     return Status::OK();
   }
 
-  Status AppendNull() { return Append(false); }
+  Status AppendNull() final { return Append(false); }
+
+  Status AppendNulls(int64_t length) final;
 
   void Reset() override;
 
diff --git a/cpp/src/arrow/array/builder_primitive.h b/cpp/src/arrow/array/builder_primitive.h
index 21f87b2..d8b2a5f 100644
--- a/cpp/src/arrow/array/builder_primitive.h
+++ b/cpp/src/arrow/array/builder_primitive.h
@@ -33,7 +33,7 @@ class ARROW_EXPORT NullBuilder : public ArrayBuilder {
       : ArrayBuilder(null(), pool) {}
 
   /// \brief Append the specified number of null elements
-  Status AppendNulls(int64_t length) {
+  Status AppendNulls(int64_t length) final {
     if (length < 0) return Status::Invalid("length must be positive");
     null_count_ += length;
     length_ += length;
@@ -41,7 +41,7 @@ class ARROW_EXPORT NullBuilder : public ArrayBuilder {
   }
 
   /// \brief Append a single null element
-  Status AppendNull() { return AppendNulls(1); }
+  Status AppendNull() final { return AppendNulls(1); }
 
   Status Append(std::nullptr_t) { return AppendNull(); }
 
@@ -71,7 +71,7 @@ class NumericBuilder : public ArrayBuilder {
   /// Write nulls as uint8_t* (0 value indicates null) into pre-allocated memory
   /// The memory at the corresponding data slot is set to 0 to prevent
   /// uninitialized memory access
-  Status AppendNulls(int64_t length) {
+  Status AppendNulls(int64_t length) final {
     ARROW_RETURN_NOT_OK(Reserve(length));
     data_builder_.UnsafeAppend(length, static_cast<value_type>(0));
     UnsafeSetNull(length);
@@ -79,7 +79,7 @@ class NumericBuilder : public ArrayBuilder {
   }
 
   /// \brief Append a single null element
-  Status AppendNull() {
+  Status AppendNull() final {
     ARROW_RETURN_NOT_OK(Reserve(1));
     data_builder_.UnsafeAppend(static_cast<value_type>(0));
     UnsafeAppendToBitmap(false);
@@ -263,14 +263,14 @@ class ARROW_EXPORT BooleanBuilder : public ArrayBuilder {
   explicit BooleanBuilder(const std::shared_ptr<DataType>& type, MemoryPool* pool);
 
   /// Write nulls as uint8_t* (0 value indicates null) into pre-allocated memory
-  Status AppendNulls(int64_t length) {
+  Status AppendNulls(int64_t length) final {
     ARROW_RETURN_NOT_OK(Reserve(length));
     data_builder_.UnsafeAppend(length, false);
     UnsafeSetNull(length);
     return Status::OK();
   }
 
-  Status AppendNull() {
+  Status AppendNull() final {
     ARROW_RETURN_NOT_OK(Reserve(1));
     UnsafeAppendNull();
     return Status::OK();
diff --git a/cpp/src/arrow/array/builder_union.h b/cpp/src/arrow/array/builder_union.h
index 2ababc7..5764d5b 100644
--- a/cpp/src/arrow/array/builder_union.h
+++ b/cpp/src/arrow/array/builder_union.h
@@ -47,12 +47,23 @@ class ARROW_EXPORT DenseUnionBuilder : public ArrayBuilder {
   explicit DenseUnionBuilder(MemoryPool* pool,
                              const std::shared_ptr<DataType>& type = NULLPTR);
 
-  Status AppendNull() {
+  Status AppendNull() final {
     ARROW_RETURN_NOT_OK(types_builder_.Append(0));
     ARROW_RETURN_NOT_OK(offsets_builder_.Append(0));
     return AppendToBitmap(false);
   }
 
+  Status AppendNulls(int64_t length) final {
+    ARROW_RETURN_NOT_OK(types_builder_.Reserve(length));
+    ARROW_RETURN_NOT_OK(offsets_builder_.Reserve(length));
+    ARROW_RETURN_NOT_OK(Reserve(length));
+    for (int64_t i = 0; i < length; ++i) {
+      types_builder_.UnsafeAppend(0);
+      offsets_builder_.UnsafeAppend(0);
+    }
+    return AppendToBitmap(length, false);
+  }
+
   /// \brief Append an element to the UnionArray. This must be followed
   ///        by an append to the appropriate child builder.
   /// \param[in] type index of the child the value will be appended
diff --git a/cpp/src/arrow/compute/kernels/cast-test.cc b/cpp/src/arrow/compute/kernels/cast-test.cc
index 4bbdfaa..aa5815b 100644
--- a/cpp/src/arrow/compute/kernels/cast-test.cc
+++ b/cpp/src/arrow/compute/kernels/cast-test.cc
@@ -37,6 +37,7 @@
 #include "arrow/type.h"
 #include "arrow/type_fwd.h"
 #include "arrow/type_traits.h"
+#include "arrow/util/checked_cast.h"
 #include "arrow/util/decimal.h"
 
 #include "arrow/compute/context.h"
@@ -49,6 +50,8 @@
 namespace arrow {
 namespace compute {
 
+using internal::checked_cast;
+
 static std::vector<std::shared_ptr<DataType>> kNumericTypes = {
     uint8(), int8(),   uint16(), int16(),   uint32(),
     int32(), uint64(), int64(),  float32(), float64()};
@@ -64,6 +67,7 @@ class TestCast : public ComputeFixture, public TestBase {
                  const std::shared_ptr<DataType>& out_type, const CastOptions& options) {
     std::shared_ptr<Array> result;
     ASSERT_OK(Cast(&ctx_, input, out_type, options, &result));
+    ASSERT_OK(ValidateArray(*result));
     ASSERT_ARRAYS_EQUAL(expected, *result);
   }
 
@@ -83,6 +87,7 @@ class TestCast : public ComputeFixture, public TestBase {
   void CheckZeroCopy(const Array& input, const std::shared_ptr<DataType>& out_type) {
     std::shared_ptr<Array> result;
     ASSERT_OK(Cast(&ctx_, input, out_type, {}, &result));
+    ASSERT_OK(ValidateArray(*result));
     ASSERT_EQ(input.data()->buffers.size(), result->data()->buffers.size());
     for (size_t i = 0; i < input.data()->buffers.size(); ++i) {
       AssertBufferSame(input, *result, static_cast<int>(i));
@@ -806,22 +811,6 @@ TEST_F(TestCast, DateTimeZeroCopy) {
   CheckZeroCopy(*arr, timestamp(TimeUnit::NANO));
 }
 
-TEST_F(TestCast, FromNull) {
-  // Null casts to everything
-  const int length = 10;
-
-  NullArray arr(length);
-
-  std::shared_ptr<Array> result;
-  ASSERT_OK(Cast(&ctx_, arr, int32(), {}, &result));
-
-  ASSERT_EQ(length, result->length());
-  ASSERT_EQ(length, result->null_count());
-
-  // OK to look at bitmaps
-  ASSERT_ARRAYS_EQUAL(*result, *result);
-}
-
 TEST_F(TestCast, PreallocatedMemory) {
   CastOptions options;
   options.allow_int_overflow = false;
@@ -1094,86 +1083,6 @@ TEST_F(TestCast, BinaryToString) {
                                                               utf8(), strings, options);
 }
 
-template <typename TestType>
-class TestDictionaryCast : public TestCast {};
-
-typedef ::testing::Types<NullType, UInt8Type, Int8Type, UInt16Type, Int16Type, Int32Type,
-                         UInt32Type, UInt64Type, Int64Type, FloatType, DoubleType,
-                         Date32Type, Date64Type, FixedSizeBinaryType, BinaryType>
-    TestTypes;
-
-TYPED_TEST_CASE(TestDictionaryCast, TestTypes);
-
-TYPED_TEST(TestDictionaryCast, Basic) {
-  CastOptions options;
-  std::shared_ptr<Array> plain_array =
-      TestBase::MakeRandomArray<typename TypeTraits<TypeParam>::ArrayType>(10, 2);
-
-  Datum out;
-  ASSERT_OK(DictionaryEncode(&this->ctx_, plain_array->data(), &out));
-
-  this->CheckPass(*MakeArray(out.array()), *plain_array, plain_array->type(), options);
-}
-
-TEST_F(TestCast, DictToNumericNoNulls) {
-  // ARROW-3208
-  CastOptions options;
-
-  // Convoluted way to create an array with nullptr bitmap buffer
-  auto array_ = _MakeArray<Int32Type, int32_t>(int32(), {1, 2, 3, 4, 5, 6}, {});
-  auto data = array_->data();
-  data->buffers[0] = nullptr;
-  auto array = MakeArray(data);
-
-  Datum encoded;
-  ASSERT_OK(DictionaryEncode(&this->ctx_, array->data(), &encoded));
-
-  this->CheckPass(*MakeArray(encoded.array()), *array, array->type(), options);
-}
-
-TEST_F(TestCast, DictToNonDictNoNulls) {
-  std::vector<std::string> dict_values = {"foo", "bar", "baz"};
-  auto ex_dict = _MakeArray<StringType, std::string>(utf8(), dict_values, {});
-  auto dict_type = dictionary(int32(), ex_dict);
-
-  // Explicitly construct with nullptr for the null_bitmap_data
-  std::vector<int32_t> i1 = {1, 0, 1};
-  std::vector<int32_t> i2 = {2, 1, 0, 1};
-  auto c1 = std::make_shared<NumericArray<Int32Type>>(3, Buffer::Wrap(i1));
-  auto c2 = std::make_shared<NumericArray<Int32Type>>(4, Buffer::Wrap(i2));
-
-  ArrayVector dict_arrays = {std::make_shared<DictionaryArray>(dict_type, c1),
-                             std::make_shared<DictionaryArray>(dict_type, c2)};
-  auto dict_carr = std::make_shared<ChunkedArray>(dict_arrays);
-
-  Datum cast_input(dict_carr);
-  Datum cast_output;
-  // Ensure that casting works even when the null_bitmap_data array is a nullptr
-  ASSERT_OK(Cast(&this->ctx_, cast_input,
-                 static_cast<DictionaryType&>(*dict_type).dictionary()->type(),
-                 CastOptions(), &cast_output));
-  ASSERT_EQ(Datum::CHUNKED_ARRAY, cast_output.kind());
-
-  auto e1 = _MakeArray<StringType, std::string>(utf8(), {"bar", "foo", "bar"}, {});
-  auto e2 = _MakeArray<StringType, std::string>(utf8(), {"baz", "bar", "foo", "bar"}, {});
-
-  auto chunks = cast_output.chunked_array()->chunks();
-  ASSERT_EQ(chunks.size(), 2);
-  ASSERT_ARRAYS_EQUAL(*e1, *chunks[0]);
-  ASSERT_ARRAYS_EQUAL(*e2, *chunks[1]);
-}
-
-/*TYPED_TEST(TestDictionaryCast, Reverse) {
-  CastOptions options;
-  std::shared_ptr<Array> plain_array =
-      TestBase::MakeRandomArray<typename TypeTraits<TypeParam>::ArrayType>(10, 2);
-
-  std::shared_ptr<Array> dict_array;
-  ASSERT_OK(EncodeArrayToDictionary(*plain_array, this->pool_, &dict_array));
-
-  this->CheckPass(*plain_array, *dict_array, dict_array->type(), options);
-}*/
-
 TEST_F(TestCast, ListToList) {
   CastOptions options;
   std::shared_ptr<Array> offsets;
@@ -1264,5 +1173,100 @@ TEST_F(TestCast, EmptyCasts) {
   }
 }
 
+// ----------------------------------------------------------------------
+// Test casting from NullType
+
+template <typename TestType>
+class TestNullCast : public TestCast {};
+
+typedef ::testing::Types<NullType, UInt8Type, Int8Type, UInt16Type, Int16Type, Int32Type,
+                         UInt32Type, UInt64Type, Int64Type, FloatType, DoubleType,
+                         Date32Type, Date64Type, FixedSizeBinaryType, BinaryType>
+    TestTypes;
+
+TYPED_TEST_CASE(TestNullCast, TestTypes);
+
+TYPED_TEST(TestNullCast, FromNull) {
+  // Null casts to everything
+  const int length = 10;
+
+  // Hack to obtain a DataType instance, even for parametric types
+  std::shared_ptr<DataType> out_type =
+      TestBase::MakeRandomArray<typename TypeTraits<TypeParam>::ArrayType>(0, 0)->type();
+
+  NullArray arr(length);
+
+  std::shared_ptr<Array> result;
+  ASSERT_OK(Cast(&this->ctx_, arr, out_type, {}, &result));
+  ASSERT_OK(ValidateArray(*result));
+
+  ASSERT_TRUE(result->type()->Equals(*out_type));
+  ASSERT_EQ(length, result->length());
+  ASSERT_EQ(length, result->null_count());
+}
+
+// ----------------------------------------------------------------------
+// Test casting from DictionaryType
+
+template <typename TestType>
+class TestDictionaryCast : public TestCast {};
+
+typedef ::testing::Types<NullType, UInt8Type, Int8Type, UInt16Type, Int16Type, Int32Type,
+                         UInt32Type, UInt64Type, Int64Type, FloatType, DoubleType,
+                         Date32Type, Date64Type, FixedSizeBinaryType, BinaryType>
+    TestTypes;
+
+TYPED_TEST_CASE(TestDictionaryCast, TestTypes);
+
+TYPED_TEST(TestDictionaryCast, Basic) {
+  CastOptions options;
+  std::shared_ptr<Array> plain_array =
+      TestBase::MakeRandomArray<typename TypeTraits<TypeParam>::ArrayType>(10, 2);
+
+  Datum encoded;
+  ASSERT_OK(DictionaryEncode(&this->ctx_, plain_array->data(), &encoded));
+  ASSERT_EQ(encoded.array()->type->id(), Type::DICTIONARY);
+
+  this->CheckPass(*MakeArray(encoded.array()), *plain_array, plain_array->type(),
+                  options);
+}
+
+TYPED_TEST(TestDictionaryCast, NoNulls) {
+  // Test with a nullptr bitmap buffer (ARROW-3208)
+  if (TypeParam::type_id == Type::NA) {
+    // Skip, but gtest doesn't support skipping :-/
+    return;
+  }
+
+  CastOptions options;
+  std::shared_ptr<Array> plain_array =
+      TestBase::MakeRandomArray<typename TypeTraits<TypeParam>::ArrayType>(10, 0);
+  ASSERT_EQ(plain_array->null_count(), 0);
+
+  // Dict-encode the plain array
+  Datum encoded;
+  ASSERT_OK(DictionaryEncode(&this->ctx_, plain_array->data(), &encoded));
+
+  // Make a new dict array with nullptr bitmap buffer
+  auto data = encoded.array()->Copy();
+  data->buffers[0] = nullptr;
+  data->null_count = 0;
+  std::shared_ptr<Array> dict_array = std::make_shared<DictionaryArray>(data);
+  ASSERT_OK(ValidateArray(*dict_array));
+
+  this->CheckPass(*dict_array, *plain_array, plain_array->type(), options);
+}
+
+/*TYPED_TEST(TestDictionaryCast, Reverse) {
+  CastOptions options;
+  std::shared_ptr<Array> plain_array =
+      TestBase::MakeRandomArray<typename TypeTraits<TypeParam>::ArrayType>(10, 2);
+
+  std::shared_ptr<Array> dict_array;
+  ASSERT_OK(EncodeArrayToDictionary(*plain_array, this->pool_, &dict_array));
+
+  this->CheckPass(*plain_array, *dict_array, dict_array->type(), options);
+}*/
+
 }  // namespace compute
 }  // namespace arrow
diff --git a/cpp/src/arrow/compute/kernels/cast.cc b/cpp/src/arrow/compute/kernels/cast.cc
index 2d3e1a8..749e200 100644
--- a/cpp/src/arrow/compute/kernels/cast.cc
+++ b/cpp/src/arrow/compute/kernels/cast.cc
@@ -38,6 +38,7 @@
 #include "arrow/util/macros.h"
 #include "arrow/util/parsing.h"  // IWYU pragma: keep
 #include "arrow/util/utf8.h"
+#include "arrow/visitor_inline.h"
 
 #include "arrow/compute/context.h"
 #include "arrow/compute/kernel.h"
@@ -78,19 +79,16 @@ namespace compute {
 
 constexpr int64_t kMillisecondsInDay = 86400000;
 
+Status CastNotImplemented(const DataType& in_type, const DataType& out_type) {
+  return Status::NotImplemented("No cast implemented from ", in_type.ToString(), " to ",
+                                out_type.ToString());
+}
+
 template <typename OutType, typename InType, typename Enable = void>
 struct CastFunctor {};
 
 // ----------------------------------------------------------------------
-// Null to other things
-
-template <typename T>
-struct CastFunctor<
-    T, NullType,
-    typename std::enable_if<std::is_base_of<FixedWidthType, T>::value>::type> {
-  void operator()(FunctionContext* ctx, const CastOptions& options,
-                  const ArrayData& input, ArrayData* output) {}
-};
+// Dictionary to null
 
 template <>
 struct CastFunctor<NullType, DictionaryType> {
@@ -689,6 +687,63 @@ class ListCastKernel : public CastKernelBase {
 };
 
 // ----------------------------------------------------------------------
+// Null to other things
+
+class FromNullCastKernel : public CastKernelBase {
+ public:
+  explicit FromNullCastKernel(std::shared_ptr<DataType> out_type)
+      : CastKernelBase(std::move(out_type)) {}
+
+  Status Call(FunctionContext* ctx, const Datum& input, Datum* out) override {
+    DCHECK_EQ(Datum::ARRAY, input.kind());
+
+    const ArrayData& in_data = *input.array();
+    DCHECK_EQ(Type::NA, in_data.type->id());
+    auto length = in_data.length;
+
+    // An ArrayData may be preallocated for the output (see InvokeUnaryArrayKernel);
+    // however, it doesn't have any actual data, so throw it away and start anew.
+    std::unique_ptr<ArrayBuilder> builder;
+    RETURN_NOT_OK(MakeBuilder(ctx->memory_pool(), out_type_, &builder));
+    NullBuilderVisitor visitor = {length, builder.get()};
+    RETURN_NOT_OK(VisitTypeInline(*out_type_, &visitor));
+
+    std::shared_ptr<Array> out_array;
+    RETURN_NOT_OK(visitor.builder_->Finish(&out_array));
+    out->value = out_array->data();
+    return Status::OK();
+  }
+
+  struct NullBuilderVisitor {
+    // Generic implementation
+    Status Visit(const DataType& type) { return builder_->AppendNulls(length_); }
+
+    Status Visit(const StructType& type) {
+      RETURN_NOT_OK(builder_->AppendNulls(length_));
+      auto& struct_builder = checked_cast<StructBuilder&>(*builder_);
+      // Append nulls to all child builders too
+      for (int i = 0; i < struct_builder.num_fields(); ++i) {
+        NullBuilderVisitor visitor = {length_, struct_builder.field_builder(i)};
+        RETURN_NOT_OK(VisitTypeInline(*type.child(i)->type(), &visitor));
+      }
+      return Status::OK();
+    }
+
+    Status Visit(const DictionaryType& type) {
+      // XXX (ARROW-5215): Cannot implement this easily, as DictionaryBuilder
+      // disregards the index type given in the dictionary type, and instead
+      // chooses the smallest possible index type.
+      return CastNotImplemented(*null(), type);
+    }
+
+    Status Visit(const UnionType& type) { return CastNotImplemented(*null(), type); }
+
+    int64_t length_;
+    ArrayBuilder* builder_;
+  };
+};
+
+// ----------------------------------------------------------------------
 // Dictionary to other things
 
 template <typename IndexType>
@@ -1125,7 +1180,6 @@ class CastKernel : public CastKernelBase {
 
 #include "generated/cast-codegen-internal.h"  // NOLINT
 
-GET_CAST_FUNCTION(NULL_CASES, NullType)
 GET_CAST_FUNCTION(BOOLEAN_CASES, BooleanType)
 GET_CAST_FUNCTION(UINT8_CASES, UInt8Type)
 GET_CAST_FUNCTION(INT8_CASES, Int8Type)
@@ -1194,17 +1248,21 @@ inline bool IsZeroCopyCast(Type::type in_type, Type::type out_type) {
 Status GetCastFunction(const DataType& in_type, std::shared_ptr<DataType> out_type,
                        const CastOptions& options, std::unique_ptr<UnaryKernel>* kernel) {
   if (in_type.Equals(out_type)) {
-    *kernel = std::unique_ptr<UnaryKernel>(new IdentityCast(std::move(out_type)));
+    kernel->reset(new IdentityCast(std::move(out_type)));
     return Status::OK();
   }
 
   if (IsZeroCopyCast(in_type.id(), out_type->id())) {
-    *kernel = std::unique_ptr<UnaryKernel>(new ZeroCopyCast(std::move(out_type)));
+    kernel->reset(new ZeroCopyCast(std::move(out_type)));
+    return Status::OK();
+  }
+
+  if (in_type.id() == Type::NA) {
+    kernel->reset(new FromNullCastKernel(std::move(out_type)));
     return Status::OK();
   }
 
   switch (in_type.id()) {
-    CAST_FUNCTION_CASE(NullType);
     CAST_FUNCTION_CASE(BooleanType);
     CAST_FUNCTION_CASE(UInt8Type);
     CAST_FUNCTION_CASE(Int8Type);
@@ -1231,8 +1289,7 @@ Status GetCastFunction(const DataType& in_type, std::shared_ptr<DataType> out_ty
       break;
   }
   if (*kernel == nullptr) {
-    return Status::NotImplemented("No cast implemented from ", in_type.ToString(), " to ",
-                                  out_type->ToString());
+    return CastNotImplemented(in_type, *out_type);
   }
   return Status::OK();
 }
diff --git a/cpp/src/arrow/compute/kernels/generated/cast-codegen-internal.h b/cpp/src/arrow/compute/kernels/generated/cast-codegen-internal.h
index cf2c036..77334af 100644
--- a/cpp/src/arrow/compute/kernels/generated/cast-codegen-internal.h
+++ b/cpp/src/arrow/compute/kernels/generated/cast-codegen-internal.h
@@ -17,24 +17,6 @@
 
 // THIS FILE IS AUTOMATICALLY GENERATED, DO NOT EDIT
 // Generated by codegen.py script
-#define NULL_CASES(TEMPLATE) \
-  TEMPLATE(NullType, BooleanType) \
-  TEMPLATE(NullType, UInt8Type) \
-  TEMPLATE(NullType, Int8Type) \
-  TEMPLATE(NullType, UInt16Type) \
-  TEMPLATE(NullType, Int16Type) \
-  TEMPLATE(NullType, UInt32Type) \
-  TEMPLATE(NullType, Int32Type) \
-  TEMPLATE(NullType, UInt64Type) \
-  TEMPLATE(NullType, Int64Type) \
-  TEMPLATE(NullType, FloatType) \
-  TEMPLATE(NullType, DoubleType) \
-  TEMPLATE(NullType, Date32Type) \
-  TEMPLATE(NullType, Date64Type) \
-  TEMPLATE(NullType, Time32Type) \
-  TEMPLATE(NullType, Time64Type) \
-  TEMPLATE(NullType, TimestampType)
-
 #define BOOLEAN_CASES(TEMPLATE) \
   TEMPLATE(BooleanType, UInt8Type) \
   TEMPLATE(BooleanType, Int8Type) \
diff --git a/cpp/src/arrow/compute/kernels/generated/codegen.py b/cpp/src/arrow/compute/kernels/generated/codegen.py
index 397ba66..04fc386 100644
--- a/cpp/src/arrow/compute/kernels/generated/codegen.py
+++ b/cpp/src/arrow/compute/kernels/generated/codegen.py
@@ -21,6 +21,7 @@
 
 
 import io
+import os
 
 
 INTEGER_TYPES = ['UInt8', 'Int8', 'UInt16', 'Int16',
@@ -64,7 +65,6 @@ class CastCodeGenerator(object):
 
 
 CAST_GENERATORS = [
-    CastCodeGenerator('Null', NUMERIC_TYPES + DATE_TIME_TYPES),
     CastCodeGenerator('Boolean', NUMERIC_TYPES),
     CastCodeGenerator('UInt8', NUMERIC_TYPES),
     CastCodeGenerator('Int8', NUMERIC_TYPES),
@@ -126,8 +126,10 @@ def write_file_with_preamble(path, code):
 
 
 def write_files():
+    here = os.path.abspath(os.path.dirname(__file__))
     cast_code = generate_cast_code()
-    write_file_with_preamble('cast-codegen-internal.h', cast_code)
+    write_file_with_preamble(os.path.join(here, 'cast-codegen-internal.h'),
+                             cast_code)
 
 
 if __name__ == '__main__':
diff --git a/python/pyarrow/tests/test_array.py b/python/pyarrow/tests/test_array.py
index 476740d..b70dbca 100644
--- a/python/pyarrow/tests/test_array.py
+++ b/python/pyarrow/tests/test_array.py
@@ -531,11 +531,20 @@ def test_string_from_buffers():
 
 def _check_cast_case(case, safe=True):
     in_data, in_type, out_data, out_type = case
-    expected = pa.array(out_data, type=out_type)
+    if isinstance(out_data, pa.Array):
+        assert out_data.type == out_type
+        expected = out_data
+    else:
+        expected = pa.array(out_data, type=out_type)
 
     # check casting an already created array
-    in_arr = pa.array(in_data, type=in_type)
+    if isinstance(in_data, pa.Array):
+        assert in_data.type == in_type
+        in_arr = in_data
+    else:
+        in_arr = pa.array(in_data, type=in_type)
     casted = in_arr.cast(out_type, safe=safe)
+    casted.validate()
     assert casted.equals(expected)
 
     # constructing an array with out type which optionally involves casting
@@ -665,6 +674,41 @@ def test_cast_signed_to_unsigned():
         _check_cast_case(case)
 
 
+def test_cast_from_null():
+    in_data = [None] * 3
+    in_type = pa.null()
+    out_types = [
+        pa.null(),
+        pa.uint8(),
+        pa.float16(),
+        pa.utf8(),
+        pa.binary(),
+        pa.binary(10),
+        pa.list_(pa.int16()),
+        pa.decimal128(19, 4),
+        pa.timestamp('us'),
+        pa.timestamp('us', tz='UTC'),
+        pa.timestamp('us', tz='Europe/Paris'),
+        pa.struct([pa.field('a', pa.int32()),
+                   pa.field('b', pa.list_(pa.int8())),
+                   pa.field('c', pa.string())]),
+        ]
+    for out_type in out_types:
+        _check_cast_case((in_data, in_type, in_data, out_type))
+
+    out_types = [
+        pa.dictionary(pa.int32(), pa.array(['a', 'b', 'c'])),
+        pa.union([pa.field('a', pa.binary(10)),
+                  pa.field('b', pa.string())], mode=pa.lib.UnionMode_DENSE),
+        pa.union([pa.field('a', pa.binary(10)),
+                  pa.field('b', pa.string())], mode=pa.lib.UnionMode_SPARSE),
+        ]
+    in_arr = pa.array(in_data, type=pa.null())
+    for out_type in out_types:
+        with pytest.raises(NotImplementedError):
+            in_arr.cast(out_type)
+
+
 def test_unique_simple():
     cases = [
         (pa.array([1, 2, 3, 1, 2, 3]), pa.array([1, 2, 3])),