You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ap...@apache.org on 2019/07/30 07:11:31 UTC

[arrow] branch master updated: ARROW-750: [Format] [C++] Add LargeBinary and LargeString types

This is an automated email from the ASF dual-hosted git repository.

apitrou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 6e8607f  ARROW-750: [Format] [C++] Add LargeBinary and LargeString types
6e8607f is described below

commit 6e8607f83363903737fa9392a8a7ba5031665e0b
Author: Antoine Pitrou <an...@python.org>
AuthorDate: Tue Jul 30 09:11:17 2019 +0200

    ARROW-750: [Format] [C++] Add LargeBinary and LargeString types
    
    These are like Binary and String respectively, except with 64-bit offsets
    so as to allow extremely large individual values.
    
    Closes #4921 from pitrou/ARROW-750-large-binary-2 and squashes the following commits:
    
    3e17dcd27 <Antoine Pitrou> Address review comments
    9a2995378 <Antoine Pitrou> ARROW-750:   Add LargeBinary and LargeString types
    
    Authored-by: Antoine Pitrou <an...@python.org>
    Signed-off-by: Antoine Pitrou <an...@python.org>
---
 cpp/src/arrow/array-binary-test.cc                 | 711 ++++++++-------------
 cpp/src/arrow/array.cc                             | 133 ++--
 cpp/src/arrow/array.h                              |  94 ++-
 cpp/src/arrow/array/builder_binary.cc              | 168 +----
 cpp/src/arrow/array/builder_binary.h               | 298 +++++++--
 cpp/src/arrow/array/concatenate-test.cc            |  31 +-
 cpp/src/arrow/array/concatenate.cc                 |  31 +-
 cpp/src/arrow/builder.cc                           |   2 +
 cpp/src/arrow/compare.cc                           |  52 +-
 cpp/src/arrow/compute/kernels/cast-test.cc         | 255 ++++----
 cpp/src/arrow/compute/kernels/cast.cc              |  95 +--
 .../kernels/generated/cast-codegen-internal.h      |  17 +
 cpp/src/arrow/compute/kernels/generated/codegen.py |   2 +
 cpp/src/arrow/csv/converter-test.cc                |  39 +-
 cpp/src/arrow/csv/converter.cc                     |   9 +
 cpp/src/arrow/ipc/feather.cc                       |  66 +-
 cpp/src/arrow/ipc/feather.fbs                      |   5 +-
 cpp/src/arrow/ipc/json-internal.cc                 |  59 +-
 cpp/src/arrow/ipc/json-simple-test.cc              |  15 +
 cpp/src/arrow/ipc/json-simple.cc                   |  16 +-
 cpp/src/arrow/ipc/metadata-internal.cc             |  18 +
 cpp/src/arrow/ipc/reader.cc                        |   6 +-
 cpp/src/arrow/ipc/test-common.cc                   |  42 +-
 cpp/src/arrow/ipc/writer.cc                        |  19 +-
 cpp/src/arrow/json/converter-test.cc               |   5 +
 cpp/src/arrow/json/converter.cc                    |   2 +
 cpp/src/arrow/pretty_print-test.cc                 |   4 +
 cpp/src/arrow/pretty_print.cc                      |   9 +-
 cpp/src/arrow/scalar.h                             |  36 +-
 cpp/src/arrow/testing/random.cc                    |  38 +-
 cpp/src/arrow/testing/random.h                     |  15 +-
 cpp/src/arrow/type-test.cc                         |  14 +
 cpp/src/arrow/type.cc                              |  12 +-
 cpp/src/arrow/type.h                               |  85 ++-
 cpp/src/arrow/type_fwd.h                           |  14 +
 cpp/src/arrow/type_traits.h                        |  43 ++
 cpp/src/arrow/visitor.cc                           |   6 +
 cpp/src/arrow/visitor.h                            |   6 +
 cpp/src/arrow/visitor_inline.h                     |   7 +-
 format/Schema.fbs                                  |  13 +
 40 files changed, 1409 insertions(+), 1083 deletions(-)

diff --git a/cpp/src/arrow/array-binary-test.cc b/cpp/src/arrow/array-binary-test.cc
index cb8d6d5..85a1620 100644
--- a/cpp/src/arrow/array-binary-test.cc
+++ b/cpp/src/arrow/array-binary-test.cc
@@ -40,6 +40,9 @@ namespace arrow {
 
 using internal::checked_cast;
 
+using StringTypes =
+    ::testing::Types<StringType, LargeStringType, BinaryType, LargeBinaryType>;
+
 // ----------------------------------------------------------------------
 // String / Binary tests
 
@@ -67,8 +70,14 @@ void CheckStringArray(const ArrayType& array, const std::vector<std::string>& st
   }
 }
 
+template <typename T>
 class TestStringArray : public ::testing::Test {
  public:
+  using TypeClass = T;
+  using offset_type = typename TypeClass::offset_type;
+  using ArrayType = typename TypeTraits<TypeClass>::ArrayType;
+  using BuilderType = typename TypeTraits<TypeClass>::BuilderType;
+
   void SetUp() {
     chars_ = {'a', 'b', 'b', 'c', 'c', 'c'};
     offsets_ = {0, 1, 1, 1, 3, 6};
@@ -85,268 +94,132 @@ class TestStringArray : public ::testing::Test {
     ASSERT_OK(BitUtil::BytesToBits(valid_bytes_, default_memory_pool(), &null_bitmap_));
     null_count_ = CountNulls(valid_bytes_);
 
-    strings_ = std::make_shared<StringArray>(length_, offsets_buf_, value_buf_,
-                                             null_bitmap_, null_count_);
-  }
-
- protected:
-  std::vector<int32_t> offsets_;
-  std::vector<char> chars_;
-  std::vector<uint8_t> valid_bytes_;
-
-  std::vector<std::string> expected_;
-
-  std::shared_ptr<Buffer> value_buf_;
-  std::shared_ptr<Buffer> offsets_buf_;
-  std::shared_ptr<Buffer> null_bitmap_;
-
-  int64_t null_count_;
-  int64_t length_;
-
-  std::shared_ptr<StringArray> strings_;
-};
-
-TEST_F(TestStringArray, TestArrayBasics) {
-  ASSERT_EQ(length_, strings_->length());
-  ASSERT_EQ(1, strings_->null_count());
-  ASSERT_OK(ValidateArray(*strings_));
-}
-
-TEST_F(TestStringArray, TestType) {
-  std::shared_ptr<DataType> type = strings_->type();
-
-  ASSERT_EQ(Type::STRING, type->id());
-  ASSERT_EQ(Type::STRING, strings_->type_id());
-}
-
-TEST_F(TestStringArray, TestListFunctions) {
-  int pos = 0;
-  for (size_t i = 0; i < expected_.size(); ++i) {
-    ASSERT_EQ(pos, strings_->value_offset(i));
-    ASSERT_EQ(static_cast<int>(expected_[i].size()), strings_->value_length(i));
-    pos += static_cast<int>(expected_[i].size());
-  }
-}
-
-TEST_F(TestStringArray, TestDestructor) {
-  auto arr = std::make_shared<StringArray>(length_, offsets_buf_, value_buf_,
+    strings_ = std::make_shared<ArrayType>(length_, offsets_buf_, value_buf_,
                                            null_bitmap_, null_count_);
-}
+  }
 
-TEST_F(TestStringArray, TestGetString) {
-  for (size_t i = 0; i < expected_.size(); ++i) {
-    if (valid_bytes_[i] == 0) {
-      ASSERT_TRUE(strings_->IsNull(i));
+  void TestArrayBasics() {
+    ASSERT_EQ(length_, strings_->length());
+    ASSERT_EQ(1, strings_->null_count());
+    ASSERT_OK(ValidateArray(*strings_));
+    TestInitialized(*strings_);
+    AssertZeroPadded(*strings_);
+  }
+
+  void TestType() {
+    std::shared_ptr<DataType> type = this->strings_->type();
+
+    if (std::is_same<TypeClass, StringType>::value) {
+      ASSERT_EQ(Type::STRING, type->id());
+      ASSERT_EQ(Type::STRING, this->strings_->type_id());
+    } else if (std::is_same<TypeClass, LargeStringType>::value) {
+      ASSERT_EQ(Type::LARGE_STRING, type->id());
+      ASSERT_EQ(Type::LARGE_STRING, this->strings_->type_id());
+    } else if (std::is_same<TypeClass, BinaryType>::value) {
+      ASSERT_EQ(Type::BINARY, type->id());
+      ASSERT_EQ(Type::BINARY, this->strings_->type_id());
+    } else if (std::is_same<TypeClass, LargeBinaryType>::value) {
+      ASSERT_EQ(Type::LARGE_BINARY, type->id());
+      ASSERT_EQ(Type::LARGE_BINARY, this->strings_->type_id());
     } else {
-      ASSERT_EQ(expected_[i], strings_->GetString(i));
+      FAIL();
     }
   }
-}
-
-TEST_F(TestStringArray, TestEmptyStringComparison) {
-  offsets_ = {0, 0, 0, 0, 0, 0};
-  offsets_buf_ = Buffer::Wrap(offsets_);
-  length_ = static_cast<int64_t>(offsets_.size() - 1);
-
-  auto strings_a = std::make_shared<StringArray>(length_, offsets_buf_, nullptr,
-                                                 null_bitmap_, null_count_);
-  auto strings_b = std::make_shared<StringArray>(length_, offsets_buf_, nullptr,
-                                                 null_bitmap_, null_count_);
-  ASSERT_TRUE(strings_a->Equals(strings_b));
-}
-
-TEST_F(TestStringArray, CompareNullByteSlots) {
-  StringBuilder builder;
-  StringBuilder builder2;
-  StringBuilder builder3;
-
-  ASSERT_OK(builder.Append("foo"));
-  ASSERT_OK(builder2.Append("foo"));
-  ASSERT_OK(builder3.Append("foo"));
-
-  ASSERT_OK(builder.Append("bar"));
-  ASSERT_OK(builder2.AppendNull());
-
-  // same length, but different
-  ASSERT_OK(builder3.Append("xyz"));
-
-  ASSERT_OK(builder.Append("baz"));
-  ASSERT_OK(builder2.Append("baz"));
-  ASSERT_OK(builder3.Append("baz"));
-
-  std::shared_ptr<Array> array, array2, array3;
-  FinishAndCheckPadding(&builder, &array);
-  ASSERT_OK(builder2.Finish(&array2));
-  ASSERT_OK(builder3.Finish(&array3));
-
-  const auto& a1 = checked_cast<const StringArray&>(*array);
-  const auto& a2 = checked_cast<const StringArray&>(*array2);
-  const auto& a3 = checked_cast<const StringArray&>(*array3);
-
-  // The validity bitmaps are the same, the data is different, but the unequal
-  // portion is masked out
-  StringArray equal_array(3, a1.value_offsets(), a1.value_data(), a2.null_bitmap(), 1);
-  StringArray equal_array2(3, a3.value_offsets(), a3.value_data(), a2.null_bitmap(), 1);
 
-  ASSERT_TRUE(equal_array.Equals(equal_array2));
-  ASSERT_TRUE(a2.RangeEquals(equal_array2, 0, 3, 0));
-
-  ASSERT_TRUE(equal_array.Array::Slice(1)->Equals(equal_array2.Array::Slice(1)));
-  ASSERT_TRUE(
-      equal_array.Array::Slice(1)->RangeEquals(0, 2, 0, equal_array2.Array::Slice(1)));
-}
-
-TEST_F(TestStringArray, TestSliceGetString) {
-  StringBuilder builder;
-
-  ASSERT_OK(builder.Append("a"));
-  ASSERT_OK(builder.Append("b"));
-  ASSERT_OK(builder.Append("c"));
-
-  std::shared_ptr<Array> array;
-  ASSERT_OK(builder.Finish(&array));
-  auto s = array->Slice(1, 10);
-  auto arr = std::dynamic_pointer_cast<StringArray>(s);
-  ASSERT_EQ(arr->GetString(0), "b");
-}
-
-// ----------------------------------------------------------------------
-// String builder tests
-
-class TestStringBuilder : public TestBuilder {
- public:
-  void SetUp() {
-    TestBuilder::SetUp();
-    builder_.reset(new StringBuilder(pool_));
+  void TestListFunctions() {
+    int64_t pos = 0;
+    for (size_t i = 0; i < expected_.size(); ++i) {
+      ASSERT_EQ(pos, strings_->value_offset(i));
+      ASSERT_EQ(expected_[i].size(), strings_->value_length(i));
+      pos += expected_[i].size();
+    }
   }
 
-  void Done() {
-    std::shared_ptr<Array> out;
-    FinishAndCheckPadding(builder_.get(), &out);
-
-    result_ = std::dynamic_pointer_cast<StringArray>(out);
-    ASSERT_OK(ValidateArray(*result_));
+  void TestDestructor() {
+    auto arr = std::make_shared<ArrayType>(length_, offsets_buf_, value_buf_,
+                                           null_bitmap_, null_count_);
   }
 
- protected:
-  std::unique_ptr<StringBuilder> builder_;
-  std::shared_ptr<StringArray> result_;
-};
-
-TEST_F(TestStringBuilder, TestScalarAppend) {
-  std::vector<std::string> strings = {"", "bb", "a", "", "ccc"};
-  std::vector<uint8_t> is_valid = {1, 1, 1, 0, 1};
-
-  int N = static_cast<int>(strings.size());
-  int reps = 1000;
-
-  for (int j = 0; j < reps; ++j) {
-    for (int i = 0; i < N; ++i) {
-      if (!is_valid[i]) {
-        ASSERT_OK(builder_->AppendNull());
+  void TestGetString() {
+    for (size_t i = 0; i < expected_.size(); ++i) {
+      if (valid_bytes_[i] == 0) {
+        ASSERT_TRUE(strings_->IsNull(i));
       } else {
-        ASSERT_OK(builder_->Append(strings[i]));
+        ASSERT_FALSE(strings_->IsNull(i));
+        ASSERT_EQ(expected_[i], strings_->GetString(i));
       }
     }
   }
-  Done();
 
-  ASSERT_EQ(reps * N, result_->length());
-  ASSERT_EQ(reps, result_->null_count());
-  ASSERT_EQ(reps * 6, result_->value_data()->size());
-
-  CheckStringArray(*result_, strings, is_valid, reps);
-}
-
-TEST_F(TestStringBuilder, TestAppendVector) {
-  std::vector<std::string> strings = {"", "bb", "a", "", "ccc"};
-  std::vector<uint8_t> valid_bytes = {1, 1, 1, 0, 1};
-
-  int N = static_cast<int>(strings.size());
-  int reps = 1000;
+  void TestEmptyStringComparison() {
+    offsets_ = {0, 0, 0, 0, 0, 0};
+    offsets_buf_ = Buffer::Wrap(offsets_);
+    length_ = static_cast<int64_t>(offsets_.size() - 1);
 
-  for (int j = 0; j < reps; ++j) {
-    ASSERT_OK(builder_->AppendValues(strings, valid_bytes.data()));
+    auto strings_a = std::make_shared<ArrayType>(length_, offsets_buf_, nullptr,
+                                                 null_bitmap_, null_count_);
+    auto strings_b = std::make_shared<ArrayType>(length_, offsets_buf_, nullptr,
+                                                 null_bitmap_, null_count_);
+    ASSERT_TRUE(strings_a->Equals(strings_b));
   }
-  Done();
 
-  ASSERT_EQ(reps * N, result_->length());
-  ASSERT_EQ(reps, result_->null_count());
-  ASSERT_EQ(reps * 6, result_->value_data()->size());
+  void TestCompareNullByteSlots() {
+    BuilderType builder;
+    BuilderType builder2;
+    BuilderType builder3;
 
-  CheckStringArray(*result_, strings, valid_bytes, reps);
-}
+    ASSERT_OK(builder.Append("foo"));
+    ASSERT_OK(builder2.Append("foo"));
+    ASSERT_OK(builder3.Append("foo"));
 
-TEST_F(TestStringBuilder, TestAppendCStringsWithValidBytes) {
-  const char* strings[] = {nullptr, "aaa", nullptr, "ignored", ""};
-  std::vector<uint8_t> valid_bytes = {1, 1, 1, 0, 1};
+    ASSERT_OK(builder.Append("bar"));
+    ASSERT_OK(builder2.AppendNull());
 
-  int N = static_cast<int>(sizeof(strings) / sizeof(strings[0]));
-  int reps = 1000;
+    // same length, but different
+    ASSERT_OK(builder3.Append("xyz"));
 
-  for (int j = 0; j < reps; ++j) {
-    ASSERT_OK(builder_->AppendValues(strings, N, valid_bytes.data()));
-  }
-  Done();
+    ASSERT_OK(builder.Append("baz"));
+    ASSERT_OK(builder2.Append("baz"));
+    ASSERT_OK(builder3.Append("baz"));
 
-  ASSERT_EQ(reps * N, result_->length());
-  ASSERT_EQ(reps * 3, result_->null_count());
-  ASSERT_EQ(reps * 3, result_->value_data()->size());
+    std::shared_ptr<Array> array, array2, array3;
+    FinishAndCheckPadding(&builder, &array);
+    ASSERT_OK(builder2.Finish(&array2));
+    ASSERT_OK(builder3.Finish(&array3));
 
-  CheckStringArray(*result_, {"", "aaa", "", "", ""}, {0, 1, 0, 0, 1}, reps);
-}
+    const auto& a1 = checked_cast<const ArrayType&>(*array);
+    const auto& a2 = checked_cast<const ArrayType&>(*array2);
+    const auto& a3 = checked_cast<const ArrayType&>(*array3);
 
-TEST_F(TestStringBuilder, TestAppendCStringsWithoutValidBytes) {
-  const char* strings[] = {"", "bb", "a", nullptr, "ccc"};
+    // The validity bitmaps are the same, the data is different, but the unequal
+    // portion is masked out
+    ArrayType equal_array(3, a1.value_offsets(), a1.value_data(), a2.null_bitmap(), 1);
+    ArrayType equal_array2(3, a3.value_offsets(), a3.value_data(), a2.null_bitmap(), 1);
 
-  int N = static_cast<int>(sizeof(strings) / sizeof(strings[0]));
-  int reps = 1000;
+    ASSERT_TRUE(equal_array.Equals(equal_array2));
+    ASSERT_TRUE(a2.RangeEquals(equal_array2, 0, 3, 0));
 
-  for (int j = 0; j < reps; ++j) {
-    ASSERT_OK(builder_->AppendValues(strings, N));
+    ASSERT_TRUE(equal_array.Array::Slice(1)->Equals(equal_array2.Array::Slice(1)));
+    ASSERT_TRUE(
+        equal_array.Array::Slice(1)->RangeEquals(0, 2, 0, equal_array2.Array::Slice(1)));
   }
-  Done();
 
-  ASSERT_EQ(reps * N, result_->length());
-  ASSERT_EQ(reps, result_->null_count());
-  ASSERT_EQ(reps * 6, result_->value_data()->size());
+  void TestSliceGetString() {
+    BuilderType builder;
 
-  CheckStringArray(*result_, {"", "bb", "a", "", "ccc"}, {1, 1, 1, 0, 1}, reps);
-}
+    ASSERT_OK(builder.Append("a"));
+    ASSERT_OK(builder.Append("b"));
+    ASSERT_OK(builder.Append("c"));
 
-TEST_F(TestStringBuilder, TestZeroLength) {
-  // All buffers are null
-  Done();
-}
-
-// Binary container type
-// TODO(emkornfield) there should be some way to refactor these to avoid code duplicating
-// with String
-class TestBinaryArray : public ::testing::Test {
- public:
-  void SetUp() {
-    chars_ = {'a', 'b', 'b', 'c', 'c', 'c'};
-    offsets_ = {0, 1, 1, 1, 3, 6};
-    valid_bytes_ = {1, 1, 0, 1, 1};
-    expected_ = {"a", "", "", "bb", "ccc"};
-
-    MakeArray();
-  }
-
-  void MakeArray() {
-    length_ = static_cast<int64_t>(offsets_.size() - 1);
-    value_buf_ = Buffer::Wrap(chars_);
-    offsets_buf_ = Buffer::Wrap(offsets_);
-
-    ASSERT_OK(BitUtil::BytesToBits(valid_bytes_, default_memory_pool(), &null_bitmap_));
-    null_count_ = CountNulls(valid_bytes_);
-
-    strings_ = std::make_shared<BinaryArray>(length_, offsets_buf_, value_buf_,
-                                             null_bitmap_, null_count_);
+    std::shared_ptr<Array> array;
+    ASSERT_OK(builder.Finish(&array));
+    auto s = array->Slice(1, 10);
+    auto arr = std::dynamic_pointer_cast<ArrayType>(s);
+    ASSERT_EQ(arr->GetString(0), "b");
   }
 
  protected:
-  std::vector<int32_t> offsets_;
+  std::vector<offset_type> offsets_;
   std::vector<char> chars_;
   std::vector<uint8_t> valid_bytes_;
 
@@ -359,300 +232,240 @@ class TestBinaryArray : public ::testing::Test {
   int64_t null_count_;
   int64_t length_;
 
-  std::shared_ptr<BinaryArray> strings_;
+  std::shared_ptr<ArrayType> strings_;
 };
 
-TEST_F(TestBinaryArray, TestArrayBasics) {
-  ASSERT_EQ(length_, strings_->length());
-  ASSERT_EQ(1, strings_->null_count());
-  ASSERT_OK(ValidateArray(*strings_));
-}
-
-TEST_F(TestBinaryArray, TestType) {
-  std::shared_ptr<DataType> type = strings_->type();
-
-  ASSERT_EQ(Type::BINARY, type->id());
-  ASSERT_EQ(Type::BINARY, strings_->type_id());
-}
+TYPED_TEST_CASE(TestStringArray, StringTypes);
 
-TEST_F(TestBinaryArray, TestListFunctions) {
-  size_t pos = 0;
-  for (size_t i = 0; i < expected_.size(); ++i) {
-    ASSERT_EQ(pos, strings_->value_offset(i));
-    ASSERT_EQ(static_cast<int>(expected_[i].size()), strings_->value_length(i));
-    pos += expected_[i].size();
-  }
-}
+TYPED_TEST(TestStringArray, TestArrayBasics) { this->TestArrayBasics(); }
 
-TEST_F(TestBinaryArray, TestDestructor) {
-  auto arr = std::make_shared<BinaryArray>(length_, offsets_buf_, value_buf_,
-                                           null_bitmap_, null_count_);
-}
+TYPED_TEST(TestStringArray, TestType) { this->TestType(); }
 
-TEST_F(TestBinaryArray, TestGetValue) {
-  for (size_t i = 0; i < expected_.size(); ++i) {
-    if (valid_bytes_[i] == 0) {
-      ASSERT_TRUE(strings_->IsNull(i));
-    } else {
-      ASSERT_FALSE(strings_->IsNull(i));
-      ASSERT_EQ(strings_->GetString(i), expected_[i]);
-    }
-  }
-}
+TYPED_TEST(TestStringArray, TestListFunctions) { this->TestListFunctions(); }
 
-TEST_F(TestBinaryArray, TestNullValuesInitialized) {
-  for (size_t i = 0; i < expected_.size(); ++i) {
-    if (valid_bytes_[i] == 0) {
-      ASSERT_TRUE(strings_->IsNull(i));
-    } else {
-      ASSERT_FALSE(strings_->IsNull(i));
-      ASSERT_EQ(strings_->GetString(i), expected_[i]);
-    }
-  }
-  TestInitialized(*strings_);
-}
+TYPED_TEST(TestStringArray, TestDestructor) { this->TestDestructor(); }
 
-TEST_F(TestBinaryArray, TestPaddingZeroed) { AssertZeroPadded(*strings_); }
+TYPED_TEST(TestStringArray, TestGetString) { this->TestGetString(); }
 
-TEST_F(TestBinaryArray, TestGetString) {
-  for (size_t i = 0; i < expected_.size(); ++i) {
-    if (valid_bytes_[i] == 0) {
-      ASSERT_TRUE(strings_->IsNull(i));
-    } else {
-      std::string val = strings_->GetString(i);
-      ASSERT_EQ(0, std::memcmp(expected_[i].data(), val.c_str(), val.size()));
-    }
-  }
+TYPED_TEST(TestStringArray, TestEmptyStringComparison) {
+  this->TestEmptyStringComparison();
 }
 
-TEST_F(TestBinaryArray, TestEqualsEmptyStrings) {
-  BinaryBuilder builder;
-
-  std::string empty_string("");
-  for (int i = 0; i < 5; ++i) {
-    ASSERT_OK(builder.Append(empty_string));
-  }
-
-  std::shared_ptr<Array> left_arr;
-  FinishAndCheckPadding(&builder, &left_arr);
+TYPED_TEST(TestStringArray, CompareNullByteSlots) { this->TestCompareNullByteSlots(); }
 
-  const BinaryArray& left = checked_cast<const BinaryArray&>(*left_arr);
-  std::shared_ptr<Array> right =
-      std::make_shared<BinaryArray>(left.length(), left.value_offsets(), nullptr,
-                                    left.null_bitmap(), left.null_count());
+TYPED_TEST(TestStringArray, TestSliceGetString) { this->TestSliceGetString(); }
 
-  ASSERT_TRUE(left.Equals(right));
-  ASSERT_TRUE(left.RangeEquals(0, left.length(), 0, right));
-}
+// ----------------------------------------------------------------------
+// String builder tests
 
-class TestBinaryBuilder : public TestBuilder {
+template <typename T>
+class TestStringBuilder : public TestBuilder {
  public:
+  using TypeClass = T;
+  using offset_type = typename TypeClass::offset_type;
+  using ArrayType = typename TypeTraits<TypeClass>::ArrayType;
+  using BuilderType = typename TypeTraits<TypeClass>::BuilderType;
+
   void SetUp() {
     TestBuilder::SetUp();
-    builder_.reset(new BinaryBuilder(pool_));
+    builder_.reset(new BuilderType(pool_));
   }
 
   void Done() {
     std::shared_ptr<Array> out;
     FinishAndCheckPadding(builder_.get(), &out);
 
-    result_ = std::dynamic_pointer_cast<BinaryArray>(out);
+    result_ = std::dynamic_pointer_cast<ArrayType>(out);
     ASSERT_OK(ValidateArray(*result_));
   }
 
- protected:
-  std::unique_ptr<BinaryBuilder> builder_;
-  std::shared_ptr<BinaryArray> result_;
-};
-
-TEST_F(TestBinaryBuilder, TestScalarAppend) {
-  std::vector<std::string> strings = {"", "bb", "a", "", "ccc"};
-  std::vector<uint8_t> is_valid = {1, 1, 1, 0, 1};
+  void TestScalarAppend() {
+    std::vector<std::string> strings = {"", "bb", "a", "", "ccc"};
+    std::vector<uint8_t> is_valid = {1, 1, 1, 0, 1};
 
-  int N = static_cast<int>(strings.size());
-  int reps = 10;
+    int N = static_cast<int>(strings.size());
+    int reps = 10;
 
-  for (int j = 0; j < reps; ++j) {
-    for (int i = 0; i < N; ++i) {
-      if (!is_valid[i]) {
-        ASSERT_OK(builder_->AppendNull());
-      } else {
-        ASSERT_OK(builder_->Append(strings[i]));
+    for (int j = 0; j < reps; ++j) {
+      for (int i = 0; i < N; ++i) {
+        if (!is_valid[i]) {
+          ASSERT_OK(builder_->AppendNull());
+        } else {
+          ASSERT_OK(builder_->Append(strings[i]));
+        }
       }
     }
-  }
-  Done();
-  ASSERT_OK(ValidateArray(*result_));
-  ASSERT_EQ(reps * N, result_->length());
-  ASSERT_EQ(reps, result_->null_count());
-  ASSERT_EQ(reps * 6, result_->value_data()->size());
-
-  CheckStringArray(*result_, strings, is_valid, reps);
-}
-
-TEST_F(TestBinaryBuilder, TestAppendNulls) {
-  ASSERT_OK(builder_->Append("bow"));
-  ASSERT_OK(builder_->AppendNulls(3));
-  ASSERT_OK(builder_->Append("arrow"));
-  Done();
-  ASSERT_OK(ValidateArray(*result_));
+    Done();
 
-  ASSERT_EQ(5, result_->length());
-  ASSERT_EQ(3, result_->null_count());
-  ASSERT_EQ(8, result_->value_data()->size());
+    ASSERT_EQ(reps * N, result_->length());
+    ASSERT_EQ(reps, result_->null_count());
+    ASSERT_EQ(reps * 6, result_->value_data()->size());
 
-  CheckStringArray(*result_, {"bow", "", "", "", "arrow"}, {1, 0, 0, 0, 1});
-}
+    CheckStringArray(*result_, strings, is_valid, reps);
+  }
 
-TEST_F(TestBinaryBuilder, TestScalarAppendUnsafe) {
-  std::vector<std::string> strings = {"", "bb", "a", "", "ccc"};
-  std::vector<uint8_t> is_valid = {1, 1, 1, 0, 1};
+  void TestScalarAppendUnsafe() {
+    std::vector<std::string> strings = {"", "bb", "a", "", "ccc"};
+    std::vector<uint8_t> is_valid = {1, 1, 1, 0, 1};
 
-  int N = static_cast<int>(strings.size());
-  int reps = 13;
-  int total_length = 0;
-  for (auto&& s : strings) total_length += static_cast<int>(s.size());
+    int N = static_cast<int>(strings.size());
+    int reps = 13;
+    int64_t total_length = 0;
+    for (const auto& s : strings) {
+      total_length += static_cast<int64_t>(s.size());
+    }
 
-  ASSERT_OK(builder_->Reserve(N * reps));
-  ASSERT_OK(builder_->ReserveData(total_length * reps));
+    ASSERT_OK(builder_->Reserve(N * reps));
+    ASSERT_OK(builder_->ReserveData(total_length * reps));
 
-  for (int j = 0; j < reps; ++j) {
-    for (int i = 0; i < N; ++i) {
-      if (!is_valid[i]) {
-        builder_->UnsafeAppendNull();
-      } else {
-        builder_->UnsafeAppend(strings[i]);
+    for (int j = 0; j < reps; ++j) {
+      for (int i = 0; i < N; ++i) {
+        if (!is_valid[i]) {
+          builder_->UnsafeAppendNull();
+        } else {
+          builder_->UnsafeAppend(strings[i]);
+        }
       }
     }
-  }
-  ASSERT_EQ(builder_->value_data_length(), total_length * reps);
-  Done();
-  ASSERT_OK(ValidateArray(*result_));
-  ASSERT_EQ(reps * N, result_->length());
-  ASSERT_EQ(reps, result_->null_count());
-  ASSERT_EQ(reps * total_length, result_->value_data()->size());
-
-  CheckStringArray(*result_, strings, is_valid, reps);
-}
+    ASSERT_EQ(builder_->value_data_length(), total_length * reps);
+    Done();
 
-TEST_F(TestBinaryBuilder, TestCapacityReserve) {
-  std::vector<std::string> strings = {"aaaaa", "bbbbbbbbbb", "ccccccccccccccc",
-                                      "dddddddddd"};
-  int N = static_cast<int>(strings.size());
-  int reps = 15;
-  int64_t length = 0;
-  int64_t capacity = 1000;
-  int64_t expected_capacity = BitUtil::RoundUpToMultipleOf64(capacity);
+    ASSERT_OK(ValidateArray(*result_));
+    ASSERT_EQ(reps * N, result_->length());
+    ASSERT_EQ(reps, result_->null_count());
+    ASSERT_EQ(reps * total_length, result_->value_data()->size());
 
-  ASSERT_OK(builder_->ReserveData(capacity));
+    CheckStringArray(*result_, strings, is_valid, reps);
+  }
 
-  ASSERT_EQ(length, builder_->value_data_length());
-  ASSERT_EQ(expected_capacity, builder_->value_data_capacity());
+  void TestVectorAppend() {
+    std::vector<std::string> strings = {"", "bb", "a", "", "ccc"};
+    std::vector<uint8_t> valid_bytes = {1, 1, 1, 0, 1};
 
-  for (int j = 0; j < reps; ++j) {
-    for (int i = 0; i < N; ++i) {
-      ASSERT_OK(builder_->Append(strings[i]));
-      length += static_cast<int>(strings[i].size());
+    int N = static_cast<int>(strings.size());
+    int reps = 1000;
 
-      ASSERT_EQ(length, builder_->value_data_length());
-      ASSERT_EQ(expected_capacity, builder_->value_data_capacity());
+    for (int j = 0; j < reps; ++j) {
+      ASSERT_OK(builder_->AppendValues(strings, valid_bytes.data()));
     }
+    Done();
+
+    ASSERT_EQ(reps * N, result_->length());
+    ASSERT_EQ(reps, result_->null_count());
+    ASSERT_EQ(reps * 6, result_->value_data()->size());
+
+    CheckStringArray(*result_, strings, valid_bytes, reps);
   }
 
-  int extra_capacity = 500;
-  expected_capacity = BitUtil::RoundUpToMultipleOf64(length + extra_capacity);
+  void TestAppendCStringsWithValidBytes() {
+    const char* strings[] = {nullptr, "aaa", nullptr, "ignored", ""};
+    std::vector<uint8_t> valid_bytes = {1, 1, 1, 0, 1};
 
-  ASSERT_OK(builder_->ReserveData(extra_capacity));
+    int N = static_cast<int>(sizeof(strings) / sizeof(strings[0]));
+    int reps = 1000;
 
-  ASSERT_EQ(length, builder_->value_data_length());
-  int64_t actual_capacity = builder_->value_data_capacity();
-  ASSERT_GE(actual_capacity, expected_capacity);
-  ASSERT_EQ(actual_capacity & 63, 0);
+    for (int j = 0; j < reps; ++j) {
+      ASSERT_OK(builder_->AppendValues(strings, N, valid_bytes.data()));
+    }
+    Done();
 
-  Done();
+    ASSERT_EQ(reps * N, result_->length());
+    ASSERT_EQ(reps * 3, result_->null_count());
+    ASSERT_EQ(reps * 3, result_->value_data()->size());
 
-  ASSERT_EQ(reps * N, result_->length());
-  ASSERT_EQ(0, result_->null_count());
-  ASSERT_EQ(reps * 40, result_->value_data()->size());
+    CheckStringArray(*result_, {"", "aaa", "", "", ""}, {0, 1, 0, 0, 1}, reps);
+  }
 
-  // Capacity is shrunk after `Finish`
-  ASSERT_EQ(640, result_->value_data()->capacity());
-}
+  void TestAppendCStringsWithoutValidBytes() {
+    const char* strings[] = {"", "bb", "a", nullptr, "ccc"};
 
-TEST_F(TestBinaryBuilder, TestZeroLength) {
-  // All buffers are null
-  Done();
-}
+    int N = static_cast<int>(sizeof(strings) / sizeof(strings[0]));
+    int reps = 1000;
 
-// ----------------------------------------------------------------------
-// Slice tests
+    for (int j = 0; j < reps; ++j) {
+      ASSERT_OK(builder_->AppendValues(strings, N));
+    }
+    Done();
 
-template <typename TYPE>
-void CheckSliceEquality() {
-  using Traits = TypeTraits<TYPE>;
-  using BuilderType = typename Traits::BuilderType;
+    ASSERT_EQ(reps * N, result_->length());
+    ASSERT_EQ(reps, result_->null_count());
+    ASSERT_EQ(reps * 6, result_->value_data()->size());
 
-  BuilderType builder;
+    CheckStringArray(*result_, {"", "bb", "a", "", "ccc"}, {1, 1, 1, 0, 1}, reps);
+  }
 
-  std::vector<std::string> strings = {"foo", "", "bar", "baz", "qux", ""};
-  std::vector<uint8_t> is_null = {0, 1, 0, 1, 0, 0};
+  void TestCapacityReserve() {
+    std::vector<std::string> strings = {"aaaaa", "bbbbbbbbbb", "ccccccccccccccc",
+                                        "dddddddddd"};
+    int N = static_cast<int>(strings.size());
+    int reps = 15;
+    int64_t length = 0;
+    int64_t capacity = 1000;
+    int64_t expected_capacity = BitUtil::RoundUpToMultipleOf64(capacity);
 
-  int N = static_cast<int>(strings.size());
-  int reps = 10;
+    ASSERT_OK(builder_->ReserveData(capacity));
 
-  for (int j = 0; j < reps; ++j) {
-    for (int i = 0; i < N; ++i) {
-      if (is_null[i]) {
-        ASSERT_OK(builder.AppendNull());
-      } else {
-        ASSERT_OK(builder.Append(strings[i]));
+    ASSERT_EQ(length, builder_->value_data_length());
+    ASSERT_EQ(expected_capacity, builder_->value_data_capacity());
+
+    for (int j = 0; j < reps; ++j) {
+      for (int i = 0; i < N; ++i) {
+        ASSERT_OK(builder_->Append(strings[i]));
+        length += static_cast<int64_t>(strings[i].size());
+
+        ASSERT_EQ(length, builder_->value_data_length());
+        ASSERT_EQ(expected_capacity, builder_->value_data_capacity());
       }
     }
-  }
 
-  std::shared_ptr<Array> array;
-  FinishAndCheckPadding(&builder, &array);
+    int extra_capacity = 500;
+    expected_capacity = BitUtil::RoundUpToMultipleOf64(length + extra_capacity);
+
+    ASSERT_OK(builder_->ReserveData(extra_capacity));
 
-  std::shared_ptr<Array> slice, slice2;
+    ASSERT_EQ(length, builder_->value_data_length());
+    int64_t actual_capacity = builder_->value_data_capacity();
+    ASSERT_GE(actual_capacity, expected_capacity);
+    ASSERT_EQ(actual_capacity & 63, 0);
 
-  slice = array->Slice(5);
-  slice2 = array->Slice(5);
-  ASSERT_EQ(N * reps - 5, slice->length());
+    Done();
 
-  ASSERT_TRUE(slice->Equals(slice2));
-  ASSERT_TRUE(array->RangeEquals(5, slice->length(), 0, slice));
+    ASSERT_EQ(reps * N, result_->length());
+    ASSERT_EQ(0, result_->null_count());
+    ASSERT_EQ(reps * 40, result_->value_data()->size());
+  }
+
+  void TestZeroLength() {
+    // All buffers are null
+    Done();
+    ASSERT_EQ(result_->length(), 0);
+    ASSERT_EQ(result_->null_count(), 0);
+  }
 
-  // Chained slices
-  slice2 = array->Slice(2)->Slice(3);
-  ASSERT_TRUE(slice->Equals(slice2));
+ protected:
+  std::unique_ptr<BuilderType> builder_;
+  std::shared_ptr<ArrayType> result_;
+};
 
-  slice = array->Slice(5, 20);
-  slice2 = array->Slice(5, 20);
-  ASSERT_EQ(20, slice->length());
+TYPED_TEST_CASE(TestStringBuilder, StringTypes);
 
-  ASSERT_TRUE(slice->Equals(slice2));
-  ASSERT_TRUE(array->RangeEquals(5, 25, 0, slice));
+TYPED_TEST(TestStringBuilder, TestScalarAppend) { this->TestScalarAppend(); }
 
-  ASSERT_OK(builder.Append("a"));
-  for (int j = 0; j < reps; ++j) {
-    ASSERT_OK(builder.Append(""));
-  }
-  FinishAndCheckPadding(&builder, &array);
-  slice = array->Slice(1);
+TYPED_TEST(TestStringBuilder, TestScalarAppendUnsafe) { this->TestScalarAppendUnsafe(); }
 
-  for (int j = 0; j < reps; ++j) {
-    ASSERT_OK(builder.Append(""));
-  }
-  FinishAndCheckPadding(&builder, &array);
+TYPED_TEST(TestStringBuilder, TestVectorAppend) { this->TestVectorAppend(); }
 
-  AssertArraysEqual(*slice, *array);
+TYPED_TEST(TestStringBuilder, TestAppendCStringsWithValidBytes) {
+  this->TestAppendCStringsWithValidBytes();
 }
 
-TEST_F(TestBinaryArray, TestSliceEquality) { CheckSliceEquality<BinaryType>(); }
+TYPED_TEST(TestStringBuilder, TestAppendCStringsWithoutValidBytes) {
+  this->TestAppendCStringsWithoutValidBytes();
+}
 
-TEST_F(TestStringArray, TestSliceEquality) { CheckSliceEquality<BinaryType>(); }
+TYPED_TEST(TestStringBuilder, TestCapacityReserve) { this->TestCapacityReserve(); }
 
-TEST_F(TestBinaryArray, LengthZeroCtor) { BinaryArray array(0, nullptr, nullptr); }
+TYPED_TEST(TestStringBuilder, TestZeroLength) { this->TestZeroLength(); }
 
 // ----------------------------------------------------------------------
 // ChunkedBinaryBuilder tests
diff --git a/cpp/src/arrow/array.cc b/cpp/src/arrow/array.cc
index 5f76f08..0b7d8f1 100644
--- a/cpp/src/arrow/array.cc
+++ b/cpp/src/arrow/array.cc
@@ -386,31 +386,26 @@ BinaryArray::BinaryArray(const std::shared_ptr<ArrayData>& data) {
   SetData(data);
 }
 
-void BinaryArray::SetData(const std::shared_ptr<ArrayData>& data) {
-  ARROW_CHECK_EQ(data->buffers.size(), 3);
-  auto value_offsets = data->buffers[1];
-  auto value_data = data->buffers[2];
-  this->Array::SetData(data);
-  raw_data_ = value_data == nullptr ? nullptr : value_data->data();
-  raw_value_offsets_ = value_offsets == nullptr
-                           ? nullptr
-                           : reinterpret_cast<const int32_t*>(value_offsets->data());
-}
-
 BinaryArray::BinaryArray(int64_t length, const std::shared_ptr<Buffer>& value_offsets,
                          const std::shared_ptr<Buffer>& data,
                          const std::shared_ptr<Buffer>& null_bitmap, int64_t null_count,
-                         int64_t offset)
-    : BinaryArray(binary(), length, value_offsets, data, null_bitmap, null_count,
-                  offset) {}
-
-BinaryArray::BinaryArray(const std::shared_ptr<DataType>& type, int64_t length,
-                         const std::shared_ptr<Buffer>& value_offsets,
-                         const std::shared_ptr<Buffer>& data,
-                         const std::shared_ptr<Buffer>& null_bitmap, int64_t null_count,
                          int64_t offset) {
-  SetData(ArrayData::Make(type, length, {null_bitmap, value_offsets, data}, null_count,
-                          offset));
+  SetData(ArrayData::Make(binary(), length, {null_bitmap, value_offsets, data},
+                          null_count, offset));
+}
+
+LargeBinaryArray::LargeBinaryArray(const std::shared_ptr<ArrayData>& data) {
+  ARROW_CHECK_EQ(data->type->id(), Type::LARGE_BINARY);
+  SetData(data);
+}
+
+LargeBinaryArray::LargeBinaryArray(int64_t length,
+                                   const std::shared_ptr<Buffer>& value_offsets,
+                                   const std::shared_ptr<Buffer>& data,
+                                   const std::shared_ptr<Buffer>& null_bitmap,
+                                   int64_t null_count, int64_t offset) {
+  SetData(ArrayData::Make(large_binary(), length, {null_bitmap, value_offsets, data},
+                          null_count, offset));
 }
 
 StringArray::StringArray(const std::shared_ptr<ArrayData>& data) {
@@ -421,8 +416,24 @@ StringArray::StringArray(const std::shared_ptr<ArrayData>& data) {
 StringArray::StringArray(int64_t length, const std::shared_ptr<Buffer>& value_offsets,
                          const std::shared_ptr<Buffer>& data,
                          const std::shared_ptr<Buffer>& null_bitmap, int64_t null_count,
-                         int64_t offset)
-    : BinaryArray(utf8(), length, value_offsets, data, null_bitmap, null_count, offset) {}
+                         int64_t offset) {
+  SetData(ArrayData::Make(utf8(), length, {null_bitmap, value_offsets, data}, null_count,
+                          offset));
+}
+
+LargeStringArray::LargeStringArray(const std::shared_ptr<ArrayData>& data) {
+  ARROW_CHECK_EQ(data->type->id(), Type::LARGE_STRING);
+  SetData(data);
+}
+
+LargeStringArray::LargeStringArray(int64_t length,
+                                   const std::shared_ptr<Buffer>& value_offsets,
+                                   const std::shared_ptr<Buffer>& data,
+                                   const std::shared_ptr<Buffer>& null_bitmap,
+                                   int64_t null_count, int64_t offset) {
+  SetData(ArrayData::Make(large_utf8(), length, {null_bitmap, value_offsets, data},
+                          null_count, offset));
+}
 
 // ----------------------------------------------------------------------
 // Fixed width binary
@@ -1148,20 +1159,14 @@ struct ValidateVisitor {
     return ValidateOffsets(array);
   }
 
-  Status Visit(const ListArray& array) {
-    if (array.length() < 0) {
-      return Status::Invalid("Length was negative");
-    }
-
-    auto value_offsets = array.value_offsets();
-    if (array.length() && !value_offsets) {
-      return Status::Invalid("value_offsets_ was null");
-    }
-    if (value_offsets->size() / static_cast<int>(sizeof(int32_t)) < array.length()) {
-      return Status::Invalid("offset buffer size (bytes): ", value_offsets->size(),
-                             " isn't large enough for length: ", array.length());
+  Status Visit(const LargeBinaryArray& array) {
+    if (array.data()->buffers.size() != 3) {
+      return Status::Invalid("number of buffers was != 3");
     }
+    return ValidateOffsets(array);
+  }
 
+  Status Visit(const ListArray& array) {
     if (!array.values()) {
       return Status::Invalid("values was null");
     }
@@ -1181,19 +1186,6 @@ struct ValidateVisitor {
   }
 
   Status Visit(const MapArray& array) {
-    if (array.length() < 0) {
-      return Status::Invalid("Length was negative");
-    }
-
-    auto value_offsets = array.value_offsets();
-    if (array.length() && !value_offsets) {
-      return Status::Invalid("value_offsets_ was null");
-    }
-    if (value_offsets->size() / static_cast<int>(sizeof(int32_t)) < array.length()) {
-      return Status::Invalid("offset buffer size (bytes): ", value_offsets->size(),
-                             " isn't large enough for length: ", array.length());
-    }
-
     if (!array.keys()) {
       return Status::Invalid("keys was null");
     }
@@ -1224,9 +1216,6 @@ struct ValidateVisitor {
   }
 
   Status Visit(const FixedSizeListArray& array) {
-    if (array.length() < 0) {
-      return Status::Invalid("Length was negative");
-    }
     if (!array.values()) {
       return Status::Invalid("values was null");
     }
@@ -1240,14 +1229,6 @@ struct ValidateVisitor {
   }
 
   Status Visit(const StructArray& array) {
-    if (array.length() < 0) {
-      return Status::Invalid("Length was negative");
-    }
-
-    if (array.null_count() > array.length()) {
-      return Status::Invalid("Null count exceeds the length of this struct");
-    }
-
     if (array.num_fields() > 0) {
       // Validate fields
       int64_t array_length = array.field(0)->length();
@@ -1274,16 +1255,7 @@ struct ValidateVisitor {
     return Status::OK();
   }
 
-  Status Visit(const UnionArray& array) {
-    if (array.length() < 0) {
-      return Status::Invalid("Length was negative");
-    }
-
-    if (array.null_count() > array.length()) {
-      return Status::Invalid("Null count exceeds the length of this struct");
-    }
-    return Status::OK();
-  }
+  Status Visit(const UnionArray& array) { return Status::OK(); }
 
   Status Visit(const DictionaryArray& array) {
     Type::type index_type_id = array.indices()->type()->id();
@@ -1310,12 +1282,23 @@ struct ValidateVisitor {
  protected:
   template <typename ArrayType>
   Status ValidateOffsets(ArrayType& array) {
-    int32_t prev_offset = array.value_offset(0);
+    using offset_type = typename ArrayType::offset_type;
+
+    auto value_offsets = array.value_offsets();
+    if (array.length() && !value_offsets) {
+      return Status::Invalid("value_offsets_ was null");
+    }
+    if (value_offsets->size() / static_cast<int>(sizeof(offset_type)) < array.length()) {
+      return Status::Invalid("offset buffer size (bytes): ", value_offsets->size(),
+                             " isn't large enough for length: ", array.length());
+    }
+
+    auto prev_offset = array.value_offset(0);
     if (array.offset() == 0 && prev_offset != 0) {
       return Status::Invalid("The first offset wasn't zero");
     }
     for (int64_t i = 1; i <= array.length(); ++i) {
-      int32_t current_offset = array.value_offset(i);
+      auto current_offset = array.value_offset(i);
       if (array.IsNull(i - 1) && current_offset != prev_offset) {
         return Status::Invalid("Offset invariant failure at: ", i,
                                " inconsistent value_offsets for null slot",
@@ -1340,6 +1323,14 @@ Status ValidateArray(const Array& array) {
   const auto layout = type.layout();
   const ArrayData& data = *array.data();
 
+  if (array.length() < 0) {
+    return Status::Invalid("Array length is negative");
+  }
+
+  if (array.null_count() > array.length()) {
+    return Status::Invalid("Null count exceeds array length");
+  }
+
   if (data.buffers.size() != layout.bit_widths.size()) {
     return Status::Invalid("Expected ", layout.bit_widths.size(),
                            " buffers in array "
diff --git a/cpp/src/arrow/array.h b/cpp/src/arrow/array.h
index 599a6ea..e13088c 100644
--- a/cpp/src/arrow/array.h
+++ b/cpp/src/arrow/array.h
@@ -492,6 +492,7 @@ class ARROW_EXPORT BooleanArray : public PrimitiveArray {
 class ARROW_EXPORT ListArray : public Array {
  public:
   using TypeClass = ListType;
+  using offset_type = ListType::offset_type;
 
   explicit ListArray(const std::shared_ptr<ArrayData>& data);
 
@@ -635,24 +636,20 @@ class ARROW_EXPORT FixedSizeListArray : public Array {
 // ----------------------------------------------------------------------
 // Binary and String
 
-/// Concrete Array class for variable-size binary data
-class ARROW_EXPORT BinaryArray : public FlatArray {
+/// Base class for variable-sized binary arrays, regardless of offset size
+/// and logical interpretation.
+template <typename TYPE>
+class BaseBinaryArray : public FlatArray {
  public:
-  using TypeClass = BinaryType;
-
-  explicit BinaryArray(const std::shared_ptr<ArrayData>& data);
-
-  BinaryArray(int64_t length, const std::shared_ptr<Buffer>& value_offsets,
-              const std::shared_ptr<Buffer>& data,
-              const std::shared_ptr<Buffer>& null_bitmap = NULLPTR,
-              int64_t null_count = kUnknownNullCount, int64_t offset = 0);
+  using TypeClass = TYPE;
+  using offset_type = typename TypeClass::offset_type;
 
   /// Return the pointer to the given elements bytes
   // XXX should GetValue(int64_t i) return a string_view?
-  const uint8_t* GetValue(int64_t i, int32_t* out_length) const {
+  const uint8_t* GetValue(int64_t i, offset_type* out_length) const {
     // Account for base offset
     i += data_->offset;
-    const int32_t pos = raw_value_offsets_[i];
+    const offset_type pos = raw_value_offsets_[i];
     *out_length = raw_value_offsets_[i + 1] - pos;
     return raw_data_ + pos;
   }
@@ -664,7 +661,7 @@ class ARROW_EXPORT BinaryArray : public FlatArray {
   util::string_view GetView(int64_t i) const {
     // Account for base offset
     i += data_->offset;
-    const int32_t pos = raw_value_offsets_[i];
+    const offset_type pos = raw_value_offsets_[i];
     return util::string_view(reinterpret_cast<const char*>(raw_data_ + pos),
                              raw_value_offsets_[i + 1] - pos);
   }
@@ -681,31 +678,52 @@ class ARROW_EXPORT BinaryArray : public FlatArray {
   /// Note that this buffer does not account for any slice offset
   std::shared_ptr<Buffer> value_data() const { return data_->buffers[2]; }
 
-  const int32_t* raw_value_offsets() const { return raw_value_offsets_ + data_->offset; }
+  const offset_type* raw_value_offsets() const {
+    return raw_value_offsets_ + data_->offset;
+  }
 
   // Neither of these functions will perform boundschecking
-  int32_t value_offset(int64_t i) const { return raw_value_offsets_[i + data_->offset]; }
-  int32_t value_length(int64_t i) const {
+  offset_type value_offset(int64_t i) const {
+    return raw_value_offsets_[i + data_->offset];
+  }
+  offset_type value_length(int64_t i) const {
     i += data_->offset;
     return raw_value_offsets_[i + 1] - raw_value_offsets_[i];
   }
 
  protected:
   // For subclasses
-  BinaryArray() : raw_value_offsets_(NULLPTR), raw_data_(NULLPTR) {}
+  BaseBinaryArray() : raw_value_offsets_(NULLPTR), raw_data_(NULLPTR) {}
 
-  /// Protected method for constructors
-  void SetData(const std::shared_ptr<ArrayData>& data);
+  // Protected method for constructors; caches raw pointers into data->buffers[1] and [2]
+  void SetData(const std::shared_ptr<ArrayData>& data) {
+    auto value_offsets = data->buffers[1];  // unchecked: needs at least 3 buffers
+    auto value_data = data->buffers[2];
+    this->Array::SetData(data);
+    raw_data_ = value_data == NULLPTR ? NULLPTR : value_data->data();
+    raw_value_offsets_ =
+        value_offsets == NULLPTR
+            ? NULLPTR  // a missing offsets buffer leaves the raw pointer null
+            : reinterpret_cast<const offset_type*>(value_offsets->data());
+  }
 
-  // Constructor to allow sub-classes/builders to substitute their own logical type
-  BinaryArray(const std::shared_ptr<DataType>& type, int64_t length,
-              const std::shared_ptr<Buffer>& value_offsets,
+  const offset_type* raw_value_offsets_;
+  const uint8_t* raw_data_;
+};
+
+/// Concrete Array class for variable-size binary data
+class ARROW_EXPORT BinaryArray : public BaseBinaryArray<BinaryType> {
+ public:
+  explicit BinaryArray(const std::shared_ptr<ArrayData>& data);
+
+  BinaryArray(int64_t length, const std::shared_ptr<Buffer>& value_offsets,
               const std::shared_ptr<Buffer>& data,
               const std::shared_ptr<Buffer>& null_bitmap = NULLPTR,
               int64_t null_count = kUnknownNullCount, int64_t offset = 0);
 
-  const int32_t* raw_value_offsets_;
-  const uint8_t* raw_data_;
+ protected:
+  // For subclasses such as StringArray
+  BinaryArray() : BaseBinaryArray() {}
 };
 
 /// Concrete Array class for variable-size string (utf-8) data
@@ -721,6 +739,34 @@ class ARROW_EXPORT StringArray : public BinaryArray {
               int64_t null_count = kUnknownNullCount, int64_t offset = 0);
 };
 
+/// Concrete Array class for large variable-size binary data
+class ARROW_EXPORT LargeBinaryArray : public BaseBinaryArray<LargeBinaryType> {
+ public:
+  explicit LargeBinaryArray(const std::shared_ptr<ArrayData>& data);
+
+  LargeBinaryArray(int64_t length, const std::shared_ptr<Buffer>& value_offsets,
+                   const std::shared_ptr<Buffer>& data,
+                   const std::shared_ptr<Buffer>& null_bitmap = NULLPTR,
+                   int64_t null_count = kUnknownNullCount, int64_t offset = 0);
+
+ protected:
+  // For subclasses such as LargeStringArray
+  LargeBinaryArray() : BaseBinaryArray() {}
+};
+
+/// Concrete Array class for large variable-size string (utf-8) data
+class ARROW_EXPORT LargeStringArray : public LargeBinaryArray {
+ public:
+  using TypeClass = LargeStringType;
+
+  explicit LargeStringArray(const std::shared_ptr<ArrayData>& data);
+
+  LargeStringArray(int64_t length, const std::shared_ptr<Buffer>& value_offsets,
+                   const std::shared_ptr<Buffer>& data,
+                   const std::shared_ptr<Buffer>& null_bitmap = NULLPTR,
+                   int64_t null_count = kUnknownNullCount, int64_t offset = 0);
+};
+
 // ----------------------------------------------------------------------
 // Fixed width binary
 
diff --git a/cpp/src/arrow/array/builder_binary.cc b/cpp/src/arrow/array/builder_binary.cc
index 818ad15..b83897d 100644
--- a/cpp/src/arrow/array/builder_binary.cc
+++ b/cpp/src/arrow/array/builder_binary.cc
@@ -43,173 +43,15 @@ using internal::checked_cast;
 // ----------------------------------------------------------------------
 // String and binary
 
-BinaryBuilder::BinaryBuilder(const std::shared_ptr<DataType>& type, MemoryPool* pool)
-    : ArrayBuilder(type, pool), offsets_builder_(pool), value_data_builder_(pool) {}
-
-BinaryBuilder::BinaryBuilder(MemoryPool* pool) : BinaryBuilder(binary(), pool) {}
-
-Status BinaryBuilder::Resize(int64_t capacity) {
-  if (capacity > kListMaximumElements) {
-    return Status::CapacityError(
-        "BinaryBuilder cannot reserve space for more then 2^31 - 1 child elements, got ",
-        capacity);
-  }
-  RETURN_NOT_OK(CheckCapacity(capacity, capacity_));
-
-  // one more then requested for offsets
-  RETURN_NOT_OK(offsets_builder_.Resize(capacity + 1));
-  return ArrayBuilder::Resize(capacity);
-}
-
-Status BinaryBuilder::ReserveData(int64_t elements) {
-  const int64_t size = value_data_length() + elements;
-  ARROW_RETURN_IF(
-      size > kBinaryMemoryLimit,
-      Status::CapacityError("Cannot reserve capacity larger than 2^31 - 1 for binary"));
-
-  return (size > value_data_capacity()) ? value_data_builder_.Reserve(elements)
-                                        : Status::OK();
-}
-
-Status BinaryBuilder::AppendOverflow(int64_t num_bytes) {
-  return Status::CapacityError("BinaryArray cannot contain more than ",
-                               kBinaryMemoryLimit, " bytes, have ", num_bytes);
-}
-
-Status BinaryBuilder::FinishInternal(std::shared_ptr<ArrayData>* out) {
-  // Write final offset (values length)
-  RETURN_NOT_OK(AppendNextOffset());
-
-  // These buffers' padding zeroed by BufferBuilder
-  std::shared_ptr<Buffer> offsets, value_data, null_bitmap;
-  RETURN_NOT_OK(offsets_builder_.Finish(&offsets));
-  RETURN_NOT_OK(value_data_builder_.Finish(&value_data));
-  RETURN_NOT_OK(null_bitmap_builder_.Finish(&null_bitmap));
-
-  *out =
-      ArrayData::Make(type_, length_, {null_bitmap, offsets, value_data}, null_count_, 0);
-  Reset();
-  return Status::OK();
-}
-
-void BinaryBuilder::Reset() {
-  ArrayBuilder::Reset();
-  offsets_builder_.Reset();
-  value_data_builder_.Reset();
-}
-
-const uint8_t* BinaryBuilder::GetValue(int64_t i, int32_t* out_length) const {
-  const int32_t* offsets = offsets_builder_.data();
-  int32_t offset = offsets[i];
-  if (i == (length_ - 1)) {
-    *out_length = static_cast<int32_t>(value_data_builder_.length()) - offset;
-  } else {
-    *out_length = offsets[i + 1] - offset;
-  }
-  return value_data_builder_.data() + offset;
-}
-
-util::string_view BinaryBuilder::GetView(int64_t i) const {
-  const int32_t* offsets = offsets_builder_.data();
-  int32_t offset = offsets[i];
-  int32_t value_length;
-  if (i == (length_ - 1)) {
-    value_length = static_cast<int32_t>(value_data_builder_.length()) - offset;
-  } else {
-    value_length = offsets[i + 1] - offset;
-  }
-  return util::string_view(
-      reinterpret_cast<const char*>(value_data_builder_.data() + offset), value_length);
-}
+BinaryBuilder::BinaryBuilder(MemoryPool* pool) : BaseBinaryBuilder(binary(), pool) {}
 
 StringBuilder::StringBuilder(MemoryPool* pool) : BinaryBuilder(utf8(), pool) {}
 
-Status StringBuilder::AppendValues(const std::vector<std::string>& values,
-                                   const uint8_t* valid_bytes) {
-  std::size_t total_length = std::accumulate(
-      values.begin(), values.end(), 0ULL,
-      [](uint64_t sum, const std::string& str) { return sum + str.size(); });
-  RETURN_NOT_OK(Reserve(values.size()));
-  RETURN_NOT_OK(value_data_builder_.Reserve(total_length));
-  RETURN_NOT_OK(offsets_builder_.Reserve(values.size()));
-
-  if (valid_bytes) {
-    for (std::size_t i = 0; i < values.size(); ++i) {
-      UnsafeAppendNextOffset();
-      if (valid_bytes[i]) {
-        value_data_builder_.UnsafeAppend(
-            reinterpret_cast<const uint8_t*>(values[i].data()), values[i].size());
-      }
-    }
-  } else {
-    for (std::size_t i = 0; i < values.size(); ++i) {
-      UnsafeAppendNextOffset();
-      value_data_builder_.UnsafeAppend(reinterpret_cast<const uint8_t*>(values[i].data()),
-                                       values[i].size());
-    }
-  }
+LargeBinaryBuilder::LargeBinaryBuilder(MemoryPool* pool)
+    : BaseBinaryBuilder(large_binary(), pool) {}
 
-  UnsafeAppendToBitmap(valid_bytes, values.size());
-  return Status::OK();
-}
-
-Status StringBuilder::AppendValues(const char** values, int64_t length,
-                                   const uint8_t* valid_bytes) {
-  std::size_t total_length = 0;
-  std::vector<std::size_t> value_lengths(length);
-  bool have_null_value = false;
-  for (int64_t i = 0; i < length; ++i) {
-    if (values[i]) {
-      auto value_length = strlen(values[i]);
-      value_lengths[i] = value_length;
-      total_length += value_length;
-    } else {
-      have_null_value = true;
-    }
-  }
-  RETURN_NOT_OK(Reserve(length));
-  RETURN_NOT_OK(value_data_builder_.Reserve(total_length));
-  RETURN_NOT_OK(offsets_builder_.Reserve(length));
-
-  if (valid_bytes) {
-    int64_t valid_bytes_offset = 0;
-    for (int64_t i = 0; i < length; ++i) {
-      UnsafeAppendNextOffset();
-      if (valid_bytes[i]) {
-        if (values[i]) {
-          value_data_builder_.UnsafeAppend(reinterpret_cast<const uint8_t*>(values[i]),
-                                           value_lengths[i]);
-        } else {
-          UnsafeAppendToBitmap(valid_bytes + valid_bytes_offset, i - valid_bytes_offset);
-          UnsafeAppendToBitmap(false);
-          valid_bytes_offset = i + 1;
-        }
-      }
-    }
-    UnsafeAppendToBitmap(valid_bytes + valid_bytes_offset, length - valid_bytes_offset);
-  } else {
-    if (have_null_value) {
-      std::vector<uint8_t> valid_vector(length, 0);
-      for (int64_t i = 0; i < length; ++i) {
-        UnsafeAppendNextOffset();
-        if (values[i]) {
-          value_data_builder_.UnsafeAppend(reinterpret_cast<const uint8_t*>(values[i]),
-                                           value_lengths[i]);
-          valid_vector[i] = 1;
-        }
-      }
-      UnsafeAppendToBitmap(valid_vector.data(), length);
-    } else {
-      for (int64_t i = 0; i < length; ++i) {
-        UnsafeAppendNextOffset();
-        value_data_builder_.UnsafeAppend(reinterpret_cast<const uint8_t*>(values[i]),
-                                         value_lengths[i]);
-      }
-      UnsafeAppendToBitmap(nullptr, length);
-    }
-  }
-  return Status::OK();
-}
+LargeStringBuilder::LargeStringBuilder(MemoryPool* pool)
+    : LargeBinaryBuilder(large_utf8(), pool) {}
 
 // ----------------------------------------------------------------------
 // Fixed width binary
diff --git a/cpp/src/arrow/array/builder_binary.h b/cpp/src/arrow/array/builder_binary.h
index 47d3bae..7ae4d31 100644
--- a/cpp/src/arrow/array/builder_binary.h
+++ b/cpp/src/arrow/array/builder_binary.h
@@ -17,8 +17,11 @@
 
 #pragma once
 
+#include <algorithm>
+#include <cstdint>
 #include <limits>
 #include <memory>
+#include <numeric>
 #include <string>
 #include <vector>
 
@@ -37,15 +40,16 @@ constexpr int64_t kBinaryMemoryLimit = std::numeric_limits<int32_t>::max() - 1;
 // ----------------------------------------------------------------------
 // Binary and String
 
-/// \class BinaryBuilder
-/// \brief Builder class for variable-length binary data
-class ARROW_EXPORT BinaryBuilder : public ArrayBuilder {
+template <typename TYPE>
+class BaseBinaryBuilder : public ArrayBuilder {
  public:
-  explicit BinaryBuilder(MemoryPool* pool ARROW_MEMORY_POOL_DEFAULT);
+  using TypeClass = TYPE;
+  using offset_type = typename TypeClass::offset_type;
 
-  BinaryBuilder(const std::shared_ptr<DataType>& type, MemoryPool* pool);
+  BaseBinaryBuilder(const std::shared_ptr<DataType>& type, MemoryPool* pool)
+      : ArrayBuilder(type, pool), offsets_builder_(pool), value_data_builder_(pool) {}
 
-  Status Append(const uint8_t* value, int32_t length) {
+  Status Append(const uint8_t* value, offset_type length) {
     ARROW_RETURN_NOT_OK(Reserve(1));
     ARROW_RETURN_NOT_OK(AppendNextOffset());
     // Safety check for UBSAN.
@@ -57,14 +61,22 @@ class ARROW_EXPORT BinaryBuilder : public ArrayBuilder {
     return Status::OK();
   }
 
+  Status Append(const char* value, offset_type length) {
+    return Append(reinterpret_cast<const uint8_t*>(value), length);
+  }
+
+  Status Append(util::string_view value) {
+    return Append(value.data(), static_cast<offset_type>(value.size()));
+  }
+
   Status AppendNulls(int64_t length) final {
     const int64_t num_bytes = value_data_builder_.length();
-    if (ARROW_PREDICT_FALSE(num_bytes > kBinaryMemoryLimit)) {
+    if (ARROW_PREDICT_FALSE(num_bytes > memory_limit())) {
       return AppendOverflow(num_bytes);
     }
     ARROW_RETURN_NOT_OK(Reserve(length));
     for (int64_t i = 0; i < length; ++i) {
-      offsets_builder_.UnsafeAppend(static_cast<int32_t>(num_bytes));
+      offsets_builder_.UnsafeAppend(static_cast<offset_type>(num_bytes));
     }
     UnsafeAppendToBitmap(length, false);
     return Status::OK();
@@ -77,56 +89,182 @@ class ARROW_EXPORT BinaryBuilder : public ArrayBuilder {
     return Status::OK();
   }
 
-  Status Append(const char* value, int32_t length) {
-    return Append(reinterpret_cast<const uint8_t*>(value), length);
-  }
-
-  Status Append(util::string_view value) {
-    return Append(value.data(), static_cast<int32_t>(value.size()));
-  }
-
   /// \brief Append without checking capacity
   ///
   /// Offsets and data should have been presized using Reserve() and
   /// ReserveData(), respectively.
-  void UnsafeAppend(const uint8_t* value, int32_t length) {
+  void UnsafeAppend(const uint8_t* value, offset_type length) {
     UnsafeAppendNextOffset();
     value_data_builder_.UnsafeAppend(value, length);
     UnsafeAppendToBitmap(true);
   }
 
-  void UnsafeAppend(const char* value, int32_t length) {
+  void UnsafeAppend(const char* value, offset_type length) {
     UnsafeAppend(reinterpret_cast<const uint8_t*>(value), length);
   }
 
   void UnsafeAppend(const std::string& value) {
-    UnsafeAppend(value.c_str(), static_cast<int32_t>(value.size()));
+    UnsafeAppend(value.c_str(), static_cast<offset_type>(value.size()));
   }
 
   void UnsafeAppend(util::string_view value) {
-    UnsafeAppend(value.data(), static_cast<int32_t>(value.size()));
+    UnsafeAppend(value.data(), static_cast<offset_type>(value.size()));
   }
 
   void UnsafeAppendNull() {
     const int64_t num_bytes = value_data_builder_.length();
-    offsets_builder_.UnsafeAppend(static_cast<int32_t>(num_bytes));
+    offsets_builder_.UnsafeAppend(static_cast<offset_type>(num_bytes));
     UnsafeAppendToBitmap(false);
   }
 
-  void Reset() override;
-  Status Resize(int64_t capacity) override;
+  /// \brief Append a sequence of strings in one shot.
+  ///
+  /// \param[in] values a vector of strings
+  /// \param[in] valid_bytes an optional sequence of bytes where non-zero
+  /// indicates a valid (non-null) value
+  /// \return Status, an error if ReserveData() rejects the total data size
+  Status AppendValues(const std::vector<std::string>& values,
+                      const uint8_t* valid_bytes = NULLPTR) {
+    std::size_t total_length = std::accumulate(
+        values.begin(), values.end(), 0ULL,
+        [](uint64_t sum, const std::string& str) { return sum + str.size(); });
+    ARROW_RETURN_NOT_OK(Reserve(values.size()));
+    ARROW_RETURN_NOT_OK(ReserveData(total_length));  // also enforces memory_limit()
+    ARROW_RETURN_NOT_OK(offsets_builder_.Reserve(values.size()));
+
+    if (valid_bytes != NULLPTR) {
+      for (std::size_t i = 0; i < values.size(); ++i) {
+        UnsafeAppendNextOffset();
+        if (valid_bytes[i]) {  // null slots get an offset but contribute no bytes
+          value_data_builder_.UnsafeAppend(
+              reinterpret_cast<const uint8_t*>(values[i].data()), values[i].size());
+        }
+      }
+    } else {
+      for (std::size_t i = 0; i < values.size(); ++i) {
+        UnsafeAppendNextOffset();
+        value_data_builder_.UnsafeAppend(
+            reinterpret_cast<const uint8_t*>(values[i].data()), values[i].size());
+      }
+    }
+
+    UnsafeAppendToBitmap(valid_bytes, values.size());
+    return Status::OK();
+  }
+
+  /// \brief Append a sequence of nul-terminated strings in one shot.
+  ///        If one of the values is NULL, it is processed as a null
+  ///        value even if the corresponding valid_bytes entry is 1.
+  ///
+  /// \param[in] values a contiguous C array of nul-terminated char *
+  /// \param[in] length the number of values to append
+  /// \param[in] valid_bytes an optional sequence of bytes where non-zero
+  /// indicates a valid (non-null) value
+  /// \return Status
+  Status AppendValues(const char** values, int64_t length,
+                      const uint8_t* valid_bytes = NULLPTR) {
+    std::size_t total_length = 0;
+    std::vector<std::size_t> value_lengths(length);  // strlen() cache; NULL entries stay 0
+    bool have_null_value = false;
+    for (int64_t i = 0; i < length; ++i) {
+      if (values[i] != NULLPTR) {
+        auto value_length = strlen(values[i]);
+        value_lengths[i] = value_length;
+        total_length += value_length;
+      } else {
+        have_null_value = true;
+      }
+    }
+    ARROW_RETURN_NOT_OK(Reserve(length));
+    ARROW_RETURN_NOT_OK(ReserveData(total_length));
+
+    if (valid_bytes) {
+      int64_t valid_bytes_offset = 0;  // first slot whose validity is not yet appended
+      for (int64_t i = 0; i < length; ++i) {
+        UnsafeAppendNextOffset();
+        if (valid_bytes[i]) {
+          if (values[i]) {
+            value_data_builder_.UnsafeAppend(reinterpret_cast<const uint8_t*>(values[i]),
+                                             value_lengths[i]);
+          } else {
+            UnsafeAppendToBitmap(valid_bytes + valid_bytes_offset,
+                                 i - valid_bytes_offset);  // pending bytes before slot i
+            UnsafeAppendToBitmap(false);  // NULL pointer overrides the valid byte
+            valid_bytes_offset = i + 1;  // the final flush resumes after this slot
+          }
+        }
+      }
+      UnsafeAppendToBitmap(valid_bytes + valid_bytes_offset, length - valid_bytes_offset);
+    } else {
+      if (have_null_value) {
+        std::vector<uint8_t> valid_vector(length, 0);  // validity derived from pointers
+        for (int64_t i = 0; i < length; ++i) {
+          UnsafeAppendNextOffset();
+          if (values[i]) {
+            value_data_builder_.UnsafeAppend(reinterpret_cast<const uint8_t*>(values[i]),
+                                             value_lengths[i]);
+            valid_vector[i] = 1;
+          }
+        }
+        UnsafeAppendToBitmap(valid_vector.data(), length);
+      } else {
+        for (int64_t i = 0; i < length; ++i) {
+          UnsafeAppendNextOffset();
+          value_data_builder_.UnsafeAppend(reinterpret_cast<const uint8_t*>(values[i]),
+                                           value_lengths[i]);
+        }
+        UnsafeAppendToBitmap(NULLPTR, length);  // no validity bytes: none of them is NULL
+      }
+    }
+    return Status::OK();
+  }
+
+  void Reset() override {
+    ArrayBuilder::Reset();
+    offsets_builder_.Reset();
+    value_data_builder_.Reset();
+  }
+
+  Status Resize(int64_t capacity) override {
+    // XXX Why is this check necessary?  There is no reason to disallow, say,
+    // binary arrays with more than 2**31 empty or null values.
+    if (capacity > memory_limit()) {
+      return Status::CapacityError("BinaryBuilder cannot reserve space for more than ",
+                                   memory_limit(), " child elements, got ", capacity);
+    }
+    ARROW_RETURN_NOT_OK(CheckCapacity(capacity, capacity_));
+
+    // One more than requested for offsets
+    ARROW_RETURN_NOT_OK(offsets_builder_.Resize(capacity + 1));
+    return ArrayBuilder::Resize(capacity);
+  }
 
   /// \brief Ensures there is enough allocated capacity to append the indicated
   /// number of bytes to the value data buffer without additional allocations
-  Status ReserveData(int64_t elements);
+  Status ReserveData(int64_t elements) {
+    const int64_t required = value_data_length() + elements;
+    ARROW_RETURN_IF(required > memory_limit(),
+                    Status::CapacityError("Cannot reserve capacity larger than ",
+                                          memory_limit(), " bytes"));
+    if (required <= value_data_capacity()) return Status::OK();  // already large enough
+    return value_data_builder_.Reserve(elements);
+  }
 
-  Status FinishInternal(std::shared_ptr<ArrayData>* out) override;
+  Status FinishInternal(std::shared_ptr<ArrayData>* out) override {
+    // Write final offset (values length)
+    ARROW_RETURN_NOT_OK(AppendNextOffset());
 
-  /// \cond FALSE
-  using ArrayBuilder::Finish;
-  /// \endcond
+    // These buffers' padding zeroed by BufferBuilder
+    std::shared_ptr<Buffer> offsets, value_data, null_bitmap;
+    ARROW_RETURN_NOT_OK(offsets_builder_.Finish(&offsets));
+    ARROW_RETURN_NOT_OK(value_data_builder_.Finish(&value_data));
+    ARROW_RETURN_NOT_OK(null_bitmap_builder_.Finish(&null_bitmap));
 
-  Status Finish(std::shared_ptr<BinaryArray>* out) { return FinishTyped(out); }
+    *out = ArrayData::Make(type_, length_, {null_bitmap, offsets, value_data},
+                           null_count_, 0);
+    Reset();
+    return Status::OK();
+  }
 
   /// \return size of values buffer so far
   int64_t value_data_length() const { return value_data_builder_.length(); }
@@ -136,33 +274,70 @@ class ARROW_EXPORT BinaryBuilder : public ArrayBuilder {
   /// Temporary access to a value.
   ///
   /// This pointer becomes invalid on the next modifying operation.
-  const uint8_t* GetValue(int64_t i, int32_t* out_length) const;
+  const uint8_t* GetValue(int64_t i, offset_type* out_length) const {
+    const offset_type* offsets = offsets_builder_.data();
+    const auto offset = offsets[i];  // start of value i in the value data buffer
+    if (i == (length_ - 1)) {  // last value: it ends at the current data length
+      *out_length = static_cast<offset_type>(value_data_builder_.length()) - offset;
+    } else {  // otherwise the next value's start offset marks the end
+      *out_length = offsets[i + 1] - offset;
+    }
+    return value_data_builder_.data() + offset;
+  }
 
   /// Temporary access to a value.
   ///
   /// This view becomes invalid on the next modifying operation.
-  util::string_view GetView(int64_t i) const;
+  util::string_view GetView(int64_t i) const {
+    offset_type value_length;
+    const uint8_t* value_data = GetValue(i, &value_length);
+    return util::string_view(reinterpret_cast<const char*>(value_data), value_length);
+  }
 
  protected:
-  TypedBufferBuilder<int32_t> offsets_builder_;
+  TypedBufferBuilder<offset_type> offsets_builder_;
   TypedBufferBuilder<uint8_t> value_data_builder_;
 
-  Status AppendOverflow(int64_t num_bytes);
+  Status AppendOverflow(int64_t num_bytes) {
+    return Status::CapacityError("array cannot contain more than ", memory_limit(),
+                                 " bytes, have ", num_bytes);
+  }
 
   Status AppendNextOffset() {
     const int64_t num_bytes = value_data_builder_.length();
-    if (ARROW_PREDICT_FALSE(num_bytes > kBinaryMemoryLimit)) {
+    if (ARROW_PREDICT_FALSE(num_bytes > memory_limit())) {
       return AppendOverflow(num_bytes);
     }
-    return offsets_builder_.Append(static_cast<int32_t>(num_bytes));
+    return offsets_builder_.Append(static_cast<offset_type>(num_bytes));
   }
 
   void UnsafeAppendNextOffset() {
     const int64_t num_bytes = value_data_builder_.length();
-    offsets_builder_.UnsafeAppend(static_cast<int32_t>(num_bytes));
+    offsets_builder_.UnsafeAppend(static_cast<offset_type>(num_bytes));
+  }
+
+  // Cannot make this a static attribute because of linking issues
+  static constexpr int64_t memory_limit() {
+    return std::numeric_limits<offset_type>::max() - 1;  // cap on total value bytes
   }
 };
 
+/// \class BinaryBuilder
+/// \brief Builder class for variable-length binary data
+class ARROW_EXPORT BinaryBuilder : public BaseBinaryBuilder<BinaryType> {
+ public:
+  explicit BinaryBuilder(MemoryPool* pool ARROW_MEMORY_POOL_DEFAULT);
+
+  /// \cond FALSE
+  using ArrayBuilder::Finish;
+  /// \endcond
+
+  Status Finish(std::shared_ptr<BinaryArray>* out) { return FinishTyped(out); }
+
+ protected:
+  using BaseBinaryBuilder::BaseBinaryBuilder;
+};
+
 /// \class StringBuilder
 /// \brief Builder class for UTF8 strings
 class ARROW_EXPORT StringBuilder : public BinaryBuilder {
@@ -170,36 +345,41 @@ class ARROW_EXPORT StringBuilder : public BinaryBuilder {
   using BinaryBuilder::BinaryBuilder;
   explicit StringBuilder(MemoryPool* pool ARROW_MEMORY_POOL_DEFAULT);
 
-  using BinaryBuilder::Append;
-  using BinaryBuilder::Reset;
-  using BinaryBuilder::UnsafeAppend;
+  /// \cond FALSE
+  using ArrayBuilder::Finish;
+  /// \endcond
 
-  /// \brief Append a sequence of strings in one shot.
-  ///
-  /// \param[in] values a vector of strings
-  /// \param[in] valid_bytes an optional sequence of bytes where non-zero
-  /// indicates a valid (non-null) value
-  /// \return Status
-  Status AppendValues(const std::vector<std::string>& values,
-                      const uint8_t* valid_bytes = NULLPTR);
+  Status Finish(std::shared_ptr<StringArray>* out) { return FinishTyped(out); }
+};
 
-  /// \brief Append a sequence of nul-terminated strings in one shot.
-  ///        If one of the values is NULL, it is processed as a null
-  ///        value even if the corresponding valid_bytes entry is 1.
-  ///
-  /// \param[in] values a contiguous C array of nul-terminated char *
-  /// \param[in] length the number of values to append
-  /// \param[in] valid_bytes an optional sequence of bytes where non-zero
-  /// indicates a valid (non-null) value
-  /// \return Status
-  Status AppendValues(const char** values, int64_t length,
-                      const uint8_t* valid_bytes = NULLPTR);
+/// \class LargeBinaryBuilder
+/// \brief Builder class for large variable-length binary data
+class ARROW_EXPORT LargeBinaryBuilder : public BaseBinaryBuilder<LargeBinaryType> {
+ public:
+  explicit LargeBinaryBuilder(MemoryPool* pool ARROW_MEMORY_POOL_DEFAULT);
 
   /// \cond FALSE
   using ArrayBuilder::Finish;
   /// \endcond
 
-  Status Finish(std::shared_ptr<StringArray>* out) { return FinishTyped(out); }
+  Status Finish(std::shared_ptr<LargeBinaryArray>* out) { return FinishTyped(out); }
+
+ protected:
+  using BaseBinaryBuilder::BaseBinaryBuilder;
+};
+
+/// \class LargeStringBuilder
+/// \brief Builder class for large UTF8 strings
+class ARROW_EXPORT LargeStringBuilder : public LargeBinaryBuilder {
+ public:
+  using LargeBinaryBuilder::LargeBinaryBuilder;
+  explicit LargeStringBuilder(MemoryPool* pool ARROW_MEMORY_POOL_DEFAULT);
+
+  /// \cond FALSE
+  using ArrayBuilder::Finish;
+  /// \endcond
+
+  Status Finish(std::shared_ptr<LargeStringArray>* out) { return FinishTyped(out); }
 };
 
 // ----------------------------------------------------------------------
diff --git a/cpp/src/arrow/array/concatenate-test.cc b/cpp/src/arrow/array/concatenate-test.cc
index cf105ce..730b25a 100644
--- a/cpp/src/arrow/array/concatenate-test.cc
+++ b/cpp/src/arrow/array/concatenate-test.cc
@@ -48,10 +48,11 @@ class ConcatenateTest : public ::testing::Test {
         sizes_({0, 1, 2, 4, 16, 31, 1234}),
         null_probabilities_({0.0, 0.1, 0.5, 0.9, 1.0}) {}
 
-  std::vector<int32_t> Offsets(int32_t length, int32_t slice_count) {
-    std::vector<int32_t> offsets(static_cast<std::size_t>(slice_count + 1));
+  template <typename OffsetType>
+  std::vector<OffsetType> Offsets(int32_t length, int32_t slice_count) {
+    std::vector<OffsetType> offsets(static_cast<std::size_t>(slice_count + 1));
     std::default_random_engine gen(seed_);
-    std::uniform_int_distribution<int32_t> dist(0, length);
+    std::uniform_int_distribution<OffsetType> dist(0, length);
     std::generate(offsets.begin(), offsets.end(), [&] { return dist(gen); });
     std::sort(offsets.begin(), offsets.end());
     return offsets;
@@ -85,7 +86,7 @@ class ConcatenateTest : public ::testing::Test {
   template <typename ArrayFactory>
   void Check(ArrayFactory&& factory) {
     for (auto size : this->sizes_) {
-      auto offsets = this->Offsets(size, 3);
+      auto offsets = this->Offsets<int32_t>(size, 3);
       for (auto null_probability : this->null_probabilities_) {
         std::shared_ptr<Array> array;
         factory(size, null_probability, &array);
@@ -146,16 +147,16 @@ TYPED_TEST(PrimitiveConcatenateTest, Primitives) {
 
 TEST_F(ConcatenateTest, StringType) {
   Check([this](int32_t size, double null_probability, std::shared_ptr<Array>* out) {
-    auto values_size = size * 4;
-    auto char_array = this->GeneratePrimitive<Int8Type>(values_size, null_probability);
-    std::shared_ptr<Buffer> offsets;
-    auto offsets_vector = this->Offsets(values_size, size);
-    // ensure the first offset is 0, which is expected for StringType
-    offsets_vector[0] = 0;
-    ASSERT_OK(CopyBufferFromVector(offsets_vector, default_memory_pool(), &offsets));
-    *out = MakeArray(ArrayData::Make(
-        utf8(), size,
-        {char_array->data()->buffers[0], offsets, char_array->data()->buffers[1]}));
+    *out = rng_.String(size, /*min_length =*/0, /*max_length =*/15, null_probability);
+    ASSERT_OK(ValidateArray(**out));
+  });
+}
+
+TEST_F(ConcatenateTest, LargeStringType) {
+  Check([this](int32_t size, double null_probability, std::shared_ptr<Array>* out) {
+    *out =
+        rng_.LargeString(size, /*min_length =*/0, /*max_length =*/15, null_probability);
+    ASSERT_OK(ValidateArray(**out));
   });
 }
 
@@ -163,7 +164,7 @@ TEST_F(ConcatenateTest, ListType) {
   Check([this](int32_t size, double null_probability, std::shared_ptr<Array>* out) {
     auto values_size = size * 4;
     auto values = this->GeneratePrimitive<Int8Type>(values_size, null_probability);
-    auto offsets_vector = this->Offsets(values_size, size);
+    auto offsets_vector = this->Offsets<int32_t>(values_size, size);
     // ensure the first offset is 0, which is expected for ListType
     offsets_vector[0] = 0;
     std::shared_ptr<Array> offsets;
diff --git a/cpp/src/arrow/array/concatenate.cc b/cpp/src/arrow/array/concatenate.cc
index 60da0d3..a20b157 100644
--- a/cpp/src/arrow/array/concatenate.cc
+++ b/cpp/src/arrow/array/concatenate.cc
@@ -184,14 +184,21 @@ class ConcatenateImpl {
 
   Status Visit(const BinaryType&) {
     std::vector<Range> value_ranges;
-    RETURN_NOT_OK(ConcatenateOffsets<int32_t>(Buffers(1, *offset_type), pool_,
+    RETURN_NOT_OK(ConcatenateOffsets<int32_t>(Buffers(1, sizeof(int32_t)), pool_,
+                                              &out_.buffers[1], &value_ranges));
+    return ConcatenateBuffers(Buffers(2, value_ranges), pool_, &out_.buffers[2]);
+  }
+
+  Status Visit(const LargeBinaryType&) {
+    std::vector<Range> value_ranges;
+    RETURN_NOT_OK(ConcatenateOffsets<int64_t>(Buffers(1, sizeof(int64_t)), pool_,
                                               &out_.buffers[1], &value_ranges));
     return ConcatenateBuffers(Buffers(2, value_ranges), pool_, &out_.buffers[2]);
   }
 
   Status Visit(const ListType&) {
     std::vector<Range> value_ranges;
-    RETURN_NOT_OK(ConcatenateOffsets<int32_t>(Buffers(1, *offset_type), pool_,
+    RETURN_NOT_OK(ConcatenateOffsets<int32_t>(Buffers(1, sizeof(int32_t)), pool_,
                                               &out_.buffers[1], &value_ranges));
     return ConcatenateImpl(ChildData(0, value_ranges), pool_)
         .Concatenate(out_.child_data[0].get());
@@ -277,13 +284,11 @@ class ConcatenateImpl {
   }
 
   // Gather the index-th buffer of each input into a vector.
-  // Buffers are assumed to contain elements of fixed.bit_width(),
+  // Buffers are assumed to contain elements of the given byte_width,
   // those elements are sliced with that input's offset and length.
   // Note that BufferVector will not contain the buffer of in_[i] if it's
   // nullptr.
-  BufferVector Buffers(size_t index, const FixedWidthType& fixed) {
-    DCHECK_EQ(fixed.bit_width() % 8, 0);
-    auto byte_width = fixed.bit_width() / 8;
+  BufferVector Buffers(size_t index, int byte_width) {
     BufferVector buffers;
     buffers.reserve(in_.size());
     for (const ArrayData& array_data : in_) {
@@ -296,6 +301,16 @@ class ConcatenateImpl {
     return buffers;
   }
 
+  // Gather the index-th buffer of each input into a vector.
+  // Buffers are assumed to contain elements of fixed.bit_width(),
+  // those elements are sliced with that input's offset and length.
+  // Note that BufferVector will not contain the buffer of in_[i] if it's
+  // nullptr.
+  BufferVector Buffers(size_t index, const FixedWidthType& fixed) {
+    DCHECK_EQ(fixed.bit_width() % 8, 0);
+    return Buffers(index, fixed.bit_width() / 8);
+  }
+
   // Gather the index-th buffer of each input as a Bitmap
   // into a vector of Bitmaps.
   std::vector<Bitmap> Bitmaps(size_t index) {
@@ -328,15 +343,11 @@ class ConcatenateImpl {
     return child_data;
   }
 
-  static const std::shared_ptr<FixedWidthType> offset_type;
   const std::vector<ArrayData>& in_;
   MemoryPool* pool_;
   ArrayData out_;
 };
 
-const std::shared_ptr<FixedWidthType> ConcatenateImpl::offset_type =
-    std::static_pointer_cast<FixedWidthType>(int32());
-
 Status Concatenate(const ArrayVector& arrays, MemoryPool* pool,
                    std::shared_ptr<Array>* out) {
   if (arrays.size() == 0) {
diff --git a/cpp/src/arrow/builder.cc b/cpp/src/arrow/builder.cc
index cee443c..44b0d04 100644
--- a/cpp/src/arrow/builder.cc
+++ b/cpp/src/arrow/builder.cc
@@ -107,6 +107,8 @@ Status MakeBuilder(MemoryPool* pool, const std::shared_ptr<DataType>& type,
       BUILDER_CASE(DOUBLE, DoubleBuilder);
       BUILDER_CASE(STRING, StringBuilder);
       BUILDER_CASE(BINARY, BinaryBuilder);
+      BUILDER_CASE(LARGE_STRING, LargeStringBuilder);
+      BUILDER_CASE(LARGE_BINARY, LargeBinaryBuilder);
       BUILDER_CASE(FIXED_SIZE_BINARY, FixedSizeBinaryBuilder);
       BUILDER_CASE(DECIMAL, Decimal128Builder);
     case Type::DICTIONARY: {
diff --git a/cpp/src/arrow/compare.cc b/cpp/src/arrow/compare.cc
index 097bc8f..cb606e3 100644
--- a/cpp/src/arrow/compare.cc
+++ b/cpp/src/arrow/compare.cc
@@ -144,8 +144,9 @@ class RangeEqualsVisitor {
     return Status::OK();
   }
 
-  bool CompareBinaryRange(const BinaryArray& left) const {
-    const auto& right = checked_cast<const BinaryArray&>(right_);
+  template <typename BinaryArrayType>
+  bool CompareBinaryRange(const BinaryArrayType& left) const {
+    const auto& right = checked_cast<const BinaryArrayType&>(right_);
 
     for (int64_t i = left_start_idx_, o_i = right_start_idx_; i < left_end_idx_;
          ++i, ++o_i) {
@@ -154,10 +155,10 @@ class RangeEqualsVisitor {
         return false;
       }
       if (is_null) continue;
-      const int32_t begin_offset = left.value_offset(i);
-      const int32_t end_offset = left.value_offset(i + 1);
-      const int32_t right_begin_offset = right.value_offset(o_i);
-      const int32_t right_end_offset = right.value_offset(o_i + 1);
+      const auto begin_offset = left.value_offset(i);
+      const auto end_offset = left.value_offset(i + 1);
+      const auto right_begin_offset = right.value_offset(o_i);
+      const auto right_end_offset = right.value_offset(o_i + 1);
       // Underlying can't be equal if the size isn't equal
       if (end_offset - begin_offset != right_end_offset - right_begin_offset) {
         return false;
@@ -278,6 +279,11 @@ class RangeEqualsVisitor {
     return Status::OK();
   }
 
+  Status Visit(const LargeBinaryArray& left) {
+    result_ = CompareBinaryRange(left);
+    return Status::OK();
+  }
+
   Status Visit(const FixedSizeBinaryArray& left) {
     const auto& right = checked_cast<const FixedSizeBinaryArray&>(right_);
 
@@ -489,18 +495,21 @@ class ArrayEqualsVisitor : public RangeEqualsVisitor {
 
   template <typename ArrayType>
   bool ValueOffsetsEqual(const ArrayType& left) {
+    using offset_type = typename ArrayType::offset_type;
+
     const auto& right = checked_cast<const ArrayType&>(right_);
 
     if (left.offset() == 0 && right.offset() == 0) {
       return left.value_offsets()->Equals(*right.value_offsets(),
-                                          (left.length() + 1) * sizeof(int32_t));
+                                          (left.length() + 1) * sizeof(offset_type));
     } else {
       // One of the arrays is sliced; logic is more complicated because the
       // value offsets are not both 0-based
       auto left_offsets =
-          reinterpret_cast<const int32_t*>(left.value_offsets()->data()) + left.offset();
+          reinterpret_cast<const offset_type*>(left.value_offsets()->data()) +
+          left.offset();
       auto right_offsets =
-          reinterpret_cast<const int32_t*>(right.value_offsets()->data()) +
+          reinterpret_cast<const offset_type*>(right.value_offsets()->data()) +
           right.offset();
 
       for (int64_t i = 0; i < left.length() + 1; ++i) {
@@ -512,10 +521,11 @@ class ArrayEqualsVisitor : public RangeEqualsVisitor {
     }
   }
 
-  bool CompareBinary(const BinaryArray& left) {
-    const auto& right = checked_cast<const BinaryArray&>(right_);
+  template <typename BinaryArrayType>
+  bool CompareBinary(const BinaryArrayType& left) {
+    const auto& right = checked_cast<const BinaryArrayType&>(right_);
 
-    bool equal_offsets = ValueOffsetsEqual<BinaryArray>(left);
+    bool equal_offsets = ValueOffsetsEqual<BinaryArrayType>(left);
     if (!equal_offsets) {
       return false;
     }
@@ -544,8 +554,8 @@ class ArrayEqualsVisitor : public RangeEqualsVisitor {
       }
     } else {
       // ARROW-537: Only compare data in non-null slots
-      const int32_t* left_offsets = left.raw_value_offsets();
-      const int32_t* right_offsets = right.raw_value_offsets();
+      auto left_offsets = left.raw_value_offsets();
+      auto right_offsets = right.raw_value_offsets();
       for (int64_t i = 0; i < left.length(); ++i) {
         if (left.IsNull(i)) {
           continue;
@@ -564,6 +574,11 @@ class ArrayEqualsVisitor : public RangeEqualsVisitor {
     return Status::OK();
   }
 
+  Status Visit(const LargeBinaryArray& left) {
+    result_ = CompareBinary(left);
+    return Status::OK();
+  }
+
   Status Visit(const ListArray& left) {
     const auto& right = checked_cast<const ListArray&>(right_);
     bool equal_offsets = ValueOffsetsEqual<ListArray>(left);
@@ -822,6 +837,15 @@ class ScalarEqualsVisitor {
     return Status::OK();
   }
 
+  template <typename T>
+  typename std::enable_if<std::is_base_of<LargeBinaryScalar, T>::value, Status>::type
+  Visit(const T& left_) {
+    const auto& left = checked_cast<const LargeBinaryScalar&>(left_);
+    const auto& right = checked_cast<const LargeBinaryScalar&>(right_);
+    result_ = internal::SharedPtrEquals(left.value, right.value);
+    return Status::OK();
+  }
+
   Status Visit(const Decimal128Scalar& left) {
     const auto& right = checked_cast<const Decimal128Scalar&>(right_);
     result_ = left.value == right.value;
diff --git a/cpp/src/arrow/compute/kernels/cast-test.cc b/cpp/src/arrow/compute/kernels/cast-test.cc
index 6bf4f94..80538f2 100644
--- a/cpp/src/arrow/compute/kernels/cast-test.cc
+++ b/cpp/src/arrow/compute/kernels/cast-test.cc
@@ -52,6 +52,8 @@ namespace compute {
 
 using internal::checked_cast;
 
+static constexpr const char* kInvalidUtf8 = "\xa0\xa1";
+
 static std::vector<std::shared_ptr<DataType>> kNumericTypes = {
     uint8(), int8(),   uint16(), int16(),   uint32(),
     int32(), uint64(), int64(),  float32(), float64()};
@@ -131,6 +133,132 @@ class TestCast : public ComputeFixture, public TestBase {
       CheckPass(*input->Slice(1), *expected->Slice(1), out_type, options);
     }
   }
+
+  template <typename SourceType, typename DestType>
+  void TestCastBinaryToString() {
+    CastOptions options;
+    auto src_type = TypeTraits<SourceType>::type_singleton();
+    auto dest_type = TypeTraits<DestType>::type_singleton();
+
+    // Validity masks: `all` keeps every slot, `valid` nulls only the last (invalid) one
+    std::vector<bool> all = {1, 1, 1, 1, 1};
+    std::vector<bool> valid = {1, 1, 1, 1, 0};
+    std::vector<std::string> strings = {"Hi", "olá mundo", "你好世界", "", kInvalidUtf8};
+
+    std::shared_ptr<Array> array;
+
+    // Should accept when invalid but null.
+    ArrayFromVector<SourceType, std::string>(src_type, valid, strings, &array);
+    CheckZeroCopy(*array, dest_type);
+
+    // Should refuse due to invalid utf8 payload
+    CheckFails<SourceType, std::string>(src_type, strings, all, dest_type, options);
+
+    // Should accept due to option override
+    options.allow_invalid_utf8 = true;
+    CheckCase<SourceType, std::string, DestType, std::string>(
+        src_type, strings, all, dest_type, strings, options);
+  }
+
+  template <typename SourceType>
+  void TestCastStringToNumber() {
+    CastOptions options;
+    auto src_type = TypeTraits<SourceType>::type_singleton();
+
+    std::vector<bool> is_valid = {true, false, true, true, true};  // slot 1 is null
+
+    // string to int
+    std::vector<std::string> v_int = {"0", "1", "127", "-1", "0"};
+    std::vector<int8_t> e_int8 = {0, 1, 127, -1, 0};
+    std::vector<int16_t> e_int16 = {0, 1, 127, -1, 0};
+    std::vector<int32_t> e_int32 = {0, 1, 127, -1, 0};
+    std::vector<int64_t> e_int64 = {0, 1, 127, -1, 0};
+    CheckCase<SourceType, std::string, Int8Type, int8_t>(src_type, v_int, is_valid,
+                                                         int8(), e_int8, options);
+    CheckCase<SourceType, std::string, Int16Type, int16_t>(src_type, v_int, is_valid,
+                                                           int16(), e_int16, options);
+    CheckCase<SourceType, std::string, Int32Type, int32_t>(src_type, v_int, is_valid,
+                                                           int32(), e_int32, options);
+    CheckCase<SourceType, std::string, Int64Type, int64_t>(src_type, v_int, is_valid,
+                                                           int64(), e_int64, options);
+
+    v_int = {"2147483647", "0", "-2147483648", "0", "0"};
+    e_int32 = {2147483647, 0, -2147483648LL, 0, 0};
+    CheckCase<SourceType, std::string, Int32Type, int32_t>(src_type, v_int, is_valid,
+                                                           int32(), e_int32, options);
+    v_int = {"9223372036854775807", "0", "-9223372036854775808", "0", "0"};
+    e_int64 = {9223372036854775807LL, 0, (-9223372036854775807LL - 1), 0, 0};
+    CheckCase<SourceType, std::string, Int64Type, int64_t>(src_type, v_int, is_valid,
+                                                           int64(), e_int64, options);
+
+    // string to uint
+    std::vector<std::string> v_uint = {"0", "1", "127", "255", "0"};
+    std::vector<uint8_t> e_uint8 = {0, 1, 127, 255, 0};
+    std::vector<uint16_t> e_uint16 = {0, 1, 127, 255, 0};
+    std::vector<uint32_t> e_uint32 = {0, 1, 127, 255, 0};
+    std::vector<uint64_t> e_uint64 = {0, 1, 127, 255, 0};
+    CheckCase<SourceType, std::string, UInt8Type, uint8_t>(src_type, v_uint, is_valid,
+                                                           uint8(), e_uint8, options);
+    CheckCase<SourceType, std::string, UInt16Type, uint16_t>(src_type, v_uint, is_valid,
+                                                             uint16(), e_uint16, options);
+    CheckCase<SourceType, std::string, UInt32Type, uint32_t>(src_type, v_uint, is_valid,
+                                                             uint32(), e_uint32, options);
+    CheckCase<SourceType, std::string, UInt64Type, uint64_t>(src_type, v_uint, is_valid,
+                                                             uint64(), e_uint64, options);
+
+    v_uint = {"4294967295", "0", "0", "0", "0"};
+    e_uint32 = {4294967295, 0, 0, 0, 0};
+    CheckCase<SourceType, std::string, UInt32Type, uint32_t>(src_type, v_uint, is_valid,
+                                                             uint32(), e_uint32, options);
+    v_uint = {"18446744073709551615", "0", "0", "0", "0"};
+    e_uint64 = {18446744073709551615ULL, 0, 0, 0, 0};
+    CheckCase<SourceType, std::string, UInt64Type, uint64_t>(src_type, v_uint, is_valid,
+                                                             uint64(), e_uint64, options);
+
+    // string to float
+    std::vector<std::string> v_float = {"0.1", "1.2", "127.3", "200.4", "0.5"};
+    std::vector<float> e_float = {0.1f, 1.2f, 127.3f, 200.4f, 0.5f};
+    std::vector<double> e_double = {0.1, 1.2, 127.3, 200.4, 0.5};
+    CheckCase<SourceType, std::string, FloatType, float>(src_type, v_float, is_valid,
+                                                         float32(), e_float, options);
+    CheckCase<SourceType, std::string, DoubleType, double>(src_type, v_float, is_valid,
+                                                           float64(), e_double, options);
+
+    // Test that casting is locale-independent
+    auto global_locale = std::locale();
+    try {
+      // French locale uses the comma as decimal point
+      std::locale::global(std::locale("fr_FR.UTF-8"));
+    } catch (std::runtime_error&) {
+      // Locale unavailable, ignore
+    }
+    CheckCase<SourceType, std::string, FloatType, float>(src_type, v_float, is_valid,
+                                                         float32(), e_float, options);
+    CheckCase<SourceType, std::string, DoubleType, double>(src_type, v_float, is_valid,
+                                                           float64(), e_double, options);
+    std::locale::global(global_locale);  // restore the locale saved above
+  }
+
+  template <typename SourceType>
+  void TestCastStringToTimestamp() {
+    CastOptions options;
+    auto src_type = TypeTraits<SourceType>::type_singleton();
+
+    std::vector<bool> is_valid = {true, false, true};
+    std::vector<std::string> strings = {"1970-01-01", "xxx", "2000-02-29"};  // "xxx" is null
+
+    auto type = timestamp(TimeUnit::SECOND);
+    std::vector<int64_t> e = {0, 0, 951782400};  // 2000-02-29 as seconds since the epoch
+    CheckCase<SourceType, std::string, TimestampType, int64_t>(
+        src_type, strings, is_valid, type, e, options);
+
+    type = timestamp(TimeUnit::MICRO);
+    e = {0, 0, 951782400000000LL};
+    CheckCase<SourceType, std::string, TimestampType, int64_t>(
+        src_type, strings, is_valid, type, e, options);
+
+    // NOTE: timestamp parsing is tested comprehensively in parsing-util-test.cc
+  }
 };
 
 TEST_F(TestCast, SameTypeZeroCopy) {
@@ -922,6 +1050,10 @@ TEST_F(TestCast, StringToBoolean) {
                                                         e, options);
   CheckCase<StringType, std::string, BooleanType, bool>(utf8(), v2, is_valid, boolean(),
                                                         e, options);
+
+  // Same with LargeStringType
+  CheckCase<LargeStringType, std::string, BooleanType, bool>(large_utf8(), v1, is_valid,
+                                                             boolean(), e, options);
 }
 
 TEST_F(TestCast, StringToBooleanErrors) {
@@ -931,84 +1063,13 @@ TEST_F(TestCast, StringToBooleanErrors) {
 
   CheckFails<StringType, std::string>(utf8(), {"false "}, is_valid, boolean(), options);
   CheckFails<StringType, std::string>(utf8(), {"T"}, is_valid, boolean(), options);
+  CheckFails<LargeStringType, std::string>(large_utf8(), {"T"}, is_valid, boolean(),
+                                           options);
 }
 
-TEST_F(TestCast, StringToNumber) {
-  CastOptions options;
+TEST_F(TestCast, StringToNumber) { TestCastStringToNumber<StringType>(); }
 
-  std::vector<bool> is_valid = {true, false, true, true, true};
-
-  // string to int
-  std::vector<std::string> v_int = {"0", "1", "127", "-1", "0"};
-  std::vector<int8_t> e_int8 = {0, 1, 127, -1, 0};
-  std::vector<int16_t> e_int16 = {0, 1, 127, -1, 0};
-  std::vector<int32_t> e_int32 = {0, 1, 127, -1, 0};
-  std::vector<int64_t> e_int64 = {0, 1, 127, -1, 0};
-  CheckCase<StringType, std::string, Int8Type, int8_t>(utf8(), v_int, is_valid, int8(),
-                                                       e_int8, options);
-  CheckCase<StringType, std::string, Int16Type, int16_t>(utf8(), v_int, is_valid, int16(),
-                                                         e_int16, options);
-  CheckCase<StringType, std::string, Int32Type, int32_t>(utf8(), v_int, is_valid, int32(),
-                                                         e_int32, options);
-  CheckCase<StringType, std::string, Int64Type, int64_t>(utf8(), v_int, is_valid, int64(),
-                                                         e_int64, options);
-
-  v_int = {"2147483647", "0", "-2147483648", "0", "0"};
-  e_int32 = {2147483647, 0, -2147483648LL, 0, 0};
-  CheckCase<StringType, std::string, Int32Type, int32_t>(utf8(), v_int, is_valid, int32(),
-                                                         e_int32, options);
-  v_int = {"9223372036854775807", "0", "-9223372036854775808", "0", "0"};
-  e_int64 = {9223372036854775807LL, 0, (-9223372036854775807LL - 1), 0, 0};
-  CheckCase<StringType, std::string, Int64Type, int64_t>(utf8(), v_int, is_valid, int64(),
-                                                         e_int64, options);
-
-  // string to uint
-  std::vector<std::string> v_uint = {"0", "1", "127", "255", "0"};
-  std::vector<uint8_t> e_uint8 = {0, 1, 127, 255, 0};
-  std::vector<uint16_t> e_uint16 = {0, 1, 127, 255, 0};
-  std::vector<uint32_t> e_uint32 = {0, 1, 127, 255, 0};
-  std::vector<uint64_t> e_uint64 = {0, 1, 127, 255, 0};
-  CheckCase<StringType, std::string, UInt8Type, uint8_t>(utf8(), v_uint, is_valid,
-                                                         uint8(), e_uint8, options);
-  CheckCase<StringType, std::string, UInt16Type, uint16_t>(utf8(), v_uint, is_valid,
-                                                           uint16(), e_uint16, options);
-  CheckCase<StringType, std::string, UInt32Type, uint32_t>(utf8(), v_uint, is_valid,
-                                                           uint32(), e_uint32, options);
-  CheckCase<StringType, std::string, UInt64Type, uint64_t>(utf8(), v_uint, is_valid,
-                                                           uint64(), e_uint64, options);
-
-  v_uint = {"4294967295", "0", "0", "0", "0"};
-  e_uint32 = {4294967295, 0, 0, 0, 0};
-  CheckCase<StringType, std::string, UInt32Type, uint32_t>(utf8(), v_uint, is_valid,
-                                                           uint32(), e_uint32, options);
-  v_uint = {"18446744073709551615", "0", "0", "0", "0"};
-  e_uint64 = {18446744073709551615ULL, 0, 0, 0, 0};
-  CheckCase<StringType, std::string, UInt64Type, uint64_t>(utf8(), v_uint, is_valid,
-                                                           uint64(), e_uint64, options);
-
-  // string to float
-  std::vector<std::string> v_float = {"0.1", "1.2", "127.3", "200.4", "0.5"};
-  std::vector<float> e_float = {0.1f, 1.2f, 127.3f, 200.4f, 0.5f};
-  std::vector<double> e_double = {0.1, 1.2, 127.3, 200.4, 0.5};
-  CheckCase<StringType, std::string, FloatType, float>(utf8(), v_float, is_valid,
-                                                       float32(), e_float, options);
-  CheckCase<StringType, std::string, DoubleType, double>(utf8(), v_float, is_valid,
-                                                         float64(), e_double, options);
-
-  // Test that casting is locale-independent
-  auto global_locale = std::locale();
-  try {
-    // French locale uses the comma as decimal point
-    std::locale::global(std::locale("fr_FR.UTF-8"));
-  } catch (std::runtime_error&) {
-    // Locale unavailable, ignore
-  }
-  CheckCase<StringType, std::string, FloatType, float>(utf8(), v_float, is_valid,
-                                                       float32(), e_float, options);
-  CheckCase<StringType, std::string, DoubleType, double>(utf8(), v_float, is_valid,
-                                                         float64(), e_double, options);
-  std::locale::global(global_locale);
-}
+TEST_F(TestCast, LargeStringToNumber) { TestCastStringToNumber<LargeStringType>(); }
 
 TEST_F(TestCast, StringToNumberErrors) {
   CastOptions options;
@@ -1027,24 +1088,9 @@ TEST_F(TestCast, StringToNumberErrors) {
   CheckFails<StringType, std::string>(utf8(), {"z"}, is_valid, float32(), options);
 }
 
-TEST_F(TestCast, StringToTimestamp) {
-  CastOptions options;
-
-  std::vector<bool> is_valid = {true, false, true};
-  std::vector<std::string> strings = {"1970-01-01", "xxx", "2000-02-29"};
+TEST_F(TestCast, StringToTimestamp) { TestCastStringToTimestamp<StringType>(); }
 
-  auto type = timestamp(TimeUnit::SECOND);
-  std::vector<int64_t> e = {0, 0, 951782400};
-  CheckCase<StringType, std::string, TimestampType, int64_t>(utf8(), strings, is_valid,
-                                                             type, e, options);
-
-  type = timestamp(TimeUnit::MICRO);
-  e = {0, 0, 951782400000000LL};
-  CheckCase<StringType, std::string, TimestampType, int64_t>(utf8(), strings, is_valid,
-                                                             type, e, options);
-
-  // NOTE: timestamp parsing is tested comprehensively in parsing-util-test.cc
-}
+TEST_F(TestCast, LargeStringToTimestamp) { TestCastStringToTimestamp<LargeStringType>(); }
 
 TEST_F(TestCast, StringToTimestampErrors) {
   CastOptions options;
@@ -1058,29 +1104,10 @@ TEST_F(TestCast, StringToTimestampErrors) {
   }
 }
 
-constexpr const char* kInvalidUtf8 = "\xa0\xa1";
-
-TEST_F(TestCast, BinaryToString) {
-  CastOptions options;
-
-  // All valid except the last one
-  std::vector<bool> all = {1, 1, 1, 1, 1};
-  std::vector<bool> valid = {1, 1, 1, 1, 0};
-  std::vector<std::string> strings = {"Hi", "olá mundo", "你好世界", "", kInvalidUtf8};
-
-  std::shared_ptr<Array> array;
-
-  // Should accept when invalid but null.
-  ArrayFromVector<BinaryType, std::string>(binary(), valid, strings, &array);
-  CheckZeroCopy(*array, utf8());
-
-  // Should refuse due to invalid utf8 payload
-  CheckFails<BinaryType, std::string>(binary(), strings, all, utf8(), options);
+TEST_F(TestCast, BinaryToString) { TestCastBinaryToString<BinaryType, StringType>(); }
 
-  // Should accept due to option override
-  options.allow_invalid_utf8 = true;
-  CheckCase<BinaryType, std::string, StringType, std::string>(binary(), strings, all,
-                                                              utf8(), strings, options);
+TEST_F(TestCast, LargeBinaryToLargeString) {
+  TestCastBinaryToString<LargeBinaryType, LargeStringType>();
 }
 
 TEST_F(TestCast, ListToList) {
diff --git a/cpp/src/arrow/compute/kernels/cast.cc b/cpp/src/arrow/compute/kernels/cast.cc
index 88a4f30..a8b6615 100644
--- a/cpp/src/arrow/compute/kernels/cast.cc
+++ b/cpp/src/arrow/compute/kernels/cast.cc
@@ -905,13 +905,15 @@ struct CastFunctor<T, DictionaryType> {
 // ----------------------------------------------------------------------
 // String to Number
 
-template <typename O>
-struct CastFunctor<O, StringType, enable_if_number<O>> {
+template <typename I, typename O>
+struct CastFunctor<O, I,
+                   typename std::enable_if<is_any_string_type<I>::value &&
+                                           is_number_type<O>::value>::type> {
   void operator()(FunctionContext* ctx, const CastOptions& options,
                   const ArrayData& input, ArrayData* output) {
     using out_type = typename O::c_type;
 
-    StringArray input_array(input.Copy());
+    typename TypeTraits<I>::ArrayType input_array(input.Copy());
     auto out_data = output->GetMutableValues<out_type>(1);
     internal::StringConverter<O> converter;
 
@@ -933,15 +935,15 @@ struct CastFunctor<O, StringType, enable_if_number<O>> {
 // ----------------------------------------------------------------------
 // String to Boolean
 
-template <typename O>
-struct CastFunctor<O, StringType,
-                   typename std::enable_if<std::is_same<BooleanType, O>::value>::type> {
+template <typename I>
+struct CastFunctor<BooleanType, I,
+                   typename std::enable_if<is_any_string_type<I>::value>::type> {
   void operator()(FunctionContext* ctx, const CastOptions& options,
                   const ArrayData& input, ArrayData* output) {
-    StringArray input_array(input.Copy());
+    typename TypeTraits<I>::ArrayType input_array(input.Copy());
     internal::FirstTimeBitmapWriter writer(output->buffers[1]->mutable_data(),
                                            output->offset, input.length);
-    internal::StringConverter<O> converter;
+    internal::StringConverter<BooleanType> converter;
 
     for (int64_t i = 0; i < input.length; ++i) {
       if (input_array.IsNull(i)) {
@@ -972,13 +974,14 @@ struct CastFunctor<O, StringType,
 // ----------------------------------------------------------------------
 // String to Timestamp
 
-template <>
-struct CastFunctor<TimestampType, StringType> {
+template <typename I>
+struct CastFunctor<TimestampType, I,
+                   typename std::enable_if<is_any_string_type<I>::value>::type> {
   void operator()(FunctionContext* ctx, const CastOptions& options,
                   const ArrayData& input, ArrayData* output) {
     using out_type = TimestampType::c_type;
 
-    StringArray input_array(input.Copy());
+    typename TypeTraits<I>::ArrayType input_array(input.Copy());
     auto out_data = output->GetMutableValues<out_type>(1);
     internal::StringConverter<TimestampType> converter(output->type);
 
@@ -1001,47 +1004,51 @@ struct CastFunctor<TimestampType, StringType> {
 // Binary to String
 //
 
-template <typename I>
-struct CastFunctor<StringType, I,
-                   typename std::enable_if<std::is_same<BinaryType, I>::value>::type> {
+#if defined(_MSC_VER)
+// Silence warning: """'visitor': unreferenced local variable"""
+#pragma warning(push)
+#pragma warning(disable : 4101)
+#endif
+
+template <typename I, typename O>
+struct BinaryToStringSameWidthCastFunctor {
   void operator()(FunctionContext* ctx, const CastOptions& options,
                   const ArrayData& input, ArrayData* output) {
-    BinaryArray binary(input.Copy());
+    if (!options.allow_invalid_utf8) {
+      util::InitializeUTF8();
 
-    if (options.allow_invalid_utf8) {
-      ZeroCopyData(input, output);
-      return;
+      ArrayDataVisitor<I> visitor;
+      Status st = visitor.Visit(input, this);
+      if (!st.ok()) {
+        ctx->SetStatus(st);
+        return;
+      }
     }
+    ZeroCopyData(input, output);
+  }
 
-    util::InitializeUTF8();
-
-    if (binary.null_count() != 0) {
-      for (int64_t i = 0; i < input.length; i++) {
-        if (binary.IsNull(i)) {
-          continue;
-        }
-
-        const auto str = binary.GetView(i);
-        if (ARROW_PREDICT_FALSE(!arrow::util::ValidateUTF8(str))) {
-          ctx->SetStatus(Status::Invalid("Invalid UTF8 payload"));
-          return;
-        }
-      }
+  Status VisitNull() { return Status::OK(); }
 
-    } else {
-      for (int64_t i = 0; i < input.length; i++) {
-        const auto str = binary.GetView(i);
-        if (ARROW_PREDICT_FALSE(!arrow::util::ValidateUTF8(str))) {
-          ctx->SetStatus(Status::Invalid("Invalid UTF8 payload"));
-          return;
-        }
-      }
+  Status VisitValue(util::string_view str) {
+    if (ARROW_PREDICT_FALSE(!arrow::util::ValidateUTF8(str))) {
+      return Status::Invalid("Invalid UTF8 payload");
     }
-
-    ZeroCopyData(input, output);
+    return Status::OK();
   }
 };
 
+template <>
+struct CastFunctor<StringType, BinaryType>
+    : public BinaryToStringSameWidthCastFunctor<StringType, BinaryType> {};
+
+template <>
+struct CastFunctor<LargeStringType, LargeBinaryType>
+    : public BinaryToStringSameWidthCastFunctor<LargeStringType, LargeBinaryType> {};
+
+#if defined(_MSC_VER)
+#pragma warning(pop)
+#endif
+
 // ----------------------------------------------------------------------
 
 typedef std::function<void(FunctionContext*, const CastOptions& options, const ArrayData&,
@@ -1142,6 +1149,8 @@ GET_CAST_FUNCTION(TIME64_CASES, Time64Type)
 GET_CAST_FUNCTION(TIMESTAMP_CASES, TimestampType)
 GET_CAST_FUNCTION(BINARY_CASES, BinaryType)
 GET_CAST_FUNCTION(STRING_CASES, StringType)
+GET_CAST_FUNCTION(LARGEBINARY_CASES, LargeBinaryType)
+GET_CAST_FUNCTION(LARGESTRING_CASES, LargeStringType)
 GET_CAST_FUNCTION(DICTIONARY_CASES, DictionaryType)
 
 #define CAST_FUNCTION_CASE(InType)                      \
@@ -1225,6 +1234,8 @@ Status GetCastFunction(const DataType& in_type, std::shared_ptr<DataType> out_ty
     CAST_FUNCTION_CASE(TimestampType);
     CAST_FUNCTION_CASE(BinaryType);
     CAST_FUNCTION_CASE(StringType);
+    CAST_FUNCTION_CASE(LargeBinaryType);
+    CAST_FUNCTION_CASE(LargeStringType);
     CAST_FUNCTION_CASE(DictionaryType);
     case Type::LIST:
       RETURN_NOT_OK(GetListCastFunc(in_type, std::move(out_type), options, kernel));
diff --git a/cpp/src/arrow/compute/kernels/generated/cast-codegen-internal.h b/cpp/src/arrow/compute/kernels/generated/cast-codegen-internal.h
index 77334af..fb82067 100644
--- a/cpp/src/arrow/compute/kernels/generated/cast-codegen-internal.h
+++ b/cpp/src/arrow/compute/kernels/generated/cast-codegen-internal.h
@@ -171,6 +171,9 @@
 #define BINARY_CASES(TEMPLATE) \
   TEMPLATE(BinaryType, StringType)
 
+#define LARGEBINARY_CASES(TEMPLATE) \
+  TEMPLATE(LargeBinaryType, LargeStringType)
+
 #define STRING_CASES(TEMPLATE) \
   TEMPLATE(StringType, BooleanType) \
   TEMPLATE(StringType, UInt8Type) \
@@ -185,6 +188,20 @@
   TEMPLATE(StringType, DoubleType) \
   TEMPLATE(StringType, TimestampType)
 
+#define LARGESTRING_CASES(TEMPLATE) \
+  TEMPLATE(LargeStringType, BooleanType) \
+  TEMPLATE(LargeStringType, UInt8Type) \
+  TEMPLATE(LargeStringType, Int8Type) \
+  TEMPLATE(LargeStringType, UInt16Type) \
+  TEMPLATE(LargeStringType, Int16Type) \
+  TEMPLATE(LargeStringType, UInt32Type) \
+  TEMPLATE(LargeStringType, Int32Type) \
+  TEMPLATE(LargeStringType, UInt64Type) \
+  TEMPLATE(LargeStringType, Int64Type) \
+  TEMPLATE(LargeStringType, FloatType) \
+  TEMPLATE(LargeStringType, DoubleType) \
+  TEMPLATE(LargeStringType, TimestampType)
+
 #define DICTIONARY_CASES(TEMPLATE) \
   TEMPLATE(DictionaryType, UInt8Type) \
   TEMPLATE(DictionaryType, Int8Type) \
diff --git a/cpp/src/arrow/compute/kernels/generated/codegen.py b/cpp/src/arrow/compute/kernels/generated/codegen.py
index 04fc386..c9db7ea 100644
--- a/cpp/src/arrow/compute/kernels/generated/codegen.py
+++ b/cpp/src/arrow/compute/kernels/generated/codegen.py
@@ -85,7 +85,9 @@ CAST_GENERATORS = [
     CastCodeGenerator('Timestamp', ['Date32', 'Date64', 'Timestamp'],
                       parametric=True),
     CastCodeGenerator('Binary', ['String']),
+    CastCodeGenerator('LargeBinary', ['LargeString']),
     CastCodeGenerator('String', NUMERIC_TYPES + ['Timestamp']),
+    CastCodeGenerator('LargeString', NUMERIC_TYPES + ['Timestamp']),
     CastCodeGenerator('Dictionary',
                       INTEGER_TYPES + FLOATING_TYPES + DATE_TIME_TYPES +
                       ['Null', 'Binary', 'FixedSizeBinary', 'String',
diff --git a/cpp/src/arrow/csv/converter-test.cc b/cpp/src/arrow/csv/converter-test.cc
index a5e4c03..53176ff 100644
--- a/cpp/src/arrow/csv/converter-test.cc
+++ b/cpp/src/arrow/csv/converter-test.cc
@@ -30,6 +30,7 @@
 #include "arrow/status.h"
 #include "arrow/testing/gtest_util.h"
 #include "arrow/type.h"
+#include "arrow/type_traits.h"
 #include "arrow/util/decimal.h"
 #include "arrow/util/logging.h"
 
@@ -118,11 +119,17 @@ void AssertConversionError(const std::shared_ptr<DataType>& type,
 //////////////////////////////////////////////////////////////////////////
 // Test functions begin here
 
-TEST(BinaryConversion, Basics) {
-  AssertConversion<BinaryType, std::string>(binary(), {"ab,cdé\n", ",\xffgh\n"},
-                                            {{"ab", ""}, {"cdé", "\xffgh"}});
+template <typename T>
+static void TestBinaryConversionBasics() {
+  auto type = TypeTraits<T>::type_singleton();
+  AssertConversion<T, std::string>(type, {"ab,cdé\n", ",\xffgh\n"},
+                                   {{"ab", ""}, {"cdé", "\xffgh"}});
 }
 
+TEST(BinaryConversion, Basics) { TestBinaryConversionBasics<BinaryType>(); }
+
+TEST(LargeBinaryConversion, Basics) { TestBinaryConversionBasics<LargeBinaryType>(); }
+
 TEST(BinaryConversion, Nulls) {
   AssertConversion<BinaryType, std::string>(binary(), {"ab,N/A\n", "NULL,\n"},
                                             {{"ab", "NULL"}, {"N/A", ""}},
@@ -135,16 +142,22 @@ TEST(BinaryConversion, Nulls) {
                                             {{true, false}, {false, false}}, options);
 }
 
-TEST(StringConversion, Basics) {
-  AssertConversion<StringType, std::string>(utf8(), {"ab,cdé\n", ",gh\n"},
-                                            {{"ab", ""}, {"cdé", "gh"}});
+template <typename T>
+static void TestStringConversionBasics() {
+  auto type = TypeTraits<T>::type_singleton();
+  AssertConversion<T, std::string>(type, {"ab,cdé\n", ",gh\n"},
+                                   {{"ab", ""}, {"cdé", "gh"}});
 
   auto options = ConvertOptions::Defaults();
   options.check_utf8 = false;
-  AssertConversion<StringType, std::string>(utf8(), {"ab,cdé\n", ",\xffgh\n"},
-                                            {{"ab", ""}, {"cdé", "\xffgh"}}, options);
+  AssertConversion<T, std::string>(type, {"ab,cdé\n", ",\xffgh\n"},
+                                   {{"ab", ""}, {"cdé", "\xffgh"}}, options);
 }
 
+TEST(StringConversion, Basics) { TestStringConversionBasics<StringType>(); }
+
+TEST(LargeStringConversion, Basics) { TestStringConversionBasics<LargeStringType>(); }
+
 TEST(StringConversion, Nulls) {
   AssertConversion<StringType, std::string>(utf8(), {"ab,N/A\n", "NULL,\n"},
                                             {{"ab", "NULL"}, {"N/A", ""}},
@@ -157,11 +170,17 @@ TEST(StringConversion, Nulls) {
                                             {{true, false}, {false, false}}, options);
 }
 
-TEST(StringConversion, Errors) {
+template <typename T>
+static void TestStringConversionErrors() {
+  auto type = TypeTraits<T>::type_singleton();
   // Invalid UTF8 in column 0
-  AssertConversionError(utf8(), {"ab,cdé\n", "\xff,gh\n"}, {0});
+  AssertConversionError(type, {"ab,cdé\n", "\xff,gh\n"}, {0});
 }
 
+TEST(StringConversion, Errors) { TestStringConversionErrors<StringType>(); }
+
+TEST(LargeStringConversion, Errors) { TestStringConversionErrors<LargeStringType>(); }
+
 TEST(FixedSizeBinaryConversion, Basics) {
   AssertConversion<FixedSizeBinaryType, std::string>(
       fixed_size_binary(2), {"ab,cd\n", "gh,ij\n"}, {{"ab", "gh"}, {"cd", "ij"}});
diff --git a/cpp/src/arrow/csv/converter.cc b/cpp/src/arrow/csv/converter.cc
index 53495cf..1c61d3c 100644
--- a/cpp/src/arrow/csv/converter.cc
+++ b/cpp/src/arrow/csv/converter.cc
@@ -431,6 +431,7 @@ Status Converter::Make(const std::shared_ptr<DataType>& type,
     CONVERTER_CASE(Type::BOOL, BooleanConverter)
     CONVERTER_CASE(Type::TIMESTAMP, TimestampConverter)
     CONVERTER_CASE(Type::BINARY, (VarSizeBinaryConverter<BinaryType, false>))
+    CONVERTER_CASE(Type::LARGE_BINARY, (VarSizeBinaryConverter<LargeBinaryType, false>))
     CONVERTER_CASE(Type::FIXED_SIZE_BINARY, FixedSizeBinaryConverter)
     CONVERTER_CASE(Type::DECIMAL, DecimalConverter)
 
@@ -442,6 +443,14 @@ Status Converter::Make(const std::shared_ptr<DataType>& type,
       }
       break;
 
+    case Type::LARGE_STRING:
+      if (options.check_utf8) {
+        result = new VarSizeBinaryConverter<LargeStringType, true>(type, options, pool);
+      } else {
+        result = new VarSizeBinaryConverter<LargeStringType, false>(type, options, pool);
+      }
+      break;
+
     default: {
       return Status::NotImplemented("CSV conversion to ", type->ToString(),
                                     " is not supported");
diff --git a/cpp/src/arrow/ipc/feather.cc b/cpp/src/arrow/ipc/feather.cc
index 7cd64c8..8436bd2 100644
--- a/cpp/src/arrow/ipc/feather.cc
+++ b/cpp/src/arrow/ipc/feather.cc
@@ -367,6 +367,8 @@ class TableReader::TableReaderImpl {
           PRIMITIVE_CASE(DOUBLE, float64);
           PRIMITIVE_CASE(UTF8, utf8);
           PRIMITIVE_CASE(BINARY, binary);
+          PRIMITIVE_CASE(LARGE_UTF8, large_utf8);
+          PRIMITIVE_CASE(LARGE_BINARY, large_binary);
           default:
             return Status::Invalid("Unrecognized type");
         }
@@ -410,6 +412,10 @@ class TableReader::TableReaderImpl {
       int64_t offsets_size = GetOutputLength((meta->length() + 1) * sizeof(int32_t));
       buffers.push_back(SliceBuffer(buffer, offset, offsets_size));
       offset += offsets_size;
+    } else if (is_large_binary_like(type->id())) {
+      int64_t offsets_size = GetOutputLength((meta->length() + 1) * sizeof(int64_t));
+      buffers.push_back(SliceBuffer(buffer, offset, offsets_size));
+      offset += offsets_size;
     }
 
     buffers.push_back(SliceBuffer(buffer, offset, buffer->size() - offset));
@@ -585,6 +591,10 @@ fbs::Type ToFlatbufferType(Type::type type) {
       return fbs::Type_UTF8;
     case Type::BINARY:
       return fbs::Type_BINARY;
+    case Type::LARGE_STRING:
+      return fbs::Type_LARGE_UTF8;
+    case Type::LARGE_BINARY:
+      return fbs::Type_LARGE_BINARY;
     case Type::DATE32:
       return fbs::Type_INT32;
     case Type::TIMESTAMP:
@@ -644,7 +654,8 @@ class TableWriter::TableWriterImpl : public ArrayVisitor {
   }
 
   Status LoadArrayMetadata(const Array& values, ArrayMetadata* meta) {
-    if (!(is_primitive(values.type_id()) || is_binary_like(values.type_id()))) {
+    if (!(is_primitive(values.type_id()) || is_binary_like(values.type_id()) ||
+          is_large_binary_like(values.type_id()))) {
       return Status::Invalid("Array is not primitive type: ", values.type()->ToString());
     }
 
@@ -659,6 +670,32 @@ class TableWriter::TableWriterImpl : public ArrayVisitor {
     return Status::OK();
   }
 
+  template <typename ArrayType>
+  Status WriteBinaryArray(const ArrayType& values, ArrayMetadata* meta,
+                          const uint8_t** values_buffer, int64_t* values_bytes,
+                          int64_t* bytes_written) {
+    using offset_type = typename ArrayType::offset_type;
+
+    int64_t offset_bytes = sizeof(offset_type) * (values.length() + 1);
+
+    if (values.value_offsets()) {
+      *values_bytes = values.raw_value_offsets()[values.length()];
+
+      // Write the variable-length offsets
+      RETURN_NOT_OK(WritePadded(
+          stream_.get(), reinterpret_cast<const uint8_t*>(values.raw_value_offsets()),
+          offset_bytes, bytes_written));
+    } else {
+      RETURN_NOT_OK(WritePaddedBlank(stream_.get(), offset_bytes, bytes_written));
+    }
+    meta->total_bytes += *bytes_written;
+
+    if (values.value_data()) {
+      *values_buffer = values.value_data()->data();
+    }
+    return Status::OK();
+  }
+
   Status WriteArray(const Array& values, ArrayMetadata* meta) {
     RETURN_NOT_OK(CheckStarted());
     RETURN_NOT_OK(LoadArrayMetadata(values, meta));
@@ -687,26 +724,11 @@ class TableWriter::TableWriterImpl : public ArrayVisitor {
     const uint8_t* values_buffer = nullptr;
 
     if (is_binary_like(values.type_id())) {
-      const auto& bin_values = checked_cast<const BinaryArray&>(values);
-
-      int64_t offset_bytes = sizeof(int32_t) * (values.length() + 1);
-
-      if (bin_values.value_offsets()) {
-        values_bytes = bin_values.raw_value_offsets()[values.length()];
-
-        // Write the variable-length offsets
-        RETURN_NOT_OK(
-            WritePadded(stream_.get(),
-                        reinterpret_cast<const uint8_t*>(bin_values.raw_value_offsets()),
-                        offset_bytes, &bytes_written));
-      } else {
-        RETURN_NOT_OK(WritePaddedBlank(stream_.get(), offset_bytes, &bytes_written));
-      }
-      meta->total_bytes += bytes_written;
-
-      if (bin_values.value_data()) {
-        values_buffer = bin_values.value_data()->data();
-      }
+      RETURN_NOT_OK(WriteBinaryArray(checked_cast<const BinaryArray&>(values), meta,
+                                     &values_buffer, &values_bytes, &bytes_written));
+    } else if (is_large_binary_like(values.type_id())) {
+      RETURN_NOT_OK(WriteBinaryArray(checked_cast<const LargeBinaryArray&>(values), meta,
+                                     &values_buffer, &values_bytes, &bytes_written));
     } else {
       const auto& prim_values = checked_cast<const PrimitiveArray&>(values);
       const auto& fw_type = checked_cast<const FixedWidthType&>(*values.type());
@@ -760,6 +782,8 @@ class TableWriter::TableWriterImpl : public ArrayVisitor {
   VISIT_PRIMITIVE(DoubleArray)
   VISIT_PRIMITIVE(BinaryArray)
   VISIT_PRIMITIVE(StringArray)
+  VISIT_PRIMITIVE(LargeBinaryArray)
+  VISIT_PRIMITIVE(LargeStringArray)
 
 #undef VISIT_PRIMITIVE
 
diff --git a/cpp/src/arrow/ipc/feather.fbs b/cpp/src/arrow/ipc/feather.fbs
index a27d399..5ec0629 100644
--- a/cpp/src/arrow/ipc/feather.fbs
+++ b/cpp/src/arrow/ipc/feather.fbs
@@ -48,7 +48,10 @@ enum Type : byte {
 
   TIMESTAMP = 14,
   DATE = 15,
-  TIME = 16
+  TIME = 16,
+
+  LARGE_UTF8 = 17,
+  LARGE_BINARY = 18
 }
 
 enum Encoding : byte {
diff --git a/cpp/src/arrow/ipc/json-internal.cc b/cpp/src/arrow/ipc/json-internal.cc
index 1352965..49a884e 100644
--- a/cpp/src/arrow/ipc/json-internal.cc
+++ b/cpp/src/arrow/ipc/json-internal.cc
@@ -312,6 +312,10 @@ class SchemaWriter {
   Status Visit(const TimeType& type) { return WritePrimitive("time", type); }
   Status Visit(const StringType& type) { return WriteVarBytes("utf8", type); }
   Status Visit(const BinaryType& type) { return WriteVarBytes("binary", type); }
+  Status Visit(const LargeStringType& type) { return WriteVarBytes("large_utf8", type); }
+  Status Visit(const LargeBinaryType& type) {
+    return WriteVarBytes("large_binary", type);
+  }
   Status Visit(const FixedSizeBinaryType& type) {
     return WritePrimitive("fixedsizebinary", type);
   }
@@ -430,20 +434,26 @@ class ArrayWriter {
     }
   }
 
-  // Binary, encode to hexadecimal. UTF8 string write as is
+  // Binary, encode to hexadecimal.
   template <typename T>
-  typename std::enable_if<std::is_base_of<BinaryArray, T>::value, void>::type
+  typename std::enable_if<std::is_same<BinaryArray, T>::value ||
+                              std::is_same<LargeBinaryArray, T>::value,
+                          void>::type
   WriteDataValues(const T& arr) {
     for (int64_t i = 0; i < arr.length(); ++i) {
-      int32_t length;
-      const uint8_t* buf = arr.GetValue(i, &length);
+      writer_->String(HexEncode(arr.GetView(i)));
+    }
+  }
 
-      if (std::is_base_of<StringArray, T>::value) {
-        // Presumed UTF-8
-        writer_->String(reinterpret_cast<const char*>(buf), length);
-      } else {
-        writer_->String(HexEncode(buf, length));
-      }
+  // UTF8 string, write as is
+  template <typename T>
+  typename std::enable_if<std::is_same<StringArray, T>::value ||
+                              std::is_same<LargeStringArray, T>::value,
+                          void>::type
+  WriteDataValues(const T& arr) {
+    for (int64_t i = 0; i < arr.length(); ++i) {
+      auto view = arr.GetView(i);
+      writer_->String(view.data(), static_cast<rj::SizeType>(view.size()));
     }
   }
 
@@ -558,8 +568,10 @@ class ArrayWriter {
   }
 
   template <typename T>
-  typename std::enable_if<std::is_base_of<BinaryArray, T>::value, Status>::type Visit(
-      const T& array) {
+  typename std::enable_if<std::is_base_of<BinaryArray, T>::value ||
+                              std::is_base_of<LargeBinaryArray, T>::value,
+                          Status>::type
+  Visit(const T& array) {
     WriteValidityField(array);
     WriteIntegerField("OFFSET", array.raw_value_offsets(), array.length() + 1);
     WriteDataField(array);
@@ -911,6 +923,10 @@ static Status GetType(const RjObject& json_type,
     *type = utf8();
   } else if (type_name == "binary") {
     *type = binary();
+  } else if (type_name == "large_utf8") {
+    *type = large_utf8();
+  } else if (type_name == "large_binary") {
+    *type = large_binary();
   } else if (type_name == "fixedsizebinary") {
     return GetFixedSizeBinary(json_type, type);
   } else if (type_name == "decimal") {
@@ -1091,9 +1107,10 @@ class ArrayReader {
   }
 
   template <typename T>
-  typename std::enable_if<std::is_base_of<BinaryType, T>::value, Status>::type Visit(
+  typename std::enable_if<std::is_base_of<BaseBinaryType, T>::value, Status>::type Visit(
       const T& type) {
     typename TypeTraits<T>::BuilderType builder(pool_);
+    using offset_type = typename T::offset_type;
 
     const auto& json_data = obj_.FindMember(kData);
     RETURN_NOT_ARRAY(kData, json_data, obj_);
@@ -1110,23 +1127,27 @@ class ArrayReader {
 
       const rj::Value& val = json_data_arr[i];
       DCHECK(val.IsString());
-      if (std::is_base_of<StringType, T>::value) {
+
+      if (T::is_utf8) {
         RETURN_NOT_OK(builder.Append(val.GetString()));
       } else {
         std::string hex_string = val.GetString();
 
-        DCHECK(hex_string.size() % 2 == 0) << "Expected base16 hex string";
-        int32_t length = static_cast<int>(hex_string.size()) / 2;
+        if (hex_string.size() % 2 != 0) {
+          return Status::Invalid("Expected base16 hex string");
+        }
+        const auto value_len = static_cast<int64_t>(hex_string.size()) / 2;
 
         std::shared_ptr<Buffer> byte_buffer;
-        RETURN_NOT_OK(AllocateBuffer(pool_, length, &byte_buffer));
+        RETURN_NOT_OK(AllocateBuffer(pool_, value_len, &byte_buffer));
 
         const char* hex_data = hex_string.c_str();
         uint8_t* byte_buffer_data = byte_buffer->mutable_data();
-        for (int32_t j = 0; j < length; ++j) {
+        for (int64_t j = 0; j < value_len; ++j) {
           RETURN_NOT_OK(ParseHexValue(hex_data + j * 2, &byte_buffer_data[j]));
         }
-        RETURN_NOT_OK(builder.Append(byte_buffer_data, length));
+        RETURN_NOT_OK(
+            builder.Append(byte_buffer_data, static_cast<offset_type>(value_len)));
       }
     }
 
diff --git a/cpp/src/arrow/ipc/json-simple-test.cc b/cpp/src/arrow/ipc/json-simple-test.cc
index 3c42775..77ab770 100644
--- a/cpp/src/arrow/ipc/json-simple-test.cc
+++ b/cpp/src/arrow/ipc/json-simple-test.cc
@@ -322,6 +322,21 @@ TEST(TestString, Basics) {
   AssertJSONArray<BinaryType, std::string>(type, "[\"\\u0000\\u001f\"]", {s});
 }
 
+TEST(TestLargeString, Basics) {
+  // Similar as TestString above, only testing the basics
+  std::shared_ptr<DataType> type = large_utf8();
+  std::shared_ptr<Array> expected, actual;
+
+  AssertJSONArray<LargeStringType, std::string>(type, "[\"\", \"foo\"]", {"", "foo"});
+  AssertJSONArray<LargeStringType, std::string>(type, "[\"\", null]", {true, false},
+                                                {"", ""});
+
+  // Large binary type
+  type = large_binary();
+  AssertJSONArray<LargeBinaryType, std::string>(type, "[\"\", \"foo\", null]",
+                                                {true, true, false}, {"", "foo", ""});
+}
+
 TEST(TestTimestamp, Basics) {
   // Timestamp type
   auto type = timestamp(TimeUnit::SECOND);
diff --git a/cpp/src/arrow/ipc/json-simple.cc b/cpp/src/arrow/ipc/json-simple.cc
index ac0237f..20ac025 100644
--- a/cpp/src/arrow/ipc/json-simple.cc
+++ b/cpp/src/arrow/ipc/json-simple.cc
@@ -26,6 +26,7 @@
 #include "arrow/ipc/json-internal.h"
 #include "arrow/ipc/json-simple.h"
 #include "arrow/memory_pool.h"
+#include "arrow/type_traits.h"
 #include "arrow/util/checked_cast.h"
 #include "arrow/util/decimal.h"
 #include "arrow/util/logging.h"
@@ -344,11 +345,14 @@ class TimestampConverter final : public ConcreteConverter<TimestampConverter> {
 // ------------------------------------------------------------------------
 // Converter for binary and string arrays
 
-class StringConverter final : public ConcreteConverter<StringConverter> {
+template <typename TYPE>
+class StringConverter final : public ConcreteConverter<StringConverter<TYPE>> {
  public:
+  using BuilderType = typename TypeTraits<TYPE>::BuilderType;
+
   explicit StringConverter(const std::shared_ptr<DataType>& type) {
     this->type_ = type;
-    builder_ = std::make_shared<BinaryBuilder>(type, default_memory_pool());
+    builder_ = std::make_shared<BuilderType>(type, default_memory_pool());
   }
 
   Status AppendNull() override { return builder_->AppendNull(); }
@@ -368,7 +372,7 @@ class StringConverter final : public ConcreteConverter<StringConverter> {
   std::shared_ptr<ArrayBuilder> builder() override { return builder_; }
 
  private:
-  std::shared_ptr<BinaryBuilder> builder_;
+  std::shared_ptr<BuilderType> builder_;
 };
 
 // ------------------------------------------------------------------------
@@ -734,8 +738,10 @@ Status GetConverter(const std::shared_ptr<DataType>& type,
     SIMPLE_CONVERTER_CASE(Type::MAP, MapConverter)
     SIMPLE_CONVERTER_CASE(Type::FIXED_SIZE_LIST, FixedSizeListConverter)
     SIMPLE_CONVERTER_CASE(Type::STRUCT, StructConverter)
-    SIMPLE_CONVERTER_CASE(Type::STRING, StringConverter)
-    SIMPLE_CONVERTER_CASE(Type::BINARY, StringConverter)
+    SIMPLE_CONVERTER_CASE(Type::STRING, StringConverter<StringType>)
+    SIMPLE_CONVERTER_CASE(Type::BINARY, StringConverter<BinaryType>)
+    SIMPLE_CONVERTER_CASE(Type::LARGE_STRING, StringConverter<LargeStringType>)
+    SIMPLE_CONVERTER_CASE(Type::LARGE_BINARY, StringConverter<LargeBinaryType>)
     SIMPLE_CONVERTER_CASE(Type::FIXED_SIZE_BINARY, FixedSizeBinaryConverter)
     SIMPLE_CONVERTER_CASE(Type::DECIMAL, DecimalConverter)
     SIMPLE_CONVERTER_CASE(Type::UNION, UnionConverter)
diff --git a/cpp/src/arrow/ipc/metadata-internal.cc b/cpp/src/arrow/ipc/metadata-internal.cc
index e505dde..93f859a 100644
--- a/cpp/src/arrow/ipc/metadata-internal.cc
+++ b/cpp/src/arrow/ipc/metadata-internal.cc
@@ -232,6 +232,9 @@ Status ConcreteTypeFromFlatbuffer(flatbuf::Type type, const void* type_data,
     case flatbuf::Type_Binary:
       *out = binary();
       return Status::OK();
+    case flatbuf::Type_LargeBinary:
+      *out = large_binary();
+      return Status::OK();
     case flatbuf::Type_FixedSizeBinary: {
       auto fw_binary = static_cast<const flatbuf::FixedSizeBinary*>(type_data);
       *out = fixed_size_binary(fw_binary->byteWidth());
@@ -240,6 +243,9 @@ Status ConcreteTypeFromFlatbuffer(flatbuf::Type type, const void* type_data,
     case flatbuf::Type_Utf8:
       *out = utf8();
       return Status::OK();
+    case flatbuf::Type_LargeUtf8:
+      *out = large_utf8();
+      return Status::OK();
     case flatbuf::Type_Bool:
       *out = boolean();
       return Status::OK();
@@ -541,12 +547,24 @@ class FieldToFlatbufferVisitor {
     return Status::OK();
   }
 
+  Status Visit(const LargeBinaryType& type) {
+    fb_type_ = flatbuf::Type_LargeBinary;
+    type_offset_ = flatbuf::CreateLargeBinary(fbb_).Union();
+    return Status::OK();
+  }
+
   Status Visit(const StringType& type) {
     fb_type_ = flatbuf::Type_Utf8;
     type_offset_ = flatbuf::CreateUtf8(fbb_).Union();
     return Status::OK();
   }
 
+  Status Visit(const LargeStringType& type) {
+    fb_type_ = flatbuf::Type_LargeUtf8;
+    type_offset_ = flatbuf::CreateLargeUtf8(fbb_).Union();
+    return Status::OK();
+  }
+
   Status Visit(const Date32Type& type) {
     fb_type_ = flatbuf::Type_Date;
     type_offset_ = flatbuf::CreateDate(fbb_, flatbuf::DateUnit_DAY).Union();
diff --git a/cpp/src/arrow/ipc/reader.cc b/cpp/src/arrow/ipc/reader.cc
index 8806c61..8ddec2e 100644
--- a/cpp/src/arrow/ipc/reader.cc
+++ b/cpp/src/arrow/ipc/reader.cc
@@ -249,8 +249,10 @@ class ArrayLoader {
   }
 
   template <typename T>
-  typename std::enable_if<std::is_base_of<BinaryType, T>::value, Status>::type Visit(
-      const T& type) {
+  typename std::enable_if<std::is_base_of<BinaryType, T>::value ||
+                              std::is_base_of<LargeBinaryType, T>::value,
+                          Status>::type
+  Visit(const T& type) {
     return LoadBinary<T>();
   }
 
diff --git a/cpp/src/arrow/ipc/test-common.cc b/cpp/src/arrow/ipc/test-common.cc
index 47c3076..4cf13ec 100644
--- a/cpp/src/arrow/ipc/test-common.cc
+++ b/cpp/src/arrow/ipc/test-common.cc
@@ -34,6 +34,7 @@
 #include "arrow/testing/random.h"
 #include "arrow/testing/util.h"
 #include "arrow/type.h"
+#include "arrow/type_traits.h"
 #include "arrow/util/bit-util.h"
 
 namespace arrow {
@@ -205,18 +206,16 @@ Status MakeRandomStringArray(int64_t length, bool include_nulls, MemoryPool* poo
   return builder.Finish(out);
 }
 
-template <class Builder, class RawType>
+template <class BuilderType>
 static Status MakeBinaryArrayWithUniqueValues(int64_t length, bool include_nulls,
                                               MemoryPool* pool,
                                               std::shared_ptr<Array>* out) {
-  Builder builder(pool);
+  BuilderType builder(pool);
   for (int64_t i = 0; i < length; ++i) {
     if (include_nulls && (i % 7 == 0)) {
       RETURN_NOT_OK(builder.AppendNull());
     } else {
-      const std::string value = std::to_string(i);
-      RETURN_NOT_OK(builder.Append(reinterpret_cast<const RawType*>(value.data()),
-                                   static_cast<int32_t>(value.size())));
+      RETURN_NOT_OK(builder.Append(std::to_string(i)));
     }
   }
   return builder.Finish(out);
@@ -224,28 +223,37 @@ static Status MakeBinaryArrayWithUniqueValues(int64_t length, bool include_nulls
 
 Status MakeStringTypesRecordBatch(std::shared_ptr<RecordBatch>* out, bool with_nulls) {
   const int64_t length = 500;
-  auto string_type = utf8();
-  auto binary_type = binary();
-  auto f0 = field("f0", string_type);
-  auto f1 = field("f1", binary_type);
-  auto schema = ::arrow::schema({f0, f1});
+  auto f0 = field("strings", utf8());
+  auto f1 = field("binaries", binary());
+  auto f2 = field("large_strings", large_utf8());
+  auto f3 = field("large_binaries", large_binary());
+  auto schema = ::arrow::schema({f0, f1, f2, f3});
 
-  std::shared_ptr<Array> a0, a1;
+  std::shared_ptr<Array> a0, a1, a2, a3;
   MemoryPool* pool = default_memory_pool();
 
   // Quirk with RETURN_NOT_OK macro and templated functions
   {
-    auto s = MakeBinaryArrayWithUniqueValues<StringBuilder, char>(length, with_nulls,
-                                                                  pool, &a0);
+    auto s =
+        MakeBinaryArrayWithUniqueValues<StringBuilder>(length, with_nulls, pool, &a0);
     RETURN_NOT_OK(s);
   }
-
   {
-    auto s = MakeBinaryArrayWithUniqueValues<BinaryBuilder, uint8_t>(length, with_nulls,
-                                                                     pool, &a1);
+    auto s =
+        MakeBinaryArrayWithUniqueValues<BinaryBuilder>(length, with_nulls, pool, &a1);
     RETURN_NOT_OK(s);
   }
-  *out = RecordBatch::Make(schema, length, {a0, a1});
+  {
+    auto s = MakeBinaryArrayWithUniqueValues<LargeStringBuilder>(length, with_nulls, pool,
+                                                                 &a2);
+    RETURN_NOT_OK(s);
+  }
+  {
+    auto s = MakeBinaryArrayWithUniqueValues<LargeBinaryBuilder>(length, with_nulls, pool,
+                                                                 &a3);
+    RETURN_NOT_OK(s);
+  }
+  *out = RecordBatch::Make(schema, length, {a0, a1, a2, a3});
   return Status::OK();
 }
 
diff --git a/cpp/src/arrow/ipc/writer.cc b/cpp/src/arrow/ipc/writer.cc
index e1c2eca..ec37207 100644
--- a/cpp/src/arrow/ipc/writer.cc
+++ b/cpp/src/arrow/ipc/writer.cc
@@ -225,7 +225,8 @@ class RecordBatchSerializer : public ArrayVisitor {
   template <typename ArrayType>
   Status GetZeroBasedValueOffsets(const ArrayType& array,
                                   std::shared_ptr<Buffer>* value_offsets) {
-    // Share slicing logic between ListArray and BinaryArray
+    // Share slicing logic between ListArray, BinaryArray and LargeBinaryArray
+    using offset_type = typename ArrayType::offset_type;
 
     auto offsets = array.value_offsets();
 
@@ -235,11 +236,12 @@ class RecordBatchSerializer : public ArrayVisitor {
       // b) slice the values array accordingly
 
       std::shared_ptr<Buffer> shifted_offsets;
-      RETURN_NOT_OK(AllocateBuffer(pool_, sizeof(int32_t) * (array.length() + 1),
+      RETURN_NOT_OK(AllocateBuffer(pool_, sizeof(offset_type) * (array.length() + 1),
                                    &shifted_offsets));
 
-      int32_t* dest_offsets = reinterpret_cast<int32_t*>(shifted_offsets->mutable_data());
-      const int32_t start_offset = array.value_offset(0);
+      offset_type* dest_offsets =
+          reinterpret_cast<offset_type*>(shifted_offsets->mutable_data());
+      const offset_type start_offset = array.value_offset(0);
 
       for (int i = 0; i < array.length(); ++i) {
         dest_offsets[i] = array.value_offset(i) - start_offset;
@@ -253,9 +255,10 @@ class RecordBatchSerializer : public ArrayVisitor {
     return Status::OK();
   }
 
-  Status VisitBinary(const BinaryArray& array) {
+  template <typename ArrayType>
+  Status VisitBinary(const ArrayType& array) {
     std::shared_ptr<Buffer> value_offsets;
-    RETURN_NOT_OK(GetZeroBasedValueOffsets<BinaryArray>(array, &value_offsets));
+    RETURN_NOT_OK(GetZeroBasedValueOffsets<ArrayType>(array, &value_offsets));
     auto data = array.value_data();
 
     int64_t total_data_bytes = 0;
@@ -343,6 +346,10 @@ class RecordBatchSerializer : public ArrayVisitor {
 
   Status Visit(const BinaryArray& array) override { return VisitBinary(array); }
 
+  Status Visit(const LargeStringArray& array) override { return VisitBinary(array); }
+
+  Status Visit(const LargeBinaryArray& array) override { return VisitBinary(array); }
+
   Status Visit(const ListArray& array) override { return VisitList(array); }
 
   Status Visit(const MapArray& array) override { return VisitList(array); }
diff --git a/cpp/src/arrow/json/converter-test.cc b/cpp/src/arrow/json/converter-test.cc
index 86e8e8d..cf09e61 100644
--- a/cpp/src/arrow/json/converter-test.cc
+++ b/cpp/src/arrow/json/converter-test.cc
@@ -85,6 +85,11 @@ TEST(ConverterTest, String) {
   AssertConvert(utf8(), src, src);
 }
 
+TEST(ConverterTest, LargeString) {
+  std::string src = R"(["a", "b c", null, "d e f", "g"])";
+  AssertConvert(large_utf8(), src, src);
+}
+
 TEST(ConverterTest, Timestamp) {
   std::string src = R"([null, "1970-01-01", "2018-11-13 17:11:10"])";
   AssertConvert(timestamp(TimeUnit::SECOND), src, src);
diff --git a/cpp/src/arrow/json/converter.cc b/cpp/src/arrow/json/converter.cc
index 078e314..6b7b730 100644
--- a/cpp/src/arrow/json/converter.cc
+++ b/cpp/src/arrow/json/converter.cc
@@ -264,6 +264,8 @@ Status MakeConverter(const std::shared_ptr<DataType>& out_type, MemoryPool* pool
     CONVERTER_CASE(Type::DATE64, DateTimeConverter<Date64Type>);
     CONVERTER_CASE(Type::BINARY, BinaryConverter<BinaryType>);
     CONVERTER_CASE(Type::STRING, BinaryConverter<StringType>);
+    CONVERTER_CASE(Type::LARGE_BINARY, BinaryConverter<LargeBinaryType>);
+    CONVERTER_CASE(Type::LARGE_STRING, BinaryConverter<LargeStringType>);
     default:
       return Status::NotImplemented("JSON conversion to ", *out_type,
                                     " is not supported");
diff --git a/cpp/src/arrow/pretty_print-test.cc b/cpp/src/arrow/pretty_print-test.cc
index c77a92b..cdb230c 100644
--- a/cpp/src/arrow/pretty_print-test.cc
+++ b/cpp/src/arrow/pretty_print-test.cc
@@ -155,6 +155,7 @@ TEST_F(TestPrettyPrint, PrimitiveType) {
   null
 ])expected";
   CheckPrimitive<StringType, std::string>({0, 10}, is_valid, values3, ex3);
+  CheckPrimitive<LargeStringType, std::string>({0, 10}, is_valid, values3, ex3);
   static const char* ex3_in2 = R"expected(  [
     "foo",
     "bar",
@@ -163,6 +164,7 @@ TEST_F(TestPrettyPrint, PrimitiveType) {
     null
   ])expected";
   CheckPrimitive<StringType, std::string>({2, 10}, is_valid, values3, ex3_in2);
+  CheckPrimitive<LargeStringType, std::string>({2, 10}, is_valid, values3, ex3_in2);
 }
 
 TEST_F(TestPrettyPrint, Int8) {
@@ -338,9 +340,11 @@ TEST_F(TestPrettyPrint, BinaryType) {
   std::vector<std::string> values = {"foo", "bar", "", "baz", "", "\xff"};
   static const char* ex = "[\n  666F6F,\n  626172,\n  null,\n  62617A,\n  ,\n  FF\n]";
   CheckPrimitive<BinaryType, std::string>({0}, is_valid, values, ex);
+  CheckPrimitive<LargeBinaryType, std::string>({0}, is_valid, values, ex);
   static const char* ex_in2 =
       "  [\n    666F6F,\n    626172,\n    null,\n    62617A,\n    ,\n    FF\n  ]";
   CheckPrimitive<BinaryType, std::string>({2}, is_valid, values, ex_in2);
+  CheckPrimitive<LargeBinaryType, std::string>({2}, is_valid, values, ex_in2);
 }
 
 TEST_F(TestPrettyPrint, ListType) {
diff --git a/cpp/src/arrow/pretty_print.cc b/cpp/src/arrow/pretty_print.cc
index 6caef17..5a54e13 100644
--- a/cpp/src/arrow/pretty_print.cc
+++ b/cpp/src/arrow/pretty_print.cc
@@ -205,7 +205,9 @@ class ArrayPrinter : public PrettyPrinter {
 
   // String (Utf8)
   template <typename T>
-  inline typename std::enable_if<std::is_same<StringArray, T>::value, Status>::type
+  inline typename std::enable_if<std::is_same<StringArray, T>::value ||
+                                     std::is_same<LargeStringArray, T>::value,
+                                 Status>::type
   WriteDataValues(const T& array) {
     WriteValues(array, [&](int64_t i) { (*sink_) << "\"" << array.GetView(i) << "\""; });
     return Status::OK();
@@ -213,7 +215,9 @@ class ArrayPrinter : public PrettyPrinter {
 
   // Binary
   template <typename T>
-  inline typename std::enable_if<std::is_same<BinaryArray, T>::value, Status>::type
+  inline typename std::enable_if<std::is_same<BinaryArray, T>::value ||
+                                     std::is_same<LargeBinaryArray, T>::value,
+                                 Status>::type
   WriteDataValues(const T& array) {
     WriteValues(array, [&](int64_t i) { (*sink_) << HexEncode(array.GetView(i)); });
     return Status::OK();
@@ -314,6 +318,7 @@ class ArrayPrinter : public PrettyPrinter {
   typename std::enable_if<std::is_base_of<PrimitiveArray, T>::value ||
                               std::is_base_of<FixedSizeBinaryArray, T>::value ||
                               std::is_base_of<BinaryArray, T>::value ||
+                              std::is_base_of<LargeBinaryArray, T>::value ||
                               std::is_base_of<ListArray, T>::value ||
                               std::is_base_of<MapArray, T>::value ||
                               std::is_base_of<FixedSizeListArray, T>::value,
diff --git a/cpp/src/arrow/scalar.h b/cpp/src/arrow/scalar.h
index 4f0589a..76aecd0 100644
--- a/cpp/src/arrow/scalar.h
+++ b/cpp/src/arrow/scalar.h
@@ -91,20 +91,22 @@ struct NumericScalar : public internal::PrimitiveScalar {
       : internal::PrimitiveScalar{type, is_valid}, value(value) {}
 };
 
-struct ARROW_EXPORT BinaryScalar : public Scalar {
+template <typename Type>
+struct BaseBinaryScalar : public Scalar {
   std::shared_ptr<Buffer> value;
-  explicit BinaryScalar(const std::shared_ptr<Buffer>& value, bool is_valid = true)
-      : BinaryScalar(value, binary(), is_valid) {}
 
  protected:
-  BinaryScalar(const std::shared_ptr<Buffer>& value,
-               const std::shared_ptr<DataType>& type, bool is_valid = true)
+  BaseBinaryScalar(const std::shared_ptr<Buffer>& value,
+                   const std::shared_ptr<DataType>& type, bool is_valid = true)
       : Scalar{type, is_valid}, value(value) {}
 };
 
-struct ARROW_EXPORT FixedSizeBinaryScalar : public BinaryScalar {
-  FixedSizeBinaryScalar(const std::shared_ptr<Buffer>& value,
-                        const std::shared_ptr<DataType>& type, bool is_valid = true);
+struct ARROW_EXPORT BinaryScalar : public BaseBinaryScalar<BinaryType> {
+  explicit BinaryScalar(const std::shared_ptr<Buffer>& value, bool is_valid = true)
+      : BaseBinaryScalar(value, binary(), is_valid) {}
+
+ protected:
+  using BaseBinaryScalar::BaseBinaryScalar;
 };
 
 struct ARROW_EXPORT StringScalar : public BinaryScalar {
@@ -112,6 +114,24 @@ struct ARROW_EXPORT StringScalar : public BinaryScalar {
       : BinaryScalar(value, utf8(), is_valid) {}
 };
 
+struct ARROW_EXPORT LargeBinaryScalar : public BaseBinaryScalar<LargeBinaryType> {
+  explicit LargeBinaryScalar(const std::shared_ptr<Buffer>& value, bool is_valid = true)
+      : BaseBinaryScalar(value, large_binary(), is_valid) {}
+
+ protected:
+  using BaseBinaryScalar::BaseBinaryScalar;
+};
+
+struct ARROW_EXPORT LargeStringScalar : public LargeBinaryScalar {
+  explicit LargeStringScalar(const std::shared_ptr<Buffer>& value, bool is_valid = true)
+      : LargeBinaryScalar(value, large_utf8(), is_valid) {}  // type is LargeStringType
+};
+
+struct ARROW_EXPORT FixedSizeBinaryScalar : public BinaryScalar {
+  FixedSizeBinaryScalar(const std::shared_ptr<Buffer>& value,
+                        const std::shared_ptr<DataType>& type, bool is_valid = true);
+};
+
 class ARROW_EXPORT Date32Scalar : public NumericScalar<Date32Type> {
  public:
   using NumericScalar<Date32Type>::NumericScalar;
diff --git a/cpp/src/arrow/testing/random.cc b/cpp/src/arrow/testing/random.cc
index 3346e63..a6f03a3 100644
--- a/cpp/src/arrow/testing/random.cc
+++ b/cpp/src/arrow/testing/random.cc
@@ -145,24 +145,30 @@ PRIMITIVE_RAND_FLOAT_IMPL(Float64, double, DoubleType)
 #undef PRIMITIVE_RAND_FLOAT_IMPL
 #undef PRIMITIVE_RAND_IMPL
 
-std::shared_ptr<arrow::Array> RandomArrayGenerator::String(int64_t size,
-                                                           int32_t min_length,
-                                                           int32_t max_length,
-                                                           double null_probability) {
+template <typename TypeClass>
+static std::shared_ptr<arrow::Array> GenerateBinaryArray(RandomArrayGenerator* gen,
+                                                         int64_t size, int32_t min_length,
+                                                         int32_t max_length,
+                                                         double null_probability) {
+  using offset_type = typename TypeClass::offset_type;
+  using BuilderType = typename TypeTraits<TypeClass>::BuilderType;
+  using OffsetArrowType = typename CTypeTraits<offset_type>::ArrowType;
+  using OffsetArrayType = typename TypeTraits<OffsetArrowType>::ArrayType;
+
   if (null_probability < 0 || null_probability > 1) {
     ABORT_NOT_OK(Status::Invalid("null_probability must be between 0 and 1"));
   }
 
-  auto int32_lengths = Int32(size, min_length, max_length, null_probability);
-  auto lengths = std::dynamic_pointer_cast<Int32Array>(int32_lengths);
+  auto lengths = std::dynamic_pointer_cast<OffsetArrayType>(
+      gen->Numeric<OffsetArrowType>(size, min_length, max_length, null_probability));
 
   // Visual Studio does not implement uniform_int_distribution for char types.
   using GenOpt = GenerateOptions<uint8_t, std::uniform_int_distribution<uint16_t>>;
-  GenOpt options(seed(), static_cast<uint8_t>('A'), static_cast<uint8_t>('z'),
+  GenOpt options(gen->seed(), static_cast<uint8_t>('A'), static_cast<uint8_t>('z'),
                  /*null_probability=*/0);
 
   std::vector<uint8_t> str_buffer(max_length);
-  StringBuilder builder;
+  BuilderType builder;
 
   for (int64_t i = 0; i < size; ++i) {
     if (lengths->IsValid(i)) {
@@ -178,6 +184,22 @@ std::shared_ptr<arrow::Array> RandomArrayGenerator::String(int64_t size,
   return result;
 }
 
+std::shared_ptr<arrow::Array> RandomArrayGenerator::String(int64_t size,
+                                                           int32_t min_length,
+                                                           int32_t max_length,
+                                                           double null_probability) {
+  return GenerateBinaryArray<StringType>(this, size, min_length, max_length,
+                                         null_probability);
+}
+
+std::shared_ptr<arrow::Array> RandomArrayGenerator::LargeString(int64_t size,
+                                                                int32_t min_length,
+                                                                int32_t max_length,
+                                                                double null_probability) {
+  return GenerateBinaryArray<LargeStringType>(this, size, min_length, max_length,
+                                              null_probability);
+}
+
 std::shared_ptr<arrow::Array> RandomArrayGenerator::BinaryWithRepeats(
     int64_t size, int64_t unique, int32_t min_length, int32_t max_length,
     double null_probability) {
diff --git a/cpp/src/arrow/testing/random.h b/cpp/src/arrow/testing/random.h
index 3126a69..75f6bdf 100644
--- a/cpp/src/arrow/testing/random.h
+++ b/cpp/src/arrow/testing/random.h
@@ -214,6 +214,19 @@ class ARROW_EXPORT RandomArrayGenerator {
   std::shared_ptr<arrow::Array> String(int64_t size, int32_t min_length,
                                        int32_t max_length, double null_probability);
 
+  /// \brief Generates a random LargeStringArray
+  ///
+  /// \param[in] size the size of the array to generate
+  /// \param[in] min_length the lower bound of the string length
+  ///            determined by the uniform distribution
+  /// \param[in] max_length the upper bound of the string length
+  ///            determined by the uniform distribution
+  /// \param[in] null_probability the probability of a row being null
+  ///
+  /// \return a generated Array
+  std::shared_ptr<arrow::Array> LargeString(int64_t size, int32_t min_length,
+                                            int32_t max_length, double null_probability);
+
   /// \brief Generates a random StringArray with repeated values
   ///
   /// \param[in] size the size of the array to generate
@@ -235,9 +248,9 @@ class ARROW_EXPORT RandomArrayGenerator {
                                                   int32_t min_length, int32_t max_length,
                                                   double null_probability);
 
- private:
   SeedType seed() { return seed_distribution_(seed_rng_); }
 
+ private:
   std::uniform_int_distribution<SeedType> seed_distribution_;
   std::default_random_engine seed_rng_;
 };
diff --git a/cpp/src/arrow/type-test.cc b/cpp/src/arrow/type-test.cc
index 7ad1d8a..7bfb720 100644
--- a/cpp/src/arrow/type-test.cc
+++ b/cpp/src/arrow/type-test.cc
@@ -354,6 +354,20 @@ TEST(TestStringType, ToString) {
   ASSERT_EQ(str.ToString(), std::string("string"));
 }
 
+TEST(TestLargeBinaryTypes, ToString) {
+  BinaryType bt1;
+  LargeBinaryType t1;
+  LargeBinaryType e1;
+  LargeStringType t2;
+  EXPECT_TRUE(t1.Equals(e1));
+  EXPECT_FALSE(t1.Equals(t2));
+  EXPECT_FALSE(t1.Equals(bt1));
+  ASSERT_EQ(t1.id(), Type::LARGE_BINARY);
+  ASSERT_EQ(t1.ToString(), std::string("large_binary"));
+  ASSERT_EQ(t2.id(), Type::LARGE_STRING);
+  ASSERT_EQ(t2.ToString(), std::string("large_string"));
+}
+
 TEST(TestFixedSizeBinaryType, ToString) {
   auto t = fixed_size_binary(10);
   ASSERT_EQ(t->id(), Type::FIXED_SIZE_BINARY);
diff --git a/cpp/src/arrow/type.cc b/cpp/src/arrow/type.cc
index 76be841..dc00a79 100644
--- a/cpp/src/arrow/type.cc
+++ b/cpp/src/arrow/type.cc
@@ -143,8 +143,6 @@ FloatingPointType::Precision DoubleType::precision() const {
   return FloatingPointType::DOUBLE;
 }
 
-std::string StringType::ToString() const { return std::string("string"); }
-
 std::string ListType::ToString() const {
   std::stringstream s;
   s << "list<" << value_field()->ToString() << ">";
@@ -178,7 +176,13 @@ std::string FixedSizeListType::ToString() const {
   return s.str();
 }
 
-std::string BinaryType::ToString() const { return std::string("binary"); }
+std::string BinaryType::ToString() const { return "binary"; }
+
+std::string LargeBinaryType::ToString() const { return "large_binary"; }
+
+std::string StringType::ToString() const { return "string"; }
+
+std::string LargeStringType::ToString() const { return "large_string"; }
 
 int FixedSizeBinaryType::bit_width() const { return CHAR_BIT * byte_width(); }
 
@@ -671,7 +675,9 @@ TYPE_FACTORY(float16, HalfFloatType)
 TYPE_FACTORY(float32, FloatType)
 TYPE_FACTORY(float64, DoubleType)
 TYPE_FACTORY(utf8, StringType)
+TYPE_FACTORY(large_utf8, LargeStringType)
 TYPE_FACTORY(binary, BinaryType)
+TYPE_FACTORY(large_binary, LargeBinaryType)
 TYPE_FACTORY(date64, Date64Type)
 TYPE_FACTORY(date32, Date32Type)
 
diff --git a/cpp/src/arrow/type.h b/cpp/src/arrow/type.h
index fc235bb..572b888 100644
--- a/cpp/src/arrow/type.h
+++ b/cpp/src/arrow/type.h
@@ -143,7 +143,13 @@ struct Type {
 
     /// Measure of elapsed time in either seconds, milliseconds, microseconds
     /// or nanoseconds.
-    DURATION
+    DURATION,
+
+    /// Like STRING, but with 64-bit offsets
+    LARGE_STRING,
+
+    /// Like BINARY, but with 64-bit offsets
+    LARGE_BINARY
   };
 };
 
@@ -472,6 +478,7 @@ class ARROW_EXPORT DoubleType
 class ARROW_EXPORT ListType : public NestedType {
  public:
   static constexpr Type::type type_id = Type::LIST;
+  using offset_type = int32_t;
 
   // List can contain any other logical value type
   explicit ListType(const std::shared_ptr<DataType>& value_type)
@@ -486,7 +493,7 @@ class ARROW_EXPORT ListType : public NestedType {
   std::shared_ptr<DataType> value_type() const { return children_[0]->type(); }
 
   DataTypeLayout layout() const override {
-    return {{1, CHAR_BIT * sizeof(int32_t)}, false};
+    return {{1, CHAR_BIT * sizeof(offset_type)}, false};
   }
 
   std::string ToString() const override;
@@ -550,23 +557,78 @@ class ARROW_EXPORT FixedSizeListType : public NestedType {
   int32_t list_size_;
 };
 
+/// \brief Base class for all variable-size binary data types
+class ARROW_EXPORT BaseBinaryType : public DataType, public NoExtraMeta {
+ public:
+  using DataType::DataType;
+};
+
 /// \brief Concrete type class for variable-size binary data
-class ARROW_EXPORT BinaryType : public DataType, public NoExtraMeta {
+class ARROW_EXPORT BinaryType : public BaseBinaryType {
  public:
   static constexpr Type::type type_id = Type::BINARY;
+  static constexpr bool is_utf8 = false;
+  using offset_type = int32_t;
 
   BinaryType() : BinaryType(Type::BINARY) {}
 
   DataTypeLayout layout() const override {
-    return {{1, CHAR_BIT * sizeof(int32_t), DataTypeLayout::kVariableSizeBuffer}, false};
+    return {{1, CHAR_BIT * sizeof(offset_type), DataTypeLayout::kVariableSizeBuffer},
+            false};
   }
 
   std::string ToString() const override;
   std::string name() const override { return "binary"; }
 
  protected:
-  // Allow subclasses to change the logical type.
-  explicit BinaryType(Type::type logical_type) : DataType(logical_type) {}
+  // Allow subclasses like StringType to change the logical type.
+  explicit BinaryType(Type::type logical_type) : BaseBinaryType(logical_type) {}
+};
+
+/// \brief Concrete type class for large variable-size binary data
+class ARROW_EXPORT LargeBinaryType : public BaseBinaryType {
+ public:
+  static constexpr Type::type type_id = Type::LARGE_BINARY;
+  static constexpr bool is_utf8 = false;
+  using offset_type = int64_t;
+
+  LargeBinaryType() : LargeBinaryType(Type::LARGE_BINARY) {}
+
+  DataTypeLayout layout() const override {
+    return {{1, CHAR_BIT * sizeof(offset_type), DataTypeLayout::kVariableSizeBuffer},
+            false};
+  }
+
+  std::string ToString() const override;
+  std::string name() const override { return "large_binary"; }
+
+ protected:
+  // Allow subclasses like LargeStringType to change the logical type.
+  explicit LargeBinaryType(Type::type logical_type) : BaseBinaryType(logical_type) {}
+};
+
+/// \brief Concrete type class for variable-size string data, utf8-encoded
+class ARROW_EXPORT StringType : public BinaryType {
+ public:
+  static constexpr Type::type type_id = Type::STRING;
+  static constexpr bool is_utf8 = true;
+
+  StringType() : BinaryType(Type::STRING) {}
+
+  std::string ToString() const override;
+  std::string name() const override { return "utf8"; }
+};
+
+/// \brief Concrete type class for large variable-size string data, utf8-encoded
+class ARROW_EXPORT LargeStringType : public LargeBinaryType {
+ public:
+  static constexpr Type::type type_id = Type::LARGE_STRING;
+  static constexpr bool is_utf8 = true;
+
+  LargeStringType() : LargeBinaryType(Type::LARGE_STRING) {}
+
+  std::string ToString() const override;
+  std::string name() const override { return "large_utf8"; }
 };
 
 /// \brief Concrete type class for fixed-size binary data
@@ -591,17 +653,6 @@ class ARROW_EXPORT FixedSizeBinaryType : public FixedWidthType, public Parametri
   int32_t byte_width_;
 };
 
-/// \brief Concrete type class for variable-size string data, utf8-encoded
-class ARROW_EXPORT StringType : public BinaryType {
- public:
-  static constexpr Type::type type_id = Type::STRING;
-
-  StringType() : BinaryType(Type::STRING) {}
-
-  std::string ToString() const override;
-  std::string name() const override { return "utf8"; }
-};
-
 /// \brief Concrete type class for struct data
 class ARROW_EXPORT StructType : public NestedType {
  public:
diff --git a/cpp/src/arrow/type_fwd.h b/cpp/src/arrow/type_fwd.h
index c42d661..9935af5 100644
--- a/cpp/src/arrow/type_fwd.h
+++ b/cpp/src/arrow/type_fwd.h
@@ -65,6 +65,11 @@ class BinaryArray;
 class BinaryBuilder;
 struct BinaryScalar;
 
+class LargeBinaryType;
+class LargeBinaryArray;
+class LargeBinaryBuilder;
+struct LargeBinaryScalar;
+
 class FixedSizeBinaryType;
 class FixedSizeBinaryArray;
 class FixedSizeBinaryBuilder;
@@ -75,6 +80,11 @@ class StringArray;
 class StringBuilder;
 struct StringScalar;
 
+class LargeStringType;
+class LargeStringArray;
+class LargeStringBuilder;
+struct LargeStringScalar;
+
 class ListType;
 class ListArray;
 class ListBuilder;
@@ -218,8 +228,12 @@ std::shared_ptr<DataType> ARROW_EXPORT float32();
 std::shared_ptr<DataType> ARROW_EXPORT float64();
 /// \brief Return a StringType instance
 std::shared_ptr<DataType> ARROW_EXPORT utf8();
+/// \brief Return a LargeStringType instance
+std::shared_ptr<DataType> ARROW_EXPORT large_utf8();
 /// \brief Return a BinaryType instance
 std::shared_ptr<DataType> ARROW_EXPORT binary();
+/// \brief Return a LargeBinaryType instance
+std::shared_ptr<DataType> ARROW_EXPORT large_binary();
 /// \brief Return a Date32Type instance
 std::shared_ptr<DataType> ARROW_EXPORT date32();
 /// \brief Return a Date64Type instance
diff --git a/cpp/src/arrow/type_traits.h b/cpp/src/arrow/type_traits.h
index c4c549f..df3e280 100644
--- a/cpp/src/arrow/type_traits.h
+++ b/cpp/src/arrow/type_traits.h
@@ -244,6 +244,15 @@ struct TypeTraits<BinaryType> {
 };
 
 template <>
+struct TypeTraits<LargeBinaryType> {
+  using ArrayType = LargeBinaryArray;
+  using BuilderType = LargeBinaryBuilder;
+  using ScalarType = LargeBinaryScalar;
+  constexpr static bool is_parameter_free = true;
+  static inline std::shared_ptr<DataType> type_singleton() { return large_binary(); }
+};
+
+template <>
 struct TypeTraits<FixedSizeBinaryType> {
   using ArrayType = FixedSizeBinaryArray;
   using BuilderType = FixedSizeBinaryBuilder;
@@ -261,6 +270,15 @@ struct TypeTraits<StringType> {
 };
 
 template <>
+struct TypeTraits<LargeStringType> {
+  using ArrayType = LargeStringArray;
+  using BuilderType = LargeStringBuilder;
+  using ScalarType = LargeStringScalar;
+  constexpr static bool is_parameter_free = true;
+  static inline std::shared_ptr<DataType> type_singleton() { return large_utf8(); }
+};
+
+template <>
 struct CTypeTraits<std::string> : public TypeTraits<StringType> {
   using ArrowType = StringType;
 };
@@ -367,6 +385,12 @@ struct is_8bit_int {
       (std::is_same<UInt8Type, T>::value || std::is_same<Int8Type, T>::value);
 };
 
+template <typename T>
+struct is_any_string_type {
+  static constexpr bool value =
+      std::is_same<StringType, T>::value || std::is_same<LargeStringType, T>::value;
+};
+
 template <typename T, typename R = void>
 using enable_if_8bit_int = typename std::enable_if<is_8bit_int<T>::value, R>::type;
 
@@ -419,10 +443,18 @@ template <typename T, typename R = void>
 using enable_if_null = typename std::enable_if<std::is_same<NullType, T>::value, R>::type;
 
 template <typename T, typename R = void>
+using enable_if_base_binary =
+    typename std::enable_if<std::is_base_of<BaseBinaryType, T>::value, R>::type;
+
+template <typename T, typename R = void>
 using enable_if_binary =
     typename std::enable_if<std::is_base_of<BinaryType, T>::value, R>::type;
 
 template <typename T, typename R = void>
+using enable_if_large_binary =
+    typename std::enable_if<std::is_base_of<LargeBinaryType, T>::value, R>::type;
+
+template <typename T, typename R = void>
 using enable_if_boolean =
     typename std::enable_if<std::is_same<BooleanType, T>::value, R>::type;
 
@@ -580,6 +612,17 @@ static inline bool is_binary_like(Type::type type_id) {
   return false;
 }
 
+/// \brief Tell whether a type id denotes a large (64-bit offsets) binary-like type
+///
+/// \param[in] type_id the type id to test
+/// \return true for LARGE_BINARY and LARGE_STRING, false for any other id
+static inline bool is_large_binary_like(Type::type type_id) {
+  // Only the two large variable-size ids qualify.
+  const bool is_large_binary = (type_id == Type::LARGE_BINARY);
+  const bool is_large_string = (type_id == Type::LARGE_STRING);
+  return is_large_binary || is_large_string;
+}
+
 static inline bool is_dictionary(Type::type type_id) {
   return type_id == Type::DICTIONARY;
 }
diff --git a/cpp/src/arrow/visitor.cc b/cpp/src/arrow/visitor.cc
index 53b341b..2ec6c64 100644
--- a/cpp/src/arrow/visitor.cc
+++ b/cpp/src/arrow/visitor.cc
@@ -47,6 +47,8 @@ ARRAY_VISITOR_DEFAULT(FloatArray)
 ARRAY_VISITOR_DEFAULT(DoubleArray)
 ARRAY_VISITOR_DEFAULT(BinaryArray)
 ARRAY_VISITOR_DEFAULT(StringArray)
+ARRAY_VISITOR_DEFAULT(LargeBinaryArray)
+ARRAY_VISITOR_DEFAULT(LargeStringArray)
 ARRAY_VISITOR_DEFAULT(FixedSizeBinaryArray)
 ARRAY_VISITOR_DEFAULT(Date32Array)
 ARRAY_VISITOR_DEFAULT(Date64Array)
@@ -90,6 +92,8 @@ TYPE_VISITOR_DEFAULT(FloatType)
 TYPE_VISITOR_DEFAULT(DoubleType)
 TYPE_VISITOR_DEFAULT(StringType)
 TYPE_VISITOR_DEFAULT(BinaryType)
+TYPE_VISITOR_DEFAULT(LargeStringType)
+TYPE_VISITOR_DEFAULT(LargeBinaryType)
 TYPE_VISITOR_DEFAULT(FixedSizeBinaryType)
 TYPE_VISITOR_DEFAULT(Date64Type)
 TYPE_VISITOR_DEFAULT(Date32Type)
@@ -134,6 +138,8 @@ SCALAR_VISITOR_DEFAULT(FloatScalar)
 SCALAR_VISITOR_DEFAULT(DoubleScalar)
 SCALAR_VISITOR_DEFAULT(StringScalar)
 SCALAR_VISITOR_DEFAULT(BinaryScalar)
+SCALAR_VISITOR_DEFAULT(LargeStringScalar)
+SCALAR_VISITOR_DEFAULT(LargeBinaryScalar)
 SCALAR_VISITOR_DEFAULT(FixedSizeBinaryScalar)
 SCALAR_VISITOR_DEFAULT(Date64Scalar)
 SCALAR_VISITOR_DEFAULT(Date32Scalar)
diff --git a/cpp/src/arrow/visitor.h b/cpp/src/arrow/visitor.h
index a4979e9..1c854c4 100644
--- a/cpp/src/arrow/visitor.h
+++ b/cpp/src/arrow/visitor.h
@@ -43,6 +43,8 @@ class ARROW_EXPORT ArrayVisitor {
   virtual Status Visit(const DoubleArray& array);
   virtual Status Visit(const StringArray& array);
   virtual Status Visit(const BinaryArray& array);
+  virtual Status Visit(const LargeStringArray& array);
+  virtual Status Visit(const LargeBinaryArray& array);
   virtual Status Visit(const FixedSizeBinaryArray& array);
   virtual Status Visit(const Date32Array& array);
   virtual Status Visit(const Date64Array& array);
@@ -81,6 +83,8 @@ class ARROW_EXPORT TypeVisitor {
   virtual Status Visit(const DoubleType& type);
   virtual Status Visit(const StringType& type);
   virtual Status Visit(const BinaryType& type);
+  virtual Status Visit(const LargeStringType& type);
+  virtual Status Visit(const LargeBinaryType& type);
   virtual Status Visit(const FixedSizeBinaryType& type);
   virtual Status Visit(const Date64Type& type);
   virtual Status Visit(const Date32Type& type);
@@ -119,6 +123,8 @@ class ARROW_EXPORT ScalarVisitor {
   virtual Status Visit(const DoubleScalar& scalar);
   virtual Status Visit(const StringScalar& scalar);
   virtual Status Visit(const BinaryScalar& scalar);
+  virtual Status Visit(const LargeStringScalar& scalar);
+  virtual Status Visit(const LargeBinaryScalar& scalar);
   virtual Status Visit(const FixedSizeBinaryScalar& scalar);
   virtual Status Visit(const Date64Scalar& scalar);
   virtual Status Visit(const Date32Scalar& scalar);
diff --git a/cpp/src/arrow/visitor_inline.h b/cpp/src/arrow/visitor_inline.h
index 544763a..3ed058e 100644
--- a/cpp/src/arrow/visitor_inline.h
+++ b/cpp/src/arrow/visitor_inline.h
@@ -47,6 +47,8 @@ namespace arrow {
   ACTION(Double);                            \
   ACTION(String);                            \
   ACTION(Binary);                            \
+  ACTION(LargeString);                       \
+  ACTION(LargeBinary);                       \
   ACTION(FixedSizeBinary);                   \
   ACTION(Duration);                          \
   ACTION(Date32);                            \
@@ -186,12 +188,13 @@ struct ArrayDataVisitor<T, enable_if_has_c_type<T>> {
 };
 
 template <typename T>
-struct ArrayDataVisitor<T, enable_if_binary<T>> {
+struct ArrayDataVisitor<T, enable_if_base_binary<T>> {
   template <typename Visitor>
   static Status Visit(const ArrayData& arr, Visitor* visitor) {
+    using offset_type = typename T::offset_type;
     constexpr uint8_t empty_value = 0;
 
-    const int32_t* offsets = arr.GetValues<int32_t>(1);
+    const offset_type* offsets = arr.GetValues<offset_type>(1);
     const uint8_t* data;
     if (!arr.buffers[2]) {
       data = &empty_value;
diff --git a/format/Schema.fbs b/format/Schema.fbs
index 3612792..06bcf6e 100644
--- a/format/Schema.fbs
+++ b/format/Schema.fbs
@@ -107,9 +107,20 @@ table FloatingPoint {
 table Utf8 {
 }
 
+/// Opaque binary data
 table Binary {
 }
 
+/// Same as Utf8, but with 64-bit offsets, allowing the representation of
+/// extremely large data values.
+table LargeUtf8 {
+}
+
+/// Same as Binary, but with 64-bit offsets, allowing the representation of
+/// extremely large data values.
+table LargeBinary {
+}
+
 table FixedSizeBinary {
   /// Number of bytes per value
   byteWidth: int;
@@ -235,6 +246,8 @@ union Type {
   FixedSizeList,
   Map,
   Duration,
+  LargeBinary,
+  LargeUtf8,
 }
 
 /// ----------------------------------------------------------------------