You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by bk...@apache.org on 2022/11/30 14:50:45 UTC

[arrow] 01/15: Draft basic scaffolding for Binary/StringView types and get compiling

This is an automated email from the ASF dual-hosted git repository.

bkietz pushed a commit to branch feature/format-string-view
in repository https://gitbox.apache.org/repos/asf/arrow.git

commit 8e1c1442feebe9af2db607e50abd4b9bb900e3fb
Author: Wes McKinney <we...@apache.org>
AuthorDate: Fri Sep 9 16:35:27 2022 -0500

    Draft basic scaffolding for Binary/StringView types and get compiling
---
 LICENSE.txt                                        |  16 +-
 cpp/src/arrow/array/array_base.cc                  |   4 +
 cpp/src/arrow/array/array_binary.cc                |  12 +
 cpp/src/arrow/array/array_binary.h                 |  58 +++++
 cpp/src/arrow/array/builder_binary.cc              |  86 +++++++
 cpp/src/arrow/array/builder_binary.h               | 248 +++++++++++++++++++++
 cpp/src/arrow/array/builder_dict.cc                |   6 +
 cpp/src/arrow/array/builder_dict.h                 |  10 +
 cpp/src/arrow/array/concatenate.cc                 |   4 +
 cpp/src/arrow/array/util.cc                        |  13 ++
 cpp/src/arrow/array/validate.cc                    |  20 +-
 cpp/src/arrow/compare.cc                           |  13 +-
 cpp/src/arrow/ipc/feather.cc                       |   4 +-
 cpp/src/arrow/ipc/metadata_internal.cc             |  10 +
 cpp/src/arrow/ipc/reader.cc                        |   5 +
 cpp/src/arrow/ipc/writer.cc                        |   4 +
 cpp/src/arrow/json/test_common.h                   |  10 +-
 cpp/src/arrow/scalar.cc                            |  14 ++
 cpp/src/arrow/scalar.h                             |  29 +++
 cpp/src/arrow/testing/json_internal.cc             |  10 +-
 cpp/src/arrow/type.cc                              |  16 +-
 cpp/src/arrow/type.h                               |  46 ++++
 cpp/src/arrow/type_fwd.h                           |  21 ++
 cpp/src/arrow/type_test.cc                         |  12 +
 cpp/src/arrow/type_traits.h                        |  57 ++++-
 cpp/src/arrow/util/string_header.h                 | 219 ++++++++++++++++++
 cpp/src/arrow/visitor.cc                           |   8 +-
 cpp/src/arrow/visitor.h                            |   6 +
 cpp/src/arrow/visitor_generate.h                   |   2 +
 cpp/src/parquet/column_writer.cc                   |   1 +
 python/pyarrow/src/arrow/python/arrow_to_pandas.cc |  38 +---
 python/pyarrow/src/arrow/python/python_to_arrow.cc |  23 +-
 32 files changed, 974 insertions(+), 51 deletions(-)

diff --git a/LICENSE.txt b/LICENSE.txt
index 86cfaf546c..d282bfe7b3 100644
--- a/LICENSE.txt
+++ b/LICENSE.txt
@@ -1965,7 +1965,7 @@ This project includes code from the autobrew project.
 The following files are based on code from the autobrew project:
 * r/tools/autobrew
 * dev/tasks/homebrew-formulae/autobrew/apache-arrow.rb
-* dev/tasks/homebrew-formulae/autobrew/apache-arrow-static.rb 
+* dev/tasks/homebrew-formulae/autobrew/apache-arrow-static.rb
 
 Copyright (c) 2019, Jeroen Ooms
 License: MIT
@@ -2047,6 +2047,20 @@ License: http://www.apache.org/licenses/LICENSE-2.0
 
 --------------------------------------------------------------------------------
 
+This project includes code from Velox.
+
+ * cpp/src/arrow/util/string_header.h
+
+is based on Velox's
+
+ * velox/type/StringView.h
+
+Copyright: Copyright (c) Facebook, Inc. and its affiliates.
+Home page: https://github.com/facebookincubator/velox
+License: http://www.apache.org/licenses/LICENSE-2.0
+
+--------------------------------------------------------------------------------
+
 The file cpp/src/arrow/vendored/musl/strptime.c has the following license
 
 Copyright © 2005-2020 Rich Felker, et al.
diff --git a/cpp/src/arrow/array/array_base.cc b/cpp/src/arrow/array/array_base.cc
index 5d27b2aedf..de9ab2e985 100644
--- a/cpp/src/arrow/array/array_base.cc
+++ b/cpp/src/arrow/array/array_base.cc
@@ -82,6 +82,10 @@ struct ScalarFromArraySlotImpl {
     return Finish(a.GetString(index_));
   }
 
+  Status Visit(const BinaryViewArray& a) {
+    return Status::NotImplemented("ScalarFromArraySlot -> BinaryView");
+  }
+
   Status Visit(const FixedSizeBinaryArray& a) { return Finish(a.GetString(index_)); }
 
   Status Visit(const DayTimeIntervalArray& a) { return Finish(a.Value(index_)); }
diff --git a/cpp/src/arrow/array/array_binary.cc b/cpp/src/arrow/array/array_binary.cc
index 9466b5a48f..cfc467160a 100644
--- a/cpp/src/arrow/array/array_binary.cc
+++ b/cpp/src/arrow/array/array_binary.cc
@@ -89,6 +89,18 @@ LargeStringArray::LargeStringArray(int64_t length,
 
 Status LargeStringArray::ValidateUTF8() const { return internal::ValidateUTF8(*data_); }
 
+BinaryViewArray::BinaryViewArray(const std::shared_ptr<ArrayData>& data) {
+  ARROW_CHECK_EQ(data->type->id(), Type::BINARY_VIEW);
+  SetData(data);
+}
+
+StringViewArray::StringViewArray(const std::shared_ptr<ArrayData>& data) {
+  ARROW_CHECK_EQ(data->type->id(), Type::STRING_VIEW);
+  SetData(data);
+}
+
+Status StringViewArray::ValidateUTF8() const { return internal::ValidateUTF8(*data_); }
+
 FixedSizeBinaryArray::FixedSizeBinaryArray(const std::shared_ptr<ArrayData>& data) {
   SetData(data);
 }
diff --git a/cpp/src/arrow/array/array_binary.h b/cpp/src/arrow/array/array_binary.h
index 7e58a96ff8..03ee77fab8 100644
--- a/cpp/src/arrow/array/array_binary.h
+++ b/cpp/src/arrow/array/array_binary.h
@@ -22,6 +22,7 @@
 
 #include <cstdint>
 #include <memory>
+#include <optional>
 #include <string>
 #include <string_view>
 #include <vector>
@@ -217,6 +218,63 @@ class ARROW_EXPORT LargeStringArray : public LargeBinaryArray {
   Status ValidateUTF8() const;
 };
 
+// ----------------------------------------------------------------------
+// BinaryView and StringView
+
+/// Concrete Array class for variable-size binary view data using the
+/// StringHeader struct to reference in-line or out-of-line string values
+class ARROW_EXPORT BinaryViewArray : public PrimitiveArray {
+ public:
+  using TypeClass = BinaryViewType;
+  using IteratorType = stl::ArrayIterator<BinaryViewArray>;
+
+  explicit BinaryViewArray(const std::shared_ptr<ArrayData>& data);
+
+  BinaryViewArray(int64_t length, const std::shared_ptr<Buffer>& data,
+                  const std::shared_ptr<Buffer>& null_bitmap = NULLPTR,
+                  int64_t null_count = kUnknownNullCount, int64_t offset = 0)
+      : PrimitiveArray(binary_view(), length, data, null_bitmap, null_count, offset) {}
+
+  const StringHeader* raw_values() const {
+    return reinterpret_cast<const StringHeader*>(raw_values_) + data_->offset;
+  }
+
+  StringHeader Value(int64_t i) const { return raw_values()[i]; }
+
+  // For API compatibility with BinaryArray etc; views the stored header, not a copy
+  std::string_view GetView(int64_t i) const { return std::string_view(raw_values()[i]); }
+
+  // EXPERIMENTAL
+  std::optional<std::string_view> operator[](int64_t i) const {
+    return *IteratorType(*this, i);
+  }
+
+  IteratorType begin() const { return IteratorType(*this); }
+  IteratorType end() const { return IteratorType(*this, length()); }
+
+ protected:
+  using PrimitiveArray::PrimitiveArray;
+};
+
+/// Concrete Array class for variable-size string view (utf-8) data using
+/// StringHeader to reference in-line or out-of-line string values
+class ARROW_EXPORT StringViewArray : public BinaryViewArray {
+ public:
+  using TypeClass = StringViewType;
+
+  explicit StringViewArray(const std::shared_ptr<ArrayData>& data);
+
+  StringViewArray(int64_t length, const std::shared_ptr<Buffer>& data,
+                  const std::shared_ptr<Buffer>& null_bitmap = NULLPTR,
+                  int64_t null_count = kUnknownNullCount, int64_t offset = 0)
+      : BinaryViewArray(utf8_view(), length, data, null_bitmap, null_count, offset) {}
+
+  /// \brief Validate that this array contains only valid UTF8 entries
+  ///
+  /// This check is also implied by ValidateFull()
+  Status ValidateUTF8() const;
+};
+
 // ----------------------------------------------------------------------
 // Fixed width binary
 
diff --git a/cpp/src/arrow/array/builder_binary.cc b/cpp/src/arrow/array/builder_binary.cc
index 571f450aab..e0a7bc1193 100644
--- a/cpp/src/arrow/array/builder_binary.cc
+++ b/cpp/src/arrow/array/builder_binary.cc
@@ -40,6 +40,92 @@ namespace arrow {
 
 using internal::checked_cast;
 
+// ----------------------------------------------------------------------
+// Binary/StringView
+
+Status BinaryViewBuilder::AppendValues(const std::vector<std::string>& values,
+                                       const uint8_t* valid_bytes) {
+  // Only out-of-line strings need heap space; inline ones live in the StringHeader
+  std::size_t out_of_line_total = std::accumulate(
+      values.begin(), values.end(), 0ULL, [](uint64_t sum, const std::string& str) {
+        size_t length = str.size();
+        return sum + (length > StringHeader::kInlineSize ? length : 0);
+      });
+  RETURN_NOT_OK(Reserve(values.size()));
+  RETURN_NOT_OK(ReserveData(out_of_line_total));
+
+  if (valid_bytes != nullptr) {
+    for (std::size_t i = 0; i < values.size(); ++i) {
+      if (valid_bytes[i]) {
+        UnsafeAppend(values[i]);
+      } else {
+        UnsafeAppendNull();
+      }
+    }
+  } else {
+    for (std::size_t i = 0; i < values.size(); ++i) {
+      UnsafeAppend(values[i]);
+    }
+  }
+  // Validity bits were already written by the per-element appends above
+  return Status::OK();
+}
+
+Status BinaryViewBuilder::AppendArraySlice(const ArraySpan& array, int64_t offset,
+                                           int64_t length) {
+  auto bitmap = array.GetValues<uint8_t>(0, 0);
+  auto values = array.GetValues<StringHeader>(1) + offset;
+
+  int64_t out_of_line_total = 0;
+  for (int64_t i = 0; i < length; i++) {
+    if (!values[i].IsInline()) {
+      out_of_line_total += static_cast<int64_t>(values[i].size());
+    }
+  }
+  RETURN_NOT_OK(Reserve(length));
+  RETURN_NOT_OK(ReserveData(out_of_line_total));
+  for (int64_t i = 0; i < length; i++) {
+    if (!bitmap || bit_util::GetBit(bitmap, array.offset + offset + i)) {
+      if (values[i].IsInline()) {
+        UnsafeAppend(values[i]);
+      } else {
+        UnsafeAppend(values[i].data(), values[i].size());
+      }
+    } else {
+      UnsafeAppendNull();
+    }
+  }
+  return Status::OK();
+}
+
+Status BinaryViewBuilder::FinishInternal(std::shared_ptr<ArrayData>* out) {
+  ARROW_ASSIGN_OR_RAISE(auto null_bitmap, null_bitmap_builder_.FinishWithLength(length_));
+  ARROW_ASSIGN_OR_RAISE(auto data, data_builder_.FinishWithLength(length_));
+  BufferVector buffers = {null_bitmap, data};
+  for (auto&& buffer : data_heap_builder_.Finish()) {
+    buffers.push_back(std::move(buffer));
+  }
+  *out = ArrayData::Make(type(), length_, std::move(buffers), null_count_);
+  capacity_ = length_ = null_count_ = 0;
+  Reset();
+  return Status::OK();
+}
+
+Status BinaryViewBuilder::ReserveData(int64_t length) {
+  if (ARROW_PREDICT_FALSE(length > ValueSizeLimit())) {
+    return Status::CapacityError(
+        "BinaryView or StringView elements cannot reference "
+        "strings larger than 4GB");
+  }
+  return data_heap_builder_.Reserve(length);
+}
+
+void BinaryViewBuilder::Reset() {
+  ArrayBuilder::Reset();
+  data_builder_.Reset();
+  data_heap_builder_.Reset();
+}
+
 // ----------------------------------------------------------------------
 // Fixed width binary
 
diff --git a/cpp/src/arrow/array/builder_binary.h b/cpp/src/arrow/array/builder_binary.h
index 25183ca169..c716e6d225 100644
--- a/cpp/src/arrow/array/builder_binary.h
+++ b/cpp/src/arrow/array/builder_binary.h
@@ -459,6 +459,254 @@ class ARROW_EXPORT LargeStringBuilder : public LargeBinaryBuilder {
   std::shared_ptr<DataType> type() const override { return large_utf8(); }
 };
 
+// ----------------------------------------------------------------------
+// BinaryViewBuilder, StringViewBuilder
+//
+// The builders permit two styles of use: one where appended data is
+// accumulated in a third buffer that is appended to the resulting ArrayData,
+// and one where only the StringHeaders are appended. If you only want to
+// append StringHeaders, then use the Append(const StringHeader&) methods
+
+namespace internal {
+
+// Because we construct StringHeader objects incrementally, resizing buffers is
+// not an option as memory addresses for out-of-line strings will change. Thus,
+// we allocate medium-sized memory chunks and accumulate data in those, which
+// may result in some waste if there are many large-ish strings. If a string
+// comes along that does not fit into a block, we allocate a new block and
+// write into that.
+//
+// Later we can implement optimizations to continue filling underfull blocks
+// after encountering a large string that required allocating a new block.
+class ARROW_EXPORT StringHeapBuilder {
+ public:
+  static constexpr int64_t kDefaultBlocksize = 1 << 20;  // 1MB
+
+  StringHeapBuilder(MemoryPool* pool, int64_t blocksize = kDefaultBlocksize)
+      : pool_(pool), blocksize_(blocksize) {}
+
+  const uint8_t* UnsafeAppend(const uint8_t* data, int64_t num_bytes) {
+    memcpy(current_out_buffer_, data, static_cast<size_t>(num_bytes));
+    const uint8_t* result = current_out_buffer_;
+    current_out_buffer_ += num_bytes;
+    current_remaining_bytes_ -= num_bytes;
+    return result;
+  }
+
+  Result<const uint8_t*> Append(const uint8_t* data, int64_t num_bytes) {
+    if (num_bytes > current_remaining_bytes_) {
+      ARROW_RETURN_NOT_OK(Reserve(num_bytes));
+    }
+    return UnsafeAppend(data, num_bytes);
+  }
+
+  /// \brief Ensure that the indicated number of bytes can be appended via
+  /// UnsafeAppend operations without the need to allocate more memory
+  Status Reserve(int64_t num_bytes) {
+    if (num_bytes > current_remaining_bytes_) {
+      const int64_t block_bytes = num_bytes > blocksize_ ? num_bytes : blocksize_;
+      ARROW_ASSIGN_OR_RAISE(std::shared_ptr<Buffer> new_block,
+                            AllocateBuffer(block_bytes, pool_));
+      current_out_buffer_ = new_block->mutable_data();
+      current_remaining_bytes_ = block_bytes;  // updated only once allocation succeeded
+      blocks_.emplace_back(std::move(new_block));
+    }
+    return Status::OK();
+  }
+
+  void Reset() {
+    current_out_buffer_ = nullptr;
+    current_remaining_bytes_ = 0;
+    blocks_.clear();
+  }
+
+  int64_t current_remaining_bytes() const { return current_remaining_bytes_; }
+
+  std::vector<std::shared_ptr<Buffer>> Finish() {
+    current_out_buffer_ = nullptr;
+    current_remaining_bytes_ = 0;
+    return std::move(blocks_);
+  }
+
+ private:
+  MemoryPool* pool_;
+  const int64_t blocksize_;
+  std::vector<std::shared_ptr<Buffer>> blocks_;
+
+  uint8_t* current_out_buffer_ = nullptr;
+  int64_t current_remaining_bytes_ = 0;
+};
+
+}  // namespace internal
+
+class ARROW_EXPORT BinaryViewBuilder : public ArrayBuilder {
+ public:
+  using TypeClass = BinaryViewType;
+
+  BinaryViewBuilder(const std::shared_ptr<DataType>& type, MemoryPool* pool)
+      : BinaryViewBuilder(pool) {}
+
+  int64_t current_block_bytes_remaining() const {
+    return data_heap_builder_.current_remaining_bytes();
+  }
+
+  Status Append(const uint8_t* value, int64_t length) {
+    ARROW_RETURN_NOT_OK(Reserve(1));
+    if (length > static_cast<int64_t>(StringHeader::kInlineSize)) {
+      // String is stored out-of-line
+      if (ARROW_PREDICT_FALSE(length > ValueSizeLimit())) {
+        return Status::CapacityError(
+            "BinaryView or StringView elements cannot reference "
+            "strings larger than 4GB");
+      }
+      // Overwrite 'value' since we will use that for the StringHeader value below
+      ARROW_ASSIGN_OR_RAISE(value, data_heap_builder_.Append(value, length));
+    }
+    data_builder_.UnsafeAppend(StringHeader(value, length));
+    UnsafeAppendToBitmap(true);  // the one and only validity bit for this value
+    return Status::OK();
+  }
+
+  Status Append(const char* value, int64_t length) {
+    return Append(reinterpret_cast<const uint8_t*>(value), length);
+  }
+
+  Status Append(std::string_view value) {
+    return Append(value.data(), static_cast<int64_t>(value.size()));
+  }
+
+  Status Append(StringHeader value) {
+    ARROW_RETURN_NOT_OK(Reserve(1));
+    data_builder_.UnsafeAppend(value);  // header only; no out-of-line bytes are copied
+    UnsafeAppendToBitmap(true);
+    return Status::OK();
+  }
+
+  /// \brief Append without checking capacity
+  ///
+  /// Builder should have been presized using Reserve() and ReserveData(),
+  /// respectively, and the value must not be larger than 4GB
+  void UnsafeAppend(const uint8_t* value, int64_t length) {
+    if (length > static_cast<int64_t>(StringHeader::kInlineSize)) {
+      // String is stored out-of-line
+      // Overwrite 'value' since we will use that for the StringHeader value below
+      value = data_heap_builder_.UnsafeAppend(value, length);
+    }
+    data_builder_.UnsafeAppend(StringHeader(value, length));
+    UnsafeAppendToBitmap(true);  // the one and only validity bit for this value
+  }
+
+  void UnsafeAppend(const char* value, int64_t length) {
+    UnsafeAppend(reinterpret_cast<const uint8_t*>(value), length);
+  }
+
+  void UnsafeAppend(const std::string& value) {
+    UnsafeAppend(value.c_str(), static_cast<int64_t>(value.size()));
+  }
+
+  void UnsafeAppend(std::string_view value) {
+    UnsafeAppend(value.data(), static_cast<int64_t>(value.size()));
+  }
+
+  void UnsafeAppend(StringHeader value) {
+    data_builder_.UnsafeAppend(value);
+    UnsafeAppendToBitmap(true);
+  }
+
+  /// \brief Ensures there is enough allocated available capacity in the
+  /// out-of-line data heap to append the indicated number of bytes without
+  /// additional allocations
+  Status ReserveData(int64_t length);
+
+  Status AppendNulls(int64_t length) final {
+    ARROW_RETURN_NOT_OK(Reserve(length));
+    data_builder_.UnsafeAppend(length, StringHeader());  // zero
+    UnsafeSetNull(length);
+    return Status::OK();
+  }
+
+  /// \brief Append a single null element
+  Status AppendNull() final {
+    ARROW_RETURN_NOT_OK(Reserve(1));
+    data_builder_.UnsafeAppend(StringHeader());  // zero
+    UnsafeAppendToBitmap(false);
+    return Status::OK();
+  }
+
+  /// \brief Append an empty element (length-0 inline string)
+  Status AppendEmptyValue() final {
+    ARROW_RETURN_NOT_OK(Reserve(1));
+    data_builder_.UnsafeAppend(StringHeader(""));  // empty but valid (non-null)
+    UnsafeAppendToBitmap(true);
+    return Status::OK();
+  }
+
+  /// \brief Append several empty elements
+  Status AppendEmptyValues(int64_t length) final {
+    ARROW_RETURN_NOT_OK(Reserve(length));
+    data_builder_.UnsafeAppend(length, StringHeader(""));
+    UnsafeSetNotNull(length);
+    return Status::OK();
+  }
+
+  void UnsafeAppendNull() {
+    data_builder_.UnsafeAppend(StringHeader());
+    UnsafeAppendToBitmap(false);
+  }
+
+  void UnsafeAppendEmptyValue() {
+    data_builder_.UnsafeAppend(StringHeader(""));
+    UnsafeAppendToBitmap(true);
+  }
+
+  /// \brief Append a sequence of strings in one shot.
+  ///
+  /// \param[in] values a vector of strings
+  /// \param[in] valid_bytes an optional sequence of bytes where non-zero
+  /// indicates a valid (non-null) value
+  /// \return Status
+  Status AppendValues(const std::vector<std::string>& values,
+                      const uint8_t* valid_bytes = NULLPTR);
+
+  /// \brief Append a slice of a BinaryViewArray passed as an ArraySpan. Copies
+  /// the underlying out-of-line string memory to avoid memory lifetime issues
+  Status AppendArraySlice(const ArraySpan& array, int64_t offset,
+                          int64_t length) override;
+
+  void Reset() override;
+
+  Status Resize(int64_t capacity) override {
+    ARROW_RETURN_NOT_OK(CheckCapacity(capacity));
+    capacity = std::max(capacity, kMinBuilderCapacity);
+    ARROW_RETURN_NOT_OK(data_builder_.Resize(capacity));
+    return ArrayBuilder::Resize(capacity);
+  }
+
+  Status FinishInternal(std::shared_ptr<ArrayData>* out) override;
+
+  std::shared_ptr<DataType> type() const override { return binary_view(); }
+
+ protected:
+  explicit BinaryViewBuilder(MemoryPool* pool = default_memory_pool())
+      : ArrayBuilder(pool), data_builder_(pool), data_heap_builder_(pool) {}
+
+  static constexpr int64_t ValueSizeLimit() {
+    return std::numeric_limits<uint32_t>::max();
+  }
+
+  TypedBufferBuilder<StringHeader> data_builder_;
+
+  // Accumulates out-of-line data in fixed-size chunks which are then attached
+  // to the resulting ArrayData
+  internal::StringHeapBuilder data_heap_builder_;
+};
+
+class ARROW_EXPORT StringViewBuilder : public BinaryViewBuilder {
+ public:
+  using BinaryViewBuilder::BinaryViewBuilder;
+  std::shared_ptr<DataType> type() const override { return utf8_view(); }
+};
+
 // ----------------------------------------------------------------------
 // FixedSizeBinaryBuilder
 
diff --git a/cpp/src/arrow/array/builder_dict.cc b/cpp/src/arrow/array/builder_dict.cc
index 061fb60041..c99a6facee 100644
--- a/cpp/src/arrow/array/builder_dict.cc
+++ b/cpp/src/arrow/array/builder_dict.cc
@@ -193,6 +193,12 @@ Status DictionaryMemoTable::GetOrInsert(const BinaryType*, std::string_view valu
   return impl_->GetOrInsert<BinaryType>(value, out);
 }
 
+Status DictionaryMemoTable::GetOrInsert(const BinaryViewType*, std::string_view value,
+                                        int32_t* out) {
+  // Create BinaryArray dictionary for now
+  return impl_->GetOrInsert<BinaryType>(value, out);
+}
+
 Status DictionaryMemoTable::GetOrInsert(const LargeBinaryType*, std::string_view value,
                                         int32_t* out) {
   return impl_->GetOrInsert<LargeBinaryType>(value, out);
diff --git a/cpp/src/arrow/array/builder_dict.h b/cpp/src/arrow/array/builder_dict.h
index cb0aaf3099..0cc82930a1 100644
--- a/cpp/src/arrow/array/builder_dict.h
+++ b/cpp/src/arrow/array/builder_dict.h
@@ -60,6 +60,12 @@ struct DictionaryValue<T, enable_if_base_binary<T>> {
                                 BinaryType, LargeBinaryType>::type;
 };
 
+template <typename T>
+struct DictionaryValue<T, enable_if_binary_view_like<T>> {
+  using type = std::string_view;
+  using PhysicalType = BinaryViewType;
+};
+
 template <typename T>
 struct DictionaryValue<T, enable_if_fixed_size_binary<T>> {
   using type = std::string_view;
@@ -115,6 +121,10 @@ class ARROW_EXPORT DictionaryMemoTable {
   Status GetOrInsert(const BinaryType*, std::string_view value, int32_t* out);
   Status GetOrInsert(const LargeBinaryType*, std::string_view value, int32_t* out);
 
+  // TODO: Consider working StringHeader throughout the hashing machinery to
+  // benefit from faster comparisons, reduced need to allocate memory
+  Status GetOrInsert(const BinaryViewType*, std::string_view value, int32_t* out);
+
   class DictionaryMemoTableImpl;
   std::unique_ptr<DictionaryMemoTableImpl> impl_;
 };
diff --git a/cpp/src/arrow/array/concatenate.cc b/cpp/src/arrow/array/concatenate.cc
index aab734284f..3dd0ccea93 100644
--- a/cpp/src/arrow/array/concatenate.cc
+++ b/cpp/src/arrow/array/concatenate.cc
@@ -227,6 +227,10 @@ class ConcatenateImpl {
     return ConcatenateBuffers(value_buffers, pool_).Value(&out_->buffers[2]);
   }
 
+  Status Visit(const BinaryViewType&) {
+    return Status::NotImplemented("binary / string view");
+  }
+
   Status Visit(const ListType&) {
     std::vector<Range> value_ranges;
     ARROW_ASSIGN_OR_RAISE(auto index_buffers, Buffers(1, sizeof(int32_t)));
diff --git a/cpp/src/arrow/array/util.cc b/cpp/src/arrow/array/util.cc
index c0cdcab730..ac9d76d469 100644
--- a/cpp/src/arrow/array/util.cc
+++ b/cpp/src/arrow/array/util.cc
@@ -264,6 +264,14 @@ class ArrayDataEndianSwapper {
     return Status::OK();
   }
 
+  template <typename T>
+  enable_if_t<std::is_same<BinaryViewType, T>::value ||
+                  std::is_same<StringViewType, T>::value,
+              Status>
+  Visit(const T& type) {
+    return Status::NotImplemented("Binary / string view");
+  }
+
   Status Visit(const ListType& type) {
     RETURN_NOT_OK(SwapOffsets<int32_t>(1));
     return Status::OK();
@@ -596,6 +604,11 @@ class RepeatedArrayFactory {
     return Status::OK();
   }
 
+  template <typename T>
+  enable_if_binary_view_like<T, Status> Visit(const T&) {
+    return Status::NotImplemented("binary / string view");
+  }
+
   template <typename T>
   enable_if_var_size_list<T, Status> Visit(const T& type) {
     using ScalarType = typename TypeTraits<T>::ScalarType;
diff --git a/cpp/src/arrow/array/validate.cc b/cpp/src/arrow/array/validate.cc
index 56470ac74b..cddb086005 100644
--- a/cpp/src/arrow/array/validate.cc
+++ b/cpp/src/arrow/array/validate.cc
@@ -47,6 +47,19 @@ struct UTF8DataValidator {
     return Status::NotImplemented("");
   }
 
+  Status Visit(const StringViewType&) {
+    util::InitializeUTF8();
+
+    const auto* values = data.GetValues<StringHeader>(1);
+    for (int64_t i = 0; i < data.length; ++i) {
+      if (ARROW_PREDICT_FALSE(!util::ValidateUTF8(
+              reinterpret_cast<const uint8_t*>(values[i].data()), values[i].size()))) {
+        return Status::Invalid("Invalid UTF8 sequence at string index ", i);
+      }
+    }
+    return Status::OK();
+  }
+
   template <typename StringType>
   enable_if_string<StringType, Status> Visit(const StringType&) {
     util::InitializeUTF8();
@@ -247,6 +260,10 @@ struct ValidateArrayImpl {
 
   Status Visit(const LargeBinaryType& type) { return ValidateBinaryLike(type); }
 
+  Status Visit(const BinaryViewType& type) {
+    return Status::NotImplemented("binary / string view");
+  }
+
   Status Visit(const ListType& type) { return ValidateListLike(type); }
 
   Status Visit(const LargeListType& type) { return ValidateListLike(type); }
@@ -716,7 +733,8 @@ Status ValidateArrayFull(const Array& array) { return ValidateArrayFull(*array.d
 
 ARROW_EXPORT
 Status ValidateUTF8(const ArrayData& data) {
-  DCHECK(data.type->id() == Type::STRING || data.type->id() == Type::LARGE_STRING);
+  DCHECK(data.type->id() == Type::STRING || data.type->id() == Type::STRING_VIEW ||
+         data.type->id() == Type::LARGE_STRING);
   UTF8DataValidator validator{data};
   return VisitTypeInline(*data.type, &validator);
 }
diff --git a/cpp/src/arrow/compare.cc b/cpp/src/arrow/compare.cc
index baadd10cca..8ccc645046 100644
--- a/cpp/src/arrow/compare.cc
+++ b/cpp/src/arrow/compare.cc
@@ -259,6 +259,11 @@ class RangeDataEqualsImpl {
   // Also matches StringType
   Status Visit(const BinaryType& type) { return CompareBinary(type); }
 
+  // Also matches StringViewType
+  Status Visit(const BinaryViewType& type) {
+    return Status::NotImplemented("Binary / string view");
+  }
+
   // Also matches LargeStringType
   Status Visit(const LargeBinaryType& type) { return CompareBinary(type); }
 
@@ -577,7 +582,7 @@ class TypeEqualsVisitor {
 
   template <typename T>
   enable_if_t<is_null_type<T>::value || is_primitive_ctype<T>::value ||
-                  is_base_binary_type<T>::value,
+                  is_base_binary_type<T>::value || is_binary_view_like_type<T>::value,
               Status>
   Visit(const T&) {
     result_ = true;
@@ -729,6 +734,12 @@ class ScalarEqualsVisitor {
     return Status::OK();
   }
 
+  Status Visit(const BinaryViewScalar& left) {
+    const auto& right = checked_cast<const BinaryViewScalar&>(right_);
+    result_ = left.value == right.value;
+    return Status::OK();
+  }
+
   Status Visit(const Decimal128Scalar& left) {
     const auto& right = checked_cast<const Decimal128Scalar&>(right_);
     result_ = left.value == right.value;
diff --git a/cpp/src/arrow/ipc/feather.cc b/cpp/src/arrow/ipc/feather.cc
index b6d3a3d7d8..1ef076fac4 100644
--- a/cpp/src/arrow/ipc/feather.cc
+++ b/cpp/src/arrow/ipc/feather.cc
@@ -536,8 +536,8 @@ struct ArrayWriterV1 {
       is_nested_type<T>::value || is_null_type<T>::value || is_decimal_type<T>::value ||
           std::is_same<DictionaryType, T>::value || is_duration_type<T>::value ||
           is_interval_type<T>::value || is_fixed_size_binary_type<T>::value ||
-          std::is_same<Date64Type, T>::value || std::is_same<Time64Type, T>::value ||
-          std::is_same<ExtensionType, T>::value,
+          is_binary_view_like_type<T>::value || std::is_same<Date64Type, T>::value ||
+          std::is_same<Time64Type, T>::value || std::is_same<ExtensionType, T>::value,
       Status>::type
   Visit(const T& type) {
     return Status::NotImplemented(type.ToString());
diff --git a/cpp/src/arrow/ipc/metadata_internal.cc b/cpp/src/arrow/ipc/metadata_internal.cc
index 2e450b9d46..367b31d5dd 100644
--- a/cpp/src/arrow/ipc/metadata_internal.cc
+++ b/cpp/src/arrow/ipc/metadata_internal.cc
@@ -523,6 +523,16 @@ class FieldToFlatbufferVisitor {
     return Status::OK();
   }
 
+  Status Visit(const BinaryViewType& type) {
+    // BinaryView will be written to IPC as a normal binary array
+    return Visit(BinaryType());
+  }
+
+  Status Visit(const StringViewType& type) {
+    // StringView will be written to IPC as a normal UTF8 string array
+    return Visit(StringType());
+  }
+
   Status Visit(const LargeBinaryType& type) {
     fb_type_ = flatbuf::Type::LargeBinary;
     type_offset_ = flatbuf::CreateLargeBinary(fbb_).Union();
diff --git a/cpp/src/arrow/ipc/reader.cc b/cpp/src/arrow/ipc/reader.cc
index a1b17afaaf..843d5917b3 100644
--- a/cpp/src/arrow/ipc/reader.cc
+++ b/cpp/src/arrow/ipc/reader.cc
@@ -348,6 +348,11 @@ class ArrayLoader {
     return LoadBinary<T>(type.id());
   }
 
+  Status Visit(const BinaryViewType& type) {
+    DCHECK(false);
+    return Status::NotImplemented("Reading IPC format to binary view is not supported");
+  }
+
   Status Visit(const FixedSizeBinaryType& type) {
     out_->buffers.resize(2);
     RETURN_NOT_OK(LoadCommon(type.id()));
diff --git a/cpp/src/arrow/ipc/writer.cc b/cpp/src/arrow/ipc/writer.cc
index b89604e6fe..d68da651f3 100644
--- a/cpp/src/arrow/ipc/writer.cc
+++ b/cpp/src/arrow/ipc/writer.cc
@@ -388,6 +388,10 @@ class RecordBatchSerializer {
     return Status::OK();
   }
 
+  Status Visit(const BinaryViewArray& array) {
+    return Status::NotImplemented("Binary / string view type");
+  }
+
   Status Visit(const FixedSizeListArray& array) {
     --max_recursion_depth_;
     auto size = array.list_type()->list_size();
diff --git a/cpp/src/arrow/json/test_common.h b/cpp/src/arrow/json/test_common.h
index c01036047c..86a03c82ab 100644
--- a/cpp/src/arrow/json/test_common.h
+++ b/cpp/src/arrow/json/test_common.h
@@ -110,8 +110,7 @@ struct GenerateImpl {
     return OK(writer.Double(val));
   }
 
-  template <typename T>
-  enable_if_base_binary<T, Status> Visit(const T&) {
+  Status GenerateAscii(const DataType&) {
     auto size = std::poisson_distribution<>{4}(e);
     std::uniform_int_distribution<uint16_t> gen_char(32, 126);  // FIXME generate UTF8
     std::string s(size, '\0');
@@ -119,6 +118,13 @@ struct GenerateImpl {
     return OK(writer.String(s.c_str()));
   }
 
+  template <typename T>
+  enable_if_base_binary<T, Status> Visit(const T& t) {
+    return GenerateAscii(t);
+  }
+
+  Status Visit(const BinaryViewType& t) { return GenerateAscii(t); }
+
   template <typename T>
   enable_if_list_like<T, Status> Visit(const T& t) {
     auto size = std::poisson_distribution<>{4}(e);
diff --git a/cpp/src/arrow/scalar.cc b/cpp/src/arrow/scalar.cc
index 0ca08d7a82..d139845bd7 100644
--- a/cpp/src/arrow/scalar.cc
+++ b/cpp/src/arrow/scalar.cc
@@ -70,6 +70,12 @@ struct ScalarHashImpl {
 
   Status Visit(const BaseBinaryScalar& s) { return BufferHash(*s.value); }
 
+  Status Visit(const BinaryViewScalar& s) {
+    const StringHeader& v = s.value;
+    hash_ ^= internal::ComputeStringHash<1>(v.data(), v.size());
+    return Status::OK();
+  }
+
   template <typename T>
   Status Visit(const TemporalScalar<T>& s) {
     return ValueHash(s);
@@ -226,6 +232,14 @@ struct ScalarValidateImpl {
 
   Status Visit(const StringScalar& s) { return ValidateStringScalar(s); }
 
+  Status Visit(const BinaryViewScalar& s) {  // validation not written yet
+    return Status::NotImplemented("Binary view");
+  }
+
+  Status Visit(const StringViewScalar& s) {  // validation not written yet
+    return Status::NotImplemented("String view");
+  }
+
   Status Visit(const LargeStringScalar& s) { return ValidateStringScalar(s); }
 
   template <typename ScalarType>
diff --git a/cpp/src/arrow/scalar.h b/cpp/src/arrow/scalar.h
index cf852dff36..9b7f604132 100644
--- a/cpp/src/arrow/scalar.h
+++ b/cpp/src/arrow/scalar.h
@@ -37,6 +37,7 @@
 #include "arrow/type_traits.h"
 #include "arrow/util/compare.h"
 #include "arrow/util/decimal.h"
+#include "arrow/util/string_header.h"
 #include "arrow/util/visibility.h"
 #include "arrow/visit_type_inline.h"
 
@@ -282,6 +283,34 @@ struct ARROW_EXPORT StringScalar : public BinaryScalar {
   StringScalar() : StringScalar(utf8()) {}
 };
 
+/// \brief Scalar wrapping a single StringHeader; TypeClass is BinaryViewType
+struct ARROW_EXPORT BinaryViewScalar : public internal::PrimitiveScalarBase {
+  using TypeClass = BinaryViewType;
+  using internal::PrimitiveScalarBase::PrimitiveScalarBase;
+
+  StringHeader value;
+
+  // Default: binary_view() type, base flag `false` (presumably is_valid; confirm).
+  BinaryViewScalar() : internal::PrimitiveScalarBase(binary_view(), false) {}
+  // With a value the base flag is `true`; the type defaults to binary_view().
+  explicit BinaryViewScalar(StringHeader value)
+      : BinaryViewScalar(value, binary_view()) {}
+  explicit BinaryViewScalar(StringHeader value, std::shared_ptr<DataType> type)
+      : internal::PrimitiveScalarBase(std::move(type), true), value(value) {}
+
+  void* mutable_data() override { return static_cast<void*>(&value); }
+  std::string_view view() const override { return std::string_view(value); }
+};
+
+/// \brief BinaryViewScalar whose type is utf8_view()
+struct ARROW_EXPORT StringViewScalar : public BinaryViewScalar {
+  using TypeClass = StringViewType;
+
+  StringViewScalar() : BinaryViewScalar(utf8_view()) {}
+  // StringHeader is a small value type, so it is handed on by plain copy.
+  explicit StringViewScalar(StringHeader value) : BinaryViewScalar(value, utf8_view()) {}
+};
+
 struct ARROW_EXPORT LargeBinaryScalar : public BaseBinaryScalar {
   using BaseBinaryScalar::BaseBinaryScalar;
   using TypeClass = LargeBinaryType;
diff --git a/cpp/src/arrow/testing/json_internal.cc b/cpp/src/arrow/testing/json_internal.cc
index c1d45aa2e0..a296e0fba7 100644
--- a/cpp/src/arrow/testing/json_internal.cc
+++ b/cpp/src/arrow/testing/json_internal.cc
@@ -227,8 +227,8 @@ class SchemaWriter {
 
   template <typename T>
   enable_if_t<is_null_type<T>::value || is_primitive_ctype<T>::value ||
-              is_base_binary_type<T>::value || is_base_list_type<T>::value ||
-              is_struct_type<T>::value>
+              is_base_binary_type<T>::value || is_binary_view_like_type<T>::value ||
+              is_base_list_type<T>::value || is_struct_type<T>::value>
   WriteTypeMetadata(const T& type) {}
 
   void WriteTypeMetadata(const MapType& type) {
@@ -386,6 +386,8 @@ class SchemaWriter {
   Status Visit(const TimeType& type) { return WritePrimitive("time", type); }
   Status Visit(const StringType& type) { return WriteVarBytes("utf8", type); }
   Status Visit(const BinaryType& type) { return WriteVarBytes("binary", type); }
+  Status Visit(const StringViewType& type) { return WritePrimitive("utf8_view", type); }
+  Status Visit(const BinaryViewType& type) { return WritePrimitive("binary_view", type); }
   Status Visit(const LargeStringType& type) { return WriteVarBytes("largeutf8", type); }
   Status Visit(const LargeBinaryType& type) { return WriteVarBytes("largebinary", type); }
   Status Visit(const FixedSizeBinaryType& type) {
@@ -1320,6 +1322,10 @@ class ArrayReader {
     return FinishBuilder(&builder);
   }
 
+  Status Visit(const BinaryViewType& type) {  // JSON reading of views: unsupported
+    return Status::NotImplemented("Binary / string view");
+  }
+
   Status Visit(const DayTimeIntervalType& type) {
     DayTimeIntervalBuilder builder(pool_);
 
diff --git a/cpp/src/arrow/type.cc b/cpp/src/arrow/type.cc
index ea9525404c..b976260ccd 100644
--- a/cpp/src/arrow/type.cc
+++ b/cpp/src/arrow/type.cc
@@ -59,10 +59,14 @@ constexpr Type::type FixedSizeListType::type_id;
 
 constexpr Type::type BinaryType::type_id;
 
+constexpr Type::type BinaryViewType::type_id;
+
 constexpr Type::type LargeBinaryType::type_id;
 
 constexpr Type::type StringType::type_id;
 
+constexpr Type::type StringViewType::type_id;
+
 constexpr Type::type LargeStringType::type_id;
 
 constexpr Type::type FixedSizeBinaryType::type_id;
@@ -188,7 +192,9 @@ std::string ToString(Type::type id) {
     TO_STRING_CASE(INTERVAL_MONTHS)
     TO_STRING_CASE(DURATION)
     TO_STRING_CASE(STRING)
+    TO_STRING_CASE(STRING_VIEW)
     TO_STRING_CASE(BINARY)
+    TO_STRING_CASE(BINARY_VIEW)
     TO_STRING_CASE(LARGE_STRING)
     TO_STRING_CASE(LARGE_BINARY)
     TO_STRING_CASE(FIXED_SIZE_BINARY)
@@ -564,10 +570,14 @@ std::string FixedSizeListType::ToString() const {
 
 std::string BinaryType::ToString() const { return "binary"; }
 
+std::string BinaryViewType::ToString() const { return "binary_view"; }
+
 std::string LargeBinaryType::ToString() const { return "large_binary"; }
 
 std::string StringType::ToString() const { return "string"; }
 
+std::string StringViewType::ToString() const { return "string_view"; }
+
 std::string LargeStringType::ToString() const { return "large_string"; }
 
 int FixedSizeBinaryType::bit_width() const { return CHAR_BIT * byte_width(); }
@@ -2114,8 +2124,10 @@ PARAMETER_LESS_FINGERPRINT(HalfFloat)
 PARAMETER_LESS_FINGERPRINT(Float)
 PARAMETER_LESS_FINGERPRINT(Double)
 PARAMETER_LESS_FINGERPRINT(Binary)
+PARAMETER_LESS_FINGERPRINT(BinaryView)
 PARAMETER_LESS_FINGERPRINT(LargeBinary)
 PARAMETER_LESS_FINGERPRINT(String)
+PARAMETER_LESS_FINGERPRINT(StringView)
 PARAMETER_LESS_FINGERPRINT(LargeString)
 PARAMETER_LESS_FINGERPRINT(Date32)
 PARAMETER_LESS_FINGERPRINT(Date64)
@@ -2283,8 +2295,10 @@ TYPE_FACTORY(float16, HalfFloatType)
 TYPE_FACTORY(float32, FloatType)
 TYPE_FACTORY(float64, DoubleType)
 TYPE_FACTORY(utf8, StringType)
+TYPE_FACTORY(utf8_view, StringViewType)
 TYPE_FACTORY(large_utf8, LargeStringType)
 TYPE_FACTORY(binary, BinaryType)
+TYPE_FACTORY(binary_view, BinaryViewType)
 TYPE_FACTORY(large_binary, LargeBinaryType)
 TYPE_FACTORY(date64, Date64Type)
 TYPE_FACTORY(date32, Date32Type)
@@ -2532,7 +2546,7 @@ void InitStaticData() {
   // * Time32
   // * Time64
   // * Timestamp
-  g_primitive_types = {null(), boolean(), date32(), date64()};
+  g_primitive_types = {null(), boolean(), date32(), date64(), binary_view(), utf8_view()};
   Extend(g_numeric_types, &g_primitive_types);
   Extend(g_base_binary_types, &g_primitive_types);
 }
diff --git a/cpp/src/arrow/type.h b/cpp/src/arrow/type.h
index 415aaacf1c..f4e082b3f6 100644
--- a/cpp/src/arrow/type.h
+++ b/cpp/src/arrow/type.h
@@ -33,6 +33,7 @@
 #include "arrow/util/checked_cast.h"
 #include "arrow/util/endian.h"
 #include "arrow/util/macros.h"
+#include "arrow/util/string_header.h"
 #include "arrow/util/visibility.h"
 #include "arrow/visitor.h"  // IWYU pragma: keep
 
@@ -686,6 +687,33 @@ class ARROW_EXPORT BinaryType : public BaseBinaryType {
   explicit BinaryType(Type::type logical_type) : BaseBinaryType(logical_type) {}
 };
 
+/// \brief Concrete type class for variable-size binary view data using
+/// StringHeader structs (one fixed-width header per value, see layout())
+class ARROW_EXPORT BinaryViewType : public DataType {
+ public:
+  static constexpr Type::type type_id = Type::BINARY_VIEW;
+  static constexpr bool is_utf8 = false;
+  using PhysicalType = BinaryViewType;
+
+  static constexpr const char* type_name() { return "binary_view"; }
+
+  BinaryViewType() : BinaryViewType(Type::BINARY_VIEW) {}
+
+  DataTypeLayout layout() const override {
+    return DataTypeLayout(
+        {DataTypeLayout::Bitmap(), DataTypeLayout::FixedWidth(sizeof(StringHeader))});
+  }
+
+  std::string ToString() const override;
+  std::string name() const override { return "binary_view"; }
+
+ protected:
+  std::string ComputeFingerprint() const override;
+
+  // Allow subclasses like StringViewType to change the logical type.
+  explicit BinaryViewType(Type::type logical_type) : DataType(logical_type) {}
+};
+
 /// \brief Concrete type class for large variable-size binary data
 class ARROW_EXPORT LargeBinaryType : public BaseBinaryType {
  public:
@@ -732,6 +760,24 @@ class ARROW_EXPORT StringType : public BinaryType {
   std::string ComputeFingerprint() const override;
 };
 
+/// \brief Concrete type class for variable-size string view data, utf8-encoded
+class ARROW_EXPORT StringViewType : public BinaryViewType {
+ public:
+  static constexpr Type::type type_id = Type::STRING_VIEW;
+  static constexpr bool is_utf8 = true;
+  using PhysicalType = BinaryViewType;
+
+  static constexpr const char* type_name() { return "utf8_view"; }
+
+  StringViewType() : BinaryViewType(Type::STRING_VIEW) {}
+
+  std::string ToString() const override;
+  std::string name() const override { return "utf8_view"; }
+
+ protected:
+  std::string ComputeFingerprint() const override;
+};
+
 /// \brief Concrete type class for large variable-size string data, utf8-encoded
 class ARROW_EXPORT LargeStringType : public LargeBinaryType {
  public:
diff --git a/cpp/src/arrow/type_fwd.h b/cpp/src/arrow/type_fwd.h
index ba0e635f73..1066d50321 100644
--- a/cpp/src/arrow/type_fwd.h
+++ b/cpp/src/arrow/type_fwd.h
@@ -108,6 +108,11 @@ class BinaryArray;
 class BinaryBuilder;
 struct BinaryScalar;
 
+class BinaryViewType;
+class BinaryViewArray;
+class BinaryViewBuilder;
+struct BinaryViewScalar;
+
 class LargeBinaryType;
 class LargeBinaryArray;
 class LargeBinaryBuilder;
@@ -123,6 +128,11 @@ class StringArray;
 class StringBuilder;
 struct StringScalar;
 
+class StringViewType;
+class StringViewArray;
+class StringViewBuilder;
+struct StringViewScalar;
+
 class LargeStringType;
 class LargeStringArray;
 class LargeStringBuilder;
@@ -405,6 +415,13 @@ struct Type {
     /// Calendar interval type with three fields.
     INTERVAL_MONTH_DAY_NANO,
 
+    /// String (UTF8) view type with 4-byte prefix and inline small string
+    /// optimization
+    STRING_VIEW,
+
+    /// Bytes view type with 4-byte prefix and inline small string optimization
+    BINARY_VIEW,
+
     // Leave this at the end
     MAX_ID
   };
@@ -446,10 +463,14 @@ ARROW_EXPORT const std::shared_ptr<DataType>& float32();
 ARROW_EXPORT const std::shared_ptr<DataType>& float64();
 /// \brief Return a StringType instance
 ARROW_EXPORT const std::shared_ptr<DataType>& utf8();
+/// \brief Return a StringViewType instance
+ARROW_EXPORT const std::shared_ptr<DataType>& utf8_view();
 /// \brief Return a LargeStringType instance
 ARROW_EXPORT const std::shared_ptr<DataType>& large_utf8();
 /// \brief Return a BinaryType instance
 ARROW_EXPORT const std::shared_ptr<DataType>& binary();
+/// \brief Return a BinaryViewType instance
+ARROW_EXPORT const std::shared_ptr<DataType>& binary_view();
 /// \brief Return a LargeBinaryType instance
 ARROW_EXPORT const std::shared_ptr<DataType>& large_binary();
 /// \brief Return a Date32Type instance
diff --git a/cpp/src/arrow/type_test.cc b/cpp/src/arrow/type_test.cc
index 954ad63c8a..ad0804be8b 100644
--- a/cpp/src/arrow/type_test.cc
+++ b/cpp/src/arrow/type_test.cc
@@ -1189,9 +1189,21 @@ TEST(TestBinaryType, ToString) {
 TEST(TestStringType, ToString) {
   StringType str;
   ASSERT_EQ(str.id(), Type::STRING);
+  ASSERT_EQ(str.name(), std::string("utf8"));
+  ASSERT_EQ(str.type_name(), std::string("utf8"));
   ASSERT_EQ(str.ToString(), std::string("string"));
 }
 
+TEST(TestBinaryViewType, ToString) {
+  BinaryViewType t1;
+  BinaryViewType e1;
+  StringViewType t2;
+  AssertTypeEqual(t1, e1);
+  AssertTypeNotEqual(t1, t2);
+  ASSERT_EQ(t1.id(), Type::BINARY_VIEW);
+  ASSERT_EQ(t1.ToString(), std::string("binary_view"));
+}
+
 TEST(TestLargeBinaryTypes, ToString) {
   BinaryType bt1;
   LargeBinaryType t1;
diff --git a/cpp/src/arrow/type_traits.h b/cpp/src/arrow/type_traits.h
index 5873969066..dcd7c36ba2 100644
--- a/cpp/src/arrow/type_traits.h
+++ b/cpp/src/arrow/type_traits.h
@@ -341,6 +341,16 @@ struct TypeTraits<BinaryType> {
   static inline std::shared_ptr<DataType> type_singleton() { return binary(); }
 };
 
+template <>
+struct TypeTraits<BinaryViewType> {
+  using ArrayType = BinaryViewArray;
+  using BuilderType = BinaryViewBuilder;
+  using ScalarType = BinaryViewScalar;
+  using CType = StringHeader;
+  constexpr static bool is_parameter_free = true;
+  static inline std::shared_ptr<DataType> type_singleton() { return binary_view(); }
+};
+
 template <>
 struct TypeTraits<LargeBinaryType> {
   using ArrayType = LargeBinaryArray;
@@ -371,6 +381,16 @@ struct TypeTraits<StringType> {
   static inline std::shared_ptr<DataType> type_singleton() { return utf8(); }
 };
 
+template <>
+struct TypeTraits<StringViewType> {
+  using ArrayType = StringViewArray;
+  using BuilderType = StringViewBuilder;
+  using ScalarType = StringViewScalar;
+  using CType = StringHeader;
+  constexpr static bool is_parameter_free = true;
+  static inline std::shared_ptr<DataType> type_singleton() { return utf8_view(); }
+};
+
 template <>
 struct TypeTraits<LargeStringType> {
   using ArrayType = LargeStringArray;
@@ -390,6 +410,11 @@ struct CTypeTraits<std::string> : public TypeTraits<StringType> {
   using ArrowType = StringType;
 };
 
+template <>
+struct CTypeTraits<StringHeader> : public TypeTraits<BinaryViewType> {
+  using ArrowType = BinaryViewType;
+};
+
 template <>
 struct CTypeTraits<const char*> : public CTypeTraits<std::string> {};
 
@@ -605,9 +630,28 @@ using is_string_type =
 template <typename T, typename R = void>
 using enable_if_string = enable_if_t<is_string_type<T>::value, R>;
 
+template <typename T>
+using is_binary_view_like_type = std::is_base_of<BinaryViewType, T>;
+
+template <typename T>
+using is_binary_view_type = std::is_same<BinaryViewType, T>;
+
+template <typename T>
+using is_string_view_type = std::is_same<StringViewType, T>;
+
+template <typename T, typename R = void>
+using enable_if_binary_view_like = enable_if_t<is_binary_view_like_type<T>::value, R>;
+
+template <typename T, typename R = void>
+using enable_if_binary_view = enable_if_t<is_binary_view_type<T>::value, R>;
+
+template <typename T, typename R = void>
+using enable_if_string_view = enable_if_t<is_string_view_type<T>::value, R>;
+
 template <typename T>
 using is_string_like_type =
-    std::integral_constant<bool, is_base_binary_type<T>::value && T::is_utf8>;
+    std::integral_constant<bool, (is_base_binary_type<T>::value && T::is_utf8) ||
+                                     is_string_view_type<T>::value>;
 
 template <typename T, typename R = void>
 using enable_if_string_like = enable_if_t<is_string_like_type<T>::value, R>;
@@ -630,10 +674,9 @@ template <typename T, typename R = void>
 using enable_if_fixed_width_type = enable_if_t<is_fixed_width_type<T>::value, R>;
 
 template <typename T>
-using is_binary_like_type =
-    std::integral_constant<bool, (is_base_binary_type<T>::value &&
-                                  !is_string_like_type<T>::value) ||
-                                     is_fixed_size_binary_type<T>::value>;
+using is_binary_like_type = std::integral_constant<
+    bool, (is_base_binary_type<T>::value && !is_string_like_type<T>::value) ||
+              is_binary_view_type<T>::value || is_fixed_size_binary_type<T>::value>;
 
 template <typename T, typename R = void>
 using enable_if_binary_like = enable_if_t<is_binary_like_type<T>::value, R>;
@@ -786,8 +829,10 @@ using enable_if_has_c_type = enable_if_t<has_c_type<T>::value, R>;
 template <typename T>
 using has_string_view =
     std::integral_constant<bool, std::is_same<BinaryType, T>::value ||
-                                     std::is_same<LargeBinaryType, T>::value ||
+                           std::is_same<BinaryViewType, T>::value ||
+                           std::is_same<LargeBinaryType, T>::value ||
                                      std::is_same<StringType, T>::value ||
+                                     std::is_same<StringViewType, T>::value ||
                                      std::is_same<LargeStringType, T>::value ||
                                      std::is_same<FixedSizeBinaryType, T>::value>;
 
diff --git a/cpp/src/arrow/util/string_header.h b/cpp/src/arrow/util/string_header.h
new file mode 100644
index 0000000000..29f378a580
--- /dev/null
+++ b/cpp/src/arrow/util/string_header.h
@@ -0,0 +1,219 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+/*
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <cassert>
+#include <cstdint>
+#include <cstring>
+#include <ostream>
+#include <string>
+#include <string_view>
+
+namespace arrow {
+
+// Variable length string or binary with 4 byte prefix and inline optimization
+// for small values (12 bytes or fewer). This is similar to std::string_view
+// except that the referenced data is limited in size to UINT32_MAX and up to the
+// first four bytes of the string are copied into the struct. The prefix allows
+// failing comparisons early and can reduce the CPU cache working set when
+// dealing with short strings.
+//
+// Short string   |----|----|--------|
+//                 ^    ^      ^
+//                 |    |      |
+//                 size prefix remaining in-line portion
+//
+// Long string    |----|----|--------|
+//                 ^    ^      ^
+//                 |    |      |
+//                 size prefix pointer to out-of-line portion
+//
+// Adapted from TU Munich's UmbraDB [1], Velox, DuckDB.
+//
+// [1]: https://db.in.tum.de/~freitag/papers/p29-neumann-cidr20.pdf
+struct StringHeader {
+ public:
+  using value_type = char;
+
+  static constexpr size_t kPrefixSize = 4;
+  static constexpr size_t kInlineSize = 12;
+
+  StringHeader() {
+    static_assert(sizeof(StringHeader) == 16, "struct expected to be exactly 16 bytes");
+    memset(this, 0, sizeof(StringHeader));
+  }
+
+  explicit StringHeader(uint32_t size) : size_(size) {
+    memset(prefix_, 0, kPrefixSize);
+    memset(&value_, 0, sizeof(value_));  // all 8 bytes, for any pointer width
+  }
+
+  StringHeader(const char* data, size_t len) : size_(static_cast<uint32_t>(len)) {
+    // TODO: better option than assert? Sizes above UINT32_MAX would be truncated.
+    assert(len <= UINT32_MAX);
+    assert(data || size_ == 0);
+    if (IsInline()) {
+      // Zero the prefix and the trailing 8 bytes before copying, so that unused
+      // bytes are deterministic and inline strings can be compared for equality
+      // with 2 int64 compares. memset clears the whole union; assigning a null
+      // pointer would leave bytes untouched where pointers are narrower than 8.
+      memset(prefix_, 0, kPrefixSize);
+      memset(&value_, 0, sizeof(value_));
+      if (size_ != 0) {
+        // small string: inlined. Bytes past the prefix continue into value_.
+        memcpy(prefix_, data, size_);
+      }
+    } else {
+      // large string: keep the first kPrefixSize bytes and store the pointer
+      memcpy(prefix_, data, kPrefixSize);
+      value_.data = data;
+    }
+  }
+
+  StringHeader(const uint8_t* data, int64_t len)
+      : StringHeader(reinterpret_cast<const char*>(data), static_cast<size_t>(len)) {}
+
+  // Making StringHeader implicitly constructible/convertible from char* and
+  // string literals, in order to allow for a more flexible API and optional
+  // interoperability. E.g:
+  //
+  //   StringHeader bh = "literal";
+  //   std::optional<StringHeader> obh = "literal";
+  //
+  /* implicit */ StringHeader(const char* data) : StringHeader(data, strlen(data)) {}
+
+  explicit StringHeader(const std::string& value)
+      : StringHeader(value.data(), value.size()) {}
+
+  explicit StringHeader(const std::string_view& value)
+      : StringHeader(value.data(), value.size()) {}
+
+  bool IsInline() const { return IsInline(size_); }
+
+  static constexpr bool IsInline(uint32_t size) { return size <= kInlineSize; }
+
+  const char* data() const { return IsInline() ? prefix_ : value_.data; }
+
+  size_t size() const { return size_; }
+
+  size_t capacity() const { return size_; }
+
+  friend std::ostream& operator<<(std::ostream& os, const StringHeader& header) {
+    os.write(header.data(), header.size());
+    return os;
+  }
+
+  bool operator==(const StringHeader& other) const {
+    // Compare lengths and first 4 characters.
+    if (SizeAndPrefixAsInt64() != other.SizeAndPrefixAsInt64()) {
+      return false;
+    }
+    if (IsInline()) {
+      // The inline part is zeroed at construction, so we can compare
+      // a word at a time if data extends past 'prefix_'.
+      return size_ <= kPrefixSize || InlinedAsInt64() == other.InlinedAsInt64();
+    }
+    // Sizes are equal and this is not inline, therefore both are out
+    // of line and have kPrefixSize first in common.
+    return memcmp(value_.data + kPrefixSize, other.value_.data + kPrefixSize,
+                  size_ - kPrefixSize) == 0;
+  }
+
+  bool operator!=(const StringHeader& other) const { return !(*this == other); }
+
+  // Returns 0, if this == other
+  //       < 0, if this < other
+  //       > 0, if this > other
+  int32_t Compare(const StringHeader& other) const {
+    if (PrefixAsInt() != other.PrefixAsInt()) {
+      // The result is decided on prefix. The shorter will be less
+      // because the prefix is padded with zeros.
+      return memcmp(prefix_, other.prefix_, kPrefixSize);
+    }
+    const uint32_t common = size_ < other.size_ ? size_ : other.size_;
+    if (common > kPrefixSize) {
+      // Both extend past the equal prefix: compare the rest of the shared length.
+      // data() yields the inline bytes or the out-of-line pointer as appropriate.
+      const int32_t result =
+          memcmp(data() + kPrefixSize, other.data() + kPrefixSize, common - kPrefixSize);
+      if (result != 0) return result;
+    }
+    // Equal over the shared length, so the shorter value sorts first. Compare
+    // sizes instead of subtracting: a uint32_t difference can wrap in sign.
+    return size_ < other.size_ ? -1 : (size_ > other.size_ ? 1 : 0);
+  }
+
+  bool operator<(const StringHeader& other) const { return Compare(other) < 0; }
+  bool operator<=(const StringHeader& other) const { return Compare(other) <= 0; }
+  bool operator>(const StringHeader& other) const { return Compare(other) > 0; }
+  bool operator>=(const StringHeader& other) const { return Compare(other) >= 0; }
+
+  operator std::string() const { return std::string(data(), size()); }
+
+  std::string GetString() const { return *this; }
+
+  explicit operator std::string_view() const { return std::string_view(data(), size()); }
+
+  const char* begin() const { return data(); }
+
+  const char* end() const { return data() + size(); }
+
+  bool empty() const { return size() == 0; }
+
+ private:
+  int64_t SizeAndPrefixAsInt64() const { return LoadAs<int64_t>(this); }
+  int64_t InlinedAsInt64() const { return LoadAs<int64_t>(&value_); }
+
+  int32_t PrefixAsInt() const { return LoadAs<int32_t>(prefix_); }
+
+  // Read a T starting at `from`. memcpy keeps this free of the aliasing and
+  // alignment assumptions that a reinterpret_cast load would make.
+  template <typename T>
+  static T LoadAs(const void* from) {
+    T out;
+    memcpy(&out, from, sizeof(T));
+    return out;
+  }
+
+  // We rely on all members being laid out top to bottom without padding: C++
+  // guarantees the order, and the static_assert on the size checks the rest.
+  uint32_t size_;
+  char prefix_[4];
+  union {
+    char inlined[8];
+    const char* data;
+  } value_;
+};
+
+}  // namespace arrow
diff --git a/cpp/src/arrow/visitor.cc b/cpp/src/arrow/visitor.cc
index d22efc942e..03381a08a7 100644
--- a/cpp/src/arrow/visitor.cc
+++ b/cpp/src/arrow/visitor.cc
@@ -45,8 +45,10 @@ ARRAY_VISITOR_DEFAULT(UInt64Array)
 ARRAY_VISITOR_DEFAULT(HalfFloatArray)
 ARRAY_VISITOR_DEFAULT(FloatArray)
 ARRAY_VISITOR_DEFAULT(DoubleArray)
-ARRAY_VISITOR_DEFAULT(BinaryArray)
 ARRAY_VISITOR_DEFAULT(StringArray)
+ARRAY_VISITOR_DEFAULT(StringViewArray)
+ARRAY_VISITOR_DEFAULT(BinaryArray)
+ARRAY_VISITOR_DEFAULT(BinaryViewArray)
 ARRAY_VISITOR_DEFAULT(LargeBinaryArray)
 ARRAY_VISITOR_DEFAULT(LargeStringArray)
 ARRAY_VISITOR_DEFAULT(FixedSizeBinaryArray)
@@ -95,7 +97,9 @@ TYPE_VISITOR_DEFAULT(HalfFloatType)
 TYPE_VISITOR_DEFAULT(FloatType)
 TYPE_VISITOR_DEFAULT(DoubleType)
 TYPE_VISITOR_DEFAULT(StringType)
+TYPE_VISITOR_DEFAULT(StringViewType)
 TYPE_VISITOR_DEFAULT(BinaryType)
+TYPE_VISITOR_DEFAULT(BinaryViewType)
 TYPE_VISITOR_DEFAULT(LargeStringType)
 TYPE_VISITOR_DEFAULT(LargeBinaryType)
 TYPE_VISITOR_DEFAULT(FixedSizeBinaryType)
@@ -145,7 +149,9 @@ SCALAR_VISITOR_DEFAULT(HalfFloatScalar)
 SCALAR_VISITOR_DEFAULT(FloatScalar)
 SCALAR_VISITOR_DEFAULT(DoubleScalar)
 SCALAR_VISITOR_DEFAULT(StringScalar)
+SCALAR_VISITOR_DEFAULT(StringViewScalar)
 SCALAR_VISITOR_DEFAULT(BinaryScalar)
+SCALAR_VISITOR_DEFAULT(BinaryViewScalar)
 SCALAR_VISITOR_DEFAULT(LargeStringScalar)
 SCALAR_VISITOR_DEFAULT(LargeBinaryScalar)
 SCALAR_VISITOR_DEFAULT(FixedSizeBinaryScalar)
diff --git a/cpp/src/arrow/visitor.h b/cpp/src/arrow/visitor.h
index 7f83c9ebab..58330de9d0 100644
--- a/cpp/src/arrow/visitor.h
+++ b/cpp/src/arrow/visitor.h
@@ -45,7 +45,9 @@ class ARROW_EXPORT ArrayVisitor {
   virtual Status Visit(const FloatArray& array);
   virtual Status Visit(const DoubleArray& array);
   virtual Status Visit(const StringArray& array);
+  virtual Status Visit(const StringViewArray& array);
   virtual Status Visit(const BinaryArray& array);
+  virtual Status Visit(const BinaryViewArray& array);
   virtual Status Visit(const LargeStringArray& array);
   virtual Status Visit(const LargeBinaryArray& array);
   virtual Status Visit(const FixedSizeBinaryArray& array);
@@ -93,7 +95,9 @@ class ARROW_EXPORT TypeVisitor {
   virtual Status Visit(const FloatType& type);
   virtual Status Visit(const DoubleType& type);
   virtual Status Visit(const StringType& type);
+  virtual Status Visit(const StringViewType& type);
   virtual Status Visit(const BinaryType& type);
+  virtual Status Visit(const BinaryViewType& type);
   virtual Status Visit(const LargeStringType& type);
   virtual Status Visit(const LargeBinaryType& type);
   virtual Status Visit(const FixedSizeBinaryType& type);
@@ -141,7 +145,9 @@ class ARROW_EXPORT ScalarVisitor {
   virtual Status Visit(const FloatScalar& scalar);
   virtual Status Visit(const DoubleScalar& scalar);
   virtual Status Visit(const StringScalar& scalar);
+  virtual Status Visit(const StringViewScalar& scalar);
   virtual Status Visit(const BinaryScalar& scalar);
+  virtual Status Visit(const BinaryViewScalar& scalar);
   virtual Status Visit(const LargeStringScalar& scalar);
   virtual Status Visit(const LargeBinaryScalar& scalar);
   virtual Status Visit(const FixedSizeBinaryScalar& scalar);
diff --git a/cpp/src/arrow/visitor_generate.h b/cpp/src/arrow/visitor_generate.h
index 265c76197a..2c267576ca 100644
--- a/cpp/src/arrow/visitor_generate.h
+++ b/cpp/src/arrow/visitor_generate.h
@@ -40,7 +40,9 @@ namespace arrow {
   ACTION(Boolean);                              \
   ARROW_GENERATE_FOR_ALL_NUMERIC_TYPES(ACTION); \
   ACTION(String);                               \
+  ACTION(StringView);                           \
   ACTION(Binary);                               \
+  ACTION(BinaryView);                           \
   ACTION(LargeString);                          \
   ACTION(LargeBinary);                          \
   ACTION(FixedSizeBinary);                      \
diff --git a/cpp/src/parquet/column_writer.cc b/cpp/src/parquet/column_writer.cc
index f7898c02d4..e62e34abb0 100644
--- a/cpp/src/parquet/column_writer.cc
+++ b/cpp/src/parquet/column_writer.cc
@@ -129,6 +129,7 @@ struct ValueBufferSlicer {
   NOT_IMPLEMENTED_VISIT(FixedSizeList);
   NOT_IMPLEMENTED_VISIT(Dictionary);
   NOT_IMPLEMENTED_VISIT(Extension);
+  NOT_IMPLEMENTED_VISIT(BinaryView);
 
 #undef NOT_IMPLEMENTED_VISIT
 
diff --git a/python/pyarrow/src/arrow/python/arrow_to_pandas.cc b/python/pyarrow/src/arrow/python/arrow_to_pandas.cc
index f3cee6c65e..7e48f09889 100644
--- a/python/pyarrow/src/arrow/python/arrow_to_pandas.cc
+++ b/python/pyarrow/src/arrow/python/arrow_to_pandas.cc
@@ -116,39 +116,21 @@ void BufferCapsule_Destructor(PyObject* capsule) {
 using internal::arrow_traits;
 using internal::npy_traits;
 
-template <typename T>
+template <typename T, typename Enable = void>
 struct WrapBytes {};
 
-template <>
-struct WrapBytes<StringType> {
-  static inline PyObject* Wrap(const char* data, int64_t length) {
-    return PyUnicode_FromStringAndSize(data, length);
-  }
-};
-
-template <>
-struct WrapBytes<LargeStringType> {
+template <typename T>
+struct WrapBytes<T, enable_if_t<is_string_type<T>::value ||
+                                is_string_view_type<T>::value>> {
   static inline PyObject* Wrap(const char* data, int64_t length) {
     return PyUnicode_FromStringAndSize(data, length);
   }
 };
 
-template <>
-struct WrapBytes<BinaryType> {
-  static inline PyObject* Wrap(const char* data, int64_t length) {
-    return PyBytes_FromStringAndSize(data, length);
-  }
-};
-
-template <>
-struct WrapBytes<LargeBinaryType> {
-  static inline PyObject* Wrap(const char* data, int64_t length) {
-    return PyBytes_FromStringAndSize(data, length);
-  }
-};
-
-template <>
-struct WrapBytes<FixedSizeBinaryType> {
+template <typename T>
+struct WrapBytes<T, enable_if_t<is_binary_type<T>::value ||
+                                is_binary_view_type<T>::value ||
+                                is_fixed_size_binary_type<T>::value>> {
   static inline PyObject* Wrap(const char* data, int64_t length) {
     return PyBytes_FromStringAndSize(data, length);
   }
@@ -1026,7 +1008,9 @@ struct ObjectWriterVisitor {
   }
 
   template <typename Type>
-  enable_if_t<is_base_binary_type<Type>::value || is_fixed_size_binary_type<Type>::value,
+  enable_if_t<is_base_binary_type<Type>::value ||
+              is_binary_view_like_type<Type>::value ||
+              is_fixed_size_binary_type<Type>::value,
               Status>
   Visit(const Type& type) {
     auto WrapValue = [](const std::string_view& view, PyObject** out) {
diff --git a/python/pyarrow/src/arrow/python/python_to_arrow.cc b/python/pyarrow/src/arrow/python/python_to_arrow.cc
index 9e7f07ef81..3ffff8cf19 100644
--- a/python/pyarrow/src/arrow/python/python_to_arrow.cc
+++ b/python/pyarrow/src/arrow/python/python_to_arrow.cc
@@ -479,13 +479,17 @@ class PyValue {
 
   // The binary-like intermediate representation is PyBytesView because it keeps temporary
   // python objects alive (non-contiguous memoryview) and stores whether the original
-  // object was unicode encoded or not, which is used for unicode -> bytes coersion if
+  // object was unicode encoded or not, which is used for unicode -> bytes coercion if
   // there is a non-unicode object observed.
 
   static Status Convert(const BaseBinaryType*, const O&, I obj, PyBytesView& view) {
     return view.ParseString(obj);
   }
 
+  static Status Convert(const BinaryViewType*, const O&, I obj, PyBytesView& view) {
+    return view.ParseString(obj);
+  }
+
   static Status Convert(const FixedSizeBinaryType* type, const O&, I obj,
                         PyBytesView& view) {
     ARROW_RETURN_NOT_OK(view.ParseString(obj));
@@ -672,12 +676,9 @@ class PyPrimitiveConverter<T, enable_if_t<std::is_same<T, FixedSizeBinaryType>::
   PyBytesView view_;
 };
 
-template <typename T>
-class PyPrimitiveConverter<T, enable_if_base_binary<T>>
-    : public PrimitiveConverter<T, PyConverter> {
+template <typename T, typename OffsetType>
+class PyBinaryConverter : public PrimitiveConverter<T, PyConverter> {
  public:
-  using OffsetType = typename T::offset_type;
-
   Status Append(PyObject* value) override {
     if (PyValue::IsNull(this->options_, value)) {
       this->primitive_builder_->UnsafeAppendNull();
@@ -701,7 +702,7 @@ class PyPrimitiveConverter<T, enable_if_base_binary<T>>
   Result<std::shared_ptr<Array>> ToArray() override {
     ARROW_ASSIGN_OR_RAISE(auto array, (PrimitiveConverter<T, PyConverter>::ToArray()));
     if (observed_binary_) {
-      // if we saw any non-unicode, cast results to BinaryArray
+      // if we saw any non-unicode, cast results to BinaryArray/BinaryViewArray
       auto binary_type = TypeTraits<typename T::PhysicalType>::type_singleton();
       return array->View(binary_type);
     } else {
@@ -714,6 +715,14 @@ class PyPrimitiveConverter<T, enable_if_base_binary<T>>
   bool observed_binary_ = false;
 };
 
+template <typename T>
+class PyPrimitiveConverter<T, enable_if_base_binary<T>>
+    : public PyBinaryConverter<T, typename T::offset_type> {};
+
+template <typename T>
+class PyPrimitiveConverter<T, enable_if_binary_view_like<T>>
+    : public PyBinaryConverter<T, int64_t> {};
+
 template <typename U>
 class PyDictionaryConverter<U, enable_if_has_c_type<U>>
     : public DictionaryConverter<U, PyConverter> {