You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2018/11/30 18:31:20 UTC
[arrow] branch master updated: ARROW-3893: [C++] Improve adaptive int builder performance

This is an automated email from the ASF dual-hosted git repository.

wesm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new e87110f  ARROW-3893: [C++] Improve adaptive int builder performance
e87110f is described below

commit e87110f174ec0374f3c59f7fdafe3a22433e7ce4
Author: Antoine Pitrou <an...@python.org>
AuthorDate: Fri Nov 30 12:29:17 2018 -0600

    ARROW-3893: [C++] Improve adaptive int builder performance
    
    The strategy is two-pronged:
    - scalar append to int builder appends to a small uint64_t scratch space, deferring width detection and conversion until the scratch space is full
    - bulk int width detection is much improved
    
    Author: Antoine Pitrou <an...@python.org>
    
    Closes #3040 from pitrou/ARROW-3893-adaptive-int-builder-perf and squashes the following commits:
    
    61a101b12 <Antoine Pitrou> ARROW-3893:  Improve adaptive int builder performance
---
 cpp/src/arrow/CMakeLists.txt             |   1 +
 cpp/src/arrow/array-test.cc              | 107 ++++++--
 cpp/src/arrow/builder-benchmark.cc       |  37 ++-
 cpp/src/arrow/builder.cc                 | 223 ++++++++++-------
 cpp/src/arrow/builder.h                  | 126 +++-------
 cpp/src/arrow/util/CMakeLists.txt        |   2 +
 cpp/src/arrow/util/int-util-benchmark.cc | 109 +++++++++
 cpp/src/arrow/util/int-util-test.cc      | 379 +++++++++++++++++++++++++++++
 cpp/src/arrow/util/int-util.cc           | 406 +++++++++++++++++++++++++++++++
 cpp/src/arrow/util/int-util.h            |  69 ++++++
 10 files changed, 1245 insertions(+), 214 deletions(-)

diff --git a/cpp/src/arrow/CMakeLists.txt b/cpp/src/arrow/CMakeLists.txt
index 8ff17a2..a56079f 100644
--- a/cpp/src/arrow/CMakeLists.txt
+++ b/cpp/src/arrow/CMakeLists.txt
@@ -48,6 +48,7 @@ set(ARROW_SRCS
   util/compression.cc
   util/cpu-info.cc
   util/decimal.cc
+  util/int-util.cc
   util/io-util.cc
   util/logging.cc
   util/key_value_metadata.cc
diff --git a/cpp/src/arrow/array-test.cc b/cpp/src/arrow/array-test.cc
index ab03ced..5866058 100644
--- a/cpp/src/arrow/array-test.cc
+++ b/cpp/src/arrow/array-test.cc
@@ -1199,7 +1199,7 @@ TEST_F(TestAdaptiveIntBuilder, TestInt8) {
 
   std::vector<int8_t> expected_values({0, 127, -128});
   ArrayFromVector<Int8Type, int8_t>(expected_values, &expected_);
-  ASSERT_TRUE(expected_->Equals(result_));
+  AssertArraysEqual(*expected_, *result_);
 }
 
 TEST_F(TestAdaptiveIntBuilder, TestInt16) {
@@ -1209,7 +1209,7 @@ TEST_F(TestAdaptiveIntBuilder, TestInt16) {
 
   std::vector<int16_t> expected_values({0, 128});
   ArrayFromVector<Int16Type, int16_t>(expected_values, &expected_);
-  ASSERT_TRUE(expected_->Equals(result_));
+  AssertArraysEqual(*expected_, *result_);
 
   SetUp();
   ASSERT_OK(builder_->Append(-129));
@@ -1217,7 +1217,7 @@ TEST_F(TestAdaptiveIntBuilder, TestInt16) {
   Done();
 
   ArrayFromVector<Int16Type, int16_t>(expected_values, &expected_);
-  ASSERT_TRUE(expected_->Equals(result_));
+  AssertArraysEqual(*expected_, *result_);
 
   SetUp();
   ASSERT_OK(builder_->Append(std::numeric_limits<int16_t>::max()));
@@ -1227,7 +1227,7 @@ TEST_F(TestAdaptiveIntBuilder, TestInt16) {
   Done();
 
   ArrayFromVector<Int16Type, int16_t>(expected_values, &expected_);
-  ASSERT_TRUE(expected_->Equals(result_));
+  AssertArraysEqual(*expected_, *result_);
 }
 
 TEST_F(TestAdaptiveIntBuilder, TestInt32) {
@@ -1239,7 +1239,7 @@ TEST_F(TestAdaptiveIntBuilder, TestInt32) {
   std::vector<int32_t> expected_values(
       {0, static_cast<int32_t>(std::numeric_limits<int16_t>::max()) + 1});
   ArrayFromVector<Int32Type, int32_t>(expected_values, &expected_);
-  ASSERT_TRUE(expected_->Equals(result_));
+  AssertArraysEqual(*expected_, *result_);
 
   SetUp();
   ASSERT_OK(
@@ -1248,7 +1248,7 @@ TEST_F(TestAdaptiveIntBuilder, TestInt32) {
   Done();
 
   ArrayFromVector<Int32Type, int32_t>(expected_values, &expected_);
-  ASSERT_TRUE(expected_->Equals(result_));
+  AssertArraysEqual(*expected_, *result_);
 
   SetUp();
   ASSERT_OK(builder_->Append(std::numeric_limits<int32_t>::max()));
@@ -1258,7 +1258,7 @@ TEST_F(TestAdaptiveIntBuilder, TestInt32) {
   Done();
 
   ArrayFromVector<Int32Type, int32_t>(expected_values, &expected_);
-  ASSERT_TRUE(expected_->Equals(result_));
+  AssertArraysEqual(*expected_, *result_);
 }
 
 TEST_F(TestAdaptiveIntBuilder, TestInt64) {
@@ -1270,7 +1270,7 @@ TEST_F(TestAdaptiveIntBuilder, TestInt64) {
   std::vector<int64_t> expected_values(
       {0, static_cast<int64_t>(std::numeric_limits<int32_t>::max()) + 1});
   ArrayFromVector<Int64Type, int64_t>(expected_values, &expected_);
-  ASSERT_TRUE(expected_->Equals(result_));
+  AssertArraysEqual(*expected_, *result_);
 
   SetUp();
   ASSERT_OK(
@@ -1279,7 +1279,7 @@ TEST_F(TestAdaptiveIntBuilder, TestInt64) {
   Done();
 
   ArrayFromVector<Int64Type, int64_t>(expected_values, &expected_);
-  ASSERT_TRUE(expected_->Equals(result_));
+  AssertArraysEqual(*expected_, *result_);
 
   SetUp();
   ASSERT_OK(builder_->Append(std::numeric_limits<int64_t>::max()));
@@ -1289,17 +1289,58 @@ TEST_F(TestAdaptiveIntBuilder, TestInt64) {
   Done();
 
   ArrayFromVector<Int64Type, int64_t>(expected_values, &expected_);
-  ASSERT_TRUE(expected_->Equals(result_));
+  AssertArraysEqual(*expected_, *result_);
 }
 
 TEST_F(TestAdaptiveIntBuilder, TestAppendValues) {
-  std::vector<int64_t> expected_values(
-      {0, static_cast<int64_t>(std::numeric_limits<int32_t>::max()) + 1});
-  ASSERT_OK(builder_->AppendValues(expected_values.data(), expected_values.size()));
-  Done();
-
-  ArrayFromVector<Int64Type, int64_t>(expected_values, &expected_);
-  ASSERT_TRUE(expected_->Equals(result_));
+  {
+    std::vector<int64_t> expected_values(
+        {0, static_cast<int64_t>(std::numeric_limits<int32_t>::max()) + 1});
+    ASSERT_OK(builder_->AppendValues(expected_values.data(), expected_values.size()));
+    Done();
+
+    ArrayFromVector<Int64Type, int64_t>(expected_values, &expected_);
+    AssertArraysEqual(*expected_, *result_);
+  }
+  {
+    SetUp();
+    std::vector<int64_t> values(
+        {0, std::numeric_limits<int32_t>::min(), std::numeric_limits<int32_t>::max()});
+    ASSERT_OK(builder_->AppendValues(values.data(), values.size()));
+    Done();
+
+    std::vector<int32_t> expected_values(
+        {0, std::numeric_limits<int32_t>::min(), std::numeric_limits<int32_t>::max()});
+
+    ArrayFromVector<Int32Type, int32_t>(expected_values, &expected_);
+    AssertArraysEqual(*expected_, *result_);
+  }
+  {
+    SetUp();
+    std::vector<int64_t> values(
+        {0, std::numeric_limits<int16_t>::min(), std::numeric_limits<int16_t>::max()});
+    ASSERT_OK(builder_->AppendValues(values.data(), values.size()));
+    Done();
+
+    std::vector<int16_t> expected_values(
+        {0, std::numeric_limits<int16_t>::min(), std::numeric_limits<int16_t>::max()});
+
+    ArrayFromVector<Int16Type, int16_t>(expected_values, &expected_);
+    AssertArraysEqual(*expected_, *result_);
+  }
+  {
+    SetUp();
+    std::vector<int64_t> values(
+        {0, std::numeric_limits<int8_t>::min(), std::numeric_limits<int8_t>::max()});
+    ASSERT_OK(builder_->AppendValues(values.data(), values.size()));
+    Done();
+
+    std::vector<int8_t> expected_values(
+        {0, std::numeric_limits<int8_t>::min(), std::numeric_limits<int8_t>::max()});
+
+    ArrayFromVector<Int8Type, int8_t>(expected_values, &expected_);
+    AssertArraysEqual(*expected_, *result_);
+  }
 }
 
 TEST_F(TestAdaptiveIntBuilder, TestAssertZeroPadded) {
@@ -1311,15 +1352,23 @@ TEST_F(TestAdaptiveIntBuilder, TestAssertZeroPadded) {
 
 TEST_F(TestAdaptiveIntBuilder, TestAppendNull) {
   int64_t size = 1000;
-  for (unsigned index = 0; index < size; ++index) {
+  ASSERT_OK(builder_->Append(127));
+  for (unsigned index = 1; index < size - 1; ++index) {
     ASSERT_OK(builder_->AppendNull());
   }
+  ASSERT_OK(builder_->Append(-128));
 
   Done();
 
-  for (unsigned index = 0; index < size; ++index) {
-    ASSERT_TRUE(result_->IsNull(index));
-  }
+  std::vector<bool> expected_valid(size, false);
+  expected_valid[0] = true;
+  expected_valid[size - 1] = true;
+  std::vector<int8_t> expected_values(size);
+  expected_values[0] = 127;
+  expected_values[size - 1] = -128;
+  std::shared_ptr<Array> expected;
+  ArrayFromVector<Int8Type, int8_t>(expected_valid, expected_values, &expected_);
+  AssertArraysEqual(*expected_, *result_);
 }
 
 TEST_F(TestAdaptiveIntBuilder, TestAppendNulls) {
@@ -1438,15 +1487,23 @@ TEST_F(TestAdaptiveUIntBuilder, TestAssertZeroPadded) {
 
 TEST_F(TestAdaptiveUIntBuilder, TestAppendNull) {
   int64_t size = 1000;
-  for (unsigned index = 0; index < size; ++index) {
+  ASSERT_OK(builder_->Append(254));
+  for (unsigned index = 1; index < size - 1; ++index) {
     ASSERT_OK(builder_->AppendNull());
   }
+  ASSERT_OK(builder_->Append(255));
 
   Done();
 
-  for (unsigned index = 0; index < size; ++index) {
-    ASSERT_TRUE(result_->IsNull(index));
-  }
+  std::vector<bool> expected_valid(size, false);
+  expected_valid[0] = true;
+  expected_valid[size - 1] = true;
+  std::vector<uint8_t> expected_values(size);
+  expected_values[0] = 254;
+  expected_values[size - 1] = 255;
+  std::shared_ptr<Array> expected;
+  ArrayFromVector<UInt8Type, uint8_t>(expected_valid, expected_values, &expected_);
+  AssertArraysEqual(*expected_, *result_);
 }
 
 TEST_F(TestAdaptiveUIntBuilder, TestAppendNulls) {
diff --git a/cpp/src/arrow/builder-benchmark.cc b/cpp/src/arrow/builder-benchmark.cc
index 4acede1..f96728d 100644
--- a/cpp/src/arrow/builder-benchmark.cc
+++ b/cpp/src/arrow/builder-benchmark.cc
@@ -69,14 +69,14 @@ static void BM_BuildAdaptiveIntNoNulls(
     benchmark::State& state) {  // NOLINT non-const reference
   int64_t size = static_cast<int64_t>(std::numeric_limits<int16_t>::max()) * 256;
   int64_t chunk_size = size / 8;
-  std::vector<int64_t> data;
+  std::vector<int64_t> data(size);
   for (int64_t i = 0; i < size; i++) {
-    data.push_back(i);
+    data[i] = i;
   }
   while (state.KeepRunning()) {
     AdaptiveIntBuilder builder;
     for (int64_t i = 0; i < size; i += chunk_size) {
-      // Build up an array of 512 MiB in size
+      // Build up an array of 128 MiB in size
       ABORT_NOT_OK(builder.AppendValues(data.data() + i, chunk_size, nullptr));
     }
     std::shared_ptr<Array> out;
@@ -88,9 +88,9 @@ static void BM_BuildAdaptiveIntNoNulls(
 static void BM_BuildAdaptiveIntNoNullsScalarAppend(
     benchmark::State& state) {  // NOLINT non-const reference
   int64_t size = static_cast<int64_t>(std::numeric_limits<int16_t>::max()) * 256;
-  std::vector<int64_t> data;
+  std::vector<int64_t> data(size);
   for (int64_t i = 0; i < size; i++) {
-    data.push_back(i);
+    data[i] = i;
   }
   while (state.KeepRunning()) {
     AdaptiveIntBuilder builder;
@@ -107,14 +107,14 @@ static void BM_BuildAdaptiveUIntNoNulls(
     benchmark::State& state) {  // NOLINT non-const reference
   int64_t size = static_cast<int64_t>(std::numeric_limits<uint16_t>::max()) * 256;
   int64_t chunk_size = size / 8;
-  std::vector<uint64_t> data;
+  std::vector<uint64_t> data(size);
   for (uint64_t i = 0; i < static_cast<uint64_t>(size); i++) {
-    data.push_back(i);
+    data[i] = i;
   }
   while (state.KeepRunning()) {
     AdaptiveUIntBuilder builder;
     for (int64_t i = 0; i < size; i += chunk_size) {
-      // Build up an array of 512 MiB in size
+      // Build up an array of 128 MiB in size
       ABORT_NOT_OK(builder.AppendValues(data.data() + i, chunk_size, nullptr));
     }
     std::shared_ptr<Array> out;
@@ -123,6 +123,24 @@ static void BM_BuildAdaptiveUIntNoNulls(
   state.SetBytesProcessed(state.iterations() * data.size() * sizeof(int64_t));
 }
 
+static void BM_BuildAdaptiveUIntNoNullsScalarAppend(
+    benchmark::State& state) {  // NOLINT non-const reference
+  int64_t size = static_cast<int64_t>(std::numeric_limits<int16_t>::max()) * 256;
+  std::vector<uint64_t> data(size);
+  for (uint64_t i = 0; i < static_cast<uint64_t>(size); i++) {
+    data[i] = i;
+  }
+  while (state.KeepRunning()) {
+    AdaptiveUIntBuilder builder;
+    for (int64_t i = 0; i < size; i++) {
+      ABORT_NOT_OK(builder.Append(data[i]));
+    }
+    std::shared_ptr<Array> out;
+    ABORT_NOT_OK(builder.Finish(&out));
+  }
+  state.SetBytesProcessed(state.iterations() * data.size() * sizeof(int64_t));
+}
+
 static void BM_BuildBooleanArrayNoNulls(
     benchmark::State& state) {  // NOLINT non-const reference
   // 2 MiB block
@@ -349,6 +367,9 @@ BENCHMARK(BM_BuildAdaptiveIntNoNullsScalarAppend)
 BENCHMARK(BM_BuildAdaptiveUIntNoNulls)
     ->Repetitions(kRepetitions)
     ->Unit(benchmark::kMicrosecond);
+BENCHMARK(BM_BuildAdaptiveUIntNoNullsScalarAppend)
+    ->Repetitions(kRepetitions)
+    ->Unit(benchmark::kMicrosecond);
 
 BENCHMARK(BM_BuildBinaryArray)->Repetitions(kRepetitions)->Unit(benchmark::kMicrosecond);
 BENCHMARK(BM_BuildFixedSizeBinaryArray)
diff --git a/cpp/src/arrow/builder.cc b/cpp/src/arrow/builder.cc
index 0d9e0e0..0e10be7 100644
--- a/cpp/src/arrow/builder.cc
+++ b/cpp/src/arrow/builder.cc
@@ -34,6 +34,7 @@
 #include "arrow/util/checked_cast.h"
 #include "arrow/util/decimal.h"
 #include "arrow/util/hashing.h"
+#include "arrow/util/int-util.h"
 #include "arrow/util/logging.h"
 
 namespace arrow {
@@ -295,12 +296,19 @@ template class PrimitiveBuilder<FloatType>;
 template class PrimitiveBuilder<DoubleType>;
 
 AdaptiveIntBuilderBase::AdaptiveIntBuilderBase(MemoryPool* pool)
-    : ArrayBuilder(int64(), pool), data_(nullptr), raw_data_(nullptr), int_size_(1) {}
+    : ArrayBuilder(int64(), pool),
+      data_(nullptr),
+      raw_data_(nullptr),
+      int_size_(1),
+      pending_pos_(0),
+      pending_has_nulls_(false) {}
 
 void AdaptiveIntBuilderBase::Reset() {
   ArrayBuilder::Reset();
   data_.reset();
   raw_data_ = nullptr;
+  pending_pos_ = 0;
+  pending_has_nulls_ = false;
 }
 
 Status AdaptiveIntBuilderBase::Resize(int64_t capacity) {
@@ -321,6 +329,8 @@ Status AdaptiveIntBuilderBase::Resize(int64_t capacity) {
 AdaptiveIntBuilder::AdaptiveIntBuilder(MemoryPool* pool) : AdaptiveIntBuilderBase(pool) {}
 
 Status AdaptiveIntBuilder::FinishInternal(std::shared_ptr<ArrayData>* out) {
+  RETURN_NOT_OK(CommitPendingData());
+
   std::shared_ptr<DataType> output_type;
   switch (int_size_) {
     case 1:
@@ -350,62 +360,91 @@ Status AdaptiveIntBuilder::FinishInternal(std::shared_ptr<ArrayData>* out) {
   return Status::OK();
 }
 
-Status AdaptiveIntBuilder::AppendValues(const int64_t* values, int64_t length,
-                                        const uint8_t* valid_bytes) {
-  RETURN_NOT_OK(Reserve(length));
+Status AdaptiveIntBuilder::CommitPendingData() {
+  if (pending_pos_ == 0) {
+    return Status::OK();
+  }
+  RETURN_NOT_OK(Reserve(pending_pos_));
+  const uint8_t* valid_bytes = pending_has_nulls_ ? pending_valid_ : nullptr;
+  RETURN_NOT_OK(AppendValuesInternal(reinterpret_cast<const int64_t*>(pending_data_),
+                                     pending_pos_, valid_bytes));
+  pending_has_nulls_ = false;
+  pending_pos_ = 0;
+  return Status::OK();
+}
 
-  if (length > 0) {
-    if (int_size_ < 8) {
-      uint8_t new_int_size = int_size_;
-      for (int64_t i = 0; i < length; i++) {
-        if (valid_bytes == nullptr || valid_bytes[i]) {
-          new_int_size = internal::ExpandedIntSize(values[i], new_int_size);
-        }
-      }
-      if (new_int_size != int_size_) {
-        RETURN_NOT_OK(ExpandIntSize(new_int_size));
-      }
+static constexpr int64_t kAdaptiveIntChunkSize = 8192;
+
+Status AdaptiveIntBuilder::AppendValuesInternal(const int64_t* values, int64_t length,
+                                                const uint8_t* valid_bytes) {
+  while (length > 0) {
+    // In case `length` is very large, we don't want to trash the cache by
+    // scanning it twice (first to detect int width, second to copy the data).
+    // Instead, process data in L2-cacheable chunks.
+    const int64_t chunk_size = std::min(length, kAdaptiveIntChunkSize);
+
+    uint8_t new_int_size;
+    new_int_size = internal::DetectIntWidth(values, valid_bytes, chunk_size, int_size_);
+
+    DCHECK_GE(new_int_size, int_size_);
+    if (new_int_size > int_size_) {
+      // This updates int_size_
+      RETURN_NOT_OK(ExpandIntSize(new_int_size));
     }
-  }
 
-  if (int_size_ == 8) {
-    std::memcpy(reinterpret_cast<int64_t*>(raw_data_) + length_, values,
-                sizeof(int64_t) * length);
-  } else {
-#ifdef _MSC_VER
-#pragma warning(push)
-#pragma warning(disable : 4996)
-#endif
-    // int_size_ may have changed, so we need to recheck
     switch (int_size_) {
-      case 1: {
-        int8_t* data_ptr = reinterpret_cast<int8_t*>(raw_data_) + length_;
-        std::transform(values, values + length, data_ptr,
-                       [](int64_t x) { return static_cast<int8_t>(x); });
-      } break;
-      case 2: {
-        int16_t* data_ptr = reinterpret_cast<int16_t*>(raw_data_) + length_;
-        std::transform(values, values + length, data_ptr,
-                       [](int64_t x) { return static_cast<int16_t>(x); });
-      } break;
-      case 4: {
-        int32_t* data_ptr = reinterpret_cast<int32_t*>(raw_data_) + length_;
-        std::transform(values, values + length, data_ptr,
-                       [](int64_t x) { return static_cast<int32_t>(x); });
-      } break;
+      case 1:
+        internal::DowncastInts(values, reinterpret_cast<int8_t*>(raw_data_) + length_,
+                               chunk_size);
+        break;
+      case 2:
+        internal::DowncastInts(values, reinterpret_cast<int16_t*>(raw_data_) + length_,
+                               chunk_size);
+        break;
+      case 4:
+        internal::DowncastInts(values, reinterpret_cast<int32_t*>(raw_data_) + length_,
+                               chunk_size);
+        break;
+      case 8:
+        internal::DowncastInts(values, reinterpret_cast<int64_t*>(raw_data_) + length_,
+                               chunk_size);
+        break;
       default:
         DCHECK(false);
     }
-#ifdef _MSC_VER
-#pragma warning(pop)
-#endif
+
+    // This updates length_
+    ArrayBuilder::UnsafeAppendToBitmap(valid_bytes, chunk_size);
+    values += chunk_size;
+    if (valid_bytes != nullptr) {
+      valid_bytes += chunk_size;
+    }
+    length -= chunk_size;
   }
 
-  // length_ is update by these
-  ArrayBuilder::UnsafeAppendToBitmap(valid_bytes, length);
   return Status::OK();
 }
 
+Status AdaptiveUIntBuilder::CommitPendingData() {
+  if (pending_pos_ == 0) {
+    return Status::OK();
+  }
+  RETURN_NOT_OK(Reserve(pending_pos_));
+  const uint8_t* valid_bytes = pending_has_nulls_ ? pending_valid_ : nullptr;
+  RETURN_NOT_OK(AppendValuesInternal(pending_data_, pending_pos_, valid_bytes));
+  pending_has_nulls_ = false;
+  pending_pos_ = 0;
+  return Status::OK();
+}
+
+Status AdaptiveIntBuilder::AppendValues(const int64_t* values, int64_t length,
+                                        const uint8_t* valid_bytes) {
+  RETURN_NOT_OK(CommitPendingData());
+  RETURN_NOT_OK(Reserve(length));
+
+  return AppendValuesInternal(values, length, valid_bytes);
+}
+
 template <typename new_type, typename old_type>
 typename std::enable_if<sizeof(old_type) >= sizeof(new_type), Status>::type
 AdaptiveIntBuilder::ExpandIntSizeInternal() {
@@ -418,9 +457,10 @@ typename std::enable_if<__LESS(sizeof(old_type), sizeof(new_type)), Status>::typ
 AdaptiveIntBuilder::ExpandIntSizeInternal() {
   int_size_ = sizeof(new_type);
   RETURN_NOT_OK(Resize(data_->size() / sizeof(old_type)));
-
-  old_type* src = reinterpret_cast<old_type*>(raw_data_);
+  raw_data_ = reinterpret_cast<uint8_t*>(data_->mutable_data());
+  const old_type* src = reinterpret_cast<old_type*>(raw_data_);
   new_type* dst = reinterpret_cast<new_type*>(raw_data_);
+
   // By doing the backward copy, we ensure that no element is overriden during
   // the copy process and the copy stays in-place.
   std::copy_backward(src, src + length_, dst + length_);
@@ -474,6 +514,8 @@ AdaptiveUIntBuilder::AdaptiveUIntBuilder(MemoryPool* pool)
     : AdaptiveIntBuilderBase(pool) {}
 
 Status AdaptiveUIntBuilder::FinishInternal(std::shared_ptr<ArrayData>* out) {
+  RETURN_NOT_OK(CommitPendingData());
+
   std::shared_ptr<DataType> output_type;
   switch (int_size_) {
     case 1:
@@ -503,62 +545,61 @@ Status AdaptiveUIntBuilder::FinishInternal(std::shared_ptr<ArrayData>* out) {
   return Status::OK();
 }
 
-Status AdaptiveUIntBuilder::AppendValues(const uint64_t* values, int64_t length,
-                                         const uint8_t* valid_bytes) {
-  RETURN_NOT_OK(Reserve(length));
+Status AdaptiveUIntBuilder::AppendValuesInternal(const uint64_t* values, int64_t length,
+                                                 const uint8_t* valid_bytes) {
+  while (length > 0) {
+    // See AdaptiveIntBuilder::AppendValuesInternal
+    const int64_t chunk_size = std::min(length, kAdaptiveIntChunkSize);
 
-  if (length > 0) {
-    if (int_size_ < 8) {
-      uint8_t new_int_size = int_size_;
-      for (int64_t i = 0; i < length; i++) {
-        if (valid_bytes == nullptr || valid_bytes[i]) {
-          new_int_size = internal::ExpandedUIntSize(values[i], new_int_size);
-        }
-      }
-      if (new_int_size != int_size_) {
-        RETURN_NOT_OK(ExpandIntSize(new_int_size));
-      }
+    uint8_t new_int_size;
+    new_int_size = internal::DetectUIntWidth(values, valid_bytes, chunk_size, int_size_);
+
+    DCHECK_GE(new_int_size, int_size_);
+    if (new_int_size > int_size_) {
+      // This updates int_size_
+      RETURN_NOT_OK(ExpandIntSize(new_int_size));
     }
-  }
 
-  if (int_size_ == 8) {
-    std::memcpy(reinterpret_cast<uint64_t*>(raw_data_) + length_, values,
-                sizeof(uint64_t) * length);
-  } else {
-#ifdef _MSC_VER
-#pragma warning(push)
-#pragma warning(disable : 4996)
-#endif
-    // int_size_ may have changed, so we need to recheck
     switch (int_size_) {
-      case 1: {
-        uint8_t* data_ptr = reinterpret_cast<uint8_t*>(raw_data_) + length_;
-        std::transform(values, values + length, data_ptr,
-                       [](uint64_t x) { return static_cast<uint8_t>(x); });
-      } break;
-      case 2: {
-        uint16_t* data_ptr = reinterpret_cast<uint16_t*>(raw_data_) + length_;
-        std::transform(values, values + length, data_ptr,
-                       [](uint64_t x) { return static_cast<uint16_t>(x); });
-      } break;
-      case 4: {
-        uint32_t* data_ptr = reinterpret_cast<uint32_t*>(raw_data_) + length_;
-        std::transform(values, values + length, data_ptr,
-                       [](uint64_t x) { return static_cast<uint32_t>(x); });
-      } break;
+      case 1:
+        internal::DowncastUInts(values, reinterpret_cast<uint8_t*>(raw_data_) + length_,
+                                chunk_size);
+        break;
+      case 2:
+        internal::DowncastUInts(values, reinterpret_cast<uint16_t*>(raw_data_) + length_,
+                                chunk_size);
+        break;
+      case 4:
+        internal::DowncastUInts(values, reinterpret_cast<uint32_t*>(raw_data_) + length_,
+                                chunk_size);
+        break;
+      case 8:
+        internal::DowncastUInts(values, reinterpret_cast<uint64_t*>(raw_data_) + length_,
+                                chunk_size);
+        break;
       default:
         DCHECK(false);
     }
-#ifdef _MSC_VER
-#pragma warning(pop)
-#endif
+
+    // This updates length_
+    ArrayBuilder::UnsafeAppendToBitmap(valid_bytes, chunk_size);
+    values += chunk_size;
+    if (valid_bytes != nullptr) {
+      valid_bytes += chunk_size;
+    }
+    length -= chunk_size;
   }
 
-  // length_ is update by these
-  ArrayBuilder::UnsafeAppendToBitmap(valid_bytes, length);
   return Status::OK();
 }
 
+Status AdaptiveUIntBuilder::AppendValues(const uint64_t* values, int64_t length,
+                                         const uint8_t* valid_bytes) {
+  RETURN_NOT_OK(Reserve(length));
+
+  return AppendValuesInternal(values, length, valid_bytes);
+}
+
 template <typename new_type, typename old_type>
 typename std::enable_if<sizeof(old_type) >= sizeof(new_type), Status>::type
 AdaptiveUIntBuilder::ExpandIntSizeInternal() {
diff --git a/cpp/src/arrow/builder.h b/cpp/src/arrow/builder.h
index 183e7e9..34cac55 100644
--- a/cpp/src/arrow/builder.h
+++ b/cpp/src/arrow/builder.h
@@ -432,6 +432,7 @@ class ARROW_EXPORT AdaptiveIntBuilderBase : public ArrayBuilder {
 
   /// Write nulls as uint8_t* (0 value indicates null) into pre-allocated memory
   Status AppendNulls(const uint8_t* valid_bytes, int64_t length) {
+    ARROW_RETURN_NOT_OK(CommitPendingData());
     ARROW_RETURN_NOT_OK(Reserve(length));
     memset(data_->mutable_data() + length_ * int_size_, 0, int_size_ * length);
     UnsafeAppendToBitmap(valid_bytes, length);
@@ -439,9 +440,14 @@ class ARROW_EXPORT AdaptiveIntBuilderBase : public ArrayBuilder {
   }
 
   Status AppendNull() {
-    ARROW_RETURN_NOT_OK(Reserve(1));
-    memset(data_->mutable_data() + length_ * int_size_, 0, int_size_);
-    UnsafeAppendToBitmap(false);
+    pending_data_[pending_pos_] = 0;
+    pending_valid_[pending_pos_] = 0;
+    pending_has_nulls_ = true;
+    ++pending_pos_;
+
+    if (ARROW_PREDICT_FALSE(pending_pos_ >= pending_size_)) {
+      return CommitPendingData();
+    }
     return Status::OK();
   }
 
@@ -449,54 +455,18 @@ class ARROW_EXPORT AdaptiveIntBuilderBase : public ArrayBuilder {
   Status Resize(int64_t capacity) override;
 
  protected:
+  virtual Status CommitPendingData() = 0;
+
   std::shared_ptr<ResizableBuffer> data_;
   uint8_t* raw_data_;
-
   uint8_t int_size_;
-};
 
-// TODO investigate AdaptiveIntBuilder / AdaptiveUIntBuilder performance
-
-// Check if we would need to expand the underlying storage type
-inline uint8_t ExpandedIntSize(int64_t val, uint8_t current_int_size) {
-  if (current_int_size == 8 ||
-      (current_int_size < 8 &&
-       (val > static_cast<int64_t>(std::numeric_limits<int32_t>::max()) ||
-        val < static_cast<int64_t>(std::numeric_limits<int32_t>::min())))) {
-    return 8;
-  } else if (current_int_size == 4 ||
-             (current_int_size < 4 &&
-              (val > static_cast<int64_t>(std::numeric_limits<int16_t>::max()) ||
-               val < static_cast<int64_t>(std::numeric_limits<int16_t>::min())))) {
-    return 4;
-  } else if (current_int_size == 2 ||
-             (current_int_size == 1 &&
-              (val > static_cast<int64_t>(std::numeric_limits<int8_t>::max()) ||
-               val < static_cast<int64_t>(std::numeric_limits<int8_t>::min())))) {
-    return 2;
-  } else {
-    return 1;
-  }
-}
-
-// Check if we would need to expand the underlying storage type
-inline uint8_t ExpandedUIntSize(uint64_t val, uint8_t current_int_size) {
-  if (current_int_size == 8 ||
-      (current_int_size < 8 &&
-       (val > static_cast<uint64_t>(std::numeric_limits<uint32_t>::max())))) {
-    return 8;
-  } else if (current_int_size == 4 ||
-             (current_int_size < 4 &&
-              (val > static_cast<uint64_t>(std::numeric_limits<uint16_t>::max())))) {
-    return 4;
-  } else if (current_int_size == 2 ||
-             (current_int_size == 1 &&
-              (val > static_cast<uint64_t>(std::numeric_limits<uint8_t>::max())))) {
-    return 2;
-  } else {
-    return 1;
-  }
-}
+  static constexpr int32_t pending_size_ = 1024;
+  uint8_t pending_valid_[pending_size_];
+  uint64_t pending_data_[pending_size_];
+  int32_t pending_pos_;
+  bool pending_has_nulls_;
+};
 
 }  // namespace internal
 
@@ -509,29 +479,12 @@ class ARROW_EXPORT AdaptiveUIntBuilder : public internal::AdaptiveIntBuilderBase
 
   /// Scalar append
   Status Append(const uint64_t val) {
-    ARROW_RETURN_NOT_OK(Reserve(1));
-    BitUtil::SetBit(null_bitmap_data_, length_);
+    pending_data_[pending_pos_] = val;
+    pending_valid_[pending_pos_] = 1;
+    ++pending_pos_;
 
-    uint8_t new_int_size = internal::ExpandedUIntSize(val, int_size_);
-    if (new_int_size != int_size_) {
-      ARROW_RETURN_NOT_OK(ExpandIntSize(new_int_size));
-    }
-
-    switch (int_size_) {
-      case 1:
-        reinterpret_cast<uint8_t*>(raw_data_)[length_++] = static_cast<uint8_t>(val);
-        break;
-      case 2:
-        reinterpret_cast<uint16_t*>(raw_data_)[length_++] = static_cast<uint16_t>(val);
-        break;
-      case 4:
-        reinterpret_cast<uint32_t*>(raw_data_)[length_++] = static_cast<uint32_t>(val);
-        break;
-      case 8:
-        reinterpret_cast<uint64_t*>(raw_data_)[length_++] = val;
-        break;
-      default:
-        return Status::NotImplemented("This code shall never be reached");
+    if (ARROW_PREDICT_FALSE(pending_pos_ >= pending_size_)) {
+      return CommitPendingData();
     }
     return Status::OK();
   }
@@ -548,8 +501,12 @@ class ARROW_EXPORT AdaptiveUIntBuilder : public internal::AdaptiveIntBuilderBase
   Status FinishInternal(std::shared_ptr<ArrayData>* out) override;
 
  protected:
+  Status CommitPendingData() override;
   Status ExpandIntSize(uint8_t new_int_size);
 
+  Status AppendValuesInternal(const uint64_t* values, int64_t length,
+                              const uint8_t* valid_bytes);
+
   template <typename new_type, typename old_type>
   typename std::enable_if<sizeof(old_type) >= sizeof(new_type), Status>::type
   ExpandIntSizeInternal();
@@ -572,29 +529,14 @@ class ARROW_EXPORT AdaptiveIntBuilder : public internal::AdaptiveIntBuilderBase
 
   /// Scalar append
   Status Append(const int64_t val) {
-    ARROW_RETURN_NOT_OK(Reserve(1));
-    BitUtil::SetBit(null_bitmap_data_, length_);
+    auto v = static_cast<uint64_t>(val);
 
-    uint8_t new_int_size = internal::ExpandedIntSize(val, int_size_);
-    if (new_int_size != int_size_) {
-      ARROW_RETURN_NOT_OK(ExpandIntSize(new_int_size));
-    }
+    pending_data_[pending_pos_] = v;
+    pending_valid_[pending_pos_] = 1;
+    ++pending_pos_;
 
-    switch (int_size_) {
-      case 1:
-        reinterpret_cast<int8_t*>(raw_data_)[length_++] = static_cast<int8_t>(val);
-        break;
-      case 2:
-        reinterpret_cast<int16_t*>(raw_data_)[length_++] = static_cast<int16_t>(val);
-        break;
-      case 4:
-        reinterpret_cast<int32_t*>(raw_data_)[length_++] = static_cast<int32_t>(val);
-        break;
-      case 8:
-        reinterpret_cast<int64_t*>(raw_data_)[length_++] = val;
-        break;
-      default:
-        return Status::NotImplemented("This code shall never be reached");
+    if (ARROW_PREDICT_FALSE(pending_pos_ >= pending_size_)) {
+      return CommitPendingData();
     }
     return Status::OK();
   }
@@ -611,8 +553,12 @@ class ARROW_EXPORT AdaptiveIntBuilder : public internal::AdaptiveIntBuilderBase
   Status FinishInternal(std::shared_ptr<ArrayData>* out) override;
 
  protected:
+  Status CommitPendingData() override;
   Status ExpandIntSize(uint8_t new_int_size);
 
+  Status AppendValuesInternal(const int64_t* values, int64_t length,
+                              const uint8_t* valid_bytes);
+
   template <typename new_type, typename old_type>
   typename std::enable_if<sizeof(old_type) >= sizeof(new_type), Status>::type
   ExpandIntSizeInternal();
diff --git a/cpp/src/arrow/util/CMakeLists.txt b/cpp/src/arrow/util/CMakeLists.txt
index d785eee..6b9c359 100644
--- a/cpp/src/arrow/util/CMakeLists.txt
+++ b/cpp/src/arrow/util/CMakeLists.txt
@@ -91,6 +91,7 @@ ADD_ARROW_TEST(checked-cast-test)
 ADD_ARROW_TEST(compression-test)
 ADD_ARROW_TEST(decimal-test)
 ADD_ARROW_TEST(hashing-test)
+ADD_ARROW_TEST(int-util-test)
 ADD_ARROW_TEST(key-value-metadata-test)
 ADD_ARROW_TEST(lazy-test)
 ADD_ARROW_TEST(logging-test)
@@ -105,6 +106,7 @@ ADD_ARROW_BENCHMARK(bit-util-benchmark)
 ADD_ARROW_BENCHMARK(compression-benchmark)
 ADD_ARROW_BENCHMARK(decimal-benchmark)
 ADD_ARROW_BENCHMARK(hashing-benchmark)
+ADD_ARROW_BENCHMARK(int-util-benchmark)
 ADD_ARROW_BENCHMARK(lazy-benchmark)
 ADD_ARROW_BENCHMARK(number-parsing-benchmark)
 ADD_ARROW_BENCHMARK(utf8-util-benchmark)
diff --git a/cpp/src/arrow/util/int-util-benchmark.cc b/cpp/src/arrow/util/int-util-benchmark.cc
new file mode 100644
index 0000000..3feb2ee
--- /dev/null
+++ b/cpp/src/arrow/util/int-util-benchmark.cc
@@ -0,0 +1,109 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "benchmark/benchmark.h"
+
+#include <cstdint>
+#include <vector>
+
+#include "arrow/util/int-util.h"
+
+namespace arrow {
+namespace internal {
+
+std::vector<uint64_t> GetUIntSequence(int n_values, uint64_t addend = 0) {
+  std::vector<uint64_t> values(n_values);
+  for (int i = 0; i < n_values; ++i) {
+    values[i] = static_cast<uint64_t>(i) + addend;
+  }
+  return values;
+}
+
+std::vector<int64_t> GetIntSequence(int n_values, uint64_t addend = 0) {
+  std::vector<int64_t> values(n_values);
+  for (int i = 0; i < n_values; ++i) {
+    values[i] = static_cast<int64_t>(i) + addend;
+  }
+  return values;
+}
+
+std::vector<uint8_t> GetValidBytes(int n_values) {
+  std::vector<uint8_t> valid_bytes(n_values);
+  for (int i = 0; i < n_values; ++i) {
+    valid_bytes[i] = (i % 3 == 0) ? 1 : 0;
+  }
+  return valid_bytes;
+}
+
+static void BM_DetectUIntWidthNoNulls(
+    benchmark::State& state) {  // NOLINT non-const reference
+  const auto values = GetUIntSequence(0x12345);
+
+  while (state.KeepRunning()) {
+    auto result = DetectUIntWidth(values.data(), static_cast<int64_t>(values.size()));
+    benchmark::DoNotOptimize(result);
+  }
+  state.SetBytesProcessed(state.iterations() * values.size() * sizeof(uint64_t));
+}
+
+static void BM_DetectUIntWidthNulls(
+    benchmark::State& state) {  // NOLINT non-const reference
+  const auto values = GetUIntSequence(0x12345);
+  const auto valid_bytes = GetValidBytes(0x12345);
+
+  while (state.KeepRunning()) {
+    auto result = DetectUIntWidth(values.data(), valid_bytes.data(),
+                                  static_cast<int64_t>(values.size()));
+    benchmark::DoNotOptimize(result);
+  }
+  state.SetBytesProcessed(state.iterations() * values.size() * sizeof(uint64_t));
+}
+
+static void BM_DetectIntWidthNoNulls(
+    benchmark::State& state) {  // NOLINT non-const reference
+  const auto values = GetIntSequence(0x12345, -0x1234);
+
+  while (state.KeepRunning()) {
+    auto result = DetectIntWidth(values.data(), static_cast<int64_t>(values.size()));
+    benchmark::DoNotOptimize(result);
+  }
+  state.SetBytesProcessed(state.iterations() * values.size() * sizeof(uint64_t));
+}
+
+static void BM_DetectIntWidthNulls(
+    benchmark::State& state) {  // NOLINT non-const reference
+  const auto values = GetIntSequence(0x12345, -0x1234);
+  const auto valid_bytes = GetValidBytes(0x12345);
+
+  while (state.KeepRunning()) {
+    auto result = DetectIntWidth(values.data(), valid_bytes.data(),
+                                 static_cast<int64_t>(values.size()));
+    benchmark::DoNotOptimize(result);
+  }
+  state.SetBytesProcessed(state.iterations() * values.size() * sizeof(uint64_t));
+}
+
+BENCHMARK(BM_DetectUIntWidthNoNulls)->MinTime(1.0)->Unit(benchmark::kMicrosecond);
+
+BENCHMARK(BM_DetectUIntWidthNulls)->MinTime(1.0)->Unit(benchmark::kMicrosecond);
+
+BENCHMARK(BM_DetectIntWidthNoNulls)->MinTime(1.0)->Unit(benchmark::kMicrosecond);
+
+BENCHMARK(BM_DetectIntWidthNulls)->MinTime(1.0)->Unit(benchmark::kMicrosecond);
+
+}  // namespace internal
+}  // namespace arrow
diff --git a/cpp/src/arrow/util/int-util-test.cc b/cpp/src/arrow/util/int-util-test.cc
new file mode 100644
index 0000000..51fd96e
--- /dev/null
+++ b/cpp/src/arrow/util/int-util-test.cc
@@ -0,0 +1,379 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <algorithm>
+#include <cstdint>
+#include <memory>
+#include <random>
+#include <utility>
+#include <vector>
+
+#include <gtest/gtest.h>
+
+#include "arrow/test-util.h"
+#include "arrow/util/int-util.h"
+
+namespace arrow {
+namespace internal {
+
+static std::vector<uint8_t> all_widths = {1, 2, 4, 8};
+
+template <typename T>
+void CheckUIntWidth(const std::vector<T>& values, uint8_t expected_width) {
+  for (const uint8_t min_width : all_widths) {
+    uint8_t width =
+        DetectUIntWidth(values.data(), static_cast<int64_t>(values.size()), min_width);
+    ASSERT_EQ(width, std::max(min_width, expected_width));
+    width = DetectUIntWidth(values.data(), nullptr, static_cast<int64_t>(values.size()),
+                            min_width);
+    ASSERT_EQ(width, std::max(min_width, expected_width));
+  }
+}
+
+template <typename T>
+void CheckUIntWidth(const std::vector<T>& values, const std::vector<uint8_t>& valid_bytes,
+                    uint8_t expected_width) {
+  for (const uint8_t min_width : all_widths) {
+    uint8_t width = DetectUIntWidth(values.data(), valid_bytes.data(),
+                                    static_cast<int64_t>(values.size()), min_width);
+    ASSERT_EQ(width, std::max(min_width, expected_width));
+  }
+}
+
+template <typename T>
+void CheckIntWidth(const std::vector<T>& values, uint8_t expected_width) {
+  for (const uint8_t min_width : all_widths) {
+    uint8_t width =
+        DetectIntWidth(values.data(), static_cast<int64_t>(values.size()), min_width);
+    ASSERT_EQ(width, std::max(min_width, expected_width));
+    width = DetectIntWidth(values.data(), nullptr, static_cast<int64_t>(values.size()),
+                           min_width);
+    ASSERT_EQ(width, std::max(min_width, expected_width));
+  }
+}
+
+template <typename T>
+void CheckIntWidth(const std::vector<T>& values, const std::vector<uint8_t>& valid_bytes,
+                   uint8_t expected_width) {
+  for (const uint8_t min_width : all_widths) {
+    uint8_t width = DetectIntWidth(values.data(), valid_bytes.data(),
+                                   static_cast<int64_t>(values.size()), min_width);
+    ASSERT_EQ(width, std::max(min_width, expected_width));
+  }
+}
+
+template <typename T>
+std::vector<T> MakeRandomVector(const std::vector<T>& base_values, int n_values) {
+  std::default_random_engine gen(42);
+  std::uniform_int_distribution<int> index_dist(0,
+                                                static_cast<int>(base_values.size() - 1));
+
+  std::vector<T> values(n_values);
+  for (int i = 0; i < n_values; ++i) {
+    values[i] = base_values[index_dist(gen)];
+  }
+  return values;
+}
+
+template <typename T>
+std::vector<std::pair<std::vector<T>, std::vector<uint8_t>>> AlmostAllNullValues(
+    int n_values, T null_value, T non_null_value) {
+  std::vector<std::pair<std::vector<T>, std::vector<uint8_t>>> vectors;
+  vectors.reserve(n_values);
+  for (int i = 0; i < n_values; ++i) {
+    std::vector<T> values(n_values, null_value);
+    std::vector<uint8_t> valid_bytes(n_values, 0);
+    values[i] = non_null_value;
+    valid_bytes[i] = 1;
+    vectors.push_back({std::move(values), std::move(valid_bytes)});
+  }
+  return vectors;
+}
+
+template <typename T>
+std::vector<std::vector<T>> AlmostAllZeros(int n_values, T nonzero_value) {
+  std::vector<std::vector<T>> vectors;
+  vectors.reserve(n_values);
+  for (int i = 0; i < n_values; ++i) {
+    std::vector<T> values(n_values, 0);
+    values[i] = nonzero_value;
+    vectors.push_back(std::move(values));
+  }
+  return vectors;
+}
+
+std::vector<uint64_t> valid_uint8 = {0, 0x7f, 0xff};
+std::vector<uint64_t> valid_uint16 = {0, 0x7f, 0xff, 0x1000, 0xffff};
+std::vector<uint64_t> valid_uint32 = {0, 0x7f, 0xff, 0x10000, 0xffffffffULL};
+std::vector<uint64_t> valid_uint64 = {0, 0x100000000ULL, 0xffffffffffffffffULL};
+
+TEST(UIntWidth, NoNulls) {
+  std::vector<uint64_t> values{0, 0x7f, 0xff};
+  CheckUIntWidth(values, 1);
+
+  values = {0, 0x100};
+  CheckUIntWidth(values, 2);
+
+  values = {0, 0xffff};
+  CheckUIntWidth(values, 2);
+
+  values = {0, 0x10000};
+  CheckUIntWidth(values, 4);
+
+  values = {0, 0xffffffffULL};
+  CheckUIntWidth(values, 4);
+
+  values = {0, 0x100000000ULL};
+  CheckUIntWidth(values, 8);
+
+  values = {0, 0xffffffffffffffffULL};
+  CheckUIntWidth(values, 8);
+}
+
+TEST(UIntWidth, Nulls) {
+  std::vector<uint8_t> valid10{true, false};
+  std::vector<uint8_t> valid01{false, true};
+
+  std::vector<uint64_t> values{0, 0xff};
+  CheckUIntWidth(values, valid01, 1);
+  CheckUIntWidth(values, valid10, 1);
+
+  values = {0, 0x100};
+  CheckUIntWidth(values, valid01, 2);
+  CheckUIntWidth(values, valid10, 1);
+
+  values = {0, 0xffff};
+  CheckUIntWidth(values, valid01, 2);
+  CheckUIntWidth(values, valid10, 1);
+
+  values = {0, 0x10000};
+  CheckUIntWidth(values, valid01, 4);
+  CheckUIntWidth(values, valid10, 1);
+
+  values = {0, 0xffffffffULL};
+  CheckUIntWidth(values, valid01, 4);
+  CheckUIntWidth(values, valid10, 1);
+
+  values = {0, 0x100000000ULL};
+  CheckUIntWidth(values, valid01, 8);
+  CheckUIntWidth(values, valid10, 1);
+
+  values = {0, 0xffffffffffffffffULL};
+  CheckUIntWidth(values, valid01, 8);
+  CheckUIntWidth(values, valid10, 1);
+}
+
+TEST(UIntWidth, NoNullsMany) {
+  constexpr int N = 40;
+  for (const auto& values : AlmostAllZeros<uint64_t>(N, 0xff)) {
+    CheckUIntWidth(values, 1);
+  }
+  for (const auto& values : AlmostAllZeros<uint64_t>(N, 0xffff)) {
+    CheckUIntWidth(values, 2);
+  }
+  for (const auto& values : AlmostAllZeros<uint64_t>(N, 0xffffffffULL)) {
+    CheckUIntWidth(values, 4);
+  }
+  for (const auto& values : AlmostAllZeros<uint64_t>(N, 0xffffffffffffffffULL)) {
+    CheckUIntWidth(values, 8);
+  }
+  auto values = MakeRandomVector(valid_uint8, N);
+  CheckUIntWidth(values, 1);
+
+  values = MakeRandomVector(valid_uint16, N);
+  CheckUIntWidth(values, 2);
+
+  values = MakeRandomVector(valid_uint32, N);
+  CheckUIntWidth(values, 4);
+
+  values = MakeRandomVector(valid_uint64, N);
+  CheckUIntWidth(values, 8);
+}
+
+TEST(UIntWidth, NullsMany) {
+  constexpr uint64_t huge = 0x123456789abcdefULL;
+  constexpr int N = 40;
+  for (const auto& p : AlmostAllNullValues<uint64_t>(N, 0, 0xff)) {
+    CheckUIntWidth(p.first, p.second, 1);
+  }
+  for (const auto& p : AlmostAllNullValues<uint64_t>(N, huge, 0xff)) {
+    CheckUIntWidth(p.first, p.second, 1);
+  }
+  for (const auto& p : AlmostAllNullValues<uint64_t>(N, 0, 0xffff)) {
+    CheckUIntWidth(p.first, p.second, 2);
+  }
+  for (const auto& p : AlmostAllNullValues<uint64_t>(N, huge, 0xffff)) {
+    CheckUIntWidth(p.first, p.second, 2);
+  }
+  for (const auto& p : AlmostAllNullValues<uint64_t>(N, 0, 0xffffffffULL)) {
+    CheckUIntWidth(p.first, p.second, 4);
+  }
+  for (const auto& p : AlmostAllNullValues<uint64_t>(N, huge, 0xffffffffULL)) {
+    CheckUIntWidth(p.first, p.second, 4);
+  }
+  for (const auto& p : AlmostAllNullValues<uint64_t>(N, 0, 0xffffffffffffffffULL)) {
+    CheckUIntWidth(p.first, p.second, 8);
+  }
+  for (const auto& p : AlmostAllNullValues<uint64_t>(N, huge, 0xffffffffffffffffULL)) {
+    CheckUIntWidth(p.first, p.second, 8);
+  }
+}
+
+TEST(IntWidth, NoNulls) {
+  std::vector<int64_t> values{0, 0x7f, -0x80};
+  CheckIntWidth(values, 1);
+
+  values = {0, 0x80};
+  CheckIntWidth(values, 2);
+
+  values = {0, -0x81};
+  CheckIntWidth(values, 2);
+
+  values = {0, 0x7fff, -0x8000};
+  CheckIntWidth(values, 2);
+
+  values = {0, 0x8000};
+  CheckIntWidth(values, 4);
+
+  values = {0, -0x8001};
+  CheckIntWidth(values, 4);
+
+  values = {0, 0x7fffffffLL, -0x80000000LL};
+  CheckIntWidth(values, 4);
+
+  values = {0, 0x80000000LL};
+  CheckIntWidth(values, 8);
+
+  values = {0, -0x80000001LL};
+  CheckIntWidth(values, 8);
+
+  values = {0, 0x7fffffffffffffffLL, -0x7fffffffffffffffLL - 1};
+  CheckIntWidth(values, 8);
+}
+
+TEST(IntWidth, Nulls) {
+  std::vector<uint8_t> valid100{true, false, false};
+  std::vector<uint8_t> valid010{false, true, false};
+  std::vector<uint8_t> valid001{false, false, true};
+
+  std::vector<int64_t> values{0, 0x7f, -0x80};
+  CheckIntWidth(values, valid100, 1);
+  CheckIntWidth(values, valid010, 1);
+  CheckIntWidth(values, valid001, 1);
+
+  values = {0, 0x80, -0x81};
+  CheckIntWidth(values, valid100, 1);
+  CheckIntWidth(values, valid010, 2);
+  CheckIntWidth(values, valid001, 2);
+
+  values = {0, 0x7fff, -0x8000};
+  CheckIntWidth(values, valid100, 1);
+  CheckIntWidth(values, valid010, 2);
+  CheckIntWidth(values, valid001, 2);
+
+  values = {0, 0x8000, -0x8001};
+  CheckIntWidth(values, valid100, 1);
+  CheckIntWidth(values, valid010, 4);
+  CheckIntWidth(values, valid001, 4);
+
+  values = {0, 0x7fffffffLL, -0x80000000LL};
+  CheckIntWidth(values, valid100, 1);
+  CheckIntWidth(values, valid010, 4);
+  CheckIntWidth(values, valid001, 4);
+
+  values = {0, 0x80000000LL, -0x80000001LL};
+  CheckIntWidth(values, valid100, 1);
+  CheckIntWidth(values, valid010, 8);
+  CheckIntWidth(values, valid001, 8);
+
+  values = {0, 0x7fffffffffffffffLL, -0x7fffffffffffffffLL - 1};
+  CheckIntWidth(values, valid100, 1);
+  CheckIntWidth(values, valid010, 8);
+  CheckIntWidth(values, valid001, 8);
+}
+
+TEST(IntWidth, NoNullsMany) {
+  constexpr int N = 40;
+  // 1 byte wide
+  for (const int64_t value : {0x7f, -0x80}) {
+    for (const auto& values : AlmostAllZeros<int64_t>(N, value)) {
+      CheckIntWidth(values, 1);
+    }
+  }
+  // 2 bytes wide
+  for (const int64_t value : {0x80, -0x81, 0x7fff, -0x8000}) {
+    for (const auto& values : AlmostAllZeros<int64_t>(N, value)) {
+      CheckIntWidth(values, 2);
+    }
+  }
+  // 4 bytes wide
+  for (const int64_t value : {0x8000LL, -0x8001LL, 0x7fffffffLL, -0x80000000LL}) {
+    for (const auto& values : AlmostAllZeros<int64_t>(N, value)) {
+      CheckIntWidth(values, 4);
+    }
+  }
+  // 8 bytes wide
+  for (const int64_t value : {0x80000000LL, -0x80000001LL, 0x7fffffffffffffffLL}) {
+    for (const auto& values : AlmostAllZeros<int64_t>(N, value)) {
+      CheckIntWidth(values, 8);
+    }
+  }
+}
+
+TEST(IntWidth, NullsMany) {
+  constexpr int64_t huge = 0x123456789abcdefLL;
+  constexpr int N = 40;
+  // 1 byte wide
+  for (const int64_t value : {0x7f, -0x80}) {
+    for (const auto& p : AlmostAllNullValues<int64_t>(N, 0, value)) {
+      CheckIntWidth(p.first, p.second, 1);
+    }
+    for (const auto& p : AlmostAllNullValues<int64_t>(N, huge, value)) {
+      CheckIntWidth(p.first, p.second, 1);
+    }
+  }
+  // 2 bytes wide
+  for (const int64_t value : {0x80, -0x81, 0x7fff, -0x8000}) {
+    for (const auto& p : AlmostAllNullValues<int64_t>(N, 0, value)) {
+      CheckIntWidth(p.first, p.second, 2);
+    }
+    for (const auto& p : AlmostAllNullValues<int64_t>(N, huge, value)) {
+      CheckIntWidth(p.first, p.second, 2);
+    }
+  }
+  // 4 bytes wide
+  for (const int64_t value : {0x8000LL, -0x8001LL, 0x7fffffffLL, -0x80000000LL}) {
+    for (const auto& p : AlmostAllNullValues<int64_t>(N, 0, value)) {
+      CheckIntWidth(p.first, p.second, 4);
+    }
+    for (const auto& p : AlmostAllNullValues<int64_t>(N, huge, value)) {
+      CheckIntWidth(p.first, p.second, 4);
+    }
+  }
+  // 8 bytes wide
+  for (const int64_t value : {0x80000000LL, -0x80000001LL, 0x7fffffffffffffffLL}) {
+    for (const auto& p : AlmostAllNullValues<int64_t>(N, 0, value)) {
+      CheckIntWidth(p.first, p.second, 8);
+    }
+    for (const auto& p : AlmostAllNullValues<int64_t>(N, huge, value)) {
+      CheckIntWidth(p.first, p.second, 8);
+    }
+  }
+}
+
+}  // namespace internal
+}  // namespace arrow
diff --git a/cpp/src/arrow/util/int-util.cc b/cpp/src/arrow/util/int-util.cc
new file mode 100644
index 0000000..ced1cd1
--- /dev/null
+++ b/cpp/src/arrow/util/int-util.cc
@@ -0,0 +1,406 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/util/int-util.h"
+
+#include <algorithm>
+#include <cstring>
+#include <limits>
+
+#include "arrow/util/bit-util.h"
+#include "arrow/util/logging.h"
+
+namespace arrow {
+namespace internal {
+
+static constexpr uint64_t max_uint8 =
+    static_cast<uint64_t>(std::numeric_limits<uint8_t>::max());
+static constexpr uint64_t max_uint16 =
+    static_cast<uint64_t>(std::numeric_limits<uint16_t>::max());
+static constexpr uint64_t max_uint32 =
+    static_cast<uint64_t>(std::numeric_limits<uint32_t>::max());
+static constexpr uint64_t max_uint64 = std::numeric_limits<uint64_t>::max();
+
+static constexpr uint64_t mask_uint8 = ~0xffULL;
+static constexpr uint64_t mask_uint16 = ~0xffffULL;
+static constexpr uint64_t mask_uint32 = ~0xffffffffULL;
+
+//
+// Unsigned integer width detection
+//
+
+static const uint64_t max_uints[] = {0, max_uint8, max_uint16, 0,         max_uint32,
+                                     0, 0,         0,          max_uint64};
+
+// Check if we would need to expand the underlying storage type
+inline uint8_t ExpandedUIntWidth(uint64_t val, uint8_t current_width) {
+  // Optimize for the common case where width doesn't change
+  if (ARROW_PREDICT_TRUE(val <= max_uints[current_width])) {
+    return current_width;
+  }
+  if (current_width == 1 && val <= max_uint8) {
+    return 1;
+  } else if (current_width <= 2 && val <= max_uint16) {
+    return 2;
+  } else if (current_width <= 4 && val <= max_uint32) {
+    return 4;
+  } else {
+    return 8;
+  }
+}
+
+uint8_t DetectUIntWidth(const uint64_t* values, int64_t length, uint8_t min_width) {
+  uint8_t width = min_width;
+  if (min_width < 8) {
+    auto p = values;
+    const auto end = p + length;
+    while (p <= end - 16) {
+      // This is probably SIMD-izable
+      auto u = p[0];
+      auto v = p[1];
+      auto w = p[2];
+      auto x = p[3];
+      u |= p[4];
+      v |= p[5];
+      w |= p[6];
+      x |= p[7];
+      u |= p[8];
+      v |= p[9];
+      w |= p[10];
+      x |= p[11];
+      u |= p[12];
+      v |= p[13];
+      w |= p[14];
+      x |= p[15];
+      p += 16;
+      width = ExpandedUIntWidth(u | v | w | x, width);
+      if (ARROW_PREDICT_FALSE(width == 8)) {
+        break;
+      }
+    }
+    if (p <= end - 8) {
+      auto u = p[0];
+      auto v = p[1];
+      auto w = p[2];
+      auto x = p[3];
+      u |= p[4];
+      v |= p[5];
+      w |= p[6];
+      x |= p[7];
+      p += 8;
+      width = ExpandedUIntWidth(u | v | w | x, width);
+    }
+    while (p < end) {
+      width = ExpandedUIntWidth(*p++, width);
+    }
+  }
+  return width;
+}
+
+uint8_t DetectUIntWidth(const uint64_t* values, const uint8_t* valid_bytes,
+                        int64_t length, uint8_t min_width) {
+  if (valid_bytes == nullptr) {
+    return DetectUIntWidth(values, length, min_width);
+  }
+  uint8_t width = min_width;
+  if (min_width < 8) {
+    auto p = values;
+    const auto end = p + length;
+    auto b = valid_bytes;
+
+#define MASK(p, b, i) p[i] * (b[i] != 0)
+
+    while (p <= end - 8) {
+      // This is probably be SIMD-izable
+      auto u = MASK(p, b, 0);
+      auto v = MASK(p, b, 1);
+      auto w = MASK(p, b, 2);
+      auto x = MASK(p, b, 3);
+      u |= MASK(p, b, 4);
+      v |= MASK(p, b, 5);
+      w |= MASK(p, b, 6);
+      x |= MASK(p, b, 7);
+      b += 8;
+      p += 8;
+      width = ExpandedUIntWidth(u | v | w | x, width);
+      if (ARROW_PREDICT_FALSE(width == 8)) {
+        break;
+      }
+    }
+    uint64_t mask = 0;
+    while (p < end) {
+      mask |= MASK(p, b, 0);
+      ++b;
+      ++p;
+    }
+    width = ExpandedUIntWidth(mask, width);
+
+#undef MASK
+  }
+  return width;
+}
+
+//
+// Signed integer width detection
+//
+
+uint8_t DetectIntWidth(const int64_t* values, int64_t length, uint8_t min_width) {
+  if (min_width == 8) {
+    return min_width;
+  }
+  uint8_t width = min_width;
+
+  auto p = values;
+  const auto end = p + length;
+  // Strategy: to determine whether `x` is between -0x80 and 0x7f,
+  // we determine whether `x + 0x80` is between 0x00 and 0xff.  The
+  // latter can be done with a simple AND mask with ~0xff and, more
+  // importantly, can be computed in a single step over multiple ORed
+  // values (so we can branch once every N items instead of once every item).
+  // This strategy could probably lend itself to explicit SIMD-ization,
+  // if more performance is needed.
+  constexpr uint64_t addend8 = 0x80ULL;
+  constexpr uint64_t addend16 = 0x8000ULL;
+  constexpr uint64_t addend32 = 0x80000000ULL;
+
+  auto test_one_item = [&](uint64_t addend, uint64_t test_mask) -> bool {
+    auto v = *p++;
+    if (ARROW_PREDICT_FALSE(((v + addend) & test_mask) != 0)) {
+      --p;
+      return false;
+    } else {
+      return true;
+    }
+  };
+
+  auto test_four_items = [&](uint64_t addend, uint64_t test_mask) -> bool {
+    auto mask = (p[0] + addend) | (p[1] + addend) | (p[2] + addend) | (p[3] + addend);
+    p += 4;
+    if (ARROW_PREDICT_FALSE((mask & test_mask) != 0)) {
+      p -= 4;
+      return false;
+    } else {
+      return true;
+    }
+  };
+
+  if (width == 1) {
+    while (p <= end - 4) {
+      if (!test_four_items(addend8, mask_uint8)) {
+        width = 2;
+        goto width2;
+      }
+    }
+    while (p < end) {
+      if (!test_one_item(addend8, mask_uint8)) {
+        width = 2;
+        goto width2;
+      }
+    }
+    return 1;
+  }
+width2:
+  if (width == 2) {
+    while (p <= end - 4) {
+      if (!test_four_items(addend16, mask_uint16)) {
+        width = 4;
+        goto width4;
+      }
+    }
+    while (p < end) {
+      if (!test_one_item(addend16, mask_uint16)) {
+        width = 4;
+        goto width4;
+      }
+    }
+    return 2;
+  }
+width4:
+  if (width == 4) {
+    while (p <= end - 4) {
+      if (!test_four_items(addend32, mask_uint32)) {
+        width = 8;
+        goto width8;
+      }
+    }
+    while (p < end) {
+      if (!test_one_item(addend32, mask_uint32)) {
+        width = 8;
+        goto width8;
+      }
+    }
+    return 4;
+  }
+width8:
+  return 8;
+}
+
+uint8_t DetectIntWidth(const int64_t* values, const uint8_t* valid_bytes, int64_t length,
+                       uint8_t min_width) {
+  if (valid_bytes == nullptr) {
+    return DetectIntWidth(values, length, min_width);
+  }
+
+  if (min_width == 8) {
+    return min_width;
+  }
+  uint8_t width = min_width;
+
+  auto p = values;
+  const auto end = p + length;
+  auto b = valid_bytes;
+  // Strategy is similar to the no-nulls case above, but we also
+  // have to zero any incoming items that have a zero validity byte.
+  constexpr uint64_t addend8 = 0x80ULL;
+  constexpr uint64_t addend16 = 0x8000ULL;
+  constexpr uint64_t addend32 = 0x80000000ULL;
+
+#define MASK(p, b, addend, i) (p[i] + addend) * (b[i] != 0)
+
+  auto test_one_item = [&](uint64_t addend, uint64_t test_mask) -> bool {
+    auto v = MASK(p, b, addend, 0);
+    ++b;
+    ++p;
+    if (ARROW_PREDICT_FALSE((v & test_mask) != 0)) {
+      --b;
+      --p;
+      return false;
+    } else {
+      return true;
+    }
+  };
+
+  auto test_eight_items = [&](uint64_t addend, uint64_t test_mask) -> bool {
+    auto mask1 = MASK(p, b, addend, 0) | MASK(p, b, addend, 1) | MASK(p, b, addend, 2) |
+                 MASK(p, b, addend, 3);
+    auto mask2 = MASK(p, b, addend, 4) | MASK(p, b, addend, 5) | MASK(p, b, addend, 6) |
+                 MASK(p, b, addend, 7);
+    b += 8;
+    p += 8;
+    if (ARROW_PREDICT_FALSE(((mask1 | mask2) & test_mask) != 0)) {
+      b -= 8;
+      p -= 8;
+      return false;
+    } else {
+      return true;
+    }
+  };
+
+#undef MASK
+
+  if (width == 1) {
+    while (p <= end - 8) {
+      if (!test_eight_items(addend8, mask_uint8)) {
+        width = 2;
+        goto width2;
+      }
+    }
+    while (p < end) {
+      if (!test_one_item(addend8, mask_uint8)) {
+        width = 2;
+        goto width2;
+      }
+    }
+    return 1;
+  }
+width2:
+  if (width == 2) {
+    while (p <= end - 8) {
+      if (!test_eight_items(addend16, mask_uint16)) {
+        width = 4;
+        goto width4;
+      }
+    }
+    while (p < end) {
+      if (!test_one_item(addend16, mask_uint16)) {
+        width = 4;
+        goto width4;
+      }
+    }
+    return 2;
+  }
+width4:
+  if (width == 4) {
+    while (p <= end - 8) {
+      if (!test_eight_items(addend32, mask_uint32)) {
+        width = 8;
+        goto width8;
+      }
+    }
+    while (p < end) {
+      if (!test_one_item(addend32, mask_uint32)) {
+        width = 8;
+        goto width8;
+      }
+    }
+    return 4;
+  }
+width8:
+  return 8;
+}
+
+template <typename Source, typename Dest>
+inline void DowncastIntsInternal(const Source* src, Dest* dest, int64_t length) {
+  while (length >= 4) {
+    dest[0] = static_cast<Dest>(src[0]);
+    dest[1] = static_cast<Dest>(src[1]);
+    dest[2] = static_cast<Dest>(src[2]);
+    dest[3] = static_cast<Dest>(src[3]);
+    length -= 4;
+    src += 4;
+    dest += 4;
+  }
+  while (length > 0) {
+    *dest++ = static_cast<Dest>(*src++);
+    --length;
+  }
+}
+
+void DowncastInts(const int64_t* source, int8_t* dest, int64_t length) {
+  DowncastIntsInternal(source, dest, length);
+}
+
+void DowncastInts(const int64_t* source, int16_t* dest, int64_t length) {
+  DowncastIntsInternal(source, dest, length);
+}
+
+void DowncastInts(const int64_t* source, int32_t* dest, int64_t length) {
+  DowncastIntsInternal(source, dest, length);
+}
+
+void DowncastInts(const int64_t* source, int64_t* dest, int64_t length) {
+  memcpy(dest, source, length * sizeof(int64_t));
+}
+
+void DowncastUInts(const uint64_t* source, uint8_t* dest, int64_t length) {
+  DowncastIntsInternal(source, dest, length);
+}
+
+void DowncastUInts(const uint64_t* source, uint16_t* dest, int64_t length) {
+  DowncastIntsInternal(source, dest, length);
+}
+
+void DowncastUInts(const uint64_t* source, uint32_t* dest, int64_t length) {
+  DowncastIntsInternal(source, dest, length);
+}
+
+void DowncastUInts(const uint64_t* source, uint64_t* dest, int64_t length) {
+  memcpy(dest, source, length * sizeof(int64_t));
+}
+
+}  // namespace internal
+}  // namespace arrow
diff --git a/cpp/src/arrow/util/int-util.h b/cpp/src/arrow/util/int-util.h
new file mode 100644
index 0000000..68355d3
--- /dev/null
+++ b/cpp/src/arrow/util/int-util.h
@@ -0,0 +1,69 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#ifndef ARROW_UTIL_INT_UTIL_H
+#define ARROW_UTIL_INT_UTIL_H
+
+#include <cstdint>
+
+#include "arrow/util/visibility.h"
+
+namespace arrow {
+namespace internal {
+
+ARROW_EXPORT
+uint8_t DetectUIntWidth(const uint64_t* values, int64_t length, uint8_t min_width = 1);
+
+ARROW_EXPORT
+uint8_t DetectUIntWidth(const uint64_t* values, const uint8_t* valid_bytes,
+                        int64_t length, uint8_t min_width = 1);
+
+ARROW_EXPORT
+uint8_t DetectIntWidth(const int64_t* values, int64_t length, uint8_t min_width = 1);
+
+ARROW_EXPORT
+uint8_t DetectIntWidth(const int64_t* values, const uint8_t* valid_bytes, int64_t length,
+                       uint8_t min_width = 1);
+
+ARROW_EXPORT
+void DowncastInts(const int64_t* source, int8_t* dest, int64_t length);
+
+ARROW_EXPORT
+void DowncastInts(const int64_t* source, int16_t* dest, int64_t length);
+
+ARROW_EXPORT
+void DowncastInts(const int64_t* source, int32_t* dest, int64_t length);
+
+ARROW_EXPORT
+void DowncastInts(const int64_t* source, int64_t* dest, int64_t length);
+
+ARROW_EXPORT
+void DowncastUInts(const uint64_t* source, uint8_t* dest, int64_t length);
+
+ARROW_EXPORT
+void DowncastUInts(const uint64_t* source, uint16_t* dest, int64_t length);
+
+ARROW_EXPORT
+void DowncastUInts(const uint64_t* source, uint32_t* dest, int64_t length);
+
+ARROW_EXPORT
+void DowncastUInts(const uint64_t* source, uint64_t* dest, int64_t length);
+
+}  // namespace internal
+}  // namespace arrow
+
+#endif  // ARROW_UTIL_INT_UTIL_H