You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2019/05/18 15:38:08 UTC

[arrow] branch master updated: ARROW-5102: [C++] Reduce header dependencies

This is an automated email from the ASF dual-hosted git repository.

wesm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 7a55621  ARROW-5102: [C++] Reduce header dependencies
7a55621 is described below

commit 7a5562174cffb21b16f990f64d114c1a94a30556
Author: Antoine Pitrou <an...@python.org>
AuthorDate: Sat May 18 10:37:55 2019 -0500

    ARROW-5102: [C++] Reduce header dependencies
    
    This is a first attempt at making our headers lighter-weight.
    
    The benefits are currently meagre: I get a 4% speedup when compiling Arrow + Parquet.
    
    Author: Antoine Pitrou <an...@python.org>
    
    Closes #4313 from pitrou/ARROW-5102-reduce-header-deps and squashes the following commits:
    
    4937d9fd2 <Antoine Pitrou> ARROW-5102:  Reduce header dependencies
---
 c_glib/arrow-glib/compute.cpp                      |   2 +
 cpp/src/arrow/CMakeLists.txt                       |   2 +
 cpp/src/arrow/adapters/orc/adapter.cc              |   1 +
 cpp/src/arrow/adapters/orc/adapter_util.cc         |   1 +
 cpp/src/arrow/api.h                                |  31 ++--
 cpp/src/arrow/array-test.cc                        |   1 +
 cpp/src/arrow/array/builder_base.cc                |   1 -
 cpp/src/arrow/array/builder_binary.cc              |   1 -
 cpp/src/arrow/array/builder_binary.h               |   1 -
 cpp/src/arrow/array/builder_decimal.cc             |   1 -
 cpp/src/arrow/array/builder_dict.cc                |   1 -
 cpp/src/arrow/array/builder_nested.cc              |   1 -
 cpp/src/arrow/array/builder_primitive.cc           |   1 -
 cpp/src/arrow/buffer-builder.h                     |  13 --
 cpp/src/arrow/buffer.cc                            |   1 +
 cpp/src/arrow/buffer.h                             |   1 -
 cpp/src/arrow/builder.cc                           |   1 -
 cpp/src/arrow/compute/compute-benchmark.cc         |   1 +
 cpp/src/arrow/compute/kernels/boolean-test.cc      |   2 +-
 cpp/src/arrow/csv/parser.cc                        |   1 -
 cpp/src/arrow/csv/test-common.h                    |   7 +-
 cpp/src/arrow/extension_type-test.cc               |   1 +
 cpp/src/arrow/extension_type.cc                    |   1 +
 cpp/src/arrow/flight/perf-server.cc                |   6 +-
 cpp/src/arrow/flight/test-integration-client.cc    |   2 +
 cpp/src/arrow/flight/test-server.cc                |   1 +
 cpp/src/arrow/gpu/cuda_common.h                    |   9 -
 cpp/src/arrow/io/compressed-test.cc                |   1 +
 cpp/src/arrow/io/file-test.cc                      |   7 +-
 cpp/src/arrow/io/memory.cc                         |   4 +
 cpp/src/arrow/io/memory.h                          |   4 +-
 cpp/src/arrow/io/{test-common.h => test-common.cc} |  63 +++----
 cpp/src/arrow/io/test-common.h                     | 121 +++----------
 cpp/src/arrow/ipc/json-internal.h                  |   1 -
 cpp/src/arrow/ipc/json-test.cc                     |   2 +-
 cpp/src/arrow/ipc/metadata-internal.cc             |   1 +
 cpp/src/arrow/ipc/read-write-test.cc               |   3 +
 cpp/src/arrow/json/parser.h                        |   1 +
 cpp/src/arrow/memory_pool.cc                       |   1 -
 cpp/src/arrow/pretty_print.h                       |   2 +-
 cpp/src/arrow/python/arrow_to_pandas.cc            |   1 -
 cpp/src/arrow/python/common.cc                     |   2 +-
 cpp/src/arrow/python/common.h                      |   2 -
 cpp/src/arrow/python/deserialize.cc                |   1 -
 cpp/src/arrow/python/inference.cc                  |   1 -
 cpp/src/arrow/python/inference.h                   |   2 -
 cpp/src/arrow/python/numpy-internal.h              |   1 +
 cpp/src/arrow/python/numpy_convert.cc              |   1 -
 cpp/src/arrow/python/numpy_to_arrow.cc             |   1 -
 cpp/src/arrow/python/platform.h                    |   1 -
 cpp/src/arrow/python/util/datetime.h               |   1 -
 cpp/src/arrow/record_batch.cc                      |   8 +-
 cpp/src/arrow/record_batch.h                       |  14 +-
 cpp/src/arrow/status.cc                            |  16 +-
 cpp/src/arrow/status.h                             |   3 +
 cpp/src/arrow/table.cc                             |   1 -
 cpp/src/arrow/tensor.cc                            |   1 +
 cpp/src/arrow/testing/gtest_util.cc                |  38 ++--
 cpp/src/arrow/testing/gtest_util.h                 |  57 ++----
 cpp/src/arrow/testing/random.h                     |  34 ++++
 cpp/src/arrow/testing/util.cc                      |   9 +
 cpp/src/arrow/testing/util.h                       |  72 +-------
 cpp/src/arrow/type-test.cc                         |   1 +
 cpp/src/arrow/type.cc                              | 123 ++++++++++---
 cpp/src/arrow/type.h                               |  60 +++----
 cpp/src/arrow/type_fwd.h                           |   1 +
 cpp/src/arrow/util/compression_brotli.cc           |   1 -
 cpp/src/arrow/util/compression_lz4.cc              |   1 -
 cpp/src/arrow/util/compression_snappy.cc           |   1 -
 cpp/src/arrow/util/compression_zlib.cc             |   1 -
 cpp/src/arrow/util/compression_zstd.cc             |   1 -
 cpp/src/arrow/util/concatenate.cc                  |   1 +
 cpp/src/arrow/util/decimal.cc                      |   6 +
 cpp/src/arrow/util/decimal.h                       |   9 +-
 cpp/src/arrow/util/io-util.cc                      | 194 ++++++++++++++++++---
 cpp/src/arrow/util/io-util.h                       | 118 +++++--------
 cpp/src/arrow/util/lazy-benchmark.cc               |   1 +
 cpp/src/arrow/util/lazy-test.cc                    |   1 +
 cpp/src/arrow/util/logging.h                       |  11 +-
 cpp/src/arrow/util/parsing.h                       |   2 -
 .../{python/platform.h => util/string_builder.cc}  |  33 ++--
 cpp/src/arrow/util/string_builder.h                |  34 +++-
 cpp/src/arrow/util/thread-pool.cc                  | 117 +++++++------
 cpp/src/arrow/util/thread-pool.h                   |  16 +-
 cpp/src/arrow/visitor_inline.h                     |   1 -
 cpp/src/gandiva/tests/literal_test.cc              |   2 +-
 cpp/src/gandiva/tests/projector_test.cc            |   6 +-
 cpp/src/parquet/arrow/arrow-reader-writer-test.cc  |  10 --
 cpp/src/parquet/arrow/test-util.h                  |   1 +
 89 files changed, 683 insertions(+), 640 deletions(-)

diff --git a/c_glib/arrow-glib/compute.cpp b/c_glib/arrow-glib/compute.cpp
index 73adccb..c97485f 100644
--- a/c_glib/arrow-glib/compute.cpp
+++ b/c_glib/arrow-glib/compute.cpp
@@ -21,6 +21,8 @@
 #  include <config.h>
 #endif
 
+#include <sstream>
+
 #include <arrow-glib/array.hpp>
 #include <arrow-glib/compute.hpp>
 #include <arrow-glib/data-type.hpp>
diff --git a/cpp/src/arrow/CMakeLists.txt b/cpp/src/arrow/CMakeLists.txt
index d60514d..619c9eb 100644
--- a/cpp/src/arrow/CMakeLists.txt
+++ b/cpp/src/arrow/CMakeLists.txt
@@ -132,6 +132,7 @@ set(ARROW_SRCS
     util/logging.cc
     util/key_value_metadata.cc
     util/memory.cc
+    util/string_builder.cc
     util/task-group.cc
     util/thread-pool.cc
     util/trie.cc
@@ -283,6 +284,7 @@ if(ARROW_BUILD_TESTS OR ARROW_BUILD_BENCHMARKS)
   # that depend on gtest
   add_arrow_lib(arrow_testing
                 SOURCES
+                io/test-common.cc
                 ipc/test-common.cc
                 filesystem/test-util.cc
                 testing/gtest_util.cc
diff --git a/cpp/src/arrow/adapters/orc/adapter.cc b/cpp/src/arrow/adapters/orc/adapter.cc
index a4311bb..d72c16a 100644
--- a/cpp/src/arrow/adapters/orc/adapter.cc
+++ b/cpp/src/arrow/adapters/orc/adapter.cc
@@ -40,6 +40,7 @@
 #include "arrow/util/bit-util.h"
 #include "arrow/util/checked_cast.h"
 #include "arrow/util/decimal.h"
+#include "arrow/util/key_value_metadata.h"
 #include "arrow/util/lazy.h"
 #include "arrow/util/macros.h"
 #include "arrow/util/visibility.h"
diff --git a/cpp/src/arrow/adapters/orc/adapter_util.cc b/cpp/src/arrow/adapters/orc/adapter_util.cc
index 235e5ba..313c029 100644
--- a/cpp/src/arrow/adapters/orc/adapter_util.cc
+++ b/cpp/src/arrow/adapters/orc/adapter_util.cc
@@ -21,6 +21,7 @@
 #include "arrow/adapters/orc/adapter_util.h"
 #include "arrow/array/builder_base.h"
 #include "arrow/builder.h"
+#include "arrow/status.h"
 #include "arrow/util/checked_cast.h"
 #include "arrow/util/decimal.h"
 #include "arrow/util/lazy.h"
diff --git a/cpp/src/arrow/api.h b/cpp/src/arrow/api.h
index 7c8b10a..3d6a179 100644
--- a/cpp/src/arrow/api.h
+++ b/cpp/src/arrow/api.h
@@ -20,21 +20,22 @@
 #ifndef ARROW_API_H
 #define ARROW_API_H
 
-#include "arrow/array.h"           // IYWU pragma: export
-#include "arrow/buffer.h"          // IYWU pragma: export
-#include "arrow/builder.h"         // IYWU pragma: export
-#include "arrow/compare.h"         // IYWU pragma: export
-#include "arrow/extension_type.h"  // IYWU pragma: export
-#include "arrow/memory_pool.h"     // IYWU pragma: export
-#include "arrow/pretty_print.h"    // IYWU pragma: export
-#include "arrow/record_batch.h"    // IYWU pragma: export
-#include "arrow/status.h"          // IYWU pragma: export
-#include "arrow/table.h"           // IYWU pragma: export
-#include "arrow/table_builder.h"   // IYWU pragma: export
-#include "arrow/tensor.h"          // IYWU pragma: export
-#include "arrow/type.h"            // IYWU pragma: export
-#include "arrow/util/config.h"     // IYWU pragma: export
-#include "arrow/visitor.h"         // IYWU pragma: export
+#include "arrow/array.h"                    // IYWU pragma: export
+#include "arrow/buffer.h"                   // IYWU pragma: export
+#include "arrow/builder.h"                  // IYWU pragma: export
+#include "arrow/compare.h"                  // IYWU pragma: export
+#include "arrow/extension_type.h"           // IYWU pragma: export
+#include "arrow/memory_pool.h"              // IYWU pragma: export
+#include "arrow/pretty_print.h"             // IYWU pragma: export
+#include "arrow/record_batch.h"             // IYWU pragma: export
+#include "arrow/status.h"                   // IYWU pragma: export
+#include "arrow/table.h"                    // IYWU pragma: export
+#include "arrow/table_builder.h"            // IYWU pragma: export
+#include "arrow/tensor.h"                   // IYWU pragma: export
+#include "arrow/type.h"                     // IYWU pragma: export
+#include "arrow/util/config.h"              // IYWU pragma: export
+#include "arrow/util/key_value_metadata.h"  // IWYU pragma: export
+#include "arrow/visitor.h"                  // IYWU pragma: export
 
 /// \brief Top-level namespace for Apache Arrow C++ API
 namespace arrow {}
diff --git a/cpp/src/arrow/array-test.cc b/cpp/src/arrow/array-test.cc
index 2d1ab48..a6a20c0 100644
--- a/cpp/src/arrow/array-test.cc
+++ b/cpp/src/arrow/array-test.cc
@@ -38,6 +38,7 @@
 #include "arrow/record_batch.h"
 #include "arrow/status.h"
 #include "arrow/testing/gtest_common.h"
+#include "arrow/testing/random.h"
 #include "arrow/testing/util.h"
 #include "arrow/type.h"
 #include "arrow/type_traits.h"
diff --git a/cpp/src/arrow/array/builder_base.cc b/cpp/src/arrow/array/builder_base.cc
index fb13a88..2282937 100644
--- a/cpp/src/arrow/array/builder_base.cc
+++ b/cpp/src/arrow/array/builder_base.cc
@@ -21,7 +21,6 @@
 #include <cstddef>
 #include <cstdint>
 #include <cstring>
-#include <sstream>
 #include <utility>
 #include <vector>
 
diff --git a/cpp/src/arrow/array/builder_binary.cc b/cpp/src/arrow/array/builder_binary.cc
index 4a8ea40..88c2e86 100644
--- a/cpp/src/arrow/array/builder_binary.cc
+++ b/cpp/src/arrow/array/builder_binary.cc
@@ -22,7 +22,6 @@
 #include <cstdint>
 #include <cstring>
 #include <numeric>
-#include <sstream>
 #include <string>
 #include <utility>
 #include <vector>
diff --git a/cpp/src/arrow/array/builder_binary.h b/cpp/src/arrow/array/builder_binary.h
index facaf4a..a04e308 100644
--- a/cpp/src/arrow/array/builder_binary.h
+++ b/cpp/src/arrow/array/builder_binary.h
@@ -19,7 +19,6 @@
 
 #include <limits>
 #include <memory>
-#include <sstream>
 #include <string>
 #include <vector>
 
diff --git a/cpp/src/arrow/array/builder_decimal.cc b/cpp/src/arrow/array/builder_decimal.cc
index 191a0ff..6a46556 100644
--- a/cpp/src/arrow/array/builder_decimal.cc
+++ b/cpp/src/arrow/array/builder_decimal.cc
@@ -23,7 +23,6 @@
 #include <cstring>
 #include <memory>
 #include <numeric>
-#include <sstream>
 #include <string>
 #include <utility>
 #include <vector>
diff --git a/cpp/src/arrow/array/builder_dict.cc b/cpp/src/arrow/array/builder_dict.cc
index e4267bf..648b6ff 100644
--- a/cpp/src/arrow/array/builder_dict.cc
+++ b/cpp/src/arrow/array/builder_dict.cc
@@ -19,7 +19,6 @@
 
 #include <cstdint>
 #include <limits>
-#include <sstream>
 #include <type_traits>
 #include <utility>
 #include <vector>
diff --git a/cpp/src/arrow/array/builder_nested.cc b/cpp/src/arrow/array/builder_nested.cc
index 9ef0f4d..dd88a7a 100644
--- a/cpp/src/arrow/array/builder_nested.cc
+++ b/cpp/src/arrow/array/builder_nested.cc
@@ -21,7 +21,6 @@
 #include <cstddef>
 #include <cstdint>
 #include <cstring>
-#include <sstream>
 #include <utility>
 #include <vector>
 
diff --git a/cpp/src/arrow/array/builder_primitive.cc b/cpp/src/arrow/array/builder_primitive.cc
index 13e8f2e..d4def92 100644
--- a/cpp/src/arrow/array/builder_primitive.cc
+++ b/cpp/src/arrow/array/builder_primitive.cc
@@ -21,7 +21,6 @@
 #include <cstddef>
 #include <cstdint>
 #include <cstring>
-#include <sstream>
 #include <utility>
 #include <vector>
 
diff --git a/cpp/src/arrow/buffer-builder.h b/cpp/src/arrow/buffer-builder.h
index 32d7804..376e078 100644
--- a/cpp/src/arrow/buffer-builder.h
+++ b/cpp/src/arrow/buffer-builder.h
@@ -19,7 +19,6 @@
 #define ARROW_BUFFER_BUILDER_H
 
 #include <algorithm>
-#include <array>
 #include <cstdint>
 #include <cstring>
 #include <memory>
@@ -110,18 +109,6 @@ class ARROW_EXPORT BufferBuilder {
     return Status::OK();
   }
 
-  /// \brief Append the given data to the buffer
-  ///
-  /// The buffer is automatically expanded if necessary.
-  template <size_t NBYTES>
-  Status Append(const std::array<uint8_t, NBYTES>& data) {
-    constexpr auto nbytes = static_cast<int64_t>(NBYTES);
-    ARROW_RETURN_NOT_OK(Reserve(NBYTES));
-    std::copy(data.cbegin(), data.cend(), data_ + size_);
-    size_ += nbytes;
-    return Status::OK();
-  }
-
   // Advance pointer and zero out memory
   Status Advance(const int64_t length) { return Append(length, 0); }
 
diff --git a/cpp/src/arrow/buffer.cc b/cpp/src/arrow/buffer.cc
index 9e9bd2e..e93333e 100644
--- a/cpp/src/arrow/buffer.cc
+++ b/cpp/src/arrow/buffer.cc
@@ -17,6 +17,7 @@
 
 #include "arrow/buffer.h"
 
+#include <algorithm>
 #include <cstdint>
 #include <utility>
 
diff --git a/cpp/src/arrow/buffer.h b/cpp/src/arrow/buffer.h
index 20a7969..07b2f09 100644
--- a/cpp/src/arrow/buffer.h
+++ b/cpp/src/arrow/buffer.h
@@ -18,7 +18,6 @@
 #ifndef ARROW_BUFFER_H
 #define ARROW_BUFFER_H
 
-#include <algorithm>
 #include <cstdint>
 #include <cstring>
 #include <memory>
diff --git a/cpp/src/arrow/builder.cc b/cpp/src/arrow/builder.cc
index 2bf6178..2a3a1ad 100644
--- a/cpp/src/arrow/builder.cc
+++ b/cpp/src/arrow/builder.cc
@@ -17,7 +17,6 @@
 
 #include "arrow/builder.h"
 
-#include <sstream>
 #include <string>
 #include <utility>
 #include <vector>
diff --git a/cpp/src/arrow/compute/compute-benchmark.cc b/cpp/src/arrow/compute/compute-benchmark.cc
index 6412c54..c14f706 100644
--- a/cpp/src/arrow/compute/compute-benchmark.cc
+++ b/cpp/src/arrow/compute/compute-benchmark.cc
@@ -22,6 +22,7 @@
 #include "arrow/builder.h"
 #include "arrow/memory_pool.h"
 #include "arrow/testing/gtest_util.h"
+#include "arrow/testing/random.h"
 #include "arrow/testing/util.h"
 
 #include "arrow/compute/context.h"
diff --git a/cpp/src/arrow/compute/kernels/boolean-test.cc b/cpp/src/arrow/compute/kernels/boolean-test.cc
index 824a0d5..1b678bb 100644
--- a/cpp/src/arrow/compute/kernels/boolean-test.cc
+++ b/cpp/src/arrow/compute/kernels/boolean-test.cc
@@ -122,7 +122,7 @@ TEST_F(TestBooleanKernel, Invert) {
   ASSERT_OK(Invert(&this->ctx_, ca1, &result));
   ASSERT_EQ(Datum::CHUNKED_ARRAY, result.kind());
   std::shared_ptr<ChunkedArray> result_ca = result.chunked_array();
-  ASSERT_ARRAYS_EQUAL(*ca2, *result_ca);
+  AssertChunkedEqual(*ca2, *result_ca);
 }
 
 TEST_F(TestBooleanKernel, InvertEmptyArray) {
diff --git a/cpp/src/arrow/csv/parser.cc b/cpp/src/arrow/csv/parser.cc
index b1d175a..a7ca71c 100644
--- a/cpp/src/arrow/csv/parser.cc
+++ b/cpp/src/arrow/csv/parser.cc
@@ -19,7 +19,6 @@
 
 #include <algorithm>
 #include <cstdio>
-#include <sstream>
 #include <utility>
 
 #include "arrow/memory_pool.h"
diff --git a/cpp/src/arrow/csv/test-common.h b/cpp/src/arrow/csv/test-common.h
index a8fdb43..624023f 100644
--- a/cpp/src/arrow/csv/test-common.h
+++ b/cpp/src/arrow/csv/test-common.h
@@ -19,7 +19,6 @@
 #define ARROW_CSV_TEST_COMMON_H
 
 #include <memory>
-#include <sstream>
 #include <string>
 #include <vector>
 
@@ -30,11 +29,11 @@ namespace arrow {
 namespace csv {
 
 std::string MakeCSVData(std::vector<std::string> lines) {
-  std::stringstream ss;
+  std::string s;
   for (const auto& line : lines) {
-    ss << line;
+    s += line;
   }
-  return ss.str();
+  return s;
 }
 
 // Make a BlockParser from a vector of lines representing a CSV file
diff --git a/cpp/src/arrow/extension_type-test.cc b/cpp/src/arrow/extension_type-test.cc
index 1c917ea..90f96cd 100644
--- a/cpp/src/arrow/extension_type-test.cc
+++ b/cpp/src/arrow/extension_type-test.cc
@@ -42,6 +42,7 @@
 #include "arrow/testing/gtest_common.h"
 #include "arrow/testing/util.h"
 #include "arrow/type.h"
+#include "arrow/util/key_value_metadata.h"
 
 namespace arrow {
 
diff --git a/cpp/src/arrow/extension_type.cc b/cpp/src/arrow/extension_type.cc
index 5f52ffa..5a63459 100644
--- a/cpp/src/arrow/extension_type.cc
+++ b/cpp/src/arrow/extension_type.cc
@@ -25,6 +25,7 @@
 #include <utility>
 
 #include "arrow/array.h"
+#include "arrow/status.h"
 #include "arrow/type.h"
 #include "arrow/util/visibility.h"
 
diff --git a/cpp/src/arrow/flight/perf-server.cc b/cpp/src/arrow/flight/perf-server.cc
index b2c268b..3755f3d 100644
--- a/cpp/src/arrow/flight/perf-server.cc
+++ b/cpp/src/arrow/flight/perf-server.cc
@@ -29,7 +29,9 @@
 #include "arrow/io/test-common.h"
 #include "arrow/ipc/writer.h"
 #include "arrow/record_batch.h"
+#include "arrow/testing/random.h"
 #include "arrow/testing/util.h"
+#include "arrow/util/logging.h"
 
 #include "arrow/flight/api.h"
 #include "arrow/flight/internal.h"
@@ -125,8 +127,10 @@ Status GetPerfBatches(const perf::Token& token, const std::shared_ptr<Schema>& s
   const int32_t length = token.definition().records_per_batch();
   const int32_t ncolumns = 4;
   for (int i = 0; i < ncolumns; ++i) {
-    RETURN_NOT_OK(MakeRandomBuffer<int64_t>(length, default_memory_pool(), &buffer));
+    RETURN_NOT_OK(MakeRandomByteBuffer(length * sizeof(int64_t), default_memory_pool(),
+                                       &buffer, static_cast<int32_t>(i) /* seed */));
     arrays.push_back(std::make_shared<Int64Array>(length, buffer));
+    RETURN_NOT_OK(ValidateArray(*arrays.back()));
   }
 
   *data_stream = std::unique_ptr<FlightDataStream>(
diff --git a/cpp/src/arrow/flight/test-integration-client.cc b/cpp/src/arrow/flight/test-integration-client.cc
index 66af90a..93a1a16 100644
--- a/cpp/src/arrow/flight/test-integration-client.cc
+++ b/cpp/src/arrow/flight/test-integration-client.cc
@@ -27,12 +27,14 @@
 
 #include <gflags/gflags.h>
 
+#include "arrow/io/file.h"
 #include "arrow/io/test-common.h"
 #include "arrow/ipc/dictionary.h"
 #include "arrow/ipc/json-integration.h"
 #include "arrow/ipc/writer.h"
 #include "arrow/record_batch.h"
 #include "arrow/table.h"
+#include "arrow/testing/gtest_util.h"
 #include "arrow/util/logging.h"
 
 #include "arrow/flight/api.h"
diff --git a/cpp/src/arrow/flight/test-server.cc b/cpp/src/arrow/flight/test-server.cc
index a995bc6..a9070a4 100644
--- a/cpp/src/arrow/flight/test-server.cc
+++ b/cpp/src/arrow/flight/test-server.cc
@@ -25,6 +25,7 @@
 
 #include <gflags/gflags.h>
 
+#include "arrow/buffer.h"
 #include "arrow/io/test-common.h"
 #include "arrow/record_batch.h"
 #include "arrow/util/logging.h"
diff --git a/cpp/src/arrow/gpu/cuda_common.h b/cpp/src/arrow/gpu/cuda_common.h
index 2b630c8..87371ce 100644
--- a/cpp/src/arrow/gpu/cuda_common.h
+++ b/cpp/src/arrow/gpu/cuda_common.h
@@ -20,20 +20,11 @@
 #ifndef ARROW_GPU_CUDA_COMMON_H
 #define ARROW_GPU_CUDA_COMMON_H
 
-#include <sstream>
-
 #include <cuda.h>
 
 namespace arrow {
 namespace cuda {
 
-#define CUDA_DCHECK(STMT) \
-  do {                    \
-    int ret = (STMT);     \
-    DCHECK_EQ(0, ret);    \
-    (void)ret;            \
-  } while (0)
-
 #define CU_RETURN_NOT_OK(STMT)                                                  \
   do {                                                                          \
     CUresult ret = (STMT);                                                      \
diff --git a/cpp/src/arrow/io/compressed-test.cc b/cpp/src/arrow/io/compressed-test.cc
index acc57cc..fb4dcac 100644
--- a/cpp/src/arrow/io/compressed-test.cc
+++ b/cpp/src/arrow/io/compressed-test.cc
@@ -27,6 +27,7 @@
 #include "arrow/io/memory.h"
 #include "arrow/io/test-common.h"
 #include "arrow/status.h"
+#include "arrow/testing/gtest_util.h"
 #include "arrow/testing/util.h"
 #include "arrow/util/compression.h"
 
diff --git a/cpp/src/arrow/io/file-test.cc b/cpp/src/arrow/io/file-test.cc
index cba006e..c548785 100644
--- a/cpp/src/arrow/io/file-test.cc
+++ b/cpp/src/arrow/io/file-test.cc
@@ -25,8 +25,8 @@
 #include <cstdio>
 #include <cstdlib>
 #include <cstring>
+#include <fstream>
 #include <memory>
-#include <sstream>  // IWYU pragma: keep
 #include <string>
 #include <thread>
 #include <vector>
@@ -39,6 +39,7 @@
 #include "arrow/io/test-common.h"
 #include "arrow/memory_pool.h"
 #include "arrow/status.h"
+#include "arrow/testing/gtest_util.h"
 #include "arrow/testing/util.h"
 #include "arrow/util/io-util.h"
 
@@ -95,7 +96,7 @@ class TestFileOutputStream : public FileTestFixture {
 #if defined(_MSC_VER)
 TEST_F(TestFileOutputStream, FileNameWideCharConversionRangeException) {
   std::shared_ptr<FileOutputStream> file;
-  // Form literal string with non-ASCII symbol(127 + 1)
+  // Invalid utf-8 filename
   std::string file_name = "\x80";
   ASSERT_RAISES(Invalid, FileOutputStream::Open(file_name, &file));
 
@@ -105,6 +106,8 @@ TEST_F(TestFileOutputStream, FileNameWideCharConversionRangeException) {
   std::shared_ptr<ReadableFile> rd_file;
   ASSERT_RAISES(Invalid, ReadableFile::Open(file_name, &rd_file));
 }
+
+// TODO add a test with a valid utf-8 filename
 #endif
 
 TEST_F(TestFileOutputStream, DestructorClosesFile) {
diff --git a/cpp/src/arrow/io/memory.cc b/cpp/src/arrow/io/memory.cc
index 9c889e7..f8b8a13 100644
--- a/cpp/src/arrow/io/memory.cc
+++ b/cpp/src/arrow/io/memory.cc
@@ -275,6 +275,10 @@ BufferReader::BufferReader(const uint8_t* data, int64_t size)
 BufferReader::BufferReader(const Buffer& buffer)
     : BufferReader(buffer.data(), buffer.size()) {}
 
+BufferReader::BufferReader(const util::string_view& data)
+    : BufferReader(reinterpret_cast<const uint8_t*>(data.data()),
+                   static_cast<int64_t>(data.size())) {}
+
 Status BufferReader::Close() {
   is_open_ = false;
   return Status::OK();
diff --git a/cpp/src/arrow/io/memory.h b/cpp/src/arrow/io/memory.h
index f5a62cb..878d9bc 100644
--- a/cpp/src/arrow/io/memory.h
+++ b/cpp/src/arrow/io/memory.h
@@ -136,9 +136,7 @@ class ARROW_EXPORT BufferReader : public RandomAccessFile {
 
   /// \brief Instantiate from std::string or arrow::util::string_view. Does not
   /// own data
-  explicit BufferReader(const util::string_view& data)
-      : BufferReader(reinterpret_cast<const uint8_t*>(data.data()),
-                     static_cast<int64_t>(data.size())) {}
+  explicit BufferReader(const util::string_view& data);
 
   Status Close() override;
   bool closed() const override;
diff --git a/cpp/src/arrow/io/test-common.h b/cpp/src/arrow/io/test-common.cc
similarity index 64%
copy from cpp/src/arrow/io/test-common.h
copy to cpp/src/arrow/io/test-common.cc
index 6ae827c..8648f2e 100644
--- a/cpp/src/arrow/io/test-common.h
+++ b/cpp/src/arrow/io/test-common.cc
@@ -15,15 +15,11 @@
 // specific language governing permissions and limitations
 // under the License.
 
-#ifndef ARROW_IO_TEST_COMMON_H
-#define ARROW_IO_TEST_COMMON_H
+#include "arrow/io/test-common.h"
 
 #include <algorithm>
 #include <cstdint>
 #include <fstream>  // IWYU pragma: keep
-#include <memory>
-#include <string>
-#include <vector>
 
 #ifdef _WIN32
 #include <crtdbg.h>
@@ -41,8 +37,7 @@
 namespace arrow {
 namespace io {
 
-static inline void AssertFileContents(const std::string& path,
-                                      const std::string& contents) {
+void AssertFileContents(const std::string& path, const std::string& contents) {
   std::shared_ptr<ReadableFile> rf;
   int64_t size;
 
@@ -55,20 +50,18 @@ static inline void AssertFileContents(const std::string& path,
   ASSERT_TRUE(actual_data->Equals(Buffer(contents)));
 }
 
-static inline bool FileExists(const std::string& path) {
-  return std::ifstream(path.c_str()).good();
-}
+bool FileExists(const std::string& path) { return std::ifstream(path.c_str()).good(); }
 
 #if defined(_WIN32)
-static inline void InvalidParamHandler(const wchar_t* expr, const wchar_t* func,
-                                       const wchar_t* source_file,
-                                       unsigned int source_line, uintptr_t reserved) {
+static void InvalidParamHandler(const wchar_t* expr, const wchar_t* func,
+                                const wchar_t* source_file, unsigned int source_line,
+                                uintptr_t reserved) {
   wprintf(L"Invalid parameter in function '%s'. Source: '%s' line %d expression '%s'\n",
           func, source_file, source_line, expr);
 }
 #endif
 
-static inline bool FileIsClosed(int fd) {
+bool FileIsClosed(int fd) {
 #if defined(_WIN32)
   // Disables default behavior on wrong params which causes the application to crash
   // https://msdn.microsoft.com/en-us/library/ksazx244.aspx
@@ -91,7 +84,7 @@ static inline bool FileIsClosed(int fd) {
 #endif
 }
 
-static inline Status ZeroMemoryMap(MemoryMappedFile* file) {
+Status ZeroMemoryMap(MemoryMappedFile* file) {
   constexpr int64_t kBufferSize = 512;
   static constexpr uint8_t kZeroBytes[kBufferSize] = {0};
 
@@ -109,34 +102,26 @@ static inline Status ZeroMemoryMap(MemoryMappedFile* file) {
   return Status::OK();
 }
 
-class MemoryMapFixture {
- public:
-  void TearDown() {
-    for (auto path : tmp_files_) {
-      ARROW_UNUSED(std::remove(path.c_str()));
-    }
-  }
-
-  void CreateFile(const std::string& path, int64_t size) {
-    std::shared_ptr<MemoryMappedFile> file;
-    ASSERT_OK(MemoryMappedFile::Create(path, size, &file));
-    tmp_files_.push_back(path);
+void MemoryMapFixture::TearDown() {
+  for (auto path : tmp_files_) {
+    ARROW_UNUSED(std::remove(path.c_str()));
   }
+}
 
-  Status InitMemoryMap(int64_t size, const std::string& path,
-                       std::shared_ptr<MemoryMappedFile>* mmap) {
-    RETURN_NOT_OK(MemoryMappedFile::Create(path, size, mmap));
-    tmp_files_.push_back(path);
-    return Status::OK();
-  }
+void MemoryMapFixture::CreateFile(const std::string& path, int64_t size) {
+  std::shared_ptr<MemoryMappedFile> file;
+  ASSERT_OK(MemoryMappedFile::Create(path, size, &file));
+  tmp_files_.push_back(path);
+}
 
-  void AppendFile(const std::string& path) { tmp_files_.push_back(path); }
+Status MemoryMapFixture::InitMemoryMap(int64_t size, const std::string& path,
+                                       std::shared_ptr<MemoryMappedFile>* mmap) {
+  RETURN_NOT_OK(MemoryMappedFile::Create(path, size, mmap));
+  tmp_files_.push_back(path);
+  return Status::OK();
+}
 
- private:
-  std::vector<std::string> tmp_files_;
-};
+void MemoryMapFixture::AppendFile(const std::string& path) { tmp_files_.push_back(path); }
 
 }  // namespace io
 }  // namespace arrow
-
-#endif  // ARROW_IO_TEST_COMMON_H
diff --git a/cpp/src/arrow/io/test-common.h b/cpp/src/arrow/io/test-common.h
index 6ae827c..75e1347 100644
--- a/cpp/src/arrow/io/test-common.h
+++ b/cpp/src/arrow/io/test-common.h
@@ -18,119 +18,38 @@
 #ifndef ARROW_IO_TEST_COMMON_H
 #define ARROW_IO_TEST_COMMON_H
 
-#include <algorithm>
-#include <cstdint>
-#include <fstream>  // IWYU pragma: keep
 #include <memory>
 #include <string>
 #include <vector>
 
-#ifdef _WIN32
-#include <crtdbg.h>
-#include <io.h>
-#else
-#include <fcntl.h>
-#endif
-
-#include "arrow/buffer.h"
-#include "arrow/io/file.h"
-#include "arrow/io/memory.h"
-#include "arrow/memory_pool.h"
-#include "arrow/testing/gtest_util.h"
+#include "arrow/status.h"
+#include "arrow/util/visibility.h"
 
 namespace arrow {
 namespace io {
 
-static inline void AssertFileContents(const std::string& path,
-                                      const std::string& contents) {
-  std::shared_ptr<ReadableFile> rf;
-  int64_t size;
-
-  ASSERT_OK(ReadableFile::Open(path, &rf));
-  ASSERT_OK(rf->GetSize(&size));
-  ASSERT_EQ(size, contents.size());
-
-  std::shared_ptr<Buffer> actual_data;
-  ASSERT_OK(rf->Read(size, &actual_data));
-  ASSERT_TRUE(actual_data->Equals(Buffer(contents)));
-}
-
-static inline bool FileExists(const std::string& path) {
-  return std::ifstream(path.c_str()).good();
-}
-
-#if defined(_WIN32)
-static inline void InvalidParamHandler(const wchar_t* expr, const wchar_t* func,
-                                       const wchar_t* source_file,
-                                       unsigned int source_line, uintptr_t reserved) {
-  wprintf(L"Invalid parameter in function '%s'. Source: '%s' line %d expression '%s'\n",
-          func, source_file, source_line, expr);
-}
-#endif
-
-static inline bool FileIsClosed(int fd) {
-#if defined(_WIN32)
-  // Disables default behavior on wrong params which causes the application to crash
-  // https://msdn.microsoft.com/en-us/library/ksazx244.aspx
-  _set_invalid_parameter_handler(InvalidParamHandler);
-
-  // Disables possible assertion alert box on invalid input arguments
-  _CrtSetReportMode(_CRT_ASSERT, 0);
-
-  int new_fd = _dup(fd);
-  if (new_fd == -1) {
-    return errno == EBADF;
-  }
-  _close(new_fd);
-  return false;
-#else
-  if (-1 != fcntl(fd, F_GETFD)) {
-    return false;
-  }
-  return errno == EBADF;
-#endif
-}
-
-static inline Status ZeroMemoryMap(MemoryMappedFile* file) {
-  constexpr int64_t kBufferSize = 512;
-  static constexpr uint8_t kZeroBytes[kBufferSize] = {0};
-
-  RETURN_NOT_OK(file->Seek(0));
-  int64_t position = 0;
-  int64_t file_size;
-  RETURN_NOT_OK(file->GetSize(&file_size));
-
-  int64_t chunksize;
-  while (position < file_size) {
-    chunksize = std::min(kBufferSize, file_size - position);
-    RETURN_NOT_OK(file->Write(kZeroBytes, chunksize));
-    position += chunksize;
-  }
-  return Status::OK();
-}
-
-class MemoryMapFixture {
+class MemoryMappedFile;
+
+ARROW_EXPORT
+void AssertFileContents(const std::string& path, const std::string& contents);
+
+ARROW_EXPORT bool FileExists(const std::string& path);
+
+ARROW_EXPORT bool FileIsClosed(int fd);
+
+ARROW_EXPORT
+Status ZeroMemoryMap(MemoryMappedFile* file);
+
+class ARROW_EXPORT MemoryMapFixture {
  public:
-  void TearDown() {
-    for (auto path : tmp_files_) {
-      ARROW_UNUSED(std::remove(path.c_str()));
-    }
-  }
-
-  void CreateFile(const std::string& path, int64_t size) {
-    std::shared_ptr<MemoryMappedFile> file;
-    ASSERT_OK(MemoryMappedFile::Create(path, size, &file));
-    tmp_files_.push_back(path);
-  }
+  void TearDown();
+
+  void CreateFile(const std::string& path, int64_t size);
 
   Status InitMemoryMap(int64_t size, const std::string& path,
-                       std::shared_ptr<MemoryMappedFile>* mmap) {
-    RETURN_NOT_OK(MemoryMappedFile::Create(path, size, mmap));
-    tmp_files_.push_back(path);
-    return Status::OK();
-  }
+                       std::shared_ptr<MemoryMappedFile>* mmap);
 
-  void AppendFile(const std::string& path) { tmp_files_.push_back(path); }
+  void AppendFile(const std::string& path);
 
  private:
   std::vector<std::string> tmp_files_;
diff --git a/cpp/src/arrow/ipc/json-internal.h b/cpp/src/arrow/ipc/json-internal.h
index a68e0f6..aa2e06a 100644
--- a/cpp/src/arrow/ipc/json-internal.h
+++ b/cpp/src/arrow/ipc/json-internal.h
@@ -19,7 +19,6 @@
 #define ARROW_IPC_JSON_INTERNAL_H
 
 #include <memory>
-#include <sstream>
 #include <string>
 
 #include "arrow/json/rapidjson-defs.h"
diff --git a/cpp/src/arrow/ipc/json-test.cc b/cpp/src/arrow/ipc/json-test.cc
index 36f2d16..2a98862 100644
--- a/cpp/src/arrow/ipc/json-test.cc
+++ b/cpp/src/arrow/ipc/json-test.cc
@@ -296,7 +296,7 @@ TEST(TestJsonFileReadWrite, BasicRoundTrip) {
   for (int i = 0; i < nbatches; ++i) {
     std::shared_ptr<RecordBatch> batch;
     ASSERT_OK(reader->ReadRecordBatch(i, &batch));
-    ASSERT_RECORD_BATCHES_EQUAL(*batch, *batches[i]);
+    ASSERT_BATCHES_EQUAL(*batch, *batches[i]);
   }
 }
 
diff --git a/cpp/src/arrow/ipc/metadata-internal.cc b/cpp/src/arrow/ipc/metadata-internal.cc
index 9837cbe..7a1e3b6 100644
--- a/cpp/src/arrow/ipc/metadata-internal.cc
+++ b/cpp/src/arrow/ipc/metadata-internal.cc
@@ -39,6 +39,7 @@
 #include "arrow/tensor.h"
 #include "arrow/type.h"
 #include "arrow/util/checked_cast.h"
+#include "arrow/util/key_value_metadata.h"
 #include "arrow/util/logging.h"
 #include "arrow/visitor_inline.h"
 
diff --git a/cpp/src/arrow/ipc/read-write-test.cc b/cpp/src/arrow/ipc/read-write-test.cc
index edae88c..c21a547 100644
--- a/cpp/src/arrow/ipc/read-write-test.cc
+++ b/cpp/src/arrow/ipc/read-write-test.cc
@@ -41,10 +41,13 @@
 #include "arrow/sparse_tensor.h"
 #include "arrow/status.h"
 #include "arrow/tensor.h"
+#include "arrow/testing/gtest_util.h"
+#include "arrow/testing/random.h"
 #include "arrow/testing/util.h"
 #include "arrow/type.h"
 #include "arrow/util/bit-util.h"
 #include "arrow/util/checked_cast.h"
+#include "arrow/util/key_value_metadata.h"
 
 namespace arrow {
 
diff --git a/cpp/src/arrow/json/parser.h b/cpp/src/arrow/json/parser.h
index f9fb5fc..ec12eee 100644
--- a/cpp/src/arrow/json/parser.h
+++ b/cpp/src/arrow/json/parser.h
@@ -22,6 +22,7 @@
 
 #include "arrow/json/options.h"
 #include "arrow/status.h"
+#include "arrow/util/key_value_metadata.h"
 #include "arrow/util/macros.h"
 #include "arrow/util/visibility.h"
 
diff --git a/cpp/src/arrow/memory_pool.cc b/cpp/src/arrow/memory_pool.cc
index 3e0366a..4164e74 100644
--- a/cpp/src/arrow/memory_pool.cc
+++ b/cpp/src/arrow/memory_pool.cc
@@ -23,7 +23,6 @@
 #include <iostream>   // IWYU pragma: keep
 #include <limits>
 #include <memory>
-#include <sstream>  // IWYU pragma: keep
 
 #include "arrow/status.h"
 #include "arrow/util/logging.h"  // IWYU pragma: keep
diff --git a/cpp/src/arrow/pretty_print.h b/cpp/src/arrow/pretty_print.h
index ca50bc0..9c2708f 100644
--- a/cpp/src/arrow/pretty_print.h
+++ b/cpp/src/arrow/pretty_print.h
@@ -18,7 +18,7 @@
 #ifndef ARROW_PRETTY_PRINT_H
 #define ARROW_PRETTY_PRINT_H
 
-#include <ostream>
+#include <iosfwd>
 #include <string>
 
 #include "arrow/util/visibility.h"
diff --git a/cpp/src/arrow/python/arrow_to_pandas.cc b/cpp/src/arrow/python/arrow_to_pandas.cc
index f0e4b92..2e39f32 100644
--- a/cpp/src/arrow/python/arrow_to_pandas.cc
+++ b/cpp/src/arrow/python/arrow_to_pandas.cc
@@ -24,7 +24,6 @@
 #include <cmath>
 #include <cstdint>
 #include <memory>
-#include <sstream>
 #include <string>
 #include <unordered_map>
 #include <vector>
diff --git a/cpp/src/arrow/python/common.cc b/cpp/src/arrow/python/common.cc
index 6ca989b..1d4b3ab 100644
--- a/cpp/src/arrow/python/common.cc
+++ b/cpp/src/arrow/python/common.cc
@@ -19,7 +19,7 @@
 
 #include <cstdlib>
 #include <mutex>
-#include <sstream>
+#include <string>
 
 #include "arrow/memory_pool.h"
 #include "arrow/status.h"
diff --git a/cpp/src/arrow/python/common.h b/cpp/src/arrow/python/common.h
index 27661a9..a759d39 100644
--- a/cpp/src/arrow/python/common.h
+++ b/cpp/src/arrow/python/common.h
@@ -19,8 +19,6 @@
 #define ARROW_PYTHON_COMMON_H
 
 #include <memory>
-#include <sstream>
-#include <string>
 #include <utility>
 
 #include "arrow/python/config.h"
diff --git a/cpp/src/arrow/python/deserialize.cc b/cpp/src/arrow/python/deserialize.cc
index e5091c4..f1690a8 100644
--- a/cpp/src/arrow/python/deserialize.cc
+++ b/cpp/src/arrow/python/deserialize.cc
@@ -21,7 +21,6 @@
 
 #include <cstdint>
 #include <memory>
-#include <sstream>
 #include <string>
 #include <utility>
 #include <vector>
diff --git a/cpp/src/arrow/python/inference.cc b/cpp/src/arrow/python/inference.cc
index c9db5f4..6cf8bed 100644
--- a/cpp/src/arrow/python/inference.cc
+++ b/cpp/src/arrow/python/inference.cc
@@ -23,7 +23,6 @@
 #include <algorithm>
 #include <limits>
 #include <map>
-#include <sstream>
 #include <string>
 #include <utility>
 #include <vector>
diff --git a/cpp/src/arrow/python/inference.h b/cpp/src/arrow/python/inference.h
index f2e2305..8790250 100644
--- a/cpp/src/arrow/python/inference.h
+++ b/cpp/src/arrow/python/inference.h
@@ -24,8 +24,6 @@
 #include "arrow/python/platform.h"
 
 #include <memory>
-#include <ostream>
-#include <string>
 
 #include "arrow/python/visibility.h"
 #include "arrow/type.h"
diff --git a/cpp/src/arrow/python/numpy-internal.h b/cpp/src/arrow/python/numpy-internal.h
index e27ae5c..19bcde0 100644
--- a/cpp/src/arrow/python/numpy-internal.h
+++ b/cpp/src/arrow/python/numpy-internal.h
@@ -27,6 +27,7 @@
 #include "arrow/python/platform.h"
 
 #include <cstdint>
+#include <sstream>
 #include <string>
 
 namespace arrow {
diff --git a/cpp/src/arrow/python/numpy_convert.cc b/cpp/src/arrow/python/numpy_convert.cc
index 02ce0b6..f7068b3 100644
--- a/cpp/src/arrow/python/numpy_convert.cc
+++ b/cpp/src/arrow/python/numpy_convert.cc
@@ -21,7 +21,6 @@
 
 #include <cstdint>
 #include <memory>
-#include <sstream>
 #include <string>
 #include <vector>
 
diff --git a/cpp/src/arrow/python/numpy_to_arrow.cc b/cpp/src/arrow/python/numpy_to_arrow.cc
index ca3f596..b353a1e 100644
--- a/cpp/src/arrow/python/numpy_to_arrow.cc
+++ b/cpp/src/arrow/python/numpy_to_arrow.cc
@@ -28,7 +28,6 @@
 #include <cstring>
 #include <limits>
 #include <memory>
-#include <sstream>
 #include <string>
 #include <utility>
 #include <vector>
diff --git a/cpp/src/arrow/python/platform.h b/cpp/src/arrow/python/platform.h
index ca9b553..bc06df9 100644
--- a/cpp/src/arrow/python/platform.h
+++ b/cpp/src/arrow/python/platform.h
@@ -21,7 +21,6 @@
 #ifndef ARROW_PYTHON_PLATFORM_H
 #define ARROW_PYTHON_PLATFORM_H
 
-#include <iostream>
 #include <Python.h> // IWYU pragma: export
 #include <datetime.h>
 
diff --git a/cpp/src/arrow/python/util/datetime.h b/cpp/src/arrow/python/util/datetime.h
index 04ca307..a6e9c87 100644
--- a/cpp/src/arrow/python/util/datetime.h
+++ b/cpp/src/arrow/python/util/datetime.h
@@ -19,7 +19,6 @@
 #define PYARROW_UTIL_DATETIME_H
 
 #include <algorithm>
-#include <sstream>
 
 #include <datetime.h>
 #include "arrow/python/platform.h"
diff --git a/cpp/src/arrow/record_batch.cc b/cpp/src/arrow/record_batch.cc
index baaf5cb..2bc8c22 100644
--- a/cpp/src/arrow/record_batch.cc
+++ b/cpp/src/arrow/record_batch.cc
@@ -20,7 +20,6 @@
 #include <algorithm>
 #include <cstdlib>
 #include <memory>
-#include <sstream>
 #include <string>
 #include <utility>
 
@@ -40,6 +39,13 @@ Status RecordBatch::AddColumn(int i, const std::string& field_name,
   return AddColumn(i, field, column, out);
 }
 
+std::shared_ptr<Array> RecordBatch::GetColumnByName(const std::string& name) const {
+  auto i = schema_->GetFieldIndex(name);
+  return i == -1 ? NULLPTR : column(i);
+}
+
+int RecordBatch::num_columns() const { return schema_->num_fields(); }
+
 /// \class SimpleRecordBatch
 /// \brief A basic, non-lazy in-memory record batch
 class SimpleRecordBatch : public RecordBatch {
diff --git a/cpp/src/arrow/record_batch.h b/cpp/src/arrow/record_batch.h
index abae413..f80d4ed 100644
--- a/cpp/src/arrow/record_batch.h
+++ b/cpp/src/arrow/record_batch.h
@@ -23,17 +23,12 @@
 #include <string>
 #include <vector>
 
-#include "arrow/type.h"
+#include "arrow/type_fwd.h"
 #include "arrow/util/macros.h"
 #include "arrow/util/visibility.h"
 
 namespace arrow {
 
-class Array;
-struct ArrayData;
-class Status;
-class Table;
-
 /// \class RecordBatch
 /// \brief Collection of equal-length arrays matching a particular Schema
 ///
@@ -95,10 +90,7 @@ class ARROW_EXPORT RecordBatch {
   /// \brief Retrieve an array from the record batch
   /// \param[in] name field name
   /// \return an Array or null if no field was found
-  std::shared_ptr<Array> GetColumnByName(const std::string& name) const {
-    auto i = schema_->GetFieldIndex(name);
-    return i == -1 ? NULLPTR : column(i);
-  }
+  std::shared_ptr<Array> GetColumnByName(const std::string& name) const;
 
   /// \brief Retrieve an array's internaldata from the record batch
   /// \param[in] i field index, does not boundscheck
@@ -141,7 +133,7 @@ class ARROW_EXPORT RecordBatch {
   const std::string& column_name(int i) const;
 
   /// \return the number of columns in the table
-  int num_columns() const { return schema_->num_fields(); }
+  int num_columns() const;
 
   /// \return the number of rows (the corresponding length of each column)
   int64_t num_rows() const { return num_rows_; }
diff --git a/cpp/src/arrow/status.cc b/cpp/src/arrow/status.cc
index 7d742f1..e97dc8c 100644
--- a/cpp/src/arrow/status.cc
+++ b/cpp/src/arrow/status.cc
@@ -12,8 +12,9 @@
 
 #include "arrow/status.h"
 
-#include <assert.h>
-#include <sstream>
+#include <cassert>
+#include <cstdlib>
+#include <iostream>
 
 namespace arrow {
 
@@ -114,4 +115,15 @@ std::string Status::ToString() const {
   return result;
 }
 
+void Status::Abort() const { Abort(std::string()); }
+
+void Status::Abort(const std::string& message) const {
+  std::cerr << "-- Arrow Fatal Error --\n";
+  if (!message.empty()) {
+    std::cerr << message << "\n";
+  }
+  std::cerr << ToString() << std::endl;
+  std::abort();
+}
+
 }  // namespace arrow
diff --git a/cpp/src/arrow/status.h b/cpp/src/arrow/status.h
index 6c23864..790d9b7 100644
--- a/cpp/src/arrow/status.h
+++ b/cpp/src/arrow/status.h
@@ -335,6 +335,9 @@ class ARROW_EXPORT Status {
   /// \brief Return the specific error message attached to this status.
   std::string message() const { return ok() ? "" : state_->msg; }
 
+  [[noreturn]] void Abort() const;
+  [[noreturn]] void Abort(const std::string& message) const;
+
  private:
   struct State {
     StatusCode code;
diff --git a/cpp/src/arrow/table.cc b/cpp/src/arrow/table.cc
index 669ff20..b018b8b 100644
--- a/cpp/src/arrow/table.cc
+++ b/cpp/src/arrow/table.cc
@@ -21,7 +21,6 @@
 #include <cstdlib>
 #include <limits>
 #include <memory>
-#include <sstream>
 #include <utility>
 
 #include "arrow/array.h"
diff --git a/cpp/src/arrow/tensor.cc b/cpp/src/arrow/tensor.cc
index 8c1c58a..743a9bc 100644
--- a/cpp/src/arrow/tensor.cc
+++ b/cpp/src/arrow/tensor.cc
@@ -27,6 +27,7 @@
 #include <vector>
 
 #include "arrow/compare.h"
+#include "arrow/status.h"
 #include "arrow/type.h"
 #include "arrow/type_traits.h"
 #include "arrow/util/checked_cast.h"
diff --git a/cpp/src/arrow/testing/gtest_util.cc b/cpp/src/arrow/testing/gtest_util.cc
index 4811954..ee66b2e 100644
--- a/cpp/src/arrow/testing/gtest_util.cc
+++ b/cpp/src/arrow/testing/gtest_util.cc
@@ -29,7 +29,6 @@
 #include <iostream>
 #include <limits>
 #include <memory>
-#include <random>
 #include <sstream>
 #include <string>
 #include <vector>
@@ -46,8 +45,33 @@
 
 namespace arrow {
 
+static void PrintColumn(const Column& col, std::stringstream* ss) {
+  const ChunkedArray& carr = *col.data();
+  for (int i = 0; i < carr.num_chunks(); ++i) {
+    auto c1 = carr.chunk(i);
+    *ss << "Chunk " << i << std::endl;
+    ARROW_EXPECT_OK(::arrow::PrettyPrint(*c1, 0, ss));
+    *ss << std::endl;
+  }
+}
+
+template <typename T>
+void AssertTsEqual(const T& expected, const T& actual) {
+  if (!expected.Equals(actual)) {
+    std::stringstream pp_expected;
+    std::stringstream pp_actual;
+    ARROW_EXPECT_OK(PrettyPrint(expected, 0, &pp_expected));
+    ARROW_EXPECT_OK(PrettyPrint(actual, 0, &pp_actual));
+    FAIL() << "Got: \n" << pp_actual.str() << "\nExpected: \n" << pp_expected.str();
+  }
+}
+
 void AssertArraysEqual(const Array& expected, const Array& actual) {
-  ASSERT_ARRAYS_EQUAL(expected, actual);
+  AssertTsEqual(expected, actual);
+}
+
+void AssertBatchesEqual(const RecordBatch& expected, const RecordBatch& actual) {
+  AssertTsEqual(expected, actual);
 }
 
 void AssertChunkedEqual(const ChunkedArray& expected, const ChunkedArray& actual) {
@@ -117,16 +141,6 @@ std::shared_ptr<Array> ArrayFromJSON(const std::shared_ptr<DataType>& type,
   return out;
 }
 
-void PrintColumn(const Column& col, std::stringstream* ss) {
-  const ChunkedArray& carr = *col.data();
-  for (int i = 0; i < carr.num_chunks(); ++i) {
-    auto c1 = carr.chunk(i);
-    *ss << "Chunk " << i << std::endl;
-    ARROW_EXPECT_OK(::arrow::PrettyPrint(*c1, 0, ss));
-    *ss << std::endl;
-  }
-}
-
 void AssertTablesEqual(const Table& expected, const Table& actual,
                        bool same_chunk_layout) {
   ASSERT_EQ(expected.num_columns(), actual.num_columns());
diff --git a/cpp/src/arrow/testing/gtest_util.h b/cpp/src/arrow/testing/gtest_util.h
index 88ffc22..c44bb17 100644
--- a/cpp/src/arrow/testing/gtest_util.h
+++ b/cpp/src/arrow/testing/gtest_util.h
@@ -21,25 +21,17 @@
 #include <cstdint>
 #include <cstdlib>
 #include <cstring>
-#include <iostream>
-#include <limits>
 #include <memory>
-#include <random>
-#include <sstream>
 #include <string>
 #include <type_traits>
 #include <vector>
 
 #include <gtest/gtest.h>
 
-#include "arrow/array.h"
 #include "arrow/buffer.h"
 #include "arrow/builder.h"
-#include "arrow/memory_pool.h"
-#include "arrow/pretty_print.h"
-#include "arrow/record_batch.h"
 #include "arrow/status.h"
-#include "arrow/type.h"
+#include "arrow/type_fwd.h"
 #include "arrow/type_traits.h"
 #include "arrow/util/bit-util.h"
 #include "arrow/util/logging.h"
@@ -83,13 +75,12 @@
     EXPECT_TRUE(_st.ok());        \
   } while (false)
 
-#define ABORT_NOT_OK(expr)                 \
-  do {                                     \
-    ::arrow::Status _st = (expr);          \
-    if (ARROW_PREDICT_FALSE(!_st.ok())) {  \
-      std::cerr << _st.ToString() << "\n"; \
-      std::abort();                        \
-    }                                      \
+#define ABORT_NOT_OK(s)                   \
+  do {                                    \
+    ::arrow::Status _st = (s);            \
+    if (ARROW_PREDICT_FALSE(!_st.ok())) { \
+      _st.Abort();                        \
+    }                                     \
   } while (false);
 
 namespace arrow {
@@ -101,8 +92,10 @@ typedef ::testing::Types<UInt8Type, UInt16Type, UInt32Type, UInt64Type, Int8Type
                          Int16Type, Int32Type, Int64Type, FloatType, DoubleType>
     NumericArrowTypes;
 
+class Array;
 class ChunkedArray;
 class Column;
+class RecordBatch;
 class Table;
 
 namespace compute {
@@ -113,22 +106,12 @@ using Datum = compute::Datum;
 
 using ArrayVector = std::vector<std::shared_ptr<Array>>;
 
-#define ASSERT_PP_EQUAL(LEFT, RIGHT)                                                   \
-  do {                                                                                 \
-    if (!(LEFT).Equals((RIGHT))) {                                                     \
-      std::stringstream pp_result;                                                     \
-      std::stringstream pp_expected;                                                   \
-                                                                                       \
-      ARROW_EXPECT_OK(PrettyPrint(RIGHT, 0, &pp_result));                              \
-      ARROW_EXPECT_OK(PrettyPrint(LEFT, 0, &pp_expected));                             \
-      FAIL() << "Got: \n" << pp_result.str() << "\nExpected: \n" << pp_expected.str(); \
-    }                                                                                  \
-  } while (false)
-
-#define ASSERT_ARRAYS_EQUAL(lhs, rhs) ASSERT_PP_EQUAL(lhs, rhs)
-#define ASSERT_RECORD_BATCHES_EQUAL(lhs, rhs) ASSERT_PP_EQUAL(lhs, rhs)
+#define ASSERT_ARRAYS_EQUAL(lhs, rhs) AssertArraysEqual((lhs), (rhs))
+#define ASSERT_BATCHES_EQUAL(lhs, rhs) AssertBatchesEqual((lhs), (rhs))
 
 ARROW_EXPORT void AssertArraysEqual(const Array& expected, const Array& actual);
+ARROW_EXPORT void AssertBatchesEqual(const RecordBatch& expected,
+                                     const RecordBatch& actual);
 ARROW_EXPORT void AssertChunkedEqual(const ChunkedArray& expected,
                                      const ChunkedArray& actual);
 ARROW_EXPORT void AssertChunkedEqual(const ChunkedArray& actual,
@@ -139,7 +122,6 @@ ARROW_EXPORT void AssertBufferEqual(const Buffer& buffer, const std::string& exp
 ARROW_EXPORT void AssertBufferEqual(const Buffer& buffer, const Buffer& expected);
 ARROW_EXPORT void AssertSchemaEqual(const Schema& lhs, const Schema& rhs);
 
-ARROW_EXPORT void PrintColumn(const Column& col, std::stringstream* ss);
 ARROW_EXPORT void AssertTablesEqual(const Table& expected, const Table& actual,
                                     bool same_chunk_layout = true);
 
@@ -176,19 +158,6 @@ void FinishAndCheckPadding(BuilderType* builder, std::shared_ptr<Array>* out) {
 
 #define DECL_TYPE() typedef typename TestFixture::Type Type;
 
-#define ASSERT_BATCHES_EQUAL(LEFT, RIGHT)    \
-  do {                                       \
-    if (!(LEFT).ApproxEquals(RIGHT)) {       \
-      std::stringstream ss;                  \
-      ss << "Left:\n";                       \
-      ASSERT_OK(PrettyPrint(LEFT, 0, &ss));  \
-                                             \
-      ss << "\nRight:\n";                    \
-      ASSERT_OK(PrettyPrint(RIGHT, 0, &ss)); \
-      FAIL() << ss.str();                    \
-    }                                        \
-  } while (false)
-
 // ArrayFromJSON: construct an Array from a simple JSON representation
 
 ARROW_EXPORT
diff --git a/cpp/src/arrow/testing/random.h b/cpp/src/arrow/testing/random.h
index f69b705..6b188fd 100644
--- a/cpp/src/arrow/testing/random.h
+++ b/cpp/src/arrow/testing/random.h
@@ -17,10 +17,13 @@
 
 #pragma once
 
+#include <algorithm>
+#include <cassert>
 #include <cstdint>
 #include <limits>
 #include <memory>
 #include <random>
+#include <vector>
 
 #include "arrow/type.h"
 #include "arrow/util/visibility.h"
@@ -235,4 +238,35 @@ class ARROW_EXPORT RandomArrayGenerator {
 };
 
 }  // namespace random
+
+//
+// Assorted functions
+//
+
+template <typename T, typename U>
+void randint(int64_t N, T lower, T upper, std::vector<U>* out) {
+  const int random_seed = 0;
+  std::default_random_engine gen(random_seed);
+  std::uniform_int_distribution<T> d(lower, upper);
+  out->resize(N, static_cast<T>(0));
+  std::generate(out->begin(), out->end(), [&d, &gen] { return static_cast<U>(d(gen)); });
+}
+
+template <typename T, typename U>
+void random_real(int64_t n, uint32_t seed, T min_value, T max_value,
+                 std::vector<U>* out) {
+  std::default_random_engine gen(seed);
+  std::uniform_real_distribution<T> d(min_value, max_value);
+  out->resize(n, static_cast<T>(0));
+  std::generate(out->begin(), out->end(), [&d, &gen] { return static_cast<U>(d(gen)); });
+}
+
+template <typename T, typename U>
+void rand_uniform_int(int64_t n, uint32_t seed, T min_value, T max_value, U* out) {
+  assert(out || (n == 0));
+  std::default_random_engine gen(seed);
+  std::uniform_int_distribution<T> d(min_value, max_value);
+  std::generate(out, out + n, [&d, &gen] { return static_cast<U>(d(gen)); });
+}
+
 }  // namespace arrow
diff --git a/cpp/src/arrow/testing/util.cc b/cpp/src/arrow/testing/util.cc
index 429f6bb..f30f704 100644
--- a/cpp/src/arrow/testing/util.cc
+++ b/cpp/src/arrow/testing/util.cc
@@ -17,6 +17,9 @@
 
 #include "arrow/testing/util.h"
 
+#include <chrono>
+#include <random>
+
 #ifndef _WIN32
 #include <sys/stat.h>  // IWYU pragma: keep
 #include <sys/wait.h>  // IWYU pragma: keep
@@ -24,9 +27,15 @@
 #endif
 
 #include "arrow/table.h"
+#include "arrow/testing/random.h"
+#include "arrow/util/logging.h"
 
 namespace arrow {
 
+uint64_t random_seed() {
+  return std::chrono::high_resolution_clock::now().time_since_epoch().count();
+}
+
 void random_null_bytes(int64_t n, double pct_null, uint8_t* null_bytes) {
   const int random_seed = 0;
   std::default_random_engine gen(random_seed);
diff --git a/cpp/src/arrow/testing/util.h b/cpp/src/arrow/testing/util.h
index ad2350e..d12f57e 100644
--- a/cpp/src/arrow/testing/util.h
+++ b/cpp/src/arrow/testing/util.h
@@ -18,62 +18,36 @@
 #pragma once
 
 #include <algorithm>
-#include <chrono>
 #include <cstdint>
 #include <cstdlib>
 #include <cstring>
-#include <iostream>
 #include <limits>
 #include <memory>
-#include <random>
-#include <sstream>
 #include <string>
 #include <type_traits>
 #include <vector>
 
-#include "arrow/array.h"
 #include "arrow/buffer.h"
-#include "arrow/builder.h"
-#include "arrow/memory_pool.h"
-#include "arrow/pretty_print.h"
 #include "arrow/record_batch.h"
 #include "arrow/status.h"
-#include "arrow/type.h"
-#include "arrow/type_traits.h"
-#include "arrow/util/bit-util.h"
-#include "arrow/util/logging.h"
+#include "arrow/type_fwd.h"
 #include "arrow/util/macros.h"
 #include "arrow/util/visibility.h"
 
 namespace arrow {
 
+class Array;
 class ChunkedArray;
 class Column;
+class MemoryPool;
+class RecordBatch;
 class Table;
 
 using ArrayVector = std::vector<std::shared_ptr<Array>>;
 
-template <typename T, typename U>
-void randint(int64_t N, T lower, T upper, std::vector<U>* out) {
-  const int random_seed = 0;
-  std::default_random_engine gen(random_seed);
-  std::uniform_int_distribution<T> d(lower, upper);
-  out->resize(N, static_cast<T>(0));
-  std::generate(out->begin(), out->end(), [&d, &gen] { return static_cast<U>(d(gen)); });
-}
-
-template <typename T, typename U>
-void random_real(int64_t n, uint32_t seed, T min_value, T max_value,
-                 std::vector<U>* out) {
-  std::default_random_engine gen(seed);
-  std::uniform_real_distribution<T> d(min_value, max_value);
-  out->resize(n, static_cast<T>(0));
-  std::generate(out->begin(), out->end(), [&d, &gen] { return static_cast<U>(d(gen)); });
-}
-
 template <typename T>
-inline Status CopyBufferFromVector(const std::vector<T>& values, MemoryPool* pool,
-                                   std::shared_ptr<Buffer>* result) {
+Status CopyBufferFromVector(const std::vector<T>& values, MemoryPool* pool,
+                            std::shared_ptr<Buffer>* result) {
   int64_t nbytes = static_cast<int>(values.size()) * sizeof(T);
 
   std::shared_ptr<Buffer> buffer;
@@ -103,39 +77,7 @@ ARROW_EXPORT Status MakeRandomByteBuffer(int64_t length, MemoryPool* pool,
                                          std::shared_ptr<ResizableBuffer>* out,
                                          uint32_t seed = 0);
 
-static inline uint64_t random_seed() {
-  return std::chrono::high_resolution_clock::now().time_since_epoch().count();
-}
-
-template <typename T, typename U>
-void rand_uniform_int(int64_t n, uint32_t seed, T min_value, T max_value, U* out) {
-  DCHECK(out || (n == 0));
-  std::default_random_engine gen(seed);
-  std::uniform_int_distribution<T> d(min_value, max_value);
-  std::generate(out, out + n, [&d, &gen] { return static_cast<U>(d(gen)); });
-}
-
-template <typename T, typename Enable = void>
-struct GenerateRandom {};
-
-template <typename T>
-struct GenerateRandom<T, typename std::enable_if<std::is_integral<T>::value>::type> {
-  static void Gen(int64_t length, uint32_t seed, void* out) {
-    rand_uniform_int(length, seed, std::numeric_limits<T>::min(),
-                     std::numeric_limits<T>::max(), reinterpret_cast<T*>(out));
-  }
-};
-
-template <typename T>
-Status MakeRandomBuffer(int64_t length, MemoryPool* pool,
-                        std::shared_ptr<ResizableBuffer>* out, uint32_t seed = 0) {
-  DCHECK(pool);
-  std::shared_ptr<ResizableBuffer> result;
-  RETURN_NOT_OK(AllocateResizableBuffer(pool, sizeof(T) * length, &result));
-  GenerateRandom<T>::Gen(length, seed, result->mutable_data());
-  *out = result;
-  return Status::OK();
-}
+ARROW_EXPORT uint64_t random_seed();
 
 template <class T, class Builder>
 Status MakeArray(const std::vector<uint8_t>& valid_bytes, const std::vector<T>& values,
diff --git a/cpp/src/arrow/type-test.cc b/cpp/src/arrow/type-test.cc
index 3c4adc9..94be608 100644
--- a/cpp/src/arrow/type-test.cc
+++ b/cpp/src/arrow/type-test.cc
@@ -30,6 +30,7 @@
 #include "arrow/type.h"
 #include "arrow/type_traits.h"
 #include "arrow/util/checked_cast.h"
+#include "arrow/util/key_value_metadata.h"
 
 namespace arrow {
 
diff --git a/cpp/src/arrow/type.cc b/cpp/src/arrow/type.cc
index 67f07ea..93c6f39 100644
--- a/cpp/src/arrow/type.cc
+++ b/cpp/src/arrow/type.cc
@@ -19,8 +19,10 @@
 
 #include <climits>
 #include <cstddef>
+#include <ostream>
 #include <sstream>  // IWYU pragma: keep
 #include <string>
+#include <unordered_map>
 #include <utility>
 #include <vector>
 
@@ -118,6 +120,11 @@ bool DataType::Equals(const std::shared_ptr<DataType>& other) const {
   return Equals(*other.get());
 }
 
+std::ostream& operator<<(std::ostream& os, const DataType& type) {
+  os << type.ToString();
+  return os;
+}
+
 std::string BooleanType::ToString() const { return name(); }
 
 FloatingPoint::Precision HalfFloatType::precision() const { return FloatingPoint::HALF; }
@@ -191,6 +198,24 @@ std::string Time64Type::ToString() const {
   return ss.str();
 }
 
+std::ostream& operator<<(std::ostream& os, TimeUnit::type unit) {
+  switch (unit) {
+    case TimeUnit::SECOND:
+      os << "s";
+      break;
+    case TimeUnit::MILLI:
+      os << "ms";
+      break;
+    case TimeUnit::MICRO:
+      os << "us";
+      break;
+    case TimeUnit::NANO:
+      os << "ns";
+      break;
+  }
+  return os;
+}
+
 // ----------------------------------------------------------------------
 // Timestamp types
 
@@ -271,11 +296,21 @@ int LookupNameIndex(const std::unordered_multimap<std::string, int>& name_to_ind
 
 }  // namespace
 
+class StructType::Impl {
+ public:
+  explicit Impl(const std::vector<std::shared_ptr<Field>>& fields)
+      : name_to_index_(CreateNameToIndexMap(fields)) {}
+
+  const std::unordered_multimap<std::string, int> name_to_index_;
+};
+
 StructType::StructType(const std::vector<std::shared_ptr<Field>>& fields)
-    : NestedType(Type::STRUCT), name_to_index_(CreateNameToIndexMap(fields)) {
+    : NestedType(Type::STRUCT), impl_(new Impl(fields)) {
   children_ = fields;
 }
 
+StructType::~StructType() {}
+
 std::string StructType::ToString() const {
   std::stringstream s;
   s << "struct<";
@@ -296,12 +331,12 @@ std::shared_ptr<Field> StructType::GetFieldByName(const std::string& name) const
 }
 
 int StructType::GetFieldIndex(const std::string& name) const {
-  return LookupNameIndex(name_to_index_, name);
+  return LookupNameIndex(impl_->name_to_index_, name);
 }
 
 std::vector<int> StructType::GetAllFieldIndices(const std::string& name) const {
   std::vector<int> result;
-  auto p = name_to_index_.equal_range(name);
+  auto p = impl_->name_to_index_.equal_range(name);
   for (auto it = p.first; it != p.second; ++it) {
     result.push_back(it->second);
   }
@@ -311,7 +346,7 @@ std::vector<int> StructType::GetAllFieldIndices(const std::string& name) const {
 std::vector<std::shared_ptr<Field>> StructType::GetAllFieldsByName(
     const std::string& name) const {
   std::vector<std::shared_ptr<Field>> result;
-  auto p = name_to_index_.equal_range(name);
+  auto p = impl_->name_to_index_.equal_range(name);
   for (auto it = p.first; it != p.second; ++it) {
     result.push_back(children_[it->second]);
   }
@@ -371,17 +406,44 @@ std::string NullType::ToString() const { return name(); }
 // ----------------------------------------------------------------------
 // Schema implementation
 
+class Schema::Impl {
+ public:
+  Impl(const std::vector<std::shared_ptr<Field>>& fields,
+       const std::shared_ptr<const KeyValueMetadata>& metadata)
+      : fields_(fields),
+        name_to_index_(CreateNameToIndexMap(fields_)),
+        metadata_(metadata) {}
+
+  Impl(std::vector<std::shared_ptr<Field>>&& fields,
+       const std::shared_ptr<const KeyValueMetadata>& metadata)
+      : fields_(std::move(fields)),
+        name_to_index_(CreateNameToIndexMap(fields_)),
+        metadata_(metadata) {}
+
+  std::vector<std::shared_ptr<Field>> fields_;
+  std::unordered_multimap<std::string, int> name_to_index_;
+  std::shared_ptr<const KeyValueMetadata> metadata_;
+};
+
 Schema::Schema(const std::vector<std::shared_ptr<Field>>& fields,
                const std::shared_ptr<const KeyValueMetadata>& metadata)
-    : fields_(fields),
-      name_to_index_(CreateNameToIndexMap(fields_)),
-      metadata_(metadata) {}
+    : impl_(new Impl(fields, metadata)) {}
 
 Schema::Schema(std::vector<std::shared_ptr<Field>>&& fields,
                const std::shared_ptr<const KeyValueMetadata>& metadata)
-    : fields_(std::move(fields)),
-      name_to_index_(CreateNameToIndexMap(fields_)),
-      metadata_(metadata) {}
+    : impl_(new Impl(std::move(fields), metadata)) {}
+
+Schema::Schema(const Schema& schema) : impl_(new Impl(*schema.impl_)) {}
+
+Schema::~Schema() {}
+
+int Schema::num_fields() const { return static_cast<int>(impl_->fields_.size()); }
+
+std::shared_ptr<Field> Schema::field(int i) const { return impl_->fields_[i]; }
+
+const std::vector<std::shared_ptr<Field>>& Schema::fields() const {
+  return impl_->fields_;
+}
 
 bool Schema::Equals(const Schema& other, bool check_metadata) const {
   if (this == &other) {
@@ -402,7 +464,7 @@ bool Schema::Equals(const Schema& other, bool check_metadata) const {
   if (!check_metadata) {
     return true;
   } else if (this->HasMetadata() && other.HasMetadata()) {
-    return metadata_->Equals(*other.metadata_);
+    return impl_->metadata_->Equals(*other.impl_->metadata_);
   } else if (!this->HasMetadata() && !other.HasMetadata()) {
     return true;
   } else {
@@ -412,16 +474,16 @@ bool Schema::Equals(const Schema& other, bool check_metadata) const {
 
 std::shared_ptr<Field> Schema::GetFieldByName(const std::string& name) const {
   int i = GetFieldIndex(name);
-  return i == -1 ? nullptr : fields_[i];
+  return i == -1 ? nullptr : impl_->fields_[i];
 }
 
 int Schema::GetFieldIndex(const std::string& name) const {
-  return LookupNameIndex(name_to_index_, name);
+  return LookupNameIndex(impl_->name_to_index_, name);
 }
 
 std::vector<int> Schema::GetAllFieldIndices(const std::string& name) const {
   std::vector<int> result;
-  auto p = name_to_index_.equal_range(name);
+  auto p = impl_->name_to_index_.equal_range(name);
   for (auto it = p.first; it != p.second; ++it) {
     result.push_back(it->second);
   }
@@ -431,9 +493,9 @@ std::vector<int> Schema::GetAllFieldIndices(const std::string& name) const {
 std::vector<std::shared_ptr<Field>> Schema::GetAllFieldsByName(
     const std::string& name) const {
   std::vector<std::shared_ptr<Field>> result;
-  auto p = name_to_index_.equal_range(name);
+  auto p = impl_->name_to_index_.equal_range(name);
   for (auto it = p.first; it != p.second; ++it) {
-    result.push_back(fields_[it->second]);
+    result.push_back(impl_->fields_[it->second]);
   }
   return result;
 }
@@ -444,8 +506,8 @@ Status Schema::AddField(int i, const std::shared_ptr<Field>& field,
     return Status::Invalid("Invalid column index to add field.");
   }
 
-  *out =
-      std::make_shared<Schema>(internal::AddVectorElement(fields_, i, field), metadata_);
+  *out = std::make_shared<Schema>(internal::AddVectorElement(impl_->fields_, i, field),
+                                  impl_->metadata_);
   return Status::OK();
 }
 
@@ -455,24 +517,26 @@ Status Schema::SetField(int i, const std::shared_ptr<Field>& field,
     return Status::Invalid("Invalid column index to add field.");
   }
 
-  *out = std::make_shared<Schema>(internal::ReplaceVectorElement(fields_, i, field),
-                                  metadata_);
+  *out = std::make_shared<Schema>(
+      internal::ReplaceVectorElement(impl_->fields_, i, field), impl_->metadata_);
   return Status::OK();
 }
 
 bool Schema::HasMetadata() const {
-  return (metadata_ != nullptr) && (metadata_->size() > 0);
+  return (impl_->metadata_ != nullptr) && (impl_->metadata_->size() > 0);
 }
 
 std::shared_ptr<Schema> Schema::AddMetadata(
     const std::shared_ptr<const KeyValueMetadata>& metadata) const {
-  return std::make_shared<Schema>(fields_, metadata);
+  return std::make_shared<Schema>(impl_->fields_, metadata);
 }
 
-std::shared_ptr<const KeyValueMetadata> Schema::metadata() const { return metadata_; }
+std::shared_ptr<const KeyValueMetadata> Schema::metadata() const {
+  return impl_->metadata_;
+}
 
 std::shared_ptr<Schema> Schema::RemoveMetadata() const {
-  return std::make_shared<Schema>(fields_);
+  return std::make_shared<Schema>(impl_->fields_);
 }
 
 Status Schema::RemoveField(int i, std::shared_ptr<Schema>* out) const {
@@ -480,7 +544,8 @@ Status Schema::RemoveField(int i, std::shared_ptr<Schema>* out) const {
     return Status::Invalid("Invalid column index to remove field.");
   }
 
-  *out = std::make_shared<Schema>(internal::DeleteVectorElement(fields_, i), metadata_);
+  *out = std::make_shared<Schema>(internal::DeleteVectorElement(impl_->fields_, i),
+                                  impl_->metadata_);
   return Status::OK();
 }
 
@@ -488,7 +553,7 @@ std::string Schema::ToString() const {
   std::stringstream buffer;
 
   int i = 0;
-  for (auto field : fields_) {
+  for (const auto& field : impl_->fields_) {
     if (i > 0) {
       buffer << std::endl;
     }
@@ -496,8 +561,8 @@ std::string Schema::ToString() const {
     ++i;
   }
 
-  if (metadata_) {
-    buffer << metadata_->ToString();
+  if (impl_->metadata_) {
+    buffer << impl_->metadata_->ToString();
   }
 
   return buffer.str();
@@ -505,7 +570,7 @@ std::string Schema::ToString() const {
 
 std::vector<std::string> Schema::field_names() const {
   std::vector<std::string> names;
-  for (auto& field : fields_) {
+  for (const auto& field : impl_->fields_) {
     names.push_back(field->name());
   }
   return names;
diff --git a/cpp/src/arrow/type.h b/cpp/src/arrow/type.h
index 75ee674..eeba7d4 100644
--- a/cpp/src/arrow/type.h
+++ b/cpp/src/arrow/type.h
@@ -20,17 +20,14 @@
 
 #include <climits>
 #include <cstdint>
+#include <iosfwd>
 #include <memory>
-#include <ostream>
 #include <string>
-#include <type_traits>
-#include <unordered_map>
 #include <vector>
 
 #include "arrow/status.h"
 #include "arrow/type_fwd.h"  // IWYU pragma: export
 #include "arrow/util/checked_cast.h"
-#include "arrow/util/key_value_metadata.h"  // IWYU pragma: export
 #include "arrow/util/macros.h"
 #include "arrow/util/visibility.h"
 #include "arrow/visitor.h"  // IWYU pragma: keep
@@ -201,10 +198,7 @@ class ARROW_EXPORT DataType {
   ARROW_DISALLOW_COPY_AND_ASSIGN(DataType);
 };
 
-inline std::ostream& operator<<(std::ostream& os, const DataType& type) {
-  os << type.ToString();
-  return os;
-}
+std::ostream& operator<<(std::ostream& os, const DataType& type);
 
 /// \brief Base class for all fixed-width data types
 class ARROW_EXPORT FixedWidthType : public DataType {
@@ -548,6 +542,8 @@ class ARROW_EXPORT StructType : public NestedType {
 
   explicit StructType(const std::vector<std::shared_ptr<Field>>& fields);
 
+  ~StructType() override;
+
   std::string ToString() const override;
   std::string name() const override { return "struct"; }
 
@@ -571,7 +567,8 @@ class ARROW_EXPORT StructType : public NestedType {
   int GetChildIndex(const std::string& name) const;
 
  private:
-  std::unordered_multimap<std::string, int> name_to_index_;
+  class Impl;
+  std::unique_ptr<Impl> impl_;
 };
 
 /// \brief Base type class for (fixed-size) decimal data
@@ -691,23 +688,7 @@ struct TimeUnit {
   enum type { SECOND = 0, MILLI = 1, MICRO = 2, NANO = 3 };
 };
 
-static inline std::ostream& operator<<(std::ostream& os, TimeUnit::type unit) {
-  switch (unit) {
-    case TimeUnit::SECOND:
-      os << "s";
-      break;
-    case TimeUnit::MILLI:
-      os << "ms";
-      break;
-    case TimeUnit::MICRO:
-      os << "us";
-      break;
-    case TimeUnit::NANO:
-      os << "ns";
-      break;
-  }
-  return os;
-}
+std::ostream& operator<<(std::ostream& os, TimeUnit::type unit);
 
 /// Base type class for time data
 class ARROW_EXPORT TimeType : public TemporalType, public ParametricType {
@@ -916,13 +897,22 @@ class ARROW_EXPORT Schema {
   explicit Schema(std::vector<std::shared_ptr<Field>>&& fields,
                   const std::shared_ptr<const KeyValueMetadata>& metadata = NULLPTR);
 
-  virtual ~Schema() = default;
+  Schema(const Schema&);
+
+  virtual ~Schema();
 
   /// Returns true if all of the schema fields are equal
   bool Equals(const Schema& other, bool check_metadata = true) const;
 
+  /// \brief Return the number of fields (columns) in the schema
+  int num_fields() const;
+
   /// Return the ith schema element. Does not boundscheck
-  std::shared_ptr<Field> field(int i) const { return fields_[i]; }
+  std::shared_ptr<Field> field(int i) const;
+
+  const std::vector<std::shared_ptr<Field>>& fields() const;
+
+  std::vector<std::string> field_names() const;
 
   /// Returns null if name not found
   std::shared_ptr<Field> GetFieldByName(const std::string& name) const;
@@ -936,10 +926,6 @@ class ARROW_EXPORT Schema {
   /// Return the indices of all fields having this name
   std::vector<int> GetAllFieldIndices(const std::string& name) const;
 
-  const std::vector<std::shared_ptr<Field>>& fields() const { return fields_; }
-
-  std::vector<std::string> field_names() const;
-
   /// \brief The custom key-value metadata, if any
   ///
   /// \return metadata may be null
@@ -967,15 +953,9 @@ class ARROW_EXPORT Schema {
   /// \brief Indicates that Schema has non-empty KevValueMetadata
   bool HasMetadata() const;
 
-  /// \brief Return the number of fields (columns) in the schema
-  int num_fields() const { return static_cast<int>(fields_.size()); }
-
  private:
-  std::vector<std::shared_ptr<Field>> fields_;
-
-  std::unordered_multimap<std::string, int> name_to_index_;
-
-  std::shared_ptr<const KeyValueMetadata> metadata_;
+  class Impl;
+  std::unique_ptr<Impl> impl_;
 };
 
 // ----------------------------------------------------------------------
diff --git a/cpp/src/arrow/type_fwd.h b/cpp/src/arrow/type_fwd.h
index 3211d2a..040ccf2 100644
--- a/cpp/src/arrow/type_fwd.h
+++ b/cpp/src/arrow/type_fwd.h
@@ -27,6 +27,7 @@ namespace arrow {
 class Status;
 
 class DataType;
+class KeyValueMetadata;
 class Array;
 struct ArrayData;
 class ArrayBuilder;
diff --git a/cpp/src/arrow/util/compression_brotli.cc b/cpp/src/arrow/util/compression_brotli.cc
index 8a9beb6..051b8c0 100644
--- a/cpp/src/arrow/util/compression_brotli.cc
+++ b/cpp/src/arrow/util/compression_brotli.cc
@@ -19,7 +19,6 @@
 
 #include <cstddef>
 #include <cstdint>
-#include <sstream>
 
 #include <brotli/decode.h>
 #include <brotli/encode.h>
diff --git a/cpp/src/arrow/util/compression_lz4.cc b/cpp/src/arrow/util/compression_lz4.cc
index 238628b..1efd4c6 100644
--- a/cpp/src/arrow/util/compression_lz4.cc
+++ b/cpp/src/arrow/util/compression_lz4.cc
@@ -19,7 +19,6 @@
 
 #include <cstdint>
 #include <cstring>
-#include <sstream>
 
 #include <lz4.h>
 #include <lz4frame.h>
diff --git a/cpp/src/arrow/util/compression_snappy.cc b/cpp/src/arrow/util/compression_snappy.cc
index 2113f98..963de69 100644
--- a/cpp/src/arrow/util/compression_snappy.cc
+++ b/cpp/src/arrow/util/compression_snappy.cc
@@ -19,7 +19,6 @@
 
 #include <cstddef>
 #include <cstdint>
-#include <sstream>
 
 #include <snappy.h>
 
diff --git a/cpp/src/arrow/util/compression_zlib.cc b/cpp/src/arrow/util/compression_zlib.cc
index 202ef06..5afd5e3 100644
--- a/cpp/src/arrow/util/compression_zlib.cc
+++ b/cpp/src/arrow/util/compression_zlib.cc
@@ -22,7 +22,6 @@
 #include <cstring>
 #include <limits>
 #include <memory>
-#include <sstream>
 #include <string>
 
 #include <zconf.h>
diff --git a/cpp/src/arrow/util/compression_zstd.cc b/cpp/src/arrow/util/compression_zstd.cc
index 4972f43..24a7329 100644
--- a/cpp/src/arrow/util/compression_zstd.cc
+++ b/cpp/src/arrow/util/compression_zstd.cc
@@ -19,7 +19,6 @@
 
 #include <cstddef>
 #include <cstdint>
-#include <sstream>
 
 #include <zstd.h>
 
diff --git a/cpp/src/arrow/util/concatenate.cc b/cpp/src/arrow/util/concatenate.cc
index 9a77501..f0bbaff 100644
--- a/cpp/src/arrow/util/concatenate.cc
+++ b/cpp/src/arrow/util/concatenate.cc
@@ -25,6 +25,7 @@
 
 #include "arrow/array.h"
 #include "arrow/memory_pool.h"
+#include "arrow/status.h"
 #include "arrow/util/logging.h"
 #include "arrow/util/visibility.h"
 #include "arrow/visitor_inline.h"
diff --git a/cpp/src/arrow/util/decimal.cc b/cpp/src/arrow/util/decimal.cc
index 4802862..a405b46 100644
--- a/cpp/src/arrow/util/decimal.cc
+++ b/cpp/src/arrow/util/decimal.cc
@@ -23,6 +23,7 @@
 #include <cstring>
 #include <iomanip>
 #include <limits>
+#include <ostream>
 #include <sstream>
 #include <string>
 
@@ -434,4 +435,9 @@ Status Decimal128::ToArrowStatus(DecimalStatus dstatus) const {
   return status;
 }
 
+std::ostream& operator<<(std::ostream& os, const Decimal128& decimal) {
+  os << decimal.ToIntegerString();
+  return os;
+}
+
 }  // namespace arrow
diff --git a/cpp/src/arrow/util/decimal.h b/cpp/src/arrow/util/decimal.h
index 3cb86d1..3a576d0 100644
--- a/cpp/src/arrow/util/decimal.h
+++ b/cpp/src/arrow/util/decimal.h
@@ -17,10 +17,9 @@
 
 #pragma once
 
-#include <array>
 #include <cstdint>
+#include <iosfwd>
 #include <limits>
-#include <sstream>
 #include <string>
 #include <type_traits>
 
@@ -123,10 +122,8 @@ class ARROW_EXPORT Decimal128 : public BasicDecimal128 {
     return Status::OK();
   }
 
-  friend std::ostream& operator<<(std::ostream& os, const Decimal128& decimal) {
-    os << decimal.ToIntegerString();
-    return os;
-  }
+  friend ARROW_EXPORT std::ostream& operator<<(std::ostream& os,
+                                               const Decimal128& decimal);
 
  private:
   /// Converts internal error code to Status
diff --git a/cpp/src/arrow/util/io-util.cc b/cpp/src/arrow/util/io-util.cc
index 5f7e292..6eb80a9 100644
--- a/cpp/src/arrow/util/io-util.cc
+++ b/cpp/src/arrow/util/io-util.cc
@@ -28,8 +28,9 @@
 #include <cerrno>
 #include <cstdint>
 #include <cstring>
-#include <sstream>
+#include <iostream>
 #include <string>
+#include <utility>
 
 #include <fcntl.h>
 #include <stdlib.h>
@@ -47,9 +48,19 @@
 
 // For filename conversion
 #if defined(_MSC_VER)
-#include <boost/system/system_error.hpp>  // NOLINT
 #include <codecvt>
 #include <locale>
+#include <stdexcept>
+#endif
+
+#if defined(_MSC_VER)
+#define USE_BOOST_FILESYSTEM 1
+#else
+#define USE_BOOST_FILESYSTEM 0
+#endif
+
+#if USE_BOOST_FILESYSTEM
+#include <boost/filesystem.hpp>  // NOLINT
 #endif
 
 // ----------------------------------------------------------------------
@@ -92,12 +103,162 @@
 
 #endif
 
-#include "arrow/status.h"
+#include "arrow/buffer.h"
 #include "arrow/util/io-util.h"
 
 namespace arrow {
+namespace io {
+
+//
+// StdoutStream implementation
+//
+
+StdoutStream::StdoutStream() : pos_(0) { set_mode(FileMode::WRITE); }
+
+Status StdoutStream::Close() { return Status::OK(); }
+
+bool StdoutStream::closed() const { return false; }
+
+Status StdoutStream::Tell(int64_t* position) const {
+  *position = pos_;
+  return Status::OK();
+}
+
+Status StdoutStream::Write(const void* data, int64_t nbytes) {
+  pos_ += nbytes;
+  std::cout.write(reinterpret_cast<const char*>(data), nbytes);
+  return Status::OK();
+}
+
+//
+// StderrStream implementation
+//
+
+StderrStream::StderrStream() : pos_(0) { set_mode(FileMode::WRITE); }
+
+Status StderrStream::Close() { return Status::OK(); }
+
+bool StderrStream::closed() const { return false; }
+
+Status StderrStream::Tell(int64_t* position) const {
+  *position = pos_;
+  return Status::OK();
+}
+
+Status StderrStream::Write(const void* data, int64_t nbytes) {
+  pos_ += nbytes;
+  std::cerr.write(reinterpret_cast<const char*>(data), nbytes);
+  return Status::OK();
+}
+
+//
+// StdinStream implementation
+//
+
+StdinStream::StdinStream() : pos_(0) { set_mode(FileMode::READ); }
+
+Status StdinStream::Close() { return Status::OK(); }
+
+bool StdinStream::closed() const { return false; }
+
+Status StdinStream::Tell(int64_t* position) const {
+  *position = pos_;
+  return Status::OK();
+}
+
+Status StdinStream::Read(int64_t nbytes, int64_t* bytes_read, void* out) {
+  std::cin.read(reinterpret_cast<char*>(out), nbytes);
+  if (std::cin) {
+    *bytes_read = nbytes;
+    pos_ += nbytes;
+  } else {
+    *bytes_read = 0;
+  }
+  return Status::OK();
+}
+
+Status StdinStream::Read(int64_t nbytes, std::shared_ptr<Buffer>* out) {
+  std::shared_ptr<ResizableBuffer> buffer;
+  ARROW_RETURN_NOT_OK(AllocateResizableBuffer(nbytes, &buffer));
+  int64_t bytes_read;
+  ARROW_RETURN_NOT_OK(Read(nbytes, &bytes_read, buffer->mutable_data()));
+  ARROW_RETURN_NOT_OK(buffer->Resize(bytes_read, false));
+  buffer->ZeroPadding();
+  *out = buffer;
+  return Status::OK();
+}
+
+}  // namespace io
+
 namespace internal {
 
+//
+// PlatformFilename implementation
+//
+
+struct PlatformFilename::Impl {
+#if USE_BOOST_FILESYSTEM
+  ::boost::filesystem::path path;
+#else
+  std::string path;  // 8-bit Unix path
+#endif
+};
+
+PlatformFilename::PlatformFilename() : impl_(new Impl{}) {}
+
+PlatformFilename::~PlatformFilename() {}
+
+PlatformFilename::PlatformFilename(const PlatformFilename& other)
+    : impl_(new Impl{other.impl_->path}) {}
+
+PlatformFilename::PlatformFilename(PlatformFilename&& other)
+    : impl_(std::move(other.impl_)) {}
+
+PlatformFilename& PlatformFilename::operator=(const PlatformFilename& other) {
+  this->impl_.reset(new Impl{other.impl_->path});
+  return *this;
+}
+
+PlatformFilename& PlatformFilename::operator=(PlatformFilename&& other) {
+  this->impl_ = std::move(other.impl_);
+  return *this;
+}
+
+#if defined(_MSC_VER)
+PlatformFilename::PlatformFilename(const std::wstring& path) : impl_(new Impl{path}) {}
+#else
+PlatformFilename::PlatformFilename(const std::string& path) : impl_(new Impl{path}) {}
+#endif
+
+#if defined(_MSC_VER)
+const std::wstring& PlatformFilename::ToNative() const { return impl_->path.native(); }
+#elif USE_BOOST_FILESYSTEM
+const std::string& PlatformFilename::ToNative() const { return impl_->path.native(); }
+#else
+const std::string& PlatformFilename::ToNative() const { return impl_->path; }
+#endif
+
+#if USE_BOOST_FILESYSTEM
+std::string PlatformFilename::ToString() const { return impl_->path.string(); }
+#else
+std::string PlatformFilename::ToString() const { return impl_->path; }
+#endif
+
+Status PlatformFilename::FromString(const std::string& file_name, PlatformFilename* out) {
+#if defined(_MSC_VER)
+  try {
+    auto wpath =
+        std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>>{}.from_bytes(file_name);
+    *out = PlatformFilename(std::move(wpath));
+  } catch (std::range_error& e) {
+    return Status::Invalid(e.what());
+  }
+#else
+  *out = PlatformFilename(file_name);
+#endif
+  return Status::OK();
+}
+
 #define CHECK_LSEEK(retval) \
   if ((retval) == -1) return Status::IOError("lseek failed");
 
@@ -113,7 +274,7 @@ static inline Status CheckFileOpResult(int ret, int errno_actual,
                                        const PlatformFilename& file_name,
                                        const char* opname) {
   if (ret == -1) {
-    return Status::IOError("Failed to ", opname, " file: ", file_name.string(),
+    return Status::IOError("Failed to ", opname, " file: ", file_name.ToString(),
                            " , error: ", std::strerror(errno_actual));
   }
   return Status::OK();
@@ -124,17 +285,7 @@ static inline Status CheckFileOpResult(int ret, int errno_actual,
 //
 
 Status FileNameFromString(const std::string& file_name, PlatformFilename* out) {
-#if defined(_MSC_VER)
-  try {
-    std::codecvt_utf8_utf16<wchar_t> utf16_converter;
-    out->assign(file_name, utf16_converter);
-  } catch (boost::system::system_error& e) {
-    return Status::Invalid(e.what());
-  }
-#else
-  *out = internal::PlatformFilename(file_name);
-#endif
-  return Status::OK();
+  return PlatformFilename::FromString(file_name, out);
 }
 
 //
@@ -144,11 +295,11 @@ Status FileNameFromString(const std::string& file_name, PlatformFilename* out) {
 Status FileOpenReadable(const PlatformFilename& file_name, int* fd) {
   int ret, errno_actual;
 #if defined(_MSC_VER)
-  errno_actual = _wsopen_s(fd, file_name.wstring().c_str(),
+  errno_actual = _wsopen_s(fd, file_name.ToNative().c_str(),
                            _O_RDONLY | _O_BINARY | _O_NOINHERIT, _SH_DENYNO, _S_IREAD);
   ret = *fd;
 #else
-  ret = *fd = open(file_name.c_str(), O_RDONLY | O_BINARY);
+  ret = *fd = open(file_name.ToNative().c_str(), O_RDONLY | O_BINARY);
   errno_actual = errno;
 #endif
 
@@ -161,10 +312,7 @@ Status FileOpenWritable(const PlatformFilename& file_name, bool write_only, bool
 
 #if defined(_MSC_VER)
   int oflag = _O_CREAT | _O_BINARY | _O_NOINHERIT;
-  int pmode = _S_IWRITE;
-  if (!write_only) {
-    pmode |= _S_IREAD;
-  }
+  int pmode = _S_IREAD | _S_IWRITE;
 
   if (truncate) {
     oflag |= _O_TRUNC;
@@ -179,7 +327,7 @@ Status FileOpenWritable(const PlatformFilename& file_name, bool write_only, bool
     oflag |= _O_RDWR;
   }
 
-  errno_actual = _wsopen_s(fd, file_name.wstring().c_str(), oflag, _SH_DENYNO, pmode);
+  errno_actual = _wsopen_s(fd, file_name.ToNative().c_str(), oflag, _SH_DENYNO, pmode);
   ret = *fd;
 
 #else
@@ -198,7 +346,7 @@ Status FileOpenWritable(const PlatformFilename& file_name, bool write_only, bool
     oflag |= O_RDWR;
   }
 
-  ret = *fd = open(file_name.c_str(), oflag, ARROW_WRITE_SHMODE);
+  ret = *fd = open(file_name.ToNative().c_str(), oflag, ARROW_WRITE_SHMODE);
   errno_actual = errno;
 #endif
   return CheckFileOpResult(ret, errno_actual, file_name, "open local");
diff --git a/cpp/src/arrow/util/io-util.h b/cpp/src/arrow/util/io-util.h
index 7f8320a..1ecb790 100644
--- a/cpp/src/arrow/util/io-util.h
+++ b/cpp/src/arrow/util/io-util.h
@@ -18,40 +18,30 @@
 #ifndef ARROW_UTIL_IO_UTIL_H
 #define ARROW_UTIL_IO_UTIL_H
 
-#include <iostream>
 #include <memory>
 #include <string>
 
-#include "arrow/buffer.h"
 #include "arrow/io/interfaces.h"
 #include "arrow/status.h"
 
-#if defined(_MSC_VER)
-#include <boost/filesystem.hpp>  // NOLINT
-#endif
-
 namespace arrow {
+
+class Buffer;
+
 namespace io {
 
 // Output stream that just writes to stdout.
 class ARROW_EXPORT StdoutStream : public OutputStream {
  public:
-  StdoutStream() : pos_(0) { set_mode(FileMode::WRITE); }
+  StdoutStream();
   ~StdoutStream() override {}
 
-  Status Close() override { return Status::OK(); }
-  bool closed() const override { return false; }
+  Status Close() override;
+  bool closed() const override;
 
-  Status Tell(int64_t* position) const override {
-    *position = pos_;
-    return Status::OK();
-  }
+  Status Tell(int64_t* position) const override;
 
-  Status Write(const void* data, int64_t nbytes) override {
-    pos_ += nbytes;
-    std::cout.write(reinterpret_cast<const char*>(data), nbytes);
-    return Status::OK();
-  }
+  Status Write(const void* data, int64_t nbytes) override;
 
  private:
   int64_t pos_;
@@ -60,22 +50,15 @@ class ARROW_EXPORT StdoutStream : public OutputStream {
 // Output stream that just writes to stderr.
 class ARROW_EXPORT StderrStream : public OutputStream {
  public:
-  StderrStream() : pos_(0) { set_mode(FileMode::WRITE); }
+  StderrStream();
   ~StderrStream() override {}
 
-  Status Close() override { return Status::OK(); }
-  bool closed() const override { return false; }
+  Status Close() override;
+  bool closed() const override;
 
-  Status Tell(int64_t* position) const override {
-    *position = pos_;
-    return Status::OK();
-  }
+  Status Tell(int64_t* position) const override;
 
-  Status Write(const void* data, int64_t nbytes) override {
-    pos_ += nbytes;
-    std::cerr.write(reinterpret_cast<const char*>(data), nbytes);
-    return Status::OK();
-  }
+  Status Write(const void* data, int64_t nbytes) override;
 
  private:
   int64_t pos_;
@@ -84,38 +67,17 @@ class ARROW_EXPORT StderrStream : public OutputStream {
 // Input stream that just reads from stdin.
 class ARROW_EXPORT StdinStream : public InputStream {
  public:
-  StdinStream() : pos_(0) { set_mode(FileMode::READ); }
+  StdinStream();
   ~StdinStream() override {}
 
-  Status Close() override { return Status::OK(); }
-  bool closed() const override { return false; }
-
-  Status Tell(int64_t* position) const override {
-    *position = pos_;
-    return Status::OK();
-  }
-
-  Status Read(int64_t nbytes, int64_t* bytes_read, void* out) override {
-    std::cin.read(reinterpret_cast<char*>(out), nbytes);
-    if (std::cin) {
-      *bytes_read = nbytes;
-      pos_ += nbytes;
-    } else {
-      *bytes_read = 0;
-    }
-    return Status::OK();
-  }
-
-  Status Read(int64_t nbytes, std::shared_ptr<Buffer>* out) override {
-    std::shared_ptr<ResizableBuffer> buffer;
-    ARROW_RETURN_NOT_OK(AllocateResizableBuffer(nbytes, &buffer));
-    int64_t bytes_read;
-    ARROW_RETURN_NOT_OK(Read(nbytes, &bytes_read, buffer->mutable_data()));
-    ARROW_RETURN_NOT_OK(buffer->Resize(bytes_read, false));
-    buffer->ZeroPadding();
-    *out = buffer;
-    return Status::OK();
-  }
+  Status Close() override;
+  bool closed() const override;
+
+  Status Tell(int64_t* position) const override;
+
+  Status Read(int64_t nbytes, int64_t* bytes_read, void* out) override;
+
+  Status Read(int64_t nbytes, std::shared_ptr<Buffer>* out) override;
 
  private:
   int64_t pos_;
@@ -125,26 +87,34 @@ class ARROW_EXPORT StdinStream : public InputStream {
 
 namespace internal {
 
-#if defined(_MSC_VER)
-// namespace fs = boost::filesystem;
-// #define PlatformFilename fs::path
-typedef ::boost::filesystem::path PlatformFilename;
+class ARROW_EXPORT PlatformFilename {
+ public:
+  ~PlatformFilename();
+  PlatformFilename();
+  PlatformFilename(const PlatformFilename&);
+  PlatformFilename(PlatformFilename&&);
+  PlatformFilename& operator=(const PlatformFilename&);
+  PlatformFilename& operator=(PlatformFilename&&);
 
+#if defined(_MSC_VER)
+  const std::wstring& ToNative() const;
 #else
+  const std::string& ToNative() const;
+#endif
+  std::string ToString() const;
 
-struct PlatformFilename {
-  PlatformFilename() {}
-  explicit PlatformFilename(const std::string& path) { utf8_path = path; }
-
-  const char* c_str() const { return utf8_path.c_str(); }
-
-  const std::string& string() const { return utf8_path; }
+  static Status FromString(const std::string& file_name, PlatformFilename* out);
 
-  size_t length() const { return utf8_path.size(); }
+ private:
+  struct Impl;
+  std::unique_ptr<Impl> impl_;
 
-  std::string utf8_path;
-};
+#if defined(_MSC_VER)
+  explicit PlatformFilename(const std::wstring& path);
+#else
+  explicit PlatformFilename(const std::string& path);
 #endif
+};
 
 ARROW_EXPORT
 Status FileNameFromString(const std::string& file_name, PlatformFilename* out);
diff --git a/cpp/src/arrow/util/lazy-benchmark.cc b/cpp/src/arrow/util/lazy-benchmark.cc
index 19e6675..02c7de5 100644
--- a/cpp/src/arrow/util/lazy-benchmark.cc
+++ b/cpp/src/arrow/util/lazy-benchmark.cc
@@ -21,6 +21,7 @@
 
 #include <benchmark/benchmark.h>
 
+#include "arrow/testing/random.h"
 #include "arrow/testing/util.h"
 #include "arrow/util/lazy.h"
 
diff --git a/cpp/src/arrow/util/lazy-test.cc b/cpp/src/arrow/util/lazy-test.cc
index 07d591e..aec99d4 100644
--- a/cpp/src/arrow/util/lazy-test.cc
+++ b/cpp/src/arrow/util/lazy-test.cc
@@ -22,6 +22,7 @@
 
 #include <gtest/gtest.h>
 
+#include "arrow/testing/random.h"
 #include "arrow/testing/util.h"
 #include "arrow/util/lazy.h"
 
diff --git a/cpp/src/arrow/util/logging.h b/cpp/src/arrow/util/logging.h
index 7b4ec65..999aca6 100644
--- a/cpp/src/arrow/util/logging.h
+++ b/cpp/src/arrow/util/logging.h
@@ -36,8 +36,8 @@
 
 #else  // !GANDIVA_IR
 
-#include <iostream>
 #include <memory>
+#include <ostream>
 #include <string>
 
 #include "arrow/util/macros.h"
@@ -153,19 +153,18 @@ class ARROW_EXPORT ArrowLogBase {
   }
 
  protected:
-  virtual std::ostream& Stream() { return std::cerr; }
+  virtual std::ostream& Stream() = 0;
 };
 
 class ARROW_EXPORT ArrowLog : public ArrowLogBase {
  public:
   ArrowLog(const char* file_name, int line_number, ArrowLogLevel severity);
-
-  virtual ~ArrowLog();
+  ~ArrowLog() override;
 
   /// Return whether or not current logging instance is enabled.
   ///
   /// \return True if logging is enabled and false otherwise.
-  virtual bool IsEnabled() const;
+  bool IsEnabled() const override;
 
   /// The init function of arrow log for a program which should be called only once.
   ///
@@ -204,7 +203,7 @@ class ARROW_EXPORT ArrowLog : public ArrowLogBase {
   static ArrowLogLevel severity_threshold_;
 
  protected:
-  virtual std::ostream& Stream();
+  std::ostream& Stream() override;
 };
 
 // This class make ARROW_CHECK compilation pass to change the << operator to void.
diff --git a/cpp/src/arrow/util/parsing.h b/cpp/src/arrow/util/parsing.h
index 3d93ed2..20b749a 100644
--- a/cpp/src/arrow/util/parsing.h
+++ b/cpp/src/arrow/util/parsing.h
@@ -23,9 +23,7 @@
 #include <cassert>
 #include <chrono>
 #include <limits>
-#include <locale>
 #include <memory>
-#include <sstream>
 #include <string>
 #include <type_traits>
 
diff --git a/cpp/src/arrow/python/platform.h b/cpp/src/arrow/util/string_builder.cc
similarity index 63%
copy from cpp/src/arrow/python/platform.h
copy to cpp/src/arrow/util/string_builder.cc
index ca9b553..6d5a161 100644
--- a/cpp/src/arrow/python/platform.h
+++ b/cpp/src/arrow/util/string_builder.cc
@@ -15,21 +15,26 @@
 // specific language governing permissions and limitations
 // under the License.
 
-// Functions for converting between pandas's NumPy-based data representation
-// and Arrow data structures
+#include "arrow/util/string_builder.h"
 
-#ifndef ARROW_PYTHON_PLATFORM_H
-#define ARROW_PYTHON_PLATFORM_H
+#include <sstream>
 
-#include <iostream>
-#include <Python.h> // IWYU pragma: export
-#include <datetime.h>
+#include "arrow/util/stl.h"
 
-// Work around C2528 error
-#ifdef _MSC_VER
-#if _MSC_VER >= 1900
-#undef timezone
-#endif
-#endif
+namespace arrow {
 
-#endif  // ARROW_PYTHON_PLATFORM_H
+using internal::make_unique;
+
+namespace util {
+namespace detail {
+
+StringStreamWrapper::StringStreamWrapper()
+    : sstream_(make_unique<std::ostringstream>()), ostream_(*sstream_) {}
+
+StringStreamWrapper::~StringStreamWrapper() {}
+
+std::string StringStreamWrapper::str() { return sstream_->str(); }
+
+}  // namespace detail
+}  // namespace util
+}  // namespace arrow
diff --git a/cpp/src/arrow/util/string_builder.h b/cpp/src/arrow/util/string_builder.h
index 7b3e107..9129f12 100644
--- a/cpp/src/arrow/util/string_builder.h
+++ b/cpp/src/arrow/util/string_builder.h
@@ -18,31 +18,49 @@
 #ifndef ARROW_UTIL_STRING_BUILDER_H
 #define ARROW_UTIL_STRING_BUILDER_H
 
-#include <sstream>
+#include <memory>
+#include <ostream>
 #include <string>
 #include <utility>
 
+#include "arrow/util/visibility.h"
+
 namespace arrow {
 namespace util {
 
+namespace detail {
+
+class ARROW_EXPORT StringStreamWrapper {
+ public:
+  StringStreamWrapper();
+  ~StringStreamWrapper();
+
+  std::ostream& stream() { return ostream_; }
+  std::string str();
+
+ protected:
+  std::unique_ptr<std::ostringstream> sstream_;
+  std::ostream& ostream_;
+};
+
+}  // namespace detail
+
 template <typename Head>
-void StringBuilderRecursive(std::stringstream& stream, Head&& head) {
+void StringBuilderRecursive(std::ostream& stream, Head&& head) {
   stream << head;
 }
 
 template <typename Head, typename... Tail>
-void StringBuilderRecursive(std::stringstream& stream, Head&& head, Tail&&... tail) {
+void StringBuilderRecursive(std::ostream& stream, Head&& head, Tail&&... tail) {
   StringBuilderRecursive(stream, std::forward<Head>(head));
   StringBuilderRecursive(stream, std::forward<Tail>(tail)...);
 }
 
 template <typename... Args>
 std::string StringBuilder(Args&&... args) {
-  std::stringstream stream;
-
-  StringBuilderRecursive(stream, std::forward<Args>(args)...);
-
-  return stream.str();
+  detail::StringStreamWrapper ss;
+  StringBuilderRecursive(ss.stream(), std::forward<Args>(args)...);
+  return ss.str();
 }
 
 }  // namespace util
diff --git a/cpp/src/arrow/util/thread-pool.cc b/cpp/src/arrow/util/thread-pool.cc
index 17ad9c4..6969f3f 100644
--- a/cpp/src/arrow/util/thread-pool.cc
+++ b/cpp/src/arrow/util/thread-pool.cc
@@ -20,6 +20,7 @@
 #include <algorithm>
 #include <condition_variable>
 #include <deque>
+#include <list>
 #include <mutex>
 #include <string>
 #include <thread>
@@ -53,6 +54,65 @@ struct ThreadPool::State {
   bool quick_shutdown_;
 };
 
+// The worker loop is an independent function so that it can keep running
+// after the ThreadPool is destroyed.
+static void WorkerLoop(std::shared_ptr<ThreadPool::State> state,
+                       std::list<std::thread>::iterator it) {
+  std::unique_lock<std::mutex> lock(state->mutex_);
+
+  // Since we hold the lock, `it` now points to the correct thread object
+  // (LaunchWorkersUnlocked has exited)
+  DCHECK_EQ(std::this_thread::get_id(), it->get_id());
+
+  // If too many threads, we should secede from the pool
+  const auto should_secede = [&]() -> bool {
+    return state->workers_.size() > static_cast<size_t>(state->desired_capacity_);
+  };
+
+  while (true) {
+    // By the time this thread is started, some tasks may have been pushed
+    // or shutdown could even have been requested.  So we only wait on the
+    // condition variable at the end of the loop.
+
+    // Execute pending tasks if any
+    while (!state->pending_tasks_.empty() && !state->quick_shutdown_) {
+      // We check this opportunistically at each loop iteration since
+      // it releases the lock below.
+      if (should_secede()) {
+        break;
+      }
+      {
+        std::function<void()> task = std::move(state->pending_tasks_.front());
+        state->pending_tasks_.pop_front();
+        lock.unlock();
+        task();
+      }
+      lock.lock();
+    }
+    // Now either the queue is empty *or* a quick shutdown was requested
+    if (state->please_shutdown_ || should_secede()) {
+      break;
+    }
+    // Wait for next wakeup
+    state->cv_.wait(lock);
+  }
+
+  // We're done.  Move our thread object to the trashcan of finished
+  // workers.  This has two motivations:
+  // 1) the thread object doesn't get destroyed before this function finishes
+  //    (but we could call thread::detach() instead)
+  // 2) we can explicitly join() the trashcan threads to make sure all OS threads
+  //    are exited before the ThreadPool is destroyed.  Otherwise subtle
+  //    timing conditions can lead to false positives with Valgrind.
+  DCHECK_EQ(std::this_thread::get_id(), it->get_id());
+  state->finished_workers_.push_back(std::move(*it));
+  state->workers_.erase(it);
+  if (state->please_shutdown_) {
+    // Notify the function waiting in Shutdown().
+    state->cv_shutdown_.notify_one();
+  }
+}
+
 ThreadPool::ThreadPool()
     : sp_state_(std::make_shared<ThreadPool::State>()),
       state_(sp_state_.get()),
@@ -166,63 +226,6 @@ void ThreadPool::LaunchWorkersUnlocked(int threads) {
   }
 }
 
-void ThreadPool::WorkerLoop(std::shared_ptr<State> state,
-                            std::list<std::thread>::iterator it) {
-  std::unique_lock<std::mutex> lock(state->mutex_);
-
-  // Since we hold the lock, `it` now points to the correct thread object
-  // (LaunchWorkersUnlocked has exited)
-  DCHECK_EQ(std::this_thread::get_id(), it->get_id());
-
-  // If too many threads, we should secede from the pool
-  const auto should_secede = [&]() -> bool {
-    return state->workers_.size() > static_cast<size_t>(state->desired_capacity_);
-  };
-
-  while (true) {
-    // By the time this thread is started, some tasks may have been pushed
-    // or shutdown could even have been requested.  So we only wait on the
-    // condition variable at the end of the loop.
-
-    // Execute pending tasks if any
-    while (!state->pending_tasks_.empty() && !state->quick_shutdown_) {
-      // We check this opportunistically at each loop iteration since
-      // it releases the lock below.
-      if (should_secede()) {
-        break;
-      }
-      {
-        std::function<void()> task = std::move(state->pending_tasks_.front());
-        state->pending_tasks_.pop_front();
-        lock.unlock();
-        task();
-      }
-      lock.lock();
-    }
-    // Now either the queue is empty *or* a quick shutdown was requested
-    if (state->please_shutdown_ || should_secede()) {
-      break;
-    }
-    // Wait for next wakeup
-    state->cv_.wait(lock);
-  }
-
-  // We're done.  Move our thread object to the trashcan of finished
-  // workers.  This has two motivations:
-  // 1) the thread object doesn't get destroyed before this function finishes
-  //    (but we could call thread::detach() instead)
-  // 2) we can explicitly join() the trashcan threads to make sure all OS threads
-  //    are exited before the ThreadPool is destroyed.  Otherwise subtle
-  //    timing conditions can lead to false positives with Valgrind.
-  DCHECK_EQ(std::this_thread::get_id(), it->get_id());
-  state->finished_workers_.push_back(std::move(*it));
-  state->workers_.erase(it);
-  if (state->please_shutdown_) {
-    // Notify the function waiting in Shutdown().
-    state->cv_shutdown_.notify_one();
-  }
-}
-
 Status ThreadPool::SpawnReal(std::function<void()> task) {
   {
     ProtectAgainstFork();
diff --git a/cpp/src/arrow/util/thread-pool.h b/cpp/src/arrow/util/thread-pool.h
index f18cfeb..2de212e 100644
--- a/cpp/src/arrow/util/thread-pool.h
+++ b/cpp/src/arrow/util/thread-pool.h
@@ -25,11 +25,8 @@
 #include <cstdlib>
 #include <functional>
 #include <future>
-#include <iostream>
-#include <list>
 #include <memory>
 #include <string>
-#include <thread>
 #include <type_traits>
 #include <utility>
 
@@ -127,20 +124,18 @@ class ARROW_EXPORT ThreadPool {
 
     Status st = SpawnReal(detail::packaged_task_wrapper<Result>(std::move(task)));
     if (!st.ok()) {
-      // This happens when Submit() is called after Shutdown()
-      std::cerr << st.ToString() << std::endl;
-      std::abort();
+      st.Abort("ThreadPool::Submit() was probably called after Shutdown()");
     }
     return fut;
   }
 
+  struct State;
+
  protected:
   FRIEND_TEST(TestThreadPool, SetCapacity);
   FRIEND_TEST(TestGlobalThreadPool, Capacity);
   friend ARROW_EXPORT ThreadPool* GetCpuThreadPool();
 
-  struct State;
-
   ThreadPool();
 
   ARROW_DISALLOW_COPY_AND_ASSIGN(ThreadPool);
@@ -155,11 +150,6 @@ class ARROW_EXPORT ThreadPool {
   // Reinitialize the thread pool if the pid changed
   void ProtectAgainstFork();
 
-  // The worker loop is a static method so that it can keep running
-  // after the ThreadPool is destroyed
-  static void WorkerLoop(std::shared_ptr<State> state,
-                         std::list<std::thread>::iterator it);
-
   static std::shared_ptr<ThreadPool> MakeCpuThreadPool();
 
   std::shared_ptr<State> sp_state_;
diff --git a/cpp/src/arrow/visitor_inline.h b/cpp/src/arrow/visitor_inline.h
index 4699238..01bf442 100644
--- a/cpp/src/arrow/visitor_inline.h
+++ b/cpp/src/arrow/visitor_inline.h
@@ -24,7 +24,6 @@
 #include "arrow/extension_type.h"
 #include "arrow/scalar.h"
 #include "arrow/status.h"
-#include "arrow/tensor.h"
 #include "arrow/type.h"
 #include "arrow/util/bit-util.h"
 #include "arrow/util/checked_cast.h"
diff --git a/cpp/src/gandiva/tests/literal_test.cc b/cpp/src/gandiva/tests/literal_test.cc
index 53323cb..b5ffff0 100644
--- a/cpp/src/gandiva/tests/literal_test.cc
+++ b/cpp/src/gandiva/tests/literal_test.cc
@@ -197,7 +197,7 @@ TEST_F(TestLiteral, TestNullLiteralInIf) {
   auto res = field("res", float64());
 
   auto node_a = TreeExprBuilder::MakeField(field_a);
-  auto literal_5 = TreeExprBuilder::MakeLiteral((double_t)5);
+  auto literal_5 = TreeExprBuilder::MakeLiteral(5.0);
   auto a_gt_5 = TreeExprBuilder::MakeFunction("greater_than", {node_a, literal_5},
                                               arrow::boolean());
   auto literal_null = TreeExprBuilder::MakeNull(arrow::float64());
diff --git a/cpp/src/gandiva/tests/projector_test.cc b/cpp/src/gandiva/tests/projector_test.cc
index e66f9e7..238fbe2 100644
--- a/cpp/src/gandiva/tests/projector_test.cc
+++ b/cpp/src/gandiva/tests/projector_test.cc
@@ -15,9 +15,13 @@
 // specific language governing permissions and limitations
 // under the License.
 
-#include "gandiva/projector.h"
+#include <cmath>
+
 #include <gtest/gtest.h>
+
 #include "arrow/memory_pool.h"
+
+#include "gandiva/projector.h"
 #include "gandiva/tests/test_util.h"
 #include "gandiva/tree_expr_builder.h"
 
diff --git a/cpp/src/parquet/arrow/arrow-reader-writer-test.cc b/cpp/src/parquet/arrow/arrow-reader-writer-test.cc
index d9fd2d3..84ace3f 100644
--- a/cpp/src/parquet/arrow/arrow-reader-writer-test.cc
+++ b/cpp/src/parquet/arrow/arrow-reader-writer-test.cc
@@ -354,16 +354,6 @@ void AssertChunkedEqual(const ChunkedArray& expected, const ChunkedArray& actual
   }
 }
 
-void PrintColumn(const Column& col, std::stringstream* ss) {
-  const ChunkedArray& carr = *col.data();
-  for (int i = 0; i < carr.num_chunks(); ++i) {
-    auto c1 = carr.chunk(i);
-    *ss << "Chunk " << i << std::endl;
-    ARROW_EXPECT_OK(::arrow::PrettyPrint(*c1, 0, ss));
-    *ss << std::endl;
-  }
-}
-
 void DoSimpleRoundtrip(const std::shared_ptr<Table>& table, bool use_threads,
                        int64_t row_group_size, const std::vector<int>& column_subset,
                        std::shared_ptr<Table>* out,
diff --git a/cpp/src/parquet/arrow/test-util.h b/cpp/src/parquet/arrow/test-util.h
index b963c3e..b99e28f 100644
--- a/cpp/src/parquet/arrow/test-util.h
+++ b/cpp/src/parquet/arrow/test-util.h
@@ -26,6 +26,7 @@
 
 #include "arrow/api.h"
 #include "arrow/testing/gtest_util.h"
+#include "arrow/testing/random.h"
 #include "arrow/type_traits.h"
 #include "arrow/util/decimal.h"