You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2019/05/18 15:38:08 UTC
[arrow] branch master updated: ARROW-5102: [C++] Reduce header dependencies
This is an automated email from the ASF dual-hosted git repository.
wesm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new 7a55621 ARROW-5102: [C++] Reduce header dependencies
7a55621 is described below
commit 7a5562174cffb21b16f990f64d114c1a94a30556
Author: Antoine Pitrou <an...@python.org>
AuthorDate: Sat May 18 10:37:55 2019 -0500
ARROW-5102: [C++] Reduce header dependencies
This is a first attempt at making our headers lighter-weight.
The benefits are currently meagre: I get a 4% speedup when compiling Arrow + Parquet.
Author: Antoine Pitrou <an...@python.org>
Closes #4313 from pitrou/ARROW-5102-reduce-header-deps and squashes the following commits:
4937d9fd2 <Antoine Pitrou> ARROW-5102: Reduce header dependencies
---
c_glib/arrow-glib/compute.cpp | 2 +
cpp/src/arrow/CMakeLists.txt | 2 +
cpp/src/arrow/adapters/orc/adapter.cc | 1 +
cpp/src/arrow/adapters/orc/adapter_util.cc | 1 +
cpp/src/arrow/api.h | 31 ++--
cpp/src/arrow/array-test.cc | 1 +
cpp/src/arrow/array/builder_base.cc | 1 -
cpp/src/arrow/array/builder_binary.cc | 1 -
cpp/src/arrow/array/builder_binary.h | 1 -
cpp/src/arrow/array/builder_decimal.cc | 1 -
cpp/src/arrow/array/builder_dict.cc | 1 -
cpp/src/arrow/array/builder_nested.cc | 1 -
cpp/src/arrow/array/builder_primitive.cc | 1 -
cpp/src/arrow/buffer-builder.h | 13 --
cpp/src/arrow/buffer.cc | 1 +
cpp/src/arrow/buffer.h | 1 -
cpp/src/arrow/builder.cc | 1 -
cpp/src/arrow/compute/compute-benchmark.cc | 1 +
cpp/src/arrow/compute/kernels/boolean-test.cc | 2 +-
cpp/src/arrow/csv/parser.cc | 1 -
cpp/src/arrow/csv/test-common.h | 7 +-
cpp/src/arrow/extension_type-test.cc | 1 +
cpp/src/arrow/extension_type.cc | 1 +
cpp/src/arrow/flight/perf-server.cc | 6 +-
cpp/src/arrow/flight/test-integration-client.cc | 2 +
cpp/src/arrow/flight/test-server.cc | 1 +
cpp/src/arrow/gpu/cuda_common.h | 9 -
cpp/src/arrow/io/compressed-test.cc | 1 +
cpp/src/arrow/io/file-test.cc | 7 +-
cpp/src/arrow/io/memory.cc | 4 +
cpp/src/arrow/io/memory.h | 4 +-
cpp/src/arrow/io/{test-common.h => test-common.cc} | 63 +++----
cpp/src/arrow/io/test-common.h | 121 +++----------
cpp/src/arrow/ipc/json-internal.h | 1 -
cpp/src/arrow/ipc/json-test.cc | 2 +-
cpp/src/arrow/ipc/metadata-internal.cc | 1 +
cpp/src/arrow/ipc/read-write-test.cc | 3 +
cpp/src/arrow/json/parser.h | 1 +
cpp/src/arrow/memory_pool.cc | 1 -
cpp/src/arrow/pretty_print.h | 2 +-
cpp/src/arrow/python/arrow_to_pandas.cc | 1 -
cpp/src/arrow/python/common.cc | 2 +-
cpp/src/arrow/python/common.h | 2 -
cpp/src/arrow/python/deserialize.cc | 1 -
cpp/src/arrow/python/inference.cc | 1 -
cpp/src/arrow/python/inference.h | 2 -
cpp/src/arrow/python/numpy-internal.h | 1 +
cpp/src/arrow/python/numpy_convert.cc | 1 -
cpp/src/arrow/python/numpy_to_arrow.cc | 1 -
cpp/src/arrow/python/platform.h | 1 -
cpp/src/arrow/python/util/datetime.h | 1 -
cpp/src/arrow/record_batch.cc | 8 +-
cpp/src/arrow/record_batch.h | 14 +-
cpp/src/arrow/status.cc | 16 +-
cpp/src/arrow/status.h | 3 +
cpp/src/arrow/table.cc | 1 -
cpp/src/arrow/tensor.cc | 1 +
cpp/src/arrow/testing/gtest_util.cc | 38 ++--
cpp/src/arrow/testing/gtest_util.h | 57 ++----
cpp/src/arrow/testing/random.h | 34 ++++
cpp/src/arrow/testing/util.cc | 9 +
cpp/src/arrow/testing/util.h | 72 +-------
cpp/src/arrow/type-test.cc | 1 +
cpp/src/arrow/type.cc | 123 ++++++++++---
cpp/src/arrow/type.h | 60 +++----
cpp/src/arrow/type_fwd.h | 1 +
cpp/src/arrow/util/compression_brotli.cc | 1 -
cpp/src/arrow/util/compression_lz4.cc | 1 -
cpp/src/arrow/util/compression_snappy.cc | 1 -
cpp/src/arrow/util/compression_zlib.cc | 1 -
cpp/src/arrow/util/compression_zstd.cc | 1 -
cpp/src/arrow/util/concatenate.cc | 1 +
cpp/src/arrow/util/decimal.cc | 6 +
cpp/src/arrow/util/decimal.h | 9 +-
cpp/src/arrow/util/io-util.cc | 194 ++++++++++++++++++---
cpp/src/arrow/util/io-util.h | 118 +++++--------
cpp/src/arrow/util/lazy-benchmark.cc | 1 +
cpp/src/arrow/util/lazy-test.cc | 1 +
cpp/src/arrow/util/logging.h | 11 +-
cpp/src/arrow/util/parsing.h | 2 -
.../{python/platform.h => util/string_builder.cc} | 33 ++--
cpp/src/arrow/util/string_builder.h | 34 +++-
cpp/src/arrow/util/thread-pool.cc | 117 +++++++------
cpp/src/arrow/util/thread-pool.h | 16 +-
cpp/src/arrow/visitor_inline.h | 1 -
cpp/src/gandiva/tests/literal_test.cc | 2 +-
cpp/src/gandiva/tests/projector_test.cc | 6 +-
cpp/src/parquet/arrow/arrow-reader-writer-test.cc | 10 --
cpp/src/parquet/arrow/test-util.h | 1 +
89 files changed, 683 insertions(+), 640 deletions(-)
diff --git a/c_glib/arrow-glib/compute.cpp b/c_glib/arrow-glib/compute.cpp
index 73adccb..c97485f 100644
--- a/c_glib/arrow-glib/compute.cpp
+++ b/c_glib/arrow-glib/compute.cpp
@@ -21,6 +21,8 @@
# include <config.h>
#endif
+#include <sstream>
+
#include <arrow-glib/array.hpp>
#include <arrow-glib/compute.hpp>
#include <arrow-glib/data-type.hpp>
diff --git a/cpp/src/arrow/CMakeLists.txt b/cpp/src/arrow/CMakeLists.txt
index d60514d..619c9eb 100644
--- a/cpp/src/arrow/CMakeLists.txt
+++ b/cpp/src/arrow/CMakeLists.txt
@@ -132,6 +132,7 @@ set(ARROW_SRCS
util/logging.cc
util/key_value_metadata.cc
util/memory.cc
+ util/string_builder.cc
util/task-group.cc
util/thread-pool.cc
util/trie.cc
@@ -283,6 +284,7 @@ if(ARROW_BUILD_TESTS OR ARROW_BUILD_BENCHMARKS)
# that depend on gtest
add_arrow_lib(arrow_testing
SOURCES
+ io/test-common.cc
ipc/test-common.cc
filesystem/test-util.cc
testing/gtest_util.cc
diff --git a/cpp/src/arrow/adapters/orc/adapter.cc b/cpp/src/arrow/adapters/orc/adapter.cc
index a4311bb..d72c16a 100644
--- a/cpp/src/arrow/adapters/orc/adapter.cc
+++ b/cpp/src/arrow/adapters/orc/adapter.cc
@@ -40,6 +40,7 @@
#include "arrow/util/bit-util.h"
#include "arrow/util/checked_cast.h"
#include "arrow/util/decimal.h"
+#include "arrow/util/key_value_metadata.h"
#include "arrow/util/lazy.h"
#include "arrow/util/macros.h"
#include "arrow/util/visibility.h"
diff --git a/cpp/src/arrow/adapters/orc/adapter_util.cc b/cpp/src/arrow/adapters/orc/adapter_util.cc
index 235e5ba..313c029 100644
--- a/cpp/src/arrow/adapters/orc/adapter_util.cc
+++ b/cpp/src/arrow/adapters/orc/adapter_util.cc
@@ -21,6 +21,7 @@
#include "arrow/adapters/orc/adapter_util.h"
#include "arrow/array/builder_base.h"
#include "arrow/builder.h"
+#include "arrow/status.h"
#include "arrow/util/checked_cast.h"
#include "arrow/util/decimal.h"
#include "arrow/util/lazy.h"
diff --git a/cpp/src/arrow/api.h b/cpp/src/arrow/api.h
index 7c8b10a..3d6a179 100644
--- a/cpp/src/arrow/api.h
+++ b/cpp/src/arrow/api.h
@@ -20,21 +20,22 @@
#ifndef ARROW_API_H
#define ARROW_API_H
-#include "arrow/array.h" // IYWU pragma: export
-#include "arrow/buffer.h" // IYWU pragma: export
-#include "arrow/builder.h" // IYWU pragma: export
-#include "arrow/compare.h" // IYWU pragma: export
-#include "arrow/extension_type.h" // IYWU pragma: export
-#include "arrow/memory_pool.h" // IYWU pragma: export
-#include "arrow/pretty_print.h" // IYWU pragma: export
-#include "arrow/record_batch.h" // IYWU pragma: export
-#include "arrow/status.h" // IYWU pragma: export
-#include "arrow/table.h" // IYWU pragma: export
-#include "arrow/table_builder.h" // IYWU pragma: export
-#include "arrow/tensor.h" // IYWU pragma: export
-#include "arrow/type.h" // IYWU pragma: export
-#include "arrow/util/config.h" // IYWU pragma: export
-#include "arrow/visitor.h" // IYWU pragma: export
+#include "arrow/array.h" // IYWU pragma: export
+#include "arrow/buffer.h" // IYWU pragma: export
+#include "arrow/builder.h" // IYWU pragma: export
+#include "arrow/compare.h" // IYWU pragma: export
+#include "arrow/extension_type.h" // IYWU pragma: export
+#include "arrow/memory_pool.h" // IYWU pragma: export
+#include "arrow/pretty_print.h" // IYWU pragma: export
+#include "arrow/record_batch.h" // IYWU pragma: export
+#include "arrow/status.h" // IYWU pragma: export
+#include "arrow/table.h" // IYWU pragma: export
+#include "arrow/table_builder.h" // IYWU pragma: export
+#include "arrow/tensor.h" // IYWU pragma: export
+#include "arrow/type.h" // IYWU pragma: export
+#include "arrow/util/config.h" // IYWU pragma: export
+#include "arrow/util/key_value_metadata.h" // IWYU pragma: export
+#include "arrow/visitor.h" // IYWU pragma: export
/// \brief Top-level namespace for Apache Arrow C++ API
namespace arrow {}
diff --git a/cpp/src/arrow/array-test.cc b/cpp/src/arrow/array-test.cc
index 2d1ab48..a6a20c0 100644
--- a/cpp/src/arrow/array-test.cc
+++ b/cpp/src/arrow/array-test.cc
@@ -38,6 +38,7 @@
#include "arrow/record_batch.h"
#include "arrow/status.h"
#include "arrow/testing/gtest_common.h"
+#include "arrow/testing/random.h"
#include "arrow/testing/util.h"
#include "arrow/type.h"
#include "arrow/type_traits.h"
diff --git a/cpp/src/arrow/array/builder_base.cc b/cpp/src/arrow/array/builder_base.cc
index fb13a88..2282937 100644
--- a/cpp/src/arrow/array/builder_base.cc
+++ b/cpp/src/arrow/array/builder_base.cc
@@ -21,7 +21,6 @@
#include <cstddef>
#include <cstdint>
#include <cstring>
-#include <sstream>
#include <utility>
#include <vector>
diff --git a/cpp/src/arrow/array/builder_binary.cc b/cpp/src/arrow/array/builder_binary.cc
index 4a8ea40..88c2e86 100644
--- a/cpp/src/arrow/array/builder_binary.cc
+++ b/cpp/src/arrow/array/builder_binary.cc
@@ -22,7 +22,6 @@
#include <cstdint>
#include <cstring>
#include <numeric>
-#include <sstream>
#include <string>
#include <utility>
#include <vector>
diff --git a/cpp/src/arrow/array/builder_binary.h b/cpp/src/arrow/array/builder_binary.h
index facaf4a..a04e308 100644
--- a/cpp/src/arrow/array/builder_binary.h
+++ b/cpp/src/arrow/array/builder_binary.h
@@ -19,7 +19,6 @@
#include <limits>
#include <memory>
-#include <sstream>
#include <string>
#include <vector>
diff --git a/cpp/src/arrow/array/builder_decimal.cc b/cpp/src/arrow/array/builder_decimal.cc
index 191a0ff..6a46556 100644
--- a/cpp/src/arrow/array/builder_decimal.cc
+++ b/cpp/src/arrow/array/builder_decimal.cc
@@ -23,7 +23,6 @@
#include <cstring>
#include <memory>
#include <numeric>
-#include <sstream>
#include <string>
#include <utility>
#include <vector>
diff --git a/cpp/src/arrow/array/builder_dict.cc b/cpp/src/arrow/array/builder_dict.cc
index e4267bf..648b6ff 100644
--- a/cpp/src/arrow/array/builder_dict.cc
+++ b/cpp/src/arrow/array/builder_dict.cc
@@ -19,7 +19,6 @@
#include <cstdint>
#include <limits>
-#include <sstream>
#include <type_traits>
#include <utility>
#include <vector>
diff --git a/cpp/src/arrow/array/builder_nested.cc b/cpp/src/arrow/array/builder_nested.cc
index 9ef0f4d..dd88a7a 100644
--- a/cpp/src/arrow/array/builder_nested.cc
+++ b/cpp/src/arrow/array/builder_nested.cc
@@ -21,7 +21,6 @@
#include <cstddef>
#include <cstdint>
#include <cstring>
-#include <sstream>
#include <utility>
#include <vector>
diff --git a/cpp/src/arrow/array/builder_primitive.cc b/cpp/src/arrow/array/builder_primitive.cc
index 13e8f2e..d4def92 100644
--- a/cpp/src/arrow/array/builder_primitive.cc
+++ b/cpp/src/arrow/array/builder_primitive.cc
@@ -21,7 +21,6 @@
#include <cstddef>
#include <cstdint>
#include <cstring>
-#include <sstream>
#include <utility>
#include <vector>
diff --git a/cpp/src/arrow/buffer-builder.h b/cpp/src/arrow/buffer-builder.h
index 32d7804..376e078 100644
--- a/cpp/src/arrow/buffer-builder.h
+++ b/cpp/src/arrow/buffer-builder.h
@@ -19,7 +19,6 @@
#define ARROW_BUFFER_BUILDER_H
#include <algorithm>
-#include <array>
#include <cstdint>
#include <cstring>
#include <memory>
@@ -110,18 +109,6 @@ class ARROW_EXPORT BufferBuilder {
return Status::OK();
}
- /// \brief Append the given data to the buffer
- ///
- /// The buffer is automatically expanded if necessary.
- template <size_t NBYTES>
- Status Append(const std::array<uint8_t, NBYTES>& data) {
- constexpr auto nbytes = static_cast<int64_t>(NBYTES);
- ARROW_RETURN_NOT_OK(Reserve(NBYTES));
- std::copy(data.cbegin(), data.cend(), data_ + size_);
- size_ += nbytes;
- return Status::OK();
- }
-
// Advance pointer and zero out memory
Status Advance(const int64_t length) { return Append(length, 0); }
diff --git a/cpp/src/arrow/buffer.cc b/cpp/src/arrow/buffer.cc
index 9e9bd2e..e93333e 100644
--- a/cpp/src/arrow/buffer.cc
+++ b/cpp/src/arrow/buffer.cc
@@ -17,6 +17,7 @@
#include "arrow/buffer.h"
+#include <algorithm>
#include <cstdint>
#include <utility>
diff --git a/cpp/src/arrow/buffer.h b/cpp/src/arrow/buffer.h
index 20a7969..07b2f09 100644
--- a/cpp/src/arrow/buffer.h
+++ b/cpp/src/arrow/buffer.h
@@ -18,7 +18,6 @@
#ifndef ARROW_BUFFER_H
#define ARROW_BUFFER_H
-#include <algorithm>
#include <cstdint>
#include <cstring>
#include <memory>
diff --git a/cpp/src/arrow/builder.cc b/cpp/src/arrow/builder.cc
index 2bf6178..2a3a1ad 100644
--- a/cpp/src/arrow/builder.cc
+++ b/cpp/src/arrow/builder.cc
@@ -17,7 +17,6 @@
#include "arrow/builder.h"
-#include <sstream>
#include <string>
#include <utility>
#include <vector>
diff --git a/cpp/src/arrow/compute/compute-benchmark.cc b/cpp/src/arrow/compute/compute-benchmark.cc
index 6412c54..c14f706 100644
--- a/cpp/src/arrow/compute/compute-benchmark.cc
+++ b/cpp/src/arrow/compute/compute-benchmark.cc
@@ -22,6 +22,7 @@
#include "arrow/builder.h"
#include "arrow/memory_pool.h"
#include "arrow/testing/gtest_util.h"
+#include "arrow/testing/random.h"
#include "arrow/testing/util.h"
#include "arrow/compute/context.h"
diff --git a/cpp/src/arrow/compute/kernels/boolean-test.cc b/cpp/src/arrow/compute/kernels/boolean-test.cc
index 824a0d5..1b678bb 100644
--- a/cpp/src/arrow/compute/kernels/boolean-test.cc
+++ b/cpp/src/arrow/compute/kernels/boolean-test.cc
@@ -122,7 +122,7 @@ TEST_F(TestBooleanKernel, Invert) {
ASSERT_OK(Invert(&this->ctx_, ca1, &result));
ASSERT_EQ(Datum::CHUNKED_ARRAY, result.kind());
std::shared_ptr<ChunkedArray> result_ca = result.chunked_array();
- ASSERT_ARRAYS_EQUAL(*ca2, *result_ca);
+ AssertChunkedEqual(*ca2, *result_ca);
}
TEST_F(TestBooleanKernel, InvertEmptyArray) {
diff --git a/cpp/src/arrow/csv/parser.cc b/cpp/src/arrow/csv/parser.cc
index b1d175a..a7ca71c 100644
--- a/cpp/src/arrow/csv/parser.cc
+++ b/cpp/src/arrow/csv/parser.cc
@@ -19,7 +19,6 @@
#include <algorithm>
#include <cstdio>
-#include <sstream>
#include <utility>
#include "arrow/memory_pool.h"
diff --git a/cpp/src/arrow/csv/test-common.h b/cpp/src/arrow/csv/test-common.h
index a8fdb43..624023f 100644
--- a/cpp/src/arrow/csv/test-common.h
+++ b/cpp/src/arrow/csv/test-common.h
@@ -19,7 +19,6 @@
#define ARROW_CSV_TEST_COMMON_H
#include <memory>
-#include <sstream>
#include <string>
#include <vector>
@@ -30,11 +29,11 @@ namespace arrow {
namespace csv {
std::string MakeCSVData(std::vector<std::string> lines) {
- std::stringstream ss;
+ std::string s;
for (const auto& line : lines) {
- ss << line;
+ s += line;
}
- return ss.str();
+ return s;
}
// Make a BlockParser from a vector of lines representing a CSV file
diff --git a/cpp/src/arrow/extension_type-test.cc b/cpp/src/arrow/extension_type-test.cc
index 1c917ea..90f96cd 100644
--- a/cpp/src/arrow/extension_type-test.cc
+++ b/cpp/src/arrow/extension_type-test.cc
@@ -42,6 +42,7 @@
#include "arrow/testing/gtest_common.h"
#include "arrow/testing/util.h"
#include "arrow/type.h"
+#include "arrow/util/key_value_metadata.h"
namespace arrow {
diff --git a/cpp/src/arrow/extension_type.cc b/cpp/src/arrow/extension_type.cc
index 5f52ffa..5a63459 100644
--- a/cpp/src/arrow/extension_type.cc
+++ b/cpp/src/arrow/extension_type.cc
@@ -25,6 +25,7 @@
#include <utility>
#include "arrow/array.h"
+#include "arrow/status.h"
#include "arrow/type.h"
#include "arrow/util/visibility.h"
diff --git a/cpp/src/arrow/flight/perf-server.cc b/cpp/src/arrow/flight/perf-server.cc
index b2c268b..3755f3d 100644
--- a/cpp/src/arrow/flight/perf-server.cc
+++ b/cpp/src/arrow/flight/perf-server.cc
@@ -29,7 +29,9 @@
#include "arrow/io/test-common.h"
#include "arrow/ipc/writer.h"
#include "arrow/record_batch.h"
+#include "arrow/testing/random.h"
#include "arrow/testing/util.h"
+#include "arrow/util/logging.h"
#include "arrow/flight/api.h"
#include "arrow/flight/internal.h"
@@ -125,8 +127,10 @@ Status GetPerfBatches(const perf::Token& token, const std::shared_ptr<Schema>& s
const int32_t length = token.definition().records_per_batch();
const int32_t ncolumns = 4;
for (int i = 0; i < ncolumns; ++i) {
- RETURN_NOT_OK(MakeRandomBuffer<int64_t>(length, default_memory_pool(), &buffer));
+ RETURN_NOT_OK(MakeRandomByteBuffer(length * sizeof(int64_t), default_memory_pool(),
+ &buffer, static_cast<int32_t>(i) /* seed */));
arrays.push_back(std::make_shared<Int64Array>(length, buffer));
+ RETURN_NOT_OK(ValidateArray(*arrays.back()));
}
*data_stream = std::unique_ptr<FlightDataStream>(
diff --git a/cpp/src/arrow/flight/test-integration-client.cc b/cpp/src/arrow/flight/test-integration-client.cc
index 66af90a..93a1a16 100644
--- a/cpp/src/arrow/flight/test-integration-client.cc
+++ b/cpp/src/arrow/flight/test-integration-client.cc
@@ -27,12 +27,14 @@
#include <gflags/gflags.h>
+#include "arrow/io/file.h"
#include "arrow/io/test-common.h"
#include "arrow/ipc/dictionary.h"
#include "arrow/ipc/json-integration.h"
#include "arrow/ipc/writer.h"
#include "arrow/record_batch.h"
#include "arrow/table.h"
+#include "arrow/testing/gtest_util.h"
#include "arrow/util/logging.h"
#include "arrow/flight/api.h"
diff --git a/cpp/src/arrow/flight/test-server.cc b/cpp/src/arrow/flight/test-server.cc
index a995bc6..a9070a4 100644
--- a/cpp/src/arrow/flight/test-server.cc
+++ b/cpp/src/arrow/flight/test-server.cc
@@ -25,6 +25,7 @@
#include <gflags/gflags.h>
+#include "arrow/buffer.h"
#include "arrow/io/test-common.h"
#include "arrow/record_batch.h"
#include "arrow/util/logging.h"
diff --git a/cpp/src/arrow/gpu/cuda_common.h b/cpp/src/arrow/gpu/cuda_common.h
index 2b630c8..87371ce 100644
--- a/cpp/src/arrow/gpu/cuda_common.h
+++ b/cpp/src/arrow/gpu/cuda_common.h
@@ -20,20 +20,11 @@
#ifndef ARROW_GPU_CUDA_COMMON_H
#define ARROW_GPU_CUDA_COMMON_H
-#include <sstream>
-
#include <cuda.h>
namespace arrow {
namespace cuda {
-#define CUDA_DCHECK(STMT) \
- do { \
- int ret = (STMT); \
- DCHECK_EQ(0, ret); \
- (void)ret; \
- } while (0)
-
#define CU_RETURN_NOT_OK(STMT) \
do { \
CUresult ret = (STMT); \
diff --git a/cpp/src/arrow/io/compressed-test.cc b/cpp/src/arrow/io/compressed-test.cc
index acc57cc..fb4dcac 100644
--- a/cpp/src/arrow/io/compressed-test.cc
+++ b/cpp/src/arrow/io/compressed-test.cc
@@ -27,6 +27,7 @@
#include "arrow/io/memory.h"
#include "arrow/io/test-common.h"
#include "arrow/status.h"
+#include "arrow/testing/gtest_util.h"
#include "arrow/testing/util.h"
#include "arrow/util/compression.h"
diff --git a/cpp/src/arrow/io/file-test.cc b/cpp/src/arrow/io/file-test.cc
index cba006e..c548785 100644
--- a/cpp/src/arrow/io/file-test.cc
+++ b/cpp/src/arrow/io/file-test.cc
@@ -25,8 +25,8 @@
#include <cstdio>
#include <cstdlib>
#include <cstring>
+#include <fstream>
#include <memory>
-#include <sstream> // IWYU pragma: keep
#include <string>
#include <thread>
#include <vector>
@@ -39,6 +39,7 @@
#include "arrow/io/test-common.h"
#include "arrow/memory_pool.h"
#include "arrow/status.h"
+#include "arrow/testing/gtest_util.h"
#include "arrow/testing/util.h"
#include "arrow/util/io-util.h"
@@ -95,7 +96,7 @@ class TestFileOutputStream : public FileTestFixture {
#if defined(_MSC_VER)
TEST_F(TestFileOutputStream, FileNameWideCharConversionRangeException) {
std::shared_ptr<FileOutputStream> file;
- // Form literal string with non-ASCII symbol(127 + 1)
+ // Invalid utf-8 filename
std::string file_name = "\x80";
ASSERT_RAISES(Invalid, FileOutputStream::Open(file_name, &file));
@@ -105,6 +106,8 @@ TEST_F(TestFileOutputStream, FileNameWideCharConversionRangeException) {
std::shared_ptr<ReadableFile> rd_file;
ASSERT_RAISES(Invalid, ReadableFile::Open(file_name, &rd_file));
}
+
+// TODO add a test with a valid utf-8 filename
#endif
TEST_F(TestFileOutputStream, DestructorClosesFile) {
diff --git a/cpp/src/arrow/io/memory.cc b/cpp/src/arrow/io/memory.cc
index 9c889e7..f8b8a13 100644
--- a/cpp/src/arrow/io/memory.cc
+++ b/cpp/src/arrow/io/memory.cc
@@ -275,6 +275,10 @@ BufferReader::BufferReader(const uint8_t* data, int64_t size)
BufferReader::BufferReader(const Buffer& buffer)
: BufferReader(buffer.data(), buffer.size()) {}
+BufferReader::BufferReader(const util::string_view& data)
+ : BufferReader(reinterpret_cast<const uint8_t*>(data.data()),
+ static_cast<int64_t>(data.size())) {}
+
Status BufferReader::Close() {
is_open_ = false;
return Status::OK();
diff --git a/cpp/src/arrow/io/memory.h b/cpp/src/arrow/io/memory.h
index f5a62cb..878d9bc 100644
--- a/cpp/src/arrow/io/memory.h
+++ b/cpp/src/arrow/io/memory.h
@@ -136,9 +136,7 @@ class ARROW_EXPORT BufferReader : public RandomAccessFile {
/// \brief Instantiate from std::string or arrow::util::string_view. Does not
/// own data
- explicit BufferReader(const util::string_view& data)
- : BufferReader(reinterpret_cast<const uint8_t*>(data.data()),
- static_cast<int64_t>(data.size())) {}
+ explicit BufferReader(const util::string_view& data);
Status Close() override;
bool closed() const override;
diff --git a/cpp/src/arrow/io/test-common.h b/cpp/src/arrow/io/test-common.cc
similarity index 64%
copy from cpp/src/arrow/io/test-common.h
copy to cpp/src/arrow/io/test-common.cc
index 6ae827c..8648f2e 100644
--- a/cpp/src/arrow/io/test-common.h
+++ b/cpp/src/arrow/io/test-common.cc
@@ -15,15 +15,11 @@
// specific language governing permissions and limitations
// under the License.
-#ifndef ARROW_IO_TEST_COMMON_H
-#define ARROW_IO_TEST_COMMON_H
+#include "arrow/io/test-common.h"
#include <algorithm>
#include <cstdint>
#include <fstream> // IWYU pragma: keep
-#include <memory>
-#include <string>
-#include <vector>
#ifdef _WIN32
#include <crtdbg.h>
@@ -41,8 +37,7 @@
namespace arrow {
namespace io {
-static inline void AssertFileContents(const std::string& path,
- const std::string& contents) {
+void AssertFileContents(const std::string& path, const std::string& contents) {
std::shared_ptr<ReadableFile> rf;
int64_t size;
@@ -55,20 +50,18 @@ static inline void AssertFileContents(const std::string& path,
ASSERT_TRUE(actual_data->Equals(Buffer(contents)));
}
-static inline bool FileExists(const std::string& path) {
- return std::ifstream(path.c_str()).good();
-}
+bool FileExists(const std::string& path) { return std::ifstream(path.c_str()).good(); }
#if defined(_WIN32)
-static inline void InvalidParamHandler(const wchar_t* expr, const wchar_t* func,
- const wchar_t* source_file,
- unsigned int source_line, uintptr_t reserved) {
+static void InvalidParamHandler(const wchar_t* expr, const wchar_t* func,
+ const wchar_t* source_file, unsigned int source_line,
+ uintptr_t reserved) {
wprintf(L"Invalid parameter in function '%s'. Source: '%s' line %d expression '%s'\n",
func, source_file, source_line, expr);
}
#endif
-static inline bool FileIsClosed(int fd) {
+bool FileIsClosed(int fd) {
#if defined(_WIN32)
// Disables default behavior on wrong params which causes the application to crash
// https://msdn.microsoft.com/en-us/library/ksazx244.aspx
@@ -91,7 +84,7 @@ static inline bool FileIsClosed(int fd) {
#endif
}
-static inline Status ZeroMemoryMap(MemoryMappedFile* file) {
+Status ZeroMemoryMap(MemoryMappedFile* file) {
constexpr int64_t kBufferSize = 512;
static constexpr uint8_t kZeroBytes[kBufferSize] = {0};
@@ -109,34 +102,26 @@ static inline Status ZeroMemoryMap(MemoryMappedFile* file) {
return Status::OK();
}
-class MemoryMapFixture {
- public:
- void TearDown() {
- for (auto path : tmp_files_) {
- ARROW_UNUSED(std::remove(path.c_str()));
- }
- }
-
- void CreateFile(const std::string& path, int64_t size) {
- std::shared_ptr<MemoryMappedFile> file;
- ASSERT_OK(MemoryMappedFile::Create(path, size, &file));
- tmp_files_.push_back(path);
+void MemoryMapFixture::TearDown() {
+ for (auto path : tmp_files_) {
+ ARROW_UNUSED(std::remove(path.c_str()));
}
+}
- Status InitMemoryMap(int64_t size, const std::string& path,
- std::shared_ptr<MemoryMappedFile>* mmap) {
- RETURN_NOT_OK(MemoryMappedFile::Create(path, size, mmap));
- tmp_files_.push_back(path);
- return Status::OK();
- }
+void MemoryMapFixture::CreateFile(const std::string& path, int64_t size) {
+ std::shared_ptr<MemoryMappedFile> file;
+ ASSERT_OK(MemoryMappedFile::Create(path, size, &file));
+ tmp_files_.push_back(path);
+}
- void AppendFile(const std::string& path) { tmp_files_.push_back(path); }
+Status MemoryMapFixture::InitMemoryMap(int64_t size, const std::string& path,
+ std::shared_ptr<MemoryMappedFile>* mmap) {
+ RETURN_NOT_OK(MemoryMappedFile::Create(path, size, mmap));
+ tmp_files_.push_back(path);
+ return Status::OK();
+}
- private:
- std::vector<std::string> tmp_files_;
-};
+void MemoryMapFixture::AppendFile(const std::string& path) { tmp_files_.push_back(path); }
} // namespace io
} // namespace arrow
-
-#endif // ARROW_IO_TEST_COMMON_H
diff --git a/cpp/src/arrow/io/test-common.h b/cpp/src/arrow/io/test-common.h
index 6ae827c..75e1347 100644
--- a/cpp/src/arrow/io/test-common.h
+++ b/cpp/src/arrow/io/test-common.h
@@ -18,119 +18,38 @@
#ifndef ARROW_IO_TEST_COMMON_H
#define ARROW_IO_TEST_COMMON_H
-#include <algorithm>
-#include <cstdint>
-#include <fstream> // IWYU pragma: keep
#include <memory>
#include <string>
#include <vector>
-#ifdef _WIN32
-#include <crtdbg.h>
-#include <io.h>
-#else
-#include <fcntl.h>
-#endif
-
-#include "arrow/buffer.h"
-#include "arrow/io/file.h"
-#include "arrow/io/memory.h"
-#include "arrow/memory_pool.h"
-#include "arrow/testing/gtest_util.h"
+#include "arrow/status.h"
+#include "arrow/util/visibility.h"
namespace arrow {
namespace io {
-static inline void AssertFileContents(const std::string& path,
- const std::string& contents) {
- std::shared_ptr<ReadableFile> rf;
- int64_t size;
-
- ASSERT_OK(ReadableFile::Open(path, &rf));
- ASSERT_OK(rf->GetSize(&size));
- ASSERT_EQ(size, contents.size());
-
- std::shared_ptr<Buffer> actual_data;
- ASSERT_OK(rf->Read(size, &actual_data));
- ASSERT_TRUE(actual_data->Equals(Buffer(contents)));
-}
-
-static inline bool FileExists(const std::string& path) {
- return std::ifstream(path.c_str()).good();
-}
-
-#if defined(_WIN32)
-static inline void InvalidParamHandler(const wchar_t* expr, const wchar_t* func,
- const wchar_t* source_file,
- unsigned int source_line, uintptr_t reserved) {
- wprintf(L"Invalid parameter in function '%s'. Source: '%s' line %d expression '%s'\n",
- func, source_file, source_line, expr);
-}
-#endif
-
-static inline bool FileIsClosed(int fd) {
-#if defined(_WIN32)
- // Disables default behavior on wrong params which causes the application to crash
- // https://msdn.microsoft.com/en-us/library/ksazx244.aspx
- _set_invalid_parameter_handler(InvalidParamHandler);
-
- // Disables possible assertion alert box on invalid input arguments
- _CrtSetReportMode(_CRT_ASSERT, 0);
-
- int new_fd = _dup(fd);
- if (new_fd == -1) {
- return errno == EBADF;
- }
- _close(new_fd);
- return false;
-#else
- if (-1 != fcntl(fd, F_GETFD)) {
- return false;
- }
- return errno == EBADF;
-#endif
-}
-
-static inline Status ZeroMemoryMap(MemoryMappedFile* file) {
- constexpr int64_t kBufferSize = 512;
- static constexpr uint8_t kZeroBytes[kBufferSize] = {0};
-
- RETURN_NOT_OK(file->Seek(0));
- int64_t position = 0;
- int64_t file_size;
- RETURN_NOT_OK(file->GetSize(&file_size));
-
- int64_t chunksize;
- while (position < file_size) {
- chunksize = std::min(kBufferSize, file_size - position);
- RETURN_NOT_OK(file->Write(kZeroBytes, chunksize));
- position += chunksize;
- }
- return Status::OK();
-}
-
-class MemoryMapFixture {
+class MemoryMappedFile;
+
+ARROW_EXPORT
+void AssertFileContents(const std::string& path, const std::string& contents);
+
+ARROW_EXPORT bool FileExists(const std::string& path);
+
+ARROW_EXPORT bool FileIsClosed(int fd);
+
+ARROW_EXPORT
+Status ZeroMemoryMap(MemoryMappedFile* file);
+
+class ARROW_EXPORT MemoryMapFixture {
public:
- void TearDown() {
- for (auto path : tmp_files_) {
- ARROW_UNUSED(std::remove(path.c_str()));
- }
- }
-
- void CreateFile(const std::string& path, int64_t size) {
- std::shared_ptr<MemoryMappedFile> file;
- ASSERT_OK(MemoryMappedFile::Create(path, size, &file));
- tmp_files_.push_back(path);
- }
+ void TearDown();
+
+ void CreateFile(const std::string& path, int64_t size);
Status InitMemoryMap(int64_t size, const std::string& path,
- std::shared_ptr<MemoryMappedFile>* mmap) {
- RETURN_NOT_OK(MemoryMappedFile::Create(path, size, mmap));
- tmp_files_.push_back(path);
- return Status::OK();
- }
+ std::shared_ptr<MemoryMappedFile>* mmap);
- void AppendFile(const std::string& path) { tmp_files_.push_back(path); }
+ void AppendFile(const std::string& path);
private:
std::vector<std::string> tmp_files_;
diff --git a/cpp/src/arrow/ipc/json-internal.h b/cpp/src/arrow/ipc/json-internal.h
index a68e0f6..aa2e06a 100644
--- a/cpp/src/arrow/ipc/json-internal.h
+++ b/cpp/src/arrow/ipc/json-internal.h
@@ -19,7 +19,6 @@
#define ARROW_IPC_JSON_INTERNAL_H
#include <memory>
-#include <sstream>
#include <string>
#include "arrow/json/rapidjson-defs.h"
diff --git a/cpp/src/arrow/ipc/json-test.cc b/cpp/src/arrow/ipc/json-test.cc
index 36f2d16..2a98862 100644
--- a/cpp/src/arrow/ipc/json-test.cc
+++ b/cpp/src/arrow/ipc/json-test.cc
@@ -296,7 +296,7 @@ TEST(TestJsonFileReadWrite, BasicRoundTrip) {
for (int i = 0; i < nbatches; ++i) {
std::shared_ptr<RecordBatch> batch;
ASSERT_OK(reader->ReadRecordBatch(i, &batch));
- ASSERT_RECORD_BATCHES_EQUAL(*batch, *batches[i]);
+ ASSERT_BATCHES_EQUAL(*batch, *batches[i]);
}
}
diff --git a/cpp/src/arrow/ipc/metadata-internal.cc b/cpp/src/arrow/ipc/metadata-internal.cc
index 9837cbe..7a1e3b6 100644
--- a/cpp/src/arrow/ipc/metadata-internal.cc
+++ b/cpp/src/arrow/ipc/metadata-internal.cc
@@ -39,6 +39,7 @@
#include "arrow/tensor.h"
#include "arrow/type.h"
#include "arrow/util/checked_cast.h"
+#include "arrow/util/key_value_metadata.h"
#include "arrow/util/logging.h"
#include "arrow/visitor_inline.h"
diff --git a/cpp/src/arrow/ipc/read-write-test.cc b/cpp/src/arrow/ipc/read-write-test.cc
index edae88c..c21a547 100644
--- a/cpp/src/arrow/ipc/read-write-test.cc
+++ b/cpp/src/arrow/ipc/read-write-test.cc
@@ -41,10 +41,13 @@
#include "arrow/sparse_tensor.h"
#include "arrow/status.h"
#include "arrow/tensor.h"
+#include "arrow/testing/gtest_util.h"
+#include "arrow/testing/random.h"
#include "arrow/testing/util.h"
#include "arrow/type.h"
#include "arrow/util/bit-util.h"
#include "arrow/util/checked_cast.h"
+#include "arrow/util/key_value_metadata.h"
namespace arrow {
diff --git a/cpp/src/arrow/json/parser.h b/cpp/src/arrow/json/parser.h
index f9fb5fc..ec12eee 100644
--- a/cpp/src/arrow/json/parser.h
+++ b/cpp/src/arrow/json/parser.h
@@ -22,6 +22,7 @@
#include "arrow/json/options.h"
#include "arrow/status.h"
+#include "arrow/util/key_value_metadata.h"
#include "arrow/util/macros.h"
#include "arrow/util/visibility.h"
diff --git a/cpp/src/arrow/memory_pool.cc b/cpp/src/arrow/memory_pool.cc
index 3e0366a..4164e74 100644
--- a/cpp/src/arrow/memory_pool.cc
+++ b/cpp/src/arrow/memory_pool.cc
@@ -23,7 +23,6 @@
#include <iostream> // IWYU pragma: keep
#include <limits>
#include <memory>
-#include <sstream> // IWYU pragma: keep
#include "arrow/status.h"
#include "arrow/util/logging.h" // IWYU pragma: keep
diff --git a/cpp/src/arrow/pretty_print.h b/cpp/src/arrow/pretty_print.h
index ca50bc0..9c2708f 100644
--- a/cpp/src/arrow/pretty_print.h
+++ b/cpp/src/arrow/pretty_print.h
@@ -18,7 +18,7 @@
#ifndef ARROW_PRETTY_PRINT_H
#define ARROW_PRETTY_PRINT_H
-#include <ostream>
+#include <iosfwd>
#include <string>
#include "arrow/util/visibility.h"
diff --git a/cpp/src/arrow/python/arrow_to_pandas.cc b/cpp/src/arrow/python/arrow_to_pandas.cc
index f0e4b92..2e39f32 100644
--- a/cpp/src/arrow/python/arrow_to_pandas.cc
+++ b/cpp/src/arrow/python/arrow_to_pandas.cc
@@ -24,7 +24,6 @@
#include <cmath>
#include <cstdint>
#include <memory>
-#include <sstream>
#include <string>
#include <unordered_map>
#include <vector>
diff --git a/cpp/src/arrow/python/common.cc b/cpp/src/arrow/python/common.cc
index 6ca989b..1d4b3ab 100644
--- a/cpp/src/arrow/python/common.cc
+++ b/cpp/src/arrow/python/common.cc
@@ -19,7 +19,7 @@
#include <cstdlib>
#include <mutex>
-#include <sstream>
+#include <string>
#include "arrow/memory_pool.h"
#include "arrow/status.h"
diff --git a/cpp/src/arrow/python/common.h b/cpp/src/arrow/python/common.h
index 27661a9..a759d39 100644
--- a/cpp/src/arrow/python/common.h
+++ b/cpp/src/arrow/python/common.h
@@ -19,8 +19,6 @@
#define ARROW_PYTHON_COMMON_H
#include <memory>
-#include <sstream>
-#include <string>
#include <utility>
#include "arrow/python/config.h"
diff --git a/cpp/src/arrow/python/deserialize.cc b/cpp/src/arrow/python/deserialize.cc
index e5091c4..f1690a8 100644
--- a/cpp/src/arrow/python/deserialize.cc
+++ b/cpp/src/arrow/python/deserialize.cc
@@ -21,7 +21,6 @@
#include <cstdint>
#include <memory>
-#include <sstream>
#include <string>
#include <utility>
#include <vector>
diff --git a/cpp/src/arrow/python/inference.cc b/cpp/src/arrow/python/inference.cc
index c9db5f4..6cf8bed 100644
--- a/cpp/src/arrow/python/inference.cc
+++ b/cpp/src/arrow/python/inference.cc
@@ -23,7 +23,6 @@
#include <algorithm>
#include <limits>
#include <map>
-#include <sstream>
#include <string>
#include <utility>
#include <vector>
diff --git a/cpp/src/arrow/python/inference.h b/cpp/src/arrow/python/inference.h
index f2e2305..8790250 100644
--- a/cpp/src/arrow/python/inference.h
+++ b/cpp/src/arrow/python/inference.h
@@ -24,8 +24,6 @@
#include "arrow/python/platform.h"
#include <memory>
-#include <ostream>
-#include <string>
#include "arrow/python/visibility.h"
#include "arrow/type.h"
diff --git a/cpp/src/arrow/python/numpy-internal.h b/cpp/src/arrow/python/numpy-internal.h
index e27ae5c..19bcde0 100644
--- a/cpp/src/arrow/python/numpy-internal.h
+++ b/cpp/src/arrow/python/numpy-internal.h
@@ -27,6 +27,7 @@
#include "arrow/python/platform.h"
#include <cstdint>
+#include <sstream>
#include <string>
namespace arrow {
diff --git a/cpp/src/arrow/python/numpy_convert.cc b/cpp/src/arrow/python/numpy_convert.cc
index 02ce0b6..f7068b3 100644
--- a/cpp/src/arrow/python/numpy_convert.cc
+++ b/cpp/src/arrow/python/numpy_convert.cc
@@ -21,7 +21,6 @@
#include <cstdint>
#include <memory>
-#include <sstream>
#include <string>
#include <vector>
diff --git a/cpp/src/arrow/python/numpy_to_arrow.cc b/cpp/src/arrow/python/numpy_to_arrow.cc
index ca3f596..b353a1e 100644
--- a/cpp/src/arrow/python/numpy_to_arrow.cc
+++ b/cpp/src/arrow/python/numpy_to_arrow.cc
@@ -28,7 +28,6 @@
#include <cstring>
#include <limits>
#include <memory>
-#include <sstream>
#include <string>
#include <utility>
#include <vector>
diff --git a/cpp/src/arrow/python/platform.h b/cpp/src/arrow/python/platform.h
index ca9b553..bc06df9 100644
--- a/cpp/src/arrow/python/platform.h
+++ b/cpp/src/arrow/python/platform.h
@@ -21,7 +21,6 @@
#ifndef ARROW_PYTHON_PLATFORM_H
#define ARROW_PYTHON_PLATFORM_H
-#include <iostream>
#include <Python.h> // IWYU pragma: export
#include <datetime.h>
diff --git a/cpp/src/arrow/python/util/datetime.h b/cpp/src/arrow/python/util/datetime.h
index 04ca307..a6e9c87 100644
--- a/cpp/src/arrow/python/util/datetime.h
+++ b/cpp/src/arrow/python/util/datetime.h
@@ -19,7 +19,6 @@
#define PYARROW_UTIL_DATETIME_H
#include <algorithm>
-#include <sstream>
#include <datetime.h>
#include "arrow/python/platform.h"
diff --git a/cpp/src/arrow/record_batch.cc b/cpp/src/arrow/record_batch.cc
index baaf5cb..2bc8c22 100644
--- a/cpp/src/arrow/record_batch.cc
+++ b/cpp/src/arrow/record_batch.cc
@@ -20,7 +20,6 @@
#include <algorithm>
#include <cstdlib>
#include <memory>
-#include <sstream>
#include <string>
#include <utility>
@@ -40,6 +39,13 @@ Status RecordBatch::AddColumn(int i, const std::string& field_name,
return AddColumn(i, field, column, out);
}
+std::shared_ptr<Array> RecordBatch::GetColumnByName(const std::string& name) const {
+ auto i = schema_->GetFieldIndex(name);
+ return i == -1 ? NULLPTR : column(i);
+}
+
+int RecordBatch::num_columns() const { return schema_->num_fields(); }
+
/// \class SimpleRecordBatch
/// \brief A basic, non-lazy in-memory record batch
class SimpleRecordBatch : public RecordBatch {
diff --git a/cpp/src/arrow/record_batch.h b/cpp/src/arrow/record_batch.h
index abae413..f80d4ed 100644
--- a/cpp/src/arrow/record_batch.h
+++ b/cpp/src/arrow/record_batch.h
@@ -23,17 +23,12 @@
#include <string>
#include <vector>
-#include "arrow/type.h"
+#include "arrow/type_fwd.h"
#include "arrow/util/macros.h"
#include "arrow/util/visibility.h"
namespace arrow {
-class Array;
-struct ArrayData;
-class Status;
-class Table;
-
/// \class RecordBatch
/// \brief Collection of equal-length arrays matching a particular Schema
///
@@ -95,10 +90,7 @@ class ARROW_EXPORT RecordBatch {
/// \brief Retrieve an array from the record batch
/// \param[in] name field name
/// \return an Array or null if no field was found
- std::shared_ptr<Array> GetColumnByName(const std::string& name) const {
- auto i = schema_->GetFieldIndex(name);
- return i == -1 ? NULLPTR : column(i);
- }
+ std::shared_ptr<Array> GetColumnByName(const std::string& name) const;
/// \brief Retrieve an array's internaldata from the record batch
/// \param[in] i field index, does not boundscheck
@@ -141,7 +133,7 @@ class ARROW_EXPORT RecordBatch {
const std::string& column_name(int i) const;
/// \return the number of columns in the table
- int num_columns() const { return schema_->num_fields(); }
+ int num_columns() const;
/// \return the number of rows (the corresponding length of each column)
int64_t num_rows() const { return num_rows_; }
diff --git a/cpp/src/arrow/status.cc b/cpp/src/arrow/status.cc
index 7d742f1..e97dc8c 100644
--- a/cpp/src/arrow/status.cc
+++ b/cpp/src/arrow/status.cc
@@ -12,8 +12,9 @@
#include "arrow/status.h"
-#include <assert.h>
-#include <sstream>
+#include <cassert>
+#include <cstdlib>
+#include <iostream>
namespace arrow {
@@ -114,4 +115,15 @@ std::string Status::ToString() const {
return result;
}
+void Status::Abort() const { Abort(std::string()); }
+
+void Status::Abort(const std::string& message) const {
+ std::cerr << "-- Arrow Fatal Error --\n";
+ if (!message.empty()) {
+ std::cerr << message << "\n";
+ }
+ std::cerr << ToString() << std::endl;
+ std::abort();
+}
+
} // namespace arrow
diff --git a/cpp/src/arrow/status.h b/cpp/src/arrow/status.h
index 6c23864..790d9b7 100644
--- a/cpp/src/arrow/status.h
+++ b/cpp/src/arrow/status.h
@@ -335,6 +335,9 @@ class ARROW_EXPORT Status {
/// \brief Return the specific error message attached to this status.
std::string message() const { return ok() ? "" : state_->msg; }
+ [[noreturn]] void Abort() const;
+ [[noreturn]] void Abort(const std::string& message) const;
+
private:
struct State {
StatusCode code;
diff --git a/cpp/src/arrow/table.cc b/cpp/src/arrow/table.cc
index 669ff20..b018b8b 100644
--- a/cpp/src/arrow/table.cc
+++ b/cpp/src/arrow/table.cc
@@ -21,7 +21,6 @@
#include <cstdlib>
#include <limits>
#include <memory>
-#include <sstream>
#include <utility>
#include "arrow/array.h"
diff --git a/cpp/src/arrow/tensor.cc b/cpp/src/arrow/tensor.cc
index 8c1c58a..743a9bc 100644
--- a/cpp/src/arrow/tensor.cc
+++ b/cpp/src/arrow/tensor.cc
@@ -27,6 +27,7 @@
#include <vector>
#include "arrow/compare.h"
+#include "arrow/status.h"
#include "arrow/type.h"
#include "arrow/type_traits.h"
#include "arrow/util/checked_cast.h"
diff --git a/cpp/src/arrow/testing/gtest_util.cc b/cpp/src/arrow/testing/gtest_util.cc
index 4811954..ee66b2e 100644
--- a/cpp/src/arrow/testing/gtest_util.cc
+++ b/cpp/src/arrow/testing/gtest_util.cc
@@ -29,7 +29,6 @@
#include <iostream>
#include <limits>
#include <memory>
-#include <random>
#include <sstream>
#include <string>
#include <vector>
@@ -46,8 +45,33 @@
namespace arrow {
+static void PrintColumn(const Column& col, std::stringstream* ss) {
+ const ChunkedArray& carr = *col.data();
+ for (int i = 0; i < carr.num_chunks(); ++i) {
+ auto c1 = carr.chunk(i);
+ *ss << "Chunk " << i << std::endl;
+ ARROW_EXPECT_OK(::arrow::PrettyPrint(*c1, 0, ss));
+ *ss << std::endl;
+ }
+}
+
+template <typename T>
+void AssertTsEqual(const T& expected, const T& actual) {
+ if (!expected.Equals(actual)) {
+ std::stringstream pp_expected;
+ std::stringstream pp_actual;
+ ARROW_EXPECT_OK(PrettyPrint(expected, 0, &pp_expected));
+ ARROW_EXPECT_OK(PrettyPrint(actual, 0, &pp_actual));
+ FAIL() << "Got: \n" << pp_actual.str() << "\nExpected: \n" << pp_expected.str();
+ }
+}
+
void AssertArraysEqual(const Array& expected, const Array& actual) {
- ASSERT_ARRAYS_EQUAL(expected, actual);
+ AssertTsEqual(expected, actual);
+}
+
+void AssertBatchesEqual(const RecordBatch& expected, const RecordBatch& actual) {
+ AssertTsEqual(expected, actual);
}
void AssertChunkedEqual(const ChunkedArray& expected, const ChunkedArray& actual) {
@@ -117,16 +141,6 @@ std::shared_ptr<Array> ArrayFromJSON(const std::shared_ptr<DataType>& type,
return out;
}
-void PrintColumn(const Column& col, std::stringstream* ss) {
- const ChunkedArray& carr = *col.data();
- for (int i = 0; i < carr.num_chunks(); ++i) {
- auto c1 = carr.chunk(i);
- *ss << "Chunk " << i << std::endl;
- ARROW_EXPECT_OK(::arrow::PrettyPrint(*c1, 0, ss));
- *ss << std::endl;
- }
-}
-
void AssertTablesEqual(const Table& expected, const Table& actual,
bool same_chunk_layout) {
ASSERT_EQ(expected.num_columns(), actual.num_columns());
diff --git a/cpp/src/arrow/testing/gtest_util.h b/cpp/src/arrow/testing/gtest_util.h
index 88ffc22..c44bb17 100644
--- a/cpp/src/arrow/testing/gtest_util.h
+++ b/cpp/src/arrow/testing/gtest_util.h
@@ -21,25 +21,17 @@
#include <cstdint>
#include <cstdlib>
#include <cstring>
-#include <iostream>
-#include <limits>
#include <memory>
-#include <random>
-#include <sstream>
#include <string>
#include <type_traits>
#include <vector>
#include <gtest/gtest.h>
-#include "arrow/array.h"
#include "arrow/buffer.h"
#include "arrow/builder.h"
-#include "arrow/memory_pool.h"
-#include "arrow/pretty_print.h"
-#include "arrow/record_batch.h"
#include "arrow/status.h"
-#include "arrow/type.h"
+#include "arrow/type_fwd.h"
#include "arrow/type_traits.h"
#include "arrow/util/bit-util.h"
#include "arrow/util/logging.h"
@@ -83,13 +75,12 @@
EXPECT_TRUE(_st.ok()); \
} while (false)
-#define ABORT_NOT_OK(expr) \
- do { \
- ::arrow::Status _st = (expr); \
- if (ARROW_PREDICT_FALSE(!_st.ok())) { \
- std::cerr << _st.ToString() << "\n"; \
- std::abort(); \
- } \
+#define ABORT_NOT_OK(s) \
+ do { \
+ ::arrow::Status _st = (s); \
+ if (ARROW_PREDICT_FALSE(!_st.ok())) { \
+ _st.Abort(); \
+ } \
} while (false);
namespace arrow {
@@ -101,8 +92,10 @@ typedef ::testing::Types<UInt8Type, UInt16Type, UInt32Type, UInt64Type, Int8Type
Int16Type, Int32Type, Int64Type, FloatType, DoubleType>
NumericArrowTypes;
+class Array;
class ChunkedArray;
class Column;
+class RecordBatch;
class Table;
namespace compute {
@@ -113,22 +106,12 @@ using Datum = compute::Datum;
using ArrayVector = std::vector<std::shared_ptr<Array>>;
-#define ASSERT_PP_EQUAL(LEFT, RIGHT) \
- do { \
- if (!(LEFT).Equals((RIGHT))) { \
- std::stringstream pp_result; \
- std::stringstream pp_expected; \
- \
- ARROW_EXPECT_OK(PrettyPrint(RIGHT, 0, &pp_result)); \
- ARROW_EXPECT_OK(PrettyPrint(LEFT, 0, &pp_expected)); \
- FAIL() << "Got: \n" << pp_result.str() << "\nExpected: \n" << pp_expected.str(); \
- } \
- } while (false)
-
-#define ASSERT_ARRAYS_EQUAL(lhs, rhs) ASSERT_PP_EQUAL(lhs, rhs)
-#define ASSERT_RECORD_BATCHES_EQUAL(lhs, rhs) ASSERT_PP_EQUAL(lhs, rhs)
+#define ASSERT_ARRAYS_EQUAL(lhs, rhs) AssertArraysEqual((lhs), (rhs))
+#define ASSERT_BATCHES_EQUAL(lhs, rhs) AssertBatchesEqual((lhs), (rhs))
ARROW_EXPORT void AssertArraysEqual(const Array& expected, const Array& actual);
+ARROW_EXPORT void AssertBatchesEqual(const RecordBatch& expected,
+ const RecordBatch& actual);
ARROW_EXPORT void AssertChunkedEqual(const ChunkedArray& expected,
const ChunkedArray& actual);
ARROW_EXPORT void AssertChunkedEqual(const ChunkedArray& actual,
@@ -139,7 +122,6 @@ ARROW_EXPORT void AssertBufferEqual(const Buffer& buffer, const std::string& exp
ARROW_EXPORT void AssertBufferEqual(const Buffer& buffer, const Buffer& expected);
ARROW_EXPORT void AssertSchemaEqual(const Schema& lhs, const Schema& rhs);
-ARROW_EXPORT void PrintColumn(const Column& col, std::stringstream* ss);
ARROW_EXPORT void AssertTablesEqual(const Table& expected, const Table& actual,
bool same_chunk_layout = true);
@@ -176,19 +158,6 @@ void FinishAndCheckPadding(BuilderType* builder, std::shared_ptr<Array>* out) {
#define DECL_TYPE() typedef typename TestFixture::Type Type;
-#define ASSERT_BATCHES_EQUAL(LEFT, RIGHT) \
- do { \
- if (!(LEFT).ApproxEquals(RIGHT)) { \
- std::stringstream ss; \
- ss << "Left:\n"; \
- ASSERT_OK(PrettyPrint(LEFT, 0, &ss)); \
- \
- ss << "\nRight:\n"; \
- ASSERT_OK(PrettyPrint(RIGHT, 0, &ss)); \
- FAIL() << ss.str(); \
- } \
- } while (false)
-
// ArrayFromJSON: construct an Array from a simple JSON representation
ARROW_EXPORT
diff --git a/cpp/src/arrow/testing/random.h b/cpp/src/arrow/testing/random.h
index f69b705..6b188fd 100644
--- a/cpp/src/arrow/testing/random.h
+++ b/cpp/src/arrow/testing/random.h
@@ -17,10 +17,13 @@
#pragma once
+#include <algorithm>
+#include <cassert>
#include <cstdint>
#include <limits>
#include <memory>
#include <random>
+#include <vector>
#include "arrow/type.h"
#include "arrow/util/visibility.h"
@@ -235,4 +238,35 @@ class ARROW_EXPORT RandomArrayGenerator {
};
} // namespace random
+
+//
+// Assorted functions
+//
+
+template <typename T, typename U>
+void randint(int64_t N, T lower, T upper, std::vector<U>* out) {
+ const int random_seed = 0;
+ std::default_random_engine gen(random_seed);
+ std::uniform_int_distribution<T> d(lower, upper);
+ out->resize(N, static_cast<T>(0));
+ std::generate(out->begin(), out->end(), [&d, &gen] { return static_cast<U>(d(gen)); });
+}
+
+template <typename T, typename U>
+void random_real(int64_t n, uint32_t seed, T min_value, T max_value,
+ std::vector<U>* out) {
+ std::default_random_engine gen(seed);
+ std::uniform_real_distribution<T> d(min_value, max_value);
+ out->resize(n, static_cast<T>(0));
+ std::generate(out->begin(), out->end(), [&d, &gen] { return static_cast<U>(d(gen)); });
+}
+
+template <typename T, typename U>
+void rand_uniform_int(int64_t n, uint32_t seed, T min_value, T max_value, U* out) {
+ assert(out || (n == 0));
+ std::default_random_engine gen(seed);
+ std::uniform_int_distribution<T> d(min_value, max_value);
+ std::generate(out, out + n, [&d, &gen] { return static_cast<U>(d(gen)); });
+}
+
} // namespace arrow
diff --git a/cpp/src/arrow/testing/util.cc b/cpp/src/arrow/testing/util.cc
index 429f6bb..f30f704 100644
--- a/cpp/src/arrow/testing/util.cc
+++ b/cpp/src/arrow/testing/util.cc
@@ -17,6 +17,9 @@
#include "arrow/testing/util.h"
+#include <chrono>
+#include <random>
+
#ifndef _WIN32
#include <sys/stat.h> // IWYU pragma: keep
#include <sys/wait.h> // IWYU pragma: keep
@@ -24,9 +27,15 @@
#endif
#include "arrow/table.h"
+#include "arrow/testing/random.h"
+#include "arrow/util/logging.h"
namespace arrow {
+uint64_t random_seed() {
+ return std::chrono::high_resolution_clock::now().time_since_epoch().count();
+}
+
void random_null_bytes(int64_t n, double pct_null, uint8_t* null_bytes) {
const int random_seed = 0;
std::default_random_engine gen(random_seed);
diff --git a/cpp/src/arrow/testing/util.h b/cpp/src/arrow/testing/util.h
index ad2350e..d12f57e 100644
--- a/cpp/src/arrow/testing/util.h
+++ b/cpp/src/arrow/testing/util.h
@@ -18,62 +18,36 @@
#pragma once
#include <algorithm>
-#include <chrono>
#include <cstdint>
#include <cstdlib>
#include <cstring>
-#include <iostream>
#include <limits>
#include <memory>
-#include <random>
-#include <sstream>
#include <string>
#include <type_traits>
#include <vector>
-#include "arrow/array.h"
#include "arrow/buffer.h"
-#include "arrow/builder.h"
-#include "arrow/memory_pool.h"
-#include "arrow/pretty_print.h"
#include "arrow/record_batch.h"
#include "arrow/status.h"
-#include "arrow/type.h"
-#include "arrow/type_traits.h"
-#include "arrow/util/bit-util.h"
-#include "arrow/util/logging.h"
+#include "arrow/type_fwd.h"
#include "arrow/util/macros.h"
#include "arrow/util/visibility.h"
namespace arrow {
+class Array;
class ChunkedArray;
class Column;
+class MemoryPool;
+class RecordBatch;
class Table;
using ArrayVector = std::vector<std::shared_ptr<Array>>;
-template <typename T, typename U>
-void randint(int64_t N, T lower, T upper, std::vector<U>* out) {
- const int random_seed = 0;
- std::default_random_engine gen(random_seed);
- std::uniform_int_distribution<T> d(lower, upper);
- out->resize(N, static_cast<T>(0));
- std::generate(out->begin(), out->end(), [&d, &gen] { return static_cast<U>(d(gen)); });
-}
-
-template <typename T, typename U>
-void random_real(int64_t n, uint32_t seed, T min_value, T max_value,
- std::vector<U>* out) {
- std::default_random_engine gen(seed);
- std::uniform_real_distribution<T> d(min_value, max_value);
- out->resize(n, static_cast<T>(0));
- std::generate(out->begin(), out->end(), [&d, &gen] { return static_cast<U>(d(gen)); });
-}
-
template <typename T>
-inline Status CopyBufferFromVector(const std::vector<T>& values, MemoryPool* pool,
- std::shared_ptr<Buffer>* result) {
+Status CopyBufferFromVector(const std::vector<T>& values, MemoryPool* pool,
+ std::shared_ptr<Buffer>* result) {
int64_t nbytes = static_cast<int>(values.size()) * sizeof(T);
std::shared_ptr<Buffer> buffer;
@@ -103,39 +77,7 @@ ARROW_EXPORT Status MakeRandomByteBuffer(int64_t length, MemoryPool* pool,
std::shared_ptr<ResizableBuffer>* out,
uint32_t seed = 0);
-static inline uint64_t random_seed() {
- return std::chrono::high_resolution_clock::now().time_since_epoch().count();
-}
-
-template <typename T, typename U>
-void rand_uniform_int(int64_t n, uint32_t seed, T min_value, T max_value, U* out) {
- DCHECK(out || (n == 0));
- std::default_random_engine gen(seed);
- std::uniform_int_distribution<T> d(min_value, max_value);
- std::generate(out, out + n, [&d, &gen] { return static_cast<U>(d(gen)); });
-}
-
-template <typename T, typename Enable = void>
-struct GenerateRandom {};
-
-template <typename T>
-struct GenerateRandom<T, typename std::enable_if<std::is_integral<T>::value>::type> {
- static void Gen(int64_t length, uint32_t seed, void* out) {
- rand_uniform_int(length, seed, std::numeric_limits<T>::min(),
- std::numeric_limits<T>::max(), reinterpret_cast<T*>(out));
- }
-};
-
-template <typename T>
-Status MakeRandomBuffer(int64_t length, MemoryPool* pool,
- std::shared_ptr<ResizableBuffer>* out, uint32_t seed = 0) {
- DCHECK(pool);
- std::shared_ptr<ResizableBuffer> result;
- RETURN_NOT_OK(AllocateResizableBuffer(pool, sizeof(T) * length, &result));
- GenerateRandom<T>::Gen(length, seed, result->mutable_data());
- *out = result;
- return Status::OK();
-}
+ARROW_EXPORT uint64_t random_seed();
template <class T, class Builder>
Status MakeArray(const std::vector<uint8_t>& valid_bytes, const std::vector<T>& values,
diff --git a/cpp/src/arrow/type-test.cc b/cpp/src/arrow/type-test.cc
index 3c4adc9..94be608 100644
--- a/cpp/src/arrow/type-test.cc
+++ b/cpp/src/arrow/type-test.cc
@@ -30,6 +30,7 @@
#include "arrow/type.h"
#include "arrow/type_traits.h"
#include "arrow/util/checked_cast.h"
+#include "arrow/util/key_value_metadata.h"
namespace arrow {
diff --git a/cpp/src/arrow/type.cc b/cpp/src/arrow/type.cc
index 67f07ea..93c6f39 100644
--- a/cpp/src/arrow/type.cc
+++ b/cpp/src/arrow/type.cc
@@ -19,8 +19,10 @@
#include <climits>
#include <cstddef>
+#include <ostream>
#include <sstream> // IWYU pragma: keep
#include <string>
+#include <unordered_map>
#include <utility>
#include <vector>
@@ -118,6 +120,11 @@ bool DataType::Equals(const std::shared_ptr<DataType>& other) const {
return Equals(*other.get());
}
+std::ostream& operator<<(std::ostream& os, const DataType& type) {
+ os << type.ToString();
+ return os;
+}
+
std::string BooleanType::ToString() const { return name(); }
FloatingPoint::Precision HalfFloatType::precision() const { return FloatingPoint::HALF; }
@@ -191,6 +198,24 @@ std::string Time64Type::ToString() const {
return ss.str();
}
+std::ostream& operator<<(std::ostream& os, TimeUnit::type unit) {
+ switch (unit) {
+ case TimeUnit::SECOND:
+ os << "s";
+ break;
+ case TimeUnit::MILLI:
+ os << "ms";
+ break;
+ case TimeUnit::MICRO:
+ os << "us";
+ break;
+ case TimeUnit::NANO:
+ os << "ns";
+ break;
+ }
+ return os;
+}
+
// ----------------------------------------------------------------------
// Timestamp types
@@ -271,11 +296,21 @@ int LookupNameIndex(const std::unordered_multimap<std::string, int>& name_to_ind
} // namespace
+class StructType::Impl {
+ public:
+ explicit Impl(const std::vector<std::shared_ptr<Field>>& fields)
+ : name_to_index_(CreateNameToIndexMap(fields)) {}
+
+ const std::unordered_multimap<std::string, int> name_to_index_;
+};
+
StructType::StructType(const std::vector<std::shared_ptr<Field>>& fields)
- : NestedType(Type::STRUCT), name_to_index_(CreateNameToIndexMap(fields)) {
+ : NestedType(Type::STRUCT), impl_(new Impl(fields)) {
children_ = fields;
}
+StructType::~StructType() {}
+
std::string StructType::ToString() const {
std::stringstream s;
s << "struct<";
@@ -296,12 +331,12 @@ std::shared_ptr<Field> StructType::GetFieldByName(const std::string& name) const
}
int StructType::GetFieldIndex(const std::string& name) const {
- return LookupNameIndex(name_to_index_, name);
+ return LookupNameIndex(impl_->name_to_index_, name);
}
std::vector<int> StructType::GetAllFieldIndices(const std::string& name) const {
std::vector<int> result;
- auto p = name_to_index_.equal_range(name);
+ auto p = impl_->name_to_index_.equal_range(name);
for (auto it = p.first; it != p.second; ++it) {
result.push_back(it->second);
}
@@ -311,7 +346,7 @@ std::vector<int> StructType::GetAllFieldIndices(const std::string& name) const {
std::vector<std::shared_ptr<Field>> StructType::GetAllFieldsByName(
const std::string& name) const {
std::vector<std::shared_ptr<Field>> result;
- auto p = name_to_index_.equal_range(name);
+ auto p = impl_->name_to_index_.equal_range(name);
for (auto it = p.first; it != p.second; ++it) {
result.push_back(children_[it->second]);
}
@@ -371,17 +406,44 @@ std::string NullType::ToString() const { return name(); }
// ----------------------------------------------------------------------
// Schema implementation
+class Schema::Impl {
+ public:
+ Impl(const std::vector<std::shared_ptr<Field>>& fields,
+ const std::shared_ptr<const KeyValueMetadata>& metadata)
+ : fields_(fields),
+ name_to_index_(CreateNameToIndexMap(fields_)),
+ metadata_(metadata) {}
+
+ Impl(std::vector<std::shared_ptr<Field>>&& fields,
+ const std::shared_ptr<const KeyValueMetadata>& metadata)
+ : fields_(std::move(fields)),
+ name_to_index_(CreateNameToIndexMap(fields_)),
+ metadata_(metadata) {}
+
+ std::vector<std::shared_ptr<Field>> fields_;
+ std::unordered_multimap<std::string, int> name_to_index_;
+ std::shared_ptr<const KeyValueMetadata> metadata_;
+};
+
Schema::Schema(const std::vector<std::shared_ptr<Field>>& fields,
const std::shared_ptr<const KeyValueMetadata>& metadata)
- : fields_(fields),
- name_to_index_(CreateNameToIndexMap(fields_)),
- metadata_(metadata) {}
+ : impl_(new Impl(fields, metadata)) {}
Schema::Schema(std::vector<std::shared_ptr<Field>>&& fields,
const std::shared_ptr<const KeyValueMetadata>& metadata)
- : fields_(std::move(fields)),
- name_to_index_(CreateNameToIndexMap(fields_)),
- metadata_(metadata) {}
+ : impl_(new Impl(std::move(fields), metadata)) {}
+
+Schema::Schema(const Schema& schema) : impl_(new Impl(*schema.impl_)) {}
+
+Schema::~Schema() {}
+
+int Schema::num_fields() const { return static_cast<int>(impl_->fields_.size()); }
+
+std::shared_ptr<Field> Schema::field(int i) const { return impl_->fields_[i]; }
+
+const std::vector<std::shared_ptr<Field>>& Schema::fields() const {
+ return impl_->fields_;
+}
bool Schema::Equals(const Schema& other, bool check_metadata) const {
if (this == &other) {
@@ -402,7 +464,7 @@ bool Schema::Equals(const Schema& other, bool check_metadata) const {
if (!check_metadata) {
return true;
} else if (this->HasMetadata() && other.HasMetadata()) {
- return metadata_->Equals(*other.metadata_);
+ return impl_->metadata_->Equals(*other.impl_->metadata_);
} else if (!this->HasMetadata() && !other.HasMetadata()) {
return true;
} else {
@@ -412,16 +474,16 @@ bool Schema::Equals(const Schema& other, bool check_metadata) const {
std::shared_ptr<Field> Schema::GetFieldByName(const std::string& name) const {
int i = GetFieldIndex(name);
- return i == -1 ? nullptr : fields_[i];
+ return i == -1 ? nullptr : impl_->fields_[i];
}
int Schema::GetFieldIndex(const std::string& name) const {
- return LookupNameIndex(name_to_index_, name);
+ return LookupNameIndex(impl_->name_to_index_, name);
}
std::vector<int> Schema::GetAllFieldIndices(const std::string& name) const {
std::vector<int> result;
- auto p = name_to_index_.equal_range(name);
+ auto p = impl_->name_to_index_.equal_range(name);
for (auto it = p.first; it != p.second; ++it) {
result.push_back(it->second);
}
@@ -431,9 +493,9 @@ std::vector<int> Schema::GetAllFieldIndices(const std::string& name) const {
std::vector<std::shared_ptr<Field>> Schema::GetAllFieldsByName(
const std::string& name) const {
std::vector<std::shared_ptr<Field>> result;
- auto p = name_to_index_.equal_range(name);
+ auto p = impl_->name_to_index_.equal_range(name);
for (auto it = p.first; it != p.second; ++it) {
- result.push_back(fields_[it->second]);
+ result.push_back(impl_->fields_[it->second]);
}
return result;
}
@@ -444,8 +506,8 @@ Status Schema::AddField(int i, const std::shared_ptr<Field>& field,
return Status::Invalid("Invalid column index to add field.");
}
- *out =
- std::make_shared<Schema>(internal::AddVectorElement(fields_, i, field), metadata_);
+ *out = std::make_shared<Schema>(internal::AddVectorElement(impl_->fields_, i, field),
+ impl_->metadata_);
return Status::OK();
}
@@ -455,24 +517,26 @@ Status Schema::SetField(int i, const std::shared_ptr<Field>& field,
return Status::Invalid("Invalid column index to add field.");
}
- *out = std::make_shared<Schema>(internal::ReplaceVectorElement(fields_, i, field),
- metadata_);
+ *out = std::make_shared<Schema>(
+ internal::ReplaceVectorElement(impl_->fields_, i, field), impl_->metadata_);
return Status::OK();
}
bool Schema::HasMetadata() const {
- return (metadata_ != nullptr) && (metadata_->size() > 0);
+ return (impl_->metadata_ != nullptr) && (impl_->metadata_->size() > 0);
}
std::shared_ptr<Schema> Schema::AddMetadata(
const std::shared_ptr<const KeyValueMetadata>& metadata) const {
- return std::make_shared<Schema>(fields_, metadata);
+ return std::make_shared<Schema>(impl_->fields_, metadata);
}
-std::shared_ptr<const KeyValueMetadata> Schema::metadata() const { return metadata_; }
+std::shared_ptr<const KeyValueMetadata> Schema::metadata() const {
+ return impl_->metadata_;
+}
std::shared_ptr<Schema> Schema::RemoveMetadata() const {
- return std::make_shared<Schema>(fields_);
+ return std::make_shared<Schema>(impl_->fields_);
}
Status Schema::RemoveField(int i, std::shared_ptr<Schema>* out) const {
@@ -480,7 +544,8 @@ Status Schema::RemoveField(int i, std::shared_ptr<Schema>* out) const {
return Status::Invalid("Invalid column index to remove field.");
}
- *out = std::make_shared<Schema>(internal::DeleteVectorElement(fields_, i), metadata_);
+ *out = std::make_shared<Schema>(internal::DeleteVectorElement(impl_->fields_, i),
+ impl_->metadata_);
return Status::OK();
}
@@ -488,7 +553,7 @@ std::string Schema::ToString() const {
std::stringstream buffer;
int i = 0;
- for (auto field : fields_) {
+ for (const auto& field : impl_->fields_) {
if (i > 0) {
buffer << std::endl;
}
@@ -496,8 +561,8 @@ std::string Schema::ToString() const {
++i;
}
- if (metadata_) {
- buffer << metadata_->ToString();
+ if (impl_->metadata_) {
+ buffer << impl_->metadata_->ToString();
}
return buffer.str();
@@ -505,7 +570,7 @@ std::string Schema::ToString() const {
std::vector<std::string> Schema::field_names() const {
std::vector<std::string> names;
- for (auto& field : fields_) {
+ for (const auto& field : impl_->fields_) {
names.push_back(field->name());
}
return names;
diff --git a/cpp/src/arrow/type.h b/cpp/src/arrow/type.h
index 75ee674..eeba7d4 100644
--- a/cpp/src/arrow/type.h
+++ b/cpp/src/arrow/type.h
@@ -20,17 +20,14 @@
#include <climits>
#include <cstdint>
+#include <iosfwd>
#include <memory>
-#include <ostream>
#include <string>
-#include <type_traits>
-#include <unordered_map>
#include <vector>
#include "arrow/status.h"
#include "arrow/type_fwd.h" // IWYU pragma: export
#include "arrow/util/checked_cast.h"
-#include "arrow/util/key_value_metadata.h" // IWYU pragma: export
#include "arrow/util/macros.h"
#include "arrow/util/visibility.h"
#include "arrow/visitor.h" // IWYU pragma: keep
@@ -201,10 +198,7 @@ class ARROW_EXPORT DataType {
ARROW_DISALLOW_COPY_AND_ASSIGN(DataType);
};
-inline std::ostream& operator<<(std::ostream& os, const DataType& type) {
- os << type.ToString();
- return os;
-}
+std::ostream& operator<<(std::ostream& os, const DataType& type);
/// \brief Base class for all fixed-width data types
class ARROW_EXPORT FixedWidthType : public DataType {
@@ -548,6 +542,8 @@ class ARROW_EXPORT StructType : public NestedType {
explicit StructType(const std::vector<std::shared_ptr<Field>>& fields);
+ ~StructType() override;
+
std::string ToString() const override;
std::string name() const override { return "struct"; }
@@ -571,7 +567,8 @@ class ARROW_EXPORT StructType : public NestedType {
int GetChildIndex(const std::string& name) const;
private:
- std::unordered_multimap<std::string, int> name_to_index_;
+ class Impl;
+ std::unique_ptr<Impl> impl_;
};
/// \brief Base type class for (fixed-size) decimal data
@@ -691,23 +688,7 @@ struct TimeUnit {
enum type { SECOND = 0, MILLI = 1, MICRO = 2, NANO = 3 };
};
-static inline std::ostream& operator<<(std::ostream& os, TimeUnit::type unit) {
- switch (unit) {
- case TimeUnit::SECOND:
- os << "s";
- break;
- case TimeUnit::MILLI:
- os << "ms";
- break;
- case TimeUnit::MICRO:
- os << "us";
- break;
- case TimeUnit::NANO:
- os << "ns";
- break;
- }
- return os;
-}
+std::ostream& operator<<(std::ostream& os, TimeUnit::type unit);
/// Base type class for time data
class ARROW_EXPORT TimeType : public TemporalType, public ParametricType {
@@ -916,13 +897,22 @@ class ARROW_EXPORT Schema {
explicit Schema(std::vector<std::shared_ptr<Field>>&& fields,
const std::shared_ptr<const KeyValueMetadata>& metadata = NULLPTR);
- virtual ~Schema() = default;
+ Schema(const Schema&);
+
+ virtual ~Schema();
/// Returns true if all of the schema fields are equal
bool Equals(const Schema& other, bool check_metadata = true) const;
+ /// \brief Return the number of fields (columns) in the schema
+ int num_fields() const;
+
/// Return the ith schema element. Does not boundscheck
- std::shared_ptr<Field> field(int i) const { return fields_[i]; }
+ std::shared_ptr<Field> field(int i) const;
+
+ const std::vector<std::shared_ptr<Field>>& fields() const;
+
+ std::vector<std::string> field_names() const;
/// Returns null if name not found
std::shared_ptr<Field> GetFieldByName(const std::string& name) const;
@@ -936,10 +926,6 @@ class ARROW_EXPORT Schema {
/// Return the indices of all fields having this name
std::vector<int> GetAllFieldIndices(const std::string& name) const;
- const std::vector<std::shared_ptr<Field>>& fields() const { return fields_; }
-
- std::vector<std::string> field_names() const;
-
/// \brief The custom key-value metadata, if any
///
/// \return metadata may be null
@@ -967,15 +953,9 @@ class ARROW_EXPORT Schema {
/// \brief Indicates that Schema has non-empty KevValueMetadata
bool HasMetadata() const;
- /// \brief Return the number of fields (columns) in the schema
- int num_fields() const { return static_cast<int>(fields_.size()); }
-
private:
- std::vector<std::shared_ptr<Field>> fields_;
-
- std::unordered_multimap<std::string, int> name_to_index_;
-
- std::shared_ptr<const KeyValueMetadata> metadata_;
+ class Impl;
+ std::unique_ptr<Impl> impl_;
};
// ----------------------------------------------------------------------
diff --git a/cpp/src/arrow/type_fwd.h b/cpp/src/arrow/type_fwd.h
index 3211d2a..040ccf2 100644
--- a/cpp/src/arrow/type_fwd.h
+++ b/cpp/src/arrow/type_fwd.h
@@ -27,6 +27,7 @@ namespace arrow {
class Status;
class DataType;
+class KeyValueMetadata;
class Array;
struct ArrayData;
class ArrayBuilder;
diff --git a/cpp/src/arrow/util/compression_brotli.cc b/cpp/src/arrow/util/compression_brotli.cc
index 8a9beb6..051b8c0 100644
--- a/cpp/src/arrow/util/compression_brotli.cc
+++ b/cpp/src/arrow/util/compression_brotli.cc
@@ -19,7 +19,6 @@
#include <cstddef>
#include <cstdint>
-#include <sstream>
#include <brotli/decode.h>
#include <brotli/encode.h>
diff --git a/cpp/src/arrow/util/compression_lz4.cc b/cpp/src/arrow/util/compression_lz4.cc
index 238628b..1efd4c6 100644
--- a/cpp/src/arrow/util/compression_lz4.cc
+++ b/cpp/src/arrow/util/compression_lz4.cc
@@ -19,7 +19,6 @@
#include <cstdint>
#include <cstring>
-#include <sstream>
#include <lz4.h>
#include <lz4frame.h>
diff --git a/cpp/src/arrow/util/compression_snappy.cc b/cpp/src/arrow/util/compression_snappy.cc
index 2113f98..963de69 100644
--- a/cpp/src/arrow/util/compression_snappy.cc
+++ b/cpp/src/arrow/util/compression_snappy.cc
@@ -19,7 +19,6 @@
#include <cstddef>
#include <cstdint>
-#include <sstream>
#include <snappy.h>
diff --git a/cpp/src/arrow/util/compression_zlib.cc b/cpp/src/arrow/util/compression_zlib.cc
index 202ef06..5afd5e3 100644
--- a/cpp/src/arrow/util/compression_zlib.cc
+++ b/cpp/src/arrow/util/compression_zlib.cc
@@ -22,7 +22,6 @@
#include <cstring>
#include <limits>
#include <memory>
-#include <sstream>
#include <string>
#include <zconf.h>
diff --git a/cpp/src/arrow/util/compression_zstd.cc b/cpp/src/arrow/util/compression_zstd.cc
index 4972f43..24a7329 100644
--- a/cpp/src/arrow/util/compression_zstd.cc
+++ b/cpp/src/arrow/util/compression_zstd.cc
@@ -19,7 +19,6 @@
#include <cstddef>
#include <cstdint>
-#include <sstream>
#include <zstd.h>
diff --git a/cpp/src/arrow/util/concatenate.cc b/cpp/src/arrow/util/concatenate.cc
index 9a77501..f0bbaff 100644
--- a/cpp/src/arrow/util/concatenate.cc
+++ b/cpp/src/arrow/util/concatenate.cc
@@ -25,6 +25,7 @@
#include "arrow/array.h"
#include "arrow/memory_pool.h"
+#include "arrow/status.h"
#include "arrow/util/logging.h"
#include "arrow/util/visibility.h"
#include "arrow/visitor_inline.h"
diff --git a/cpp/src/arrow/util/decimal.cc b/cpp/src/arrow/util/decimal.cc
index 4802862..a405b46 100644
--- a/cpp/src/arrow/util/decimal.cc
+++ b/cpp/src/arrow/util/decimal.cc
@@ -23,6 +23,7 @@
#include <cstring>
#include <iomanip>
#include <limits>
+#include <ostream>
#include <sstream>
#include <string>
@@ -434,4 +435,9 @@ Status Decimal128::ToArrowStatus(DecimalStatus dstatus) const {
return status;
}
+std::ostream& operator<<(std::ostream& os, const Decimal128& decimal) {
+ os << decimal.ToIntegerString();
+ return os;
+}
+
} // namespace arrow
diff --git a/cpp/src/arrow/util/decimal.h b/cpp/src/arrow/util/decimal.h
index 3cb86d1..3a576d0 100644
--- a/cpp/src/arrow/util/decimal.h
+++ b/cpp/src/arrow/util/decimal.h
@@ -17,10 +17,9 @@
#pragma once
-#include <array>
#include <cstdint>
+#include <iosfwd>
#include <limits>
-#include <sstream>
#include <string>
#include <type_traits>
@@ -123,10 +122,8 @@ class ARROW_EXPORT Decimal128 : public BasicDecimal128 {
return Status::OK();
}
- friend std::ostream& operator<<(std::ostream& os, const Decimal128& decimal) {
- os << decimal.ToIntegerString();
- return os;
- }
+ friend ARROW_EXPORT std::ostream& operator<<(std::ostream& os,
+ const Decimal128& decimal);
private:
/// Converts internal error code to Status
diff --git a/cpp/src/arrow/util/io-util.cc b/cpp/src/arrow/util/io-util.cc
index 5f7e292..6eb80a9 100644
--- a/cpp/src/arrow/util/io-util.cc
+++ b/cpp/src/arrow/util/io-util.cc
@@ -28,8 +28,9 @@
#include <cerrno>
#include <cstdint>
#include <cstring>
-#include <sstream>
+#include <iostream>
#include <string>
+#include <utility>
#include <fcntl.h>
#include <stdlib.h>
@@ -47,9 +48,19 @@
// For filename conversion
#if defined(_MSC_VER)
-#include <boost/system/system_error.hpp> // NOLINT
#include <codecvt>
#include <locale>
+#include <stdexcept>
+#endif
+
+#if defined(_MSC_VER)
+#define USE_BOOST_FILESYSTEM 1
+#else
+#define USE_BOOST_FILESYSTEM 0
+#endif
+
+#if USE_BOOST_FILESYSTEM
+#include <boost/filesystem.hpp> // NOLINT
#endif
// ----------------------------------------------------------------------
@@ -92,12 +103,162 @@
#endif
-#include "arrow/status.h"
+#include "arrow/buffer.h"
#include "arrow/util/io-util.h"
namespace arrow {
+namespace io {
+
+//
+// StdoutStream implementation
+//
+
+StdoutStream::StdoutStream() : pos_(0) { set_mode(FileMode::WRITE); }
+
+Status StdoutStream::Close() { return Status::OK(); }
+
+bool StdoutStream::closed() const { return false; }
+
+Status StdoutStream::Tell(int64_t* position) const {
+ *position = pos_;
+ return Status::OK();
+}
+
+Status StdoutStream::Write(const void* data, int64_t nbytes) {
+ pos_ += nbytes;
+ std::cout.write(reinterpret_cast<const char*>(data), nbytes);
+ return Status::OK();
+}
+
+//
+// StderrStream implementation
+//
+
+StderrStream::StderrStream() : pos_(0) { set_mode(FileMode::WRITE); }
+
+Status StderrStream::Close() { return Status::OK(); }
+
+bool StderrStream::closed() const { return false; }
+
+Status StderrStream::Tell(int64_t* position) const {
+ *position = pos_;
+ return Status::OK();
+}
+
+Status StderrStream::Write(const void* data, int64_t nbytes) {
+ pos_ += nbytes;
+ std::cerr.write(reinterpret_cast<const char*>(data), nbytes);
+ return Status::OK();
+}
+
+//
+// StdinStream implementation
+//
+
+StdinStream::StdinStream() : pos_(0) { set_mode(FileMode::READ); }
+
+Status StdinStream::Close() { return Status::OK(); }
+
+bool StdinStream::closed() const { return false; }
+
+Status StdinStream::Tell(int64_t* position) const {
+ *position = pos_;
+ return Status::OK();
+}
+
+Status StdinStream::Read(int64_t nbytes, int64_t* bytes_read, void* out) {
+ std::cin.read(reinterpret_cast<char*>(out), nbytes);
+ if (std::cin) {
+ *bytes_read = nbytes;
+ pos_ += nbytes;
+ } else {
+ *bytes_read = 0;
+ }
+ return Status::OK();
+}
+
+Status StdinStream::Read(int64_t nbytes, std::shared_ptr<Buffer>* out) {
+ std::shared_ptr<ResizableBuffer> buffer;
+ ARROW_RETURN_NOT_OK(AllocateResizableBuffer(nbytes, &buffer));
+ int64_t bytes_read;
+ ARROW_RETURN_NOT_OK(Read(nbytes, &bytes_read, buffer->mutable_data()));
+ ARROW_RETURN_NOT_OK(buffer->Resize(bytes_read, false));
+ buffer->ZeroPadding();
+ *out = buffer;
+ return Status::OK();
+}
+
+} // namespace io
+
namespace internal {
+//
+// PlatformFilename implementation
+//
+
+struct PlatformFilename::Impl {
+#if USE_BOOST_FILESYSTEM
+ ::boost::filesystem::path path;
+#else
+ std::string path; // 8-bit Unix path
+#endif
+};
+
+PlatformFilename::PlatformFilename() : impl_(new Impl{}) {}
+
+PlatformFilename::~PlatformFilename() {}
+
+PlatformFilename::PlatformFilename(const PlatformFilename& other)
+ : impl_(new Impl{other.impl_->path}) {}
+
+PlatformFilename::PlatformFilename(PlatformFilename&& other)
+ : impl_(std::move(other.impl_)) {}
+
+PlatformFilename& PlatformFilename::operator=(const PlatformFilename& other) {
+ this->impl_.reset(new Impl{other.impl_->path});
+ return *this;
+}
+
+PlatformFilename& PlatformFilename::operator=(PlatformFilename&& other) {
+ this->impl_ = std::move(other.impl_);
+ return *this;
+}
+
+#if defined(_MSC_VER)
+PlatformFilename::PlatformFilename(const std::wstring& path) : impl_(new Impl{path}) {}
+#else
+PlatformFilename::PlatformFilename(const std::string& path) : impl_(new Impl{path}) {}
+#endif
+
+#if defined(_MSC_VER)
+const std::wstring& PlatformFilename::ToNative() const { return impl_->path.native(); }
+#elif USE_BOOST_FILESYSTEM
+const std::string& PlatformFilename::ToNative() const { return impl_->path.native(); }
+#else
+const std::string& PlatformFilename::ToNative() const { return impl_->path; }
+#endif
+
+#if USE_BOOST_FILESYSTEM
+std::string PlatformFilename::ToString() const { return impl_->path.string(); }
+#else
+std::string PlatformFilename::ToString() const { return impl_->path; }
+#endif
+
+Status PlatformFilename::FromString(const std::string& file_name, PlatformFilename* out) {
+#if defined(_MSC_VER)
+ try {
+ auto wpath =
+ std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>>{}.from_bytes(file_name);
+ *out = PlatformFilename(std::move(wpath));
+ } catch (std::range_error& e) {
+ return Status::Invalid(e.what());
+ }
+#else
+ *out = PlatformFilename(file_name);
+#endif
+ return Status::OK();
+}
+
#define CHECK_LSEEK(retval) \
if ((retval) == -1) return Status::IOError("lseek failed");
@@ -113,7 +274,7 @@ static inline Status CheckFileOpResult(int ret, int errno_actual,
const PlatformFilename& file_name,
const char* opname) {
if (ret == -1) {
- return Status::IOError("Failed to ", opname, " file: ", file_name.string(),
+ return Status::IOError("Failed to ", opname, " file: ", file_name.ToString(),
" , error: ", std::strerror(errno_actual));
}
return Status::OK();
@@ -124,17 +285,7 @@ static inline Status CheckFileOpResult(int ret, int errno_actual,
//
Status FileNameFromString(const std::string& file_name, PlatformFilename* out) {
-#if defined(_MSC_VER)
- try {
- std::codecvt_utf8_utf16<wchar_t> utf16_converter;
- out->assign(file_name, utf16_converter);
- } catch (boost::system::system_error& e) {
- return Status::Invalid(e.what());
- }
-#else
- *out = internal::PlatformFilename(file_name);
-#endif
- return Status::OK();
+ return PlatformFilename::FromString(file_name, out);
}
//
@@ -144,11 +295,11 @@ Status FileNameFromString(const std::string& file_name, PlatformFilename* out) {
Status FileOpenReadable(const PlatformFilename& file_name, int* fd) {
int ret, errno_actual;
#if defined(_MSC_VER)
- errno_actual = _wsopen_s(fd, file_name.wstring().c_str(),
+ errno_actual = _wsopen_s(fd, file_name.ToNative().c_str(),
_O_RDONLY | _O_BINARY | _O_NOINHERIT, _SH_DENYNO, _S_IREAD);
ret = *fd;
#else
- ret = *fd = open(file_name.c_str(), O_RDONLY | O_BINARY);
+ ret = *fd = open(file_name.ToNative().c_str(), O_RDONLY | O_BINARY);
errno_actual = errno;
#endif
@@ -161,10 +312,7 @@ Status FileOpenWritable(const PlatformFilename& file_name, bool write_only, bool
#if defined(_MSC_VER)
int oflag = _O_CREAT | _O_BINARY | _O_NOINHERIT;
- int pmode = _S_IWRITE;
- if (!write_only) {
- pmode |= _S_IREAD;
- }
+ int pmode = _S_IREAD | _S_IWRITE;
if (truncate) {
oflag |= _O_TRUNC;
@@ -179,7 +327,7 @@ Status FileOpenWritable(const PlatformFilename& file_name, bool write_only, bool
oflag |= _O_RDWR;
}
- errno_actual = _wsopen_s(fd, file_name.wstring().c_str(), oflag, _SH_DENYNO, pmode);
+ errno_actual = _wsopen_s(fd, file_name.ToNative().c_str(), oflag, _SH_DENYNO, pmode);
ret = *fd;
#else
@@ -198,7 +346,7 @@ Status FileOpenWritable(const PlatformFilename& file_name, bool write_only, bool
oflag |= O_RDWR;
}
- ret = *fd = open(file_name.c_str(), oflag, ARROW_WRITE_SHMODE);
+ ret = *fd = open(file_name.ToNative().c_str(), oflag, ARROW_WRITE_SHMODE);
errno_actual = errno;
#endif
return CheckFileOpResult(ret, errno_actual, file_name, "open local");
diff --git a/cpp/src/arrow/util/io-util.h b/cpp/src/arrow/util/io-util.h
index 7f8320a..1ecb790 100644
--- a/cpp/src/arrow/util/io-util.h
+++ b/cpp/src/arrow/util/io-util.h
@@ -18,40 +18,30 @@
#ifndef ARROW_UTIL_IO_UTIL_H
#define ARROW_UTIL_IO_UTIL_H
-#include <iostream>
#include <memory>
#include <string>
-#include "arrow/buffer.h"
#include "arrow/io/interfaces.h"
#include "arrow/status.h"
-#if defined(_MSC_VER)
-#include <boost/filesystem.hpp> // NOLINT
-#endif
-
namespace arrow {
+
+class Buffer;
+
namespace io {
// Output stream that just writes to stdout.
class ARROW_EXPORT StdoutStream : public OutputStream {
public:
- StdoutStream() : pos_(0) { set_mode(FileMode::WRITE); }
+ StdoutStream();
~StdoutStream() override {}
- Status Close() override { return Status::OK(); }
- bool closed() const override { return false; }
+ Status Close() override;
+ bool closed() const override;
- Status Tell(int64_t* position) const override {
- *position = pos_;
- return Status::OK();
- }
+ Status Tell(int64_t* position) const override;
- Status Write(const void* data, int64_t nbytes) override {
- pos_ += nbytes;
- std::cout.write(reinterpret_cast<const char*>(data), nbytes);
- return Status::OK();
- }
+ Status Write(const void* data, int64_t nbytes) override;
private:
int64_t pos_;
@@ -60,22 +50,15 @@ class ARROW_EXPORT StdoutStream : public OutputStream {
// Output stream that just writes to stderr.
class ARROW_EXPORT StderrStream : public OutputStream {
public:
- StderrStream() : pos_(0) { set_mode(FileMode::WRITE); }
+ StderrStream();
~StderrStream() override {}
- Status Close() override { return Status::OK(); }
- bool closed() const override { return false; }
+ Status Close() override;
+ bool closed() const override;
- Status Tell(int64_t* position) const override {
- *position = pos_;
- return Status::OK();
- }
+ Status Tell(int64_t* position) const override;
- Status Write(const void* data, int64_t nbytes) override {
- pos_ += nbytes;
- std::cerr.write(reinterpret_cast<const char*>(data), nbytes);
- return Status::OK();
- }
+ Status Write(const void* data, int64_t nbytes) override;
private:
int64_t pos_;
@@ -84,38 +67,17 @@ class ARROW_EXPORT StderrStream : public OutputStream {
// Input stream that just reads from stdin.
class ARROW_EXPORT StdinStream : public InputStream {
public:
- StdinStream() : pos_(0) { set_mode(FileMode::READ); }
+ StdinStream();
~StdinStream() override {}
- Status Close() override { return Status::OK(); }
- bool closed() const override { return false; }
-
- Status Tell(int64_t* position) const override {
- *position = pos_;
- return Status::OK();
- }
-
- Status Read(int64_t nbytes, int64_t* bytes_read, void* out) override {
- std::cin.read(reinterpret_cast<char*>(out), nbytes);
- if (std::cin) {
- *bytes_read = nbytes;
- pos_ += nbytes;
- } else {
- *bytes_read = 0;
- }
- return Status::OK();
- }
-
- Status Read(int64_t nbytes, std::shared_ptr<Buffer>* out) override {
- std::shared_ptr<ResizableBuffer> buffer;
- ARROW_RETURN_NOT_OK(AllocateResizableBuffer(nbytes, &buffer));
- int64_t bytes_read;
- ARROW_RETURN_NOT_OK(Read(nbytes, &bytes_read, buffer->mutable_data()));
- ARROW_RETURN_NOT_OK(buffer->Resize(bytes_read, false));
- buffer->ZeroPadding();
- *out = buffer;
- return Status::OK();
- }
+ Status Close() override;
+ bool closed() const override;
+
+ Status Tell(int64_t* position) const override;
+
+ Status Read(int64_t nbytes, int64_t* bytes_read, void* out) override;
+
+ Status Read(int64_t nbytes, std::shared_ptr<Buffer>* out) override;
private:
int64_t pos_;
@@ -125,26 +87,34 @@ class ARROW_EXPORT StdinStream : public InputStream {
namespace internal {
-#if defined(_MSC_VER)
-// namespace fs = boost::filesystem;
-// #define PlatformFilename fs::path
-typedef ::boost::filesystem::path PlatformFilename;
+class ARROW_EXPORT PlatformFilename {
+ public:
+ ~PlatformFilename();
+ PlatformFilename();
+ PlatformFilename(const PlatformFilename&);
+ PlatformFilename(PlatformFilename&&);
+ PlatformFilename& operator=(const PlatformFilename&);
+ PlatformFilename& operator=(PlatformFilename&&);
+#if defined(_MSC_VER)
+ const std::wstring& ToNative() const;
#else
+ const std::string& ToNative() const;
+#endif
+ std::string ToString() const;
-struct PlatformFilename {
- PlatformFilename() {}
- explicit PlatformFilename(const std::string& path) { utf8_path = path; }
-
- const char* c_str() const { return utf8_path.c_str(); }
-
- const std::string& string() const { return utf8_path; }
+ static Status FromString(const std::string& file_name, PlatformFilename* out);
- size_t length() const { return utf8_path.size(); }
+ private:
+ struct Impl;
+ std::unique_ptr<Impl> impl_;
- std::string utf8_path;
-};
+#if defined(_MSC_VER)
+ explicit PlatformFilename(const std::wstring& path);
+#else
+ explicit PlatformFilename(const std::string& path);
#endif
+};
ARROW_EXPORT
Status FileNameFromString(const std::string& file_name, PlatformFilename* out);
diff --git a/cpp/src/arrow/util/lazy-benchmark.cc b/cpp/src/arrow/util/lazy-benchmark.cc
index 19e6675..02c7de5 100644
--- a/cpp/src/arrow/util/lazy-benchmark.cc
+++ b/cpp/src/arrow/util/lazy-benchmark.cc
@@ -21,6 +21,7 @@
#include <benchmark/benchmark.h>
+#include "arrow/testing/random.h"
#include "arrow/testing/util.h"
#include "arrow/util/lazy.h"
diff --git a/cpp/src/arrow/util/lazy-test.cc b/cpp/src/arrow/util/lazy-test.cc
index 07d591e..aec99d4 100644
--- a/cpp/src/arrow/util/lazy-test.cc
+++ b/cpp/src/arrow/util/lazy-test.cc
@@ -22,6 +22,7 @@
#include <gtest/gtest.h>
+#include "arrow/testing/random.h"
#include "arrow/testing/util.h"
#include "arrow/util/lazy.h"
diff --git a/cpp/src/arrow/util/logging.h b/cpp/src/arrow/util/logging.h
index 7b4ec65..999aca6 100644
--- a/cpp/src/arrow/util/logging.h
+++ b/cpp/src/arrow/util/logging.h
@@ -36,8 +36,8 @@
#else // !GANDIVA_IR
-#include <iostream>
#include <memory>
+#include <ostream>
#include <string>
#include "arrow/util/macros.h"
@@ -153,19 +153,18 @@ class ARROW_EXPORT ArrowLogBase {
}
protected:
- virtual std::ostream& Stream() { return std::cerr; }
+ virtual std::ostream& Stream() = 0;
};
class ARROW_EXPORT ArrowLog : public ArrowLogBase {
public:
ArrowLog(const char* file_name, int line_number, ArrowLogLevel severity);
-
- virtual ~ArrowLog();
+ ~ArrowLog() override;
/// Return whether or not current logging instance is enabled.
///
/// \return True if logging is enabled and false otherwise.
- virtual bool IsEnabled() const;
+ bool IsEnabled() const override;
/// The init function of arrow log for a program which should be called only once.
///
@@ -204,7 +203,7 @@ class ARROW_EXPORT ArrowLog : public ArrowLogBase {
static ArrowLogLevel severity_threshold_;
protected:
- virtual std::ostream& Stream();
+ std::ostream& Stream() override;
};
// This class make ARROW_CHECK compilation pass to change the << operator to void.
diff --git a/cpp/src/arrow/util/parsing.h b/cpp/src/arrow/util/parsing.h
index 3d93ed2..20b749a 100644
--- a/cpp/src/arrow/util/parsing.h
+++ b/cpp/src/arrow/util/parsing.h
@@ -23,9 +23,7 @@
#include <cassert>
#include <chrono>
#include <limits>
-#include <locale>
#include <memory>
-#include <sstream>
#include <string>
#include <type_traits>
diff --git a/cpp/src/arrow/python/platform.h b/cpp/src/arrow/util/string_builder.cc
similarity index 63%
copy from cpp/src/arrow/python/platform.h
copy to cpp/src/arrow/util/string_builder.cc
index ca9b553..6d5a161 100644
--- a/cpp/src/arrow/python/platform.h
+++ b/cpp/src/arrow/util/string_builder.cc
@@ -15,21 +15,26 @@
// specific language governing permissions and limitations
// under the License.
-// Functions for converting between pandas's NumPy-based data representation
-// and Arrow data structures
+#include "arrow/util/string_builder.h"
-#ifndef ARROW_PYTHON_PLATFORM_H
-#define ARROW_PYTHON_PLATFORM_H
+#include <sstream>
-#include <iostream>
-#include <Python.h> // IWYU pragma: export
-#include <datetime.h>
+#include "arrow/util/stl.h"
-// Work around C2528 error
-#ifdef _MSC_VER
-#if _MSC_VER >= 1900
-#undef timezone
-#endif
-#endif
+namespace arrow {
-#endif // ARROW_PYTHON_PLATFORM_H
+using internal::make_unique;
+
+namespace util {
+namespace detail {
+
+StringStreamWrapper::StringStreamWrapper()
+ : sstream_(make_unique<std::ostringstream>()), ostream_(*sstream_) {}
+
+StringStreamWrapper::~StringStreamWrapper() {}
+
+std::string StringStreamWrapper::str() { return sstream_->str(); }
+
+} // namespace detail
+} // namespace util
+} // namespace arrow
diff --git a/cpp/src/arrow/util/string_builder.h b/cpp/src/arrow/util/string_builder.h
index 7b3e107..9129f12 100644
--- a/cpp/src/arrow/util/string_builder.h
+++ b/cpp/src/arrow/util/string_builder.h
@@ -18,31 +18,49 @@
#ifndef ARROW_UTIL_STRING_BUILDER_H
#define ARROW_UTIL_STRING_BUILDER_H
-#include <sstream>
+#include <memory>
+#include <ostream>
#include <string>
#include <utility>
+#include "arrow/util/visibility.h"
+
namespace arrow {
namespace util {
+namespace detail {
+
+class ARROW_EXPORT StringStreamWrapper {
+ public:
+ StringStreamWrapper();
+ ~StringStreamWrapper();
+
+ std::ostream& stream() { return ostream_; }
+ std::string str();
+
+ protected:
+ std::unique_ptr<std::ostringstream> sstream_;
+ std::ostream& ostream_;
+};
+
+} // namespace detail
+
template <typename Head>
-void StringBuilderRecursive(std::stringstream& stream, Head&& head) {
+void StringBuilderRecursive(std::ostream& stream, Head&& head) {
stream << head;
}
template <typename Head, typename... Tail>
-void StringBuilderRecursive(std::stringstream& stream, Head&& head, Tail&&... tail) {
+void StringBuilderRecursive(std::ostream& stream, Head&& head, Tail&&... tail) {
StringBuilderRecursive(stream, std::forward<Head>(head));
StringBuilderRecursive(stream, std::forward<Tail>(tail)...);
}
template <typename... Args>
std::string StringBuilder(Args&&... args) {
- std::stringstream stream;
-
- StringBuilderRecursive(stream, std::forward<Args>(args)...);
-
- return stream.str();
+ detail::StringStreamWrapper ss;
+ StringBuilderRecursive(ss.stream(), std::forward<Args>(args)...);
+ return ss.str();
}
} // namespace util
diff --git a/cpp/src/arrow/util/thread-pool.cc b/cpp/src/arrow/util/thread-pool.cc
index 17ad9c4..6969f3f 100644
--- a/cpp/src/arrow/util/thread-pool.cc
+++ b/cpp/src/arrow/util/thread-pool.cc
@@ -20,6 +20,7 @@
#include <algorithm>
#include <condition_variable>
#include <deque>
+#include <list>
#include <mutex>
#include <string>
#include <thread>
@@ -53,6 +54,65 @@ struct ThreadPool::State {
bool quick_shutdown_;
};
+// The worker loop is an independent function so that it can keep running
+// after the ThreadPool is destroyed.
+static void WorkerLoop(std::shared_ptr<ThreadPool::State> state,
+ std::list<std::thread>::iterator it) {
+ std::unique_lock<std::mutex> lock(state->mutex_);
+
+ // Since we hold the lock, `it` now points to the correct thread object
+ // (LaunchWorkersUnlocked has exited)
+ DCHECK_EQ(std::this_thread::get_id(), it->get_id());
+
+ // If too many threads, we should secede from the pool
+ const auto should_secede = [&]() -> bool {
+ return state->workers_.size() > static_cast<size_t>(state->desired_capacity_);
+ };
+
+ while (true) {
+ // By the time this thread is started, some tasks may have been pushed
+ // or shutdown could even have been requested. So we only wait on the
+ // condition variable at the end of the loop.
+
+ // Execute pending tasks if any
+ while (!state->pending_tasks_.empty() && !state->quick_shutdown_) {
+ // We check this opportunistically at each loop iteration since
+ // it releases the lock below.
+ if (should_secede()) {
+ break;
+ }
+ {
+ std::function<void()> task = std::move(state->pending_tasks_.front());
+ state->pending_tasks_.pop_front();
+ lock.unlock();
+ task();
+ }
+ lock.lock();
+ }
+ // Now either the queue is empty *or* a quick shutdown was requested
+ if (state->please_shutdown_ || should_secede()) {
+ break;
+ }
+ // Wait for next wakeup
+ state->cv_.wait(lock);
+ }
+
+ // We're done. Move our thread object to the trashcan of finished
+ // workers. This has two motivations:
+ // 1) the thread object doesn't get destroyed before this function finishes
+ // (but we could call thread::detach() instead)
+ // 2) we can explicitly join() the trashcan threads to make sure all OS threads
+ // are exited before the ThreadPool is destroyed. Otherwise subtle
+ // timing conditions can lead to false positives with Valgrind.
+ DCHECK_EQ(std::this_thread::get_id(), it->get_id());
+ state->finished_workers_.push_back(std::move(*it));
+ state->workers_.erase(it);
+ if (state->please_shutdown_) {
+ // Notify the function waiting in Shutdown().
+ state->cv_shutdown_.notify_one();
+ }
+}
+
ThreadPool::ThreadPool()
: sp_state_(std::make_shared<ThreadPool::State>()),
state_(sp_state_.get()),
@@ -166,63 +226,6 @@ void ThreadPool::LaunchWorkersUnlocked(int threads) {
}
}
-void ThreadPool::WorkerLoop(std::shared_ptr<State> state,
- std::list<std::thread>::iterator it) {
- std::unique_lock<std::mutex> lock(state->mutex_);
-
- // Since we hold the lock, `it` now points to the correct thread object
- // (LaunchWorkersUnlocked has exited)
- DCHECK_EQ(std::this_thread::get_id(), it->get_id());
-
- // If too many threads, we should secede from the pool
- const auto should_secede = [&]() -> bool {
- return state->workers_.size() > static_cast<size_t>(state->desired_capacity_);
- };
-
- while (true) {
- // By the time this thread is started, some tasks may have been pushed
- // or shutdown could even have been requested. So we only wait on the
- // condition variable at the end of the loop.
-
- // Execute pending tasks if any
- while (!state->pending_tasks_.empty() && !state->quick_shutdown_) {
- // We check this opportunistically at each loop iteration since
- // it releases the lock below.
- if (should_secede()) {
- break;
- }
- {
- std::function<void()> task = std::move(state->pending_tasks_.front());
- state->pending_tasks_.pop_front();
- lock.unlock();
- task();
- }
- lock.lock();
- }
- // Now either the queue is empty *or* a quick shutdown was requested
- if (state->please_shutdown_ || should_secede()) {
- break;
- }
- // Wait for next wakeup
- state->cv_.wait(lock);
- }
-
- // We're done. Move our thread object to the trashcan of finished
- // workers. This has two motivations:
- // 1) the thread object doesn't get destroyed before this function finishes
- // (but we could call thread::detach() instead)
- // 2) we can explicitly join() the trashcan threads to make sure all OS threads
- // are exited before the ThreadPool is destroyed. Otherwise subtle
- // timing conditions can lead to false positives with Valgrind.
- DCHECK_EQ(std::this_thread::get_id(), it->get_id());
- state->finished_workers_.push_back(std::move(*it));
- state->workers_.erase(it);
- if (state->please_shutdown_) {
- // Notify the function waiting in Shutdown().
- state->cv_shutdown_.notify_one();
- }
-}
-
Status ThreadPool::SpawnReal(std::function<void()> task) {
{
ProtectAgainstFork();
diff --git a/cpp/src/arrow/util/thread-pool.h b/cpp/src/arrow/util/thread-pool.h
index f18cfeb..2de212e 100644
--- a/cpp/src/arrow/util/thread-pool.h
+++ b/cpp/src/arrow/util/thread-pool.h
@@ -25,11 +25,8 @@
#include <cstdlib>
#include <functional>
#include <future>
-#include <iostream>
-#include <list>
#include <memory>
#include <string>
-#include <thread>
#include <type_traits>
#include <utility>
@@ -127,20 +124,18 @@ class ARROW_EXPORT ThreadPool {
Status st = SpawnReal(detail::packaged_task_wrapper<Result>(std::move(task)));
if (!st.ok()) {
- // This happens when Submit() is called after Shutdown()
- std::cerr << st.ToString() << std::endl;
- std::abort();
+ st.Abort("ThreadPool::Submit() was probably called after Shutdown()");
}
return fut;
}
+ struct State;
+
protected:
FRIEND_TEST(TestThreadPool, SetCapacity);
FRIEND_TEST(TestGlobalThreadPool, Capacity);
friend ARROW_EXPORT ThreadPool* GetCpuThreadPool();
- struct State;
-
ThreadPool();
ARROW_DISALLOW_COPY_AND_ASSIGN(ThreadPool);
@@ -155,11 +150,6 @@ class ARROW_EXPORT ThreadPool {
// Reinitialize the thread pool if the pid changed
void ProtectAgainstFork();
- // The worker loop is a static method so that it can keep running
- // after the ThreadPool is destroyed
- static void WorkerLoop(std::shared_ptr<State> state,
- std::list<std::thread>::iterator it);
-
static std::shared_ptr<ThreadPool> MakeCpuThreadPool();
std::shared_ptr<State> sp_state_;
diff --git a/cpp/src/arrow/visitor_inline.h b/cpp/src/arrow/visitor_inline.h
index 4699238..01bf442 100644
--- a/cpp/src/arrow/visitor_inline.h
+++ b/cpp/src/arrow/visitor_inline.h
@@ -24,7 +24,6 @@
#include "arrow/extension_type.h"
#include "arrow/scalar.h"
#include "arrow/status.h"
-#include "arrow/tensor.h"
#include "arrow/type.h"
#include "arrow/util/bit-util.h"
#include "arrow/util/checked_cast.h"
diff --git a/cpp/src/gandiva/tests/literal_test.cc b/cpp/src/gandiva/tests/literal_test.cc
index 53323cb..b5ffff0 100644
--- a/cpp/src/gandiva/tests/literal_test.cc
+++ b/cpp/src/gandiva/tests/literal_test.cc
@@ -197,7 +197,7 @@ TEST_F(TestLiteral, TestNullLiteralInIf) {
auto res = field("res", float64());
auto node_a = TreeExprBuilder::MakeField(field_a);
- auto literal_5 = TreeExprBuilder::MakeLiteral((double_t)5);
+ auto literal_5 = TreeExprBuilder::MakeLiteral(5.0);
auto a_gt_5 = TreeExprBuilder::MakeFunction("greater_than", {node_a, literal_5},
arrow::boolean());
auto literal_null = TreeExprBuilder::MakeNull(arrow::float64());
diff --git a/cpp/src/gandiva/tests/projector_test.cc b/cpp/src/gandiva/tests/projector_test.cc
index e66f9e7..238fbe2 100644
--- a/cpp/src/gandiva/tests/projector_test.cc
+++ b/cpp/src/gandiva/tests/projector_test.cc
@@ -15,9 +15,13 @@
// specific language governing permissions and limitations
// under the License.
-#include "gandiva/projector.h"
+#include <cmath>
+
#include <gtest/gtest.h>
+
#include "arrow/memory_pool.h"
+
+#include "gandiva/projector.h"
#include "gandiva/tests/test_util.h"
#include "gandiva/tree_expr_builder.h"
diff --git a/cpp/src/parquet/arrow/arrow-reader-writer-test.cc b/cpp/src/parquet/arrow/arrow-reader-writer-test.cc
index d9fd2d3..84ace3f 100644
--- a/cpp/src/parquet/arrow/arrow-reader-writer-test.cc
+++ b/cpp/src/parquet/arrow/arrow-reader-writer-test.cc
@@ -354,16 +354,6 @@ void AssertChunkedEqual(const ChunkedArray& expected, const ChunkedArray& actual
}
}
-void PrintColumn(const Column& col, std::stringstream* ss) {
- const ChunkedArray& carr = *col.data();
- for (int i = 0; i < carr.num_chunks(); ++i) {
- auto c1 = carr.chunk(i);
- *ss << "Chunk " << i << std::endl;
- ARROW_EXPECT_OK(::arrow::PrettyPrint(*c1, 0, ss));
- *ss << std::endl;
- }
-}
-
void DoSimpleRoundtrip(const std::shared_ptr<Table>& table, bool use_threads,
int64_t row_group_size, const std::vector<int>& column_subset,
std::shared_ptr<Table>* out,
diff --git a/cpp/src/parquet/arrow/test-util.h b/cpp/src/parquet/arrow/test-util.h
index b963c3e..b99e28f 100644
--- a/cpp/src/parquet/arrow/test-util.h
+++ b/cpp/src/parquet/arrow/test-util.h
@@ -26,6 +26,7 @@
#include "arrow/api.h"
#include "arrow/testing/gtest_util.h"
+#include "arrow/testing/random.h"
#include "arrow/type_traits.h"
#include "arrow/util/decimal.h"