You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by li...@apache.org on 2021/12/29 18:38:31 UTC
[arrow] branch master updated: ARROW-15194: [C++] Combine ChunkedArray constructors
This is an automated email from the ASF dual-hosted git repository.
lidavidm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new 5aa3f4b ARROW-15194: [C++] Combine ChunkedArray constructors
5aa3f4b is described below
commit 5aa3f4b8fc280a58e54aad32a3ac3b6a10b5cd05
Author: Eduardo Ponce <ed...@gmail.com>
AuthorDate: Wed Dec 29 13:36:59 2021 -0500
ARROW-15194: [C++] Combine ChunkedArray constructors
This PR combines the ChunkedArray constructors into a single one, and only validates for empty and omitted data type.
Closes #12036 from edponce/ARROW-15194-ChunkedArray-constructor-is-missing-size
Authored-by: Eduardo Ponce <ed...@gmail.com>
Signed-off-by: David Li <li...@gmail.com>
---
cpp/src/arrow/chunked_array.cc | 25 +++++++++----------------
cpp/src/arrow/chunked_array.h | 14 +++++---------
cpp/src/arrow/chunked_array_test.cc | 7 +++++++
3 files changed, 21 insertions(+), 25 deletions(-)
diff --git a/cpp/src/arrow/chunked_array.cc b/cpp/src/arrow/chunked_array.cc
index f4918be..3074690 100644
--- a/cpp/src/arrow/chunked_array.cc
+++ b/cpp/src/arrow/chunked_array.cc
@@ -42,24 +42,17 @@ class MemoryPool;
// ----------------------------------------------------------------------
// ChunkedArray methods
-ChunkedArray::ChunkedArray(ArrayVector chunks) : chunks_(std::move(chunks)) {
- length_ = 0;
- null_count_ = 0;
-
- ARROW_CHECK_GT(chunks_.size(), 0)
- << "cannot construct ChunkedArray from empty vector and omitted type";
- type_ = chunks_[0]->type();
- for (const std::shared_ptr<Array>& chunk : chunks_) {
- length_ += chunk->length();
- null_count_ += chunk->null_count();
- }
-}
-
ChunkedArray::ChunkedArray(ArrayVector chunks, std::shared_ptr<DataType> type)
: chunks_(std::move(chunks)), type_(std::move(type)) {
length_ = 0;
null_count_ = 0;
- for (const std::shared_ptr<Array>& chunk : chunks_) {
+
+ if (type_ == nullptr) {
+ ARROW_CHECK_GT(chunks_.size(), 0)
+ << "cannot construct ChunkedArray from empty vector and omitted type";
+ type_ = chunks_[0]->type();
+ }
+ for (const auto& chunk : chunks_) {
length_ += chunk->length();
null_count_ += chunk->null_count();
}
@@ -75,8 +68,8 @@ Result<std::shared_ptr<ChunkedArray>> ChunkedArray::Make(ArrayVector chunks,
}
type = chunks[0]->type();
}
- for (size_t i = 0; i < chunks.size(); ++i) {
- if (!chunks[i]->type()->Equals(*type)) {
+ for (const auto& chunk : chunks) {
+ if (!chunk->type()->Equals(*type)) {
return Status::Invalid("Array chunks must all be same type");
}
}
diff --git a/cpp/src/arrow/chunked_array.h b/cpp/src/arrow/chunked_array.h
index 0c4ddc4..36b2f38 100644
--- a/cpp/src/arrow/chunked_array.h
+++ b/cpp/src/arrow/chunked_array.h
@@ -67,12 +67,6 @@ class MemoryPool;
/// inputs should not expect the chunk layout to be the same in each input.
class ARROW_EXPORT ChunkedArray {
public:
- /// \brief Construct a chunked array from a vector of arrays
- ///
- /// The vector must be non-empty and all its elements must have the same
- /// data type.
- explicit ChunkedArray(ArrayVector chunks);
-
ChunkedArray(ChunkedArray&&) = default;
ChunkedArray& operator=(ChunkedArray&&) = default;
@@ -80,10 +74,12 @@ class ARROW_EXPORT ChunkedArray {
explicit ChunkedArray(std::shared_ptr<Array> chunk)
: ChunkedArray(ArrayVector{std::move(chunk)}) {}
- /// \brief Construct a chunked array from a vector of arrays and a data type
+ /// \brief Construct a chunked array from a vector of arrays and an optional data type
///
- /// As the data type is passed explicitly, the vector may be empty.
- ChunkedArray(ArrayVector chunks, std::shared_ptr<DataType> type);
+ /// The vector elements must have the same data type.
+ /// If the data type is passed explicitly, the vector may be empty.
+ /// If the data type is omitted, the vector must be non-empty.
+ explicit ChunkedArray(ArrayVector chunks, std::shared_ptr<DataType> type = NULLPTR);
// \brief Constructor with basic input validation.
static Result<std::shared_ptr<ChunkedArray>> Make(
diff --git a/cpp/src/arrow/chunked_array_test.cc b/cpp/src/arrow/chunked_array_test.cc
index d6e0ef9..eb09485 100644
--- a/cpp/src/arrow/chunked_array_test.cc
+++ b/cpp/src/arrow/chunked_array_test.cc
@@ -196,6 +196,12 @@ TEST_F(TestChunkedArray, Validate) {
ASSERT_OK(no_chunks->ValidateFull());
random::RandomArrayGenerator gen(0);
+
+ // Valid if non-empty and ommitted type
+ ArrayVector arrays = {gen.Int64(50, 0, 100, 0.1), gen.Int64(50, 0, 100, 0.1)};
+ auto chunks_with_no_type = std::make_shared<ChunkedArray>(arrays, nullptr);
+ ASSERT_OK(chunks_with_no_type->ValidateFull());
+
arrays_one_.push_back(gen.Int32(50, 0, 100, 0.1));
Construct();
ASSERT_OK(one_->ValidateFull());
@@ -204,6 +210,7 @@ TEST_F(TestChunkedArray, Validate) {
Construct();
ASSERT_OK(one_->ValidateFull());
+ // Invalid if different chunk types
arrays_one_.push_back(gen.String(50, 0, 10, 0.1));
Construct();
ASSERT_RAISES(Invalid, one_->ValidateFull());