You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by li...@apache.org on 2021/12/29 18:38:31 UTC

[arrow] branch master updated: ARROW-15194: [C++] Combine ChunkedArray constructors

This is an automated email from the ASF dual-hosted git repository.

lidavidm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 5aa3f4b  ARROW-15194: [C++] Combine ChunkedArray constructors
5aa3f4b is described below

commit 5aa3f4b8fc280a58e54aad32a3ac3b6a10b5cd05
Author: Eduardo Ponce <ed...@gmail.com>
AuthorDate: Wed Dec 29 13:36:59 2021 -0500

    ARROW-15194: [C++] Combine ChunkedArray constructors
    
    This PR combines the ChunkedArray constructors into a single one, and only validates for empty and omitted data type.
    
    Closes #12036 from edponce/ARROW-15194-ChunkedArray-constructor-is-missing-size
    
    Authored-by: Eduardo Ponce <ed...@gmail.com>
    Signed-off-by: David Li <li...@gmail.com>
---
 cpp/src/arrow/chunked_array.cc      | 25 +++++++++----------------
 cpp/src/arrow/chunked_array.h       | 14 +++++---------
 cpp/src/arrow/chunked_array_test.cc |  7 +++++++
 3 files changed, 21 insertions(+), 25 deletions(-)

diff --git a/cpp/src/arrow/chunked_array.cc b/cpp/src/arrow/chunked_array.cc
index f4918be..3074690 100644
--- a/cpp/src/arrow/chunked_array.cc
+++ b/cpp/src/arrow/chunked_array.cc
@@ -42,24 +42,17 @@ class MemoryPool;
 // ----------------------------------------------------------------------
 // ChunkedArray methods
 
-ChunkedArray::ChunkedArray(ArrayVector chunks) : chunks_(std::move(chunks)) {
-  length_ = 0;
-  null_count_ = 0;
-
-  ARROW_CHECK_GT(chunks_.size(), 0)
-      << "cannot construct ChunkedArray from empty vector and omitted type";
-  type_ = chunks_[0]->type();
-  for (const std::shared_ptr<Array>& chunk : chunks_) {
-    length_ += chunk->length();
-    null_count_ += chunk->null_count();
-  }
-}
-
 ChunkedArray::ChunkedArray(ArrayVector chunks, std::shared_ptr<DataType> type)
     : chunks_(std::move(chunks)), type_(std::move(type)) {
   length_ = 0;
   null_count_ = 0;
-  for (const std::shared_ptr<Array>& chunk : chunks_) {
+
+  if (type_ == nullptr) {
+    ARROW_CHECK_GT(chunks_.size(), 0)
+        << "cannot construct ChunkedArray from empty vector and omitted type";
+    type_ = chunks_[0]->type();
+  }
+  for (const auto& chunk : chunks_) {
     length_ += chunk->length();
     null_count_ += chunk->null_count();
   }
@@ -75,8 +68,8 @@ Result<std::shared_ptr<ChunkedArray>> ChunkedArray::Make(ArrayVector chunks,
     }
     type = chunks[0]->type();
   }
-  for (size_t i = 0; i < chunks.size(); ++i) {
-    if (!chunks[i]->type()->Equals(*type)) {
+  for (const auto& chunk : chunks) {
+    if (!chunk->type()->Equals(*type)) {
       return Status::Invalid("Array chunks must all be same type");
     }
   }
diff --git a/cpp/src/arrow/chunked_array.h b/cpp/src/arrow/chunked_array.h
index 0c4ddc4..36b2f38 100644
--- a/cpp/src/arrow/chunked_array.h
+++ b/cpp/src/arrow/chunked_array.h
@@ -67,12 +67,6 @@ class MemoryPool;
 /// inputs should not expect the chunk layout to be the same in each input.
 class ARROW_EXPORT ChunkedArray {
  public:
-  /// \brief Construct a chunked array from a vector of arrays
-  ///
-  /// The vector must be non-empty and all its elements must have the same
-  /// data type.
-  explicit ChunkedArray(ArrayVector chunks);
-
   ChunkedArray(ChunkedArray&&) = default;
   ChunkedArray& operator=(ChunkedArray&&) = default;
 
@@ -80,10 +74,12 @@ class ARROW_EXPORT ChunkedArray {
   explicit ChunkedArray(std::shared_ptr<Array> chunk)
       : ChunkedArray(ArrayVector{std::move(chunk)}) {}
 
-  /// \brief Construct a chunked array from a vector of arrays and a data type
+  /// \brief Construct a chunked array from a vector of arrays and an optional data type
   ///
-  /// As the data type is passed explicitly, the vector may be empty.
-  ChunkedArray(ArrayVector chunks, std::shared_ptr<DataType> type);
+  /// The vector elements must have the same data type.
+  /// If the data type is passed explicitly, the vector may be empty.
+  /// If the data type is omitted, the vector must be non-empty.
+  explicit ChunkedArray(ArrayVector chunks, std::shared_ptr<DataType> type = NULLPTR);
 
   // \brief Constructor with basic input validation.
   static Result<std::shared_ptr<ChunkedArray>> Make(
diff --git a/cpp/src/arrow/chunked_array_test.cc b/cpp/src/arrow/chunked_array_test.cc
index d6e0ef9..eb09485 100644
--- a/cpp/src/arrow/chunked_array_test.cc
+++ b/cpp/src/arrow/chunked_array_test.cc
@@ -196,6 +196,12 @@ TEST_F(TestChunkedArray, Validate) {
   ASSERT_OK(no_chunks->ValidateFull());
 
   random::RandomArrayGenerator gen(0);
+
+  // Valid if non-empty and ommitted type
+  ArrayVector arrays = {gen.Int64(50, 0, 100, 0.1), gen.Int64(50, 0, 100, 0.1)};
+  auto chunks_with_no_type = std::make_shared<ChunkedArray>(arrays, nullptr);
+  ASSERT_OK(chunks_with_no_type->ValidateFull());
+
   arrays_one_.push_back(gen.Int32(50, 0, 100, 0.1));
   Construct();
   ASSERT_OK(one_->ValidateFull());
@@ -204,6 +210,7 @@ TEST_F(TestChunkedArray, Validate) {
   Construct();
   ASSERT_OK(one_->ValidateFull());
 
+  // Invalid if different chunk types
   arrays_one_.push_back(gen.String(50, 0, 10, 0.1));
   Construct();
   ASSERT_RAISES(Invalid, one_->ValidateFull());