You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2018/11/29 20:52:21 UTC

[arrow] branch master updated: ARROW-3740: [C++] Builder should not downsize

This is an automated email from the ASF dual-hosted git repository.

wesm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new a4951ed  ARROW-3740: [C++] Builder should not downsize
a4951ed is described below

commit a4951edb4bcec6d87284f46ecba81b6cf8d56e91
Author: François Saint-Jacques <fs...@gmail.com>
AuthorDate: Thu Nov 29 14:51:13 2018 -0600

    ARROW-3740: [C++] Builder should not downsize
    
    Downsizing is a tricky business, it is safer to invoke Reset() then Reserve().
    
    Author: François Saint-Jacques <fs...@gmail.com>
    
    Closes #3055 from fsaintjacques/ARROW-3740-builder-resize-shrink and squashes the following commits:
    
    4af5f1661 <François Saint-Jacques> Arrow-3740:  Builder should not downsize
---
 cpp/src/arrow/array-test.cc | 12 ++++++++++++
 cpp/src/arrow/builder.cc    | 42 ++++++++++++++++++++++++------------------
 cpp/src/arrow/builder.h     |  4 +++-
 3 files changed, 39 insertions(+), 19 deletions(-)

diff --git a/cpp/src/arrow/array-test.cc b/cpp/src/arrow/array-test.cc
index d7445d6..ab03ced 100644
--- a/cpp/src/arrow/array-test.cc
+++ b/cpp/src/arrow/array-test.cc
@@ -261,6 +261,16 @@ TEST_F(TestBuilder, TestReserve) {
   ASSERT_EQ(BitUtil::NextPower2(1030), builder.capacity());
 }
 
+TEST_F(TestBuilder, TestResizeDownsize) {
+  UInt8Builder builder(pool_);
+
+  ASSERT_OK(builder.Resize(1000));
+  ASSERT_EQ(1000, builder.capacity());
+
+  // Can't downsize.
+  ASSERT_RAISES(Invalid, builder.Resize(500));
+}
+
 template <typename Attrs>
 class TestPrimitiveBuilder : public TestBuilder {
  public:
@@ -889,6 +899,8 @@ TYPED_TEST(TestPrimitiveBuilder, TestReserve) {
   ASSERT_OK(this->builder_->Advance(100));
   ASSERT_OK(this->builder_->Reserve(kMinBuilderCapacity));
 
+  ASSERT_RAISES(Invalid, this->builder_->Resize(1));
+
   ASSERT_EQ(BitUtil::NextPower2(kMinBuilderCapacity + 100), this->builder_->capacity());
 }
 
diff --git a/cpp/src/arrow/builder.cc b/cpp/src/arrow/builder.cc
index 6aa415b..0d9e0e0 100644
--- a/cpp/src/arrow/builder.cc
+++ b/cpp/src/arrow/builder.cc
@@ -79,9 +79,17 @@ Status ArrayBuilder::AppendToBitmap(const uint8_t* valid_bytes, int64_t length)
   return Status::OK();
 }
 
+static inline Status CheckCapacity(int64_t new_capacity, int64_t old_capacity) {
+  if (new_capacity < 0) return Status::Invalid("Resize capacity must be positive");
+  if (new_capacity < old_capacity) return Status::Invalid("Resize cannot downsize");
+
+  return Status::OK();
+}
+
 Status ArrayBuilder::Resize(int64_t capacity) {
   // Target size of validity (null) bitmap data
   const int64_t new_bitmap_size = BitUtil::BytesForBits(capacity);
+  RETURN_NOT_OK(CheckCapacity(capacity, capacity_));
 
   if (capacity_ == 0) {
     RETURN_NOT_OK(AllocateResizableBuffer(pool_, new_bitmap_size, &null_bitmap_));
@@ -200,10 +208,8 @@ void PrimitiveBuilder<T>::Reset() {
 
 template <typename T>
 Status PrimitiveBuilder<T>::Resize(int64_t capacity) {
-  // XXX: Set floor size for now
-  if (capacity < kMinBuilderCapacity) {
-    capacity = kMinBuilderCapacity;
-  }
+  RETURN_NOT_OK(CheckCapacity(capacity, capacity_));
+  capacity = std::max(capacity, kMinBuilderCapacity);
 
   int64_t nbytes = TypeTraits<T>::bytes_required(capacity);
   if (capacity_ == 0) {
@@ -298,10 +304,8 @@ void AdaptiveIntBuilderBase::Reset() {
 }
 
 Status AdaptiveIntBuilderBase::Resize(int64_t capacity) {
-  // XXX: Set floor size for now
-  if (capacity < kMinBuilderCapacity) {
-    capacity = kMinBuilderCapacity;
-  }
+  RETURN_NOT_OK(CheckCapacity(capacity, capacity_));
+  capacity = std::max(capacity, kMinBuilderCapacity);
 
   int64_t nbytes = capacity * int_size_;
   if (capacity_ == 0) {
@@ -634,10 +638,8 @@ void BooleanBuilder::Reset() {
 }
 
 Status BooleanBuilder::Resize(int64_t capacity) {
-  // XXX: Set floor size for now
-  if (capacity < kMinBuilderCapacity) {
-    capacity = kMinBuilderCapacity;
-  }
+  RETURN_NOT_OK(CheckCapacity(capacity, capacity_));
+  capacity = std::max(capacity, kMinBuilderCapacity);
 
   const int64_t new_bitmap_size = BitUtil::BytesForBits(capacity);
   if (capacity_ == 0) {
@@ -792,9 +794,8 @@ void DictionaryBuilder<T>::Reset() {
 
 template <typename T>
 Status DictionaryBuilder<T>::Resize(int64_t capacity) {
-  if (capacity < kMinBuilderCapacity) {
-    capacity = kMinBuilderCapacity;
-  }
+  RETURN_NOT_OK(CheckCapacity(capacity, capacity_));
+  capacity = std::max(capacity, kMinBuilderCapacity);
 
   if (capacity_ == 0) {
     // Initialize hash table
@@ -807,9 +808,9 @@ Status DictionaryBuilder<T>::Resize(int64_t capacity) {
 }
 
 Status DictionaryBuilder<NullType>::Resize(int64_t capacity) {
-  if (capacity < kMinBuilderCapacity) {
-    capacity = kMinBuilderCapacity;
-  }
+  RETURN_NOT_OK(CheckCapacity(capacity, capacity_));
+  capacity = std::max(capacity, kMinBuilderCapacity);
+
   RETURN_NOT_OK(values_builder_.Resize(capacity));
   return ArrayBuilder::Resize(capacity);
 }
@@ -1003,6 +1004,8 @@ Status ListBuilder::Append(bool is_valid) {
 
 Status ListBuilder::Resize(int64_t capacity) {
   DCHECK_LE(capacity, kListMaximumElements);
+  RETURN_NOT_OK(CheckCapacity(capacity, capacity_));
+
   // one more then requested for offsets
   RETURN_NOT_OK(offsets_builder_.Resize((capacity + 1) * sizeof(int32_t)));
   return ArrayBuilder::Resize(capacity);
@@ -1054,6 +1057,8 @@ BinaryBuilder::BinaryBuilder(MemoryPool* pool) : BinaryBuilder(binary(), pool) {
 
 Status BinaryBuilder::Resize(int64_t capacity) {
   DCHECK_LE(capacity, kListMaximumElements);
+  RETURN_NOT_OK(CheckCapacity(capacity, capacity_));
+
   // one more then requested for offsets
   RETURN_NOT_OK(offsets_builder_.Resize((capacity + 1) * sizeof(int32_t)));
   return ArrayBuilder::Resize(capacity);
@@ -1266,6 +1271,7 @@ void FixedSizeBinaryBuilder::Reset() {
 }
 
 Status FixedSizeBinaryBuilder::Resize(int64_t capacity) {
+  RETURN_NOT_OK(CheckCapacity(capacity, capacity_));
   RETURN_NOT_OK(byte_builder_.Resize(capacity * byte_width_));
   return ArrayBuilder::Resize(capacity);
 }
diff --git a/cpp/src/arrow/builder.h b/cpp/src/arrow/builder.h
index 1e8bcca..183e7e9 100644
--- a/cpp/src/arrow/builder.h
+++ b/cpp/src/arrow/builder.h
@@ -87,7 +87,9 @@ class ARROW_EXPORT ArrayBuilder {
   /// been appended. Does not account for reallocations that may be due to
   /// variable size data, like binary values. To make space for incremental
   /// appends, use Reserve instead.
-  /// \param[in] capacity the minimum number of additional array values
+  ///
+  /// \param[in] capacity the minimum number of total array values to
+  ///            accommodate. Must be greater than the current capacity.
   /// \return Status
   virtual Status Resize(int64_t capacity);