You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2018/11/29 20:52:21 UTC
[arrow] branch master updated: ARROW-3740: [C++] Builder should not
downsize
This is an automated email from the ASF dual-hosted git repository.
wesm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new a4951ed ARROW-3740: [C++] Builder should not downsize
a4951ed is described below
commit a4951edb4bcec6d87284f46ecba81b6cf8d56e91
Author: François Saint-Jacques <fs...@gmail.com>
AuthorDate: Thu Nov 29 14:51:13 2018 -0600
ARROW-3740: [C++] Builder should not downsize
Downsizing is a tricky business, it is safer to invoke Reset() then Reserve().
Author: François Saint-Jacques <fs...@gmail.com>
Closes #3055 from fsaintjacques/ARROW-3740-builder-resize-shrink and squashes the following commits:
4af5f1661 <François Saint-Jacques> Arrow-3740: Builder should not downsize
---
cpp/src/arrow/array-test.cc | 12 ++++++++++++
cpp/src/arrow/builder.cc | 42 ++++++++++++++++++++++++------------------
cpp/src/arrow/builder.h | 4 +++-
3 files changed, 39 insertions(+), 19 deletions(-)
diff --git a/cpp/src/arrow/array-test.cc b/cpp/src/arrow/array-test.cc
index d7445d6..ab03ced 100644
--- a/cpp/src/arrow/array-test.cc
+++ b/cpp/src/arrow/array-test.cc
@@ -261,6 +261,16 @@ TEST_F(TestBuilder, TestReserve) {
ASSERT_EQ(BitUtil::NextPower2(1030), builder.capacity());
}
+TEST_F(TestBuilder, TestResizeDownsize) {
+ UInt8Builder builder(pool_);
+
+ ASSERT_OK(builder.Resize(1000));
+ ASSERT_EQ(1000, builder.capacity());
+
+ // Can't downsize.
+ ASSERT_RAISES(Invalid, builder.Resize(500));
+}
+
template <typename Attrs>
class TestPrimitiveBuilder : public TestBuilder {
public:
@@ -889,6 +899,8 @@ TYPED_TEST(TestPrimitiveBuilder, TestReserve) {
ASSERT_OK(this->builder_->Advance(100));
ASSERT_OK(this->builder_->Reserve(kMinBuilderCapacity));
+ ASSERT_RAISES(Invalid, this->builder_->Resize(1));
+
ASSERT_EQ(BitUtil::NextPower2(kMinBuilderCapacity + 100), this->builder_->capacity());
}
diff --git a/cpp/src/arrow/builder.cc b/cpp/src/arrow/builder.cc
index 6aa415b..0d9e0e0 100644
--- a/cpp/src/arrow/builder.cc
+++ b/cpp/src/arrow/builder.cc
@@ -79,9 +79,17 @@ Status ArrayBuilder::AppendToBitmap(const uint8_t* valid_bytes, int64_t length)
return Status::OK();
}
+static inline Status CheckCapacity(int64_t new_capacity, int64_t old_capacity) {
+ if (new_capacity < 0) return Status::Invalid("Resize capacity must be positive");
+ if (new_capacity < old_capacity) return Status::Invalid("Resize cannot downsize");
+
+ return Status::OK();
+}
+
Status ArrayBuilder::Resize(int64_t capacity) {
// Target size of validity (null) bitmap data
const int64_t new_bitmap_size = BitUtil::BytesForBits(capacity);
+ RETURN_NOT_OK(CheckCapacity(capacity, capacity_));
if (capacity_ == 0) {
RETURN_NOT_OK(AllocateResizableBuffer(pool_, new_bitmap_size, &null_bitmap_));
@@ -200,10 +208,8 @@ void PrimitiveBuilder<T>::Reset() {
template <typename T>
Status PrimitiveBuilder<T>::Resize(int64_t capacity) {
- // XXX: Set floor size for now
- if (capacity < kMinBuilderCapacity) {
- capacity = kMinBuilderCapacity;
- }
+ RETURN_NOT_OK(CheckCapacity(capacity, capacity_));
+ capacity = std::max(capacity, kMinBuilderCapacity);
int64_t nbytes = TypeTraits<T>::bytes_required(capacity);
if (capacity_ == 0) {
@@ -298,10 +304,8 @@ void AdaptiveIntBuilderBase::Reset() {
}
Status AdaptiveIntBuilderBase::Resize(int64_t capacity) {
- // XXX: Set floor size for now
- if (capacity < kMinBuilderCapacity) {
- capacity = kMinBuilderCapacity;
- }
+ RETURN_NOT_OK(CheckCapacity(capacity, capacity_));
+ capacity = std::max(capacity, kMinBuilderCapacity);
int64_t nbytes = capacity * int_size_;
if (capacity_ == 0) {
@@ -634,10 +638,8 @@ void BooleanBuilder::Reset() {
}
Status BooleanBuilder::Resize(int64_t capacity) {
- // XXX: Set floor size for now
- if (capacity < kMinBuilderCapacity) {
- capacity = kMinBuilderCapacity;
- }
+ RETURN_NOT_OK(CheckCapacity(capacity, capacity_));
+ capacity = std::max(capacity, kMinBuilderCapacity);
const int64_t new_bitmap_size = BitUtil::BytesForBits(capacity);
if (capacity_ == 0) {
@@ -792,9 +794,8 @@ void DictionaryBuilder<T>::Reset() {
template <typename T>
Status DictionaryBuilder<T>::Resize(int64_t capacity) {
- if (capacity < kMinBuilderCapacity) {
- capacity = kMinBuilderCapacity;
- }
+ RETURN_NOT_OK(CheckCapacity(capacity, capacity_));
+ capacity = std::max(capacity, kMinBuilderCapacity);
if (capacity_ == 0) {
// Initialize hash table
@@ -807,9 +808,9 @@ Status DictionaryBuilder<T>::Resize(int64_t capacity) {
}
Status DictionaryBuilder<NullType>::Resize(int64_t capacity) {
- if (capacity < kMinBuilderCapacity) {
- capacity = kMinBuilderCapacity;
- }
+ RETURN_NOT_OK(CheckCapacity(capacity, capacity_));
+ capacity = std::max(capacity, kMinBuilderCapacity);
+
RETURN_NOT_OK(values_builder_.Resize(capacity));
return ArrayBuilder::Resize(capacity);
}
@@ -1003,6 +1004,8 @@ Status ListBuilder::Append(bool is_valid) {
Status ListBuilder::Resize(int64_t capacity) {
DCHECK_LE(capacity, kListMaximumElements);
+ RETURN_NOT_OK(CheckCapacity(capacity, capacity_));
+
// one more then requested for offsets
RETURN_NOT_OK(offsets_builder_.Resize((capacity + 1) * sizeof(int32_t)));
return ArrayBuilder::Resize(capacity);
@@ -1054,6 +1057,8 @@ BinaryBuilder::BinaryBuilder(MemoryPool* pool) : BinaryBuilder(binary(), pool) {
Status BinaryBuilder::Resize(int64_t capacity) {
DCHECK_LE(capacity, kListMaximumElements);
+ RETURN_NOT_OK(CheckCapacity(capacity, capacity_));
+
// one more then requested for offsets
RETURN_NOT_OK(offsets_builder_.Resize((capacity + 1) * sizeof(int32_t)));
return ArrayBuilder::Resize(capacity);
@@ -1266,6 +1271,7 @@ void FixedSizeBinaryBuilder::Reset() {
}
Status FixedSizeBinaryBuilder::Resize(int64_t capacity) {
+ RETURN_NOT_OK(CheckCapacity(capacity, capacity_));
RETURN_NOT_OK(byte_builder_.Resize(capacity * byte_width_));
return ArrayBuilder::Resize(capacity);
}
diff --git a/cpp/src/arrow/builder.h b/cpp/src/arrow/builder.h
index 1e8bcca..183e7e9 100644
--- a/cpp/src/arrow/builder.h
+++ b/cpp/src/arrow/builder.h
@@ -87,7 +87,9 @@ class ARROW_EXPORT ArrayBuilder {
/// been appended. Does not account for reallocations that may be due to
/// variable size data, like binary values. To make space for incremental
/// appends, use Reserve instead.
- /// \param[in] capacity the minimum number of additional array values
+ ///
+ /// \param[in] capacity the minimum number of total array values to
+ /// accommodate. Must be greater than the current capacity.
/// \return Status
virtual Status Resize(int64_t capacity);