You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ap...@apache.org on 2019/04/24 09:13:51 UTC

[arrow] branch master updated: ARROW-5204: [C++] Improve builder performance

This is an automated email from the ASF dual-hosted git repository.

apitrou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 948379f  ARROW-5204: [C++] Improve builder performance
948379f is described below

commit 948379f2bb87480a44ca2218c0aaa9da7cd164ff
Author: Antoine Pitrou <an...@python.org>
AuthorDate: Wed Apr 24 11:13:43 2019 +0200

    ARROW-5204: [C++] Improve builder performance
    
    Remove a spurious memset() call that would 0-initialize any additional data area.
    
    Also tweak the overallocation strategy.
    
    This makes PrimitiveBuilder 60+% faster here (4.5 -> 7.4 GB/s).
    
    Author: Antoine Pitrou <an...@python.org>
    
    Closes #4193 from pitrou/ARROW-5204-buffer-builder-perf and squashes the following commits:
    
    d80385eb1 <Antoine Pitrou> ARROW-5204:  Improve builder performance
---
 cpp/src/arrow/array-binary-test.cc |  4 ++-
 cpp/src/arrow/array-test.cc        | 59 ++++++++++++++++++++----------------
 cpp/src/arrow/array/builder_base.h | 11 ++++---
 cpp/src/arrow/buffer-builder.h     | 62 ++++++++++++++++++--------------------
 4 files changed, 72 insertions(+), 64 deletions(-)

diff --git a/cpp/src/arrow/array-binary-test.cc b/cpp/src/arrow/array-binary-test.cc
index 2e32e54..cd9caa1 100644
--- a/cpp/src/arrow/array-binary-test.cc
+++ b/cpp/src/arrow/array-binary-test.cc
@@ -603,7 +603,9 @@ TEST_F(TestBinaryBuilder, TestCapacityReserve) {
   ASSERT_OK(builder_->ReserveData(extra_capacity));
 
   ASSERT_EQ(length, builder_->value_data_length());
-  ASSERT_EQ(expected_capacity, builder_->value_data_capacity());
+  int64_t actual_capacity = builder_->value_data_capacity();
+  ASSERT_GE(actual_capacity, expected_capacity);
+  ASSERT_EQ(actual_capacity & 63, 0);
 
   Done();
 
diff --git a/cpp/src/arrow/array-test.cc b/cpp/src/arrow/array-test.cc
index 6826c72..71183fb 100644
--- a/cpp/src/arrow/array-test.cc
+++ b/cpp/src/arrow/array-test.cc
@@ -274,10 +274,9 @@ TEST_F(TestBuilder, TestReserve) {
   ASSERT_OK(builder.Resize(1000));
   ASSERT_EQ(1000, builder.capacity());
 
-  // Builder only contains 0 elements, but calling Reserve will result in a round
-  // up to next power of 2
+  // Reserve overallocates for small upsizes.
   ASSERT_OK(builder.Reserve(1030));
-  ASSERT_EQ(BitUtil::NextPower2(1030), builder.capacity());
+  ASSERT_GE(builder.capacity(), 1500);
 }
 
 TEST_F(TestBuilder, TestResizeDownsize) {
@@ -516,9 +515,16 @@ typedef ::testing::Types<PBoolean, PUInt8, PUInt16, PUInt32, PUInt64, PInt8, PIn
 TYPED_TEST_CASE(TestPrimitiveBuilder, Primitives);
 
 TYPED_TEST(TestPrimitiveBuilder, TestInit) {
-  int64_t n = 1000;
-  ASSERT_OK(this->builder_->Reserve(n));
-  ASSERT_EQ(BitUtil::NextPower2(n), this->builder_->capacity());
+  ASSERT_OK(this->builder_->Reserve(1000));
+  ASSERT_EQ(1000, this->builder_->capacity());
+
+  // Small upsize => should overallocate
+  ASSERT_OK(this->builder_->Reserve(1200));
+  ASSERT_GE(1500, this->builder_->capacity());
+
+  // Large upsize => should allocate exactly
+  ASSERT_OK(this->builder_->Reserve(32768));
+  ASSERT_EQ(32768, this->builder_->capacity());
 
   // unsure if this should go in all builder classes
   ASSERT_EQ(0, this->builder_->num_children());
@@ -678,10 +684,10 @@ TYPED_TEST(TestPrimitiveBuilder, TestAppendScalar) {
   ASSERT_EQ(null_count, this->builder_->null_count());
 
   ASSERT_EQ(1000, this->builder_->length());
-  ASSERT_EQ(1024, this->builder_->capacity());
+  ASSERT_EQ(1000, this->builder_->capacity());
 
   ASSERT_EQ(1000, this->builder_nn_->length());
-  ASSERT_EQ(1024, this->builder_nn_->capacity());
+  ASSERT_EQ(1000, this->builder_nn_->capacity());
 
   ASSERT_OK(this->builder_->Reserve(size - 1000));
   ASSERT_OK(this->builder_nn_->Reserve(size - 1000));
@@ -697,10 +703,10 @@ TYPED_TEST(TestPrimitiveBuilder, TestAppendScalar) {
   }
 
   ASSERT_EQ(size, this->builder_->length());
-  ASSERT_EQ(BitUtil::NextPower2(size), this->builder_->capacity());
+  ASSERT_GE(size, this->builder_->capacity());
 
   ASSERT_EQ(size, this->builder_nn_->length());
-  ASSERT_EQ(BitUtil::NextPower2(size), this->builder_nn_->capacity());
+  ASSERT_GE(size, this->builder_nn_->capacity());
 
   this->Check(this->builder_, true);
   this->Check(this->builder_nn_, false);
@@ -722,10 +728,10 @@ TYPED_TEST(TestPrimitiveBuilder, TestAppendValues) {
   ASSERT_OK(this->builder_nn_->AppendValues(draws.data(), K));
 
   ASSERT_EQ(1000, this->builder_->length());
-  ASSERT_EQ(1024, this->builder_->capacity());
+  ASSERT_EQ(1000, this->builder_->capacity());
 
   ASSERT_EQ(1000, this->builder_nn_->length());
-  ASSERT_EQ(1024, this->builder_nn_->capacity());
+  ASSERT_EQ(1000, this->builder_nn_->capacity());
 
   // Append the next 9000
   ASSERT_OK(
@@ -733,10 +739,10 @@ TYPED_TEST(TestPrimitiveBuilder, TestAppendValues) {
   ASSERT_OK(this->builder_nn_->AppendValues(draws.data() + K, size - K));
 
   ASSERT_EQ(size, this->builder_->length());
-  ASSERT_EQ(BitUtil::NextPower2(size), this->builder_->capacity());
+  ASSERT_GE(size, this->builder_->capacity());
 
   ASSERT_EQ(size, this->builder_nn_->length());
-  ASSERT_EQ(BitUtil::NextPower2(size), this->builder_nn_->capacity());
+  ASSERT_GE(size, this->builder_nn_->capacity());
 
   this->Check(this->builder_, true);
   this->Check(this->builder_nn_, false);
@@ -751,7 +757,7 @@ TYPED_TEST(TestPrimitiveBuilder, TestAppendValuesIter) {
   ASSERT_OK(this->builder_nn_->AppendValues(this->draws_.begin(), this->draws_.end()));
 
   ASSERT_EQ(size, this->builder_->length());
-  ASSERT_EQ(BitUtil::NextPower2(size), this->builder_->capacity());
+  ASSERT_GE(size, this->builder_->capacity());
 
   this->Check(this->builder_, true);
   this->Check(this->builder_nn_, false);
@@ -765,7 +771,7 @@ TYPED_TEST(TestPrimitiveBuilder, TestAppendValuesIterNullValid) {
                                             this->draws_.begin() + size / 2,
                                             static_cast<uint8_t*>(nullptr)));
 
-  ASSERT_EQ(BitUtil::NextPower2(size / 2), this->builder_nn_->capacity());
+  ASSERT_GE(size / 2, this->builder_nn_->capacity());
 
   ASSERT_OK(this->builder_nn_->AppendValues(this->draws_.begin() + size / 2,
                                             this->draws_.end(),
@@ -836,10 +842,10 @@ TYPED_TEST(TestPrimitiveBuilder, TestAppendValuesIterConverted) {
   ASSERT_OK(this->builder_nn_->AppendValues(cast_values.begin(), cast_values.end()));
 
   ASSERT_EQ(size, this->builder_->length());
-  ASSERT_EQ(BitUtil::NextPower2(size), this->builder_->capacity());
+  ASSERT_GE(size, this->builder_->capacity());
 
   ASSERT_EQ(size, this->builder_->length());
-  ASSERT_EQ(BitUtil::NextPower2(size), this->builder_->capacity());
+  ASSERT_GE(size, this->builder_->capacity());
 
   this->Check(this->builder_, true);
   this->Check(this->builder_nn_, false);
@@ -884,9 +890,9 @@ TYPED_TEST(TestPrimitiveBuilder, TestAppendValuesStdBool) {
   ASSERT_OK(this->builder_nn_->AppendValues(draws.data(), K));
 
   ASSERT_EQ(1000, this->builder_->length());
-  ASSERT_EQ(1024, this->builder_->capacity());
+  ASSERT_EQ(1000, this->builder_->capacity());
   ASSERT_EQ(1000, this->builder_nn_->length());
-  ASSERT_EQ(1024, this->builder_nn_->capacity());
+  ASSERT_EQ(1000, this->builder_nn_->capacity());
 
   // Append the next 9000
   is_valid.clear();
@@ -900,10 +906,10 @@ TYPED_TEST(TestPrimitiveBuilder, TestAppendValuesStdBool) {
   ASSERT_OK(this->builder_nn_->AppendValues(partial_draws));
 
   ASSERT_EQ(size, this->builder_->length());
-  ASSERT_EQ(BitUtil::NextPower2(size), this->builder_->capacity());
+  ASSERT_GE(size, this->builder_->capacity());
 
   ASSERT_EQ(size, this->builder_nn_->length());
-  ASSERT_EQ(BitUtil::NextPower2(size), this->builder_->capacity());
+  ASSERT_GE(size, this->builder_->capacity());
 
   this->Check(this->builder_, true);
   this->Check(this->builder_nn_, false);
@@ -934,13 +940,14 @@ TYPED_TEST(TestPrimitiveBuilder, TestReserve) {
   ASSERT_EQ(0, this->builder_->length());
   ASSERT_EQ(kMinBuilderCapacity, this->builder_->capacity());
 
-  ASSERT_OK(this->builder_->Reserve(90));
+  ASSERT_OK(this->builder_->Reserve(100));
+  ASSERT_EQ(0, this->builder_->length());
+  ASSERT_GE(100, this->builder_->capacity());
   ASSERT_OK(this->builder_->Advance(100));
-  ASSERT_OK(this->builder_->Reserve(kMinBuilderCapacity));
+  ASSERT_EQ(100, this->builder_->length());
+  ASSERT_GE(100, this->builder_->capacity());
 
   ASSERT_RAISES(Invalid, this->builder_->Resize(1));
-
-  ASSERT_EQ(BitUtil::NextPower2(kMinBuilderCapacity + 100), this->builder_->capacity());
 }
 
 TEST(TestBooleanBuilder, AppendNullsAdvanceBuilder) {
diff --git a/cpp/src/arrow/array/builder_base.h b/cpp/src/arrow/array/builder_base.h
index ee35407..21503ee 100644
--- a/cpp/src/arrow/array/builder_base.h
+++ b/cpp/src/arrow/array/builder_base.h
@@ -85,17 +85,18 @@ class ARROW_EXPORT ArrayBuilder {
   virtual Status Resize(int64_t capacity);
 
   /// \brief Ensure that there is enough space allocated to add the indicated
-  /// number of elements without any further calls to Resize. The memory
-  /// allocated is rounded up to the next highest power of 2 similar to memory
-  /// allocations in STL containers like std::vector
+  /// number of elements without any further calls to Resize. Overallocation is
+  /// used in order to minimize the impact of incremental Reserve() calls.
+  ///
   /// \param[in] additional_capacity the number of additional array values
   /// \return Status
   Status Reserve(int64_t additional_capacity) {
+    auto current_capacity = capacity();
     auto min_capacity = length() + additional_capacity;
-    if (min_capacity <= capacity()) return Status::OK();
+    if (min_capacity <= current_capacity) return Status::OK();
 
     // leave growth factor up to BufferBuilder
-    auto new_capacity = BufferBuilder::GrowByFactor(min_capacity);
+    auto new_capacity = BufferBuilder::GrowByFactor(current_capacity, min_capacity);
     return Resize(new_capacity);
   }
 
diff --git a/cpp/src/arrow/buffer-builder.h b/cpp/src/arrow/buffer-builder.h
index a843021..32d7804 100644
--- a/cpp/src/arrow/buffer-builder.h
+++ b/cpp/src/arrow/buffer-builder.h
@@ -57,7 +57,6 @@ class ARROW_EXPORT BufferBuilder {
     if (new_capacity == 0) {
       return Status::OK();
     }
-    int64_t old_capacity = capacity_;
     if (buffer_ == NULLPTR) {
       ARROW_RETURN_NOT_OK(AllocateResizableBuffer(pool_, new_capacity, &buffer_));
     } else {
@@ -65,9 +64,6 @@ class ARROW_EXPORT BufferBuilder {
     }
     capacity_ = buffer_->capacity();
     data_ = buffer_->mutable_data();
-    if (capacity_ > old_capacity) {
-      memset(data_ + old_capacity, 0, capacity_ - old_capacity);
-    }
     return Status::OK();
   }
 
@@ -75,25 +71,23 @@ class ARROW_EXPORT BufferBuilder {
   /// without the need to perform allocations
   ///
   /// \param[in] additional_bytes number of additional bytes to make space for
-  /// \param[in] grow_by_factor if true, round up allocations using the
-  /// strategy in BufferBuilder::GrowByFactor
   /// \return Status
-  Status Reserve(const int64_t additional_bytes, bool grow_by_factor = false) {
+  Status Reserve(const int64_t additional_bytes) {
     auto min_capacity = size_ + additional_bytes;
-    if (min_capacity <= capacity_) return Status::OK();
-    if (grow_by_factor) {
-      min_capacity = GrowByFactor(min_capacity);
+    if (min_capacity <= capacity_) {
+      return Status::OK();
     }
-    return Resize(min_capacity, false);
+    return Resize(GrowByFactor(capacity_, min_capacity), false);
   }
 
-  /// \brief Return a capacity expanded by a growth factor of 2
-  static int64_t GrowByFactor(const int64_t min_capacity) {
-    // If the capacity was not already a multiple of 2, do so here
-    // TODO(emkornfield) doubling isn't great default allocation practice
+  /// \brief Return a capacity expanded by an unspecified growth factor
+  static int64_t GrowByFactor(int64_t current_capacity, int64_t new_capacity) {
+    // NOTE: Doubling isn't a great overallocation practice
     // see https://github.com/facebook/folly/blob/master/folly/docs/FBVector.md
-    // for discussion
-    return BitUtil::NextPower2(min_capacity);
+    // for discussion.
+    // Grow exactly if a large upsize (the caller might know the exact final size).
+    // Otherwise overallocate by 1.5 to keep a linear amortized cost.
+    return std::max(new_capacity, current_capacity * 3 / 2);
   }
 
   /// \brief Append the given data to the buffer
@@ -101,7 +95,7 @@ class ARROW_EXPORT BufferBuilder {
   /// The buffer is automatically expanded if necessary.
   Status Append(const void* data, const int64_t length) {
     if (ARROW_PREDICT_FALSE(size_ + length > capacity_)) {
-      ARROW_RETURN_NOT_OK(Resize(GrowByFactor(size_ + length), false));
+      ARROW_RETURN_NOT_OK(Resize(GrowByFactor(capacity_, size_ + length), false));
     }
     UnsafeAppend(data, length);
     return Status::OK();
@@ -111,7 +105,7 @@ class ARROW_EXPORT BufferBuilder {
   ///
   /// The buffer is automatically expanded if necessary.
   Status Append(const int64_t num_copies, uint8_t value) {
-    ARROW_RETURN_NOT_OK(Reserve(num_copies, true));
+    ARROW_RETURN_NOT_OK(Reserve(num_copies));
     UnsafeAppend(num_copies, value);
     return Status::OK();
   }
@@ -122,7 +116,7 @@ class ARROW_EXPORT BufferBuilder {
   template <size_t NBYTES>
   Status Append(const std::array<uint8_t, NBYTES>& data) {
     constexpr auto nbytes = static_cast<int64_t>(NBYTES);
-    ARROW_RETURN_NOT_OK(Reserve(NBYTES, true));
+    ARROW_RETURN_NOT_OK(Reserve(NBYTES));
     std::copy(data.cbegin(), data.cend(), data_ + size_);
     size_ += nbytes;
     return Status::OK();
@@ -200,8 +194,7 @@ class TypedBufferBuilder<T, typename std::enable_if<std::is_arithmetic<T>::value
   }
 
   Status Append(const int64_t num_copies, T value) {
-    ARROW_RETURN_NOT_OK(
-        Resize(BufferBuilder::GrowByFactor(num_copies + length()), false));
+    ARROW_RETURN_NOT_OK(Reserve(num_copies + length()));
     UnsafeAppend(num_copies, value);
     return Status::OK();
   }
@@ -266,19 +259,19 @@ class TypedBufferBuilder<bool> {
       : bytes_builder_(pool) {}
 
   Status Append(bool value) {
-    ARROW_RETURN_NOT_OK(ResizeWithGrowthFactor(bit_length_ + 1));
+    ARROW_RETURN_NOT_OK(Reserve(1));
     UnsafeAppend(value);
     return Status::OK();
   }
 
   Status Append(const uint8_t* valid_bytes, int64_t num_elements) {
-    ARROW_RETURN_NOT_OK(ResizeWithGrowthFactor(bit_length_ + num_elements));
+    ARROW_RETURN_NOT_OK(Reserve(num_elements));
     UnsafeAppend(valid_bytes, num_elements);
     return Status::OK();
   }
 
   Status Append(const int64_t num_copies, bool value) {
-    ARROW_RETURN_NOT_OK(ResizeWithGrowthFactor(bit_length_ + num_copies));
+    ARROW_RETURN_NOT_OK(Reserve(num_copies));
     UnsafeAppend(num_copies, value);
     return Status::OK();
   }
@@ -327,9 +320,14 @@ class TypedBufferBuilder<bool> {
 
   Status Resize(const int64_t new_capacity, bool shrink_to_fit = true) {
     const int64_t old_byte_capacity = bytes_builder_.capacity();
-    const int64_t new_byte_capacity = BitUtil::BytesForBits(new_capacity);
-    ARROW_RETURN_NOT_OK(bytes_builder_.Resize(new_byte_capacity, shrink_to_fit));
+    ARROW_RETURN_NOT_OK(
+        bytes_builder_.Resize(BitUtil::BytesForBits(new_capacity), shrink_to_fit));
+    // Resize() may have chosen a larger capacity (e.g. for padding),
+    // so ask it again before calling memset().
+    const int64_t new_byte_capacity = bytes_builder_.capacity();
     if (new_byte_capacity > old_byte_capacity) {
+      // The additional buffer space is 0-initialized for convenience,
+      // so that other methods can simply bump the length.
       memset(mutable_data() + old_byte_capacity, 0,
              static_cast<size_t>(new_byte_capacity - old_byte_capacity));
     }
@@ -337,13 +335,16 @@ class TypedBufferBuilder<bool> {
   }
 
   Status Reserve(const int64_t additional_elements) {
-    return Resize(bit_length_ + additional_elements, false);
+    return Resize(
+        BufferBuilder::GrowByFactor(bit_length_, bit_length_ + additional_elements),
+        false);
   }
 
   Status Advance(const int64_t length) {
+    ARROW_RETURN_NOT_OK(Reserve(length));
     bit_length_ += length;
     false_count_ += length;
-    return ResizeWithGrowthFactor(bit_length_);
+    return Status::OK();
   }
 
   Status Finish(std::shared_ptr<Buffer>* out, bool shrink_to_fit = true) {
@@ -366,9 +367,6 @@ class TypedBufferBuilder<bool> {
   int64_t false_count() const { return false_count_; }
 
  private:
-  Status ResizeWithGrowthFactor(const int64_t min_capacity) {
-    return Resize(BufferBuilder::GrowByFactor(min_capacity), false);
-  }
   BufferBuilder bytes_builder_;
   int64_t bit_length_ = 0;
   int64_t false_count_ = 0;