You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2018/01/24 03:03:54 UTC
[arrow] branch master updated: ARROW-1712: [C++] Add method to
BinaryBuilder to reserve space for value data
This is an automated email from the ASF dual-hosted git repository.
wesm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new 0a49022 ARROW-1712: [C++] Add method to BinaryBuilder to reserve space for value data
0a49022 is described below
commit 0a490222c4078a7cb0ff085cd5c9884fdda57998
Author: Panchen Xue <pa...@gmail.com>
AuthorDate: Tue Jan 23 22:03:50 2018 -0500
ARROW-1712: [C++] Add method to BinaryBuilder to reserve space for value data
Modified BinaryBuilder::Resize(int64_t) so that when building BinaryArrays with a known size, space is also reserved for value_data_builder_ to prevent internal reallocation.
Author: Panchen Xue <pa...@gmail.com>
Closes #1481 from xuepanchen/master and squashes the following commits:
707b67bf [Panchen Xue] ARROW-1712: [C++] Fix lint errors
360e6018 [Panchen Xue] Merge branch 'master' of https://github.com/xuepanchen/arrow
d4bbd151 [Panchen Xue] ARROW-1712: [C++] Modify test case for BinaryBuilder::ReserveData() and change arguments for offsets_builder_.Resize()
77f8f3c1 [Panchen Xue] Merge pull request #5 from apache/master
bc5db7d3 [Panchen Xue] ARROW-1712: [C++] Remove unneeded data member in BinaryBuilder and modify test case
5a5b70e2 [Panchen Xue] Merge pull request #4 from apache/master
8e4c8925 [Panchen Xue] Merge pull request #3 from xuepanchen/xuepanchen-arrow-1712
d3c8202b [Panchen Xue] ARROW-1945: [C++] Fix a small typo
0b078955 [Panchen Xue] ARROW-1945: [C++] Add data_capacity_ to track capacity of value data
18f90fb8 [Panchen Xue] ARROW-1945: [C++] Add data_capacity_ to track capacity of value data
bbc65270 [Panchen Xue] ARROW-1945: [C++] Update test case for BinaryBuild data value space reservation
15e045cc [Panchen Xue] Add test case for array-test.cc
5a5593e5 [Panchen Xue] Update again ReserveData(int64_t) method for BinaryBuilder
9b5e8059 [Panchen Xue] Update ReserveData(int64_t) method signature for BinaryBuilder
8dd5eaa9 [Panchen Xue] Update builder.cc
b002e0bd [Panchen Xue] Remove override keyword from ReserveData(int64_t) method for BinaryBuilder
de318f47 [Panchen Xue] Implement ReserveData(int64_t) method for BinaryBuilder
e0434e61 [Panchen Xue] Add ReserveData(int64_t) and value_data_capacity() for methods for BinaryBuilder
5ebfb320 [Panchen Xue] Add capacity() method for TypedBufferBuilder
5b73c1c5 [Panchen Xue] Update again BinaryBuilder::Resize(int64_t capacity) in builder.cc
d021c54b [Panchen Xue] Merge pull request #2 from xuepanchen/xuepanchen-arrow-1712
232024e3 [Panchen Xue] Update BinaryBuilder::Resize(int64_t capacity) in builder.cc
c2f8dc4e [Panchen Xue] Merge pull request #1 from apache/master
---
cpp/src/arrow/array-test.cc | 39 +++++++++++++++++++++++++++++++++++++++
cpp/src/arrow/buffer.h | 1 +
cpp/src/arrow/builder.cc | 18 ++++++++++++++----
cpp/src/arrow/builder.h | 5 +++++
4 files changed, 59 insertions(+), 4 deletions(-)
diff --git a/cpp/src/arrow/array-test.cc b/cpp/src/arrow/array-test.cc
index 7ff3261..c53da85 100644
--- a/cpp/src/arrow/array-test.cc
+++ b/cpp/src/arrow/array-test.cc
@@ -1155,6 +1155,45 @@ TEST_F(TestBinaryBuilder, TestScalarAppend) {
}
}
+TEST_F(TestBinaryBuilder, TestCapacityReserve) {
+ vector<string> strings = {"aaaaa", "bbbbbbbbbb", "ccccccccccccccc", "dddddddddd"};
+ int N = static_cast<int>(strings.size());
+ int reps = 15;
+ int64_t length = 0;
+ int64_t capacity = 1000;
+ int64_t expected_capacity = BitUtil::RoundUpToMultipleOf64(capacity);
+
+ ASSERT_OK(builder_->ReserveData(capacity));
+
+ ASSERT_EQ(length, builder_->value_data_length());
+ ASSERT_EQ(expected_capacity, builder_->value_data_capacity());
+
+ for (int j = 0; j < reps; ++j) {
+ for (int i = 0; i < N; ++i) {
+ ASSERT_OK(builder_->Append(strings[i]));
+ length += static_cast<int>(strings[i].size());
+
+ ASSERT_EQ(length, builder_->value_data_length());
+ ASSERT_EQ(expected_capacity, builder_->value_data_capacity());
+ }
+ }
+
+ int extra_capacity = 500;
+ expected_capacity = BitUtil::RoundUpToMultipleOf64(length + extra_capacity);
+
+ ASSERT_OK(builder_->ReserveData(extra_capacity));
+
+ ASSERT_EQ(length, builder_->value_data_length());
+ ASSERT_EQ(expected_capacity, builder_->value_data_capacity());
+
+ Done();
+
+ ASSERT_EQ(reps * N, result_->length());
+ ASSERT_EQ(0, result_->null_count());
+ ASSERT_EQ(reps * 40, result_->value_data()->size());
+ ASSERT_EQ(expected_capacity, result_->value_data()->capacity());
+}
+
TEST_F(TestBinaryBuilder, TestZeroLength) {
// All buffers are null
Done();
diff --git a/cpp/src/arrow/buffer.h b/cpp/src/arrow/buffer.h
index b50b1a1..44c352a 100644
--- a/cpp/src/arrow/buffer.h
+++ b/cpp/src/arrow/buffer.h
@@ -333,6 +333,7 @@ class ARROW_EXPORT TypedBufferBuilder : public BufferBuilder {
const T* data() const { return reinterpret_cast<const T*>(data_); }
int64_t length() const { return size_ / sizeof(T); }
+ int64_t capacity() const { return capacity_ / sizeof(T); }
};
/// \brief Allocate a fixed size mutable buffer from a memory pool
diff --git a/cpp/src/arrow/builder.cc b/cpp/src/arrow/builder.cc
index de132b5..db90152 100644
--- a/cpp/src/arrow/builder.cc
+++ b/cpp/src/arrow/builder.cc
@@ -1165,13 +1165,13 @@ Status ListBuilder::Init(int64_t elements) {
DCHECK_LT(elements, std::numeric_limits<int32_t>::max());
RETURN_NOT_OK(ArrayBuilder::Init(elements));
// one more then requested for offsets
- return offsets_builder_.Resize((elements + 1) * sizeof(int64_t));
+ return offsets_builder_.Resize((elements + 1) * sizeof(int32_t));
}
Status ListBuilder::Resize(int64_t capacity) {
DCHECK_LT(capacity, std::numeric_limits<int32_t>::max());
// one more then requested for offsets
- RETURN_NOT_OK(offsets_builder_.Resize((capacity + 1) * sizeof(int64_t)));
+ RETURN_NOT_OK(offsets_builder_.Resize((capacity + 1) * sizeof(int32_t)));
return ArrayBuilder::Resize(capacity);
}
@@ -1216,16 +1216,26 @@ Status BinaryBuilder::Init(int64_t elements) {
DCHECK_LT(elements, std::numeric_limits<int32_t>::max());
RETURN_NOT_OK(ArrayBuilder::Init(elements));
// one more then requested for offsets
- return offsets_builder_.Resize((elements + 1) * sizeof(int64_t));
+ return offsets_builder_.Resize((elements + 1) * sizeof(int32_t));
}
Status BinaryBuilder::Resize(int64_t capacity) {
DCHECK_LT(capacity, std::numeric_limits<int32_t>::max());
// one more then requested for offsets
- RETURN_NOT_OK(offsets_builder_.Resize((capacity + 1) * sizeof(int64_t)));
+ RETURN_NOT_OK(offsets_builder_.Resize((capacity + 1) * sizeof(int32_t)));
return ArrayBuilder::Resize(capacity);
}
+Status BinaryBuilder::ReserveData(int64_t elements) {
+ if (value_data_length() + elements > value_data_capacity()) {
+ if (value_data_length() + elements > std::numeric_limits<int32_t>::max()) {
+ return Status::Invalid("Cannot reserve capacity larger than 2^31 - 1 for binary");
+ }
+ RETURN_NOT_OK(value_data_builder_.Reserve(elements));
+ }
+ return Status::OK();
+}
+
Status BinaryBuilder::AppendNextOffset() {
const int64_t num_bytes = value_data_builder_.length();
if (ARROW_PREDICT_FALSE(num_bytes > kMaximumCapacity)) {
diff --git a/cpp/src/arrow/builder.h b/cpp/src/arrow/builder.h
index ce7b8cd..d1611f6 100644
--- a/cpp/src/arrow/builder.h
+++ b/cpp/src/arrow/builder.h
@@ -682,10 +682,15 @@ class ARROW_EXPORT BinaryBuilder : public ArrayBuilder {
Status Init(int64_t elements) override;
Status Resize(int64_t capacity) override;
+ /// \brief Ensures there is enough allocated capacity to append the indicated
+ /// number of bytes to the value data buffer without additional allocations
+ Status ReserveData(int64_t elements);
Status FinishInternal(std::shared_ptr<ArrayData>* out) override;
/// \return size of values buffer so far
int64_t value_data_length() const { return value_data_builder_.length(); }
+ /// \return capacity of values buffer
+ int64_t value_data_capacity() const { return value_data_builder_.capacity(); }
/// Temporary access to a value.
///
--
To stop receiving notification emails like this one, please contact
wesm@apache.org.