You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2018/12/04 01:19:21 UTC
[arrow] branch master updated: ARROW-3906: [C++] Break out
builder.cc into multiple compilation units
This is an automated email from the ASF dual-hosted git repository.
wesm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new bb739b0 ARROW-3906: [C++] Break out builder.cc into multiple compilation units
bb739b0 is described below
commit bb739b0373ec141b777150e70a3783a9fde898a8
Author: Antoine Pitrou <an...@python.org>
AuthorDate: Mon Dec 3 19:19:10 2018 -0600
ARROW-3906: [C++] Break out builder.cc into multiple compilation units
This improves readability and can also make incremental compilation faster.
Author: Antoine Pitrou <an...@python.org>
Closes #3076 from pitrou/ARROW-3906-break-out-builder-cc and squashes the following commits:
3cf77e4d2 <Antoine Pitrou> ARROW-3906: Break out builder.cc into multiple compilation units
---
cpp/src/arrow/CMakeLists.txt | 3 +
cpp/src/arrow/builder-adaptive.cc | 405 ++++++++++++++++++
cpp/src/arrow/builder-binary.cc | 315 ++++++++++++++
cpp/src/arrow/builder-dict.cc | 228 ++++++++++
cpp/src/arrow/builder.cc | 853 +-------------------------------------
cpp/src/arrow/builder.h | 12 +
6 files changed, 966 insertions(+), 850 deletions(-)
diff --git a/cpp/src/arrow/CMakeLists.txt b/cpp/src/arrow/CMakeLists.txt
index a56079f..336007d 100644
--- a/cpp/src/arrow/CMakeLists.txt
+++ b/cpp/src/arrow/CMakeLists.txt
@@ -19,6 +19,9 @@ set(ARROW_SRCS
array.cc
buffer.cc
builder.cc
+ builder-adaptive.cc
+ builder-binary.cc
+ builder-dict.cc
compare.cc
memory_pool.cc
pretty_print.cc
diff --git a/cpp/src/arrow/builder-adaptive.cc b/cpp/src/arrow/builder-adaptive.cc
new file mode 100644
index 0000000..a715f46
--- /dev/null
+++ b/cpp/src/arrow/builder-adaptive.cc
@@ -0,0 +1,405 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <cstddef>
+#include <cstdint>
+#include <utility>
+
+#include "arrow/array.h"
+#include "arrow/buffer.h"
+#include "arrow/builder.h"
+#include "arrow/status.h"
+#include "arrow/type.h"
+#include "arrow/type_traits.h"
+#include "arrow/util/bit-util.h"
+#include "arrow/util/int-util.h"
+#include "arrow/util/logging.h"
+
+namespace arrow {
+
+using internal::AdaptiveIntBuilderBase;
+
+// Constructs the state shared by the signed and unsigned adaptive builders.
+// The base ArrayBuilder is given the int64() type, no data buffer is allocated
+// yet, values start out 1 byte wide (int_size_), and the pending area is empty
+// with no nulls recorded.
+AdaptiveIntBuilderBase::AdaptiveIntBuilderBase(MemoryPool* pool)
+ : ArrayBuilder(int64(), pool),
+ data_(nullptr),
+ raw_data_(nullptr),
+ int_size_(1),
+ pending_pos_(0),
+ pending_has_nulls_(false) {}
+
+// Returns the builder to an empty state: base-class state is reset, the value
+// buffer is released and any pending (uncommitted) entries are forgotten.
+// int_size_ is not modified here.
+void AdaptiveIntBuilderBase::Reset() {
+  ArrayBuilder::Reset();
+
+  // Forget values that were staged but never committed.
+  pending_has_nulls_ = false;
+  pending_pos_ = 0;
+
+  // Drop the cached pointer together with the buffer it pointed into.
+  raw_data_ = nullptr;
+  data_ = nullptr;
+}
+
+// Grows the value buffer so it can hold `capacity` values at the current byte
+// width (int_size_), then resizes the base builder. Capacities smaller than
+// kMinBuilderCapacity are rounded up to it.
+Status AdaptiveIntBuilderBase::Resize(int64_t capacity) {
+  RETURN_NOT_OK(CheckCapacity(capacity, capacity_));
+  if (capacity < kMinBuilderCapacity) {
+    capacity = kMinBuilderCapacity;
+  }
+
+  const int64_t nbytes = capacity * int_size_;
+  if (capacity_ != 0) {
+    // A buffer already exists: grow it in place.
+    RETURN_NOT_OK(data_->Resize(nbytes));
+  } else {
+    // First allocation for this builder.
+    RETURN_NOT_OK(AllocateResizableBuffer(pool_, nbytes, &data_));
+  }
+  // The buffer may have moved; refresh the cached raw pointer.
+  raw_data_ = reinterpret_cast<uint8_t*>(data_->mutable_data());
+
+  return ArrayBuilder::Resize(capacity);
+}
+
+// Signed adaptive builder: all state lives in AdaptiveIntBuilderBase.
+AdaptiveIntBuilder::AdaptiveIntBuilder(MemoryPool* pool) : AdaptiveIntBuilderBase(pool) {}
+
+// Commits pending values, trims the buffers to the number of appended values
+// and emits ArrayData typed as the signed integer matching int_size_.
+// On success the builder no longer owns the buffers and its counters are zero.
+Status AdaptiveIntBuilder::FinishInternal(std::shared_ptr<ArrayData>* out) {
+  RETURN_NOT_OK(CommitPendingData());
+
+  // Map the current byte width onto the corresponding signed type.
+  std::shared_ptr<DataType> result_type;
+  if (int_size_ == 1) {
+    result_type = int8();
+  } else if (int_size_ == 2) {
+    result_type = int16();
+  } else if (int_size_ == 4) {
+    result_type = int32();
+  } else if (int_size_ == 8) {
+    result_type = int64();
+  } else {
+    DCHECK(false);
+    return Status::NotImplemented("Only ints of size 1,2,4,8 are supported");
+  }
+
+  // Shrink the validity bitmap and the value buffer to what was actually used.
+  RETURN_NOT_OK(TrimBuffer(BitUtil::BytesForBits(length_), null_bitmap_.get()));
+  RETURN_NOT_OK(TrimBuffer(length_ * int_size_, data_.get()));
+
+  *out = ArrayData::Make(result_type, length_, {null_bitmap_, data_}, null_count_);
+
+  // Release our references and zero the bookkeeping.
+  null_bitmap_ = nullptr;
+  data_ = nullptr;
+  null_count_ = 0;
+  length_ = 0;
+  capacity_ = 0;
+  return Status::OK();
+}
+
+// Moves the values staged in pending_data_ into the main buffer. Validity
+// bytes are only forwarded when at least one pending entry was marked null.
+Status AdaptiveIntBuilder::CommitPendingData() {
+  if (pending_pos_ != 0) {
+    RETURN_NOT_OK(Reserve(pending_pos_));
+
+    const uint8_t* valid_bytes = nullptr;
+    if (pending_has_nulls_) {
+      valid_bytes = pending_valid_;
+    }
+    // pending_data_ is reinterpreted as signed 64-bit values for the append.
+    const int64_t* staged = reinterpret_cast<const int64_t*>(pending_data_);
+    RETURN_NOT_OK(AppendValuesInternal(staged, pending_pos_, valid_bytes));
+
+    pending_pos_ = 0;
+    pending_has_nulls_ = false;
+  }
+  return Status::OK();
+}
+
+// Maximum number of values handled per iteration by the AppendValuesInternal
+// loops below (width detection followed by copy).
+static constexpr int64_t kAdaptiveIntChunkSize = 8192;
+
+// Appends `length` signed values starting at `values`, in chunks of at most
+// kAdaptiveIntChunkSize. For each chunk the required byte width is detected,
+// storage is widened through ExpandIntSize() when necessary, and the values are
+// written into raw_data_ at element index length_ using the current int_size_.
+// `valid_bytes` may be nullptr; otherwise it is advanced in step with `values`.
+// No Reserve() is performed here: the callers in this file reserve space first.
+Status AdaptiveIntBuilder::AppendValuesInternal(const int64_t* values, int64_t length,
+ const uint8_t* valid_bytes) {
+ while (length > 0) {
+ // In case `length` is very large, we don't want to trash the cache by
+ // scanning it twice (first to detect int width, second to copy the data).
+ // Instead, process data in L2-cacheable chunks.
+ const int64_t chunk_size = std::min(length, kAdaptiveIntChunkSize);
+
+ uint8_t new_int_size;
+ new_int_size = internal::DetectIntWidth(values, valid_bytes, chunk_size, int_size_);
+
+ // The detected width is expected to never be narrower than the current one.
+ DCHECK_GE(new_int_size, int_size_);
+ if (new_int_size > int_size_) {
+ // This updates int_size_
+ RETURN_NOT_OK(ExpandIntSize(new_int_size));
+ }
+
+ // Write the chunk at the end of the existing values, narrowing each 64-bit
+ // input to the current storage width.
+ switch (int_size_) {
+ case 1:
+ internal::DowncastInts(values, reinterpret_cast<int8_t*>(raw_data_) + length_,
+ chunk_size);
+ break;
+ case 2:
+ internal::DowncastInts(values, reinterpret_cast<int16_t*>(raw_data_) + length_,
+ chunk_size);
+ break;
+ case 4:
+ internal::DowncastInts(values, reinterpret_cast<int32_t*>(raw_data_) + length_,
+ chunk_size);
+ break;
+ case 8:
+ internal::DowncastInts(values, reinterpret_cast<int64_t*>(raw_data_) + length_,
+ chunk_size);
+ break;
+ default:
+ DCHECK(false);
+ }
+
+ // This updates length_
+ ArrayBuilder::UnsafeAppendToBitmap(valid_bytes, chunk_size);
+ values += chunk_size;
+ if (valid_bytes != nullptr) {
+ valid_bytes += chunk_size;
+ }
+ length -= chunk_size;
+ }
+
+ return Status::OK();
+}
+
+// Moves the values staged in pending_data_ into the main buffer. Validity
+// bytes are only forwarded when at least one pending entry was marked null.
+Status AdaptiveUIntBuilder::CommitPendingData() {
+  if (pending_pos_ != 0) {
+    RETURN_NOT_OK(Reserve(pending_pos_));
+
+    const uint8_t* valid_bytes = nullptr;
+    if (pending_has_nulls_) {
+      valid_bytes = pending_valid_;
+    }
+    RETURN_NOT_OK(AppendValuesInternal(pending_data_, pending_pos_, valid_bytes));
+
+    pending_pos_ = 0;
+    pending_has_nulls_ = false;
+  }
+  return Status::OK();
+}
+
+// Bulk-appends `length` signed values with optional validity bytes.
+// Pending values are committed first, then space is reserved before the
+// values are handed to AppendValuesInternal().
+Status AdaptiveIntBuilder::AppendValues(const int64_t* values, int64_t length,
+ const uint8_t* valid_bytes) {
+ RETURN_NOT_OK(CommitPendingData());
+ RETURN_NOT_OK(Reserve(length));
+
+ return AppendValuesInternal(values, length, valid_bytes);
+}
+
+// Overload selected when old_type is at least as wide as new_type: there is
+// nothing to widen, so it does nothing and reports success.
+template <typename new_type, typename old_type>
+typename std::enable_if<sizeof(old_type) >= sizeof(new_type), Status>::type
+AdaptiveIntBuilder::ExpandIntSizeInternal() {
+ return Status::OK();
+}
+
+// __LESS wraps the `<` comparison used inside the enable_if template argument
+// below; it is undefined again right after the function.
+#define __LESS(a, b) (a) < (b)
+// Overload selected when old_type is narrower than new_type. Sets int_size_ to
+// the new width, resizes storage to hold the same number of elements at that
+// width, then converts the existing length_ values in place.
+template <typename new_type, typename old_type>
+typename std::enable_if<__LESS(sizeof(old_type), sizeof(new_type)), Status>::type
+AdaptiveIntBuilder::ExpandIntSizeInternal() {
+ int_size_ = sizeof(new_type);
+ // data_->size() is in bytes at the old width; dividing yields the element count.
+ RETURN_NOT_OK(Resize(data_->size() / sizeof(old_type)));
+ raw_data_ = reinterpret_cast<uint8_t*>(data_->mutable_data());
+ // Source and destination views alias the same buffer.
+ const old_type* src = reinterpret_cast<old_type*>(raw_data_);
+ new_type* dst = reinterpret_cast<new_type*>(raw_data_);
+
+ // By doing the backward copy, we ensure that no element is overridden during
+ // the copy process and the copy stays in-place.
+ std::copy_backward(src, src + length_, dst + length_);
+
+ return Status::OK();
+}
+#undef __LESS
+
+// Dispatches on the current byte width (int_size_) to the ExpandIntSizeInternal
+// instantiation that converts from that width to new_type.
+template <typename new_type>
+Status AdaptiveIntBuilder::ExpandIntSizeN() {
+  switch (int_size_) {
+    case 1:
+      return ExpandIntSizeInternal<new_type, int8_t>();
+    case 2:
+      return ExpandIntSizeInternal<new_type, int16_t>();
+    case 4:
+      return ExpandIntSizeInternal<new_type, int32_t>();
+    case 8:
+      return ExpandIntSizeInternal<new_type, int64_t>();
+    default:
+      // Unexpected width: flagged in debug builds, treated as a no-op otherwise.
+      DCHECK(false);
+      return Status::OK();
+  }
+}
+
+// Widens the storage to `new_int_size` bytes per value by dispatching to the
+// matching ExpandIntSizeN instantiation.
+Status AdaptiveIntBuilder::ExpandIntSize(uint8_t new_int_size) {
+  switch (new_int_size) {
+    case 1:
+      return ExpandIntSizeN<int8_t>();
+    case 2:
+      return ExpandIntSizeN<int16_t>();
+    case 4:
+      return ExpandIntSizeN<int32_t>();
+    case 8:
+      return ExpandIntSizeN<int64_t>();
+    default:
+      // Unexpected width: flagged in debug builds, treated as a no-op otherwise.
+      DCHECK(false);
+      return Status::OK();
+  }
+}
+
+// Unsigned adaptive builder: all state lives in AdaptiveIntBuilderBase.
+AdaptiveUIntBuilder::AdaptiveUIntBuilder(MemoryPool* pool)
+ : AdaptiveIntBuilderBase(pool) {}
+
+// Commits pending values, trims the buffers to the number of appended values
+// and emits ArrayData typed as the unsigned integer matching int_size_.
+// On success the builder no longer owns the buffers and its counters are zero.
+Status AdaptiveUIntBuilder::FinishInternal(std::shared_ptr<ArrayData>* out) {
+  RETURN_NOT_OK(CommitPendingData());
+
+  // Map the current byte width onto the corresponding unsigned type.
+  std::shared_ptr<DataType> result_type;
+  if (int_size_ == 1) {
+    result_type = uint8();
+  } else if (int_size_ == 2) {
+    result_type = uint16();
+  } else if (int_size_ == 4) {
+    result_type = uint32();
+  } else if (int_size_ == 8) {
+    result_type = uint64();
+  } else {
+    DCHECK(false);
+    return Status::NotImplemented("Only ints of size 1,2,4,8 are supported");
+  }
+
+  // Shrink the validity bitmap and the value buffer to what was actually used.
+  RETURN_NOT_OK(TrimBuffer(BitUtil::BytesForBits(length_), null_bitmap_.get()));
+  RETURN_NOT_OK(TrimBuffer(length_ * int_size_, data_.get()));
+
+  *out = ArrayData::Make(result_type, length_, {null_bitmap_, data_}, null_count_);
+
+  // Release our references and zero the bookkeeping.
+  null_bitmap_ = nullptr;
+  data_ = nullptr;
+  null_count_ = 0;
+  length_ = 0;
+  capacity_ = 0;
+  return Status::OK();
+}
+
+// Appends `length` unsigned values starting at `values`, in chunks of at most
+// kAdaptiveIntChunkSize. For each chunk the required byte width is detected,
+// storage is widened through ExpandIntSize() when necessary, and the values are
+// written into raw_data_ at element index length_ using the current int_size_.
+// `valid_bytes` may be nullptr; otherwise it is advanced in step with `values`.
+// No Reserve() is performed here.
+Status AdaptiveUIntBuilder::AppendValuesInternal(const uint64_t* values, int64_t length,
+ const uint8_t* valid_bytes) {
+ while (length > 0) {
+ // See AdaptiveIntBuilder::AppendValuesInternal
+ const int64_t chunk_size = std::min(length, kAdaptiveIntChunkSize);
+
+ uint8_t new_int_size;
+ new_int_size = internal::DetectUIntWidth(values, valid_bytes, chunk_size, int_size_);
+
+ // The detected width is expected to never be narrower than the current one.
+ DCHECK_GE(new_int_size, int_size_);
+ if (new_int_size > int_size_) {
+ // This updates int_size_
+ RETURN_NOT_OK(ExpandIntSize(new_int_size));
+ }
+
+ // Write the chunk at the end of the existing values, narrowing each 64-bit
+ // input to the current storage width.
+ switch (int_size_) {
+ case 1:
+ internal::DowncastUInts(values, reinterpret_cast<uint8_t*>(raw_data_) + length_,
+ chunk_size);
+ break;
+ case 2:
+ internal::DowncastUInts(values, reinterpret_cast<uint16_t*>(raw_data_) + length_,
+ chunk_size);
+ break;
+ case 4:
+ internal::DowncastUInts(values, reinterpret_cast<uint32_t*>(raw_data_) + length_,
+ chunk_size);
+ break;
+ case 8:
+ internal::DowncastUInts(values, reinterpret_cast<uint64_t*>(raw_data_) + length_,
+ chunk_size);
+ break;
+ default:
+ DCHECK(false);
+ }
+
+ // This updates length_
+ ArrayBuilder::UnsafeAppendToBitmap(valid_bytes, chunk_size);
+ values += chunk_size;
+ if (valid_bytes != nullptr) {
+ valid_bytes += chunk_size;
+ }
+ length -= chunk_size;
+ }
+
+ return Status::OK();
+}
+
+// Bulk-appends `length` unsigned values with optional validity bytes.
+//
+// Values still staged in the pending area are committed first so that they
+// keep their position ahead of the values appended here; without this step
+// they would only be written by a later CommitPendingData() call and end up
+// after the bulk values. This matches AdaptiveIntBuilder::AppendValues.
+Status AdaptiveUIntBuilder::AppendValues(const uint64_t* values, int64_t length,
+                                         const uint8_t* valid_bytes) {
+  RETURN_NOT_OK(CommitPendingData());
+  RETURN_NOT_OK(Reserve(length));
+
+  return AppendValuesInternal(values, length, valid_bytes);
+}
+
+// Overload selected when old_type is at least as wide as new_type: there is
+// nothing to widen, so it does nothing and reports success.
+template <typename new_type, typename old_type>
+typename std::enable_if<sizeof(old_type) >= sizeof(new_type), Status>::type
+AdaptiveUIntBuilder::ExpandIntSizeInternal() {
+ return Status::OK();
+}
+
+// __LESS wraps the `<` comparison used inside the enable_if template argument
+// below; it is undefined again right after the function.
+#define __LESS(a, b) (a) < (b)
+// Overload selected when old_type is narrower than new_type. Sets int_size_ to
+// the new width, resizes storage to hold the same number of elements at that
+// width, then converts the existing length_ values in place.
+template <typename new_type, typename old_type>
+typename std::enable_if<__LESS(sizeof(old_type), sizeof(new_type)), Status>::type
+AdaptiveUIntBuilder::ExpandIntSizeInternal() {
+ int_size_ = sizeof(new_type);
+ // data_->size() is in bytes at the old width; dividing yields the element count.
+ RETURN_NOT_OK(Resize(data_->size() / sizeof(old_type)));
+
+ // Source and destination views alias the same buffer.
+ old_type* src = reinterpret_cast<old_type*>(raw_data_);
+ new_type* dst = reinterpret_cast<new_type*>(raw_data_);
+ // By doing the backward copy, we ensure that no element is overridden during
+ // the copy process and the copy stays in-place.
+ std::copy_backward(src, src + length_, dst + length_);
+
+ return Status::OK();
+}
+#undef __LESS
+
+// Dispatches on the current byte width (int_size_) to the ExpandIntSizeInternal
+// instantiation that converts from that width to new_type.
+template <typename new_type>
+Status AdaptiveUIntBuilder::ExpandIntSizeN() {
+  switch (int_size_) {
+    case 1:
+      return ExpandIntSizeInternal<new_type, uint8_t>();
+    case 2:
+      return ExpandIntSizeInternal<new_type, uint16_t>();
+    case 4:
+      return ExpandIntSizeInternal<new_type, uint32_t>();
+    case 8:
+      return ExpandIntSizeInternal<new_type, uint64_t>();
+    default:
+      // Unexpected width: flagged in debug builds, treated as a no-op otherwise.
+      DCHECK(false);
+      return Status::OK();
+  }
+}
+
+// Widens the storage to `new_int_size` bytes per value by dispatching to the
+// matching ExpandIntSizeN instantiation.
+Status AdaptiveUIntBuilder::ExpandIntSize(uint8_t new_int_size) {
+  switch (new_int_size) {
+    case 1:
+      return ExpandIntSizeN<uint8_t>();
+    case 2:
+      return ExpandIntSizeN<uint16_t>();
+    case 4:
+      return ExpandIntSizeN<uint32_t>();
+    case 8:
+      return ExpandIntSizeN<uint64_t>();
+    default:
+      // Unexpected width: flagged in debug builds, treated as a no-op otherwise.
+      DCHECK(false);
+      return Status::OK();
+  }
+}
+
+} // namespace arrow
diff --git a/cpp/src/arrow/builder-binary.cc b/cpp/src/arrow/builder-binary.cc
new file mode 100644
index 0000000..c250837
--- /dev/null
+++ b/cpp/src/arrow/builder-binary.cc
@@ -0,0 +1,315 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <algorithm>
+#include <cstddef>
+#include <cstdint>
+#include <cstring>
+#include <numeric>
+#include <sstream>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "arrow/array.h"
+#include "arrow/buffer.h"
+#include "arrow/builder.h"
+#include "arrow/status.h"
+#include "arrow/type.h"
+#include "arrow/type_traits.h"
+#include "arrow/util/bit-util.h"
+#include "arrow/util/checked_cast.h"
+#include "arrow/util/decimal.h"
+#include "arrow/util/logging.h"
+
+namespace arrow {
+
+using internal::checked_cast;
+
+// ----------------------------------------------------------------------
+// String and binary
+
+// Constructs a builder for the given type; the offsets and value-data
+// sub-builders allocate from the same memory pool.
+BinaryBuilder::BinaryBuilder(const std::shared_ptr<DataType>& type, MemoryPool* pool)
+ : ArrayBuilder(type, pool), offsets_builder_(pool), value_data_builder_(pool) {}
+
+// Convenience constructor: delegates with the binary() type.
+BinaryBuilder::BinaryBuilder(MemoryPool* pool) : BinaryBuilder(binary(), pool) {}
+
+// Resizes the offsets storage and the base builder to hold `capacity` elements.
+// The value-data buffer is not touched here.
+Status BinaryBuilder::Resize(int64_t capacity) {
+ DCHECK_LE(capacity, kListMaximumElements);
+ RETURN_NOT_OK(CheckCapacity(capacity, capacity_));
+
+ // one more than requested for offsets
+ RETURN_NOT_OK(offsets_builder_.Resize((capacity + 1) * sizeof(int32_t)));
+ return ArrayBuilder::Resize(capacity);
+}
+
+// Ensures the value-data buffer can take `elements` more bytes. Fails with
+// CapacityError when the resulting size would exceed kBinaryMemoryLimit.
+Status BinaryBuilder::ReserveData(int64_t elements) {
+  const int64_t required = value_data_length() + elements;
+
+  // Already enough room: nothing to do.
+  if (required <= value_data_capacity()) {
+    return Status::OK();
+  }
+  if (required > kBinaryMemoryLimit) {
+    return Status::CapacityError(
+        "Cannot reserve capacity larger than 2^31 - 1 for binary");
+  }
+  return value_data_builder_.Reserve(elements);
+}
+
+// Records the current value-data length as the next offset. Fails with
+// CapacityError if that length no longer fits under kBinaryMemoryLimit.
+Status BinaryBuilder::AppendNextOffset() {
+  const int64_t num_bytes = value_data_builder_.length();
+  if (ARROW_PREDICT_FALSE(num_bytes > kBinaryMemoryLimit)) {
+    std::stringstream message;
+    message << "BinaryArray cannot contain more than ";
+    message << kBinaryMemoryLimit;
+    message << " bytes, have ";
+    message << num_bytes;
+    return Status::CapacityError(message.str());
+  }
+  // Safe to narrow: the limit check above bounds num_bytes.
+  const int32_t next_offset = static_cast<int32_t>(num_bytes);
+  return offsets_builder_.Append(next_offset);
+}
+
+// Appends one non-null value of `length` bytes: reserves a slot, records the
+// starting offset, copies the bytes, then marks the slot valid.
+Status BinaryBuilder::Append(const uint8_t* value, int32_t length) {
+ RETURN_NOT_OK(Reserve(1));
+ RETURN_NOT_OK(AppendNextOffset());
+ RETURN_NOT_OK(value_data_builder_.Append(value, length));
+
+ UnsafeAppendToBitmap(true);
+ return Status::OK();
+}
+
+// Appends one null slot: an offset is recorded (no value bytes are added) and
+// the slot is marked invalid in the bitmap.
+//
+// The slot is reserved before the offset is written, in the same order as
+// BinaryBuilder::Append(), so that a failed Reserve() does not leave an extra
+// offset behind without a matching bitmap entry.
+Status BinaryBuilder::AppendNull() {
+  RETURN_NOT_OK(Reserve(1));
+  RETURN_NOT_OK(AppendNextOffset());
+
+  UnsafeAppendToBitmap(false);
+  return Status::OK();
+}
+
+// Writes the closing offset, finishes the offsets and value-data buffers,
+// assembles the ArrayData (bitmap, offsets, data) and resets the builder.
+Status BinaryBuilder::FinishInternal(std::shared_ptr<ArrayData>* out) {
+ // Write final offset (values length)
+ RETURN_NOT_OK(AppendNextOffset());
+
+ // These buffers' padding zeroed by BufferBuilder
+ std::shared_ptr<Buffer> offsets, value_data;
+ RETURN_NOT_OK(offsets_builder_.Finish(&offsets));
+ RETURN_NOT_OK(value_data_builder_.Finish(&value_data));
+
+ *out = ArrayData::Make(type_, length_, {null_bitmap_, offsets, value_data}, null_count_,
+ 0);
+ // `out` now holds the buffers; clear all builder state for reuse.
+ Reset();
+ return Status::OK();
+}
+
+// Resets the base builder state and both sub-builders (offsets and value data).
+void BinaryBuilder::Reset() {
+ ArrayBuilder::Reset();
+ offsets_builder_.Reset();
+ value_data_builder_.Reset();
+}
+
+// Returns a pointer to the bytes of element `i` and stores its size in
+// `*out_length`. The last element has no following offset yet, so its end is
+// the current length of the value data.
+const uint8_t* BinaryBuilder::GetValue(int64_t i, int32_t* out_length) const {
+  const int32_t* offsets = offsets_builder_.data();
+  const int32_t begin = offsets[i];
+  const bool is_last = (i == (length_ - 1));
+  const int32_t end =
+      is_last ? static_cast<int32_t>(value_data_builder_.length()) : offsets[i + 1];
+
+  *out_length = end - begin;
+  return value_data_builder_.data() + begin;
+}
+
+// Returns a non-owning view over the bytes of element `i`. The last element
+// has no following offset yet, so its end is the current length of the value
+// data.
+util::string_view BinaryBuilder::GetView(int64_t i) const {
+  const int32_t* offsets = offsets_builder_.data();
+  const int32_t begin = offsets[i];
+  const bool is_last = (i == (length_ - 1));
+  const int32_t end =
+      is_last ? static_cast<int32_t>(value_data_builder_.length()) : offsets[i + 1];
+
+  const int32_t value_length = end - begin;
+  const char* chars = reinterpret_cast<const char*>(value_data_builder_.data() + begin);
+  return util::string_view(chars, value_length);
+}
+
+// A StringBuilder is a BinaryBuilder constructed with the utf8() type.
+StringBuilder::StringBuilder(MemoryPool* pool) : BinaryBuilder(utf8(), pool) {}
+
+// Appends every string in `values`. An offset is recorded for each entry; when
+// `valid_bytes` is non-null, entry i contributes character data only if
+// valid_bytes[i] is non-zero. The validity bytes are applied to the bitmap at
+// the end.
+Status StringBuilder::AppendValues(const std::vector<std::string>& values,
+                                   const uint8_t* valid_bytes) {
+  // Combined size of all strings, used to reserve the character buffer.
+  std::size_t total_length = 0;
+  for (const std::string& str : values) {
+    total_length += str.size();
+  }
+  RETURN_NOT_OK(Reserve(values.size()));
+  RETURN_NOT_OK(value_data_builder_.Reserve(total_length));
+  RETURN_NOT_OK(offsets_builder_.Reserve(values.size()));
+
+  for (std::size_t i = 0; i < values.size(); ++i) {
+    RETURN_NOT_OK(AppendNextOffset());
+    const bool has_data = (valid_bytes == nullptr) || valid_bytes[i];
+    if (has_data) {
+      const std::string& str = values[i];
+      RETURN_NOT_OK(value_data_builder_.Append(
+          reinterpret_cast<const uint8_t*>(str.data()), str.size()));
+    }
+  }
+
+  UnsafeAppendToBitmap(valid_bytes, values.size());
+  return Status::OK();
+}
+
+// Appends `length` NUL-terminated C strings. A nullptr entry in `values` is
+// treated as null. `valid_bytes` is optional; when given, an entry is only
+// stored if its validity byte is non-zero, and a nullptr entry whose validity
+// byte is non-zero is still recorded as null.
+Status StringBuilder::AppendValues(const char** values, int64_t length,
+ const uint8_t* valid_bytes) {
+ // First pass: measure each non-null string and note whether any is nullptr.
+ std::size_t total_length = 0;
+ std::vector<std::size_t> value_lengths(length);
+ bool have_null_value = false;
+ for (int64_t i = 0; i < length; ++i) {
+ if (values[i]) {
+ auto value_length = strlen(values[i]);
+ value_lengths[i] = value_length;
+ total_length += value_length;
+ } else {
+ have_null_value = true;
+ }
+ }
+ RETURN_NOT_OK(Reserve(length));
+ RETURN_NOT_OK(value_data_builder_.Reserve(total_length));
+ RETURN_NOT_OK(offsets_builder_.Reserve(length));
+
+ if (valid_bytes) {
+ // Start of the run of validity bytes not yet copied into the bitmap.
+ int64_t valid_bytes_offset = 0;
+ for (int64_t i = 0; i < length; ++i) {
+ RETURN_NOT_OK(AppendNextOffset());
+ if (valid_bytes[i]) {
+ if (values[i]) {
+ RETURN_NOT_OK(value_data_builder_.Append(
+ reinterpret_cast<const uint8_t*>(values[i]), value_lengths[i]));
+ } else {
+ // Marked valid but the pointer is null: flush the run before i as-is,
+ // then force a null bit for i and restart the run after it.
+ UnsafeAppendToBitmap(valid_bytes + valid_bytes_offset, i - valid_bytes_offset);
+ UnsafeAppendToBitmap(false);
+ valid_bytes_offset = i + 1;
+ }
+ }
+ }
+ // Flush whatever remains of the caller-provided validity bytes.
+ UnsafeAppendToBitmap(valid_bytes + valid_bytes_offset, length - valid_bytes_offset);
+ } else {
+ if (have_null_value) {
+ // No validity bytes were given: derive them from the nullptr entries.
+ std::vector<uint8_t> valid_vector(length, 0);
+ for (int64_t i = 0; i < length; ++i) {
+ RETURN_NOT_OK(AppendNextOffset());
+ if (values[i]) {
+ RETURN_NOT_OK(value_data_builder_.Append(
+ reinterpret_cast<const uint8_t*>(values[i]), value_lengths[i]));
+ valid_vector[i] = 1;
+ }
+ }
+ UnsafeAppendToBitmap(valid_vector.data(), length);
+ } else {
+ // Every entry is a real string: append them all.
+ for (int64_t i = 0; i < length; ++i) {
+ RETURN_NOT_OK(AppendNextOffset());
+ RETURN_NOT_OK(value_data_builder_.Append(
+ reinterpret_cast<const uint8_t*>(values[i]), value_lengths[i]));
+ }
+ UnsafeAppendToBitmap(nullptr, length);
+ }
+ }
+ return Status::OK();
+}
+
+// ----------------------------------------------------------------------
+// Fixed width binary
+
+// Constructs a builder whose element width is taken from `type`, which is
+// cast to FixedSizeBinaryType.
+FixedSizeBinaryBuilder::FixedSizeBinaryBuilder(const std::shared_ptr<DataType>& type,
+ MemoryPool* pool)
+ : ArrayBuilder(type, pool),
+ byte_width_(checked_cast<const FixedSizeBinaryType&>(*type).byte_width()),
+ byte_builder_(pool) {}
+
+// Debug-only check (compiled out when NDEBUG is defined) that an appended
+// value has exactly byte_width_ bytes.
+#ifndef NDEBUG
+void FixedSizeBinaryBuilder::CheckValueSize(int64_t size) {
+ DCHECK_EQ(size, byte_width_) << "Appending wrong size to FixedSizeBinaryBuilder";
+}
+#endif
+
+// Appends `length` values stored back to back in `data` (length * byte_width_
+// bytes in total), with optional per-value validity bytes.
+Status FixedSizeBinaryBuilder::AppendValues(const uint8_t* data, int64_t length,
+ const uint8_t* valid_bytes) {
+ RETURN_NOT_OK(Reserve(length));
+ UnsafeAppendToBitmap(valid_bytes, length);
+ return byte_builder_.Append(data, length * byte_width_);
+}
+
+// Appends one null slot: marks it invalid and advances the byte builder by
+// byte_width_ so later values stay aligned to their slot.
+Status FixedSizeBinaryBuilder::AppendNull() {
+ RETURN_NOT_OK(Reserve(1));
+ UnsafeAppendToBitmap(false);
+ return byte_builder_.Advance(byte_width_);
+}
+
+// Resets the base builder state and the byte builder.
+void FixedSizeBinaryBuilder::Reset() {
+ ArrayBuilder::Reset();
+ byte_builder_.Reset();
+}
+
+// Resizes the byte storage to capacity * byte_width_ bytes and then the base
+// builder to `capacity` elements.
+Status FixedSizeBinaryBuilder::Resize(int64_t capacity) {
+ RETURN_NOT_OK(CheckCapacity(capacity, capacity_));
+ RETURN_NOT_OK(byte_builder_.Resize(capacity * byte_width_));
+ return ArrayBuilder::Resize(capacity);
+}
+
+// Finishes the byte buffer, assembles ArrayData from the bitmap and the bytes,
+// then clears the builder's bitmap reference and counters.
+Status FixedSizeBinaryBuilder::FinishInternal(std::shared_ptr<ArrayData>* out) {
+  std::shared_ptr<Buffer> values;
+  RETURN_NOT_OK(byte_builder_.Finish(&values));
+
+  *out = ArrayData::Make(type_, length_, {null_bitmap_, values}, null_count_);
+
+  // The result holds the buffers now; zero the bookkeeping.
+  null_bitmap_ = nullptr;
+  null_count_ = 0;
+  length_ = 0;
+  capacity_ = 0;
+  return Status::OK();
+}
+
+// Returns a pointer to the first byte of element `i`, located i * byte_width_
+// bytes into the byte builder's data.
+const uint8_t* FixedSizeBinaryBuilder::GetValue(int64_t i) const {
+  const int64_t byte_offset = i * byte_width_;
+  return byte_builder_.data() + byte_offset;
+}
+
+// Returns a non-owning view of byte_width_ bytes over element `i`.
+util::string_view FixedSizeBinaryBuilder::GetView(int64_t i) const {
+  const uint8_t* element = byte_builder_.data() + i * byte_width_;
+  const char* chars = reinterpret_cast<const char*>(element);
+  return util::string_view(chars, byte_width_);
+}
+
+// ----------------------------------------------------------------------
+// Decimal128Builder
+
+// Decimal128 values are stored through the FixedSizeBinaryBuilder base.
+Decimal128Builder::Decimal128Builder(const std::shared_ptr<DataType>& type,
+ MemoryPool* pool)
+ : FixedSizeBinaryBuilder(type, pool) {}
+
+// Appends one decimal by converting it with ToBytes() and forwarding the
+// result to FixedSizeBinaryBuilder::Append().
+Status Decimal128Builder::Append(const Decimal128& value) {
+ RETURN_NOT_OK(FixedSizeBinaryBuilder::Reserve(1));
+ return FixedSizeBinaryBuilder::Append(value.ToBytes());
+}
+
+// Finishes the array by delegating to FixedSizeBinaryBuilder::FinishInternal().
+//
+// The base implementation builds the same ArrayData (type_, length_, bitmap and
+// byte buffer) and additionally clears null_bitmap_, capacity_, length_ and
+// null_count_. Without that reset the builder would keep a stale length and
+// bitmap after its byte buffer had already been handed to the result.
+Status Decimal128Builder::FinishInternal(std::shared_ptr<ArrayData>* out) {
+  return FixedSizeBinaryBuilder::FinishInternal(out);
+}
+
+} // namespace arrow
diff --git a/cpp/src/arrow/builder-dict.cc b/cpp/src/arrow/builder-dict.cc
new file mode 100644
index 0000000..b021c3a
--- /dev/null
+++ b/cpp/src/arrow/builder-dict.cc
@@ -0,0 +1,228 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <cstdint>
+#include <utility>
+#include <vector>
+
+#include "arrow/array.h"
+#include "arrow/buffer.h"
+#include "arrow/builder.h"
+#include "arrow/status.h"
+#include "arrow/type.h"
+#include "arrow/type_traits.h"
+#include "arrow/util/checked_cast.h"
+#include "arrow/util/hashing.h"
+#include "arrow/util/logging.h"
+
+namespace arrow {
+
+using internal::checked_cast;
+
+// ----------------------------------------------------------------------
+// DictionaryBuilder
+
+// Concrete memo table type used by DictionaryBuilder<T>: it derives from the
+// MemoTableType selected by internal::HashTraits<T> and inherits its
+// constructors.
+template <typename T>
+class DictionaryBuilder<T>::MemoTableImpl
+ : public internal::HashTraits<T>::MemoTableType {
+ public:
+ using MemoTableType = typename internal::HashTraits<T>::MemoTableType;
+ using MemoTableType::MemoTableType;
+};
+
+// Defined in this file, after MemoTableImpl, rather than in the header.
+template <typename T>
+DictionaryBuilder<T>::~DictionaryBuilder() {}
+
+// Generic constructor: byte_width_ is set to -1 and, in debug builds, the
+// runtime type id is checked against T.
+template <typename T>
+DictionaryBuilder<T>::DictionaryBuilder(const std::shared_ptr<DataType>& type,
+ MemoryPool* pool)
+ : ArrayBuilder(type, pool), byte_width_(-1), values_builder_(pool) {
+ DCHECK_EQ(T::type_id, type->id()) << "inconsistent type passed to DictionaryBuilder";
+}
+
+// NullType variant: in debug builds, checks that `type` has id Type::NA.
+DictionaryBuilder<NullType>::DictionaryBuilder(const std::shared_ptr<DataType>& type,
+ MemoryPool* pool)
+ : ArrayBuilder(type, pool), values_builder_(pool) {
+ DCHECK_EQ(Type::NA, type->id()) << "inconsistent type passed to DictionaryBuilder";
+}
+
+// FixedSizeBinaryType specialization: byte_width_ is taken from `type`.
+// NOTE(review): unlike the generic constructor, values_builder_ is not passed
+// `pool` here — confirm that this is intended.
+template <>
+DictionaryBuilder<FixedSizeBinaryType>::DictionaryBuilder(
+ const std::shared_ptr<DataType>& type, MemoryPool* pool)
+ : ArrayBuilder(type, pool),
+ byte_width_(checked_cast<const FixedSizeBinaryType&>(*type).byte_width()) {}
+
+// Resets the base builder and the indices builder, discards the memo table
+// and sets delta_offset_ back to zero.
+template <typename T>
+void DictionaryBuilder<T>::Reset() {
+ ArrayBuilder::Reset();
+ values_builder_.Reset();
+ memo_table_.reset();
+ delta_offset_ = 0;
+}
+
+// Resizes the indices builder and the base builder to `capacity` (at least
+// kMinBuilderCapacity). When the builder had no capacity yet, a fresh memo
+// table is created and delta_offset_ is zeroed.
+template <typename T>
+Status DictionaryBuilder<T>::Resize(int64_t capacity) {
+  RETURN_NOT_OK(CheckCapacity(capacity, capacity_));
+  if (capacity < kMinBuilderCapacity) {
+    capacity = kMinBuilderCapacity;
+  }
+
+  const bool first_allocation = (capacity_ == 0);
+  if (first_allocation) {
+    // Initialize hash table
+    // XXX should we let the user pass additional size heuristics?
+    delta_offset_ = 0;
+    memo_table_.reset(new MemoTableImpl(0));
+  }
+  RETURN_NOT_OK(values_builder_.Resize(capacity));
+  return ArrayBuilder::Resize(capacity);
+}
+
+// Resizes the indices builder and the base builder to `capacity`, raised to
+// kMinBuilderCapacity when smaller.
+Status DictionaryBuilder<NullType>::Resize(int64_t capacity) {
+  RETURN_NOT_OK(CheckCapacity(capacity, capacity_));
+  if (capacity < kMinBuilderCapacity) {
+    capacity = kMinBuilderCapacity;
+  }
+
+  RETURN_NOT_OK(values_builder_.Resize(capacity));
+  return ArrayBuilder::Resize(capacity);
+}
+
+// Appends `value`: looks it up in (or inserts it into) the memo table and
+// appends the resulting index to the indices builder.
+template <typename T>
+Status DictionaryBuilder<T>::Append(const Scalar& value) {
+  RETURN_NOT_OK(Reserve(1));
+
+  auto dictionary_index = memo_table_->GetOrInsert(value);
+  return values_builder_.Append(dictionary_index);
+}
+
+// Appends a null by forwarding to the indices builder; the memo table is not
+// touched.
+template <typename T>
+Status DictionaryBuilder<T>::AppendNull() {
+ return values_builder_.AppendNull();
+}
+
+// Appends a null by forwarding to the indices builder.
+Status DictionaryBuilder<NullType>::AppendNull() { return values_builder_.AppendNull(); }
+
+// Appends every element of `array`, which is cast to NumericArray<T>: nulls
+// become null indices, other values go through Append().
+template <typename T>
+Status DictionaryBuilder<T>::AppendArray(const Array& array) {
+  const auto& typed = checked_cast<const NumericArray<T>&>(array);
+  for (int64_t pos = 0; pos < array.length(); ++pos) {
+    if (!array.IsNull(pos)) {
+      RETURN_NOT_OK(Append(typed.Value(pos)));
+      continue;
+    }
+    RETURN_NOT_OK(AppendNull());
+  }
+  return Status::OK();
+}
+
+// Appends one null per element of `array`; the element values are not read.
+Status DictionaryBuilder<NullType>::AppendArray(const Array& array) {
+ for (int64_t i = 0; i < array.length(); i++) {
+ RETURN_NOT_OK(AppendNull());
+ }
+ return Status::OK();
+}
+
+// Finishes the indices array, builds the dictionary array from the memo table
+// and rewrites the result's type into a DictionaryType that carries it.
+// The memo table is kept; delta_offset_ is advanced to its current size.
+template <typename T>
+Status DictionaryBuilder<T>::FinishInternal(std::shared_ptr<ArrayData>* out) {
+  // The indices builder produces the array data; only its type is patched below.
+  RETURN_NOT_OK(values_builder_.FinishInternal(out));
+
+  // Materialize dictionary values from the hash table (delta_offset_ is passed
+  // through to the traits helper).
+  std::shared_ptr<ArrayData> dictionary_data;
+  RETURN_NOT_OK(internal::DictionaryTraits<T>::GetDictionaryArrayData(
+      pool_, type_, *memo_table_, delta_offset_, &dictionary_data));
+  std::shared_ptr<Array> dictionary = MakeArray(dictionary_data);
+
+  // Wrap the index type into the dictionary type.
+  std::shared_ptr<ArrayData>& result = *out;
+  result->type = std::make_shared<DictionaryType>(result->type, dictionary);
+
+  // Prepare for further use of this builder.
+  delta_offset_ = memo_table_->size();
+  values_builder_.Reset();
+
+  return Status::OK();
+}
+
+// Finishes the indices array and attaches a DictionaryType whose dictionary
+// is a NullArray constructed with length 0.
+Status DictionaryBuilder<NullType>::FinishInternal(std::shared_ptr<ArrayData>* out) {
+ std::shared_ptr<Array> dictionary = std::make_shared<NullArray>(0);
+
+ RETURN_NOT_OK(values_builder_.FinishInternal(out));
+ (*out)->type = std::make_shared<DictionaryType>((*out)->type, dictionary);
+
+ return Status::OK();
+}
+
+//
+// StringType and BinaryType specializations
+//
+
+// Generates the AppendArray specialization for a variable-length binary type:
+// the input is cast to the matching ArrayType and each non-null element is
+// appended through its GetView(); nulls are appended as nulls.
+// Instantiated below for StringType and BinaryType.
+#define BINARY_DICTIONARY_SPECIALIZATIONS(Type) \
+ \
+ template <> \
+ Status DictionaryBuilder<Type>::AppendArray(const Array& array) { \
+ using ArrayType = typename TypeTraits<Type>::ArrayType; \
+ const ArrayType& binary_array = checked_cast<const ArrayType&>(array); \
+ for (int64_t i = 0; i < array.length(); i++) { \
+ if (array.IsNull(i)) { \
+ RETURN_NOT_OK(AppendNull()); \
+ } else { \
+ RETURN_NOT_OK(Append(binary_array.GetView(i))); \
+ } \
+ } \
+ return Status::OK(); \
+ }
+
+BINARY_DICTIONARY_SPECIALIZATIONS(StringType);
+BINARY_DICTIONARY_SPECIALIZATIONS(BinaryType);
+
+// Appends every element of a FixedSizeBinary array. The array's type must
+// equal this builder's type, otherwise Status::Invalid is returned.
+template <>
+Status DictionaryBuilder<FixedSizeBinaryType>::AppendArray(const Array& array) {
+  const bool same_type = type_->Equals(*array.type());
+  if (!same_type) {
+    return Status::Invalid("Cannot append FixedSizeBinary array with non-matching type");
+  }
+
+  const auto& fixed_array = checked_cast<const FixedSizeBinaryArray&>(array);
+  for (int64_t pos = 0; pos < array.length(); ++pos) {
+    if (!array.IsNull(pos)) {
+      RETURN_NOT_OK(Append(fixed_array.GetValue(pos)));
+      continue;
+    }
+    RETURN_NOT_OK(AppendNull());
+  }
+  return Status::OK();
+}
+
+// Explicit instantiations: the member templates above are defined in this
+// file, so each supported value type is instantiated here.
+template class DictionaryBuilder<UInt8Type>;
+template class DictionaryBuilder<UInt16Type>;
+template class DictionaryBuilder<UInt32Type>;
+template class DictionaryBuilder<UInt64Type>;
+template class DictionaryBuilder<Int8Type>;
+template class DictionaryBuilder<Int16Type>;
+template class DictionaryBuilder<Int32Type>;
+template class DictionaryBuilder<Int64Type>;
+template class DictionaryBuilder<Date32Type>;
+template class DictionaryBuilder<Date64Type>;
+template class DictionaryBuilder<Time32Type>;
+template class DictionaryBuilder<Time64Type>;
+template class DictionaryBuilder<TimestampType>;
+template class DictionaryBuilder<FloatType>;
+template class DictionaryBuilder<DoubleType>;
+template class DictionaryBuilder<FixedSizeBinaryType>;
+template class DictionaryBuilder<BinaryType>;
+template class DictionaryBuilder<StringType>;
+
+} // namespace arrow
diff --git a/cpp/src/arrow/builder.cc b/cpp/src/arrow/builder.cc
index 0e10be7..aef4df0 100644
--- a/cpp/src/arrow/builder.cc
+++ b/cpp/src/arrow/builder.cc
@@ -15,36 +15,30 @@
// specific language governing permissions and limitations
// under the License.
-#include "arrow/builder.h"
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <cstring>
-#include <numeric>
#include <sstream>
#include <utility>
#include <vector>
#include "arrow/array.h"
#include "arrow/buffer.h"
+#include "arrow/builder.h"
#include "arrow/status.h"
#include "arrow/type.h"
#include "arrow/type_traits.h"
#include "arrow/util/bit-util.h"
#include "arrow/util/checked_cast.h"
-#include "arrow/util/decimal.h"
-#include "arrow/util/hashing.h"
#include "arrow/util/int-util.h"
#include "arrow/util/logging.h"
namespace arrow {
-using internal::AdaptiveIntBuilderBase;
using internal::checked_cast;
-namespace {
-
-Status TrimBuffer(const int64_t bytes_filled, ResizableBuffer* buffer) {
+Status ArrayBuilder::TrimBuffer(const int64_t bytes_filled, ResizableBuffer* buffer) {
if (buffer) {
if (bytes_filled < buffer->size()) {
// Trim buffer
@@ -59,8 +53,6 @@ Status TrimBuffer(const int64_t bytes_filled, ResizableBuffer* buffer) {
return Status::OK();
}
-} // namespace
-
Status ArrayBuilder::AppendToBitmap(bool is_valid) {
if (length_ == capacity_) {
// If the capacity was not already a multiple of 2, do so here
@@ -80,13 +72,6 @@ Status ArrayBuilder::AppendToBitmap(const uint8_t* valid_bytes, int64_t length)
return Status::OK();
}
-static inline Status CheckCapacity(int64_t new_capacity, int64_t old_capacity) {
- if (new_capacity < 0) return Status::Invalid("Resize capacity must be positive");
- if (new_capacity < old_capacity) return Status::Invalid("Resize cannot downsize");
-
- return Status::OK();
-}
-
Status ArrayBuilder::Resize(int64_t capacity) {
// Target size of validity (null) bitmap data
const int64_t new_bitmap_size = BitUtil::BytesForBits(capacity);
@@ -295,375 +280,6 @@ template class PrimitiveBuilder<HalfFloatType>;
template class PrimitiveBuilder<FloatType>;
template class PrimitiveBuilder<DoubleType>;
-AdaptiveIntBuilderBase::AdaptiveIntBuilderBase(MemoryPool* pool)
- : ArrayBuilder(int64(), pool),
- data_(nullptr),
- raw_data_(nullptr),
- int_size_(1),
- pending_pos_(0),
- pending_has_nulls_(false) {}
-
-void AdaptiveIntBuilderBase::Reset() {
- ArrayBuilder::Reset();
- data_.reset();
- raw_data_ = nullptr;
- pending_pos_ = 0;
- pending_has_nulls_ = false;
-}
-
-Status AdaptiveIntBuilderBase::Resize(int64_t capacity) {
- RETURN_NOT_OK(CheckCapacity(capacity, capacity_));
- capacity = std::max(capacity, kMinBuilderCapacity);
-
- int64_t nbytes = capacity * int_size_;
- if (capacity_ == 0) {
- RETURN_NOT_OK(AllocateResizableBuffer(pool_, nbytes, &data_));
- } else {
- RETURN_NOT_OK(data_->Resize(nbytes));
- }
- raw_data_ = reinterpret_cast<uint8_t*>(data_->mutable_data());
-
- return ArrayBuilder::Resize(capacity);
-}
-
-AdaptiveIntBuilder::AdaptiveIntBuilder(MemoryPool* pool) : AdaptiveIntBuilderBase(pool) {}
-
-Status AdaptiveIntBuilder::FinishInternal(std::shared_ptr<ArrayData>* out) {
- RETURN_NOT_OK(CommitPendingData());
-
- std::shared_ptr<DataType> output_type;
- switch (int_size_) {
- case 1:
- output_type = int8();
- break;
- case 2:
- output_type = int16();
- break;
- case 4:
- output_type = int32();
- break;
- case 8:
- output_type = int64();
- break;
- default:
- DCHECK(false);
- return Status::NotImplemented("Only ints of size 1,2,4,8 are supported");
- }
-
- RETURN_NOT_OK(TrimBuffer(BitUtil::BytesForBits(length_), null_bitmap_.get()));
- RETURN_NOT_OK(TrimBuffer(length_ * int_size_, data_.get()));
-
- *out = ArrayData::Make(output_type, length_, {null_bitmap_, data_}, null_count_);
-
- data_ = null_bitmap_ = nullptr;
- capacity_ = length_ = null_count_ = 0;
- return Status::OK();
-}
-
-Status AdaptiveIntBuilder::CommitPendingData() {
- if (pending_pos_ == 0) {
- return Status::OK();
- }
- RETURN_NOT_OK(Reserve(pending_pos_));
- const uint8_t* valid_bytes = pending_has_nulls_ ? pending_valid_ : nullptr;
- RETURN_NOT_OK(AppendValuesInternal(reinterpret_cast<const int64_t*>(pending_data_),
- pending_pos_, valid_bytes));
- pending_has_nulls_ = false;
- pending_pos_ = 0;
- return Status::OK();
-}
-
-static constexpr int64_t kAdaptiveIntChunkSize = 8192;
-
-Status AdaptiveIntBuilder::AppendValuesInternal(const int64_t* values, int64_t length,
- const uint8_t* valid_bytes) {
- while (length > 0) {
- // In case `length` is very large, we don't want to trash the cache by
- // scanning it twice (first to detect int width, second to copy the data).
- // Instead, process data in L2-cacheable chunks.
- const int64_t chunk_size = std::min(length, kAdaptiveIntChunkSize);
-
- uint8_t new_int_size;
- new_int_size = internal::DetectIntWidth(values, valid_bytes, chunk_size, int_size_);
-
- DCHECK_GE(new_int_size, int_size_);
- if (new_int_size > int_size_) {
- // This updates int_size_
- RETURN_NOT_OK(ExpandIntSize(new_int_size));
- }
-
- switch (int_size_) {
- case 1:
- internal::DowncastInts(values, reinterpret_cast<int8_t*>(raw_data_) + length_,
- chunk_size);
- break;
- case 2:
- internal::DowncastInts(values, reinterpret_cast<int16_t*>(raw_data_) + length_,
- chunk_size);
- break;
- case 4:
- internal::DowncastInts(values, reinterpret_cast<int32_t*>(raw_data_) + length_,
- chunk_size);
- break;
- case 8:
- internal::DowncastInts(values, reinterpret_cast<int64_t*>(raw_data_) + length_,
- chunk_size);
- break;
- default:
- DCHECK(false);
- }
-
- // This updates length_
- ArrayBuilder::UnsafeAppendToBitmap(valid_bytes, chunk_size);
- values += chunk_size;
- if (valid_bytes != nullptr) {
- valid_bytes += chunk_size;
- }
- length -= chunk_size;
- }
-
- return Status::OK();
-}
-
-Status AdaptiveUIntBuilder::CommitPendingData() {
- if (pending_pos_ == 0) {
- return Status::OK();
- }
- RETURN_NOT_OK(Reserve(pending_pos_));
- const uint8_t* valid_bytes = pending_has_nulls_ ? pending_valid_ : nullptr;
- RETURN_NOT_OK(AppendValuesInternal(pending_data_, pending_pos_, valid_bytes));
- pending_has_nulls_ = false;
- pending_pos_ = 0;
- return Status::OK();
-}
-
-Status AdaptiveIntBuilder::AppendValues(const int64_t* values, int64_t length,
- const uint8_t* valid_bytes) {
- RETURN_NOT_OK(CommitPendingData());
- RETURN_NOT_OK(Reserve(length));
-
- return AppendValuesInternal(values, length, valid_bytes);
-}
-
-template <typename new_type, typename old_type>
-typename std::enable_if<sizeof(old_type) >= sizeof(new_type), Status>::type
-AdaptiveIntBuilder::ExpandIntSizeInternal() {
- return Status::OK();
-}
-
-#define __LESS(a, b) (a) < (b)
-template <typename new_type, typename old_type>
-typename std::enable_if<__LESS(sizeof(old_type), sizeof(new_type)), Status>::type
-AdaptiveIntBuilder::ExpandIntSizeInternal() {
- int_size_ = sizeof(new_type);
- RETURN_NOT_OK(Resize(data_->size() / sizeof(old_type)));
- raw_data_ = reinterpret_cast<uint8_t*>(data_->mutable_data());
- const old_type* src = reinterpret_cast<old_type*>(raw_data_);
- new_type* dst = reinterpret_cast<new_type*>(raw_data_);
-
- // By doing the backward copy, we ensure that no element is overriden during
- // the copy process and the copy stays in-place.
- std::copy_backward(src, src + length_, dst + length_);
-
- return Status::OK();
-}
-#undef __LESS
-
-template <typename new_type>
-Status AdaptiveIntBuilder::ExpandIntSizeN() {
- switch (int_size_) {
- case 1:
- RETURN_NOT_OK((ExpandIntSizeInternal<new_type, int8_t>()));
- break;
- case 2:
- RETURN_NOT_OK((ExpandIntSizeInternal<new_type, int16_t>()));
- break;
- case 4:
- RETURN_NOT_OK((ExpandIntSizeInternal<new_type, int32_t>()));
- break;
- case 8:
- RETURN_NOT_OK((ExpandIntSizeInternal<new_type, int64_t>()));
- break;
- default:
- DCHECK(false);
- }
- return Status::OK();
-}
-
-Status AdaptiveIntBuilder::ExpandIntSize(uint8_t new_int_size) {
- switch (new_int_size) {
- case 1:
- RETURN_NOT_OK((ExpandIntSizeN<int8_t>()));
- break;
- case 2:
- RETURN_NOT_OK((ExpandIntSizeN<int16_t>()));
- break;
- case 4:
- RETURN_NOT_OK((ExpandIntSizeN<int32_t>()));
- break;
- case 8:
- RETURN_NOT_OK((ExpandIntSizeN<int64_t>()));
- break;
- default:
- DCHECK(false);
- }
- return Status::OK();
-}
-
-AdaptiveUIntBuilder::AdaptiveUIntBuilder(MemoryPool* pool)
- : AdaptiveIntBuilderBase(pool) {}
-
-Status AdaptiveUIntBuilder::FinishInternal(std::shared_ptr<ArrayData>* out) {
- RETURN_NOT_OK(CommitPendingData());
-
- std::shared_ptr<DataType> output_type;
- switch (int_size_) {
- case 1:
- output_type = uint8();
- break;
- case 2:
- output_type = uint16();
- break;
- case 4:
- output_type = uint32();
- break;
- case 8:
- output_type = uint64();
- break;
- default:
- DCHECK(false);
- return Status::NotImplemented("Only ints of size 1,2,4,8 are supported");
- }
-
- RETURN_NOT_OK(TrimBuffer(BitUtil::BytesForBits(length_), null_bitmap_.get()));
- RETURN_NOT_OK(TrimBuffer(length_ * int_size_, data_.get()));
-
- *out = ArrayData::Make(output_type, length_, {null_bitmap_, data_}, null_count_);
-
- data_ = null_bitmap_ = nullptr;
- capacity_ = length_ = null_count_ = 0;
- return Status::OK();
-}
-
-Status AdaptiveUIntBuilder::AppendValuesInternal(const uint64_t* values, int64_t length,
- const uint8_t* valid_bytes) {
- while (length > 0) {
- // See AdaptiveIntBuilder::AppendValuesInternal
- const int64_t chunk_size = std::min(length, kAdaptiveIntChunkSize);
-
- uint8_t new_int_size;
- new_int_size = internal::DetectUIntWidth(values, valid_bytes, chunk_size, int_size_);
-
- DCHECK_GE(new_int_size, int_size_);
- if (new_int_size > int_size_) {
- // This updates int_size_
- RETURN_NOT_OK(ExpandIntSize(new_int_size));
- }
-
- switch (int_size_) {
- case 1:
- internal::DowncastUInts(values, reinterpret_cast<uint8_t*>(raw_data_) + length_,
- chunk_size);
- break;
- case 2:
- internal::DowncastUInts(values, reinterpret_cast<uint16_t*>(raw_data_) + length_,
- chunk_size);
- break;
- case 4:
- internal::DowncastUInts(values, reinterpret_cast<uint32_t*>(raw_data_) + length_,
- chunk_size);
- break;
- case 8:
- internal::DowncastUInts(values, reinterpret_cast<uint64_t*>(raw_data_) + length_,
- chunk_size);
- break;
- default:
- DCHECK(false);
- }
-
- // This updates length_
- ArrayBuilder::UnsafeAppendToBitmap(valid_bytes, chunk_size);
- values += chunk_size;
- if (valid_bytes != nullptr) {
- valid_bytes += chunk_size;
- }
- length -= chunk_size;
- }
-
- return Status::OK();
-}
-
-Status AdaptiveUIntBuilder::AppendValues(const uint64_t* values, int64_t length,
- const uint8_t* valid_bytes) {
- RETURN_NOT_OK(Reserve(length));
-
- return AppendValuesInternal(values, length, valid_bytes);
-}
-
-template <typename new_type, typename old_type>
-typename std::enable_if<sizeof(old_type) >= sizeof(new_type), Status>::type
-AdaptiveUIntBuilder::ExpandIntSizeInternal() {
- return Status::OK();
-}
-
-#define __LESS(a, b) (a) < (b)
-template <typename new_type, typename old_type>
-typename std::enable_if<__LESS(sizeof(old_type), sizeof(new_type)), Status>::type
-AdaptiveUIntBuilder::ExpandIntSizeInternal() {
- int_size_ = sizeof(new_type);
- RETURN_NOT_OK(Resize(data_->size() / sizeof(old_type)));
-
- old_type* src = reinterpret_cast<old_type*>(raw_data_);
- new_type* dst = reinterpret_cast<new_type*>(raw_data_);
- // By doing the backward copy, we ensure that no element is overriden during
- // the copy process and the copy stays in-place.
- std::copy_backward(src, src + length_, dst + length_);
-
- return Status::OK();
-}
-#undef __LESS
-
-template <typename new_type>
-Status AdaptiveUIntBuilder::ExpandIntSizeN() {
- switch (int_size_) {
- case 1:
- RETURN_NOT_OK((ExpandIntSizeInternal<new_type, uint8_t>()));
- break;
- case 2:
- RETURN_NOT_OK((ExpandIntSizeInternal<new_type, uint16_t>()));
- break;
- case 4:
- RETURN_NOT_OK((ExpandIntSizeInternal<new_type, uint32_t>()));
- break;
- case 8:
- RETURN_NOT_OK((ExpandIntSizeInternal<new_type, uint64_t>()));
- break;
- default:
- DCHECK(false);
- }
- return Status::OK();
-}
-
-Status AdaptiveUIntBuilder::ExpandIntSize(uint8_t new_int_size) {
- switch (new_int_size) {
- case 1:
- RETURN_NOT_OK((ExpandIntSizeN<uint8_t>()));
- break;
- case 2:
- RETURN_NOT_OK((ExpandIntSizeN<uint16_t>()));
- break;
- case 4:
- RETURN_NOT_OK((ExpandIntSizeN<uint32_t>()));
- break;
- case 8:
- RETURN_NOT_OK((ExpandIntSizeN<uint64_t>()));
- break;
- default:
- DCHECK(false);
- }
- return Status::OK();
-}
-
BooleanBuilder::BooleanBuilder(MemoryPool* pool)
: ArrayBuilder(boolean(), pool), data_(nullptr), raw_data_(nullptr) {}
@@ -793,219 +409,6 @@ Status BooleanBuilder::AppendValues(const std::vector<bool>& values) {
}
// ----------------------------------------------------------------------
-// DictionaryBuilder
-
-template <typename T>
-class DictionaryBuilder<T>::MemoTableImpl
- : public internal::HashTraits<T>::MemoTableType {
- public:
- using MemoTableType = typename internal::HashTraits<T>::MemoTableType;
- using MemoTableType::MemoTableType;
-};
-
-template <typename T>
-DictionaryBuilder<T>::~DictionaryBuilder() {}
-
-template <typename T>
-DictionaryBuilder<T>::DictionaryBuilder(const std::shared_ptr<DataType>& type,
- MemoryPool* pool)
- : ArrayBuilder(type, pool), byte_width_(-1), values_builder_(pool) {
- DCHECK_EQ(T::type_id, type->id()) << "inconsistent type passed to DictionaryBuilder";
-}
-
-DictionaryBuilder<NullType>::DictionaryBuilder(const std::shared_ptr<DataType>& type,
- MemoryPool* pool)
- : ArrayBuilder(type, pool), values_builder_(pool) {
- DCHECK_EQ(Type::NA, type->id()) << "inconsistent type passed to DictionaryBuilder";
-}
-
-template <>
-DictionaryBuilder<FixedSizeBinaryType>::DictionaryBuilder(
- const std::shared_ptr<DataType>& type, MemoryPool* pool)
- : ArrayBuilder(type, pool),
- byte_width_(checked_cast<const FixedSizeBinaryType&>(*type).byte_width()) {}
-
-template <typename T>
-void DictionaryBuilder<T>::Reset() {
- ArrayBuilder::Reset();
- values_builder_.Reset();
- memo_table_.reset();
- delta_offset_ = 0;
-}
-
-template <typename T>
-Status DictionaryBuilder<T>::Resize(int64_t capacity) {
- RETURN_NOT_OK(CheckCapacity(capacity, capacity_));
- capacity = std::max(capacity, kMinBuilderCapacity);
-
- if (capacity_ == 0) {
- // Initialize hash table
- // XXX should we let the user pass additional size heuristics?
- memo_table_.reset(new MemoTableImpl(0));
- delta_offset_ = 0;
- }
- RETURN_NOT_OK(values_builder_.Resize(capacity));
- return ArrayBuilder::Resize(capacity);
-}
-
-Status DictionaryBuilder<NullType>::Resize(int64_t capacity) {
- RETURN_NOT_OK(CheckCapacity(capacity, capacity_));
- capacity = std::max(capacity, kMinBuilderCapacity);
-
- RETURN_NOT_OK(values_builder_.Resize(capacity));
- return ArrayBuilder::Resize(capacity);
-}
-
-template <typename T>
-Status DictionaryBuilder<T>::Append(const Scalar& value) {
- RETURN_NOT_OK(Reserve(1));
-
- auto memo_index = memo_table_->GetOrInsert(value);
- RETURN_NOT_OK(values_builder_.Append(memo_index));
-
- return Status::OK();
-}
-
-template <typename T>
-Status DictionaryBuilder<T>::AppendNull() {
- return values_builder_.AppendNull();
-}
-
-Status DictionaryBuilder<NullType>::AppendNull() { return values_builder_.AppendNull(); }
-
-template <typename T>
-Status DictionaryBuilder<T>::AppendArray(const Array& array) {
- const auto& numeric_array = checked_cast<const NumericArray<T>&>(array);
- for (int64_t i = 0; i < array.length(); i++) {
- if (array.IsNull(i)) {
- RETURN_NOT_OK(AppendNull());
- } else {
- RETURN_NOT_OK(Append(numeric_array.Value(i)));
- }
- }
- return Status::OK();
-}
-
-Status DictionaryBuilder<NullType>::AppendArray(const Array& array) {
- for (int64_t i = 0; i < array.length(); i++) {
- RETURN_NOT_OK(AppendNull());
- }
- return Status::OK();
-}
-
-template <typename T>
-Status DictionaryBuilder<T>::FinishInternal(std::shared_ptr<ArrayData>* out) {
- // Finalize indices array
- RETURN_NOT_OK(values_builder_.FinishInternal(out));
-
- // Generate dictionary array from hash table contents
- std::shared_ptr<Array> dictionary;
- std::shared_ptr<ArrayData> dictionary_data;
-
- RETURN_NOT_OK(internal::DictionaryTraits<T>::GetDictionaryArrayData(
- pool_, type_, *memo_table_, delta_offset_, &dictionary_data));
- dictionary = MakeArray(dictionary_data);
-
- // Set type of array data to the right dictionary type
- (*out)->type = std::make_shared<DictionaryType>((*out)->type, dictionary);
-
- // Update internals for further uses of this DictionaryBuilder
- delta_offset_ = memo_table_->size();
- values_builder_.Reset();
-
- return Status::OK();
-}
-
-Status DictionaryBuilder<NullType>::FinishInternal(std::shared_ptr<ArrayData>* out) {
- std::shared_ptr<Array> dictionary = std::make_shared<NullArray>(0);
-
- RETURN_NOT_OK(values_builder_.FinishInternal(out));
- (*out)->type = std::make_shared<DictionaryType>((*out)->type, dictionary);
-
- return Status::OK();
-}
-
-//
-// StringType and BinaryType specializations
-//
-
-#define BINARY_DICTIONARY_SPECIALIZATIONS(Type) \
- \
- template <> \
- Status DictionaryBuilder<Type>::AppendArray(const Array& array) { \
- using ArrayType = typename TypeTraits<Type>::ArrayType; \
- const ArrayType& binary_array = checked_cast<const ArrayType&>(array); \
- for (int64_t i = 0; i < array.length(); i++) { \
- if (array.IsNull(i)) { \
- RETURN_NOT_OK(AppendNull()); \
- } else { \
- RETURN_NOT_OK(Append(binary_array.GetView(i))); \
- } \
- } \
- return Status::OK(); \
- }
-
-BINARY_DICTIONARY_SPECIALIZATIONS(StringType);
-BINARY_DICTIONARY_SPECIALIZATIONS(BinaryType);
-
-template <>
-Status DictionaryBuilder<FixedSizeBinaryType>::AppendArray(const Array& array) {
- if (!type_->Equals(*array.type())) {
- return Status::Invalid("Cannot append FixedSizeBinary array with non-matching type");
- }
-
- const auto& typed_array = checked_cast<const FixedSizeBinaryArray&>(array);
- for (int64_t i = 0; i < array.length(); i++) {
- if (array.IsNull(i)) {
- RETURN_NOT_OK(AppendNull());
- } else {
- RETURN_NOT_OK(Append(typed_array.GetValue(i)));
- }
- }
- return Status::OK();
-}
-
-template class DictionaryBuilder<UInt8Type>;
-template class DictionaryBuilder<UInt16Type>;
-template class DictionaryBuilder<UInt32Type>;
-template class DictionaryBuilder<UInt64Type>;
-template class DictionaryBuilder<Int8Type>;
-template class DictionaryBuilder<Int16Type>;
-template class DictionaryBuilder<Int32Type>;
-template class DictionaryBuilder<Int64Type>;
-template class DictionaryBuilder<Date32Type>;
-template class DictionaryBuilder<Date64Type>;
-template class DictionaryBuilder<Time32Type>;
-template class DictionaryBuilder<Time64Type>;
-template class DictionaryBuilder<TimestampType>;
-template class DictionaryBuilder<FloatType>;
-template class DictionaryBuilder<DoubleType>;
-template class DictionaryBuilder<FixedSizeBinaryType>;
-template class DictionaryBuilder<BinaryType>;
-template class DictionaryBuilder<StringType>;
-
-// ----------------------------------------------------------------------
-// Decimal128Builder
-
-Decimal128Builder::Decimal128Builder(const std::shared_ptr<DataType>& type,
- MemoryPool* pool)
- : FixedSizeBinaryBuilder(type, pool) {}
-
-Status Decimal128Builder::Append(const Decimal128& value) {
- RETURN_NOT_OK(FixedSizeBinaryBuilder::Reserve(1));
- return FixedSizeBinaryBuilder::Append(value.ToBytes());
-}
-
-Status Decimal128Builder::FinishInternal(std::shared_ptr<ArrayData>* out) {
- std::shared_ptr<Buffer> data;
- RETURN_NOT_OK(byte_builder_.Finish(&data));
-
- *out = ArrayData::Make(type_, length_, {null_bitmap_, data}, null_count_);
-
- return Status::OK();
-}
-
-// ----------------------------------------------------------------------
// ListBuilder
ListBuilder::ListBuilder(MemoryPool* pool,
@@ -1089,257 +492,6 @@ ArrayBuilder* ListBuilder::value_builder() const {
}
// ----------------------------------------------------------------------
-// String and binary
-
-BinaryBuilder::BinaryBuilder(const std::shared_ptr<DataType>& type, MemoryPool* pool)
- : ArrayBuilder(type, pool), offsets_builder_(pool), value_data_builder_(pool) {}
-
-BinaryBuilder::BinaryBuilder(MemoryPool* pool) : BinaryBuilder(binary(), pool) {}
-
-Status BinaryBuilder::Resize(int64_t capacity) {
- DCHECK_LE(capacity, kListMaximumElements);
- RETURN_NOT_OK(CheckCapacity(capacity, capacity_));
-
- // one more then requested for offsets
- RETURN_NOT_OK(offsets_builder_.Resize((capacity + 1) * sizeof(int32_t)));
- return ArrayBuilder::Resize(capacity);
-}
-
-Status BinaryBuilder::ReserveData(int64_t elements) {
- if (value_data_length() + elements > value_data_capacity()) {
- if (value_data_length() + elements > kBinaryMemoryLimit) {
- return Status::CapacityError(
- "Cannot reserve capacity larger than 2^31 - 1 for binary");
- }
- RETURN_NOT_OK(value_data_builder_.Reserve(elements));
- }
- return Status::OK();
-}
-
-Status BinaryBuilder::AppendNextOffset() {
- const int64_t num_bytes = value_data_builder_.length();
- if (ARROW_PREDICT_FALSE(num_bytes > kBinaryMemoryLimit)) {
- std::stringstream ss;
- ss << "BinaryArray cannot contain more than " << kBinaryMemoryLimit << " bytes, have "
- << num_bytes;
- return Status::CapacityError(ss.str());
- }
- return offsets_builder_.Append(static_cast<int32_t>(num_bytes));
-}
-
-Status BinaryBuilder::Append(const uint8_t* value, int32_t length) {
- RETURN_NOT_OK(Reserve(1));
- RETURN_NOT_OK(AppendNextOffset());
- RETURN_NOT_OK(value_data_builder_.Append(value, length));
-
- UnsafeAppendToBitmap(true);
- return Status::OK();
-}
-
-Status BinaryBuilder::AppendNull() {
- RETURN_NOT_OK(AppendNextOffset());
- RETURN_NOT_OK(Reserve(1));
-
- UnsafeAppendToBitmap(false);
- return Status::OK();
-}
-
-Status BinaryBuilder::FinishInternal(std::shared_ptr<ArrayData>* out) {
- // Write final offset (values length)
- RETURN_NOT_OK(AppendNextOffset());
-
- // These buffers' padding zeroed by BufferBuilder
- std::shared_ptr<Buffer> offsets, value_data;
- RETURN_NOT_OK(offsets_builder_.Finish(&offsets));
- RETURN_NOT_OK(value_data_builder_.Finish(&value_data));
-
- *out = ArrayData::Make(type_, length_, {null_bitmap_, offsets, value_data}, null_count_,
- 0);
- Reset();
- return Status::OK();
-}
-
-void BinaryBuilder::Reset() {
- ArrayBuilder::Reset();
- offsets_builder_.Reset();
- value_data_builder_.Reset();
-}
-
-const uint8_t* BinaryBuilder::GetValue(int64_t i, int32_t* out_length) const {
- const int32_t* offsets = offsets_builder_.data();
- int32_t offset = offsets[i];
- if (i == (length_ - 1)) {
- *out_length = static_cast<int32_t>(value_data_builder_.length()) - offset;
- } else {
- *out_length = offsets[i + 1] - offset;
- }
- return value_data_builder_.data() + offset;
-}
-
-util::string_view BinaryBuilder::GetView(int64_t i) const {
- const int32_t* offsets = offsets_builder_.data();
- int32_t offset = offsets[i];
- int32_t value_length;
- if (i == (length_ - 1)) {
- value_length = static_cast<int32_t>(value_data_builder_.length()) - offset;
- } else {
- value_length = offsets[i + 1] - offset;
- }
- return util::string_view(
- reinterpret_cast<const char*>(value_data_builder_.data() + offset), value_length);
-}
-
-StringBuilder::StringBuilder(MemoryPool* pool) : BinaryBuilder(utf8(), pool) {}
-
-Status StringBuilder::AppendValues(const std::vector<std::string>& values,
- const uint8_t* valid_bytes) {
- std::size_t total_length = std::accumulate(
- values.begin(), values.end(), 0ULL,
- [](uint64_t sum, const std::string& str) { return sum + str.size(); });
- RETURN_NOT_OK(Reserve(values.size()));
- RETURN_NOT_OK(value_data_builder_.Reserve(total_length));
- RETURN_NOT_OK(offsets_builder_.Reserve(values.size()));
-
- if (valid_bytes) {
- for (std::size_t i = 0; i < values.size(); ++i) {
- RETURN_NOT_OK(AppendNextOffset());
- if (valid_bytes[i]) {
- RETURN_NOT_OK(value_data_builder_.Append(
- reinterpret_cast<const uint8_t*>(values[i].data()), values[i].size()));
- }
- }
- } else {
- for (std::size_t i = 0; i < values.size(); ++i) {
- RETURN_NOT_OK(AppendNextOffset());
- RETURN_NOT_OK(value_data_builder_.Append(
- reinterpret_cast<const uint8_t*>(values[i].data()), values[i].size()));
- }
- }
-
- UnsafeAppendToBitmap(valid_bytes, values.size());
- return Status::OK();
-}
-
-Status StringBuilder::AppendValues(const char** values, int64_t length,
- const uint8_t* valid_bytes) {
- std::size_t total_length = 0;
- std::vector<std::size_t> value_lengths(length);
- bool have_null_value = false;
- for (int64_t i = 0; i < length; ++i) {
- if (values[i]) {
- auto value_length = strlen(values[i]);
- value_lengths[i] = value_length;
- total_length += value_length;
- } else {
- have_null_value = true;
- }
- }
- RETURN_NOT_OK(Reserve(length));
- RETURN_NOT_OK(value_data_builder_.Reserve(total_length));
- RETURN_NOT_OK(offsets_builder_.Reserve(length));
-
- if (valid_bytes) {
- int64_t valid_bytes_offset = 0;
- for (int64_t i = 0; i < length; ++i) {
- RETURN_NOT_OK(AppendNextOffset());
- if (valid_bytes[i]) {
- if (values[i]) {
- RETURN_NOT_OK(value_data_builder_.Append(
- reinterpret_cast<const uint8_t*>(values[i]), value_lengths[i]));
- } else {
- UnsafeAppendToBitmap(valid_bytes + valid_bytes_offset, i - valid_bytes_offset);
- UnsafeAppendToBitmap(false);
- valid_bytes_offset = i + 1;
- }
- }
- }
- UnsafeAppendToBitmap(valid_bytes + valid_bytes_offset, length - valid_bytes_offset);
- } else {
- if (have_null_value) {
- std::vector<uint8_t> valid_vector(length, 0);
- for (int64_t i = 0; i < length; ++i) {
- RETURN_NOT_OK(AppendNextOffset());
- if (values[i]) {
- RETURN_NOT_OK(value_data_builder_.Append(
- reinterpret_cast<const uint8_t*>(values[i]), value_lengths[i]));
- valid_vector[i] = 1;
- }
- }
- UnsafeAppendToBitmap(valid_vector.data(), length);
- } else {
- for (int64_t i = 0; i < length; ++i) {
- RETURN_NOT_OK(AppendNextOffset());
- RETURN_NOT_OK(value_data_builder_.Append(
- reinterpret_cast<const uint8_t*>(values[i]), value_lengths[i]));
- }
- UnsafeAppendToBitmap(nullptr, length);
- }
- }
- return Status::OK();
-}
-
-// ----------------------------------------------------------------------
-// Fixed width binary
-
-FixedSizeBinaryBuilder::FixedSizeBinaryBuilder(const std::shared_ptr<DataType>& type,
- MemoryPool* pool)
- : ArrayBuilder(type, pool),
- byte_width_(checked_cast<const FixedSizeBinaryType&>(*type).byte_width()),
- byte_builder_(pool) {}
-
-#ifndef NDEBUG
-void FixedSizeBinaryBuilder::CheckValueSize(int64_t size) {
- DCHECK_EQ(size, byte_width_) << "Appending wrong size to FixedSizeBinaryBuilder";
-}
-#endif
-
-Status FixedSizeBinaryBuilder::AppendValues(const uint8_t* data, int64_t length,
- const uint8_t* valid_bytes) {
- RETURN_NOT_OK(Reserve(length));
- UnsafeAppendToBitmap(valid_bytes, length);
- return byte_builder_.Append(data, length * byte_width_);
-}
-
-Status FixedSizeBinaryBuilder::AppendNull() {
- RETURN_NOT_OK(Reserve(1));
- UnsafeAppendToBitmap(false);
- return byte_builder_.Advance(byte_width_);
-}
-
-void FixedSizeBinaryBuilder::Reset() {
- ArrayBuilder::Reset();
- byte_builder_.Reset();
-}
-
-Status FixedSizeBinaryBuilder::Resize(int64_t capacity) {
- RETURN_NOT_OK(CheckCapacity(capacity, capacity_));
- RETURN_NOT_OK(byte_builder_.Resize(capacity * byte_width_));
- return ArrayBuilder::Resize(capacity);
-}
-
-Status FixedSizeBinaryBuilder::FinishInternal(std::shared_ptr<ArrayData>* out) {
- std::shared_ptr<Buffer> data;
- RETURN_NOT_OK(byte_builder_.Finish(&data));
-
- *out = ArrayData::Make(type_, length_, {null_bitmap_, data}, null_count_);
-
- null_bitmap_ = nullptr;
- capacity_ = length_ = null_count_ = 0;
- return Status::OK();
-}
-
-const uint8_t* FixedSizeBinaryBuilder::GetValue(int64_t i) const {
- const uint8_t* data_ptr = byte_builder_.data();
- return data_ptr + i * byte_width_;
-}
-
-util::string_view FixedSizeBinaryBuilder::GetView(int64_t i) const {
- const uint8_t* data_ptr = byte_builder_.data();
- return util::string_view(reinterpret_cast<const char*>(data_ptr + i * byte_width_),
- byte_width_);
-}
-
-// ----------------------------------------------------------------------
// Struct
StructBuilder::StructBuilder(const std::shared_ptr<DataType>& type, MemoryPool* pool,
@@ -1352,6 +504,7 @@ void StructBuilder::Reset() {
field_builder->Reset();
}
}
+
Status StructBuilder::FinishInternal(std::shared_ptr<ArrayData>* out) {
RETURN_NOT_OK(TrimBuffer(BitUtil::BytesForBits(length_), null_bitmap_.get()));
*out = ArrayData::Make(type_, length_, {null_bitmap_}, null_count_);
diff --git a/cpp/src/arrow/builder.h b/cpp/src/arrow/builder.h
index 34cac55..34398ee 100644
--- a/cpp/src/arrow/builder.h
+++ b/cpp/src/arrow/builder.h
@@ -193,6 +193,18 @@ class ARROW_EXPORT ArrayBuilder {
// Set the next length bits to not null (i.e. valid).
void UnsafeSetNotNull(int64_t length);
+ static Status TrimBuffer(const int64_t bytes_filled, ResizableBuffer* buffer);
+
+ static Status CheckCapacity(int64_t new_capacity, int64_t old_capacity) {  // Validates a requested capacity against the current one; Status::Invalid on failure.
+ if (new_capacity < 0) {  // Only negative values are rejected; zero passes this check despite the message wording.
+ return Status::Invalid("Resize capacity must be positive");
+ }
+ if (new_capacity < old_capacity) {  // Requests smaller than the current capacity are refused.
+ return Status::Invalid("Resize cannot downsize");
+ }
+ return Status::OK();  // new_capacity is non-negative and not below old_capacity.
+ }
+
std::shared_ptr<DataType> type_;
MemoryPool* pool_;