You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by uw...@apache.org on 2018/04/04 12:21:55 UTC
[arrow] branch master updated: ARROW-2388: [C++] Use valid_bytes
API for StringBuilder::Append
This is an automated email from the ASF dual-hosted git repository.
uwe pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new 933b32b ARROW-2388: [C++] Use valid_bytes API for StringBuilder::Append
933b32b is described below
commit 933b32b612ab0935dab6fc0f670765444f0ab510
Author: Kouhei Sutou <ko...@clear-code.com>
AuthorDate: Wed Apr 4 14:21:00 2018 +0200
ARROW-2388: [C++] Use valid_bytes API for StringBuilder::Append
Because Append of other builders use valid_bytes not null_bytes.
Author: Kouhei Sutou <ko...@clear-code.com>
Closes #1833 from kou/cpp-string-array-builder-append-valid-bytes and squashes the following commits:
47b2158 <Kouhei Sutou> Fix format
5c03f85 <Kouhei Sutou> Use valid_bytes API for StringBuilder::Append
---
cpp/src/arrow/array-test.cc | 10 +++++-----
cpp/src/arrow/builder.cc | 20 +++++++++++++-------
cpp/src/arrow/builder.h | 3 ++-
3 files changed, 20 insertions(+), 13 deletions(-)
diff --git a/cpp/src/arrow/array-test.cc b/cpp/src/arrow/array-test.cc
index 308bbcd..0b4342a 100644
--- a/cpp/src/arrow/array-test.cc
+++ b/cpp/src/arrow/array-test.cc
@@ -991,13 +991,13 @@ TEST_F(TestStringBuilder, TestScalarAppend) {
TEST_F(TestStringBuilder, TestAppendVector) {
vector<string> strings = {"", "bb", "a", "", "ccc"};
- vector<uint8_t> is_null = {0, 0, 0, 1, 0};
+ vector<uint8_t> valid_bytes = {1, 1, 1, 0, 1};
int N = static_cast<int>(strings.size());
int reps = 1000;
for (int j = 0; j < reps; ++j) {
- ASSERT_OK(builder_->Append(strings, is_null.data()));
+ ASSERT_OK(builder_->Append(strings, valid_bytes.data()));
}
Done();
@@ -1008,9 +1008,7 @@ TEST_F(TestStringBuilder, TestAppendVector) {
int32_t length;
int32_t pos = 0;
for (int i = 0; i < N * reps; ++i) {
- if (is_null[i % N]) {
- ASSERT_TRUE(result_->IsNull(i));
- } else {
+ if (valid_bytes[i % N]) {
ASSERT_FALSE(result_->IsNull(i));
result_->GetValue(i, &length);
ASSERT_EQ(pos, result_->value_offset(i));
@@ -1018,6 +1016,8 @@ TEST_F(TestStringBuilder, TestAppendVector) {
ASSERT_EQ(strings[i % N], result_->GetString(i));
pos += length;
+ } else {
+ ASSERT_TRUE(result_->IsNull(i));
}
}
}
diff --git a/cpp/src/arrow/builder.cc b/cpp/src/arrow/builder.cc
index ec48656..a502e1f 100644
--- a/cpp/src/arrow/builder.cc
+++ b/cpp/src/arrow/builder.cc
@@ -1386,7 +1386,7 @@ const uint8_t* BinaryBuilder::GetValue(int64_t i, int32_t* out_length) const {
StringBuilder::StringBuilder(MemoryPool* pool) : BinaryBuilder(utf8(), pool) {}
Status StringBuilder::Append(const std::vector<std::string>& values,
- uint8_t* null_bytes) {
+ const uint8_t* valid_bytes) {
std::size_t total_length = std::accumulate(
values.begin(), values.end(), 0ULL,
[](uint64_t sum, const std::string& str) { return sum + str.size(); });
@@ -1394,16 +1394,22 @@ Status StringBuilder::Append(const std::vector<std::string>& values,
RETURN_NOT_OK(value_data_builder_.Reserve(total_length));
RETURN_NOT_OK(offsets_builder_.Reserve(values.size()));
- for (std::size_t i = 0; i < values.size(); ++i) {
- RETURN_NOT_OK(AppendNextOffset());
- if (null_bytes[i]) {
- UnsafeAppendToBitmap(false);
- } else {
+ if (valid_bytes) {
+ for (std::size_t i = 0; i < values.size(); ++i) {
+ RETURN_NOT_OK(AppendNextOffset());
+ if (valid_bytes[i]) {
+ RETURN_NOT_OK(value_data_builder_.Append(
+ reinterpret_cast<const uint8_t*>(values[i].data()), values[i].size()));
+ }
+ }
+ } else {
+ for (std::size_t i = 0; i < values.size(); ++i) {
+ RETURN_NOT_OK(AppendNextOffset());
RETURN_NOT_OK(value_data_builder_.Append(
reinterpret_cast<const uint8_t*>(values[i].data()), values[i].size()));
- UnsafeAppendToBitmap(true);
}
}
+ UnsafeAppendToBitmap(valid_bytes, values.size());
return Status::OK();
}
diff --git a/cpp/src/arrow/builder.h b/cpp/src/arrow/builder.h
index cdcee80..32cfdd4 100644
--- a/cpp/src/arrow/builder.h
+++ b/cpp/src/arrow/builder.h
@@ -718,7 +718,8 @@ class ARROW_EXPORT StringBuilder : public BinaryBuilder {
using BinaryBuilder::Append;
- Status Append(const std::vector<std::string>& values, uint8_t* null_bytes);
+ Status Append(const std::vector<std::string>& values,
+ const uint8_t* valid_bytes = NULLPTR);
};
// ----------------------------------------------------------------------
--
To stop receiving notification emails like this one, please contact
uwe@apache.org.