You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by uw...@apache.org on 2018/04/04 12:21:55 UTC

[arrow] branch master updated: ARROW-2388: [C++] Use valid_bytes API for StringBuilder::Append

This is an automated email from the ASF dual-hosted git repository.

uwe pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 933b32b  ARROW-2388: [C++] Use valid_bytes API for StringBuilder::Append
933b32b is described below

commit 933b32b612ab0935dab6fc0f670765444f0ab510
Author: Kouhei Sutou <ko...@clear-code.com>
AuthorDate: Wed Apr 4 14:21:00 2018 +0200

    ARROW-2388: [C++] Use valid_bytes API for StringBuilder::Append
    
    Because Append of other builders use valid_bytes not null_bytes.
    
    Author: Kouhei Sutou <ko...@clear-code.com>
    
    Closes #1833 from kou/cpp-string-array-builder-append-valid-bytes and squashes the following commits:
    
    47b2158 <Kouhei Sutou>  Fix format
    5c03f85 <Kouhei Sutou>  Use valid_bytes API for StringBuilder::Append
---
 cpp/src/arrow/array-test.cc | 10 +++++-----
 cpp/src/arrow/builder.cc    | 20 +++++++++++++-------
 cpp/src/arrow/builder.h     |  3 ++-
 3 files changed, 20 insertions(+), 13 deletions(-)

diff --git a/cpp/src/arrow/array-test.cc b/cpp/src/arrow/array-test.cc
index 308bbcd..0b4342a 100644
--- a/cpp/src/arrow/array-test.cc
+++ b/cpp/src/arrow/array-test.cc
@@ -991,13 +991,13 @@ TEST_F(TestStringBuilder, TestScalarAppend) {
 
 TEST_F(TestStringBuilder, TestAppendVector) {
   vector<string> strings = {"", "bb", "a", "", "ccc"};
-  vector<uint8_t> is_null = {0, 0, 0, 1, 0};
+  vector<uint8_t> valid_bytes = {1, 1, 1, 0, 1};
 
   int N = static_cast<int>(strings.size());
   int reps = 1000;
 
   for (int j = 0; j < reps; ++j) {
-    ASSERT_OK(builder_->Append(strings, is_null.data()));
+    ASSERT_OK(builder_->Append(strings, valid_bytes.data()));
   }
   Done();
 
@@ -1008,9 +1008,7 @@ TEST_F(TestStringBuilder, TestAppendVector) {
   int32_t length;
   int32_t pos = 0;
   for (int i = 0; i < N * reps; ++i) {
-    if (is_null[i % N]) {
-      ASSERT_TRUE(result_->IsNull(i));
-    } else {
+    if (valid_bytes[i % N]) {
       ASSERT_FALSE(result_->IsNull(i));
       result_->GetValue(i, &length);
       ASSERT_EQ(pos, result_->value_offset(i));
@@ -1018,6 +1016,8 @@ TEST_F(TestStringBuilder, TestAppendVector) {
       ASSERT_EQ(strings[i % N], result_->GetString(i));
 
       pos += length;
+    } else {
+      ASSERT_TRUE(result_->IsNull(i));
     }
   }
 }
diff --git a/cpp/src/arrow/builder.cc b/cpp/src/arrow/builder.cc
index ec48656..a502e1f 100644
--- a/cpp/src/arrow/builder.cc
+++ b/cpp/src/arrow/builder.cc
@@ -1386,7 +1386,7 @@ const uint8_t* BinaryBuilder::GetValue(int64_t i, int32_t* out_length) const {
 StringBuilder::StringBuilder(MemoryPool* pool) : BinaryBuilder(utf8(), pool) {}
 
 Status StringBuilder::Append(const std::vector<std::string>& values,
-                             uint8_t* null_bytes) {
+                             const uint8_t* valid_bytes) {
   std::size_t total_length = std::accumulate(
       values.begin(), values.end(), 0ULL,
       [](uint64_t sum, const std::string& str) { return sum + str.size(); });
@@ -1394,16 +1394,22 @@ Status StringBuilder::Append(const std::vector<std::string>& values,
   RETURN_NOT_OK(value_data_builder_.Reserve(total_length));
   RETURN_NOT_OK(offsets_builder_.Reserve(values.size()));
 
-  for (std::size_t i = 0; i < values.size(); ++i) {
-    RETURN_NOT_OK(AppendNextOffset());
-    if (null_bytes[i]) {
-      UnsafeAppendToBitmap(false);
-    } else {
+  if (valid_bytes) {
+    for (std::size_t i = 0; i < values.size(); ++i) {
+      RETURN_NOT_OK(AppendNextOffset());
+      if (valid_bytes[i]) {
+        RETURN_NOT_OK(value_data_builder_.Append(
+            reinterpret_cast<const uint8_t*>(values[i].data()), values[i].size()));
+      }
+    }
+  } else {
+    for (std::size_t i = 0; i < values.size(); ++i) {
+      RETURN_NOT_OK(AppendNextOffset());
       RETURN_NOT_OK(value_data_builder_.Append(
           reinterpret_cast<const uint8_t*>(values[i].data()), values[i].size()));
-      UnsafeAppendToBitmap(true);
     }
   }
+  UnsafeAppendToBitmap(valid_bytes, values.size());
   return Status::OK();
 }
 
diff --git a/cpp/src/arrow/builder.h b/cpp/src/arrow/builder.h
index cdcee80..32cfdd4 100644
--- a/cpp/src/arrow/builder.h
+++ b/cpp/src/arrow/builder.h
@@ -718,7 +718,8 @@ class ARROW_EXPORT StringBuilder : public BinaryBuilder {
 
   using BinaryBuilder::Append;
 
-  Status Append(const std::vector<std::string>& values, uint8_t* null_bytes);
+  Status Append(const std::vector<std::string>& values,
+                const uint8_t* valid_bytes = NULLPTR);
 };
 
 // ----------------------------------------------------------------------

-- 
To stop receiving notification emails like this one, please contact
uwe@apache.org.