You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by uw...@apache.org on 2018/04/03 08:58:53 UTC

[arrow] branch master updated: ARROW-2351 [C++] StringBuilder::append(vector...) not impleme…

This is an automated email from the ASF dual-hosted git repository.

uwe pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 65d2558  ARROW-2351 [C++] StringBuilder::append(vector<string>...) not impleme…
65d2558 is described below

commit 65d25584b0a63d03bc9b3a476abc74abc3e33707
Author: Lizhou Gao <li...@zilliz.com>
AuthorDate: Tue Apr 3 10:58:46 2018 +0200

    ARROW-2351 [C++] StringBuilder::append(vector<string>...) not impleme…
    
    Changed the API from `Status Append(const std::vector<std::string>& values, uint8_t* null_bytes);` to `Status Append(const std::vector<std::string>& values);`. IMO, if a string is empty, then it should be null, and vice versa.
    
    **[update]** Changed the API back to the original.
    
    Author: Lizhou Gao <li...@zilliz.com>
    
    Closes #1803 from gaolizhou/ARROW-2351 and squashes the following commits:
    
    608e369 <Lizhou Gao> ARROW-2351: Fix CI warning
    9da7a39 <Lizhou Gao> ARROW-2351: add null_bytes back & format the code
    dd3937b <Lizhou Gao> ARROW-2351: Fix whitespace/blank_line
    397efab <Lizhou Gao> ARROW-2351:  StringBuilder::append(vector<string>...) not implemented
    85cd078 <Lizhou Gao> ARROW-2351  StringBuilder::append(vector<string>...) not implemented
---
 cpp/src/arrow/array-test.cc | 33 +++++++++++++++++++++++++++++++++
 cpp/src/arrow/builder.cc    | 24 +++++++++++++++++++++++-
 2 files changed, 56 insertions(+), 1 deletion(-)

diff --git a/cpp/src/arrow/array-test.cc b/cpp/src/arrow/array-test.cc
index 2aa73a0..308bbcd 100644
--- a/cpp/src/arrow/array-test.cc
+++ b/cpp/src/arrow/array-test.cc
@@ -989,6 +989,39 @@ TEST_F(TestStringBuilder, TestScalarAppend) {
   }
 }
 
+TEST_F(TestStringBuilder, TestAppendVector) {  // round-trips Append(vector<string>, null mask)
+  vector<string> strings = {"", "bb", "a", "", "ccc"};
+  vector<uint8_t> is_null = {0, 0, 0, 1, 0};  // nonzero marks the slot as null (index 3 only)
+
+  int N = static_cast<int>(strings.size());
+  int reps = 1000;
+
+  for (int j = 0; j < reps; ++j) {
+    ASSERT_OK(builder_->Append(strings, is_null.data()));  // same batch appended reps times
+  }
+  Done();
+
+  ASSERT_EQ(reps * N, result_->length());
+  ASSERT_EQ(reps, result_->null_count());  // exactly one null slot per batch
+  ASSERT_EQ(reps * 6, result_->value_data()->size());  // 6 = bytes of "bb" + "a" + "ccc"
+
+  int32_t length;
+  int32_t pos = 0;  // expected offset: running byte total of the non-null values seen so far
+  for (int i = 0; i < N * reps; ++i) {
+    if (is_null[i % N]) {  // i % N maps the flat index back into the repeated batch
+      ASSERT_TRUE(result_->IsNull(i));
+    } else {
+      ASSERT_FALSE(result_->IsNull(i));
+      result_->GetValue(i, &length);  // only the length out-parameter is used here
+      ASSERT_EQ(pos, result_->value_offset(i));
+      ASSERT_EQ(static_cast<int>(strings[i % N].size()), length);
+      ASSERT_EQ(strings[i % N], result_->GetString(i));
+
+      pos += length;
+    }
+  }
+}
+
 TEST_F(TestStringBuilder, TestZeroLength) {
   // All buffers are null
   Done();
diff --git a/cpp/src/arrow/builder.cc b/cpp/src/arrow/builder.cc
index aa9f3ce..ec48656 100644
--- a/cpp/src/arrow/builder.cc
+++ b/cpp/src/arrow/builder.cc
@@ -16,11 +16,11 @@
 // under the License.
 
 #include "arrow/builder.h"
-
 #include <algorithm>
 #include <cstdint>
 #include <cstring>
 #include <limits>
+#include <numeric>
 #include <sstream>
 #include <utility>
 #include <vector>
@@ -1385,6 +1385,28 @@ const uint8_t* BinaryBuilder::GetValue(int64_t i, int32_t* out_length) const {
 
 StringBuilder::StringBuilder(MemoryPool* pool) : BinaryBuilder(utf8(), pool) {}
 
+Status StringBuilder::Append(const std::vector<std::string>& values,  // strings to bulk-append
+                             uint8_t* null_bytes) {  // nonzero [i] => slot i is null; may be nullptr
+  std::size_t total_length = std::accumulate(  // bytes of all values, null slots included
+      values.begin(), values.end(), 0ULL,
+      [](uint64_t sum, const std::string& str) { return sum + str.size(); });
+  RETURN_NOT_OK(Reserve(values.size()));  // presumably sizes the bitmap for the Unsafe appends below
+  RETURN_NOT_OK(value_data_builder_.Reserve(total_length));
+  RETURN_NOT_OK(offsets_builder_.Reserve(values.size()));
+
+  for (std::size_t i = 0; i < values.size(); ++i) {
+    RETURN_NOT_OK(AppendNextOffset());  // every slot, null or not, gets an offset entry
+    if (null_bytes != nullptr && null_bytes[i]) {  // no mask supplied => every slot is valid
+      UnsafeAppendToBitmap(false);  // null slot: no bytes are copied for it
+    } else {
+      RETURN_NOT_OK(value_data_builder_.Append(
+          reinterpret_cast<const uint8_t*>(values[i].data()), values[i].size()));
+      UnsafeAppendToBitmap(true);
+    }
+  }
+  return Status::OK();  // any failure above has already returned its non-OK Status
+}
+
 // ----------------------------------------------------------------------
 // Fixed width binary
 

-- 
To stop receiving notification emails like this one, please contact
uwe@apache.org.