You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2018/09/17 16:13:42 UTC

[arrow] branch master updated: ARROW-3157: [C++] Add Buffer::Wrap, MutableBuffer::Wrap convenience methods for wrapping typed memory, std::vector

This is an automated email from the ASF dual-hosted git repository.

wesm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 2df2a9e  ARROW-3157: [C++] Add Buffer::Wrap, MutableBuffer::Wrap convenience methods for wrapping typed memory, std::vector<T>
2df2a9e is described below

commit 2df2a9ef9db670ee9b2e6eab70bc983821a12f26
Author: Wes McKinney <we...@apache.org>
AuthorDate: Mon Sep 17 12:13:28 2018 -0400

    ARROW-3157: [C++] Add Buffer::Wrap, MutableBuffer::Wrap convenience methods for wrapping typed memory, std::vector<T>
    
    Author: Wes McKinney <we...@apache.org>
    
    Closes #2566 from wesm/ARROW-3157 and squashes the following commits:
    
    983e6ec37 <Wes McKinney> Add Buffer::Wrap, MutableBuffer::Wrap convenience for creating a buffer referencing a typed C array or std::vector
---
 cpp/apidoc/tutorials/tensor_to_py.md     |  5 +----
 cpp/cmake_modules/BuildUtils.cmake       |  2 +-
 cpp/src/arrow/array-test.cc              | 14 ++++++--------
 cpp/src/arrow/buffer-test.cc             |  9 +++++++++
 cpp/src/arrow/buffer.h                   | 33 ++++++++++++++++++++++++++++++++
 cpp/src/arrow/compute/compute-test.cc    |  9 +++------
 cpp/src/arrow/ipc/ipc-json-test.cc       |  5 ++---
 cpp/src/arrow/ipc/ipc-read-write-test.cc |  4 ++--
 cpp/src/arrow/test-util.h                |  6 ------
 cpp/src/parquet/CMakeLists.txt           |  2 +-
 cpp/src/parquet/file-deserialize-test.cc |  9 +++------
 11 files changed, 61 insertions(+), 37 deletions(-)

diff --git a/cpp/apidoc/tutorials/tensor_to_py.md b/cpp/apidoc/tutorials/tensor_to_py.md
index e7a7416..0be973a 100644
--- a/cpp/apidoc/tutorials/tensor_to_py.md
+++ b/cpp/apidoc/tutorials/tensor_to_py.md
@@ -60,12 +60,9 @@ for (int64_t i = 0; i < input_length; ++i) {
   input[i] = 2.0;
 }
 
-// Cast float array to bytes array
-const uint8_t* bytes_array = reinterpret_cast<const uint8_t*>(input.data());
-
 // Create Arrow Tensor Object, no copy made!
 // {input_length} is the shape of the tensor
-auto value_buffer = std::make_shared<Buffer>(bytes_array, sizeof(float) * input_length);
+auto value_buffer = Buffer::Wrap<float>(input);
 Tensor t(float32(), value_buffer, {input_length});
 ```
 
diff --git a/cpp/cmake_modules/BuildUtils.cmake b/cpp/cmake_modules/BuildUtils.cmake
index 257c759..af814e4 100644
--- a/cpp/cmake_modules/BuildUtils.cmake
+++ b/cpp/cmake_modules/BuildUtils.cmake
@@ -368,7 +368,7 @@ function(ADD_ARROW_TEST REL_TEST_NAME)
   endif()
 
   if (ARG_LABELS)
-    set(ARG_LABELS "unittest;${ARG_LABELS}")
+    set(ARG_LABELS "${ARG_LABELS}")
   else()
     set(ARG_LABELS unittest)
   endif()
diff --git a/cpp/src/arrow/array-test.cc b/cpp/src/arrow/array-test.cc
index 5f002a5..bec5a99 100644
--- a/cpp/src/arrow/array-test.cc
+++ b/cpp/src/arrow/array-test.cc
@@ -313,9 +313,7 @@ class TestPrimitiveBuilder : public TestBuilder {
 
   void Check(const std::unique_ptr<BuilderType>& builder, bool nullable) {
     int64_t size = builder->length();
-
-    auto ex_data = std::make_shared<Buffer>(reinterpret_cast<uint8_t*>(draws_.data()),
-                                            size * sizeof(T));
+    auto ex_data = Buffer::Wrap(draws_.data(), size);
 
     std::shared_ptr<Buffer> ex_null_bitmap;
     int64_t ex_null_count = 0;
@@ -1006,8 +1004,8 @@ class TestStringArray : public ::testing::Test {
 
   void MakeArray() {
     length_ = static_cast<int64_t>(offsets_.size()) - 1;
-    value_buf_ = GetBufferFromVector(chars_);
-    offsets_buf_ = GetBufferFromVector(offsets_);
+    value_buf_ = Buffer::Wrap(chars_);
+    offsets_buf_ = Buffer::Wrap(offsets_);
     ASSERT_OK(BitUtil::BytesToBits(valid_bytes_, default_memory_pool(), &null_bitmap_));
     null_count_ = CountNulls(valid_bytes_);
 
@@ -1071,7 +1069,7 @@ TEST_F(TestStringArray, TestGetString) {
 
 TEST_F(TestStringArray, TestEmptyStringComparison) {
   offsets_ = {0, 0, 0, 0, 0, 0};
-  offsets_buf_ = GetBufferFromVector(offsets_);
+  offsets_buf_ = Buffer::Wrap(offsets_);
   length_ = static_cast<int64_t>(offsets_.size() - 1);
 
   auto strings_a = std::make_shared<StringArray>(length_, offsets_buf_, nullptr,
@@ -1318,8 +1316,8 @@ class TestBinaryArray : public ::testing::Test {
 
   void MakeArray() {
     length_ = static_cast<int64_t>(offsets_.size() - 1);
-    value_buf_ = GetBufferFromVector(chars_);
-    offsets_buf_ = GetBufferFromVector(offsets_);
+    value_buf_ = Buffer::Wrap(chars_);
+    offsets_buf_ = Buffer::Wrap(offsets_);
 
     ASSERT_OK(BitUtil::BytesToBits(valid_bytes_, default_memory_pool(), &null_bitmap_));
     null_count_ = CountNulls(valid_bytes_);
diff --git a/cpp/src/arrow/buffer-test.cc b/cpp/src/arrow/buffer-test.cc
index 368a9cb..b664250 100644
--- a/cpp/src/arrow/buffer-test.cc
+++ b/cpp/src/arrow/buffer-test.cc
@@ -132,6 +132,15 @@ TEST(TestBuffer, SliceBuffer) {
   ASSERT_EQ(2, buf.use_count());
 }
 
+TEST(TestMutableBuffer, Wrap) {
+  std::vector<int32_t> values = {1, 2, 3};
+
+  auto buf = MutableBuffer::Wrap(values.data(), values.size());
+  reinterpret_cast<int32_t*>(buf->mutable_data())[1] = 4;
+
+  ASSERT_EQ(4, values[1]);
+}
+
 TEST(TestBuffer, SliceMutableBuffer) {
   std::string data_str = "some data to slice";
   auto data = reinterpret_cast<const uint8_t*>(data_str.c_str());
diff --git a/cpp/src/arrow/buffer.h b/cpp/src/arrow/buffer.h
index 99beb23..8f170c9 100644
--- a/cpp/src/arrow/buffer.h
+++ b/cpp/src/arrow/buffer.h
@@ -24,6 +24,7 @@
 #include <memory>
 #include <string>
 #include <type_traits>
+#include <vector>
 
 #include "arrow/memory_pool.h"
 #include "arrow/status.h"
@@ -123,6 +124,28 @@ class ARROW_EXPORT Buffer {
   /// using the default memory pool
   static Status FromString(const std::string& data, std::shared_ptr<Buffer>* out);
 
+  /// \brief Create buffer referencing typed memory with some length without
+  /// copying
+  /// \param[in] data the typed memory as C array
+  /// \param[in] length the number of values in the array
+  /// \return a new shared_ptr<Buffer>
+  template <typename T, typename SizeType = int64_t>
+  static std::shared_ptr<Buffer> Wrap(const T* data, SizeType length) {
+    return std::make_shared<Buffer>(reinterpret_cast<const uint8_t*>(data),
+                                    static_cast<int64_t>(sizeof(T) * length));
+  }
+
+  /// \brief Create buffer referencing std::vector with some length without
+  /// copying
+  /// \param[in] data the vector to be referenced. If this vector is changed,
+  /// the buffer may become invalid
+  /// \return a new shared_ptr<Buffer>
+  template <typename T>
+  static std::shared_ptr<Buffer> Wrap(const std::vector<T>& data) {
+    return std::make_shared<Buffer>(reinterpret_cast<const uint8_t*>(data.data()),
+                                    static_cast<int64_t>(sizeof(T) * data.size()));
+  }  // namespace arrow
+
   int64_t capacity() const { return capacity_; }
   const uint8_t* data() const { return data_; }
 
@@ -185,6 +208,16 @@ class ARROW_EXPORT MutableBuffer : public Buffer {
   MutableBuffer(const std::shared_ptr<Buffer>& parent, const int64_t offset,
                 const int64_t size);
 
+  /// \brief Create buffer referencing typed memory with some length
+  /// \param[in] data the typed memory as C array
+  /// \param[in] length the number of values in the array
+  /// \return a new shared_ptr<Buffer>
+  template <typename T, typename SizeType = int64_t>
+  static std::shared_ptr<Buffer> Wrap(T* data, SizeType length) {
+    return std::make_shared<MutableBuffer>(reinterpret_cast<uint8_t*>(data),
+                                           static_cast<int64_t>(sizeof(T) * length));
+  }
+
  protected:
   MutableBuffer() : Buffer(NULLPTR, 0) {}
 };
diff --git a/cpp/src/arrow/compute/compute-test.cc b/cpp/src/arrow/compute/compute-test.cc
index 233f8a6..056fb03 100644
--- a/cpp/src/arrow/compute/compute-test.cc
+++ b/cpp/src/arrow/compute/compute-test.cc
@@ -198,8 +198,7 @@ TEST_F(TestCast, OverflowInNullSlot) {
   shared_ptr<Array> expected;
   ArrayFromVector<Int16Type, int16_t>(int16(), is_valid, e11, &expected);
 
-  auto buf = std::make_shared<Buffer>(reinterpret_cast<const uint8_t*>(v11.data()),
-                                      static_cast<int64_t>(v11.size()));
+  auto buf = Buffer::Wrap(v11.data(), v11.size());
   Int32Array tmp11(5, buf, expected->null_bitmap(), -1);
 
   CheckPass(tmp11, *expected, int16(), options);
@@ -972,10 +971,8 @@ TEST_F(TestCast, DictToNonDictNoNulls) {
   // Explicitly construct with nullptr for the null_bitmap_data
   std::vector<int32_t> i1 = {1, 0, 1};
   std::vector<int32_t> i2 = {2, 1, 0, 1};
-  auto c1 = std::make_shared<NumericArray<Int32Type>>(
-      3, arrow::GetBufferFromVector<int32_t>(i1));
-  auto c2 = std::make_shared<NumericArray<Int32Type>>(
-      4, arrow::GetBufferFromVector<int32_t>(i2));
+  auto c1 = std::make_shared<NumericArray<Int32Type>>(3, Buffer::Wrap(i1));
+  auto c2 = std::make_shared<NumericArray<Int32Type>>(4, Buffer::Wrap(i2));
 
   ArrayVector dict_arrays = {std::make_shared<DictionaryArray>(dict_type, c1),
                              std::make_shared<DictionaryArray>(dict_type, c2)};
diff --git a/cpp/src/arrow/ipc/ipc-json-test.cc b/cpp/src/arrow/ipc/ipc-json-test.cc
index 549a93c..26d396d 100644
--- a/cpp/src/arrow/ipc/ipc-json-test.cc
+++ b/cpp/src/arrow/ipc/ipc-json-test.cc
@@ -184,7 +184,7 @@ TEST(TestJsonArrayWriter, NestedTypes) {
 
   std::shared_ptr<Buffer> list_bitmap;
   ASSERT_OK(GetBitmapFromVector(list_is_valid, &list_bitmap));
-  std::shared_ptr<Buffer> offsets_buffer = GetBufferFromVector(offsets);
+  std::shared_ptr<Buffer> offsets_buffer = Buffer::Wrap(offsets);
 
   ListArray list_array(list(value_type), 5, offsets_buffer, values_array, list_bitmap, 1);
 
@@ -345,8 +345,7 @@ TEST(TestJsonFileReadWrite, MinimalFormatExample) {
 }
 )example";
 
-  auto buffer = std::make_shared<Buffer>(reinterpret_cast<const uint8_t*>(example),
-                                         strlen(example));
+  auto buffer = Buffer::Wrap(example, strlen(example));
 
   std::unique_ptr<JsonReader> reader;
   ASSERT_OK(JsonReader::Open(buffer, &reader));
diff --git a/cpp/src/arrow/ipc/ipc-read-write-test.cc b/cpp/src/arrow/ipc/ipc-read-write-test.cc
index 37127cb..3154d57 100644
--- a/cpp/src/arrow/ipc/ipc-read-write-test.cc
+++ b/cpp/src/arrow/ipc/ipc-read-write-test.cc
@@ -749,7 +749,7 @@ TEST_F(TestTensorRoundTrip, BasicRoundtrip) {
   std::vector<int64_t> values;
   randint(size, 0, 100, &values);
 
-  auto data = GetBufferFromVector(values);
+  auto data = Buffer::Wrap(values);
 
   Tensor t0(int64(), data, shape, strides, dim_names);
   Tensor tzero(int64(), data, {}, {}, {});
@@ -770,7 +770,7 @@ TEST_F(TestTensorRoundTrip, NonContiguous) {
   std::vector<int64_t> values;
   randint(24, 0, 100, &values);
 
-  auto data = GetBufferFromVector(values);
+  auto data = Buffer::Wrap(values);
   Tensor tensor(int64(), data, {4, 3}, {48, 16});
 
   CheckTensorRoundTrip(tensor);
diff --git a/cpp/src/arrow/test-util.h b/cpp/src/arrow/test-util.h
index dfc1ab5..1a50a07 100644
--- a/cpp/src/arrow/test-util.h
+++ b/cpp/src/arrow/test-util.h
@@ -116,12 +116,6 @@ void random_real(int64_t n, uint32_t seed, T min_value, T max_value,
 }
 
 template <typename T>
-std::shared_ptr<Buffer> GetBufferFromVector(const std::vector<T>& values) {
-  return std::make_shared<Buffer>(reinterpret_cast<const uint8_t*>(values.data()),
-                                  values.size() * sizeof(T));
-}
-
-template <typename T>
 inline Status CopyBufferFromVector(const std::vector<T>& values, MemoryPool* pool,
                                    std::shared_ptr<Buffer>* result) {
   int64_t nbytes = static_cast<int>(values.size()) * sizeof(T);
diff --git a/cpp/src/parquet/CMakeLists.txt b/cpp/src/parquet/CMakeLists.txt
index f731179..8a37dc7 100644
--- a/cpp/src/parquet/CMakeLists.txt
+++ b/cpp/src/parquet/CMakeLists.txt
@@ -37,7 +37,7 @@ function(ADD_PARQUET_TEST REL_TEST_NAME)
   ADD_ARROW_TEST(${REL_TEST_NAME}
     STATIC_LINK_LIBS ${PARQUET_TEST_LINK_LIBS}
     PREFIX "parquet"
-    LABELS "parquet")
+    LABELS "unittest;parquet")
 endfunction()
 
 # ----------------------------------------------------------------------
diff --git a/cpp/src/parquet/file-deserialize-test.cc b/cpp/src/parquet/file-deserialize-test.cc
index fb95534..b766eed 100644
--- a/cpp/src/parquet/file-deserialize-test.cc
+++ b/cpp/src/parquet/file-deserialize-test.cc
@@ -262,22 +262,19 @@ class TestParquetFileReader : public ::testing::Test {
 TEST_F(TestParquetFileReader, InvalidHeader) {
   const char* bad_header = "PAR2";
 
-  auto buffer = std::make_shared<Buffer>(reinterpret_cast<const uint8_t*>(bad_header),
-                                         strlen(bad_header));
+  auto buffer = Buffer::Wrap(bad_header, strlen(bad_header));
   ASSERT_NO_FATAL_FAILURE(AssertInvalidFileThrows(buffer));
 }
 
 TEST_F(TestParquetFileReader, InvalidFooter) {
   // File is smaller than FOOTER_SIZE
   const char* bad_file = "PAR1PAR";
-  auto buffer = std::make_shared<Buffer>(reinterpret_cast<const uint8_t*>(bad_file),
-                                         strlen(bad_file));
+  auto buffer = Buffer::Wrap(bad_file, strlen(bad_file));
   ASSERT_NO_FATAL_FAILURE(AssertInvalidFileThrows(buffer));
 
   // Magic number incorrect
   const char* bad_file2 = "PAR1PAR2";
-  buffer = std::make_shared<Buffer>(reinterpret_cast<const uint8_t*>(bad_file2),
-                                    strlen(bad_file2));
+  buffer = Buffer::Wrap(bad_file2, strlen(bad_file2));
   ASSERT_NO_FATAL_FAILURE(AssertInvalidFileThrows(buffer));
 }