You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2018/09/17 16:13:42 UTC
[arrow] branch master updated: ARROW-3157: [C++] Add Buffer::Wrap,
MutableBuffer::Wrap convenience methods for wrapping typed memory,
std::vector
This is an automated email from the ASF dual-hosted git repository.
wesm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new 2df2a9e ARROW-3157: [C++] Add Buffer::Wrap, MutableBuffer::Wrap convenience methods for wrapping typed memory, std::vector<T>
2df2a9e is described below
commit 2df2a9ef9db670ee9b2e6eab70bc983821a12f26
Author: Wes McKinney <we...@apache.org>
AuthorDate: Mon Sep 17 12:13:28 2018 -0400
ARROW-3157: [C++] Add Buffer::Wrap, MutableBuffer::Wrap convenience methods for wrapping typed memory, std::vector<T>
Author: Wes McKinney <we...@apache.org>
Closes #2566 from wesm/ARROW-3157 and squashes the following commits:
983e6ec37 <Wes McKinney> Add Buffer::Wrap, MutableBuffer::Wrap convenience for creating a buffer referencing a typed C array or std::vector
---
cpp/apidoc/tutorials/tensor_to_py.md | 5 +----
cpp/cmake_modules/BuildUtils.cmake | 2 +-
cpp/src/arrow/array-test.cc | 14 ++++++--------
cpp/src/arrow/buffer-test.cc | 9 +++++++++
cpp/src/arrow/buffer.h | 33 ++++++++++++++++++++++++++++++++
cpp/src/arrow/compute/compute-test.cc | 9 +++------
cpp/src/arrow/ipc/ipc-json-test.cc | 5 ++---
cpp/src/arrow/ipc/ipc-read-write-test.cc | 4 ++--
cpp/src/arrow/test-util.h | 6 ------
cpp/src/parquet/CMakeLists.txt | 2 +-
cpp/src/parquet/file-deserialize-test.cc | 9 +++------
11 files changed, 61 insertions(+), 37 deletions(-)
diff --git a/cpp/apidoc/tutorials/tensor_to_py.md b/cpp/apidoc/tutorials/tensor_to_py.md
index e7a7416..0be973a 100644
--- a/cpp/apidoc/tutorials/tensor_to_py.md
+++ b/cpp/apidoc/tutorials/tensor_to_py.md
@@ -60,12 +60,9 @@ for (int64_t i = 0; i < input_length; ++i) {
input[i] = 2.0;
}
-// Cast float array to bytes array
-const uint8_t* bytes_array = reinterpret_cast<const uint8_t*>(input.data());
-
// Create Arrow Tensor Object, no copy made!
// {input_length} is the shape of the tensor
-auto value_buffer = std::make_shared<Buffer>(bytes_array, sizeof(float) * input_length);
+auto value_buffer = Buffer::Wrap<float>(input);
Tensor t(float32(), value_buffer, {input_length});
```
diff --git a/cpp/cmake_modules/BuildUtils.cmake b/cpp/cmake_modules/BuildUtils.cmake
index 257c759..af814e4 100644
--- a/cpp/cmake_modules/BuildUtils.cmake
+++ b/cpp/cmake_modules/BuildUtils.cmake
@@ -368,7 +368,7 @@ function(ADD_ARROW_TEST REL_TEST_NAME)
endif()
if (ARG_LABELS)
- set(ARG_LABELS "unittest;${ARG_LABELS}")
+ set(ARG_LABELS "${ARG_LABELS}")
else()
set(ARG_LABELS unittest)
endif()
diff --git a/cpp/src/arrow/array-test.cc b/cpp/src/arrow/array-test.cc
index 5f002a5..bec5a99 100644
--- a/cpp/src/arrow/array-test.cc
+++ b/cpp/src/arrow/array-test.cc
@@ -313,9 +313,7 @@ class TestPrimitiveBuilder : public TestBuilder {
void Check(const std::unique_ptr<BuilderType>& builder, bool nullable) {
int64_t size = builder->length();
-
- auto ex_data = std::make_shared<Buffer>(reinterpret_cast<uint8_t*>(draws_.data()),
- size * sizeof(T));
+ auto ex_data = Buffer::Wrap(draws_.data(), size);
std::shared_ptr<Buffer> ex_null_bitmap;
int64_t ex_null_count = 0;
@@ -1006,8 +1004,8 @@ class TestStringArray : public ::testing::Test {
void MakeArray() {
length_ = static_cast<int64_t>(offsets_.size()) - 1;
- value_buf_ = GetBufferFromVector(chars_);
- offsets_buf_ = GetBufferFromVector(offsets_);
+ value_buf_ = Buffer::Wrap(chars_);
+ offsets_buf_ = Buffer::Wrap(offsets_);
ASSERT_OK(BitUtil::BytesToBits(valid_bytes_, default_memory_pool(), &null_bitmap_));
null_count_ = CountNulls(valid_bytes_);
@@ -1071,7 +1069,7 @@ TEST_F(TestStringArray, TestGetString) {
TEST_F(TestStringArray, TestEmptyStringComparison) {
offsets_ = {0, 0, 0, 0, 0, 0};
- offsets_buf_ = GetBufferFromVector(offsets_);
+ offsets_buf_ = Buffer::Wrap(offsets_);
length_ = static_cast<int64_t>(offsets_.size() - 1);
auto strings_a = std::make_shared<StringArray>(length_, offsets_buf_, nullptr,
@@ -1318,8 +1316,8 @@ class TestBinaryArray : public ::testing::Test {
void MakeArray() {
length_ = static_cast<int64_t>(offsets_.size() - 1);
- value_buf_ = GetBufferFromVector(chars_);
- offsets_buf_ = GetBufferFromVector(offsets_);
+ value_buf_ = Buffer::Wrap(chars_);
+ offsets_buf_ = Buffer::Wrap(offsets_);
ASSERT_OK(BitUtil::BytesToBits(valid_bytes_, default_memory_pool(), &null_bitmap_));
null_count_ = CountNulls(valid_bytes_);
diff --git a/cpp/src/arrow/buffer-test.cc b/cpp/src/arrow/buffer-test.cc
index 368a9cb..b664250 100644
--- a/cpp/src/arrow/buffer-test.cc
+++ b/cpp/src/arrow/buffer-test.cc
@@ -132,6 +132,15 @@ TEST(TestBuffer, SliceBuffer) {
ASSERT_EQ(2, buf.use_count());
}
+TEST(TestMutableBuffer, Wrap) {
+ std::vector<int32_t> values = {1, 2, 3};
+
+ auto buf = MutableBuffer::Wrap(values.data(), values.size());
+ reinterpret_cast<int32_t*>(buf->mutable_data())[1] = 4;
+
+ ASSERT_EQ(4, values[1]);
+}
+
TEST(TestBuffer, SliceMutableBuffer) {
std::string data_str = "some data to slice";
auto data = reinterpret_cast<const uint8_t*>(data_str.c_str());
diff --git a/cpp/src/arrow/buffer.h b/cpp/src/arrow/buffer.h
index 99beb23..8f170c9 100644
--- a/cpp/src/arrow/buffer.h
+++ b/cpp/src/arrow/buffer.h
@@ -24,6 +24,7 @@
#include <memory>
#include <string>
#include <type_traits>
+#include <vector>
#include "arrow/memory_pool.h"
#include "arrow/status.h"
@@ -123,6 +124,28 @@ class ARROW_EXPORT Buffer {
/// using the default memory pool
static Status FromString(const std::string& data, std::shared_ptr<Buffer>* out);
+ /// \brief Create buffer referencing typed memory with some length without
+ /// copying
+ /// \param[in] data the typed memory as C array
+ /// \param[in] length the number of values in the array
+ /// \return a new shared_ptr<Buffer>
+ template <typename T, typename SizeType = int64_t>
+ static std::shared_ptr<Buffer> Wrap(const T* data, SizeType length) {
+ return std::make_shared<Buffer>(reinterpret_cast<const uint8_t*>(data),
+ static_cast<int64_t>(sizeof(T) * length));
+ }
+
+ /// \brief Create buffer referencing std::vector with some length without
+ /// copying
+ /// \param[in] data the vector to be referenced. If this vector is changed,
+ /// the buffer may become invalid
+ /// \return a new shared_ptr<Buffer>
+ template <typename T>
+ static std::shared_ptr<Buffer> Wrap(const std::vector<T>& data) {
+ return std::make_shared<Buffer>(reinterpret_cast<const uint8_t*>(data.data()),
+ static_cast<int64_t>(sizeof(T) * data.size()));
+ } // namespace arrow
+
int64_t capacity() const { return capacity_; }
const uint8_t* data() const { return data_; }
@@ -185,6 +208,16 @@ class ARROW_EXPORT MutableBuffer : public Buffer {
MutableBuffer(const std::shared_ptr<Buffer>& parent, const int64_t offset,
const int64_t size);
+ /// \brief Create buffer referencing typed memory with some length
+ /// \param[in] data the typed memory as C array
+ /// \param[in] length the number of values in the array
+ /// \return a new shared_ptr<Buffer>
+ template <typename T, typename SizeType = int64_t>
+ static std::shared_ptr<Buffer> Wrap(T* data, SizeType length) {
+ return std::make_shared<MutableBuffer>(reinterpret_cast<uint8_t*>(data),
+ static_cast<int64_t>(sizeof(T) * length));
+ }
+
protected:
MutableBuffer() : Buffer(NULLPTR, 0) {}
};
diff --git a/cpp/src/arrow/compute/compute-test.cc b/cpp/src/arrow/compute/compute-test.cc
index 233f8a6..056fb03 100644
--- a/cpp/src/arrow/compute/compute-test.cc
+++ b/cpp/src/arrow/compute/compute-test.cc
@@ -198,8 +198,7 @@ TEST_F(TestCast, OverflowInNullSlot) {
shared_ptr<Array> expected;
ArrayFromVector<Int16Type, int16_t>(int16(), is_valid, e11, &expected);
- auto buf = std::make_shared<Buffer>(reinterpret_cast<const uint8_t*>(v11.data()),
- static_cast<int64_t>(v11.size()));
+ auto buf = Buffer::Wrap(v11.data(), v11.size());
Int32Array tmp11(5, buf, expected->null_bitmap(), -1);
CheckPass(tmp11, *expected, int16(), options);
@@ -972,10 +971,8 @@ TEST_F(TestCast, DictToNonDictNoNulls) {
// Explicitly construct with nullptr for the null_bitmap_data
std::vector<int32_t> i1 = {1, 0, 1};
std::vector<int32_t> i2 = {2, 1, 0, 1};
- auto c1 = std::make_shared<NumericArray<Int32Type>>(
- 3, arrow::GetBufferFromVector<int32_t>(i1));
- auto c2 = std::make_shared<NumericArray<Int32Type>>(
- 4, arrow::GetBufferFromVector<int32_t>(i2));
+ auto c1 = std::make_shared<NumericArray<Int32Type>>(3, Buffer::Wrap(i1));
+ auto c2 = std::make_shared<NumericArray<Int32Type>>(4, Buffer::Wrap(i2));
ArrayVector dict_arrays = {std::make_shared<DictionaryArray>(dict_type, c1),
std::make_shared<DictionaryArray>(dict_type, c2)};
diff --git a/cpp/src/arrow/ipc/ipc-json-test.cc b/cpp/src/arrow/ipc/ipc-json-test.cc
index 549a93c..26d396d 100644
--- a/cpp/src/arrow/ipc/ipc-json-test.cc
+++ b/cpp/src/arrow/ipc/ipc-json-test.cc
@@ -184,7 +184,7 @@ TEST(TestJsonArrayWriter, NestedTypes) {
std::shared_ptr<Buffer> list_bitmap;
ASSERT_OK(GetBitmapFromVector(list_is_valid, &list_bitmap));
- std::shared_ptr<Buffer> offsets_buffer = GetBufferFromVector(offsets);
+ std::shared_ptr<Buffer> offsets_buffer = Buffer::Wrap(offsets);
ListArray list_array(list(value_type), 5, offsets_buffer, values_array, list_bitmap, 1);
@@ -345,8 +345,7 @@ TEST(TestJsonFileReadWrite, MinimalFormatExample) {
}
)example";
- auto buffer = std::make_shared<Buffer>(reinterpret_cast<const uint8_t*>(example),
- strlen(example));
+ auto buffer = Buffer::Wrap(example, strlen(example));
std::unique_ptr<JsonReader> reader;
ASSERT_OK(JsonReader::Open(buffer, &reader));
diff --git a/cpp/src/arrow/ipc/ipc-read-write-test.cc b/cpp/src/arrow/ipc/ipc-read-write-test.cc
index 37127cb..3154d57 100644
--- a/cpp/src/arrow/ipc/ipc-read-write-test.cc
+++ b/cpp/src/arrow/ipc/ipc-read-write-test.cc
@@ -749,7 +749,7 @@ TEST_F(TestTensorRoundTrip, BasicRoundtrip) {
std::vector<int64_t> values;
randint(size, 0, 100, &values);
- auto data = GetBufferFromVector(values);
+ auto data = Buffer::Wrap(values);
Tensor t0(int64(), data, shape, strides, dim_names);
Tensor tzero(int64(), data, {}, {}, {});
@@ -770,7 +770,7 @@ TEST_F(TestTensorRoundTrip, NonContiguous) {
std::vector<int64_t> values;
randint(24, 0, 100, &values);
- auto data = GetBufferFromVector(values);
+ auto data = Buffer::Wrap(values);
Tensor tensor(int64(), data, {4, 3}, {48, 16});
CheckTensorRoundTrip(tensor);
diff --git a/cpp/src/arrow/test-util.h b/cpp/src/arrow/test-util.h
index dfc1ab5..1a50a07 100644
--- a/cpp/src/arrow/test-util.h
+++ b/cpp/src/arrow/test-util.h
@@ -116,12 +116,6 @@ void random_real(int64_t n, uint32_t seed, T min_value, T max_value,
}
template <typename T>
-std::shared_ptr<Buffer> GetBufferFromVector(const std::vector<T>& values) {
- return std::make_shared<Buffer>(reinterpret_cast<const uint8_t*>(values.data()),
- values.size() * sizeof(T));
-}
-
-template <typename T>
inline Status CopyBufferFromVector(const std::vector<T>& values, MemoryPool* pool,
std::shared_ptr<Buffer>* result) {
int64_t nbytes = static_cast<int>(values.size()) * sizeof(T);
diff --git a/cpp/src/parquet/CMakeLists.txt b/cpp/src/parquet/CMakeLists.txt
index f731179..8a37dc7 100644
--- a/cpp/src/parquet/CMakeLists.txt
+++ b/cpp/src/parquet/CMakeLists.txt
@@ -37,7 +37,7 @@ function(ADD_PARQUET_TEST REL_TEST_NAME)
ADD_ARROW_TEST(${REL_TEST_NAME}
STATIC_LINK_LIBS ${PARQUET_TEST_LINK_LIBS}
PREFIX "parquet"
- LABELS "parquet")
+ LABELS "unittest;parquet")
endfunction()
# ----------------------------------------------------------------------
diff --git a/cpp/src/parquet/file-deserialize-test.cc b/cpp/src/parquet/file-deserialize-test.cc
index fb95534..b766eed 100644
--- a/cpp/src/parquet/file-deserialize-test.cc
+++ b/cpp/src/parquet/file-deserialize-test.cc
@@ -262,22 +262,19 @@ class TestParquetFileReader : public ::testing::Test {
TEST_F(TestParquetFileReader, InvalidHeader) {
const char* bad_header = "PAR2";
- auto buffer = std::make_shared<Buffer>(reinterpret_cast<const uint8_t*>(bad_header),
- strlen(bad_header));
+ auto buffer = Buffer::Wrap(bad_header, strlen(bad_header));
ASSERT_NO_FATAL_FAILURE(AssertInvalidFileThrows(buffer));
}
TEST_F(TestParquetFileReader, InvalidFooter) {
// File is smaller than FOOTER_SIZE
const char* bad_file = "PAR1PAR";
- auto buffer = std::make_shared<Buffer>(reinterpret_cast<const uint8_t*>(bad_file),
- strlen(bad_file));
+ auto buffer = Buffer::Wrap(bad_file, strlen(bad_file));
ASSERT_NO_FATAL_FAILURE(AssertInvalidFileThrows(buffer));
// Magic number incorrect
const char* bad_file2 = "PAR1PAR2";
- buffer = std::make_shared<Buffer>(reinterpret_cast<const uint8_t*>(bad_file2),
- strlen(bad_file2));
+ buffer = Buffer::Wrap(bad_file2, strlen(bad_file2));
ASSERT_NO_FATAL_FAILURE(AssertInvalidFileThrows(buffer));
}