You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2017/04/02 16:07:02 UTC
arrow git commit: ARROW-737: [C++] Enable mutable buffer slices,
SliceMutableBuffer function
Repository: arrow
Updated Branches:
refs/heads/master e333576a0 -> d54ab9a23
ARROW-737: [C++] Enable mutable buffer slices, SliceMutableBuffer function
This patch also results in better microperformance on my laptop.
```
Run on (4 X 1200 MHz CPU s)
2017-04-01 22:35:30
Benchmark Time CPU Iterations
----------------------------------------------------------------------------------------
BM_WriteRecordBatch/1/min_time:1.000/real_time 53617 ns 53423 ns 26903 18.2136GB/s
BM_WriteRecordBatch/4/min_time:1.000/real_time 48118 ns 47999 ns 26823 20.295GB/s
BM_WriteRecordBatch/16/min_time:1.000/real_time 46679 ns 46562 ns 30409 20.9209GB/s
BM_WriteRecordBatch/64/min_time:1.000/real_time 50583 ns 50344 ns 27308 19.3061GB/s
BM_WriteRecordBatch/256/min_time:1.000/real_time 93474 ns 93259 ns 11720 10.4474GB/s
BM_WriteRecordBatch/1024/min_time:1.000/real_time 254056 ns 253461 ns 5434 3.84389GB/s
BM_WriteRecordBatch/4k/min_time:1.000/real_time 892292 ns 888924 ns 1210 1120.71MB/s
BM_WriteRecordBatch/8k/min_time:1.000/real_time 1799323 ns 1795023 ns 754 555.764MB/s
BM_ReadRecordBatch/1/min_time:1.000/real_time 2501 ns 2452 ns 586633 390.521GB/s
BM_ReadRecordBatch/4/min_time:1.000/real_time 6921 ns 5577 ns 227670 141.095GB/s
BM_ReadRecordBatch/16/min_time:1.000/real_time 15561 ns 14505 ns 67493 62.758GB/s
BM_ReadRecordBatch/64/min_time:1.000/real_time 48583 ns 48242 ns 30070 20.1008GB/s
BM_ReadRecordBatch/256/min_time:1.000/real_time 184637 ns 183306 ns 6660 5.2891GB/s
BM_ReadRecordBatch/1024/min_time:1.000/real_time 734128 ns 729692 ns 1905 1.33024GB/s
BM_ReadRecordBatch/4k/min_time:1.000/real_time 3027028 ns 3008020 ns 445 330.357MB/s
BM_ReadRecordBatch/8k/min_time:1.000/real_time 6472022 ns 6426801 ns 211 154.511MB/s
```
before
```
Run on (4 X 1200 MHz CPU s)
2017-04-01 22:37:36
Benchmark Time CPU Iterations
----------------------------------------------------------------------------------------
BM_WriteRecordBatch/1/min_time:1.000/real_time 59317 ns 59202 ns 22727 16.4633GB/s
BM_WriteRecordBatch/4/min_time:1.000/real_time 56322 ns 56191 ns 24944 17.3389GB/s
BM_WriteRecordBatch/16/min_time:1.000/real_time 52027 ns 51880 ns 26682 18.7703GB/s
BM_WriteRecordBatch/64/min_time:1.000/real_time 56324 ns 56202 ns 24724 17.3384GB/s
BM_WriteRecordBatch/256/min_time:1.000/real_time 108096 ns 107868 ns 11284 9.03423GB/s
BM_WriteRecordBatch/1024/min_time:1.000/real_time 279508 ns 278730 ns 4957 3.49386GB/s
BM_WriteRecordBatch/4k/min_time:1.000/real_time 1013229 ns 1009772 ns 1191 986.944MB/s
BM_WriteRecordBatch/8k/min_time:1.000/real_time 2011309 ns 2005377 ns 661 497.189MB/s
BM_ReadRecordBatch/1/min_time:1.000/real_time 2507 ns 2501 ns 558949 389.563GB/s
BM_ReadRecordBatch/4/min_time:1.000/real_time 5120 ns 5110 ns 275798 190.735GB/s
BM_ReadRecordBatch/16/min_time:1.000/real_time 15800 ns 15706 ns 85481 61.8072GB/s
BM_ReadRecordBatch/64/min_time:1.000/real_time 55678 ns 55476 ns 25022 17.5393GB/s
BM_ReadRecordBatch/256/min_time:1.000/real_time 218083 ns 217596 ns 6163 4.47794GB/s
BM_ReadRecordBatch/1024/min_time:1.000/real_time 875861 ns 873419 ns 1591 1.11497GB/s
BM_ReadRecordBatch/4k/min_time:1.000/real_time 3545586 ns 3538141 ns 383 282.041MB/s
BM_ReadRecordBatch/8k/min_time:1.000/real_time 7118830 ns 7104433 ns 194 140.473MB/s
```
Author: Wes McKinney <we...@twosigma.com>
Closes #479 from wesm/ARROW-737 and squashes the following commits:
b663ca4 [Wes McKinney] Enable mutable buffer slices, SliceMutableBuffer function
Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/d54ab9a2
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/d54ab9a2
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/d54ab9a2
Branch: refs/heads/master
Commit: d54ab9a23aa8d4fe52ce91b117511540cc7491bb
Parents: e333576
Author: Wes McKinney <we...@twosigma.com>
Authored: Sun Apr 2 12:06:56 2017 -0400
Committer: Wes McKinney <we...@twosigma.com>
Committed: Sun Apr 2 12:06:56 2017 -0400
----------------------------------------------------------------------
cpp/src/arrow/buffer-test.cc | 17 +++++++++++++++++
cpp/src/arrow/buffer.cc | 17 ++++++++++++-----
cpp/src/arrow/buffer.h | 25 +++++++++++++++++--------
3 files changed, 46 insertions(+), 13 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/arrow/blob/d54ab9a2/cpp/src/arrow/buffer-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/buffer-test.cc b/cpp/src/arrow/buffer-test.cc
index e0a2137..5815ed1 100644
--- a/cpp/src/arrow/buffer-test.cc
+++ b/cpp/src/arrow/buffer-test.cc
@@ -168,4 +168,21 @@ TEST_F(TestBuffer, SliceBuffer) {
ASSERT_EQ(2, buf.use_count());
}
+TEST_F(TestBuffer, SliceMutableBuffer) {
+ std::string data_str = "some data to slice";
+ auto data = reinterpret_cast<const uint8_t*>(data_str.c_str());
+
+ std::shared_ptr<MutableBuffer> buffer;
+ ASSERT_OK(AllocateBuffer(default_memory_pool(), 50, &buffer));
+
+ memcpy(buffer->mutable_data(), data, data_str.size());
+
+ std::shared_ptr<Buffer> slice = SliceMutableBuffer(buffer, 5, 10);
+ ASSERT_TRUE(slice->is_mutable());
+ ASSERT_EQ(10, slice->size());
+
+ Buffer expected(data + 5, 10);
+ ASSERT_TRUE(slice->Equals(expected));
+}
+
} // namespace arrow
http://git-wip-us.apache.org/repos/asf/arrow/blob/d54ab9a2/cpp/src/arrow/buffer.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/buffer.cc b/cpp/src/arrow/buffer.cc
index 5962340..fb63798 100644
--- a/cpp/src/arrow/buffer.cc
+++ b/cpp/src/arrow/buffer.cc
@@ -27,11 +27,6 @@
namespace arrow {
-Buffer::Buffer(const std::shared_ptr<Buffer>& parent, int64_t offset, int64_t size)
- : Buffer(parent->data() + offset, size) {
- parent_ = parent;
-}
-
Buffer::~Buffer() {}
Status Buffer::Copy(
@@ -116,6 +111,18 @@ Status PoolBuffer::Resize(int64_t new_size, bool shrink_to_fit) {
return Status::OK();
}
+std::shared_ptr<Buffer> SliceMutableBuffer(
+ const std::shared_ptr<Buffer>& buffer, int64_t offset, int64_t length) {
+ return std::make_shared<MutableBuffer>(buffer, offset, length);
+}
+
+MutableBuffer::MutableBuffer(
+ const std::shared_ptr<Buffer>& parent, int64_t offset, int64_t size)
+ : MutableBuffer(parent->mutable_data() + offset, size) {
+ DCHECK(parent->is_mutable()) << "Must pass mutable buffer";
+ parent_ = parent;
+}
+
Status AllocateBuffer(
MemoryPool* pool, int64_t size, std::shared_ptr<MutableBuffer>* out) {
auto buffer = std::make_shared<PoolBuffer>(pool);
http://git-wip-us.apache.org/repos/asf/arrow/blob/d54ab9a2/cpp/src/arrow/buffer.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/buffer.h b/cpp/src/arrow/buffer.h
index 713d57a..3f14c96 100644
--- a/cpp/src/arrow/buffer.h
+++ b/cpp/src/arrow/buffer.h
@@ -46,7 +46,7 @@ class Status;
class ARROW_EXPORT Buffer {
public:
Buffer(const uint8_t* data, int64_t size)
- : is_mutable_(false), data_(data), size_(size), capacity_(size) {}
+ : is_mutable_(false), data_(data), size_(size), capacity_(size) {}
virtual ~Buffer();
/// An offset into data that is owned by another buffer, but we want to be
@@ -56,7 +56,10 @@ class ARROW_EXPORT Buffer {
/// This method makes no assertions about alignment or padding of the buffer but
/// in general we expected buffers to be aligned and padded to 64 bytes. In the future
/// we might add utility methods to help determine if a buffer satisfies this contract.
- Buffer(const std::shared_ptr<Buffer>& parent, int64_t offset, int64_t size);
+ Buffer(const std::shared_ptr<Buffer>& parent, int64_t offset, int64_t size)
+ : Buffer(parent->data() + offset, size) {
+ parent_ = parent;
+ }
bool is_mutable() const { return is_mutable_; }
@@ -74,6 +77,7 @@ class ARROW_EXPORT Buffer {
int64_t capacity() const { return capacity_; }
const uint8_t* data() const { return data_; }
+ uint8_t* mutable_data() { return mutable_data_; }
int64_t size() const { return size_; }
@@ -82,6 +86,7 @@ class ARROW_EXPORT Buffer {
protected:
bool is_mutable_;
const uint8_t* data_;
+ uint8_t* mutable_data_;
int64_t size_;
int64_t capacity_;
@@ -99,20 +104,24 @@ static inline std::shared_ptr<Buffer> SliceBuffer(
return std::make_shared<Buffer>(buffer, offset, length);
}
+/// Construct a mutable buffer slice. If the parent buffer is not mutable, this
+/// will abort in debug builds
+std::shared_ptr<Buffer> ARROW_EXPORT SliceMutableBuffer(
+ const std::shared_ptr<Buffer>& buffer, int64_t offset, int64_t length);
+
/// A Buffer whose contents can be mutated. May or may not own its data.
class ARROW_EXPORT MutableBuffer : public Buffer {
public:
- MutableBuffer(uint8_t* data, int64_t size) : Buffer(data, size) {
- is_mutable_ = true;
+ MutableBuffer(uint8_t* data, int64_t size)
+ : Buffer(data, size) {
mutable_data_ = data;
+ is_mutable_ = true;
}
- uint8_t* mutable_data() { return mutable_data_; }
+ MutableBuffer(const std::shared_ptr<Buffer>& parent, int64_t offset, int64_t size);
protected:
- MutableBuffer() : Buffer(nullptr, 0), mutable_data_(nullptr) {}
-
- uint8_t* mutable_data_;
+ MutableBuffer() : Buffer(nullptr, 0) {}
};
class ARROW_EXPORT ResizableBuffer : public MutableBuffer {