You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2017/04/02 16:07:02 UTC

arrow git commit: ARROW-737: [C++] Enable mutable buffer slices, SliceMutableBuffer function

Repository: arrow
Updated Branches:
  refs/heads/master e333576a0 -> d54ab9a23


ARROW-737: [C++] Enable mutable buffer slices, SliceMutableBuffer function

This patch also results in better microperformance on my laptop.

```
Run on (4 X 1200 MHz CPU s)
2017-04-01 22:35:30
Benchmark                                                  Time           CPU Iterations
----------------------------------------------------------------------------------------
BM_WriteRecordBatch/1/min_time:1.000/real_time         53617 ns      53423 ns      26903   18.2136GB/s
BM_WriteRecordBatch/4/min_time:1.000/real_time         48118 ns      47999 ns      26823    20.295GB/s
BM_WriteRecordBatch/16/min_time:1.000/real_time        46679 ns      46562 ns      30409   20.9209GB/s
BM_WriteRecordBatch/64/min_time:1.000/real_time        50583 ns      50344 ns      27308   19.3061GB/s
BM_WriteRecordBatch/256/min_time:1.000/real_time       93474 ns      93259 ns      11720   10.4474GB/s
BM_WriteRecordBatch/1024/min_time:1.000/real_time     254056 ns     253461 ns       5434   3.84389GB/s
BM_WriteRecordBatch/4k/min_time:1.000/real_time       892292 ns     888924 ns       1210   1120.71MB/s
BM_WriteRecordBatch/8k/min_time:1.000/real_time      1799323 ns    1795023 ns        754   555.764MB/s
BM_ReadRecordBatch/1/min_time:1.000/real_time           2501 ns       2452 ns     586633   390.521GB/s
BM_ReadRecordBatch/4/min_time:1.000/real_time           6921 ns       5577 ns     227670   141.095GB/s
BM_ReadRecordBatch/16/min_time:1.000/real_time         15561 ns      14505 ns      67493    62.758GB/s
BM_ReadRecordBatch/64/min_time:1.000/real_time         48583 ns      48242 ns      30070   20.1008GB/s
BM_ReadRecordBatch/256/min_time:1.000/real_time       184637 ns     183306 ns       6660    5.2891GB/s
BM_ReadRecordBatch/1024/min_time:1.000/real_time      734128 ns     729692 ns       1905   1.33024GB/s
BM_ReadRecordBatch/4k/min_time:1.000/real_time       3027028 ns    3008020 ns        445   330.357MB/s
BM_ReadRecordBatch/8k/min_time:1.000/real_time       6472022 ns    6426801 ns        211   154.511MB/s
```

before

```
Run on (4 X 1200 MHz CPU s)
2017-04-01 22:37:36
Benchmark                                                  Time           CPU Iterations
----------------------------------------------------------------------------------------
BM_WriteRecordBatch/1/min_time:1.000/real_time         59317 ns      59202 ns      22727   16.4633GB/s
BM_WriteRecordBatch/4/min_time:1.000/real_time         56322 ns      56191 ns      24944   17.3389GB/s
BM_WriteRecordBatch/16/min_time:1.000/real_time        52027 ns      51880 ns      26682   18.7703GB/s
BM_WriteRecordBatch/64/min_time:1.000/real_time        56324 ns      56202 ns      24724   17.3384GB/s
BM_WriteRecordBatch/256/min_time:1.000/real_time      108096 ns     107868 ns      11284   9.03423GB/s
BM_WriteRecordBatch/1024/min_time:1.000/real_time     279508 ns     278730 ns       4957   3.49386GB/s
BM_WriteRecordBatch/4k/min_time:1.000/real_time      1013229 ns    1009772 ns       1191   986.944MB/s
BM_WriteRecordBatch/8k/min_time:1.000/real_time      2011309 ns    2005377 ns        661   497.189MB/s
BM_ReadRecordBatch/1/min_time:1.000/real_time           2507 ns       2501 ns     558949   389.563GB/s
BM_ReadRecordBatch/4/min_time:1.000/real_time           5120 ns       5110 ns     275798   190.735GB/s
BM_ReadRecordBatch/16/min_time:1.000/real_time         15800 ns      15706 ns      85481   61.8072GB/s
BM_ReadRecordBatch/64/min_time:1.000/real_time         55678 ns      55476 ns      25022   17.5393GB/s
BM_ReadRecordBatch/256/min_time:1.000/real_time       218083 ns     217596 ns       6163   4.47794GB/s
BM_ReadRecordBatch/1024/min_time:1.000/real_time      875861 ns     873419 ns       1591   1.11497GB/s
BM_ReadRecordBatch/4k/min_time:1.000/real_time       3545586 ns    3538141 ns        383   282.041MB/s
BM_ReadRecordBatch/8k/min_time:1.000/real_time       7118830 ns    7104433 ns        194   140.473MB/s
```

Author: Wes McKinney <we...@twosigma.com>

Closes #479 from wesm/ARROW-737 and squashes the following commits:

b663ca4 [Wes McKinney] Enable mutable buffer slices, SliceMutableBuffer function


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/d54ab9a2
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/d54ab9a2
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/d54ab9a2

Branch: refs/heads/master
Commit: d54ab9a23aa8d4fe52ce91b117511540cc7491bb
Parents: e333576
Author: Wes McKinney <we...@twosigma.com>
Authored: Sun Apr 2 12:06:56 2017 -0400
Committer: Wes McKinney <we...@twosigma.com>
Committed: Sun Apr 2 12:06:56 2017 -0400

----------------------------------------------------------------------
 cpp/src/arrow/buffer-test.cc | 17 +++++++++++++++++
 cpp/src/arrow/buffer.cc      | 17 ++++++++++++-----
 cpp/src/arrow/buffer.h       | 25 +++++++++++++++++--------
 3 files changed, 46 insertions(+), 13 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/arrow/blob/d54ab9a2/cpp/src/arrow/buffer-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/buffer-test.cc b/cpp/src/arrow/buffer-test.cc
index e0a2137..5815ed1 100644
--- a/cpp/src/arrow/buffer-test.cc
+++ b/cpp/src/arrow/buffer-test.cc
@@ -168,4 +168,21 @@ TEST_F(TestBuffer, SliceBuffer) {
   ASSERT_EQ(2, buf.use_count());
 }
 
+TEST_F(TestBuffer, SliceMutableBuffer) {
+  std::string data_str = "some data to slice";
+  auto data = reinterpret_cast<const uint8_t*>(data_str.c_str());
+
+  std::shared_ptr<MutableBuffer> buffer;
+  ASSERT_OK(AllocateBuffer(default_memory_pool(), 50, &buffer));
+
+  memcpy(buffer->mutable_data(), data, data_str.size());
+
+  std::shared_ptr<Buffer> slice = SliceMutableBuffer(buffer, 5, 10);
+  ASSERT_TRUE(slice->is_mutable());
+  ASSERT_EQ(10, slice->size());
+
+  Buffer expected(data + 5, 10);
+  ASSERT_TRUE(slice->Equals(expected));
+}
+
 }  // namespace arrow

http://git-wip-us.apache.org/repos/asf/arrow/blob/d54ab9a2/cpp/src/arrow/buffer.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/buffer.cc b/cpp/src/arrow/buffer.cc
index 5962340..fb63798 100644
--- a/cpp/src/arrow/buffer.cc
+++ b/cpp/src/arrow/buffer.cc
@@ -27,11 +27,6 @@
 
 namespace arrow {
 
-Buffer::Buffer(const std::shared_ptr<Buffer>& parent, int64_t offset, int64_t size)
-    : Buffer(parent->data() + offset, size) {
-  parent_ = parent;
-}
-
 Buffer::~Buffer() {}
 
 Status Buffer::Copy(
@@ -116,6 +111,18 @@ Status PoolBuffer::Resize(int64_t new_size, bool shrink_to_fit) {
   return Status::OK();
 }
 
+std::shared_ptr<Buffer> SliceMutableBuffer(
+    const std::shared_ptr<Buffer>& buffer, int64_t offset, int64_t length) {
+  return std::make_shared<MutableBuffer>(buffer, offset, length);
+}
+
+MutableBuffer::MutableBuffer(
+    const std::shared_ptr<Buffer>& parent, int64_t offset, int64_t size)
+    : MutableBuffer(parent->mutable_data() + offset, size) {
+  DCHECK(parent->is_mutable()) << "Must pass mutable buffer";
+  parent_ = parent;
+}
+
 Status AllocateBuffer(
     MemoryPool* pool, int64_t size, std::shared_ptr<MutableBuffer>* out) {
   auto buffer = std::make_shared<PoolBuffer>(pool);

http://git-wip-us.apache.org/repos/asf/arrow/blob/d54ab9a2/cpp/src/arrow/buffer.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/buffer.h b/cpp/src/arrow/buffer.h
index 713d57a..3f14c96 100644
--- a/cpp/src/arrow/buffer.h
+++ b/cpp/src/arrow/buffer.h
@@ -46,7 +46,7 @@ class Status;
 class ARROW_EXPORT Buffer {
  public:
   Buffer(const uint8_t* data, int64_t size)
-      : is_mutable_(false), data_(data), size_(size), capacity_(size) {}
+    : is_mutable_(false), data_(data), size_(size), capacity_(size) {}
   virtual ~Buffer();
 
   /// An offset into data that is owned by another buffer, but we want to be
@@ -56,7 +56,10 @@ class ARROW_EXPORT Buffer {
   /// This method makes no assertions about alignment or padding of the buffer but
   /// in general we expected buffers to be aligned and padded to 64 bytes.  In the future
   /// we might add utility methods to help determine if a buffer satisfies this contract.
-  Buffer(const std::shared_ptr<Buffer>& parent, int64_t offset, int64_t size);
+  Buffer(const std::shared_ptr<Buffer>& parent, int64_t offset, int64_t size)
+    : Buffer(parent->data() + offset, size) {
+    parent_ = parent;
+  }
 
   bool is_mutable() const { return is_mutable_; }
 
@@ -74,6 +77,7 @@ class ARROW_EXPORT Buffer {
 
   int64_t capacity() const { return capacity_; }
   const uint8_t* data() const { return data_; }
+  uint8_t* mutable_data() { return mutable_data_; }
 
   int64_t size() const { return size_; }
 
@@ -82,6 +86,7 @@ class ARROW_EXPORT Buffer {
  protected:
   bool is_mutable_;
   const uint8_t* data_;
+  uint8_t* mutable_data_;
   int64_t size_;
   int64_t capacity_;
 
@@ -99,20 +104,24 @@ static inline std::shared_ptr<Buffer> SliceBuffer(
   return std::make_shared<Buffer>(buffer, offset, length);
 }
 
+/// Construct a mutable buffer slice. If the parent buffer is not mutable, this
+/// will abort in debug builds
+std::shared_ptr<Buffer> ARROW_EXPORT SliceMutableBuffer(
+    const std::shared_ptr<Buffer>& buffer, int64_t offset, int64_t length);
+
 /// A Buffer whose contents can be mutated. May or may not own its data.
 class ARROW_EXPORT MutableBuffer : public Buffer {
  public:
-  MutableBuffer(uint8_t* data, int64_t size) : Buffer(data, size) {
-    is_mutable_ = true;
+  MutableBuffer(uint8_t* data, int64_t size)
+    : Buffer(data, size) {
     mutable_data_ = data;
+    is_mutable_ = true;
   }
 
-  uint8_t* mutable_data() { return mutable_data_; }
+  MutableBuffer(const std::shared_ptr<Buffer>& parent, int64_t offset, int64_t size);
 
  protected:
-  MutableBuffer() : Buffer(nullptr, 0), mutable_data_(nullptr) {}
-
-  uint8_t* mutable_data_;
+  MutableBuffer() : Buffer(nullptr, 0) {}
 };
 
 class ARROW_EXPORT ResizableBuffer : public MutableBuffer {