You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2018/08/21 16:20:00 UTC

[arrow] branch master updated: ARROW-3098: [C++/Python] Allow seeking at end of BufferReader and FixedSizeBufferWriter

This is an automated email from the ASF dual-hosted git repository.

wesm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 27f990a  ARROW-3098: [C++/Python] Allow seeking at end of BufferReader and FixedSizeBufferWriter
27f990a is described below

commit 27f990a503f8d9370037bd12c235aae1c1378fb9
Author: Antoine Pitrou <an...@python.org>
AuthorDate: Tue Aug 21 12:19:55 2018 -0400

    ARROW-3098: [C++/Python] Allow seeking at end of BufferReader and FixedSizeBufferWriter
    
    Author: Antoine Pitrou <an...@python.org>
    
    Closes #2454 from pitrou/ARROW-3098-seek-to-end and squashes the following commits:
    
    3ad25ba5 <Antoine Pitrou> ARROW-3098:  Allow seeking at end of BufferReader and FixedSizeBufferWriter
---
 cpp/src/arrow/io/io-memory-test.cc | 32 +++++++++++++++++++++++++++++++-
 cpp/src/arrow/io/memory.cc         | 11 +++++++----
 cpp/src/arrow/test-util.h          | 28 ++++++++++++++++------------
 python/pyarrow/tests/test_io.py    |  3 +++
 4 files changed, 57 insertions(+), 17 deletions(-)

diff --git a/cpp/src/arrow/io/io-memory-test.cc b/cpp/src/arrow/io/io-memory-test.cc
index b2f3f18..67f3cf5 100644
--- a/cpp/src/arrow/io/io-memory-test.cc
+++ b/cpp/src/arrow/io/io-memory-test.cc
@@ -103,12 +103,42 @@ TEST(TestFixedSizeBufferWriter, Basics) {
   ASSERT_OK(writer.Tell(&position));
   ASSERT_EQ(4, position);
 
+  ASSERT_OK(writer.Seek(1024));
+  ASSERT_OK(writer.Tell(&position));
+  ASSERT_EQ(1024, position);
+
+  // Write out of bounds
+  ASSERT_RAISES(IOError, writer.Write(data.c_str(), 1));
+
+  // Seek out of bounds
   ASSERT_RAISES(IOError, writer.Seek(-1));
-  ASSERT_RAISES(IOError, writer.Seek(1024));
+  ASSERT_RAISES(IOError, writer.Seek(1025));
 
   ASSERT_OK(writer.Close());
 }
 
+TEST(TestBufferReader, Seeking) {
+  std::string data = "data123456";
+
+  auto buffer = std::make_shared<Buffer>(data);
+  BufferReader reader(buffer);
+  int64_t pos;
+  ASSERT_OK(reader.Tell(&pos));
+  ASSERT_EQ(pos, 0);
+
+  ASSERT_OK(reader.Seek(9));
+  ASSERT_OK(reader.Tell(&pos));
+  ASSERT_EQ(pos, 9);
+
+  ASSERT_OK(reader.Seek(10));
+  ASSERT_OK(reader.Tell(&pos));
+  ASSERT_EQ(pos, 10);
+
+  ASSERT_RAISES(IOError, reader.Seek(11));
+  ASSERT_OK(reader.Tell(&pos));
+  ASSERT_EQ(pos, 10);
+}
+
 TEST(TestBufferReader, RetainParentReference) {
   // ARROW-387
   std::string data = "data123456";
diff --git a/cpp/src/arrow/io/memory.cc b/cpp/src/arrow/io/memory.cc
index 345a108..ef7a43d 100644
--- a/cpp/src/arrow/io/memory.cc
+++ b/cpp/src/arrow/io/memory.cc
@@ -151,8 +151,8 @@ class FixedSizeBufferWriter::FixedSizeBufferWriterImpl {
   }
 
   Status Seek(int64_t position) {
-    if (position < 0 || position >= size_) {
-      return Status::IOError("position out of bounds");
+    if (position < 0 || position > size_) {
+      return Status::IOError("Seek out of bounds");
     }
     position_ = position;
     return Status::OK();
@@ -164,6 +164,9 @@ class FixedSizeBufferWriter::FixedSizeBufferWriterImpl {
   }
 
   Status Write(const void* data, int64_t nbytes) {
+    if (position_ + nbytes > size_) {
+      return Status::IOError("Write out of bounds");
+    }
     if (nbytes > memcopy_threshold_ && memcopy_num_threads_ > 1) {
       internal::parallel_memcopy(mutable_data_ + position_,
                                  reinterpret_cast<const uint8_t*>(data), nbytes,
@@ -302,8 +305,8 @@ Status BufferReader::GetSize(int64_t* size) {
 }
 
 Status BufferReader::Seek(int64_t position) {
-  if (position < 0 || position >= size_) {
-    return Status::IOError("position out of bounds");
+  if (position < 0 || position > size_) {
+    return Status::IOError("Seek out of bounds");
   }
 
   position_ = position;
diff --git a/cpp/src/arrow/test-util.h b/cpp/src/arrow/test-util.h
index 69f413e..13c6a61 100644
--- a/cpp/src/arrow/test-util.h
+++ b/cpp/src/arrow/test-util.h
@@ -44,20 +44,24 @@
 #include "arrow/util/decimal.h"
 #include "arrow/util/logging.h"
 
-#define ASSERT_RAISES(ENUM, expr) \
-  do {                            \
-    ::arrow::Status s = (expr);   \
-    if (!s.Is##ENUM()) {          \
-      FAIL() << s.ToString();     \
-    }                             \
+#define STRINGIFY(x) #x
+
+#define ASSERT_RAISES(ENUM, expr)                                         \
+  do {                                                                    \
+    ::arrow::Status s = (expr);                                           \
+    if (!s.Is##ENUM()) {                                                  \
+      FAIL() << "Expected '" STRINGIFY(expr) "' to fail with " STRINGIFY( \
+                    ENUM) ", but got "                                    \
+             << s.ToString();                                             \
+    }                                                                     \
   } while (false)
 
-#define ASSERT_OK(expr)         \
-  do {                          \
-    ::arrow::Status s = (expr); \
-    if (!s.ok()) {              \
-      FAIL() << s.ToString();   \
-    }                           \
+#define ASSERT_OK(expr)                                               \
+  do {                                                                \
+    ::arrow::Status s = (expr);                                       \
+    if (!s.ok()) {                                                    \
+      FAIL() << "'" STRINGIFY(expr) "' failed with " << s.ToString(); \
+    }                                                                 \
   } while (false)
 
 #define ASSERT_OK_NO_THROW(expr) ASSERT_NO_THROW(ASSERT_OK(expr))
diff --git a/python/pyarrow/tests/test_io.py b/python/pyarrow/tests/test_io.py
index 78685ac..547dee3 100644
--- a/python/pyarrow/tests/test_io.py
+++ b/python/pyarrow/tests/test_io.py
@@ -167,6 +167,9 @@ def test_bytes_reader():
     f.seek(0)
     assert f.tell() == 0
 
+    f.seek(0, 2)
+    assert f.tell() == len(data)
+
     f.seek(5)
     assert f.tell() == 5