You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ap...@apache.org on 2019/06/03 12:31:54 UTC

[arrow] branch master updated: ARROW-2835: [C++] Make file position undefined after ReadAt()

This is an automated email from the ASF dual-hosted git repository.

apitrou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 518df0c  ARROW-2835: [C++] Make file position undefined after ReadAt()
518df0c is described below

commit 518df0c14994e4d0752493f8f6ebd2e0535fd320
Author: Antoine Pitrou <an...@python.org>
AuthorDate: Mon Jun 3 14:31:43 2019 +0200

    ARROW-2835: [C++] Make file position undefined after ReadAt()
    
    Seeking is required when calling an implicitly-positioned operation such as Read() after ReadAt().
    
    Author: Antoine Pitrou <an...@python.org>
    
    Closes #4417 from pitrou/ARROW-2835-read-at-seek and squashes the following commits:
    
    d74c60eea <Antoine Pitrou> ARROW-2835:  Make file position undefined after ReadAt()
---
 cpp/src/arrow/io/file-test.cc | 15 +++++++++++++++
 cpp/src/arrow/io/file.cc      | 25 +++++++++++++++++++++++--
 2 files changed, 38 insertions(+), 2 deletions(-)

diff --git a/cpp/src/arrow/io/file-test.cc b/cpp/src/arrow/io/file-test.cc
index 5766213..8c1c819 100644
--- a/cpp/src/arrow/io/file-test.cc
+++ b/cpp/src/arrow/io/file-test.cc
@@ -446,6 +446,21 @@ TEST_F(TestReadableFile, ReadAt) {
   ASSERT_RAISES(Invalid, file_->ReadAt(0, 1, &buffer2));
 }
 
+TEST_F(TestReadableFile, SeekingRequired) {
+  std::shared_ptr<Buffer> buffer;
+
+  MakeTestFile();
+  OpenFile();
+
+  ASSERT_OK(file_->ReadAt(0, 4, &buffer));
+  AssertBufferEqual(*buffer, "test");
+
+  ASSERT_RAISES(Invalid, file_->Read(4, &buffer));
+  ASSERT_OK(file_->Seek(0));
+  ASSERT_OK(file_->Read(4, &buffer));
+  AssertBufferEqual(*buffer, "test");
+}
+
 TEST_F(TestReadableFile, NonExistentFile) {
   std::string path = "0xDEADBEEF.txt";
   Status s = ReadableFile::Open(path, &file_);
diff --git a/cpp/src/arrow/io/file.cc b/cpp/src/arrow/io/file.cc
index e678ca2..2b7d1a5 100644
--- a/cpp/src/arrow/io/file.cc
+++ b/cpp/src/arrow/io/file.cc
@@ -31,6 +31,7 @@
 #endif
 
 #include <algorithm>
+#include <atomic>
 #include <cerrno>
 #include <cstdint>
 #include <cstring>
@@ -56,7 +57,7 @@ namespace io {
 
 class OSFile {
  public:
-  OSFile() : fd_(-1), is_open_(false), size_(-1) {}
+  OSFile() : fd_(-1), is_open_(false), size_(-1), need_seeking_(false) {}
 
   ~OSFile() {}
 
@@ -134,11 +135,15 @@ class OSFile {
 
   Status Read(int64_t nbytes, int64_t* bytes_read, void* out) {
     RETURN_NOT_OK(CheckClosed());
+    RETURN_NOT_OK(CheckPositioned());
     return internal::FileRead(fd_, reinterpret_cast<uint8_t*>(out), nbytes, bytes_read);
   }
 
   Status ReadAt(int64_t position, int64_t nbytes, int64_t* bytes_read, void* out) {
     RETURN_NOT_OK(CheckClosed());
+    // ReadAt() leaves the file position undefined, so require that we seek
+    // before calling Read() or Write().
+    need_seeking_.store(true);
     return internal::FileReadAt(fd_, reinterpret_cast<uint8_t*>(out), position, nbytes,
                                 bytes_read);
   }
@@ -148,7 +153,11 @@ class OSFile {
     if (pos < 0) {
       return Status::Invalid("Invalid position");
     }
-    return internal::FileSeek(fd_, pos);
+    Status st = internal::FileSeek(fd_, pos);
+    if (st.ok()) {
+      need_seeking_.store(false);
+    }
+    return st;
   }
 
   Status Tell(int64_t* pos) const {
@@ -160,6 +169,7 @@ class OSFile {
     RETURN_NOT_OK(CheckClosed());
 
     std::lock_guard<std::mutex> guard(lock_);
+    RETURN_NOT_OK(CheckPositioned());
     if (length < 0) {
       return Status::IOError("Length must be non-negative");
     }
@@ -186,6 +196,15 @@ class OSFile {
     return SetFileName(ss.str());
   }
 
+  Status CheckPositioned() {
+    if (need_seeking_.load()) {
+      return Status::Invalid(
+          "Need seeking after ReadAt() before "
+          "calling implicitly-positioned operation");
+    }
+    return Status::OK();
+  }
+
   internal::PlatformFilename file_name_;
 
   std::mutex lock_;
@@ -197,6 +216,8 @@ class OSFile {
 
   bool is_open_;
   int64_t size_;
+  // Whether ReadAt made the file position non-deterministic.
+  std::atomic<bool> need_seeking_;
 };
 
 // ----------------------------------------------------------------------