You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kudu.apache.org by ad...@apache.org on 2017/05/03 21:59:30 UTC

kudu git commit: env: add ReadV() API

Repository: kudu
Updated Branches:
  refs/heads/master ca7fffec9 -> 899e6a5e5


env: add ReadV() API

Adds ReadV() methods to RWFile and RandomAccessFile that allow
reading data into multiple Slices in one call. The implementation
leverages the preadv system call when possible and simulates it
with pread calls when preadv is unavailable.

Additionally adds ReadV() methods to the block manager abstraction.
These methods will be used in KUDU-463 to support reading
checksums and block data in a single call.

Change-Id: Ib8f7a62c8363b40baa064d9e63be1ece506f1e48
Reviewed-on: http://gerrit.cloudera.org:8080/6779
Reviewed-by: Adar Dembo <ad...@cloudera.com>
Tested-by: Kudu Jenkins


Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/899e6a5e
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/899e6a5e
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/899e6a5e

Branch: refs/heads/master
Commit: 899e6a5e50586d3936b0c1f3e355ef463571585a
Parents: ca7fffe
Author: Grant Henke <gr...@gmail.com>
Authored: Tue May 2 10:54:18 2017 -0500
Committer: Adar Dembo <ad...@cloudera.com>
Committed: Wed May 3 21:59:05 2017 +0000

----------------------------------------------------------------------
 src/kudu/fs/block_manager-test.cc |  16 ++++-
 src/kudu/fs/block_manager.h       |  10 +++-
 src/kudu/fs/file_block_manager.cc |  19 ++++++
 src/kudu/fs/fs-test-util.h        |  13 +++++
 src/kudu/fs/log_block_manager.cc  |  46 +++++++++++++++
 src/kudu/util/env-test.cc         |  96 +++++++++++++++++++++++++++---
 src/kudu/util/env.h               |  24 ++++++++
 src/kudu/util/env_posix.cc        | 103 ++++++++++++++++++++++++++++++++-
 src/kudu/util/file_cache.cc       |  14 +++++
 9 files changed, 326 insertions(+), 15 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kudu/blob/899e6a5e/src/kudu/fs/block_manager-test.cc
----------------------------------------------------------------------
diff --git a/src/kudu/fs/block_manager-test.cc b/src/kudu/fs/block_manager-test.cc
index dca19d2..a68335d 100644
--- a/src/kudu/fs/block_manager-test.cc
+++ b/src/kudu/fs/block_manager-test.cc
@@ -255,11 +255,23 @@ TYPED_TEST(BlockManagerTest, EndToEndTest) {
   uint64_t sz;
   ASSERT_OK(read_block->Size(&sz));
   ASSERT_EQ(test_data.length(), sz);
-  gscoped_ptr<uint8_t[]> scratch(new uint8_t[test_data.length()]);
-  Slice data(scratch.get(), test_data.length());
+  uint8_t scratch[test_data.length()];
+  Slice data(scratch, test_data.length());
   ASSERT_OK(read_block->Read(0, &data));
   ASSERT_EQ(test_data, data);
 
+  // Read the data back into multiple slices
+  size_t size1 = 5;
+  uint8_t scratch1[size1];
+  Slice data1(scratch1, size1);
+  size_t size2 = 4;
+  uint8_t scratch2[size2];
+  Slice data2(scratch2, size2);
+  vector<Slice> results = { data1, data2 };
+  ASSERT_OK(read_block->ReadV(0, &results));
+  ASSERT_EQ(test_data.substr(0, size1), data1);
+  ASSERT_EQ(test_data.substr(size1, size2), data2);
+
   // We don't actually do anything with the result of this call; we just want
   // to make sure it doesn't trigger a crash (see KUDU-1931).
   LOG(INFO) << "Block memory footprint: " << read_block->memory_footprint();

http://git-wip-us.apache.org/repos/asf/kudu/blob/899e6a5e/src/kudu/fs/block_manager.h
----------------------------------------------------------------------
diff --git a/src/kudu/fs/block_manager.h b/src/kudu/fs/block_manager.h
index 2c92a5f..8165275 100644
--- a/src/kudu/fs/block_manager.h
+++ b/src/kudu/fs/block_manager.h
@@ -144,11 +144,17 @@ class ReadableBlock : public Block {
   virtual Status Size(uint64_t* sz) const = 0;
 
   // Reads exactly 'result.size' bytes beginning from 'offset' in the block,
-  // returning an error if fewer bytes exist. Sets "result" to the data that
-  // was read.
+  // returning an error if fewer bytes exist.
+  // Sets "result" to the data that was read.
   // If an error was encountered, returns a non-OK status.
   virtual Status Read(uint64_t offset, Slice* result) const = 0;
 
+  // Reads exactly the "results" aggregate bytes, based on each Slice's "size",
+  // beginning from 'offset' in the block, returning an error if fewer bytes exist.
+  // Sets each "result" to the data that was read.
+  // If an error was encountered, returns a non-OK status.
+  virtual Status ReadV(uint64_t offset, vector<Slice>* results) const = 0;
+
   // Returns the memory usage of this object including the object itself.
   virtual size_t memory_footprint() const = 0;
 };

http://git-wip-us.apache.org/repos/asf/kudu/blob/899e6a5e/src/kudu/fs/file_block_manager.cc
----------------------------------------------------------------------
diff --git a/src/kudu/fs/file_block_manager.cc b/src/kudu/fs/file_block_manager.cc
index 79e84d7..937f4ba 100644
--- a/src/kudu/fs/file_block_manager.cc
+++ b/src/kudu/fs/file_block_manager.cc
@@ -384,6 +384,8 @@ class FileReadableBlock : public ReadableBlock {
 
   virtual Status Read(uint64_t offset, Slice* result) const OVERRIDE;
 
+  virtual Status ReadV(uint64_t offset, vector<Slice>* results) const OVERRIDE;
+
   virtual size_t memory_footprint() const OVERRIDE;
 
  private:
@@ -453,6 +455,23 @@ Status FileReadableBlock::Read(uint64_t offset, Slice* result) const {
   return Status::OK();
 }
 
+// Vectored read: delegates to reader_->ReadV() and records the bytes read in metrics.
+Status FileReadableBlock::ReadV(uint64_t offset, vector<Slice>* results) const {
+  DCHECK(!closed_.Load());
+
+  RETURN_NOT_OK(reader_->ReadV(offset, results));
+
+  if (block_manager_->metrics_) {
+    // Sum slice sizes as size_t; an 'int' accumulator would truncate large totals.
+    size_t bytes_read = accumulate(
+        results->begin(), results->end(), static_cast<size_t>(0),
+        [](size_t sum, const Slice& curr) { return sum + curr.size(); });
+    block_manager_->metrics_->total_bytes_read->IncrementBy(bytes_read);
+  }
+
+  return Status::OK();
+}
+
 size_t FileReadableBlock::memory_footprint() const {
   DCHECK(reader_);
   return kudu_malloc_usable_size(this) + reader_->memory_footprint();

http://git-wip-us.apache.org/repos/asf/kudu/blob/899e6a5e/src/kudu/fs/fs-test-util.h
----------------------------------------------------------------------
diff --git a/src/kudu/fs/fs-test-util.h b/src/kudu/fs/fs-test-util.h
index cdfa07e..23baa44 100644
--- a/src/kudu/fs/fs-test-util.h
+++ b/src/kudu/fs/fs-test-util.h
@@ -19,6 +19,8 @@
 #define KUDU_FS_FS_TEST_UTIL_H
 
 #include <memory>
+#include <numeric>
+#include <vector>
 
 #include "kudu/fs/block_manager.h"
 #include "kudu/util/malloc.h"
@@ -67,6 +69,17 @@ class CountingReadableBlock : public ReadableBlock {
     return Status::OK();
   }
 
+  // Forwards the read to the wrapped block, then adds the bytes read to '*bytes_read_'.
+  virtual Status ReadV(uint64_t offset, std::vector<Slice>* results) const OVERRIDE {
+    RETURN_NOT_OK(block_->ReadV(offset, results));
+    // Sum slice sizes as size_t; an 'int' accumulator would truncate large totals.
+    size_t length = std::accumulate(
+        results->begin(), results->end(), static_cast<size_t>(0),
+        [](size_t sum, const Slice& curr) { return sum + curr.size(); });
+    *bytes_read_ += length;
+    return Status::OK();
+  }
+
   virtual size_t memory_footprint() const OVERRIDE {
     return block_->memory_footprint();
   }

http://git-wip-us.apache.org/repos/asf/kudu/blob/899e6a5e/src/kudu/fs/log_block_manager.cc
----------------------------------------------------------------------
diff --git a/src/kudu/fs/log_block_manager.cc b/src/kudu/fs/log_block_manager.cc
index 22ef9ee..5f8f012 100644
--- a/src/kudu/fs/log_block_manager.cc
+++ b/src/kudu/fs/log_block_manager.cc
@@ -23,6 +23,7 @@
 #include <mutex>
 #include <unordered_map>
 #include <unordered_set>
+#include <vector>
 
 #include "kudu/fs/block_manager_metrics.h"
 #include "kudu/fs/block_manager_util.h"
@@ -289,6 +290,9 @@ class LogBlockContainer {
   // See RWFile::Read().
   Status ReadData(int64_t offset, Slice* result) const;
 
+  // See RWFile::ReadV().
+  Status ReadVData(int64_t offset, vector<Slice>* results) const;
+
   // Appends 'pb' to this container's metadata file.
   //
   // The on-disk effects of this call are made durable only after SyncMetadata().
@@ -836,6 +840,12 @@ Status LogBlockContainer::ReadData(int64_t offset, Slice* result) const {
   return data_file_->Read(offset, result);
 }
 
+Status LogBlockContainer::ReadVData(int64_t offset, vector<Slice>* results) const {
+  DCHECK_GE(offset, 0);
+
+  return data_file_->ReadV(offset, results);
+}
+
 Status LogBlockContainer::AppendMetadata(const BlockRecordPB& pb) {
   // Note: We don't check for sufficient disk space for metadata writes in
   // order to allow for block deletion on full disks.
@@ -1249,6 +1259,8 @@ class LogReadableBlock : public ReadableBlock {
 
   virtual Status Read(uint64_t offset, Slice* result) const OVERRIDE;
 
+  virtual Status ReadV(uint64_t offset, vector<Slice>* results) const OVERRIDE;
+
   virtual size_t memory_footprint() const OVERRIDE;
 
  private:
@@ -1332,6 +1344,40 @@ Status LogReadableBlock::Read(uint64_t offset, Slice* result) const {
   return Status::OK();
 }
 
+// Vectored read within this block; returns IOError if the request exceeds the block's length.
+Status LogReadableBlock::ReadV(uint64_t offset, vector<Slice>* results) const {
+  DCHECK(!closed_.Load());
+
+  // Sum sizes as size_t; an 'int' accumulator could truncate and skew the bounds check.
+  size_t read_length = accumulate(
+      results->begin(), results->end(), static_cast<size_t>(0),
+      [](size_t sum, const Slice& curr) { return sum + curr.size(); });
+
+  uint64_t read_offset = log_block_->offset() + offset;
+  if (log_block_->length() < offset + read_length) {
+    return Status::IOError("Out-of-bounds read",
+                           Substitute("read of [$0-$1) in block [$2-$3)",
+                                      read_offset, read_offset + read_length,
+                                      log_block_->offset(),
+                                      log_block_->offset() + log_block_->length()));
+  }
+
+  MicrosecondsInt64 start_time = GetMonoTimeMicros();
+  RETURN_NOT_OK(container_->ReadVData(read_offset, results));
+  MicrosecondsInt64 end_time = GetMonoTimeMicros();
+
+  int64_t dur = end_time - start_time;
+  TRACE_COUNTER_INCREMENT("lbm_read_time_us", dur);
+
+  const char* counter = BUCKETED_COUNTER_NAME("lbm_reads", dur);
+  TRACE_COUNTER_INCREMENT(counter, 1);
+
+  if (container_->metrics()) {
+    container_->metrics()->generic_metrics.total_bytes_read->IncrementBy(read_length);
+  }
+  return Status::OK();
+}
+
 size_t LogReadableBlock::memory_footprint() const {
   return kudu_malloc_usable_size(this);
 }

http://git-wip-us.apache.org/repos/asf/kudu/blob/899e6a5e/src/kudu/util/env-test.cc
----------------------------------------------------------------------
diff --git a/src/kudu/util/env-test.cc b/src/kudu/util/env-test.cc
index a135f4a..d8ee720 100644
--- a/src/kudu/util/env-test.cc
+++ b/src/kudu/util/env-test.cc
@@ -428,6 +428,72 @@ TEST_F(TestEnv, TestReadFully) {
   ASSERT_STR_CONTAINS(status.ToString(), "EOF");
 }
 
+TEST_F(TestEnv, TestReadVFully) {
+  // Create the file.
+  unique_ptr<RWFile> file;
+  ASSERT_OK(env_->NewRWFile(GetTestPath("foo"), &file));
+
+  // Append to it.
+  string kTestData = "abcde12345";
+  ASSERT_OK(file->Write(0, kTestData));
+
+  // Setup read parameters
+  size_t size1 = 5;
+  uint8_t scratch1[size1];
+  Slice result1(scratch1, size1);
+  size_t size2 = 5;
+  uint8_t scratch2[size2];
+  Slice result2(scratch2, size2);
+  vector<Slice> results = { result1, result2 };
+
+  // Force a short read
+  FLAGS_env_inject_short_read_bytes = 3;
+
+  // Verify that Read fully reads the whole requested data.
+  ASSERT_OK(file->ReadV(0, &results));
+  ASSERT_EQ(result1, "abcde");
+  ASSERT_EQ(result2, "12345");
+
+  // Turn short reads off again
+  FLAGS_env_inject_short_read_bytes = 0;
+
+  // Verify that Read fails with an IOError at EOF.
+  Status status = file->ReadV(5, &results);
+  ASSERT_FALSE(status.ok());
+  ASSERT_TRUE(status.IsIOError());
+  ASSERT_STR_CONTAINS(status.ToString(), "EOF");
+}
+
+TEST_F(TestEnv, TestIOVMax) {
+  Env* env = Env::Default();
+  const string kTestPath = GetTestPath("test");
+
+  const size_t slice_count = IOV_MAX + 42;
+  const size_t slice_size = 5;
+  const size_t data_size = slice_count * slice_size;
+
+  NO_FATALS(WriteTestFile(env, kTestPath, data_size));
+
+  // Reopen for read
+  shared_ptr<RandomAccessFile> file;
+  ASSERT_OK(env_util::OpenFileForRandom(env, kTestPath, &file));
+
+  // Setup more results slices than IOV_MAX
+  uint8_t scratch[data_size];
+  vector<Slice> results;
+  for (size_t i = 0; i < slice_count; i++) {
+    size_t shift = slice_size * i;
+    results.emplace_back(scratch + shift, slice_size);
+  }
+
+  // Force a short read too
+  FLAGS_env_inject_short_read_bytes = 3;
+
+  // Verify all the data is read
+  ASSERT_OK(file->ReadV(0, &results));
+  VerifyTestData(Slice(scratch, data_size), 0);
+}
+
 TEST_F(TestEnv, TestAppendVector) {
   WritableFileOptions opts;
   LOG(INFO) << "Testing AppendVector() only, NO pre-allocation";
@@ -694,25 +760,37 @@ TEST_F(TestEnv, TestRWFile) {
   ASSERT_OK(file->Write(0, kTestData));
 
   // Read from it.
-  unique_ptr<uint8_t[]> scratch(new uint8_t[kTestData.length()]);
-  Slice result(scratch.get(), kTestData.length());
+  uint8_t scratch[kTestData.length()];
+  Slice result(scratch, kTestData.length());
   ASSERT_OK(file->Read(0, &result));
   ASSERT_EQ(result, kTestData);
   uint64_t sz;
   ASSERT_OK(file->Size(&sz));
   ASSERT_EQ(kTestData.length(), sz);
 
+  // Read into multiple buffers
+  size_t size1 = 3;
+  uint8_t scratch1[size1];
+  Slice result1(scratch1, size1);
+  size_t size2 = 2;
+  uint8_t scratch2[size2];
+  Slice result2(scratch2, size2);
+  vector<Slice> results = { result1, result2 };
+  ASSERT_OK(file->ReadV(0, &results));
+  ASSERT_EQ(result1, "abc");
+  ASSERT_EQ(result2, "de");
+
   // Write past the end of the file and rewrite some of the interior.
   ASSERT_OK(file->Write(kTestData.length() * 2, kTestData));
   ASSERT_OK(file->Write(kTestData.length(), kTestData));
   ASSERT_OK(file->Write(1, kTestData));
   string kNewTestData = "aabcdebcdeabcde";
-  unique_ptr<uint8_t[]> scratch2(new uint8_t[kNewTestData.length()]);
-  Slice result2(scratch2.get(), kNewTestData.length());
-  ASSERT_OK(file->Read(0, &result2));
+  uint8_t scratch3[kNewTestData.length()];
+  Slice result3(scratch3, kNewTestData.length());
+  ASSERT_OK(file->Read(0, &result3));
 
   // Retest.
-  ASSERT_EQ(result2, kNewTestData);
+  ASSERT_EQ(result3, kNewTestData);
   ASSERT_OK(file->Size(&sz));
   ASSERT_EQ(kNewTestData.length(), sz);
 
@@ -724,9 +802,9 @@ TEST_F(TestEnv, TestRWFile) {
   // Reopen it without truncating the existing data.
   opts.mode = Env::OPEN_EXISTING;
   ASSERT_OK(env_->NewRWFile(opts, GetTestPath("foo"), &file));
-  unique_ptr<uint8_t[]> scratch3(new uint8_t[kNewTestData.length()]);
-  Slice result3(scratch3.get(), kNewTestData.length());
-  ASSERT_OK(file->Read(0, &result3));
+  uint8_t scratch4[kNewTestData.length()];
+  Slice result4(scratch4, kNewTestData.length());
+  ASSERT_OK(file->Read(0, &result4));
-  ASSERT_EQ(result3, kNewTestData);
+  ASSERT_EQ(result4, kNewTestData);
 }
 

http://git-wip-us.apache.org/repos/asf/kudu/blob/899e6a5e/src/kudu/util/env.h
----------------------------------------------------------------------
diff --git a/src/kudu/util/env.h b/src/kudu/util/env.h
index d7b68aa..6d1e1fd 100644
--- a/src/kudu/util/env.h
+++ b/src/kudu/util/env.h
@@ -384,6 +384,18 @@ class RandomAccessFile {
   // Safe for concurrent use by multiple threads.
   virtual Status Read(uint64_t offset, Slice* result) const = 0;
 
+  // Reads exactly the "results" aggregate size, based on each Slice's "size",
+  // from the file starting at 'offset'.
+  // Sets each "result.data" to the data that was read.
+  // If an error was encountered, returns a non-OK status.
+  //
+  // This method will internally retry on EINTR and "short reads" in order to
+  // fully read the requested number of bytes. In the event that it is not
+  // possible to fill every Slice in "results", an IOError is returned.
+  //
+  // Safe for concurrent use by multiple threads.
+  virtual Status ReadV(uint64_t offset, std::vector<Slice>* results) const = 0;
+
   // Returns the size of the file
   virtual Status Size(uint64_t *size) const = 0;
 
@@ -512,6 +524,18 @@ class RWFile {
   // Safe for concurrent use by multiple threads.
   virtual Status Read(uint64_t offset, Slice* result) const = 0;
 
+  // Reads exactly the "results" aggregate size, based on each Slice's "size",
+  // from the file starting at 'offset'.
+  // Sets each "result.data" to the data that was read.
+  // If an error was encountered, returns a non-OK status.
+  //
+  // This method will internally retry on EINTR and "short reads" in order to
+  // fully read the requested number of bytes. In the event that it is not
+  // possible to fill every Slice in "results", an IOError is returned.
+  //
+  // Safe for concurrent use by multiple threads.
+  virtual Status ReadV(uint64_t offset, std::vector<Slice>* results) const = 0;
+
   // Writes 'data' to the file position given by 'offset'.
   virtual Status Write(uint64_t offset, const Slice& data) = 0;
 

http://git-wip-us.apache.org/repos/asf/kudu/blob/899e6a5e/src/kudu/util/env_posix.cc
----------------------------------------------------------------------
diff --git a/src/kudu/util/env_posix.cc b/src/kudu/util/env_posix.cc
index 8932c83..d2d658f 100644
--- a/src/kudu/util/env_posix.cc
+++ b/src/kudu/util/env_posix.cc
@@ -176,6 +176,27 @@ int fallocate(int fd, int mode, off_t offset, off_t len) {
 }
 #endif
 
+#if defined(__APPLE__)
+// Simulates Linux's preadv API on OS X by issuing one pread() per iovec entry.
+ssize_t preadv(int fd, const struct iovec* iovec, int count, off_t offset) {
+  ssize_t total_read_bytes = 0;
+  for (int i = 0; i < count; i++) {
+    ssize_t r;
+    RETRY_ON_EINTR(r, pread(fd, iovec[i].iov_base, iovec[i].iov_len, offset));
+    if (r < 0) {
+      return r;  // Error: bytes already read into earlier entries are not reported.
+    }
+    total_read_bytes += r;
+    if (static_cast<size_t>(r) < iovec[i].iov_len) {
+      break;  // Short read (or EOF): return the partial total to the caller.
+    }
+    offset += iovec[i].iov_len;
+  }
+  return total_read_bytes;
+}
+#endif
+
+
 // Close file descriptor when object goes out of scope.
 class ScopedFdCloser {
  public:
@@ -269,15 +290,20 @@ Status DoRead(int fd, const string& filename, uint64_t offset, Slice* result) {
     }
     ssize_t r;
     RETRY_ON_EINTR(r, pread(fd, dst, req, cur_offset));
-    if (r < 0) {
+    if (PREDICT_FALSE(r < 0)) {
       // An error: return a non-ok status.
       return IOError(filename, errno);
     }
-    if (r == 0) {
+    if (PREDICT_FALSE(r == 0)) {
       // EOF
       return Status::IOError(Substitute("EOF trying to read $0 bytes at offset $1",
                                         result->size(), offset));
     }
+    if (PREDICT_TRUE(r == rem)) {
+      // All requested bytes were read.
+      // This is almost always the case.
+      return Status::OK();
+    }
     DCHECK_LE(r, rem);
     dst += r;
     rem -= r;
@@ -287,6 +313,71 @@ Status DoRead(int fd, const string& filename, uint64_t offset, Slice* result) {
   return Status::OK();
 }
 
+// Reads the aggregate size of 'results' from 'fd' starting at 'offset', retrying short reads.
+Status DoReadV(int fd, const string& filename, uint64_t offset, vector<Slice>* results) {
+  ThreadRestrictions::AssertIOAllowed();
+
+  // Build one iovec per result Slice and total the bytes requested.
+  size_t bytes_req = 0;
+  size_t iov_size = results->size();
+  struct iovec iov[iov_size];  // NOTE(review): VLA, so stack use scales with results->size().
+  for (size_t i = 0; i < iov_size; i++) {
+    Slice& result = (*results)[i];
+    bytes_req += result.size();
+    iov[i] = { result.mutable_data(), result.size() };
+  }
+
+  uint64_t cur_offset = offset;
+  size_t completed_iov = 0;
+  size_t rem = bytes_req;
+  while (rem > 0) {
+    // Never pass more than IOV_MAX iovecs to a single preadv() call.
+    size_t iov_count = std::min(iov_size - completed_iov, static_cast<size_t>(IOV_MAX));
+    ssize_t r;
+    RETRY_ON_EINTR(r, preadv(fd, iov + completed_iov, iov_count, cur_offset));
+
+    // Fake a short read for testing; applied to the first request only (rem == bytes_req).
+    // NOTE(review): precedes the r < 0 / r == 0 checks, so error/EOF results are altered too.
+    if (PREDICT_FALSE(FLAGS_env_inject_short_read_bytes > 0 && rem == bytes_req)) {
+      DCHECK_LT(FLAGS_env_inject_short_read_bytes, r);
+      r -= FLAGS_env_inject_short_read_bytes;
+    }
+
+    if (PREDICT_FALSE(r < 0)) {
+      // An error: return a non-ok status built from errno.
+      return IOError(filename, errno);
+    }
+    if (PREDICT_FALSE(r == 0)) {
+      // EOF before all requested bytes were read.
+      return Status::IOError(
+          Substitute("EOF trying to read $0 bytes at offset $1", bytes_req, offset));
+    }
+    if (PREDICT_TRUE(r == rem)) {
+      // All requested bytes were read. This is almost always the case.
+      return Status::OK();
+    }
+    DCHECK_LE(r, rem);
+    // Short read: walk the iovecs, consuming 'r' bytes, to set up the next request.
+    ssize_t bytes_rem = r;
+    for (size_t i = completed_iov; i < iov_size; i++) {
+      if (bytes_rem >= iov[i].iov_len) {
+        // The full length of this iovec was read; skip it in the next request.
+        completed_iov++;
+        bytes_rem -= iov[i].iov_len;
+      } else {
+        // Partially read: advance iov_base and shrink iov_len to cover only the missing data.
+        iov[i].iov_base = static_cast<uint8_t *>(iov[i].iov_base) + bytes_rem;
+        iov[i].iov_len -= bytes_rem;
+        break; // Later iovecs are untouched, so they need no adjustment.
+      }
+    }
+    cur_offset += r;
+    rem -= r;
+  }
+  DCHECK_EQ(0, rem);
+  return Status::OK();
+}
+
 class PosixSequentialFile: public SequentialFile {
  private:
   std::string filename_;
@@ -342,6 +433,10 @@ class PosixRandomAccessFile: public RandomAccessFile {
     return DoRead(fd_, filename_, offset, result);
   }
 
+  virtual Status ReadV(uint64_t offset, vector<Slice>* results) const OVERRIDE {
+    return DoReadV(fd_, filename_, offset, results);
+  }
+
   virtual Status Size(uint64_t *size) const OVERRIDE {
     TRACE_EVENT1("io", "PosixRandomAccessFile::Size", "path", filename_);
     ThreadRestrictions::AssertIOAllowed();
@@ -584,6 +679,10 @@ class PosixRWFile : public RWFile {
     return DoRead(fd_, filename_, offset, result);
   }
 
+  virtual Status ReadV(uint64_t offset, vector<Slice>* results) const OVERRIDE {
+    return DoReadV(fd_, filename_, offset, results);
+  }
+
   virtual Status Write(uint64_t offset, const Slice& data) OVERRIDE {
     MAYBE_RETURN_FAILURE(FLAGS_env_inject_io_error,
                          Status::IOError(Env::kInjectedFailureStatusMsg));

http://git-wip-us.apache.org/repos/asf/kudu/blob/899e6a5e/src/kudu/util/file_cache.cc
----------------------------------------------------------------------
diff --git a/src/kudu/util/file_cache.cc b/src/kudu/util/file_cache.cc
index 6abdf84..2aa2aa1 100644
--- a/src/kudu/util/file_cache.cc
+++ b/src/kudu/util/file_cache.cc
@@ -20,6 +20,7 @@
 #include <memory>
 #include <mutex>
 #include <string>
+#include <vector>
 
 #include <gflags/gflags.h>
 
@@ -45,6 +46,7 @@ TAG_FLAG(file_cache_expiry_period_ms, advanced);
 using std::shared_ptr;
 using std::string;
 using std::unique_ptr;
+using std::vector;
 using strings::Substitute;
 
 namespace kudu {
@@ -212,6 +214,12 @@ class Descriptor<RWFile> : public RWFile {
     return opened.file()->Read(offset, result);
   }
 
+  Status ReadV(uint64_t offset, vector<Slice>* results) const override {
+    ScopedOpenedDescriptor<RWFile> opened(&base_);
+    RETURN_NOT_OK(ReopenFileIfNecessary(&opened));
+    return opened.file()->ReadV(offset, results);
+  }
+
   Status Write(uint64_t offset, const Slice& data) override {
     ScopedOpenedDescriptor<RWFile> opened(&base_);
     RETURN_NOT_OK(ReopenFileIfNecessary(&opened));
@@ -330,6 +338,12 @@ class Descriptor<RandomAccessFile> : public RandomAccessFile {
     return opened.file()->Read(offset, result);
   }
 
+  Status ReadV(uint64_t offset, vector<Slice>* results) const override {
+    ScopedOpenedDescriptor<RandomAccessFile> opened(&base_);
+    RETURN_NOT_OK(ReopenFileIfNecessary(&opened));
+    return opened.file()->ReadV(offset, results);
+  }
+
   Status Size(uint64_t *size) const override {
     ScopedOpenedDescriptor<RandomAccessFile> opened(&base_);
     RETURN_NOT_OK(ReopenFileIfNecessary(&opened));