You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kudu.apache.org by ad...@apache.org on 2017/04/29 00:54:16 UTC

[1/2] kudu git commit: env: add RWFile::GetExtentMap for analyzing file extents

Repository: kudu
Updated Branches:
  refs/heads/master eccafbcfb -> eb79a7183


env: add RWFile::GetExtentMap for analyzing file extents

This patch introduces a method to get the extent metadata of a file provided
it resides on an extent-based filesystem (such as ext4 or xfs). Each extent
is an offset and length into the file, and represents a chunk of filesystem
that has been allocated for the file. Gaps between extents are expected to
be unallocated and may represent punched out holes.

On Linux, the extent listing is retrieved via repeated calls to the
FS_IOC_FIEMAP ioctl, though only some of the information returned is
actually used.

Originally I intended to use this in the log block manager for finding
extra allocated space in container files. I ended up using a coarser
heuristic, but I'd like to merge this anyway as I think it'll be useful in
future as a more precise way of repairing extra allocated space.

Change-Id: I35bd1bdb9e1a839af2ab95ea73b79217c1f4a2b3
Reviewed-on: http://gerrit.cloudera.org:8080/6583
Tested-by: Kudu Jenkins
Reviewed-by: David Ribeiro Alves <da...@gmail.com>


Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/a57cb4ba
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/a57cb4ba
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/a57cb4ba

Branch: refs/heads/master
Commit: a57cb4ba7c714d8743bd80b04b33675718418bfe
Parents: eccafbc
Author: Adar Dembo <ad...@cloudera.com>
Authored: Tue Feb 21 00:36:45 2017 -0800
Committer: Adar Dembo <ad...@cloudera.com>
Committed: Sat Apr 29 00:53:44 2017 +0000

----------------------------------------------------------------------
 src/kudu/util/env-test.cc   | 69 ++++++++++++++++++++++++++++++++++++++--
 src/kudu/util/env.h         | 14 ++++++++
 src/kudu/util/env_posix.cc  | 59 ++++++++++++++++++++++++++++++++++
 src/kudu/util/file_cache.cc |  6 ++++
 4 files changed, 145 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kudu/blob/a57cb4ba/src/kudu/util/env-test.cc
----------------------------------------------------------------------
diff --git a/src/kudu/util/env-test.cc b/src/kudu/util/env-test.cc
index 7b216dc..c326dcc 100644
--- a/src/kudu/util/env-test.cc
+++ b/src/kudu/util/env-test.cc
@@ -22,6 +22,7 @@
 #include <string>
 
 #include <glog/logging.h>
+#include <glog/stl_logging.h>
 #include <gtest/gtest.h>
 
 #include "kudu/gutil/bind.h"
@@ -48,12 +49,15 @@
 #define FALLOC_FL_PUNCH_HOLE  0x02 /* de-allocates range */
 #endif
 
+DECLARE_bool(never_fsync);
+
 namespace kudu {
 
 using std::shared_ptr;
 using std::string;
 using std::unique_ptr;
 using std::vector;
+using strings::Substitute;
 
 static const uint64_t kOneMb = 1024 * 1024;
 static const uint64_t kTwoMb = 2 * kOneMb;
@@ -162,7 +166,7 @@ class TestEnv : public KuduTest {
 
     srand(123);
 
-    const string test_descr = strings::Substitute(
+    const string test_descr = Substitute(
         "appending a vector of slices(number of slices=$0,size of slice=$1 b) $2 times",
         num_slices, slice_size, iterations);
     LOG_TIMING(INFO, test_descr)  {
@@ -664,8 +668,8 @@ TEST_F(TestEnv, TestGlob) {
   }
 
   for (const auto& matcher : matchers) {
-    SCOPED_TRACE(strings::Substitute("pattern: $0, expected matches: $1",
-                                     matcher.first, matcher.second));
+    SCOPED_TRACE(Substitute("pattern: $0, expected matches: $1",
+                            matcher.first, matcher.second));
     vector<string> matches;
     ASSERT_OK(env_->Glob(JoinPathSegments(dir, matcher.first), &matches));
     ASSERT_EQ(matcher.second, matches.size());
@@ -857,4 +861,63 @@ TEST_F(TestEnv, TestChangeDir) {
   ASSERT_EQ(orig_dir, cwd);
 }
 
+TEST_F(TestEnv, TestGetExtentMap) {
+  // In order to force filesystems that use delayed allocation to write out the
+  // extents, we must Sync() after the file is done growing, and that should
+  // trigger a real fsync() to the filesystem.
+  FLAGS_never_fsync = false;
+
+  const string kTestFilePath = GetTestPath("foo");
+  const int kFileSizeBytes = 1024*1024;
+
+  // Create a test file of a particular size.
+  unique_ptr<RWFile> f;
+  ASSERT_OK(env_->NewRWFile(kTestFilePath, &f));
+  ASSERT_OK(f->PreAllocate(0, kFileSizeBytes, RWFile::CHANGE_FILE_SIZE));
+  ASSERT_OK(f->Sync());
+
+  // The number and distribution of extents differs depending on the
+  // filesystem; this just provides coverage of the code path.
+  RWFile::ExtentMap extents;
+  Status s = f->GetExtentMap(&extents);
+  if (s.IsNotSupported()) {
+    LOG(INFO) << "GetExtentMap() not supported, skipping test";
+    return;
+  }
+  ASSERT_OK(s);
+  SCOPED_TRACE(extents);
+  int num_extents = extents.size();
+  ASSERT_GT(num_extents, 0) <<
+      "There should have been at least one extent in the file";
+
+  uint64_t fs_block_size;
+  ASSERT_OK(env_->GetBlockSize(kTestFilePath, &fs_block_size));
+
+  // Look for an extent to punch. We want an extent that's at least three times
+  // the block size so that we can punch out the "middle" fs block and thus
+  // split the extent in half.
+  uint64_t found_offset = 0;
+  for (const auto& e : extents) {
+    if (e.second >= (fs_block_size * 3)) {
+      found_offset = e.first + fs_block_size;
+      break;
+    }
+  }
+  ASSERT_GT(found_offset, 0) << "Couldn't find extent to split";
+
+  // Punch out a hole and split the extent.
+  s = f->PunchHole(found_offset, fs_block_size);
+  if (s.IsNotSupported()) {
+    LOG(INFO) << "PunchHole() not supported, skipping this part of the test";
+    return;
+  }
+  ASSERT_OK(s);
+  ASSERT_OK(f->Sync());
+
+  // Test the extent map; there should be one more extent.
+  ASSERT_OK(f->GetExtentMap(&extents));
+  ASSERT_EQ(num_extents + 1, extents.size()) <<
+      "Punching a hole should have increased the number of extents by one";
+}
+
 }  // namespace kudu

http://git-wip-us.apache.org/repos/asf/kudu/blob/a57cb4ba/src/kudu/util/env.h
----------------------------------------------------------------------
diff --git a/src/kudu/util/env.h b/src/kudu/util/env.h
index 20316d7..1ed393d 100644
--- a/src/kudu/util/env.h
+++ b/src/kudu/util/env.h
@@ -15,6 +15,7 @@
 
 #include <cstdarg>
 #include <cstdint>
+#include <map>
 #include <memory>
 #include <string>
 #include <vector>
@@ -577,6 +578,19 @@ class RWFile {
   // Retrieves the file's size.
   virtual Status Size(uint64_t* size) const = 0;
 
+  // Retrieve a map of the file's live extents.
+  //
+  // Each map entry is an offset and size representing a section of live file
+  // data. Any byte offset not contained in a map entry implicitly belongs to a
+  // "hole" in the (sparse) file.
+  //
+  // If the underlying filesystem does not support extents, map entries
+  // represent runs of adjacent fixed-size filesystem blocks instead. If the
+  // platform doesn't support fetching extents at all, a NotSupported status
+  // will be returned.
+  typedef std::map<uint64_t, uint64_t> ExtentMap;
+  virtual Status GetExtentMap(ExtentMap* out) const = 0;
+
   // Returns the filename provided when the RWFile was constructed.
   virtual const std::string& filename() const = 0;
 

http://git-wip-us.apache.org/repos/asf/kudu/blob/a57cb4ba/src/kudu/util/env_posix.cc
----------------------------------------------------------------------
diff --git a/src/kudu/util/env_posix.cc b/src/kudu/util/env_posix.cc
index 4da3d47..58b8f6c 100644
--- a/src/kudu/util/env_posix.cc
+++ b/src/kudu/util/env_posix.cc
@@ -57,7 +57,10 @@
 #include <sys/sysctl.h>
 #else
 #include <linux/falloc.h>
+#include <linux/fiemap.h>
+#include <linux/fs.h>
 #include <linux/magic.h>
+#include <sys/ioctl.h>
 #include <sys/sysinfo.h>
 #include <sys/vfs.h>
 #endif  // defined(__APPLE__)
@@ -722,6 +725,62 @@ class PosixRWFile : public RWFile {
     return Status::OK();
   }
 
+  virtual Status GetExtentMap(ExtentMap* out) const OVERRIDE {
+#ifdef __APPLE__
+    return Status::NotSupported("GetExtentMap not supported on this platform");
+#endif
+    TRACE_EVENT1("io", "PosixRWFile::GetExtentMap", "path", filename_);
+    ThreadRestrictions::AssertIOAllowed();
+
+    // This allocation size is arbitrary.
+    static const int kBufSize = 4096;
+    uint8_t buf[kBufSize] = { 0 };
+
+    struct fiemap* fm = reinterpret_cast<struct fiemap*>(buf);
+    struct fiemap_extent* fme = &fm->fm_extents[0];
+    int avail_extents_in_buffer = (kBufSize - sizeof(*fm)) / sizeof(*fme);
+    bool saw_last_extent = false;
+    ExtentMap extents;
+    do {
+      // Fetch another block of extents.
+      fm->fm_length = FIEMAP_MAX_OFFSET;
+      fm->fm_extent_count = avail_extents_in_buffer;
+      if (ioctl(fd_, FS_IOC_FIEMAP, fm) == -1) {
+        return IOError(filename_, errno);
+      }
+
+      // No extents returned, this file must have no extents.
+      if (fm->fm_mapped_extents == 0) {
+        break;
+      }
+
+      // Parse the extent block.
+      uint64_t last_extent_end_offset;
+      for (int i = 0; i < fm->fm_mapped_extents; i++) {
+        if (fme[i].fe_flags & FIEMAP_EXTENT_LAST) {
+          // This should really be the last extent.
+          CHECK_EQ(fm->fm_mapped_extents - 1, i);
+
+          saw_last_extent = true;
+        }
+        InsertOrDie(&extents, fme[i].fe_logical, fme[i].fe_length);
+        VLOG(3) << Substitute("File $0 extent $1: o $2, l $3 $4",
+                              filename_, i,
+                              fme[i].fe_logical, fme[i].fe_length,
+                              saw_last_extent ? "(final)" : "");
+        last_extent_end_offset = fme[i].fe_logical + fme[i].fe_length;
+        if (saw_last_extent) {
+          break;
+        }
+      }
+
+      fm->fm_start = last_extent_end_offset;
+    } while (!saw_last_extent);
+
+    out->swap(extents);
+    return Status::OK();
+  }
+
   virtual const string& filename() const OVERRIDE {
     return filename_;
   }

http://git-wip-us.apache.org/repos/asf/kudu/blob/a57cb4ba/src/kudu/util/file_cache.cc
----------------------------------------------------------------------
diff --git a/src/kudu/util/file_cache.cc b/src/kudu/util/file_cache.cc
index 6d4e5d6..fc272af 100644
--- a/src/kudu/util/file_cache.cc
+++ b/src/kudu/util/file_cache.cc
@@ -260,6 +260,12 @@ class Descriptor<RWFile> : public RWFile {
     return opened.file()->Size(size);
   }
 
+  Status GetExtentMap(ExtentMap* out) const override {
+    ScopedOpenedDescriptor<RWFile> opened(&base_);
+    RETURN_NOT_OK(ReopenFileIfNecessary(&opened));
+    return opened.file()->GetExtentMap(out);
+  }
+
   const string& filename() const override {
     return base_.filename();
   }


[2/2] kudu git commit: subprocess: add KillAndWait() and allow customization of exit signal

Posted by ad...@apache.org.
subprocess: add KillAndWait() and allow customization of exit signal

This patch does two things:
- It introduces a KillAndWait() method that terminates and fully reaps a
  process.
- It allows one to customize the exit signal delivered to a subprocess when
  it goes out of scope. The default signal is SIGKILL which doesn't let
  subprocesses clean up after themselves.

Change-Id: Iaf31bd4ea6de2917521a9852714fb33cfbec1f61
Reviewed-on: http://gerrit.cloudera.org:8080/6741
Tested-by: Adar Dembo <ad...@cloudera.com>
Reviewed-by: David Ribeiro Alves <da...@gmail.com>


Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/eb79a718
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/eb79a718
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/eb79a718

Branch: refs/heads/master
Commit: eb79a7183bb4dc69562dbc5e24006be777d23d62
Parents: a57cb4b
Author: Adar Dembo <ad...@cloudera.com>
Authored: Wed Apr 26 16:47:16 2017 -0700
Committer: Adar Dembo <ad...@cloudera.com>
Committed: Sat Apr 29 00:53:46 2017 +0000

----------------------------------------------------------------------
 src/kudu/util/subprocess-test.cc | 48 +++++++++++++++++++++++++++++++
 src/kudu/util/subprocess.cc      | 54 ++++++++++++++++++++++++++++++-----
 src/kudu/util/subprocess.h       | 19 +++++++++++-
 3 files changed, 113 insertions(+), 8 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kudu/blob/eb79a718/src/kudu/util/subprocess-test.cc
----------------------------------------------------------------------
diff --git a/src/kudu/util/subprocess-test.cc b/src/kudu/util/subprocess-test.cc
index dcfd43a..fb3d183 100644
--- a/src/kudu/util/subprocess-test.cc
+++ b/src/kudu/util/subprocess-test.cc
@@ -213,4 +213,52 @@ TEST_F(SubprocessTest, TestGetExitStatusSignaled) {
   }
 }
 
+TEST_F(SubprocessTest, TestSubprocessDestroyWithCustomSignal) {
+  string kTestFile = GetTestPath("foo");
+
+  // Start a subprocess that creates kTestFile immediately and deletes it on exit.
+  //
+  // Note: it's important that the shell not invoke a command while waiting
+  // to be killed (i.e. "sleep 60"); if it did, the signal could be delivered
+  // just after the command starts but just before the shell decides to forward
+  // signals to it, and we wind up with a deadlock.
+  vector<string> argv = {
+      "/bin/bash",
+      "-c",
+      Substitute(
+          // Delete kTestFile on exit.
+          "trap \"rm $0\" EXIT;"
+          // Create kTestFile on start.
+          "touch $0;"
+          // Spin in a tight loop waiting to be killed.
+          "while true;"
+          "  do FOO=$$((FOO + 1));"
+          "done", kTestFile)
+  };
+
+  {
+    Subprocess s(argv);
+    ASSERT_OK(s.Start());
+    AssertEventually([&]{
+        ASSERT_TRUE(env_->FileExists(kTestFile));
+    });
+  }
+
+  // The subprocess went out of scope and was killed with SIGKILL, so it left
+  // kTestFile behind.
+  ASSERT_TRUE(env_->FileExists(kTestFile));
+
+  ASSERT_OK(env_->DeleteFile(kTestFile));
+  {
+    Subprocess s(argv, SIGTERM);
+    ASSERT_OK(s.Start());
+    AssertEventually([&]{
+        ASSERT_TRUE(env_->FileExists(kTestFile));
+    });
+  }
+
+  // The subprocess was killed with SIGTERM, giving it a chance to delete kTestFile.
+  ASSERT_FALSE(env_->FileExists(kTestFile));
+}
+
 } // namespace kudu

http://git-wip-us.apache.org/repos/asf/kudu/blob/eb79a718/src/kudu/util/subprocess.cc
----------------------------------------------------------------------
diff --git a/src/kudu/util/subprocess.cc b/src/kudu/util/subprocess.cc
index 897678d..ec032cd 100644
--- a/src/kudu/util/subprocess.cc
+++ b/src/kudu/util/subprocess.cc
@@ -46,8 +46,10 @@
 #include "kudu/gutil/strings/substitute.h"
 #include "kudu/util/debug-util.h"
 #include "kudu/util/errno.h"
+#include "kudu/util/monotime.h"
 #include "kudu/util/path_util.h"
 #include "kudu/util/signal.h"
+#include "kudu/util/stopwatch.h"
 #include "kudu/util/status.h"
 
 using std::string;
@@ -64,6 +66,8 @@ using ::operator<<;
 
 namespace {
 
+static double kProcessWaitTimeoutSeconds = 5.0;
+
 static const char* kProcSelfFd =
 #if defined(__APPLE__)
   "/dev/fd";
@@ -235,13 +239,14 @@ Status ReadFdsFully(const string& progname,
 
 } // anonymous namespace
 
-Subprocess::Subprocess(vector<string> argv)
+Subprocess::Subprocess(vector<string> argv, int sig_on_destruct)
     : program_(argv[0]),
       argv_(std::move(argv)),
       state_(kNotStarted),
       child_pid_(-1),
       fd_state_(),
-      child_fds_() {
+      child_fds_(),
+      sig_on_destruct_(sig_on_destruct) {
   // By convention, the first argument in argv is the base name of the program.
   argv_[0] = BaseName(argv_[0]);
 
@@ -255,11 +260,12 @@ Subprocess::Subprocess(vector<string> argv)
 
 Subprocess::~Subprocess() {
   if (state_ == kRunning) {
-    LOG(WARNING) << "Child process " << child_pid_
-                 << "(" << JoinStrings(argv_, " ") << ") "
-                 << " was orphaned. Sending SIGKILL...";
-    WARN_NOT_OK(Kill(SIGKILL), "Failed to send SIGKILL");
-    WARN_NOT_OK(Wait(), "Failed to Wait()");
+    LOG(WARNING) << Substitute(
+        "Child process $0 ($1) was orphaned. Sending signal $2...",
+        child_pid_, JoinStrings(argv_, " "), sig_on_destruct_);
+    WARN_NOT_OK(KillAndWait(sig_on_destruct_),
+                Substitute("Failed to KillAndWait() with signal $0",
+                           sig_on_destruct_));
   }
 
   for (int i = 0; i < 3; ++i) {
@@ -498,6 +504,40 @@ Status Subprocess::Kill(int signal) {
   return Status::OK();
 }
 
+Status Subprocess::KillAndWait(int signal) {
+  string procname = Substitute("$0 (pid $1)", argv0(), pid());
+
+  // This is a fatal error because all errors in Kill() are signal-independent,
+  // so Kill(SIGKILL) is just as likely to fail if this did.
+  RETURN_NOT_OK_PREPEND(
+      Kill(signal), Substitute("Failed to send signal $0 to $1",
+                               signal, procname));
+  if (signal == SIGKILL) {
+    RETURN_NOT_OK_PREPEND(
+        Wait(), Substitute("Failed to wait on $0", procname));
+  } else {
+    Status s;
+    Stopwatch sw;
+    sw.start();
+    do {
+      s = WaitNoBlock();
+      if (s.ok()) {
+        break;
+      } else if (!s.IsTimedOut()) {
+        // An unexpected error in WaitNoBlock() is likely to manifest repeatedly,
+        // so there's no point in retrying this.
+        RETURN_NOT_OK_PREPEND(
+            s, Substitute("Unexpected failure while waiting on $0", procname));
+      }
+      SleepFor(MonoDelta::FromMilliseconds(10));
+    } while (sw.elapsed().wall_seconds() < kProcessWaitTimeoutSeconds);
+    if (s.IsTimedOut()) {
+      return KillAndWait(SIGKILL);
+    }
+  }
+  return Status::OK();
+}
+
 Status Subprocess::GetExitStatus(int* exit_status, string* info_str) const {
   if (state_ != kExited) {
     const string err_str = "Sub-process termination hasn't yet been detected";

http://git-wip-us.apache.org/repos/asf/kudu/blob/eb79a718/src/kudu/util/subprocess.h
----------------------------------------------------------------------
diff --git a/src/kudu/util/subprocess.h b/src/kudu/util/subprocess.h
index 28927dc..9834e3a 100644
--- a/src/kudu/util/subprocess.h
+++ b/src/kudu/util/subprocess.h
@@ -17,6 +17,8 @@
 #ifndef KUDU_UTIL_SUBPROCESS_H
 #define KUDU_UTIL_SUBPROCESS_H
 
+#include <signal.h>
+
 #include <map>
 #include <string>
 #include <vector>
@@ -47,7 +49,11 @@ namespace kudu {
 // will be forcibly SIGKILLed to avoid orphaning processes.
 class Subprocess {
  public:
-  explicit Subprocess(std::vector<std::string> argv);
+  // Constructs a new Subprocess that will execute 'argv' on Start().
+  //
+  // If the process isn't explicitly killed, 'sig_on_destroy' will be delivered
+  // to it when the Subprocess goes out of scope.
+  explicit Subprocess(std::vector<std::string> argv, int sig_on_destruct = SIGKILL);
   ~Subprocess();
 
   // Disable subprocess stream output.  Must be called before subprocess starts.
@@ -100,6 +106,12 @@ class Subprocess {
   // in order to reap it. Only call after starting.
   Status Kill(int signal) WARN_UNUSED_RESULT;
 
+  // Sends a signal to the subprocess and waits for it to exit.
+  //
+  // If the signal is not SIGKILL and the process doesn't appear to be exiting,
+  // retries with SIGKILL.
+  Status KillAndWait(int signal);
+
   // Retrieve exit status of the process awaited by Wait() and/or WaitNoBlock()
   // methods. Must be called only after calling Wait()/WaitNoBlock().
   Status GetExitStatus(int* exit_status, std::string* info_str = nullptr) const
@@ -140,6 +152,7 @@ class Subprocess {
   int ReleaseChildStderrFd() { return ReleaseChildFd(STDERR_FILENO); }
 
   pid_t pid() const;
+  const std::string& argv0() const { return argv_[0]; }
 
  private:
   enum State {
@@ -167,6 +180,10 @@ class Subprocess {
   // Only valid if state_ == kExited.
   int wait_status_;
 
+  // Custom signal to deliver when the subprocess goes out of scope, provided
+  // the process hasn't already been killed.
+  int sig_on_destruct_;
+
   DISALLOW_COPY_AND_ASSIGN(Subprocess);
 };