You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by be...@apache.org on 2013/05/30 23:55:02 UTC

git commit: Added a retry option to cgroups::mount in order to deal with a bug with the kernel unmounting the hierarchy asynchronously causing subsequent mounts to fail.

Updated Branches:
  refs/heads/master 11e8a1dba -> 85b1f4af1


Added a retry option to cgroups::mount in order to deal with a bug
with the kernel unmounting the hierarchy asynchronously causing
subsequent mounts to fail.

From: Thomas Marshall <tw...@gmail.com>
Review: https://reviews.apache.org/r/11547


Project: http://git-wip-us.apache.org/repos/asf/incubator-mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-mesos/commit/85b1f4af
Tree: http://git-wip-us.apache.org/repos/asf/incubator-mesos/tree/85b1f4af
Diff: http://git-wip-us.apache.org/repos/asf/incubator-mesos/diff/85b1f4af

Branch: refs/heads/master
Commit: 85b1f4af1ae54812993863422f8c087448360b79
Parents: 11e8a1d
Author: Benjamin Hindman <be...@twitter.com>
Authored: Thu May 30 14:53:40 2013 -0700
Committer: Benjamin Hindman <be...@twitter.com>
Committed: Thu May 30 14:53:40 2013 -0700

----------------------------------------------------------------------
 src/linux/cgroups.cpp       |   87 ++++++++++++++++++++++----------------
 src/linux/cgroups.hpp       |    9 +++-
 src/tests/cgroups_tests.cpp |   12 ++++-
 3 files changed, 66 insertions(+), 42 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-mesos/blob/85b1f4af/src/linux/cgroups.cpp
----------------------------------------------------------------------
diff --git a/src/linux/cgroups.cpp b/src/linux/cgroups.cpp
index cfdc3b2..8d94fe6 100644
--- a/src/linux/cgroups.cpp
+++ b/src/linux/cgroups.cpp
@@ -169,7 +169,45 @@ static Try<map<string, SubsystemInfo> > subsystems()
 //          Error if the operation fails.
 static Try<Nothing> mount(const string& hierarchy, const string& subsystems)
 {
-  return fs::mount(subsystems, hierarchy, "cgroup", 0, subsystems.c_str());
+  if (os::exists(hierarchy)) {
+    return Error("'" + hierarchy + "' already exists in the file system");
+  }
+
+  // Make sure all subsystems are enabled and not busy.
+  foreach (const string& subsystem, strings::tokenize(subsystems, ",")) {
+    Try<bool> result = enabled(subsystem);
+    if (result.isError()) {
+      return Error(result.error());
+    } else if (!result.get()) {
+      return Error("'" + subsystem + "' is not enabled by the kernel");
+    }
+
+    result = busy(subsystem);
+    if (result.isError()) {
+      return Error(result.error());
+    } else if (result.get()) {
+      return Error(
+          "'" + subsystem + "' is already attached to another hierarchy");
+    }
+  }
+
+  // Create the directory for the hierarchy.
+  Try<Nothing> mkdir = os::mkdir(hierarchy);
+  if (mkdir.isError()) {
+    return Error(
+        "Failed to create directory '" + hierarchy + "': " + mkdir.error());
+  }
+
+  // Mount the virtual file system (attach subsystems).
+  Try<Nothing> result =
+    fs::mount(subsystems, hierarchy, "cgroup", 0, subsystems.c_str());
+  if (result.isError()) {
+    // Do a best effort rmdir of hierarchy (ignoring success or failure).
+    os::rmdir(hierarchy);
+    return result;
+  }
+
+  return Nothing();
 }
 
 
@@ -591,46 +629,21 @@ Try<set<string> > subsystems(const string& hierarchy)
 }
 
 
-Try<Nothing> mount(const string& hierarchy, const string& subsystems)
+Try<Nothing> mount(const string& hierarchy, const string& subsystems, int retry)
 {
-  if (os::exists(hierarchy)) {
-    return Error("'" + hierarchy + "' already exists in the file system");
-  }
-
-  // Make sure all subsystems are enabled and not busy.
-  foreach (const string& subsystem, strings::tokenize(subsystems, ",")) {
-    Try<bool> result = enabled(subsystem);
-    if (result.isError()) {
-      return Error(result.error());
-    } else if (!result.get()) {
-      return Error("'" + subsystem + "' is not enabled by the kernel");
-    }
+  Try<Nothing> mounted = internal::mount(hierarchy, subsystems);
 
-    result = busy(subsystem);
-    if (result.isError()) {
-      return Error(result.error());
-    } else if (result.get()) {
-      return Error(
-          "'" + subsystem + "' is already attached to another hierarchy");
-    }
-  }
-
-  // Create the directory for the hierarchy.
-  Try<Nothing> mkdir = os::mkdir(hierarchy);
-  if (mkdir.isError()) {
-    return Error(
-        "Failed to create directory '" + hierarchy + "': " + mkdir.error());
+  // TODO(tmarshall) The retry option was added as a fix for a kernel
+  // bug in Ubuntu 12.04 that resulted in cgroups not being entirely
+  // cleaned up even once they have been completely unmounted from the
+  // file system. We should reevaluate this in the future, and
+  // hopefully remove it once the bug is no longer an issue.
+  if (mounted.isError() && retry > 0) {
+    os::sleep(Milliseconds(100));
+    return cgroups::mount(hierarchy, subsystems, retry - 1);
   }
 
-  // Mount the virtual file system (attach subsystems).
-  Try<Nothing> result = internal::mount(hierarchy, subsystems);
-  if (result.isError()) {
-    // Do a best effort rmdir of hierarchy (ignoring success or failure).
-    os::rmdir(hierarchy);
-    return result;
-  }
-
-  return Nothing();
+  return mounted;
 }
 
 

http://git-wip-us.apache.org/repos/asf/incubator-mesos/blob/85b1f4af/src/linux/cgroups.hpp
----------------------------------------------------------------------
diff --git a/src/linux/cgroups.hpp b/src/linux/cgroups.hpp
index 84edc97..3989712 100644
--- a/src/linux/cgroups.hpp
+++ b/src/linux/cgroups.hpp
@@ -118,12 +118,17 @@ Try<std::set<std::string> > subsystems(const std::string& hierarchy);
 // hierarchy already exists.  Also, the function will return error if
 // a subsystem in the given subsystem list has already been attached
 // to another hierarchy. On success, the cgroups virtual file system
-// will be mounted with the proper subsystems attached.
+// will be mounted with the proper subsystems attached. On failure,
+// mount will be retried the specified number of times.
 // @param   hierarchy   Path to the hierarchy root.
 // @param   subsystems  Comma-separated subsystem names.
+// @param   retry       Number of times to retry mount.
 // @return  Some if the operation succeeds.
 //          Error if the operation fails.
-Try<Nothing> mount(const std::string& hierarchy, const std::string& subsystems);
+Try<Nothing> mount(
+    const std::string& hierarchy,
+    const std::string& subsystems,
+    int retry = 0);
 
 
 // Unmount a hierarchy and remove the directory associated with

http://git-wip-us.apache.org/repos/asf/incubator-mesos/blob/85b1f4af/src/tests/cgroups_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/cgroups_tests.cpp b/src/tests/cgroups_tests.cpp
index f6b23c4..bc5f961 100644
--- a/src/tests/cgroups_tests.cpp
+++ b/src/tests/cgroups_tests.cpp
@@ -112,8 +112,10 @@ protected:
     Result<std::string> hierarchy_ = cgroups::hierarchy(subsystems);
     ASSERT_FALSE(hierarchy_.isError());
     if (hierarchy_.isNone()) {
-      // Try to mount a hierarchy for testing.
-      ASSERT_SOME(cgroups::mount(TEST_CGROUPS_HIERARCHY, subsystems))
+      // Try to mount a hierarchy for testing, retrying as necessary since the
+      // previous unmount might not have taken effect yet due to a bug in
+      // Ubuntu 12.04.
+      ASSERT_SOME(cgroups::mount(TEST_CGROUPS_HIERARCHY, subsystems, 10))
         << "-------------------------------------------------------------\n"
         << "We cannot run any cgroups tests that require\n"
         << "a hierarchy with subsystems '" << subsystems << "'\n"
@@ -247,7 +249,11 @@ TEST_F(CgroupsNoHierarchyTest, ROOT_CGROUPS_NOHIERARCHY_MountUnmountHierarchy)
 {
   EXPECT_ERROR(cgroups::mount("/tmp", "cpu"));
   EXPECT_ERROR(cgroups::mount(TEST_CGROUPS_HIERARCHY, "invalid"));
-  ASSERT_SOME(cgroups::mount(TEST_CGROUPS_HIERARCHY, "cpu,memory"));
+
+  // Try to mount a valid hierarchy, retrying as necessary since the
+  // previous unmount might not have taken effect yet due to a bug in
+  // Ubuntu 12.04.
+  ASSERT_SOME(cgroups::mount(TEST_CGROUPS_HIERARCHY, "cpu,memory", 10));
   EXPECT_ERROR(cgroups::mount(TEST_CGROUPS_HIERARCHY, "cpuset"));
   EXPECT_ERROR(cgroups::unmount("/tmp"));
   ASSERT_SOME(cgroups::unmount(TEST_CGROUPS_HIERARCHY));