You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by vi...@apache.org on 2013/03/04 23:26:59 UTC

svn commit: r1452569 - in /incubator/mesos/trunk/src: slave/cgroups_isolation_module.cpp slave/cgroups_isolation_module.hpp slave/slave.cpp slave/slave.hpp tests/balloon_framework_test.sh

Author: vinodkone
Date: Mon Mar  4 22:26:58 2013
New Revision: 1452569

URL: http://svn.apache.org/r1452569
Log:
Properly cleaned up cgroups.

Review: https://reviews.apache.org/r/9408

Modified:
    incubator/mesos/trunk/src/slave/cgroups_isolation_module.cpp
    incubator/mesos/trunk/src/slave/cgroups_isolation_module.hpp
    incubator/mesos/trunk/src/slave/slave.cpp
    incubator/mesos/trunk/src/slave/slave.hpp
    incubator/mesos/trunk/src/tests/balloon_framework_test.sh

Modified: incubator/mesos/trunk/src/slave/cgroups_isolation_module.cpp
URL: http://svn.apache.org/viewvc/incubator/mesos/trunk/src/slave/cgroups_isolation_module.cpp?rev=1452569&r1=1452568&r2=1452569&view=diff
==============================================================================
--- incubator/mesos/trunk/src/slave/cgroups_isolation_module.cpp (original)
+++ incubator/mesos/trunk/src/slave/cgroups_isolation_module.cpp Mon Mar  4 22:26:58 2013
@@ -38,6 +38,7 @@
 #include <stout/foreach.hpp>
 #include <stout/hashmap.hpp>
 #include <stout/lambda.hpp>
+#include <stout/none.hpp>
 #include <stout/numify.hpp>
 #include <stout/option.hpp>
 #include <stout/os.hpp>
@@ -177,7 +178,8 @@ std::ostream& operator << (std::ostream&
 
 CgroupsIsolationModule::CgroupsIsolationModule()
   : ProcessBase(ID::generate("cgroups-isolation-module")),
-    initialized(false)
+    initialized(false),
+    lockFile(None())
 {
   // Spawn the reaper, note that it might send us a message before we
   // actually get spawned ourselves, but that's okay, the message will
@@ -302,23 +304,27 @@ void CgroupsIsolationModule::initialize(
 
   // Try and put an _advisory_ file lock on the tasks' file of our
   // root cgroup to check and see if another slave is already running.
-  Try<int> fd = os::open(path::join(hierarchy, "mesos", "tasks"), O_RDONLY);
-  CHECK_SOME(fd);
-  Try<Nothing> cloexec = os::cloexec(fd.get());
+  Try<int> open = os::open(path::join(hierarchy, "mesos", "tasks"), O_RDONLY);
+  CHECK_SOME(open);
+
+  lockFile = open.get();
+  Try<Nothing> cloexec = os::cloexec(lockFile.get());
   CHECK_SOME(cloexec);
-  if (flock(fd.get(), LOCK_EX | LOCK_NB) != 0) {
+  if (flock(lockFile.get(), LOCK_EX | LOCK_NB) != 0) {
     EXIT(1) << "Another mesos-slave appears to be running!";
   }
 
   // Cleanup any orphaned cgroups created in previous executions (this
   // should be safe because we've been able to acquire the file lock).
+  // TODO(vinod): Wait for all the orphaned cgroups to be destroyed
+  // before moving on with the rest of the initialization.
   Try<vector<string> > cgroups = cgroups::get(hierarchy, "mesos");
   CHECK_SOME(cgroups) << "Failed to get nested cgroups of 'mesos'";
   foreach (const string& cgroup, cgroups.get()) {
     LOG(INFO) << "Removing orphaned cgroup '" << cgroup << "'";
     cgroups::destroy(hierarchy, cgroup)
       .onAny(defer(PID<CgroupsIsolationModule>(this),
-                   &CgroupsIsolationModule::destroyWaited,
+                   &CgroupsIsolationModule::_destroy,
                    cgroup,
                    lambda::_1));
   }
@@ -423,6 +429,17 @@ void CgroupsIsolationModule::initialize(
 }
 
 
+void CgroupsIsolationModule::finalize()
+{
+  // Unlock the advisory file.
+  CHECK_SOME(lockFile) << "Uninitialized file descriptor!";
+  if (flock(lockFile.get(), LOCK_UN) != 0) {
+    PLOG(FATAL)
+      << "Failed to unlock '" << path::join(hierarchy, "mesos", "tasks");
+  }
+}
+
+
 void CgroupsIsolationModule::launchExecutor(
     const FrameworkID& frameworkId,
     const FrameworkInfo& frameworkInfo,
@@ -539,19 +556,17 @@ void CgroupsIsolationModule::killExecuto
     info->oomNotifier.discard();
   }
 
+  info->killed = true;
+
   // Destroy the cgroup that is associated with the executor. Here, we don't
   // wait for it to succeed as we don't want to block the isolation module.
   // Instead, we register a callback which will be invoked when its result is
   // ready.
   cgroups::destroy(hierarchy, info->name())
     .onAny(defer(PID<CgroupsIsolationModule>(this),
-                 &CgroupsIsolationModule::destroyWaited,
-                 info->name(),
+                 &CgroupsIsolationModule::_killExecutor,
+                 info,
                  lambda::_1));
-
-  // We do not unregister the cgroup info here, instead, we ask the process
-  // exit handler to unregister the cgroup info.
-  info->killed = true;
 }
 
 
@@ -630,25 +645,16 @@ void CgroupsIsolationModule::processExit
     FrameworkID frameworkId = info->frameworkId;
     ExecutorID executorId = info->executorId;
 
-    LOG(INFO) << "Telling slave of terminated executor " << executorId
-              << " of framework " << frameworkId;
+    LOG(INFO) << "Executor " << executorId
+              << " of framework " << frameworkId
+              << " terminated with status " << status;
 
-    // TODO(vinod): Consider sending this message when the cgroup is
-    // completely destroyed (i.e., inside destroyWaited()).
-    // The tricky bit is to get the exit 'status' of the executor process.
-    dispatch(slave,
-             &Slave::executorTerminated,
-             info->frameworkId,
-             info->executorId,
-             status,
-             info->destroyed,
-             info->reason);
+    // Set the exit status, so that '_killExecutor()' can send it to the slave.
+    info->status = status;
 
     if (!info->killed) {
       killExecutor(frameworkId, executorId);
     }
-
-    unregisterCgroupInfo(frameworkId, executorId);
   }
 }
 
@@ -895,14 +901,54 @@ void CgroupsIsolationModule::oom(
 }
 
 
-void CgroupsIsolationModule::destroyWaited(
+void CgroupsIsolationModule::_destroy(
     const string& cgroup,
     const Future<bool>& future)
 {
+  CHECK(initialized) << "Cannot destroy cgroups before initialization";
+
   if (future.isReady()) {
-    LOG(INFO) << "Successfully destroyed the cgroup " << cgroup;
+    LOG(INFO) << "Successfully destroyed cgroup " << cgroup;
+  } else {
+    LOG(FATAL) << "Failed to destroy cgroup " << cgroup
+               << ": " << future.failure();
+  }
+}
+
+
+void CgroupsIsolationModule::_killExecutor(
+    CgroupInfo* info,
+    const Future<bool>& future)
+{
+  CHECK(initialized) << "Cannot kill executors before initialization";
+
+  CHECK_NOTNULL(info);
+
+  if (future.isReady()) {
+    LOG(INFO) << "Successfully destroyed cgroup " << info->name();
+
+    CHECK(info->killed)
+      << "Unexpectedly alive executor " << info->executorId
+      << " of framework " << info->frameworkId;
+
+    // NOTE: The exit status of the executor might not be set if this
+    // function is called before 'processTerminated()' is called.
+    // TODO(vinod): When reaper returns a future instead of issuing a callback,
+    // wait for that future to be ready and grab the exit status.
+    dispatch(slave,
+             &Slave::executorTerminated,
+             info->frameworkId,
+             info->executorId,
+             info->status,
+             info->destroyed,
+             info->reason);
+
+    // We make a copy here because 'info' will be deleted when we unregister.
+    unregisterCgroupInfo(
+        utils::copy(info->frameworkId),
+        utils::copy(info->executorId));
   } else {
-    LOG(FATAL) << "Failed to destroy the cgroup " << cgroup
+    LOG(FATAL) << "Failed to destroy cgroup " << info->name()
                << ": " << future.failure();
   }
 }
@@ -919,6 +965,7 @@ CgroupsIsolationModule::CgroupInfo* Cgro
   info->pid = -1;
   info->killed = false;
   info->destroyed = false;
+  info->status = -1;
   info->reason = "";
   if (subsystems.contains("cpuset")) {
     info->cpuset = new Cpuset();

Modified: incubator/mesos/trunk/src/slave/cgroups_isolation_module.hpp
URL: http://svn.apache.org/viewvc/incubator/mesos/trunk/src/slave/cgroups_isolation_module.hpp?rev=1452569&r1=1452568&r2=1452569&view=diff
==============================================================================
--- incubator/mesos/trunk/src/slave/cgroups_isolation_module.hpp (original)
+++ incubator/mesos/trunk/src/slave/cgroups_isolation_module.hpp Mon Mar  4 22:26:58 2013
@@ -87,6 +87,8 @@ public:
       bool local,
       const process::PID<Slave>& slave);
 
+  virtual void finalize();
+
   virtual void launchExecutor(
       const FrameworkID& frameworkId,
       const FrameworkInfo& frameworkInfo,
@@ -155,6 +157,8 @@ private:
 
     std::string reason; // The reason behind the destruction.
 
+    int status; // Exit status of the executor.
+
     // Used to cancel the OOM listening.
     process::Future<uint64_t> oomNotifier;
 
@@ -215,9 +219,17 @@ private:
       const std::string& tag);
 
   // This callback is invoked when destroy cgroup has a result.
+  // @param   info        The information of cgroup that is being destroyed.
+  // @param   future      The future describing the destroy process.
+  void _killExecutor(
+      CgroupInfo* info,
+      const process::Future<bool>& future);
+
+  // This callback is invoked when destroying orphaned cgroups from the
+  // previous slave execution.
   // @param   cgroup        The cgroup that is being destroyed.
   // @param   future        The future describing the destroy process.
-  void destroyWaited(
+  void _destroy(
       const std::string& cgroup,
       const process::Future<bool>& future);
 
@@ -255,6 +267,10 @@ private:
   bool initialized;
   Reaper* reaper;
 
+  // File descriptor to 'mesos/tasks' file in the cgroup on which we place
+  // an advisory lock.
+  Option<int> lockFile;
+
   // The cgroup information for each live executor.
   hashmap<FrameworkID, hashmap<ExecutorID, CgroupInfo*> > infos;
 

Modified: incubator/mesos/trunk/src/slave/slave.cpp
URL: http://svn.apache.org/viewvc/incubator/mesos/trunk/src/slave/slave.cpp?rev=1452569&r1=1452568&r2=1452569&view=diff
==============================================================================
--- incubator/mesos/trunk/src/slave/slave.cpp (original)
+++ incubator/mesos/trunk/src/slave/slave.cpp Mon Mar  4 22:26:58 2013
@@ -357,6 +357,7 @@ void Slave::finalize()
   }
 
   // Stop the isolation module.
+  // TODO(vinod): Wait until all the executors have terminated.
   terminate(isolationModule);
   wait(isolationModule);
 }
@@ -829,7 +830,9 @@ void Slave::registerExecutor(
     // RunTaskMessages.
     dispatch(isolationModule,
              &IsolationModule::resourcesChanged,
-             framework->id, executor->id, executor->resources);
+             framework->id,
+             executor->id,
+             executor->resources);
 
     // Tell executor it's registered and give it any queued tasks.
     ExecutorRegisteredMessage message;
@@ -1164,6 +1167,14 @@ void Slave::executorTerminated(
     .onAny(defer(self(), &Self::detachFile, params::_1, executor->directory));
 
   framework->destroyExecutor(executor->id);
+
+  // Cleanup if this framework has no executors running.
+  if (framework->executors.size() == 0) {
+    frameworks.erase(framework->id);
+
+    // Pass ownership of the framework pointer.
+    completedFrameworks.push_back(std::tr1::shared_ptr<Framework>(framework));
+  }
 }
 
 
@@ -1197,7 +1208,9 @@ void Slave::shutdownExecutor(Framework* 
   delay(flags.executor_shutdown_grace_period,
         self(),
         &Slave::shutdownExecutorTimeout,
-        framework->id, executor->id, executor->uuid);
+        framework->id,
+        executor->id,
+        executor->uuid);
 }
 
 
@@ -1221,20 +1234,6 @@ void Slave::shutdownExecutorTimeout(
              &IsolationModule::killExecutor,
              framework->id,
              executor->id);
-
-    // Schedule the executor directory to get garbage collected.
-    gc.schedule(flags.gc_delay, executor->directory)
-      .onAny(defer(self(), &Self::detachFile, params::_1, executor->directory));;
-
-    framework->destroyExecutor(executor->id);
-  }
-
-  // Cleanup if this framework has no executors running.
-  if (framework->executors.size() == 0) {
-    frameworks.erase(framework->id);
-
-    // Pass ownership of the framework pointer.
-    completedFrameworks.push_back(std::tr1::shared_ptr<Framework>(framework));
   }
 }
 

Modified: incubator/mesos/trunk/src/slave/slave.hpp
URL: http://svn.apache.org/viewvc/incubator/mesos/trunk/src/slave/slave.hpp?rev=1452569&r1=1452568&r2=1452569&view=diff
==============================================================================
--- incubator/mesos/trunk/src/slave/slave.hpp (original)
+++ incubator/mesos/trunk/src/slave/slave.hpp Mon Mar  4 22:26:58 2013
@@ -218,6 +218,8 @@ private:
   Attributes attributes;
 
   hashmap<FrameworkID, Framework*> frameworks;
+
+  // TODO(bmahler): Use the Owned abstraction.
   boost::circular_buffer<std::tr1::shared_ptr<Framework> > completedFrameworks;
 
   IsolationModule* isolationModule;

Modified: incubator/mesos/trunk/src/tests/balloon_framework_test.sh
URL: http://svn.apache.org/viewvc/incubator/mesos/trunk/src/tests/balloon_framework_test.sh?rev=1452569&r1=1452568&r2=1452569&view=diff
==============================================================================
--- incubator/mesos/trunk/src/tests/balloon_framework_test.sh (original)
+++ incubator/mesos/trunk/src/tests/balloon_framework_test.sh Mon Mar  4 22:26:58 2013
@@ -9,12 +9,45 @@ source ${MESOS_SOURCE_DIR}/support/atexi
 # TODO(benh): Look for an existing hierarchy first.
 HIERARCHY=/cgroup
 
-# Check if the hierarchy exists, if it doesn't we want to make sure we
-# remove it if we create it.
+# Check if the hierarchy exists. If it doesn't, we want to make sure we
+# remove it, since we will create it.
+unmount=false
 if [[ ! -d ${HIERARCHY} ]]; then
-    atexit "(rmdir ${HIERARCHY}/mesos; unmount ${HIERARCHY}; rmdir ${HIERARCHY})"
+  unmount=true
 fi
 
+MASTER_PID=
+SLAVE_PID=
+
+# This function ensures that we first kill the slave (if present) and
+# then cleanup the cgroups. This is necessary because a running slave
+# holds an advisory lock that disallows cleaning up cgroups.
+# This function is not pure, but depends on state from the environment
+# (e.g. ${SLAVE_PID}) because we do not all possible values about when we
+# register with 'atexit'.
+function cleanup() {
+  # Make sure we kill the master on exit.
+  if [[ ! -z ${MASTER_PID} ]]; then
+    kill ${MASTER_PID}
+  fi
+
+  # Make sure we kill the slave on exit.
+  if [[ ! -z ${SLAVE_PID} ]]; then
+    kill ${SLAVE_PID}
+  fi
+
+  # Make sure we cleanup any cgroups we created on exiting.
+  find ${HIERARCHY}/mesos/* -depth -type d | xargs -r rmdir
+
+  # Make sure we cleanup the hierarchy, if we created it.
+  # NOTE: We do a sleep here, to ensure the hierarchy is not busy.
+  if ${unmount}; then
+   rmdir ${HIERARCHY}/mesos; sleep 1; umount ${HIERARCHY}; rmdir ${HIERARCHY}
+  fi
+}
+
+atexit cleanup
+
 # Launch master.
 ${MESOS_BUILD_DIR}/src/mesos-master --port=5432 &
 MASTER_PID=${!}
@@ -29,8 +62,6 @@ if [[ ${STATUS} -ne 0 ]]; then
   exit 2
 fi
 
-# Make sure we kill the master on exit.
-atexit "kill ${MASTER_PID}"
 
 # Launch slave.
 ${MESOS_BUILD_DIR}/src/mesos-slave \
@@ -50,12 +81,6 @@ if [[ ${STATUS} -ne 0 ]]; then
   exit 2
 fi
 
-# Make sure we kill the slave on exit.
-atexit "kill ${SLAVE_PID}"
-
-# Make sure we cleanup any cgroups we created on exiting.
-atexit "find ${HIERARCHY}/mesos/* -depth -type d | xargs -r rmdir"
-
 # The main event!
 ${MESOS_BUILD_DIR}/src/balloon-framework localhost:5432 1024
 STATUS=${?}