You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by vi...@apache.org on 2014/02/12 02:31:09 UTC

[03/11] git commit: Containerizer - isolators (part 3).

Containerizer - isolators (part 3).

Isolators perform isolation for the MesosContainerizer.

Adds the Isolator interface and implementations of the Posix CPU and Mem
isolators (no isolation, just usage()).

Review: https://reviews.apache.org/r/16150


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/d5266b8c
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/d5266b8c
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/d5266b8c

Branch: refs/heads/master
Commit: d5266b8c9c76cccea6b7a70c78ec4c81b4a9a6b4
Parents: 71c6b5c
Author: Ian Downes <ia...@gmail.com>
Authored: Tue Feb 11 16:28:20 2014 -0800
Committer: Vinod Kone <vi...@twitter.com>
Committed: Tue Feb 11 16:59:50 2014 -0800

----------------------------------------------------------------------
 src/Makefile.am                             |   3 +
 src/slave/containerizer/isolator.cpp        | 104 ++++++++++
 src/slave/containerizer/isolator.hpp        | 144 +++++++++++++
 src/slave/containerizer/isolators/posix.hpp | 252 +++++++++++++++++++++++
 4 files changed, 503 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/d5266b8c/src/Makefile.am
----------------------------------------------------------------------
diff --git a/src/Makefile.am b/src/Makefile.am
index a4b5a52..d9cb9e9 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -177,6 +177,7 @@ libmesos_no_3rdparty_la_SOURCES =					\
 	slave/slave.cpp							\
 	slave/http.cpp							\
 	slave/containerizer/containerizer.cpp				\
+	slave/containerizer/isolator.cpp				\
 	slave/containerizer/launcher.cpp				\
 	slave/containerizer/mesos_containerizer.cpp			\
 	slave/status_update_manager.cpp					\
@@ -233,6 +234,8 @@ libmesos_no_3rdparty_la_SOURCES += common/attributes.hpp		\
 	messages/messages.hpp slave/constants.hpp			\
 	slave/containerizer/cgroups_launcher.hpp			\
 	slave/containerizer/containerizer.hpp				\
+	slave/containerizer/isolator.hpp				\
+	slave/containerizer/isolators/posix.hpp				\
 	slave/containerizer/launcher.hpp				\
 	slave/containerizer/mesos_containerizer.hpp			\
 	slave/flags.hpp slave/gc.hpp slave/monitor.hpp			\

http://git-wip-us.apache.org/repos/asf/mesos/blob/d5266b8c/src/slave/containerizer/isolator.cpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/isolator.cpp b/src/slave/containerizer/isolator.cpp
new file mode 100644
index 0000000..f7935b3
--- /dev/null
+++ b/src/slave/containerizer/isolator.cpp
@@ -0,0 +1,104 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <process/dispatch.hpp>
+
+#include "slave/containerizer/isolator.hpp"
+
+using namespace process;
+
+using std::string;
+using std::list;
+
+namespace mesos {
+namespace internal {
+namespace slave {
+
+
+// Takes ownership of the isolator process (which must not be null) and spawns it.
+Isolator::Isolator(Owned<IsolatorProcess> _process) : process(_process)
+{
+  process::spawn(CHECK_NOTNULL(process.get()));
+}
+
+// Terminates the spawned process and waits for it before releasing it.
+Isolator::~Isolator()
+{
+  process::terminate(process.get());
+  process::wait(process.get());
+}
+
+// Dispatches recovery of the given run states to the isolator process.
+Future<Nothing> Isolator::recover(const list<state::RunState>& states)
+{
+  return dispatch(process.get(), &IsolatorProcess::recover, states);
+}
+
+
+// Dispatches preparation of the container to the isolator process; the
+// returned future is the one produced by that dispatch.
+Future<Nothing> Isolator::prepare(
+    const ContainerID& containerId,
+    const ExecutorInfo& executorInfo)
+{
+  return dispatch(
+      process.get(), &IsolatorProcess::prepare, containerId, executorInfo);
+}
+
+
+// Dispatches to IsolatorProcess::isolate() with the container's id and pid.
+Future<Option<CommandInfo> > Isolator::isolate(
+    const ContainerID& containerId, pid_t pid)
+{
+  return dispatch(process.get(), &IsolatorProcess::isolate, containerId, pid);
+}
+
+// Dispatches to IsolatorProcess::watch() for the given container.
+Future<Limitation> Isolator::watch(const ContainerID& containerId)
+{
+  return dispatch(process.get(), &IsolatorProcess::watch, containerId);
+}
+
+
+// Forwards the container's updated resource allocation to the isolator
+// process by dispatching to IsolatorProcess::update(); the returned future is
+// the one produced by that dispatch.
+Future<Nothing> Isolator::update(
+    const ContainerID& containerId,
+    const Resources& resources)
+{
+  return dispatch(
+      process.get(), &IsolatorProcess::update, containerId, resources);
+}
+
+
+// Dispatches to IsolatorProcess::usage() for the given container.
+Future<ResourceStatistics> Isolator::usage(const ContainerID& containerId) const
+{
+  return dispatch(process.get(), &IsolatorProcess::usage, containerId);
+}
+
+// Dispatches to IsolatorProcess::cleanup() for the given container.
+Future<Nothing> Isolator::cleanup(const ContainerID& containerId)
+{
+  return dispatch(process.get(), &IsolatorProcess::cleanup, containerId);
+}
+
+} // namespace slave {
+} // namespace internal {
+} // namespace mesos {

http://git-wip-us.apache.org/repos/asf/mesos/blob/d5266b8c/src/slave/containerizer/isolator.hpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/isolator.hpp b/src/slave/containerizer/isolator.hpp
new file mode 100644
index 0000000..fc6c9ab
--- /dev/null
+++ b/src/slave/containerizer/isolator.hpp
@@ -0,0 +1,144 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ISOLATOR_HPP__
+#define __ISOLATOR_HPP__
+
+#include <list>
+#include <string>
+
+#include <process/dispatch.hpp>
+#include <process/future.hpp>
+#include <process/owned.hpp>
+#include <process/process.hpp>
+
+#include <stout/try.hpp>
+
+#include "slave/flags.hpp"
+#include "slave/state.hpp"
+
+namespace mesos {
+namespace internal {
+namespace slave {
+
+// Forward declaration.
+class IsolatorProcess;
+
+// Describes a resource limitation that has impacted an executor such that the
+// executor should be terminated. Intended to support resources like memory
+// where the Linux kernel may invoke the OOM killer, killing some/all of a
+// container's processes.
+struct Limitation
+{
+  // Stores copies of the limited resource and of the explanatory message.
+  Limitation(const Resource& _resource, const std::string& _message)
+    : resource(_resource),
+      message(_message) {}
+
+  // Resource (type and value) that triggered the limitation.
+  const Resource resource;
+
+  // Description of the limitation.
+  const std::string message;
+};
+
+
+class Isolator
+{
+public:
+  // Spawns 'process'; it is terminated and waited for upon destruction.
+  explicit Isolator(process::Owned<IsolatorProcess> process);
+  ~Isolator();
+
+  // Recover containers from the run states.
+  process::Future<Nothing> recover(const std::list<state::RunState>& states);
+
+  // Prepare for isolation of the executor.
+  process::Future<Nothing> prepare(
+      const ContainerID& containerId,
+      const ExecutorInfo& executorInfo);
+
+  // Isolate the executor. Any steps that require execution in the
+  // containerized context (e.g. inside a network namespace) can be returned in
+  // the optional CommandInfo and they will be run by the Launcher.  This could
+  // be a simple command or a URI (including a local file) that will be fetched
+  // and executed.
+  process::Future<Option<CommandInfo> > isolate(
+      const ContainerID& containerId,
+      pid_t pid);
+
+  // Watch the containerized executor and report if any resource constraint
+  // impacts the container, e.g., the kernel killing some processes.
+  process::Future<Limitation> watch(const ContainerID& containerId);
+
+  // Update the resources allocated to the container.
+  process::Future<Nothing> update(
+      const ContainerID& containerId,
+      const Resources& resources);
+
+  // Gather resource usage statistics for the container.
+  process::Future<ResourceStatistics> usage(
+      const ContainerID& containerId) const;
+
+  // Clean up a terminated container. This is called after the executor and all
+  // processes in the container have terminated.
+  process::Future<Nothing> cleanup(const ContainerID& containerId);
+
+private:
+  Isolator(const Isolator&); // Not copyable.
+  Isolator& operator=(const Isolator&); // Not assignable.
+
+  process::Owned<IsolatorProcess> process; // Target of every dispatch.
+};
+
+
+// Abstract libprocess process that implements the actual isolation. Isolator
+// dispatches each of its calls to the method of the same name declared here,
+// so the per-method contracts are the ones documented on Isolator.
+class IsolatorProcess : public process::Process<IsolatorProcess>
+{
+public:
+  // Virtual so that implementations can be destroyed through this base type.
+  virtual ~IsolatorProcess() {}
+
+  virtual process::Future<Nothing> recover(
+      const std::list<state::RunState>& state) = 0;
+
+  virtual process::Future<Nothing> prepare(
+      const ContainerID& containerId, const ExecutorInfo& executorInfo) = 0;
+
+  virtual process::Future<Option<CommandInfo> > isolate(
+      const ContainerID& containerId, pid_t pid) = 0;
+
+  virtual process::Future<Limitation> watch(const ContainerID& containerId) = 0;
+
+  virtual process::Future<Nothing> update(
+      const ContainerID& containerId, const Resources& resources) = 0;
+
+  virtual process::Future<ResourceStatistics> usage(
+      const ContainerID& containerId) = 0;
+
+  virtual process::Future<Nothing> cleanup(const ContainerID& containerId) = 0;
+};
+
+
+} // namespace slave {
+} // namespace internal {
+} // namespace mesos {
+
+#endif // __ISOLATOR_HPP__

http://git-wip-us.apache.org/repos/asf/mesos/blob/d5266b8c/src/slave/containerizer/isolators/posix.hpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/isolators/posix.hpp b/src/slave/containerizer/isolators/posix.hpp
new file mode 100644
index 0000000..7fbc6dd
--- /dev/null
+++ b/src/slave/containerizer/isolators/posix.hpp
@@ -0,0 +1,252 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __POSIX_ISOLATOR_HPP__
+#define __POSIX_ISOLATOR_HPP__
+
+#include <stout/hashmap.hpp>
+#include <stout/os/pstree.hpp>
+
+#include <process/future.hpp>
+
+#include "slave/containerizer/isolator.hpp"
+
+namespace mesos {
+namespace internal {
+namespace slave {
+
+// A basic IsolatorProcess that only does bookkeeping: it remembers the pid of
+// each container but performs no resource isolation. Subclasses must implement
+// usage() for their appropriate resource(s).
+class PosixIsolatorProcess : public IsolatorProcess
+{
+public:
+  // Registers the pid and a fresh Limitation promise for every run state.
+  virtual process::Future<Nothing> recover(
+      const std::list<state::RunState>& state)
+  {
+    foreach (const state::RunState& run, state) {
+      if (!run.id.isSome()) {
+        return process::Failure("ContainerID is required to recover");
+      }
+
+      if (!run.forkedPid.isSome()) {
+        return process::Failure("Executor pid is required to recover");
+      }
+
+      const ContainerID& containerId = run.id.get();
+      // This should (almost) never occur: see comment in
+      // PosixLauncher::recover().
+      if (pids.contains(containerId)) {
+        return process::Failure("Container already recovered");
+      }
+
+      pids.put(containerId, run.forkedPid.get());
+      promises.put(containerId, process::Owned<process::Promise<Limitation> >(
+          new process::Promise<Limitation>()));
+    }
+
+    return Nothing();
+  }
+
+  // Creates the Limitation promise for a container that is not yet known.
+  virtual process::Future<Nothing> prepare(
+      const ContainerID& containerId,
+      const ExecutorInfo& executorInfo)
+  {
+    if (promises.contains(containerId)) {
+      return process::Failure("Container " + stringify(containerId) +
+                              " has already been prepared");
+    }
+
+    promises.put(containerId, process::Owned<process::Promise<Limitation> >(
+        new process::Promise<Limitation>()));
+
+    return Nothing();
+  }
+
+  // Records the executor pid of a known container; returns no command.
+  virtual process::Future<Option<CommandInfo> > isolate(
+      const ContainerID& containerId,
+      pid_t pid)
+  {
+    if (promises.contains(containerId)) {
+      pids.put(containerId, pid);
+      return None();
+    }
+
+    return process::Failure("Unknown container: " + stringify(containerId));
+  }
+
+  // Returns the container's Limitation future; this class never completes it.
+  virtual process::Future<Limitation> watch(
+      const ContainerID& containerId)
+  {
+    if (promises.contains(containerId)) {
+      return promises[containerId]->future();
+    }
+    return process::Failure("Unknown container: " + stringify(containerId));
+  }
+
+  // Only checks that the container is known.
+  virtual process::Future<Nothing> update(
+      const ContainerID& containerId,
+      const Resources& resources)
+  {
+    if (!promises.contains(containerId)) {
+      return process::Failure("Unknown container: " + stringify(containerId));
+    }
+    // No resources are actually isolated so nothing to do.
+    return Nothing();
+  }
+
+  // Forgets the promise and the pid of a known container.
+  virtual process::Future<Nothing> cleanup(const ContainerID& containerId)
+  {
+    if (!promises.contains(containerId)) {
+      return process::Failure("Unknown container: " + stringify(containerId));
+    }
+
+    // TODO(idownes): We should discard the container's promise here to signal
+    // to anyone that holds the future from watch().
+    promises.erase(containerId);
+    pids.erase(containerId);
+
+    return Nothing();
+  }
+
+protected:
+  // Per-container bookkeeping: the executor pid and the Limitation promise.
+  hashmap<ContainerID, pid_t> pids;
+  hashmap<ContainerID, process::Owned<process::Promise<Limitation> > > promises;
+};
+
+
+class PosixCpuIsolatorProcess : public PosixIsolatorProcess
+{
+public:
+  // Wraps a new PosixCpuIsolatorProcess in an Isolator; 'flags' is unused.
+  static Try<Isolator*> create(const Flags& flags)
+  {
+    process::Owned<IsolatorProcess> process(new PosixCpuIsolatorProcess());
+    return new Isolator(process);
+  }
+
+  // Sums the user and system CPU times over the container's process tree.
+  // Yields empty statistics for unknown containers or if os::pstree() fails.
+  virtual process::Future<ResourceStatistics> usage(
+      const ContainerID& containerId)
+  {
+    if (!pids.contains(containerId)) {
+      LOG(WARNING) << "No resource usage for unknown container '"
+                   << containerId << "'";
+      return ResourceStatistics();
+    }
+
+    Try<os::ProcessTree> tree = os::pstree(pids.get(containerId).get());
+    if (!tree.isSome()) {
+      return ResourceStatistics();
+    }
+
+    ResourceStatistics result;
+
+    // Walk the tree via pointers into 'tree' so that no subtree gets copied.
+    std::deque<const os::ProcessTree*> trees;
+    trees.push_back(&tree.get());
+    while (!trees.empty()) {
+      const os::ProcessTree& root = *trees.front();
+      trees.pop_front();
+
+      // We only show utime and stime when both are available, otherwise
+      // we're exposing a partial view of the CPU times.
+      if (root.process.utime.isSome() && root.process.stime.isSome()) {
+        result.set_cpus_user_time_secs(
+            result.cpus_user_time_secs() + root.process.utime.get().secs());
+        result.set_cpus_system_time_secs(
+            result.cpus_system_time_secs() + root.process.stime.get().secs());
+      }
+      foreach (const os::ProcessTree& child, root.children) {
+        trees.push_back(&child);
+      }
+    }
+
+    return result;
+  }
+
+private:
+  PosixCpuIsolatorProcess() {}
+};
+
+
+class PosixMemIsolatorProcess : public PosixIsolatorProcess
+{
+public:
+  // Wraps a new PosixMemIsolatorProcess in an Isolator; 'flags' is unused.
+  static Try<Isolator*> create(const Flags& flags)
+  {
+    process::Owned<IsolatorProcess> process(new PosixMemIsolatorProcess());
+    return new Isolator(process);
+  }
+
+  // Sums the resident set sizes over the container's process tree. Yields
+  // empty statistics for unknown containers or if os::pstree() fails.
+  virtual process::Future<ResourceStatistics> usage(
+      const ContainerID& containerId)
+  {
+    if (!pids.contains(containerId)) {
+      LOG(WARNING) << "No resource usage for unknown container '"
+                   << containerId << "'";
+      return ResourceStatistics();
+    }
+
+    Try<os::ProcessTree> tree = os::pstree(pids.get(containerId).get());
+    if (!tree.isSome()) {
+      return ResourceStatistics();
+    }
+
+    ResourceStatistics result;
+
+    // Walk the tree via pointers into 'tree' so that no subtree gets copied.
+    std::deque<const os::ProcessTree*> trees;
+    trees.push_back(&tree.get());
+    while (!trees.empty()) {
+      const os::ProcessTree& root = *trees.front();
+      trees.pop_front();
+
+      if (root.process.rss.isSome()) {
+        result.set_mem_rss_bytes(
+            result.mem_rss_bytes() + root.process.rss.get().bytes());
+      }
+      foreach (const os::ProcessTree& child, root.children) {
+        trees.push_back(&child);
+      }
+    }
+
+    return result;
+  }
+
+private:
+  PosixMemIsolatorProcess() {}
+};
+
+
+} // namespace slave {
+} // namespace internal {
+} // namespace mesos {
+
+#endif // __POSIX_ISOLATOR_HPP__