You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by ya...@apache.org on 2016/07/06 21:54:06 UTC

[1/5] mesos git commit: Made Mesos containerizer error messages more consistent.

Repository: mesos
Updated Branches:
  refs/heads/master b16dbb2c2 -> 13c2020d4


Made Mesos containerizer error messages more consistent.

We've been using slightly different wordings for the same condition in
multiple places in the Mesos containerizer, but they don't provide any
additional information about where the failure is raised in a long
continuation chain. Since failures don't capture the location in the
code, we'd better distinguish them in a more meaningful way to assist
debugging.

Review: https://reviews.apache.org/r/49653


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/13c2020d
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/13c2020d
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/13c2020d

Branch: refs/heads/master
Commit: 13c2020d429d6d000bb37649f2a1be47de5b8f8c
Parents: 8907b5d
Author: Jiang Yan Xu <xu...@apple.com>
Authored: Fri Jul 1 18:12:01 2016 -0700
Committer: Jiang Yan Xu <xu...@apple.com>
Committed: Wed Jul 6 14:53:46 2016 -0700

----------------------------------------------------------------------
 src/slave/containerizer/mesos/containerizer.cpp | 29 +++++++++++---------
 1 file changed, 16 insertions(+), 13 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/13c2020d/src/slave/containerizer/mesos/containerizer.cpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/mesos/containerizer.cpp b/src/slave/containerizer/mesos/containerizer.cpp
index fe6ddc8..e900d75 100644
--- a/src/slave/containerizer/mesos/containerizer.cpp
+++ b/src/slave/containerizer/mesos/containerizer.cpp
@@ -885,14 +885,14 @@ Future<bool> MesosContainerizerProcess::_launch(
   // and its dependencies finish before '_launch' starts since onAny
   // is not guaranteed to be executed in order.
   if (!containers_.contains(containerId)) {
-    return Failure("Container has been destroyed");
+    return Failure("Container destroyed during provisioning");
   }
 
   // Make sure containerizer is not in DESTROYING state, to avoid
   // a possible race that containerizer is destroying the container
   // while it is provisioning the image from volumes.
   if (containers_[containerId]->state == DESTROYING) {
-    return Failure("Container is currently being destroyed");
+    return Failure("Container is being destroyed during provisioning");
   }
 
   CHECK_EQ(containers_[containerId]->state, PROVISIONING);
@@ -988,14 +988,14 @@ Future<list<Option<ContainerLaunchInfo>>> MesosContainerizerProcess::prepare(
   // 'prepare' starts since onAny is not guaranteed to be executed
   // in order.
   if (!containers_.contains(containerId)) {
-    return Failure("Container has been destroyed");
+    return Failure("Container destroyed during provisioning");
   }
 
   // Make sure containerizer is not in DESTROYING state, to avoid
   // a possible race that containerizer is destroying the container
   // while it is preparing isolators for the container.
   if (containers_[containerId]->state == DESTROYING) {
-    return Failure("Container is currently being destroyed");
+    return Failure("Container is being destroyed during provisioning");
   }
 
   CHECK_EQ(containers_[containerId]->state, PROVISIONING);
@@ -1053,11 +1053,11 @@ Future<Nothing> MesosContainerizerProcess::fetch(
     const SlaveID& slaveId)
 {
   if (!containers_.contains(containerId)) {
-    return Failure("Container is already destroyed");
+    return Failure("Container destroyed during isolating");
   }
 
   if (containers_[containerId]->state == DESTROYING) {
-    return Failure("Container is currently being destroyed");
+    return Failure("Container is being destroyed during isolating");
   }
 
   CHECK_EQ(containers_[containerId]->state, ISOLATING);
@@ -1093,11 +1093,11 @@ Future<bool> MesosContainerizerProcess::__launch(
     const list<Option<ContainerLaunchInfo>>& launchInfos)
 {
   if (!containers_.contains(containerId)) {
-    return Failure("Container has been destroyed");
+    return Failure("Container destroyed during preparing");
   }
 
   if (containers_[containerId]->state == DESTROYING) {
-    return Failure("Container is currently being destroyed");
+    return Failure("Container is being destroyed during preparing");
   }
 
   CHECK_EQ(containers_[containerId]->state, PREPARING);
@@ -1356,11 +1356,11 @@ Future<bool> MesosContainerizerProcess::isolate(
     pid_t _pid)
 {
   if (!containers_.contains(containerId)) {
-    return Failure("Container is already destroyed");
+    return Failure("Container destroyed during preparing");
   }
 
   if (containers_[containerId]->state == DESTROYING) {
-    return Failure("Container is currently being destroyed");
+    return Failure("Container is being destroyed during preparing");
   }
 
   CHECK_EQ(containers_[containerId]->state, PREPARING);
@@ -1397,9 +1397,12 @@ Future<bool> MesosContainerizerProcess::exec(
 {
   // The container may be destroyed before we exec the executor so
   // return failure here.
-  if (!containers_.contains(containerId) ||
-      containers_[containerId]->state == DESTROYING) {
-    return Failure("Container destroyed during launch");
+  if (!containers_.contains(containerId)) {
+    return Failure("Container destroyed during fetching");
+  }
+
+  if (containers_[containerId]->state == DESTROYING) {
+    return Failure("Container is being destroyed during fetching");
   }
 
   CHECK_EQ(containers_[containerId]->state, FETCHING);


[3/5] mesos git commit: Fixed Mesos containerizer to set container FETCHING state.

Posted by ya...@apache.org.
Fixed Mesos containerizer to set container FETCHING state.

If the container state is not properly set to FETCHING, the Mesos agent
cannot detect the terminated executor when the fetcher times out.

Review: https://reviews.apache.org/r/49650


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/114474c4
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/114474c4
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/114474c4

Branch: refs/heads/master
Commit: 114474c443678997da8f931a41703f1095206421
Parents: b16dbb2
Author: Jiang Yan Xu <xu...@apple.com>
Authored: Fri Jul 1 15:27:37 2016 -0700
Committer: Jiang Yan Xu <xu...@apple.com>
Committed: Wed Jul 6 14:53:46 2016 -0700

----------------------------------------------------------------------
 src/slave/containerizer/mesos/containerizer.cpp | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/114474c4/src/slave/containerizer/mesos/containerizer.cpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/mesos/containerizer.cpp b/src/slave/containerizer/mesos/containerizer.cpp
index f53b01b..5a6ec85 100644
--- a/src/slave/containerizer/mesos/containerizer.cpp
+++ b/src/slave/containerizer/mesos/containerizer.cpp
@@ -1052,6 +1052,12 @@ Future<Nothing> MesosContainerizerProcess::fetch(
     return Failure("Container is already destroyed");
   }
 
+  if (containers_[containerId]->state == DESTROYING) {
+    return Failure("Container is currently being destroyed");
+  }
+
+  containers_[containerId]->state = FETCHING;
+
   return fetcher->fetch(
       containerId,
       commandInfo,
@@ -1620,10 +1626,6 @@ void MesosContainerizerProcess::destroy(
     return;
   }
 
-  if (container->state == FETCHING) {
-    fetcher->kill(containerId);
-  }
-
   if (container->state == ISOLATING) {
     VLOG(1) << "Waiting for the isolators to complete for container '"
             << containerId << "'";
@@ -1638,6 +1640,11 @@ void MesosContainerizerProcess::destroy(
     return;
   }
 
+  // Either RUNNING or FETCHING at this point.
+  if (container->state == FETCHING) {
+    fetcher->kill(containerId);
+  }
+
   container->state = DESTROYING;
   _destroy(containerId);
 }


[2/5] mesos git commit: Improved Mesos containerizer logging and documentation.

Posted by ya...@apache.org.
Improved Mesos containerizer logging and documentation.

Review: https://reviews.apache.org/r/49651


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/48b1bfa6
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/48b1bfa6
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/48b1bfa6

Branch: refs/heads/master
Commit: 48b1bfa6ec5f88ceea327ae3c5345fd4d11442c7
Parents: dc18dd7
Author: Jiang Yan Xu <xu...@apple.com>
Authored: Fri Jul 1 15:25:54 2016 -0700
Committer: Jiang Yan Xu <xu...@apple.com>
Committed: Wed Jul 6 14:53:46 2016 -0700

----------------------------------------------------------------------
 src/slave/containerizer/mesos/containerizer.cpp | 21 +++++++++++++++++---
 1 file changed, 18 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/48b1bfa6/src/slave/containerizer/mesos/containerizer.cpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/mesos/containerizer.cpp b/src/slave/containerizer/mesos/containerizer.cpp
index e946a28..0b2abba 100644
--- a/src/slave/containerizer/mesos/containerizer.cpp
+++ b/src/slave/containerizer/mesos/containerizer.cpp
@@ -1414,7 +1414,10 @@ Future<containerizer::Termination> MesosContainerizerProcess::wait(
     const ContainerID& containerId)
 {
   if (!containers_.contains(containerId)) {
-    return Failure("Unknown container: " + stringify(containerId));
+    // See the comments in destroy() for race conditions which lead
+    // to "unknown containers".
+    return Failure("Unknown container (could have already been destroyed): " +
+                   stringify(containerId));
   }
 
   return containers_[containerId]->promise.future();
@@ -1578,14 +1581,26 @@ void MesosContainerizerProcess::destroy(
     const ContainerID& containerId)
 {
   if (!containers_.contains(containerId)) {
-    LOG(WARNING) << "Ignoring destroy of unknown container: " << containerId;
+    // This can happen due to the race between destroys initiated by
+    // the launch failure, the terminated executor and the agent so
+    // the same container is destroyed multiple times in reaction to
+    // one failure. e.g., a stuck fetcher results in:
+    // - The agent invoking destroy(), which kills the fetcher and
+    //   the executor.
+    // - The agent invoking destroy() again for the failed launch
+    //   (due to the fetcher getting killed).
+    // - The containerizer invoking destroy() for the reaped executor.
+    //
+    // The guard here and `if (container->state == DESTROYING)` below
+    // make sure redundant destroys short-circuit.
+    VLOG(1) << "Ignoring destroy of unknown container: " << containerId;
     return;
   }
 
   Container* container = containers_[containerId].get();
 
   if (container->state == DESTROYING) {
-    // Destroy has already been initiated.
+    VLOG(1) << "Destroy has already been initiated for '" << containerId << "'";
     return;
   }
 


[5/5] mesos git commit: Fail container launch if it's destroyed during logger->prepare().

Posted by ya...@apache.org.
Fail container launch if it's destroyed during logger->prepare().

Review: https://reviews.apache.org/r/49725


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/dc18dd7a
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/dc18dd7a
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/dc18dd7a

Branch: refs/heads/master
Commit: dc18dd7a5ec48a184aeb1c5a7c475ecf7691734b
Parents: 114474c
Author: Jiang Yan Xu <xu...@apple.com>
Authored: Wed Jul 6 13:48:34 2016 -0700
Committer: Jiang Yan Xu <xu...@apple.com>
Committed: Wed Jul 6 14:53:46 2016 -0700

----------------------------------------------------------------------
 src/slave/containerizer/mesos/containerizer.cpp | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/dc18dd7a/src/slave/containerizer/mesos/containerizer.cpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/mesos/containerizer.cpp b/src/slave/containerizer/mesos/containerizer.cpp
index 5a6ec85..e946a28 100644
--- a/src/slave/containerizer/mesos/containerizer.cpp
+++ b/src/slave/containerizer/mesos/containerizer.cpp
@@ -1347,7 +1347,13 @@ Future<bool> MesosContainerizerProcess::isolate(
     const ContainerID& containerId,
     pid_t _pid)
 {
-  CHECK(containers_.contains(containerId));
+  if (!containers_.contains(containerId)) {
+    return Failure("Container is already destroyed");
+  }
+
+  if (containers_[containerId]->state == DESTROYING) {
+    return Failure("Container is currently being destroyed");
+  }
 
   containers_[containerId]->state = ISOLATING;
 


[4/5] mesos git commit: Improved Mesos containerizer invariant checking.

Posted by ya...@apache.org.
Improved Mesos containerizer invariant checking.

One of the reasons for MESOS-5763 is the lack of invariant
checking. The Mesos containerizer transitions the container state in
particular ways, so when continuation chains could potentially be
interleaved with other actions we should verify the state transitions.

Review: https://reviews.apache.org/r/49652


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/8907b5d5
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/8907b5d5
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/8907b5d5

Branch: refs/heads/master
Commit: 8907b5d5e1f007c4592a0417a4d9f20d7e1f8efd
Parents: 48b1bfa
Author: Jiang Yan Xu <xu...@apple.com>
Authored: Fri Jul 1 18:11:29 2016 -0700
Committer: Jiang Yan Xu <xu...@apple.com>
Committed: Wed Jul 6 14:53:46 2016 -0700

----------------------------------------------------------------------
 src/slave/containerizer/mesos/containerizer.cpp | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/8907b5d5/src/slave/containerizer/mesos/containerizer.cpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/mesos/containerizer.cpp b/src/slave/containerizer/mesos/containerizer.cpp
index 0b2abba..fe6ddc8 100644
--- a/src/slave/containerizer/mesos/containerizer.cpp
+++ b/src/slave/containerizer/mesos/containerizer.cpp
@@ -895,6 +895,8 @@ Future<bool> MesosContainerizerProcess::_launch(
     return Failure("Container is currently being destroyed");
   }
 
+  CHECK_EQ(containers_[containerId]->state, PROVISIONING);
+
   // We will provision the images specified in ContainerInfo::volumes
   // as well. We will mutate ContainerInfo::volumes to include the
   // paths to the provisioned root filesystems (by setting the
@@ -996,6 +998,8 @@ Future<list<Option<ContainerLaunchInfo>>> MesosContainerizerProcess::prepare(
     return Failure("Container is currently being destroyed");
   }
 
+  CHECK_EQ(containers_[containerId]->state, PROVISIONING);
+
   containers_[containerId]->state = PREPARING;
 
   // Construct ContainerConfig.
@@ -1056,6 +1060,8 @@ Future<Nothing> MesosContainerizerProcess::fetch(
     return Failure("Container is currently being destroyed");
   }
 
+  CHECK_EQ(containers_[containerId]->state, ISOLATING);
+
   containers_[containerId]->state = FETCHING;
 
   return fetcher->fetch(
@@ -1094,6 +1100,8 @@ Future<bool> MesosContainerizerProcess::__launch(
     return Failure("Container is currently being destroyed");
   }
 
+  CHECK_EQ(containers_[containerId]->state, PREPARING);
+
   // Prepare environment variables for the executor.
   map<string, string> environment = executorEnvironment(
       executorInfo,
@@ -1355,6 +1363,8 @@ Future<bool> MesosContainerizerProcess::isolate(
     return Failure("Container is currently being destroyed");
   }
 
+  CHECK_EQ(containers_[containerId]->state, PREPARING);
+
   containers_[containerId]->state = ISOLATING;
 
   // Set up callbacks for isolator limitations.
@@ -1392,6 +1402,8 @@ Future<bool> MesosContainerizerProcess::exec(
     return Failure("Container destroyed during launch");
   }
 
+  CHECK_EQ(containers_[containerId]->state, FETCHING);
+
   // Now that we've contained the child we can signal it to continue
   // by writing to the pipe.
   char dummy;
@@ -1674,6 +1686,8 @@ void MesosContainerizerProcess::destroy(
 void MesosContainerizerProcess::_destroy(
     const ContainerID& containerId)
 {
+  CHECK(containers_.contains(containerId));
+
   // Kill all processes then continue destruction.
   launcher->destroy(containerId)
     .onAny(defer(self(), &Self::__destroy, containerId, lambda::_1));
@@ -1722,6 +1736,8 @@ void MesosContainerizerProcess::___destroy(
     const Future<Option<int>>& status,
     const Option<string>& message)
 {
+  CHECK(containers_.contains(containerId));
+
   cleanupIsolators(containerId)
     .onAny(defer(self(),
                  &Self::____destroy,