You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by ya...@apache.org on 2016/10/27 21:16:33 UTC

mesos git commit: Avoided CHECK failure with pre-1.0 agents.

Repository: mesos
Updated Branches:
  refs/heads/master f65fee495 -> 713922fcc


Avoided CHECK failure with pre-1.0 agents.

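We don't guarantee compatibility with pre-1.0 agents. However, since it
is easy to avoid a CHECK failure in the master when an old agent
re-registers, it seems worth doing so. In `_markUnreachable`, when the
framework is not registered and its FrameworkInfo is not in `recovered`,
the master now logs a warning and treats the framework as not
partition-aware (using TASK_LOST) instead of failing the CHECK.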

Review: https://reviews.apache.org/r/53202/


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/713922fc
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/713922fc
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/713922fc

Branch: refs/heads/master
Commit: 713922fcc00c944adb464d9bd78a37db58f9f8a8
Parents: f65fee4
Author: Neil Conway <ne...@gmail.com>
Authored: Thu Oct 27 14:16:01 2016 -0700
Committer: Jiang Yan Xu <xu...@apple.com>
Committed: Thu Oct 27 14:16:01 2016 -0700

----------------------------------------------------------------------
 src/master/master.cpp | 23 +++++++++++++++++------
 1 file changed, 17 insertions(+), 6 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/713922fc/src/master/master.cpp
----------------------------------------------------------------------
diff --git a/src/master/master.cpp b/src/master/master.cpp
index 8692726..013bb59 100644
--- a/src/master/master.cpp
+++ b/src/master/master.cpp
@@ -6039,18 +6039,29 @@ void Master::_markUnreachable(
     // its FrameworkInfo will be in the `recovered` collection. Note that
     // if the master knows about a task, its FrameworkInfo must appear in
     // either the `registered` or `recovered` collections.
-    FrameworkInfo frameworkInfo;
+    //
+    // NOTE: If the framework is only running tasks on pre-1.0 agents
+    // and the framework hasn't yet re-registered, its FrameworkInfo
+    // will not appear in `recovered`. We can't accurately determine
+    // whether the framework is partition-aware; we assume it is NOT
+    // partition-aware, since using TASK_LOST ensures compatibility
+    // with the previous (and default) Mesos behavior.
+    Option<FrameworkInfo> frameworkInfo;
 
-    if (framework == nullptr) {
-      CHECK(frameworks.recovered.contains(frameworkId));
+    if (framework != nullptr) {
+      frameworkInfo = framework->info;
+    } else if (frameworks.recovered.contains(frameworkId)) {
       frameworkInfo = frameworks.recovered[frameworkId];
     } else {
-      frameworkInfo = framework->info;
+      LOG(WARNING) << "Unable to determine if framework " << frameworkId
+                   << " is partition-aware, because the cluster contains"
+                   << " agents running an old version of Mesos; upgrading"
+                   << " the agents to Mesos 1.0 or later is recommended";
     }
 
     TaskState newTaskState = TASK_UNREACHABLE;
-    if (!protobuf::frameworkHasCapability(
-            frameworkInfo, FrameworkInfo::Capability::PARTITION_AWARE)) {
+    if (frameworkInfo.isNone() || !protobuf::frameworkHasCapability(
+            frameworkInfo.get(), FrameworkInfo::Capability::PARTITION_AWARE)) {
       newTaskState = TASK_LOST;
     }