You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by ya...@apache.org on 2016/10/27 21:16:33 UTC
mesos git commit: Avoided CHECK failure with pre-1.0 agents.
Repository: mesos
Updated Branches:
refs/heads/master f65fee495 -> 713922fcc
Avoided CHECK failure with pre-1.0 agents.
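We don't guarantee compatibility with pre-1.0 agents. However, since it
is easy to avoid a CHECK failure in the master when an old agent
re-registers, it seems worth doing so. When the framework's FrameworkInfo
cannot be found, the master now logs a warning and assumes the framework
is not partition-aware, using TASK_LOST rather than TASK_UNREACHABLE as
the new task state.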
Review: https://reviews.apache.org/r/53202/
Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/713922fc
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/713922fc
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/713922fc
Branch: refs/heads/master
Commit: 713922fcc00c944adb464d9bd78a37db58f9f8a8
Parents: f65fee4
Author: Neil Conway <ne...@gmail.com>
Authored: Thu Oct 27 14:16:01 2016 -0700
Committer: Jiang Yan Xu <xu...@apple.com>
Committed: Thu Oct 27 14:16:01 2016 -0700
----------------------------------------------------------------------
src/master/master.cpp | 23 +++++++++++++++++------
1 file changed, 17 insertions(+), 6 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/mesos/blob/713922fc/src/master/master.cpp
----------------------------------------------------------------------
diff --git a/src/master/master.cpp b/src/master/master.cpp
index 8692726..013bb59 100644
--- a/src/master/master.cpp
+++ b/src/master/master.cpp
@@ -6039,18 +6039,29 @@ void Master::_markUnreachable(
// its FrameworkInfo will be in the `recovered` collection. Note that
// if the master knows about a task, its FrameworkInfo must appear in
// either the `registered` or `recovered` collections.
- FrameworkInfo frameworkInfo;
+ //
+ // NOTE: If the framework is only running tasks on pre-1.0 agents
+ // and the framework hasn't yet re-registered, its FrameworkInfo
+ // will not appear in `recovered`. We can't accurately determine
+ // whether the framework is partition-aware; we assume it is NOT
+ // partition-aware, since using TASK_LOST ensures compatibility
+ // with the previous (and default) Mesos behavior.
+ Option<FrameworkInfo> frameworkInfo;
- if (framework == nullptr) {
- CHECK(frameworks.recovered.contains(frameworkId));
+ if (framework != nullptr) {
+ frameworkInfo = framework->info;
+ } else if (frameworks.recovered.contains(frameworkId)) {
frameworkInfo = frameworks.recovered[frameworkId];
} else {
- frameworkInfo = framework->info;
+ LOG(WARNING) << "Unable to determine if framework " << frameworkId
+ << " is partition-aware, because the cluster contains"
+ << " agents running an old version of Mesos; upgrading"
+ << " the agents to Mesos 1.0 or later is recommended";
}
TaskState newTaskState = TASK_UNREACHABLE;
- if (!protobuf::frameworkHasCapability(
- frameworkInfo, FrameworkInfo::Capability::PARTITION_AWARE)) {
+ if (frameworkInfo.isNone() || !protobuf::frameworkHasCapability(
+ frameworkInfo.get(), FrameworkInfo::Capability::PARTITION_AWARE)) {
newTaskState = TASK_LOST;
}