You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by ne...@apache.org on 2017/05/31 18:41:52 UTC
[02/10] mesos git commit: Removed code for handling missing
FrameworkInfo of a running task.
Removed code for handling missing FrameworkInfo of a running task.
In previous versions of Mesos, the master might know about a task
running on an agent, but might not have a `FrameworkInfo` for that
task's framework. This might occur if the master fails over, the agent
re-registers, but the framework has not (yet) re-registered.
In Mesos 1.0, the agent will now supply the FrameworkInfo for all tasks
it is running when it re-registers with the master. Since we no longer
support 0.x agents, we can remove the old backward compatibility code
for handling a running task with unknown FrameworkInfo.
Note that this means that "orphan tasks", "orphan executors", and
"unregistered frameworks" are no longer possible.
Review: https://reviews.apache.org/r/59214
Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/ba8c8248
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/ba8c8248
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/ba8c8248
Branch: refs/heads/master
Commit: ba8c8248ab8055fe87f175eff308db870a6ee37b
Parents: 09143ce
Author: Neil Conway <ne...@gmail.com>
Authored: Thu May 11 15:57:47 2017 -0700
Committer: Neil Conway <ne...@gmail.com>
Committed: Wed May 31 11:40:19 2017 -0700
----------------------------------------------------------------------
src/master/http.cpp | 135 +++------------------------------------------
src/master/master.cpp | 62 +++------------------
2 files changed, 17 insertions(+), 180 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/mesos/blob/ba8c8248/src/master/http.cpp
----------------------------------------------------------------------
diff --git a/src/master/http.cpp b/src/master/http.cpp
index eb80830..1dcfe6e 100644
--- a/src/master/http.cpp
+++ b/src/master/http.cpp
@@ -1451,28 +1451,9 @@ Future<Response> Master::Http::frameworks(
}
});
- // Model all unregistered frameworks. Such frameworks are only
- // possible if the cluster contains pre-1.0 agents.
- //
- // TODO(neilc): Remove this once we break compatibility with
- // pre-1.0 agents.
- //
- // TODO(vinod): Need to filter these frameworks based on authorization!
- // See TODO in `state()` for further details.
- writer->field("unregistered_frameworks", [this](
- JSON::ArrayWriter* writer) {
- // Find unregistered frameworks.
- hashset<FrameworkID> frameworkIds;
- foreachvalue (const Slave* slave, master->slaves.registered) {
- foreachkey (const FrameworkID& frameworkId, slave->tasks) {
- if (!master->frameworks.registered.contains(frameworkId) &&
- !frameworkIds.contains(frameworkId)) {
- writer->element(frameworkId.value());
- frameworkIds.insert(frameworkId);
- }
- }
- }
- });
+ // Unregistered frameworks are no longer possible. We emit an
+ // empty array for the sake of backward compatibility.
+ writer->field("unregistered_frameworks", [](JSON::ArrayWriter*) {});
};
return OK(jsonify(frameworks), request.url.query.get("jsonp"));
@@ -1681,36 +1662,6 @@ mesos::master::Response::GetExecutors Master::Http::_getExecutors(
}
}
- // Orphan executors. Such executors are only possible if the cluster
- // contains pre-1.0 agents.
- //
- // TODO(neilc): Remove this once we break compatibility with pre-1.0
- // agents.
- foreachvalue (const Slave* slave, master->slaves.registered) {
- typedef hashmap<ExecutorID, ExecutorInfo> ExecutorMap;
- foreachpair (const FrameworkID& frameworkId,
- const ExecutorMap& executors,
- slave->executors) {
- foreachvalue (const ExecutorInfo& executorInfo, executors) {
- if (!master->frameworks.registered.contains(frameworkId)) {
- // If authorization is enabled, do not show any orphaned
- // executors. We need the executor's FrameworkInfo to
- // authorize it, but if we had its FrameworkInfo, it would
- // not be orphaned.
- if (master->authorizer.isSome()) {
- continue;
- }
-
- mesos::master::Response::GetExecutors::Executor* executor =
- getExecutors.add_orphan_executors();
-
- executor->mutable_executor_info()->CopyFrom(executorInfo);
- executor->mutable_slave_id()->CopyFrom(slave->id);
- }
- }
- }
- }
-
return getExecutors;
}
@@ -2871,56 +2822,13 @@ Future<Response> Master::Http::state(
}
});
- // Model all of the orphan tasks. Such tasks are only possible
- // if the cluster contains pre-1.0 agents.
- //
- // TODO(neilc): Remove this once we break compatibility with
- // pre-1.0 agents.
- writer->field("orphan_tasks", [this](
- JSON::ArrayWriter* writer) {
- foreachvalue (const Slave* slave, master->slaves.registered) {
- typedef hashmap<TaskID, Task*> TaskMap;
- foreachvalue (const TaskMap& tasks, slave->tasks) {
- foreachvalue (const Task* task, tasks) {
- const FrameworkID& frameworkId = task->framework_id();
- if (!master->frameworks.registered.contains(frameworkId)) {
- // If authorization is enabled, do not show any
- // orphan tasks. We need the task's FrameworkInfo to
- // authorize it, but if we had its FrameworkInfo, it
- // would not be an orphan.
- if (master->authorizer.isSome()) {
- continue;
- }
-
- writer->element(*task);
- }
- }
- }
- }
- });
+ // Orphan tasks are no longer possible. We emit an empty array
+ // for the sake of backward compatibility.
+ writer->field("orphan_tasks", [](JSON::ArrayWriter*) {});
- // Model all unregistered frameworks. Such frameworks are only
- // possible if the cluster contains pre-1.0 agents.
- //
- // TODO(neilc): Remove this once we break compatibility with
- // pre-1.0 agents.
- //
- // TODO(vinod): Need to filter these frameworks based on authorization!
- // See the TODO above for "orphan_tasks" for further details.
- writer->field("unregistered_frameworks", [this](
- JSON::ArrayWriter* writer) {
- // Find unregistered frameworks.
- hashset<FrameworkID> frameworkIds;
- foreachvalue (const Slave* slave, master->slaves.registered) {
- foreachkey (const FrameworkID& frameworkId, slave->tasks) {
- if (!master->frameworks.registered.contains(frameworkId) &&
- !frameworkIds.contains(frameworkId)) {
- writer->element(frameworkId.value());
- frameworkIds.insert(frameworkId);
- }
- }
- }
- });
+ // Unregistered frameworks are no longer possible. We emit an
+ // empty array for the sake of backward compatibility.
+ writer->field("unregistered_frameworks", [](JSON::ArrayWriter*) {});
};
return OK(jsonify(state), request.url.query.get("jsonp"));
@@ -4072,31 +3980,6 @@ mesos::master::Response::GetTasks Master::Http::_getTasks(
}
}
- // Orphan tasks. Such tasks are only possible if the cluster
- // contains pre-1.0 agents.
- //
- // TODO(neilc): Remove this once we break compatibility with pre-1.0
- // agents.
- foreachvalue (const Slave* slave, master->slaves.registered) {
- typedef hashmap<TaskID, Task*> TaskMap;
- foreachvalue (const TaskMap& tasks, slave->tasks) {
- foreachvalue (const Task* task, tasks) {
- const FrameworkID& frameworkId = task->framework_id();
- if (!master->frameworks.registered.contains(frameworkId)) {
- // If authorization is enabled, do not show any orphan
- // tasks. We need the task's FrameworkInfo to authorize it,
- // but if we had its FrameworkInfo, it would not be an
- // orphan.
- if (master->authorizer.isSome()) {
- continue;
- }
-
- getTasks.add_orphan_tasks()->CopyFrom(*task);
- }
- }
- }
- }
-
return getTasks;
}
http://git-wip-us.apache.org/repos/asf/mesos/blob/ba8c8248/src/master/master.cpp
----------------------------------------------------------------------
diff --git a/src/master/master.cpp b/src/master/master.cpp
index 540d607..d3f0d7e 100644
--- a/src/master/master.cpp
+++ b/src/master/master.cpp
@@ -1326,17 +1326,14 @@ void Master::exited(const UPID& pid)
// removed but the framework is removed from the slave's
// structs, its tasks transitioned to LOST and resources
// recovered.
- //
- // NOTE: If the framework hasn't re-registered yet and no agents
- // (>= Mesos 1.0) have re-registered that are running any of the
- // framework's tasks, we can't determine if the framework has
- // enabled checkpointing. In that case, we assume it has.
hashset<FrameworkID> frameworkIds =
slave->tasks.keys() | slave->executors.keys();
foreach (const FrameworkID& frameworkId, frameworkIds) {
Framework* framework = getFramework(frameworkId);
- if (framework != nullptr && !framework->info.checkpoint()) {
+ CHECK_NOTNULL(framework);
+
+ if (!framework->info.checkpoint()) {
LOG(INFO) << "Removing framework " << *framework
<< " from disconnected agent " << *slave
<< " because the framework is not checkpointing";
@@ -6022,29 +6019,9 @@ void Master::__reregisterSlave(
machineId.set_ip(stringify(pid.address.ip));
// For easy lookup, first determine the set of FrameworkIDs on the
- // re-registering agent that are partition-aware. We examine both
- // the frameworks and the tasks running on the agent. The former is
- // necessary because the master might have failed over and not know
- // about a framework running on the agent; the latter is necessary
- // because pre-1.0 Mesos agents don't supply a list of running
- // frameworks on re-registration.
+ // re-registering agent that are partition-aware.
hashset<FrameworkID> partitionAwareFrameworks;
- // TODO(neilc): Remove this loop when we remove compatibility with
- // pre-1.0 Mesos agents.
- foreach (const Task& task, tasks) {
- Framework* framework = getFramework(task.framework_id());
-
- if (framework == nullptr) {
- continue;
- }
-
- if (protobuf::frameworkHasCapability(
- framework->info, FrameworkInfo::Capability::PARTITION_AWARE)) {
- partitionAwareFrameworks.insert(framework->id());
- }
- }
-
foreach (const FrameworkInfo& framework, frameworks) {
if (protobuf::frameworkHasCapability(
framework, FrameworkInfo::Capability::PARTITION_AWARE)) {
@@ -6192,15 +6169,6 @@ void Master::___reregisterSlave(
// Send the latest framework pids to the slave.
hashset<FrameworkID> ids;
- // TODO(joerg84): Remove this after a deprecation cycle starting
- // with the 1.0 release. It is only required if an older (pre 1.0)
- // agent reregisters with a newer master. In that case the agent
- // does not have the `frameworks` field set which is used below to
- // retrieve the framework information.
- foreach (const Task& task, tasks) {
- ids.insert(task.framework_id());
- }
-
foreach (const FrameworkInfo& frameworkInfo, frameworks) {
CHECK(frameworkInfo.has_id());
ids.insert(frameworkInfo.id());
@@ -6773,22 +6741,10 @@ void Master::_markUnreachable(
// PARTITION_AWARE capability.
foreachkey (const FrameworkID& frameworkId, utils::copy(slave->tasks)) {
Framework* framework = getFramework(frameworkId);
-
- if (framework == nullptr) {
- // If the framework is only running tasks on pre-1.0 agents and
- // the framework hasn't yet re-registered, the master doesn't
- // have a FrameworkInfo for it, and hence we cannot accurately
- // determine whether the framework is partition-aware. We assume
- // it is NOT partition-aware, since using TASK_LOST ensures
- // compatibility with the previous (and default) Mesos behavior.
- LOG(WARNING) << "Unable to determine if framework " << frameworkId
- << " is partition-aware, because the cluster contains"
- << " agents running an old version of Mesos; upgrading"
- << " the agents to Mesos 1.0 or later is recommended";
- }
+ CHECK_NOTNULL(framework);
TaskState newTaskState = TASK_UNREACHABLE;
- if (framework == nullptr || !protobuf::frameworkHasCapability(
+ if (!protobuf::frameworkHasCapability(
framework->info, FrameworkInfo::Capability::PARTITION_AWARE)) {
newTaskState = TASK_LOST;
}
@@ -6814,11 +6770,9 @@ void Master::_markUnreachable(
updateTask(task, update);
removeTask(task);
- if (framework == nullptr || !framework->connected()) {
- string status = (framework == nullptr ? "unknown" : "disconnected");
-
+ if (!framework->connected()) {
LOG(WARNING) << "Dropping update " << update
- << " for " << status
+ << " for disconnected "
<< " framework " << frameworkId;
} else {
forward(update, UPID(), framework);