You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by ne...@apache.org on 2017/05/31 18:41:52 UTC

[02/10] mesos git commit: Removed code for handling missing FrameworkInfo of a running task.

Removed code for handling missing FrameworkInfo of a running task.

In previous versions of Mesos, the master might know about a task
running on an agent, but might not have a `FrameworkInfo` for that
task's framework. This might occur if the master fails over, the agent
re-registers, but the framework has not (yet) re-registered.

In Mesos 1.0, the agent will now supply the FrameworkInfo for all tasks
it is running when it re-registers with the master. Since we no longer
support 0.x agents, we can remove the old backward compatibility code
for handling a running task with unknown FrameworkInfo.

Note that this means that "orphan tasks", "orphan executors", and
"unregistered frameworks" are no longer possible.

Review: https://reviews.apache.org/r/59214


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/ba8c8248
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/ba8c8248
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/ba8c8248

Branch: refs/heads/master
Commit: ba8c8248ab8055fe87f175eff308db870a6ee37b
Parents: 09143ce
Author: Neil Conway <ne...@gmail.com>
Authored: Thu May 11 15:57:47 2017 -0700
Committer: Neil Conway <ne...@gmail.com>
Committed: Wed May 31 11:40:19 2017 -0700

----------------------------------------------------------------------
 src/master/http.cpp   | 135 +++------------------------------------------
 src/master/master.cpp |  62 +++------------------
 2 files changed, 17 insertions(+), 180 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/ba8c8248/src/master/http.cpp
----------------------------------------------------------------------
diff --git a/src/master/http.cpp b/src/master/http.cpp
index eb80830..1dcfe6e 100644
--- a/src/master/http.cpp
+++ b/src/master/http.cpp
@@ -1451,28 +1451,9 @@ Future<Response> Master::Http::frameworks(
               }
             });
 
-        // Model all unregistered frameworks. Such frameworks are only
-        // possible if the cluster contains pre-1.0 agents.
-        //
-        // TODO(neilc): Remove this once we break compatibility with
-        // pre-1.0 agents.
-        //
-        // TODO(vinod): Need to filter these frameworks based on authorization!
-        // See TODO in `state()` for further details.
-        writer->field("unregistered_frameworks", [this](
-            JSON::ArrayWriter* writer) {
-          // Find unregistered frameworks.
-          hashset<FrameworkID> frameworkIds;
-          foreachvalue (const Slave* slave, master->slaves.registered) {
-            foreachkey (const FrameworkID& frameworkId, slave->tasks) {
-              if (!master->frameworks.registered.contains(frameworkId) &&
-                  !frameworkIds.contains(frameworkId)) {
-                writer->element(frameworkId.value());
-                frameworkIds.insert(frameworkId);
-              }
-            }
-          }
-        });
+        // Unregistered frameworks are no longer possible. We emit an
+        // empty array for the sake of backward compatibility.
+        writer->field("unregistered_frameworks", [](JSON::ArrayWriter*) {});
       };
 
       return OK(jsonify(frameworks), request.url.query.get("jsonp"));
@@ -1681,36 +1662,6 @@ mesos::master::Response::GetExecutors Master::Http::_getExecutors(
     }
   }
 
-  // Orphan executors. Such executors are only possible if the cluster
-  // contains pre-1.0 agents.
-  //
-  // TODO(neilc): Remove this once we break compatibility with pre-1.0
-  // agents.
-  foreachvalue (const Slave* slave, master->slaves.registered) {
-    typedef hashmap<ExecutorID, ExecutorInfo> ExecutorMap;
-    foreachpair (const FrameworkID& frameworkId,
-                 const ExecutorMap& executors,
-                 slave->executors) {
-      foreachvalue (const ExecutorInfo& executorInfo, executors) {
-        if (!master->frameworks.registered.contains(frameworkId)) {
-          // If authorization is enabled, do not show any orphaned
-          // executors. We need the executor's FrameworkInfo to
-          // authorize it, but if we had its FrameworkInfo, it would
-          // not be orphaned.
-          if (master->authorizer.isSome()) {
-            continue;
-          }
-
-          mesos::master::Response::GetExecutors::Executor* executor =
-            getExecutors.add_orphan_executors();
-
-          executor->mutable_executor_info()->CopyFrom(executorInfo);
-          executor->mutable_slave_id()->CopyFrom(slave->id);
-        }
-      }
-    }
-  }
-
   return getExecutors;
 }
 
@@ -2871,56 +2822,13 @@ Future<Response> Master::Http::state(
               }
             });
 
-        // Model all of the orphan tasks. Such tasks are only possible
-        // if the cluster contains pre-1.0 agents.
-        //
-        // TODO(neilc): Remove this once we break compatibility with
-        // pre-1.0 agents.
-        writer->field("orphan_tasks", [this](
-            JSON::ArrayWriter* writer) {
-          foreachvalue (const Slave* slave, master->slaves.registered) {
-            typedef hashmap<TaskID, Task*> TaskMap;
-            foreachvalue (const TaskMap& tasks, slave->tasks) {
-              foreachvalue (const Task* task, tasks) {
-                const FrameworkID& frameworkId = task->framework_id();
-                if (!master->frameworks.registered.contains(frameworkId)) {
-                  // If authorization is enabled, do not show any
-                  // orphan tasks. We need the task's FrameworkInfo to
-                  // authorize it, but if we had its FrameworkInfo, it
-                  // would not be an orphan.
-                  if (master->authorizer.isSome()) {
-                    continue;
-                  }
-
-                  writer->element(*task);
-                }
-              }
-            }
-          }
-        });
+        // Orphan tasks are no longer possible. We emit an empty array
+        // for the sake of backward compatibility.
+        writer->field("orphan_tasks", [](JSON::ArrayWriter*) {});
 
-        // Model all unregistered frameworks. Such frameworks are only
-        // possible if the cluster contains pre-1.0 agents.
-        //
-        // TODO(neilc): Remove this once we break compatibility with
-        // pre-1.0 agents.
-        //
-        // TODO(vinod): Need to filter these frameworks based on authorization!
-        // See the TODO above for "orphan_tasks" for further details.
-        writer->field("unregistered_frameworks", [this](
-            JSON::ArrayWriter* writer) {
-          // Find unregistered frameworks.
-          hashset<FrameworkID> frameworkIds;
-          foreachvalue (const Slave* slave, master->slaves.registered) {
-            foreachkey (const FrameworkID& frameworkId, slave->tasks) {
-              if (!master->frameworks.registered.contains(frameworkId) &&
-                  !frameworkIds.contains(frameworkId)) {
-                writer->element(frameworkId.value());
-                frameworkIds.insert(frameworkId);
-              }
-            }
-          }
-        });
+        // Unregistered frameworks are no longer possible. We emit an
+        // empty array for the sake of backward compatibility.
+        writer->field("unregistered_frameworks", [](JSON::ArrayWriter*) {});
       };
 
       return OK(jsonify(state), request.url.query.get("jsonp"));
@@ -4072,31 +3980,6 @@ mesos::master::Response::GetTasks Master::Http::_getTasks(
     }
   }
 
-  // Orphan tasks. Such tasks are only possible if the cluster
-  // contains pre-1.0 agents.
-  //
-  // TODO(neilc): Remove this once we break compatibility with pre-1.0
-  // agents.
-  foreachvalue (const Slave* slave, master->slaves.registered) {
-    typedef hashmap<TaskID, Task*> TaskMap;
-    foreachvalue (const TaskMap& tasks, slave->tasks) {
-      foreachvalue (const Task* task, tasks) {
-        const FrameworkID& frameworkId = task->framework_id();
-        if (!master->frameworks.registered.contains(frameworkId)) {
-          // If authorization is enabled, do not show any orphan
-          // tasks. We need the task's FrameworkInfo to authorize it,
-          // but if we had its FrameworkInfo, it would not be an
-          // orphan.
-          if (master->authorizer.isSome()) {
-            continue;
-          }
-
-          getTasks.add_orphan_tasks()->CopyFrom(*task);
-        }
-      }
-    }
-  }
-
   return getTasks;
 }
 

http://git-wip-us.apache.org/repos/asf/mesos/blob/ba8c8248/src/master/master.cpp
----------------------------------------------------------------------
diff --git a/src/master/master.cpp b/src/master/master.cpp
index 540d607..d3f0d7e 100644
--- a/src/master/master.cpp
+++ b/src/master/master.cpp
@@ -1326,17 +1326,14 @@ void Master::exited(const UPID& pid)
       //    removed but the framework is removed from the slave's
       //    structs, its tasks transitioned to LOST and resources
       //    recovered.
-      //
-      // NOTE: If the framework hasn't re-registered yet and no agents
-      // (>= Mesos 1.0) have re-registered that are running any of the
-      // framework's tasks, we can't determine if the framework has
-      // enabled checkpointing. In that case, we assume it has.
       hashset<FrameworkID> frameworkIds =
         slave->tasks.keys() | slave->executors.keys();
 
       foreach (const FrameworkID& frameworkId, frameworkIds) {
         Framework* framework = getFramework(frameworkId);
-        if (framework != nullptr && !framework->info.checkpoint()) {
+        CHECK_NOTNULL(framework);
+
+        if (!framework->info.checkpoint()) {
           LOG(INFO) << "Removing framework " << *framework
                     << " from disconnected agent " << *slave
                     << " because the framework is not checkpointing";
@@ -6022,29 +6019,9 @@ void Master::__reregisterSlave(
   machineId.set_ip(stringify(pid.address.ip));
 
   // For easy lookup, first determine the set of FrameworkIDs on the
-  // re-registering agent that are partition-aware. We examine both
-  // the frameworks and the tasks running on the agent. The former is
-  // necessary because the master might have failed over and not know
-  // about a framework running on the agent; the latter is necessary
-  // because pre-1.0 Mesos agents don't supply a list of running
-  // frameworks on re-registration.
+  // re-registering agent that are partition-aware.
   hashset<FrameworkID> partitionAwareFrameworks;
 
-  // TODO(neilc): Remove this loop when we remove compatibility with
-  // pre-1.0 Mesos agents.
-  foreach (const Task& task, tasks) {
-    Framework* framework = getFramework(task.framework_id());
-
-    if (framework == nullptr) {
-      continue;
-    }
-
-    if (protobuf::frameworkHasCapability(
-            framework->info, FrameworkInfo::Capability::PARTITION_AWARE)) {
-      partitionAwareFrameworks.insert(framework->id());
-    }
-  }
-
   foreach (const FrameworkInfo& framework, frameworks) {
     if (protobuf::frameworkHasCapability(
             framework, FrameworkInfo::Capability::PARTITION_AWARE)) {
@@ -6192,15 +6169,6 @@ void Master::___reregisterSlave(
   // Send the latest framework pids to the slave.
   hashset<FrameworkID> ids;
 
-  // TODO(joerg84): Remove this after a deprecation cycle starting
-  // with the 1.0 release. It is only required if an older (pre 1.0)
-  // agent reregisters with a newer master.  In that case the agent
-  // does not have the `frameworks` field set which is used below to
-  // retrieve the framework information.
-  foreach (const Task& task, tasks) {
-    ids.insert(task.framework_id());
-  }
-
   foreach (const FrameworkInfo& frameworkInfo, frameworks) {
     CHECK(frameworkInfo.has_id());
     ids.insert(frameworkInfo.id());
@@ -6773,22 +6741,10 @@ void Master::_markUnreachable(
   // PARTITION_AWARE capability.
   foreachkey (const FrameworkID& frameworkId, utils::copy(slave->tasks)) {
     Framework* framework = getFramework(frameworkId);
-
-    if (framework == nullptr) {
-      // If the framework is only running tasks on pre-1.0 agents and
-      // the framework hasn't yet re-registered, the master doesn't
-      // have a FrameworkInfo for it, and hence we cannot accurately
-      // determine whether the framework is partition-aware. We assume
-      // it is NOT partition-aware, since using TASK_LOST ensures
-      // compatibility with the previous (and default) Mesos behavior.
-      LOG(WARNING) << "Unable to determine if framework " << frameworkId
-                   << " is partition-aware, because the cluster contains"
-                   << " agents running an old version of Mesos; upgrading"
-                   << " the agents to Mesos 1.0 or later is recommended";
-    }
+    CHECK_NOTNULL(framework);
 
     TaskState newTaskState = TASK_UNREACHABLE;
-    if (framework == nullptr || !protobuf::frameworkHasCapability(
+    if (!protobuf::frameworkHasCapability(
             framework->info, FrameworkInfo::Capability::PARTITION_AWARE)) {
       newTaskState = TASK_LOST;
     }
@@ -6814,11 +6770,9 @@ void Master::_markUnreachable(
       updateTask(task, update);
       removeTask(task);
 
-      if (framework == nullptr || !framework->connected()) {
-        string status = (framework == nullptr ? "unknown" : "disconnected");
-
+      if (!framework->connected()) {
         LOG(WARNING) << "Dropping update " << update
-                     << " for " << status
+                     << " for disconnected "
                      << " framework " << frameworkId;
       } else {
         forward(update, UPID(), framework);