You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by ch...@apache.org on 2018/05/04 18:58:54 UTC

mesos git commit: Added more logging to agent recovery path.

Repository: mesos
Updated Branches:
  refs/heads/master e54a75229 -> e048e898e


Added more logging to agent recovery path.

Added logging in some agent recovery continuations to
make analyzing agent-recovery-related issues less painful.

Review: https://reviews.apache.org/r/66749/


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/e048e898
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/e048e898
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/e048e898

Branch: refs/heads/master
Commit: e048e898e5b6ff99c61b321b70ae49d26cd612f8
Parents: e54a752
Author: Meng Zhu <mz...@mesosphere.io>
Authored: Fri May 4 11:50:02 2018 -0700
Committer: Chun-Hung Hsiao <ch...@mesosphere.io>
Committed: Fri May 4 11:50:02 2018 -0700

----------------------------------------------------------------------
 src/slave/containerizer/composing.cpp            | 2 ++
 src/slave/containerizer/docker.cpp               | 4 ++++
 src/slave/containerizer/mesos/containerizer.cpp  | 6 +++++-
 src/slave/containerizer/mesos/linux_launcher.cpp | 2 ++
 src/slave/slave.cpp                              | 5 +++++
 5 files changed, 18 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/e048e898/src/slave/containerizer/composing.cpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/composing.cpp b/src/slave/containerizer/composing.cpp
index 186102c..1fb79f5 100644
--- a/src/slave/containerizer/composing.cpp
+++ b/src/slave/containerizer/composing.cpp
@@ -329,6 +329,8 @@ Future<Nothing> ComposingContainerizerProcess::__recover(
 
 Future<Nothing> ComposingContainerizerProcess::___recover()
 {
+  LOG(INFO) << "Finished recovering all containerizers";
+
   return Nothing();
 }
 

http://git-wip-us.apache.org/repos/asf/mesos/blob/e048e898/src/slave/containerizer/docker.cpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/docker.cpp b/src/slave/containerizer/docker.cpp
index a4c9c10..7171cb5 100644
--- a/src/slave/containerizer/docker.cpp
+++ b/src/slave/containerizer/docker.cpp
@@ -910,6 +910,8 @@ Future<Nothing> DockerContainerizerProcess::_recover(
     const Option<SlaveState>& state,
     const list<Docker::Container>& _containers)
 {
+  LOG(INFO) << "Got the list of Docker containers";
+
   if (state.isSome()) {
     // This mapping of ContainerIDs to running Docker container names
     // is established for two reasons:
@@ -1144,6 +1146,8 @@ Future<Nothing> DockerContainerizerProcess::__recover(
         }
       }
 
+      LOG(INFO) << "Finished processing orphaned Docker containers";
+
       return Nothing();
     }));
 }

http://git-wip-us.apache.org/repos/asf/mesos/blob/e048e898/src/slave/containerizer/mesos/containerizer.cpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/mesos/containerizer.cpp b/src/slave/containerizer/mesos/containerizer.cpp
index 01386ac..eac1d16 100644
--- a/src/slave/containerizer/mesos/containerizer.cpp
+++ b/src/slave/containerizer/mesos/containerizer.cpp
@@ -709,7 +709,7 @@ Future<Nothing> MesosContainerizer::pruneImages(
 Future<Nothing> MesosContainerizerProcess::recover(
     const Option<state::SlaveState>& state)
 {
-  LOG(INFO) << "Recovering containerizer";
+  LOG(INFO) << "Recovering Mesos containers";
 
   // Gather the container states that we will attempt to recover.
   list<ContainerState> recoverable;
@@ -1016,6 +1016,8 @@ Future<list<Nothing>> MesosContainerizerProcess::recoverIsolators(
     const list<ContainerState>& recoverable,
     const hashset<ContainerID>& orphans)
 {
+  LOG(INFO) << "Recovering isolators";
+
   list<Future<Nothing>> futures;
 
   // Then recover the isolators.
@@ -1053,6 +1055,8 @@ Future<Nothing> MesosContainerizerProcess::recoverProvisioner(
     const list<ContainerState>& recoverable,
     const hashset<ContainerID>& orphans)
 {
+  LOG(INFO) << "Recovering provisioner";
+
   // TODO(gilbert): Consolidate 'recoverProvisioner()' interface
   // once the launcher returns a full set of known containers.
   hashset<ContainerID> knownContainerIds = orphans;

http://git-wip-us.apache.org/repos/asf/mesos/blob/e048e898/src/slave/containerizer/mesos/linux_launcher.cpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/mesos/linux_launcher.cpp b/src/slave/containerizer/mesos/linux_launcher.cpp
index af34a85..80e4445 100644
--- a/src/slave/containerizer/mesos/linux_launcher.cpp
+++ b/src/slave/containerizer/mesos/linux_launcher.cpp
@@ -300,6 +300,8 @@ LinuxLauncherProcess::LinuxLauncherProcess(
 Future<hashset<ContainerID>> LinuxLauncherProcess::recover(
     const list<ContainerState>& states)
 {
+  LOG(INFO) << "Recovering Linux launcher";
+
   // Recover all of the "containers" we know about based on the
   // existing cgroups. Note that we check both the freezer hierarchy
   // and the systemd hierarchy (if enabled), and combine the results.

http://git-wip-us.apache.org/repos/asf/mesos/blob/e048e898/src/slave/slave.cpp
----------------------------------------------------------------------
diff --git a/src/slave/slave.cpp b/src/slave/slave.cpp
index 69280d9..c6d9152 100644
--- a/src/slave/slave.cpp
+++ b/src/slave/slave.cpp
@@ -6922,6 +6922,9 @@ Future<Nothing> Slave::recover(const Try<state::State>& state)
     return Failure(state.error());
   }
 
+  LOG(INFO) << "Finished recovering checkpointed state from '" << metaDir
+            << "', beginning agent recovery";
+
   Option<ResourcesState> resourcesState = state->resources;
   Option<SlaveState> slaveState = state->slave;
 
@@ -7148,6 +7151,8 @@ Future<Nothing> Slave::_recoverContainerizer(
 
 Future<Nothing> Slave::_recover()
 {
+  LOG(INFO) << "Recovering executors";
+
   // Alow HTTP based executors to subscribe after the
   // containerizer recovery is complete.
   recoveryInfo.reconnect = true;