You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by be...@apache.org on 2015/10/16 14:14:42 UTC

[1/2] mesos git commit: Added code that appends the fetcher log to the agent log upon fetcher failure.

Repository: mesos
Updated Branches:
  refs/heads/master 6d90b3b92 -> 7b53bb110


Added code that appends the fetcher log to the agent log upon fetcher failure.

Adds an onFailed() clause to the inspection of the fetcher subprocess run. This clause copies the fetcher log from <task sandbox>/stderr and appends it to the agent log.

This is to facilitate debugging spurious fetch failures in production or CI.

Similar, but not the same: https://reviews.apache.org/r/37813/ (see MESOS-3743 for an explanation).

Review: https://reviews.apache.org/r/39338


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/80d65f7f
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/80d65f7f
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/80d65f7f

Branch: refs/heads/master
Commit: 80d65f7f832193a27a3ec918dc8f91492f9a746b
Parents: 6d90b3b
Author: Bernd Mathiske <be...@mesosphere.io>
Authored: Fri Oct 16 10:58:35 2015 +0200
Committer: Bernd Mathiske <be...@mesosphere.io>
Committed: Fri Oct 16 10:58:35 2015 +0200

----------------------------------------------------------------------
 src/slave/containerizer/fetcher.cpp | 20 ++++++++++++++++++--
 1 file changed, 18 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/80d65f7f/src/slave/containerizer/fetcher.cpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/fetcher.cpp b/src/slave/containerizer/fetcher.cpp
index 2b2298c..e0d02d5 100644
--- a/src/slave/containerizer/fetcher.cpp
+++ b/src/slave/containerizer/fetcher.cpp
@@ -26,6 +26,8 @@
 #include <stout/net.hpp>
 #include <stout/path.hpp>
 
+#include <stout/os/read.hpp>
+
 #include "hdfs/hdfs.hpp"
 
 #include "slave/slave.hpp"
@@ -698,9 +700,9 @@ Future<Nothing> FetcherProcess::run(
     return Failure("Failed to create 'stdout' file: " + out.error());
   }
 
-  // Repeat for stderr.
+  string stderr = path::join(info.sandbox_directory(), "stderr");
   Try<int> err = os::open(
-      path::join(info.sandbox_directory(), "stderr"),
+      stderr,
       O_WRONLY | O_CREAT | O_TRUNC | O_NONBLOCK | O_CLOEXEC,
       S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
 
@@ -789,6 +791,20 @@ Future<Nothing> FetcherProcess::run(
 
       return Nothing();
     }))
+    .onFailed(defer(self(), [=](const string&) {
+      // To aid debugging what went wrong when attempting to fetch, grab the
+      // fetcher's local log output from the sandbox and log it here.
+      Try<string> text = os::read(stderr);
+      if (text.isSome()) {
+        LOG(WARNING) << "Begin fetcher log (stderr in sandbox) for container "
+                     << containerId << " from running command: " << command
+                     << "\n" << text.get() << "\n"
+                     << "End fetcher log for container " << containerId;
+      } else {
+        LOG(ERROR) << "Fetcher log (stderr in sandbox) for container "
+                   << containerId << " not readable: " << text.error();
+      }
+    }))
     .onAny(defer(self(), [=](const Future<Nothing>&) {
       // Clear the subprocess PID remembered from running mesos-fetcher.
       subprocessPids.erase(containerId);


[2/2] mesos git commit: Added additional diagnostic output when a fetcher cache test fails.

Posted by be...@apache.org.
Added additional diagnostic output when a fetcher cache test fails.

Dumps the contents of all involved task/executor sandboxes during test
tear down, but only if a fatal test failure occurred.

Review: https://reviews.apache.org/r/37813


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/7b53bb11
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/7b53bb11
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/7b53bb11

Branch: refs/heads/master
Commit: 7b53bb110f560ae366bad24d6a51b39d1e4ce43b
Parents: 80d65f7
Author: Bernd Mathiske <be...@mesosphere.io>
Authored: Fri Oct 16 14:14:19 2015 +0200
Committer: Bernd Mathiske <be...@mesosphere.io>
Committed: Fri Oct 16 14:14:19 2015 +0200

----------------------------------------------------------------------
 src/tests/fetcher_cache_tests.cpp | 64 +++++++++++++++++++++++++++++++---
 1 file changed, 60 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/7b53bb11/src/tests/fetcher_cache_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/fetcher_cache_tests.cpp b/src/tests/fetcher_cache_tests.cpp
index 7e194dc..0b32451 100644
--- a/src/tests/fetcher_cache_tests.cpp
+++ b/src/tests/fetcher_cache_tests.cpp
@@ -78,6 +78,8 @@ using process::Promise;
 using process::Queue;
 using process::Subprocess;
 
+using std::cout;
+using std::endl;
 using std::list;
 using std::string;
 using std::vector;
@@ -165,6 +167,10 @@ private:
   // Promises whose futures indicate that FetcherProcess::_fetch() has been
   // called for a task with a given index.
   vector<Owned<Promise<Nothing>>> fetchContentionWaypoints;
+
+  // If this test ends with a fatal failure (see TearDown()), the
+  // contents of these sandboxes will be dumped during tear down.
+  vector<Path> sandboxes;
 };
 
 
@@ -206,8 +212,54 @@ void FetcherCacheTest::SetUp()
 }
 
 
+// Dumps the contents of file `filename` in directory `path` to cout,
+// assuming it is a text file; prints the read error if it is unreadable.
+static void logFile(const Path& path, const string& filename)
+{
+  string filePath = path::join(path.value, filename);
+  Try<string> text = os::read(filePath);
+  if (text.isSome()) {
+    cout << "Begin file contents of `" << filename << "`:" << endl;
+    cout << text.get() << endl;
+    cout << "End file" << endl;
+  } else {
+    cout << "File `" << filename << "` not readable: " << text.error() << endl;
+  }
+}
+
+
+// Dumps every entry listed in the sandbox directory `path` to cout via
+// logFile(), assuming there are only text files.
+static void logSandbox(const Path& path)
+{
+  Try<list<string>> entries = os::ls(path.value);
+  if (entries.isSome()) {
+    cout << "Begin listing sandbox `" << path.value << "`:" << endl;
+    foreach (const string& entry, entries.get()) {
+      logFile(path, entry);
+    }
+    cout << "End sandbox" << endl;
+  } else {
+    cout << "Could not list sandbox `" << path.value
+         << "`: " << entries.error() << endl;
+  }
+}
+
+
 void FetcherCacheTest::TearDown()
 {
+  if (HasFatalFailure()) {
+    // A gtest macro has terminated the test prematurely. Now stream
+    // additional info that might help debug the situation to where
+    // gtest writes its output: cout.
+
+    cout << "Begin listing sandboxes" << endl;
+    foreach (const Path& path, sandboxes) {
+      logSandbox(path);
+    }
+    cout << "End sandboxes" << endl;
+  }
+
   driver->stop();
   driver->join();
   delete driver;
@@ -395,13 +447,15 @@ Try<FetcherCacheTest::Task> FetcherCacheTest::launchTask(
 
   driver->launchTasks(offer.id(), tasks);
 
-  const Path path = Path(slave::paths::getExecutorLatestRunPath(
+  const Path sandboxPath = Path(slave::paths::getExecutorLatestRunPath(
       flags.work_dir,
       slaveId,
       offer.framework_id(),
       executorId));
 
-  return Task{path, taskStatusQueue};
+  sandboxes.push_back(sandboxPath);
+
+  return Task{sandboxPath, taskStatusQueue};
 }
 
 
@@ -505,19 +559,21 @@ Try<vector<FetcherCacheTest::Task>> FetcherCacheTest::launchTasks(
     ExecutorID executorId;
     executorId.set_value(task.task_id().value());
 
-    Path runDirectory = Path(slave::paths::getExecutorLatestRunPath(
+    Path sandboxPath = Path(slave::paths::getExecutorLatestRunPath(
         flags.work_dir,
         slaveId,
         frameworkId,
         executorId));
 
+    sandboxes.push_back(sandboxPath);
+
     // Grabbing task status futures to wait for. We make a queue of futures
     // for each task. We can then wait until the front element indicates
     // status TASK_FINISHED. We use a queue, because we never know which
     // status update will be the one we have been waiting for.
     Queue<TaskStatus> taskStatusQueue;
 
-    result.push_back(Task {runDirectory, taskStatusQueue});
+    result.push_back(Task {sandboxPath, taskStatusQueue});
 
     EXPECT_CALL(scheduler, statusUpdate(driver, _))
       .WillRepeatedly(PushIndexedTaskStatus<1>(result));