You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by ya...@apache.org on 2017/01/20 07:31:29 UTC

mesos git commit: Rationalize process wait error checking.

Repository: mesos
Updated Branches:
  refs/heads/master d2d8ceaa6 -> f9d30a4f7


Rationalize process wait error checking.

Introduce the WSUCCEEDED() API to test that a forked process exited
successfully. Use it in all the places that were performing this test
bespokely, and update error logs to user WSTRINGIFY() if appropriate.

This also fixes a bug in fetcher where the parent wasn't checking
whether the child was signaled.

Review: https://reviews.apache.org/r/55683/


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/f9d30a4f
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/f9d30a4f
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/f9d30a4f

Branch: refs/heads/master
Commit: f9d30a4f79084a5e93ab5fda53c2ce715b72ca8d
Parents: d2d8cea
Author: James Peach <jp...@apache.org>
Authored: Thu Jan 19 23:14:20 2017 -0800
Committer: Jiang Yan Xu <xu...@apple.com>
Committed: Thu Jan 19 23:27:13 2017 -0800

----------------------------------------------------------------------
 src/common/status_utils.hpp                      | 18 +++++++++++++++++-
 src/docker/executor.cpp                          |  5 +++--
 src/hdfs/hdfs.cpp                                | 17 +++++++++--------
 src/launcher/default_executor.cpp                |  4 ++--
 src/launcher/executor.cpp                        |  5 +++--
 src/launcher/fetcher.cpp                         |  4 ++--
 src/linux/ns.hpp                                 |  9 ++++++---
 src/slave/containerizer/docker.cpp               |  6 +-----
 src/slave/containerizer/mesos/io/switchboard.cpp | 14 +++-----------
 src/slave/containerizer/mesos/launch.cpp         |  6 ++++--
 src/tests/script.cpp                             |  5 +++--
 11 files changed, 53 insertions(+), 40 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/f9d30a4f/src/common/status_utils.hpp
----------------------------------------------------------------------
diff --git a/src/common/status_utils.hpp b/src/common/status_utils.hpp
index 26b94d4..36df84d 100644
--- a/src/common/status_utils.hpp
+++ b/src/common/status_utils.hpp
@@ -22,6 +22,13 @@
 #include <stout/option.hpp>
 #include <stout/stringify.hpp>
 
+// Return whether the wait(2) status was a successful process exit.
+inline bool WSUCCEEDED(int status)
+{
+  return WIFEXITED(status) && WEXITSTATUS(status) == 0;
+}
+
+
 inline std::string WSTRINGIFY(int status)
 {
 #ifdef __WINDOWS__
@@ -36,9 +43,18 @@ inline std::string WSTRINGIFY(int status)
   if (WIFEXITED(status)) {
     message += "exited with status ";
     message += stringify(WEXITSTATUS(status));
-  } else {
+  } else if (WIFSIGNALED(status)) {
     message += "terminated with signal ";
     message += strsignal(WTERMSIG(status));
+    if (WCOREDUMP(status)) {
+      message += " (core dumped)";
+    }
+  } else if (WIFSTOPPED(status)) {
+    message += "stopped on signal ";
+    message += strsignal(WSTOPSIG(status));
+  } else {
+    message += "wait status ";
+    message += stringify(status);
   }
   return message;
 #endif // __WINDOWS__

http://git-wip-us.apache.org/repos/asf/mesos/blob/f9d30a4f/src/docker/executor.cpp
----------------------------------------------------------------------
diff --git a/src/docker/executor.cpp b/src/docker/executor.cpp
index f7cb459..afb3ac2 100644
--- a/src/docker/executor.cpp
+++ b/src/docker/executor.cpp
@@ -419,9 +419,10 @@ private:
       message = "Failed to get exit status of container";
     } else {
       int status = run->get();
-      CHECK(WIFEXITED(status) || WIFSIGNALED(status)) << status;
+      CHECK(WIFEXITED(status) || WIFSIGNALED(status))
+        << "Unexpected wait status " << status;
 
-      if (WIFEXITED(status) && WEXITSTATUS(status) == 0) {
+      if (WSUCCEEDED(status)) {
         state = TASK_FINISHED;
       } else if (killed) {
         // Send TASK_KILLED if the task was killed as a result of

http://git-wip-us.apache.org/repos/asf/mesos/blob/f9d30a4f/src/hdfs/hdfs.cpp
----------------------------------------------------------------------
diff --git a/src/hdfs/hdfs.cpp b/src/hdfs/hdfs.cpp
index bebba8b..4aea868 100644
--- a/src/hdfs/hdfs.cpp
+++ b/src/hdfs/hdfs.cpp
@@ -34,6 +34,7 @@
 #include <stout/os/exists.hpp>
 #include <stout/os/shell.hpp>
 
+#include "common/status_utils.hpp"
 #include "hdfs/hdfs.hpp"
 
 using namespace process;
@@ -156,18 +157,18 @@ Future<bool> HDFS::exists(const string& path)
         return Failure("Failed to reap the subprocess");
       }
 
-      if (WIFEXITED(result.status.get())) {
-        int exitCode = WEXITSTATUS(result.status.get());
-        if (exitCode == 0) {
-          return true;
-        } else if (exitCode == 1) {
-          return false;
-        }
+      if (WSUCCEEDED(result.status.get())) {
+        return true;
+      }
+
+      if (WIFEXITED(result.status.get()) &&
+          WEXITSTATUS(result.status.get()) == 1) {
+        return false;
       }
 
       return Failure(
           "Unexpected result from the subprocess: "
-          "status='" + stringify(result.status.get()) + "', " +
+          "status='" + WSTRINGIFY(result.status.get()) + "', " +
           "stdout='" + result.out + "', " +
           "stderr='" + result.err + "'");
     });

http://git-wip-us.apache.org/repos/asf/mesos/blob/f9d30a4f/src/launcher/default_executor.cpp
----------------------------------------------------------------------
diff --git a/src/launcher/default_executor.cpp b/src/launcher/default_executor.cpp
index 7e53cea..f25905c 100644
--- a/src/launcher/default_executor.cpp
+++ b/src/launcher/default_executor.cpp
@@ -632,9 +632,9 @@ protected:
       taskState = TASK_FAILED;
     } else {
       CHECK(WIFEXITED(status.get()) || WIFSIGNALED(status.get()))
-        << status.get();
+        << "Unexpected wait status " << status.get();
 
-      if (WIFEXITED(status.get()) && WEXITSTATUS(status.get()) == 0) {
+      if (WSUCCEEDED(status.get())) {
         taskState = TASK_FINISHED;
       } else if (shuttingDown) {
         // Send TASK_KILLED if the task was killed as a result of

http://git-wip-us.apache.org/repos/asf/mesos/blob/f9d30a4f/src/launcher/executor.cpp
----------------------------------------------------------------------
diff --git a/src/launcher/executor.cpp b/src/launcher/executor.cpp
index 4c60ef9..b42743b 100644
--- a/src/launcher/executor.cpp
+++ b/src/launcher/executor.cpp
@@ -635,9 +635,10 @@ private:
       message = "Failed to get exit status for Command";
     } else {
       int status = status_.get().get();
-      CHECK(WIFEXITED(status) || WIFSIGNALED(status)) << status;
+      CHECK(WIFEXITED(status) || WIFSIGNALED(status))
+        << "Unexpected wait status " << status;
 
-      if (WIFEXITED(status) && WEXITSTATUS(status) == 0) {
+      if (WSUCCEEDED(status)) {
         taskState = TASK_FINISHED;
       } else if (killed) {
         // Send TASK_KILLED if the task was killed as a result of

http://git-wip-us.apache.org/repos/asf/mesos/blob/f9d30a4f/src/launcher/fetcher.cpp
----------------------------------------------------------------------
diff --git a/src/launcher/fetcher.cpp b/src/launcher/fetcher.cpp
index 5f9a38d..a1dcced 100644
--- a/src/launcher/fetcher.cpp
+++ b/src/launcher/fetcher.cpp
@@ -111,7 +111,7 @@ static Try<bool> extract(
 
   // `status()` never fails or gets discarded.
   int status = extractProcess->status()->get();
-  if (WIFEXITED(status) && WEXITSTATUS(status) != 0) {
+  if (!WSUCCEEDED(status)) {
     return Error(
         "Failed to extract '" + sourcePath + "': '" +
         strings::join(" ", command) + "' failed: " +
@@ -197,7 +197,7 @@ static Try<string> copyFile(
     return ErrnoError("Failed to copy '" + sourcePath + "'");
   }
 
-  if (WIFEXITED(status) && WEXITSTATUS(status) != 0) {
+  if (!WSUCCEEDED(status)) {
     return Error(
         "Failed to copy '" + sourcePath + "': " + WSTRINGIFY(status));
   }

http://git-wip-us.apache.org/repos/asf/mesos/blob/f9d30a4f/src/linux/ns.hpp
----------------------------------------------------------------------
diff --git a/src/linux/ns.hpp b/src/linux/ns.hpp
index da43266..da4b716 100644
--- a/src/linux/ns.hpp
+++ b/src/linux/ns.hpp
@@ -50,6 +50,8 @@
 #include <process/future.hpp>
 #include <process/reap.hpp>
 
+#include "common/status_utils.hpp"
+
 #ifndef CLONE_NEWNS
 #define CLONE_NEWNS 0x00020000
 #endif
@@ -502,10 +504,11 @@ inline Try<pid_t> clone(
       }
     }
 
-    CHECK(WIFEXITED(status) || WIFSIGNALED(status));
+    CHECK(WIFEXITED(status) || WIFSIGNALED(status))
+      << "Unexpected wait status " << status;
 
-    if (!WIFEXITED(status) || WEXITSTATUS(status) != 0) {
-      return Error("Failed to clone");
+    if (!WSUCCEEDED(status)) {
+      return Error("Failed to clone: " + WSTRINGIFY(status));
     }
 
     return pid;

http://git-wip-us.apache.org/repos/asf/mesos/blob/f9d30a4f/src/slave/containerizer/docker.cpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/docker.cpp b/src/slave/containerizer/docker.cpp
index 7a8a727..cfc6795 100644
--- a/src/slave/containerizer/docker.cpp
+++ b/src/slave/containerizer/docker.cpp
@@ -1360,11 +1360,7 @@ Future<Docker::Container> DockerContainerizerProcess::launchExecutorContainer(
         inspect.discard();
         promise->fail("Failed to obtain exit status of container");
       } else {
-        bool exitedCleanly =
-          WIFEXITED(run->get()) &&
-          WEXITSTATUS(run->get()) == 0;
-
-        if (!exitedCleanly) {
+        if (!WSUCCEEDED(run->get())) {
           inspect.discard();
           promise->fail("Container " + WSTRINGIFY(run->get()));
         }

http://git-wip-us.apache.org/repos/asf/mesos/blob/f9d30a4f/src/slave/containerizer/mesos/io/switchboard.cpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/mesos/io/switchboard.cpp b/src/slave/containerizer/mesos/io/switchboard.cpp
index 2c4eed8..4b134f8 100644
--- a/src/slave/containerizer/mesos/io/switchboard.cpp
+++ b/src/slave/containerizer/mesos/io/switchboard.cpp
@@ -59,6 +59,7 @@
 
 #include "common/http.hpp"
 #include "common/recordio.hpp"
+#include "common/status_utils.hpp"
 
 #include "slave/flags.hpp"
 #include "slave/state.hpp"
@@ -857,7 +858,7 @@ void IOSwitchboard::reaped(
     LOG(INFO) << "I/O switchboard server process for container "
               << containerId << " has terminated (status=N/A)";
     return;
-  } else if (WIFEXITED(status.get()) && WEXITSTATUS(status.get()) == 0) {
+  } else if (WSUCCEEDED(status.get())) {
     LOG(INFO) << "I/O switchboard server process for container "
               << containerId << " has terminated (status=0)";
     return;
@@ -870,16 +871,7 @@ void IOSwitchboard::reaped(
 
   ContainerLimitation limitation;
   limitation.set_reason(TaskStatus::REASON_IO_SWITCHBOARD_EXITED);
-
-  if (WIFEXITED(status.get())) {
-    limitation.set_message(
-        "'IOSwitchboard' exited with status:"
-        " " + stringify(WEXITSTATUS(status.get())));
-  } else if (WIFSIGNALED(status.get())) {
-    limitation.set_message(
-        "'IOSwitchboard' exited with signal:"
-        " " + stringify(strsignal(WTERMSIG(status.get()))));
-  }
+  limitation.set_message("'IOSwitchboard' " + WSTRINGIFY(status.get()));
 
   infos[containerId]->limitation.set(limitation);
 

http://git-wip-us.apache.org/repos/asf/mesos/blob/f9d30a4f/src/slave/containerizer/mesos/launch.cpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/mesos/launch.cpp b/src/slave/containerizer/mesos/launch.cpp
index e482ab8..4320cd6 100644
--- a/src/slave/containerizer/mesos/launch.cpp
+++ b/src/slave/containerizer/mesos/launch.cpp
@@ -393,9 +393,11 @@ int MesosContainerizerLaunch::execute()
       status = os::spawn(command.value(), args);
     }
 
-    if (!WIFEXITED(status) || (WEXITSTATUS(status) != 0)) {
+    if (!WSUCCEEDED(status)) {
       cerr << "Failed to execute pre-exec command '"
-           << JSON::protobuf(command) << "'" << endl;
+           << JSON::protobuf(command) << "': "
+           << WSTRINGIFY(status)
+           << endl;
       exitWithStatus(EXIT_FAILURE);
     }
   }

http://git-wip-us.apache.org/repos/asf/mesos/blob/f9d30a4f/src/tests/script.cpp
----------------------------------------------------------------------
diff --git a/src/tests/script.cpp b/src/tests/script.cpp
index ef6b22a..deec248 100644
--- a/src/tests/script.cpp
+++ b/src/tests/script.cpp
@@ -74,9 +74,10 @@ void execute(const string& script)
     // In parent process.
     int status;
     while (wait(&status) != pid || WIFSTOPPED(status));
-    CHECK(WIFEXITED(status) || WIFSIGNALED(status));
+    CHECK(WIFEXITED(status) || WIFSIGNALED(status))
+      << "Unexpected wait status " << status;
 
-    if (!WIFEXITED(status) || WEXITSTATUS(status) != 0) {
+    if (!WSUCCEEDED(status)) {
       FAIL() << script << " " << WSTRINGIFY(status);
     }
   } else {