You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by ji...@apache.org on 2016/12/07 00:07:44 UTC

[2/6] mesos git commit: Added SIGTERM handler to gracefully shutdown IOSwitchboard server.

Added SIGTERM handler to gracefully shutdown IOSwitchboard server.

When receiving a SIGTERM, the io switchboard process will forcibly
unblock the server from waiting on a connection before attempting to
drain its `stdoutFromFd` and `stderrFromFd` file descriptors. Once
these fds are drained (or they become invalid), the server will shut
itself down as per the normal exit route.

Review: https://reviews.apache.org/r/54442/


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/ff553e86
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/ff553e86
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/ff553e86

Branch: refs/heads/master
Commit: ff553e86ade41a47121e6aaf8a5e5fdee1b4e9e6
Parents: 85f0c46
Author: Kevin Klues <kl...@gmail.com>
Authored: Tue Dec 6 15:21:45 2016 -0800
Committer: Jie Yu <yu...@gmail.com>
Committed: Tue Dec 6 15:21:45 2016 -0800

----------------------------------------------------------------------
 .../containerizer/mesos/io/switchboard.cpp      | 16 ++++++
 .../containerizer/mesos/io/switchboard_main.cpp | 56 ++++++++++++++++++++
 2 files changed, 72 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/ff553e86/src/slave/containerizer/mesos/io/switchboard.cpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/mesos/io/switchboard.cpp b/src/slave/containerizer/mesos/io/switchboard.cpp
index e0627cb..98f6d95 100644
--- a/src/slave/containerizer/mesos/io/switchboard.cpp
+++ b/src/slave/containerizer/mesos/io/switchboard.cpp
@@ -550,10 +550,26 @@ Future<Nothing> IOSwitchboard::cleanup(
     return Nothing();
   }
 
+  Option<pid_t> pid = infos[containerId]->pid;
   Future<Option<int>> status = infos[containerId]->status;
 
   infos.erase(containerId);
 
+  // If we have a pid, then we attempt to send it a SIGTERM to have it
+  // shutdown gracefully. This is best effort, as it's likely that the
+  // switchboard has already shutdown in the common case.
+  //
+  // NOTE: There is an unfortunate race condition here. If the io
+  // switchboard terminates and the pid is reused by some other
+  // process, we might be sending SIGTERM to a random process. This
+  // could be a problem under high load.
+  //
+  // TODO(klueska): Send a message over the io switchboard server's
+  // domain socket instead of using a signal.
+  if (pid.isSome()) {
+    os::kill(pid.get(), SIGTERM);
+  }
+
   return status
     .then(defer(self(), [this, containerId]() {
       // Best effort removal of the unix domain socket file created for

http://git-wip-us.apache.org/repos/asf/mesos/blob/ff553e86/src/slave/containerizer/mesos/io/switchboard_main.cpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/mesos/io/switchboard_main.cpp b/src/slave/containerizer/mesos/io/switchboard_main.cpp
index aff6c21..6ce4cdb 100644
--- a/src/slave/containerizer/mesos/io/switchboard_main.cpp
+++ b/src/slave/containerizer/mesos/io/switchboard_main.cpp
@@ -15,17 +15,48 @@
 // limitations under the License.
 
 #include <process/future.hpp>
+#include <process/io.hpp>
 #include <process/owned.hpp>
 
+#include <stout/os.hpp>
 #include <stout/try.hpp>
 
 #include "slave/containerizer/mesos/io/switchboard.hpp"
 
+namespace io = process::io;
+
 using namespace mesos::internal::slave;
 
 using process::Future;
 using process::Owned;
 
+// We use a pipe to forcibly unblock an io switchboard server and
+// cause it to gracefully shutdown after receiving a SIGTERM signal.
+//
+// TODO(klueska): Ideally we would use `libevent` or `libev`s built in
+// support to defer a signal handler to a thread, but we currently
+// don't expose this through libprocess. Once we do expose this, we
+// should change this logic to use it.
+int unblockFds[2];
+
+
+static void sigtermHandler(int sig)
+{
+  int write = -1;
+  do {
+    write = ::write(unblockFds[1], "\0", 1);
+  } while (write == -1 && errno == EINTR);
+
+  ::close(unblockFds[1]);
+
+  if (write == -1) {
+    const char error[] = "Failed to terminate io switchboard gracefully\n";
+    ::write(STDERR_FILENO, error, sizeof(error));
+    ::_exit(EXIT_FAILURE);
+  }
+}
+
+
 int main(int argc, char** argv)
 {
   IOSwitchboardServerFlags flags;
@@ -37,6 +68,17 @@ int main(int argc, char** argv)
     EXIT(EXIT_FAILURE) << flags.usage(load.error());
   }
 
+  Try<Nothing> pipe = os::pipe(unblockFds);
+  if (pipe.isError()) {
+    EXIT(EXIT_FAILURE) << "Failed to create pipe for signaling unblock:"
+                       << " " + pipe.error();
+  }
+
+  if (os::signals::install(SIGTERM, sigtermHandler) != 0) {
+    EXIT(EXIT_FAILURE) << "Failed to register signal"
+                       << " '" + stringify(strsignal(SIGTERM)) << "'";
+  }
+
   Try<Owned<IOSwitchboardServer>> server = IOSwitchboardServer::create(
       flags.tty,
       flags.stdin_to_fd,
@@ -52,6 +94,20 @@ int main(int argc, char** argv)
                           " " << server.error();
   }
 
+  io::poll(unblockFds[0], io::READ)
+    .onAny([server](const Future<short>& future) {
+      os::close(unblockFds[0]);
+
+      if (!future.isReady()) {
+        EXIT(EXIT_FAILURE) << "Failed while polling on 'unblockFds[0]': "
+                           << (future.isFailed() ?
+                               future.failure() :
+                               "discarded");
+      }
+
+      server.get()->unblock();
+    });
+
   Future<Nothing> run = server.get()->run();
   run.await();