You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by ji...@apache.org on 2016/12/07 00:07:44 UTC
[2/6] mesos git commit: Added SIGTERM handler to gracefully shutdown
IOSwitchboard server.
Added SIGTERM handler to gracefully shutdown IOSwitchboard server.
When receiving a SIGTERM, the io switchboard process will forcibly
unblock the server from waiting on a connection before attempting to
drain its `stdoutFromFd` and `stderrFromFd` file descriptors. Once
these fds are drained (or they become invalid), the server will shut
itself down as per the normal exit route.
Review: https://reviews.apache.org/r/54442/
Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/ff553e86
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/ff553e86
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/ff553e86
Branch: refs/heads/master
Commit: ff553e86ade41a47121e6aaf8a5e5fdee1b4e9e6
Parents: 85f0c46
Author: Kevin Klues <kl...@gmail.com>
Authored: Tue Dec 6 15:21:45 2016 -0800
Committer: Jie Yu <yu...@gmail.com>
Committed: Tue Dec 6 15:21:45 2016 -0800
----------------------------------------------------------------------
.../containerizer/mesos/io/switchboard.cpp | 16 ++++++
.../containerizer/mesos/io/switchboard_main.cpp | 56 ++++++++++++++++++++
2 files changed, 72 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/mesos/blob/ff553e86/src/slave/containerizer/mesos/io/switchboard.cpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/mesos/io/switchboard.cpp b/src/slave/containerizer/mesos/io/switchboard.cpp
index e0627cb..98f6d95 100644
--- a/src/slave/containerizer/mesos/io/switchboard.cpp
+++ b/src/slave/containerizer/mesos/io/switchboard.cpp
@@ -550,10 +550,26 @@ Future<Nothing> IOSwitchboard::cleanup(
return Nothing();
}
+ Option<pid_t> pid = infos[containerId]->pid;
Future<Option<int>> status = infos[containerId]->status;
infos.erase(containerId);
+ // If we have a pid, then we attempt to send it a SIGTERM to have it
+ // shutdown gracefully. This is best effort, as it's likely that the
+ // switchboard has already shutdown in the common case.
+ //
+ // NOTE: There is an unfortunate race condition here. If the io
+ // switchboard terminates and the pid is reused by some other
+ // process, we might be sending SIGTERM to a random process. This
+ // could be a problem under high load.
+ //
+ // TODO(klueska): Send a message over the io switchboard server's
+ // domain socket instead of using a signal.
+ if (pid.isSome()) {
+ os::kill(pid.get(), SIGTERM);
+ }
+
return status
.then(defer(self(), [this, containerId]() {
// Best effort removal of the unix domain socket file created for
http://git-wip-us.apache.org/repos/asf/mesos/blob/ff553e86/src/slave/containerizer/mesos/io/switchboard_main.cpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/mesos/io/switchboard_main.cpp b/src/slave/containerizer/mesos/io/switchboard_main.cpp
index aff6c21..6ce4cdb 100644
--- a/src/slave/containerizer/mesos/io/switchboard_main.cpp
+++ b/src/slave/containerizer/mesos/io/switchboard_main.cpp
@@ -15,17 +15,48 @@
// limitations under the License.
#include <process/future.hpp>
+#include <process/io.hpp>
#include <process/owned.hpp>
+#include <stout/os.hpp>
#include <stout/try.hpp>
#include "slave/containerizer/mesos/io/switchboard.hpp"
+namespace io = process::io;
+
using namespace mesos::internal::slave;
using process::Future;
using process::Owned;
+// We use a pipe to forcibly unblock an io switchboard server and
+// cause it to gracefully shutdown after receiving a SIGTERM signal.
+//
+// TODO(klueska): Ideally we would use `libevent` or `libev`s built in
+// support to defer a signal handler to a thread, but we currently
+// don't expose this through libprocess. Once we do expose this, we
+// should change this logic to use it.
+int unblockFds[2];
+
+
+static void sigtermHandler(int sig)
+{
+ int write = -1;
+ do {
+ write = ::write(unblockFds[1], "\0", 1);
+ } while (write == -1 && errno == EINTR);
+
+ ::close(unblockFds[1]);
+
+ if (write == -1) {
+ const char error[] = "Failed to terminate io switchboard gracefully\n";
+ ::write(STDERR_FILENO, error, sizeof(error));
+ ::_exit(EXIT_FAILURE);
+ }
+}
+
+
int main(int argc, char** argv)
{
IOSwitchboardServerFlags flags;
@@ -37,6 +68,17 @@ int main(int argc, char** argv)
EXIT(EXIT_FAILURE) << flags.usage(load.error());
}
+ Try<Nothing> pipe = os::pipe(unblockFds);
+ if (pipe.isError()) {
+ EXIT(EXIT_FAILURE) << "Failed to create pipe for signaling unblock:"
+ << " " + pipe.error();
+ }
+
+ if (os::signals::install(SIGTERM, sigtermHandler) != 0) {
+ EXIT(EXIT_FAILURE) << "Failed to register signal"
+ << " '" + stringify(strsignal(SIGTERM)) << "'";
+ }
+
Try<Owned<IOSwitchboardServer>> server = IOSwitchboardServer::create(
flags.tty,
flags.stdin_to_fd,
@@ -52,6 +94,20 @@ int main(int argc, char** argv)
" " << server.error();
}
+ io::poll(unblockFds[0], io::READ)
+ .onAny([server](const Future<short>& future) {
+ os::close(unblockFds[0]);
+
+ if (!future.isReady()) {
+ EXIT(EXIT_FAILURE) << "Failed while polling on 'unblockFds[0]': "
+ << (future.isFailed() ?
+ future.failure() :
+ "discarded");
+ }
+
+ server.get()->unblock();
+ });
+
Future<Nothing> run = server.get()->run();
run.await();