You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by ta...@apache.org on 2018/10/26 01:50:30 UTC

impala git commit: IMPALA-7714: remove unsafe code from signal handlers

Repository: impala
Updated Branches:
  refs/heads/master 1dbb3c1be -> 93ee538c5


IMPALA-7714: remove unsafe code from signal handlers

IMPALA-6271 added LOG statements to some signal handlers and an exit()
call to a different signal handler. These functions are not async-signal
safe.

The fixes are:
* Use the write system call directly. I tried using glog's RAW_LOG
  functionality but had major issues getting it to work.
* Call _exit() directly instead of exit() so that static destructors
  are not run. This is the same default behaviour as SIGTERM. This
  wans't necessary to prevent this specific crash.

Testing:
Could reproduce the crash by looping
tests/custom_cluster/test_local_catalog.py until a minidump was
produced. After this fix it did not reproduce after looping for
a few hours.

Ran exhaustive build.

Change-Id: I52037d6510b9b34ec33d3a8b5492226aee1b9d92
Reviewed-on: http://gerrit.cloudera.org:8080/11777
Tested-by: Impala Public Jenkins <im...@cloudera.com>
Reviewed-by: Tim Armstrong <ta...@cloudera.com>


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/93ee538c
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/93ee538c
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/93ee538c

Branch: refs/heads/master
Commit: 93ee538c5403509804cb18411e4407352f5b242b
Parents: 1dbb3c1
Author: Tim Armstrong <ta...@cloudera.com>
Authored: Wed Oct 24 17:06:33 2018 -0700
Committer: Tim Armstrong <ta...@cloudera.com>
Committed: Fri Oct 26 01:50:16 2018 +0000

----------------------------------------------------------------------
 be/src/common/init.cc                 | 9 ++++++---
 be/src/util/minidump.cc               | 4 ++--
 tests/custom_cluster/test_breakpad.py | 2 +-
 3 files changed, 9 insertions(+), 6 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/93ee538c/be/src/common/init.cc
----------------------------------------------------------------------
diff --git a/be/src/common/init.cc b/be/src/common/init.cc
index b064485..fcd6971 100644
--- a/be/src/common/init.cc
+++ b/be/src/common/init.cc
@@ -20,6 +20,7 @@
 #include <csignal>
 #include <gperftools/heap-profiler.h>
 #include <gperftools/malloc_extension.h>
+#include <third_party/lss/linux_syscall_support.h>
 
 #include "common/global-flags.h"
 #include "common/logging.h"
@@ -187,9 +188,11 @@ static void PauseMonitorLoop() {
 
 // Signal handler for SIGTERM, that prints the message before doing an exit.
 [[noreturn]] static void HandleSigTerm(int signum, siginfo_t* info, void* context) {
-  LOG(INFO) << "Caught signal: SIGTERM. Daemon will exit. Sender UID: " << info->si_uid
-            << ", PID: " << info->si_pid;
-  exit(0);
+  const char* msg = "Caught signal: SIGTERM. Daemon will exit.\n";
+  sys_write(STDOUT_FILENO, msg, strlen(msg));
+  // _exit() is async signal safe and is equivalent to the behaviour of the default
+  // SIGTERM handler. exit() can run arbitrary code and is *not* safe to use here.
+  _exit(0);
 }
 
 void impala::InitCommonRuntime(int argc, char** argv, bool init_jvm,

http://git-wip-us.apache.org/repos/asf/impala/blob/93ee538c/be/src/util/minidump.cc
----------------------------------------------------------------------
diff --git a/be/src/util/minidump.cc b/be/src/util/minidump.cc
index 99d4b92..f09439f 100644
--- a/be/src/util/minidump.cc
+++ b/be/src/util/minidump.cc
@@ -98,8 +98,8 @@ static bool DumpCallback(const google_breakpad::MinidumpDescriptor& descriptor,
 
 /// Signal handler to write a minidump file outside of crashes.
 static void HandleSignal(int signum, siginfo_t* info, void* context) {
-  LOG(INFO) << "Caught signal: SIGUSR1. Sender UID: " << info->si_uid
-            << ", PID: " << info->si_pid;
+  const char* msg = "Caught signal: SIGUSR1\n";
+  sys_write(STDOUT_FILENO, msg, strlen(msg));
   minidump_exception_handler->WriteMinidump(FLAGS_minidump_path, DumpCallback, NULL);
 }
 

http://git-wip-us.apache.org/repos/asf/impala/blob/93ee538c/tests/custom_cluster/test_breakpad.py
----------------------------------------------------------------------
diff --git a/tests/custom_cluster/test_breakpad.py b/tests/custom_cluster/test_breakpad.py
index 661b6a4..ed3e427 100644
--- a/tests/custom_cluster/test_breakpad.py
+++ b/tests/custom_cluster/test_breakpad.py
@@ -250,7 +250,7 @@ class TestBreakpadExhaustive(TestBreakpadBase):
     uid = os.getuid()
     # There should be a SIGTERM message in the log now
     # since we raised one above.
-    log_str = 'Caught signal: SIGTERM. Daemon will exit. Sender UID: ' + str(uid)
+    log_str = 'Caught signal: SIGTERM. Daemon will exit.'
     self.assert_impalad_log_contains('INFO', log_str, expected_count=1)
 
   @pytest.mark.execute_serially