You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by ta...@apache.org on 2018/10/04 21:52:44 UTC

[4/4] impala git commit: IMPALA-7581: timeout backend tests after 2 hours

IMPALA-7581: timeout backend tests after 2 hours

This will abort the process if a backend test is taking too long, which
we assume is because of a hang. This makes job failures easier to triage
and may make it easier to debug failures if we collect coredumps or
minidumps.

Also disable the death tests for ASAN under the theory that the
probability of the hang is higher than a regular DEBUG build.

Testing:
Reduced the timeout to 5s and confirmed that it worked.

Change-Id: I2e4ef9cb0549ead0bae57b11489f6a4d9b44ef95
Reviewed-on: http://gerrit.cloudera.org:8080/11533
Tested-by: Impala Public Jenkins <im...@cloudera.com>
Reviewed-by: Tim Armstrong <ta...@cloudera.com>


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/93606e60
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/93606e60
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/93606e60

Branch: refs/heads/master
Commit: 93606e6046054526c093975894efc1eef8a53bc1
Parents: d301600
Author: Tim Armstrong <ta...@cloudera.com>
Authored: Wed Sep 26 17:43:51 2018 -0700
Committer: Tim Armstrong <ta...@cloudera.com>
Committed: Thu Oct 4 21:51:50 2018 +0000

----------------------------------------------------------------------
 be/src/common/init.cc             | 22 ++++++++++++++++++++--
 be/src/testutil/death-test-util.h |  5 ++++-
 be/src/util/test-info.h           |  1 +
 3 files changed, 25 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/93606e60/be/src/common/init.cc
----------------------------------------------------------------------
diff --git a/be/src/common/init.cc b/be/src/common/init.cc
index 62a33b1..6c02dbf 100644
--- a/be/src/common/init.cc
+++ b/be/src/common/init.cc
@@ -115,6 +115,13 @@ static unique_ptr<impala::Thread> memory_maintenance_thread;
 // time slept. If that exceeds PAUSE_WARN_THRESHOLD_MS, a warning is logged.
 static unique_ptr<impala::Thread> pause_monitor;
 
+// Thread only used in backend tests to implement a test timeout.
+static unique_ptr<impala::Thread> be_timeout_thread;
+
+// Timeout after 2 hours - backend tests should generally run in minutes or tens of
+// minutes at worst.
+static const int64_t BE_TEST_TIMEOUT_S = 60L * 60L * 2L;
+
 [[noreturn]] static void LogMaintenanceThread() {
   while (true) {
     sleep(FLAGS_logbufsecs);
@@ -236,10 +243,21 @@ void impala::InitCommonRuntime(int argc, char** argv, bool init_jvm,
       &LogMaintenanceThread, &log_maintenance_thread);
   if (!thread_spawn_status.ok()) CLEAN_EXIT_WITH_ERROR(thread_spawn_status.GetDetail());
 
-  thread_spawn_status = Thread::Create("common", "pause-monitor",
-      &PauseMonitorLoop, &pause_monitor);
+  thread_spawn_status =
+      Thread::Create("common", "pause-monitor", &PauseMonitorLoop, &pause_monitor);
   if (!thread_spawn_status.ok()) CLEAN_EXIT_WITH_ERROR(thread_spawn_status.GetDetail());
 
+  // Implement timeout for backend tests.
+  if (impala::TestInfo::is_be_test()) {
+    thread_spawn_status = Thread::Create("common", "be-test-timeout-thread",
+        []() {
+          SleepForMs(BE_TEST_TIMEOUT_S * 1000L);
+          LOG(FATAL) << "Backend test timed out after " << BE_TEST_TIMEOUT_S << "s";
+        },
+        &be_timeout_thread);
+    if (!thread_spawn_status.ok()) CLEAN_EXIT_WITH_ERROR(thread_spawn_status.GetDetail());
+  }
+
   PeriodicCounterUpdater::Init();
 
   LOG(INFO) << impala::GetVersionString();

http://git-wip-us.apache.org/repos/asf/impala/blob/93606e60/be/src/testutil/death-test-util.h
----------------------------------------------------------------------
diff --git a/be/src/testutil/death-test-util.h b/be/src/testutil/death-test-util.h
index 474025b..7a4b3f9 100644
--- a/be/src/testutil/death-test-util.h
+++ b/be/src/testutil/death-test-util.h
@@ -24,7 +24,7 @@
 
 // Wrapper around gtest's ASSERT_DEBUG_DEATH that prevents coredumps and minidumps
 // being generated as the result of the death test.
-#ifndef NDEBUG
+#if !defined(NDEBUG) && !defined(ADDRESS_SANITIZER) && !defined(THREAD_SANITIZER)
 #define IMPALA_ASSERT_DEBUG_DEATH(fn, msg)      \
   do {                                          \
     ScopedCoredumpDisabler disable_coredumps;   \
@@ -37,6 +37,9 @@
 // death tests that work in both debug and release builds. To avoid this problem, update
 // our wrapper macro to simply omit the death test expression in release builds, where we
 // can't actually test DCHECKs anyway.
+// Also disable the death tests in ASAN and TSAN builds where we suspect there is a
+// higher risk of hangs because of races with other threads holding locks during fork() -
+// see IMPALA-7581.
 #define IMPALA_ASSERT_DEBUG_DEATH(fn, msg)
 #endif
 

http://git-wip-us.apache.org/repos/asf/impala/blob/93606e60/be/src/util/test-info.h
----------------------------------------------------------------------
diff --git a/be/src/util/test-info.h b/be/src/util/test-info.h
index c2d1a5f..b1ec64d 100644
--- a/be/src/util/test-info.h
+++ b/be/src/util/test-info.h
@@ -33,6 +33,7 @@ class TestInfo {
   /// Called in InitCommonRuntime().
   static void Init(Mode mode) { mode_ = mode; }
 
+  static bool is_be_test() { return mode_ == BE_TEST; }
   static bool is_fe_test() { return mode_ == FE_TEST; }
   static bool is_test() { return mode_ == BE_TEST || mode_ == FE_TEST; }