You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kudu.apache.org by al...@apache.org on 2020/04/15 04:40:12 UTC

[kudu] 02/02: tablet_server-test: avoid NO_FATALS gtest race

This is an automated email from the ASF dual-hosted git repository.

alexey pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git

commit b4559dbf6d55a7506982e218af069e94cf05237e
Author: Andrew Wong <aw...@cloudera.com>
AuthorDate: Tue Apr 14 15:58:09 2020 -0700

    tablet_server-test: avoid NO_FATALS gtest race
    
    We saw a failure of the test with the following stack trace:
    
    *** Aborted at 1586816377 (unix time) try "date -d @1586816377" if you are using GNU date ***
    PC: @     0x7f0b851df05a testing::Test::HasFatalFailure()
    *** SIGSEGV (@0x7f0b6be4e010) received by PID 8291 (TID 0x7f0b85cbb400) from PID 1810161680; stack trace: ***
        @           0x4be38d __tsan::CallUserSignalHandler() at /home/jenkins-slave/workspace/kudu-master/3/thirdparty/src/llvm-9.0.0.src/projects/compiler-rt/lib/tsan/rtl/tsan_interceptors.cc:1916
        @           0x4bf47a rtl_sigaction() at /home/jenkins-slave/workspace/kudu-master/3/thirdparty/src/llvm-9.0.0.src/projects/compiler-rt/lib/tsan/rtl/tsan_interceptors.cc:2006
        @     0x7f0b7f999330 (unknown) at ??:0
        @     0x7f0b851df05a testing::Test::HasFatalFailure() at ??:0
        @           0x574687 kudu::tserver::TabletServerMaintenanceMemoryPressureTest_TestDontStarveDMSWhileUnderMemoryPressure_Test::TestBody() at /home/jenkins-slave/workspace/kudu-master/3/src/kudu/tserver/tablet_server-test.cc:?
        @     0x7f0b851fffa0 testing::internal::HandleExceptionsInMethodIfSupported<>() at ??:0
        @     0x7f0b851def39 testing::Test::Run() at ??:0
        @     0x7f0b851e01dd testing::TestInfo::Run() at ??:0
        @     0x7f0b851e0ef7 testing::TestCase::Run() at ??:0
        @     0x7f0b851ed3a7 testing::internal::UnitTestImpl::RunAllTests() at ??:0
        @     0x7f0b85200e50 testing::internal::HandleExceptionsInMethodIfSupported<>() at ??:0
        @     0x7f0b851ecc62 testing::UnitTest::Run() at ??:0
        @     0x7f0b8565c7bc RUN_ALL_TESTS() at ??:0
        @     0x7f0b8565b637 main at ??:0
        @     0x7f0b77360f45 __libc_start_main at ??:0
        @           0x4b6b75 (unknown) at ??:?
    
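    I haven't been able to reproduce this, but it seems that NO_FATALS calls
    from a non-main thread can race inside gtest with an ASSERT_EVENTUALLY
    in the main thread, which attempts to capture assertion failures.
    
    This patch replaces the ASSERT_EVENTUALLY with a manual polling loop
    that has a 30-second deadline, and removes the TSAN suppression that
    previously masked the race in the inserter thread.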
    
    Change-Id: Icbb1a785606df93042c352ba350e31e9e828c8a9
    Reviewed-on: http://gerrit.cloudera.org:8080/15734
    Tested-by: Kudu Jenkins
    Reviewed-by: Alexey Serbin <as...@cloudera.com>
---
 src/kudu/tserver/tablet_server-test.cc | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/src/kudu/tserver/tablet_server-test.cc b/src/kudu/tserver/tablet_server-test.cc
index fab5ad4..b4d3dc6 100644
--- a/src/kudu/tserver/tablet_server-test.cc
+++ b/src/kudu/tserver/tablet_server-test.cc
@@ -101,7 +101,6 @@
 #include "kudu/util/countdown_latch.h"
 #include "kudu/util/crc.h"
 #include "kudu/util/curl_util.h"
-#include "kudu/util/debug/sanitizer_scopes.h"
 #include "kudu/util/env.h"
 #include "kudu/util/faststring.h"
 #include "kudu/util/hdr_histogram.h"
@@ -940,10 +939,6 @@ TEST_F(TabletServerMaintenanceMemoryPressureTest, TestDontStarveDMSWhileUnderMem
   thread insert_thread([&] {
     int cur_row = 2;
     while (keep_inserting) {
-      // Ignore TSAN warnings that complain about a race in gtest between this
-      // check for fatal failures and the check for fatal failures in the below
-      // AssertEventually.
-      debug::ScopedTSANIgnoreReadsAndWrites ignore_tsan;
       NO_FATALS(InsertTestRowsDirect(cur_row++, 1));
     }
   });
@@ -961,9 +956,16 @@ TEST_F(TabletServerMaintenanceMemoryPressureTest, TestDontStarveDMSWhileUnderMem
   // since it anchors WALs.
   scoped_refptr<Histogram> dms_flushes =
       METRIC_flush_dms_duration.Instantiate(tablet_replica_->tablet()->GetMetricEntity());
-  ASSERT_EVENTUALLY([&] {
-    ASSERT_EQ(1, dms_flushes->histogram()->TotalCount());
-  });
+  // NOTE: we don't use ASSERT_EVENTUALLY because gtest may race with the
+  // NO_FATALS call in the inserter thread.
+  constexpr int kTimeoutSecs = 30;
+  const MonoTime deadline = MonoTime::Now() + MonoDelta::FromSeconds(kTimeoutSecs);
+  while (dms_flushes->histogram()->TotalCount() < 1) {
+    if (MonoTime::Now() > deadline) {
+      FAIL() << Substitute("Didn't flush DMS in $0 seconds", kTimeoutSecs);
+    }
+    SleepFor(MonoDelta::FromMilliseconds(100));
+  }
 }
 
 // Regression test for KUDU-2929. Previously, when under memory pressure, we