You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kudu.apache.org by aw...@apache.org on 2021/04/09 00:35:55 UTC

[kudu] branch master updated: KUDU-3268: fix race between MM scheduling and unregistration

This is an automated email from the ASF dual-hosted git repository.

awong pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git


The following commit(s) were added to refs/heads/master by this push:
     new 6aeb466  KUDU-3268: fix race between MM scheduling and unregistration
6aeb466 is described below

commit 6aeb466c538d8cc566031c4a82200f18f088e18a
Author: Andrew Wong <aw...@cloudera.com>
AuthorDate: Thu Apr 8 14:39:54 2021 -0700

    KUDU-3268: fix race between MM scheduling and unregistration
    
    Following commit 9e4664d the following race was possible (S: scheduler thread,
    T: another thread):
      S: finds best op is op1
      T: unregisters op1 because the tablet is being shut down
      T: takes running_instances_lock_
        - waits for running count to go to 0
        - destructs op1
      S: takes running_instances_lock_
        - increments running count to 1
      S: attempts to run op1 but segfaults
    
    This resulted in a crash like the following, when shutting down the tserver
    (though it seems possible this could manifest itself when deleting a
    TabletReplica):
    PC: @     0x7ff97596de0d kudu::MaintenanceManager::LaunchOp()
    *** SIGSEGV (@0x30) received by PID 21067 (TID 0x7ff96343b700) from PID 48; stack trace: ***
        @     0x7ff976846980 (unknown) at ??:0
        @     0x7ff97596de0d kudu::MaintenanceManager::LaunchOp() at ??:0
        @     0x7ff97596b538 _ZZN4kudu18MaintenanceManager18RunSchedulerThreadEvENKUlvE_clEv at ??:0
        @     0x7ff97596f124 _ZNSt17_Function_handlerIFvvEZN4kudu18MaintenanceManager18RunSchedulerThreadEvEUlvE_E9_M_invokeERKSt9_Any_data at ??:0
        @     0x7ff977e2bcf4 std::function<>::operator()() at ??:0
        @     0x7ff975a05e6e kudu::ThreadPool::DispatchThread() at ??:0
        @     0x7ff975a06757 _ZZN4kudu10ThreadPool12CreateThreadEvENKUlvE_clEv at ??:0
        @     0x7ff975a07e7b _ZNSt17_Function_handlerIFvvEZN4kudu10ThreadPool12CreateThreadEvEUlvE_E9_M_invokeERKSt9_Any_data at ??:0
        @     0x7ff977e2bcf4 std::function<>::operator()() at ??:0
        @     0x7ff9759f8913 kudu::Thread::SuperviseThread() at ??:0
        @     0x7ff97683b6db start_thread at ??:0
        @     0x7ff97388e71f clone at ??:0
    
    This patch fixes this by checking whether the op has been cancelled, while
    holding running_instanes_lock_, before proceeding with the scheduling. I added
    a new test that failed consistently within 100 runs, that passed 5000 times
    locally.
    
    Change-Id: I29ad4d545de8166832b7c4148880a7e8719353bf
    Reviewed-on: http://gerrit.cloudera.org:8080/17291
    Reviewed-by: Alexey Serbin <as...@cloudera.com>
    Tested-by: Andrew Wong <aw...@cloudera.com>
---
 src/kudu/util/maintenance_manager-test.cc | 18 ++++++++++++++++--
 src/kudu/util/maintenance_manager.cc      |  9 +++++++++
 2 files changed, 25 insertions(+), 2 deletions(-)

diff --git a/src/kudu/util/maintenance_manager-test.cc b/src/kudu/util/maintenance_manager-test.cc
index 0fc5d80..de78f96 100644
--- a/src/kudu/util/maintenance_manager-test.cc
+++ b/src/kudu/util/maintenance_manager-test.cc
@@ -389,8 +389,8 @@ TEST_F(MaintenanceManagerTest, TestNewOpsDontGetScheduledDuringUnregister) {
 
   // Wait until two instances of the ops start running, since we have two MM threads.
   ASSERT_EVENTUALLY([&]() {
-      ASSERT_EQ(op1.RunningGauge()->value(), 2);
-    });
+    ASSERT_EQ(op1.RunningGauge()->value(), 2);
+  });
 
   // Trigger Unregister while they are running. This should wait for the currently-
   // running operations to complete, but no new operations should be scheduled.
@@ -885,4 +885,18 @@ TEST_F(MaintenanceManagerTest, ManyOperationsHeavyUpdateStats) {
                           ops.front()->update_stats_count());
 }
 
+// Regression test for KUDU-3268, where the unregistering and destruction of an
+// op may race with the scheduling of that op, resulting in a segfault.
+TEST_F(MaintenanceManagerTest, TestUnregisterWhileScheduling) {
+  TestMaintenanceOp op1("1", MaintenanceOp::HIGH_IO_USAGE);
+  op1.set_perf_improvement(10);
+  // Set a bunch of runs so we continually schedule the op.
+  op1.set_remaining_runs(1000000);
+  manager_->RegisterOp(&op1);
+  ASSERT_EVENTUALLY([&]() {
+    ASSERT_GE(op1.DurationHistogram()->TotalCount(), 1);
+  });
+  op1.Unregister();
+}
+
 } // namespace kudu
diff --git a/src/kudu/util/maintenance_manager.cc b/src/kudu/util/maintenance_manager.cc
index 64a361a..deeaf2f 100644
--- a/src/kudu/util/maintenance_manager.cc
+++ b/src/kudu/util/maintenance_manager.cc
@@ -348,7 +348,16 @@ void MaintenanceManager::RunSchedulerThread() {
         op_note = std::move(best_op_and_why.second);
       }
       if (op) {
+        // While 'running_instances_lock_' is held, check one more time for
+        // whether the op is cancelled. This ensures that we don't attempt to
+        // launch an op that has been destructed in UnregisterOp(). See
+        // KUDU-3268 for more details.
         std::lock_guard<Mutex> guard(running_instances_lock_);
+        if (op->cancelled()) {
+          VLOG_AND_TRACE_WITH_PREFIX("maintenance", 2)
+              << "picked maintenance operation that has been cancelled";
+          continue;
+        }
         IncreaseOpCount(op);
         prev_iter_found_no_work = false;
       } else {