You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kudu.apache.org by gr...@apache.org on 2021/04/30 17:29:38 UTC

[kudu] 01/05: KUDU-2612: fix race when adopting partition lock

This is an automated email from the ASF dual-hosted git repository.

granthenke pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git

commit 4c53ce892a083ff660f4b7af2374fd6b273329f8
Author: Andrew Wong <aw...@cloudera.com>
AuthorDate: Wed Apr 28 21:26:41 2021 -0700

    KUDU-2612: fix race when adopting partition lock
    
    The way we were transferring the partition lock to the Txn wasn't
    thread-safe. This patch wraps the critical section with a spinlock.
    
    This patch includes a test that would trigger a TSAN error.
    
    Change-Id: Ifc7ac7f474baf308860847298355b300c76d9ef5
    Reviewed-on: http://gerrit.cloudera.org:8080/17361
    Reviewed-by: Alexey Serbin <as...@cloudera.com>
    Tested-by: Andrew Wong <aw...@cloudera.com>
---
 src/kudu/tablet/txn_participant-test.cc | 20 ++++++++++++++++++++
 src/kudu/tablet/txn_participant.cc      |  1 +
 src/kudu/tablet/txn_participant.h       |  2 ++
 3 files changed, 23 insertions(+)

diff --git a/src/kudu/tablet/txn_participant-test.cc b/src/kudu/tablet/txn_participant-test.cc
index cedcebd..41ceb44 100644
--- a/src/kudu/tablet/txn_participant-test.cc
+++ b/src/kudu/tablet/txn_participant-test.cc
@@ -39,6 +39,7 @@
 #include "kudu/common/iterator.h"
 #include "kudu/common/partial_row.h"
 #include "kudu/common/row_operations.h"
+#include "kudu/common/row_operations.pb.h"
 #include "kudu/common/schema.h"
 #include "kudu/common/timestamp.h"
 #include "kudu/common/wire_protocol.h"
@@ -389,6 +390,25 @@ TEST_F(TxnParticipantTest, TestConcurrentTransactions) {
   }
 }
 
+TEST_F(TxnParticipantTest, TestConcurrentWrites) {
+  constexpr const auto kNumRows = 10;
+  constexpr const auto kTxnId = 0;
+  ParticipantResponsePB resp;
+  ASSERT_OK(CallParticipantOp(tablet_replica_.get(), kTxnId,
+                              ParticipantOpPB::BEGIN_TXN, -1, &resp));
+  vector<thread> threads;
+  Status statuses[kNumRows];
+  for (int i = 0; i < kNumRows; i++) {
+    threads.emplace_back([&, i] {
+      statuses[i] = Write(i, kTxnId);
+    });
+  }
+  std::for_each(threads.begin(), threads.end(), [] (thread& t) { t.join(); });
+  for (const auto& s : statuses) {
+    EXPECT_OK(s);
+  }
+}
+
 // Concurrently try to apply every op and test, based on the results, that some
 // invariants are maintained.
 TEST_F(TxnParticipantTest, TestConcurrentOps) {
diff --git a/src/kudu/tablet/txn_participant.cc b/src/kudu/tablet/txn_participant.cc
index 4a63d53..1f72eb5 100644
--- a/src/kudu/tablet/txn_participant.cc
+++ b/src/kudu/tablet/txn_participant.cc
@@ -77,6 +77,7 @@ void Txn::AcquireReadLock(shared_lock<rw_semaphore>* txn_lock) {
 void Txn::AdoptPartitionLock(ScopedPartitionLock partition_lock) {
   if (PREDICT_TRUE(FLAGS_enable_txn_partition_lock)) {
     TabletServerErrorPB::Code code = tserver::TabletServerErrorPB::UNKNOWN_ERROR;
+    std::lock_guard<simple_spinlock> l(lock_);
 #ifndef NDEBUG
     CHECK(partition_lock.IsAcquired(&code)) << code;
     if (partition_lock_.IsAcquired(&code)) {
diff --git a/src/kudu/tablet/txn_participant.h b/src/kudu/tablet/txn_participant.h
index 92b7b00..43800ae 100644
--- a/src/kudu/tablet/txn_participant.h
+++ b/src/kudu/tablet/txn_participant.h
@@ -265,6 +265,7 @@ class Txn : public RefCountedThreadSafe<Txn> {
   }
 
   void ReleasePartitionLock() {
+    std::lock_guard<simple_spinlock> l(lock_);
     partition_lock_.Release();
   }
 
@@ -358,6 +359,7 @@ class Txn : public RefCountedThreadSafe<Txn> {
   std::unique_ptr<ScopedOp> commit_op_;
 
   // Holds the partition lock acquired for this transaction.
+  simple_spinlock lock_;
   ScopedPartitionLock partition_lock_;
 
   DISALLOW_COPY_AND_ASSIGN(Txn);