You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kudu.apache.org by al...@apache.org on 2021/04/07 14:52:09 UTC

[kudu] 01/03: [txn_participant-test] deflake TxnParticipantTest.TestConcurrentOps

This is an automated email from the ASF dual-hosted git repository.

alexey pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git

commit 83c97c0e35b8bd7a2f871e18fec685355dbbbd9b
Author: Andrew Wong <aw...@cloudera.com>
AuthorDate: Tue Apr 6 21:34:54 2021 -0700

    [txn_participant-test] deflake TxnParticipantTest.TestConcurrentOps
    
    The test started failing after 6be9794f91a2eebb291e4db2daf63a0f12c3ae2a
    since one of the tested invariants is no longer true: in a
    randomly-ordered sequence of participant ops, the BEGIN_TXN op is no
    longer guaranteed to succeed. Instead, either it or the ABORT_TXN op
    must have succeeded.
    
    This test failed pretty quickly when looping it. With this fix, I looped
    it 1000 times and it passed.
    
    Change-Id: I58776295a359922a1b72c4286bd8b78f36ea50bd
    Reviewed-on: http://gerrit.cloudera.org:8080/17278
    Reviewed-by: Alexey Serbin <as...@cloudera.com>
    Tested-by: Andrew Wong <aw...@cloudera.com>
---
 src/kudu/tablet/txn_participant-test.cc | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/src/kudu/tablet/txn_participant-test.cc b/src/kudu/tablet/txn_participant-test.cc
index 4e54f31..f21b698 100644
--- a/src/kudu/tablet/txn_participant-test.cc
+++ b/src/kudu/tablet/txn_participant-test.cc
@@ -50,6 +50,7 @@
 #include "kudu/consensus/raft_consensus.h"
 #include "kudu/gutil/map-util.h"
 #include "kudu/gutil/ref_counted.h"
+#include "kudu/gutil/strings/substitute.h"
 #include "kudu/tablet/ops/op.h"
 #include "kudu/tablet/ops/op_driver.h"
 #include "kudu/tablet/ops/op_tracker.h"
@@ -83,6 +84,7 @@ using std::string;
 using std::thread;
 using std::unique_ptr;
 using std::vector;
+using strings::Substitute;
 
 DECLARE_bool(enable_maintenance_manager);
 DECLARE_bool(log_preallocate_segments);
@@ -410,22 +412,27 @@ TEST_F(TxnParticipantTest, TestConcurrentOps) {
   const auto status_for_op = [&] (ParticipantOpPB::ParticipantOpType type) {
     return statuses[FindOrDie(kIndexByOps, type)];
   };
-  // Regardless of order, we should have been able to begin the transaction.
-  ASSERT_OK(status_for_op(ParticipantOpPB::BEGIN_TXN));
+  // The only way we could have failed to begin a transaction is if we
+  // replicated an ABORT_TXN first.
+  ASSERT_TRUE(status_for_op(ParticipantOpPB::BEGIN_TXN).ok() ||
+              status_for_op(ParticipantOpPB::ABORT_TXN).ok()) <<
+      Substitute("BEGIN_TXN error: $0, ABORT_TXN error: $1",
+                 status_for_op(ParticipantOpPB::BEGIN_TXN).ToString(),
+                 status_for_op(ParticipantOpPB::ABORT_TXN).ToString());
 
   // If we finalized the commit, we must not have been able to abort.
   if (status_for_op(ParticipantOpPB::FINALIZE_COMMIT).ok()) {
     ASSERT_EQ(vector<TxnParticipant::TxnEntry>({
         { kTxnId, kCommitted, kDummyCommitTimestamp },
     }), txn_participant()->GetTxnsForTests());
-    ASSERT_FALSE(statuses[FindOrDie(kIndexByOps, ParticipantOpPB::ABORT_TXN)].ok());
+    ASSERT_FALSE(status_for_op(ParticipantOpPB::ABORT_TXN).ok());
 
   // If we aborted the commit, we could not have finalized the commit.
   } else if (status_for_op(ParticipantOpPB::ABORT_TXN).ok()) {
     ASSERT_EQ(vector<TxnParticipant::TxnEntry>({
         { kTxnId, kAborted, -1 },
     }), txn_participant()->GetTxnsForTests());
-    ASSERT_FALSE(statuses[FindOrDie(kIndexByOps, ParticipantOpPB::FINALIZE_COMMIT)].ok());
+    ASSERT_FALSE(status_for_op(ParticipantOpPB::FINALIZE_COMMIT).ok());
 
   // If we neither aborted nor finalized, but we began to commit, we should be
   // left with the commit in progress.