You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kudu.apache.org by al...@apache.org on 2021/04/07 14:52:09 UTC
[kudu] 01/03: [txn_participant-test] deflake
TxnParticipantTest.TestConcurrentOps
This is an automated email from the ASF dual-hosted git repository.
alexey pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git
commit 83c97c0e35b8bd7a2f871e18fec685355dbbbd9b
Author: Andrew Wong <aw...@cloudera.com>
AuthorDate: Tue Apr 6 21:34:54 2021 -0700
[txn_participant-test] deflake TxnParticipantTest.TestConcurrentOps
The test started failing after 6be9794f91a2eebb291e4db2daf63a0f12c3ae2a
since one of the tested invariants is no longer true: in a
randomly-ordered sequence of participant ops, the BEGIN_TXN op is no
longer guaranteed to succeed. Instead, either it or the ABORT_TXN op
must have succeeded.
This test failed pretty quickly when looping it. With this fix, I looped
it 1000 times and it passed.
Change-Id: I58776295a359922a1b72c4286bd8b78f36ea50bd
Reviewed-on: http://gerrit.cloudera.org:8080/17278
Reviewed-by: Alexey Serbin <as...@cloudera.com>
Tested-by: Andrew Wong <aw...@cloudera.com>
---
src/kudu/tablet/txn_participant-test.cc | 15 +++++++++++----
1 file changed, 11 insertions(+), 4 deletions(-)
diff --git a/src/kudu/tablet/txn_participant-test.cc b/src/kudu/tablet/txn_participant-test.cc
index 4e54f31..f21b698 100644
--- a/src/kudu/tablet/txn_participant-test.cc
+++ b/src/kudu/tablet/txn_participant-test.cc
@@ -50,6 +50,7 @@
#include "kudu/consensus/raft_consensus.h"
#include "kudu/gutil/map-util.h"
#include "kudu/gutil/ref_counted.h"
+#include "kudu/gutil/strings/substitute.h"
#include "kudu/tablet/ops/op.h"
#include "kudu/tablet/ops/op_driver.h"
#include "kudu/tablet/ops/op_tracker.h"
@@ -83,6 +84,7 @@ using std::string;
using std::thread;
using std::unique_ptr;
using std::vector;
+using strings::Substitute;
DECLARE_bool(enable_maintenance_manager);
DECLARE_bool(log_preallocate_segments);
@@ -410,22 +412,27 @@ TEST_F(TxnParticipantTest, TestConcurrentOps) {
const auto status_for_op = [&] (ParticipantOpPB::ParticipantOpType type) {
return statuses[FindOrDie(kIndexByOps, type)];
};
- // Regardless of order, we should have been able to begin the transaction.
- ASSERT_OK(status_for_op(ParticipantOpPB::BEGIN_TXN));
+ // The only way we could have failed to begin a transaction is if we
+ // replicated an ABORT_TXN first.
+ ASSERT_TRUE(status_for_op(ParticipantOpPB::BEGIN_TXN).ok() ||
+ status_for_op(ParticipantOpPB::ABORT_TXN).ok()) <<
+ Substitute("BEGIN_TXN error: $0, ABORT_TXN error: $1",
+ status_for_op(ParticipantOpPB::BEGIN_TXN).ToString(),
+ status_for_op(ParticipantOpPB::ABORT_TXN).ToString());
// If we finalized the commit, we must not have been able to abort.
if (status_for_op(ParticipantOpPB::FINALIZE_COMMIT).ok()) {
ASSERT_EQ(vector<TxnParticipant::TxnEntry>({
{ kTxnId, kCommitted, kDummyCommitTimestamp },
}), txn_participant()->GetTxnsForTests());
- ASSERT_FALSE(statuses[FindOrDie(kIndexByOps, ParticipantOpPB::ABORT_TXN)].ok());
+ ASSERT_FALSE(status_for_op(ParticipantOpPB::ABORT_TXN).ok());
// If we aborted the commit, we could not have finalized the commit.
} else if (status_for_op(ParticipantOpPB::ABORT_TXN).ok()) {
ASSERT_EQ(vector<TxnParticipant::TxnEntry>({
{ kTxnId, kAborted, -1 },
}), txn_participant()->GetTxnsForTests());
- ASSERT_FALSE(statuses[FindOrDie(kIndexByOps, ParticipantOpPB::FINALIZE_COMMIT)].ok());
+ ASSERT_FALSE(status_for_op(ParticipantOpPB::FINALIZE_COMMIT).ok());
// If we neither aborted nor finalized, but we began to commit, we should be
// left with the commit in progress.