You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kudu.apache.org by mp...@apache.org on 2016/12/11 16:34:34 UTC

kudu git commit: Make delete_table-test less flaky

Repository: kudu
Updated Branches:
  refs/heads/master e018a837c -> 2d74f7c06


Make delete_table-test less flaky

This patch reduces the flakiness of delete_table-test by fixing the
causes of flakiness in two individual tests:

1. DeleteTableTest.TestAutoTombstoneAfterTabletCopyRemoteFails

This test triggers a remote crash that would sometimes prevent an RPC
response from arriving at the caller. We now account for this by
accepting either an OK or a NetworkError status from the AddServer call.

2. DeleteTableTombstonedParamTest.TestTabletTombstone

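This test would sometimes not write quickly enough, resulting in a
timeout waiting for 3 WAL files to appear on tablet 0. This could occur
when running under TSAN on a system under heavy load. The workload now
writes 32KB ops with a write batch size of 1, and the cluster is started
with --log_async_preallocate_segments=false and
--log_min_segments_to_retain=3.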

Now, the above two tests run successfully in 200/200 attempts on TSAN
under load.

This patch does not address the flakiness of
DeleteTableTest.TestDeleteTableWithConcurrentWrites yet. That test has
two issues that are being tracked as KUDU-1294 and KUDU-1797.

However, a log message has been added in this commit to aid in
debugging that test in a later patch.

Change-Id: Iea7760009acc8a795225369721af97583974feba
Reviewed-on: http://gerrit.cloudera.org:8080/5421
Tested-by: Kudu Jenkins
Reviewed-by: Todd Lipcon <to...@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/2d74f7c0
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/2d74f7c0
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/2d74f7c0

Branch: refs/heads/master
Commit: 2d74f7c062f6795c6be811d8c8bb9ed17f0240d1
Parents: e018a83
Author: Mike Percy <mp...@apache.org>
Authored: Tue Dec 6 17:00:30 2016 +0000
Committer: Todd Lipcon <to...@apache.org>
Committed: Sun Dec 11 14:56:00 2016 +0000

----------------------------------------------------------------------
 src/kudu/integration-tests/delete_table-test.cc | 23 ++++++++++++++++++--
 1 file changed, 21 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kudu/blob/2d74f7c0/src/kudu/integration-tests/delete_table-test.cc
----------------------------------------------------------------------
diff --git a/src/kudu/integration-tests/delete_table-test.cc b/src/kudu/integration-tests/delete_table-test.cc
index 5f0f58b..a2f6091 100644
--- a/src/kudu/integration-tests/delete_table-test.cc
+++ b/src/kudu/integration-tests/delete_table-test.cc
@@ -376,6 +376,7 @@ TEST_F(DeleteTableTest, TestDeleteTableWithConcurrentWrites) {
   NO_FATALS(StartCluster());
   int n_iters = AllowSlowTests() ? 20 : 1;
   for (int i = 0; i < n_iters; i++) {
+    LOG(INFO) << "Running iteration " << i;
     TestWorkload workload(cluster_.get());
     workload.set_table_name(Substitute("table-$0", i));
 
@@ -630,7 +631,9 @@ TEST_F(DeleteTableTest, TestAutoTombstoneAfterTabletCopyRemoteFails) {
   ASSERT_OK(cluster_->tablet_server(kTsIndex)->Restart());
   TServerDetails* leader = ts_map_[kLeaderUuid];
   TServerDetails* ts = ts_map_[cluster_->tablet_server(0)->uuid()];
-  ASSERT_OK(itest::AddServer(leader, tablet_id, ts, RaftPeerPB::VOTER, boost::none, kTimeout));
+  // The server may crash before responding to our RPC.
+  Status s = itest::AddServer(leader, tablet_id, ts, RaftPeerPB::VOTER, boost::none, kTimeout);
+  ASSERT_TRUE(s.ok() || s.IsNetworkError()) << s.ToString();
   NO_FATALS(WaitForTSToCrash(kLeaderIndex));
 
   // The tablet server will detect that the leader failed, and automatically
@@ -1146,7 +1149,11 @@ class DeleteTableTombstonedParamTest : public DeleteTableTest,
 //    (transition from TABLET_DATA_TOMBSTONED to TABLET_DATA_DELETED).
 TEST_P(DeleteTableTombstonedParamTest, TestTabletTombstone) {
   vector<string> flags;
-  flags.push_back("--log_segment_size_mb=1"); // Faster log rolls.
+  // We want fast log rolls and deterministic preallocation, since we wait for
+  // a certain number of logs at the beginning of the test.
+  flags.push_back("--log_segment_size_mb=1");
+  flags.push_back("--log_async_preallocate_segments=false");
+  flags.push_back("--log_min_segments_to_retain=3");
   NO_FATALS(StartCluster(flags));
   const string fault_flag = GetParam();
   LOG(INFO) << "Running with fault flag: " << fault_flag;
@@ -1173,6 +1180,8 @@ TEST_P(DeleteTableTombstonedParamTest, TestTabletTombstone) {
 
   // Start a workload on the cluster, and run it until we find WALs on disk.
   TestWorkload workload(cluster_.get());
+  workload.set_payload_bytes(32 * 1024); // Write ops of size 32KB to quickly fill the logs.
+  workload.set_write_batch_size(1);
   workload.Setup();
 
   // The table should have 2 tablets (1 split) on all 3 tservers (for a total of 6).
@@ -1185,6 +1194,8 @@ TEST_P(DeleteTableTombstonedParamTest, TestTabletTombstone) {
   vector<ListTabletsResponsePB::StatusAndSchemaPB> tablets;
   ASSERT_OK(itest::WaitForNumTabletsOnTS(ts, 2, timeout, &tablets));
 
+  LOG(INFO) << "Starting workload...";
+
   // Run the workload against whoever the leader is until WALs appear on TS 0
   // for the tablets we created.
   const int kTsIndex = 0; // Index of the tablet server we'll use for the test.
@@ -1192,10 +1203,18 @@ TEST_P(DeleteTableTombstonedParamTest, TestTabletTombstone) {
   while (workload.rows_inserted() < 100) {
     SleepFor(MonoDelta::FromMilliseconds(10));
   }
+
+  LOG(INFO) << "Waiting for 3 wal files for tablet "
+            << tablets[0].tablet_status().tablet_id() << "...";
   ASSERT_OK(inspect_->WaitForMinFilesInTabletWalDirOnTS(kTsIndex,
             tablets[0].tablet_status().tablet_id(), 3));
+
+  LOG(INFO) << "Waiting for 3 wal files for tablet "
+            << tablets[1].tablet_status().tablet_id() << "...";
   ASSERT_OK(inspect_->WaitForMinFilesInTabletWalDirOnTS(kTsIndex,
             tablets[1].tablet_status().tablet_id(), 3));
+
+  LOG(INFO) << "Stopping workload...";
   workload.StopAndJoin();
 
   // Shut down the master and the other tablet servers so they don't interfere