You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kudu.apache.org by to...@apache.org on 2016/07/18 23:13:07 UTC

[3/3] incubator-kudu git commit: Fix flaky disk_reservation-itest

Fix flaky disk_reservation-itest

There are two fixes in this patch for two separate types of failures
seen on Jenkins for this test:

1. Fix a data race in DiskReservationITest.TestFillMultipleDisks

We can't override gflag strings at runtime in a thread-safe manner,
although this test was attempting to.

Take what used to be a single parsed string gflag and replace it with 2
path strings and 2 integer overrides, one for each path. That makes 4
new test-only gflags total. Only the integer flags are modified at
runtime.

2. Fix a startup race between the TestWorkload client thread and
   SetFlags() in DiskReservationITest.TestWalWriteToFullDiskAborts

We need to wait for some rows to be written after starting up the
TestWorkload threads in TestWalWriteToFullDiskAborts before we allow the
TS to crash by setting gflags. If we don't, the test gets confused
because the TestWorkload client thread may not be able to resolve where
the tablet is located. The previous failures were because we sometimes
managed to crash the TS before it sent its tablet report to the master.

After applying these changes, I looped disk_reservation-itest 1000x in
TSAN mode and got no failures.

Change-Id: Ica86390b49d459e079807d777e97c47fa35134d1
Reviewed-on: http://gerrit.cloudera.org:8080/3652
Tested-by: Mike Percy <mp...@apache.org>
Reviewed-by: Todd Lipcon <to...@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/incubator-kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-kudu/commit/2521554b
Tree: http://git-wip-us.apache.org/repos/asf/incubator-kudu/tree/2521554b
Diff: http://git-wip-us.apache.org/repos/asf/incubator-kudu/diff/2521554b

Branch: refs/heads/master
Commit: 2521554b3f30b8711519c20292b933f15ce2a529
Parents: 0a10606
Author: Mike Percy <mp...@apache.org>
Authored: Thu Jul 14 01:51:36 2016 -0700
Committer: Todd Lipcon <to...@apache.org>
Committed: Mon Jul 18 23:12:12 2016 +0000

----------------------------------------------------------------------
 .../integration-tests/disk_reservation-itest.cc | 34 +++++++-----
 src/kudu/util/env_util.cc                       | 56 ++++++++++++++------
 2 files changed, 61 insertions(+), 29 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-kudu/blob/2521554b/src/kudu/integration-tests/disk_reservation-itest.cc
----------------------------------------------------------------------
diff --git a/src/kudu/integration-tests/disk_reservation-itest.cc b/src/kudu/integration-tests/disk_reservation-itest.cc
index 6ed6f30..41cf999 100644
--- a/src/kudu/integration-tests/disk_reservation-itest.cc
+++ b/src/kudu/integration-tests/disk_reservation-itest.cc
@@ -64,6 +64,11 @@ TEST_F(DiskReservationITest, TestFillMultipleDisks) {
   ts_flags.push_back("--disable_core_dumps");
   ts_flags.push_back(Substitute("--fs_data_dirs=$0/a,$0/b",
                                 GetTestDataDirectory()));
+  ts_flags.push_back(Substitute("--disk_reserved_override_prefix_1_path_for_testing=$0/a",
+                                GetTestDataDirectory()));
+  ts_flags.push_back(Substitute("--disk_reserved_override_prefix_2_path_for_testing=$0/b",
+                                GetTestDataDirectory()));
+
   NO_FATALS(StartCluster(ts_flags, {}, 1));
 
   TestWorkload workload(cluster_.get());
@@ -86,12 +91,13 @@ TEST_F(DiskReservationITest, TestFillMultipleDisks) {
 
   LOG(INFO) << "Two log block containers are active";
 
-  // Simulate that /a has 0 bytes free but /b has 1GB free.
+  // Simulate that /a has 0 bytes free.
+  ASSERT_OK(cluster_->SetFlag(cluster_->tablet_server(0),
+                              "disk_reserved_override_prefix_1_bytes_free_for_testing", "0"));
+  // Simulate that /b has 1GB free.
   ASSERT_OK(cluster_->SetFlag(cluster_->tablet_server(0),
-                              "disk_reserved_prefixes_with_bytes_free_for_testing",
-                              Substitute("$0/a:0,$0/b:$1",
-                                         GetTestDataDirectory(),
-                                         1L * 1024 * 1024 * 1024)));
+                              "disk_reserved_override_prefix_2_bytes_free_for_testing",
+                              Substitute("$0", 1L * 1024 * 1024 * 1024)));
 
   // Wait until we have 1 unusable container.
   while (true) {
@@ -107,9 +113,7 @@ TEST_F(DiskReservationITest, TestFillMultipleDisks) {
 
   // Now simulate that all disks are full.
   ASSERT_OK(cluster_->SetFlag(cluster_->tablet_server(0),
-                              "disk_reserved_prefixes_with_bytes_free_for_testing",
-                              Substitute("$0/a:0,$0/b:0",
-                                         GetTestDataDirectory())));
+                              "disk_reserved_override_prefix_2_bytes_free_for_testing", "0"));
 
   // Wait for crash due to inability to flush or compact.
   ASSERT_OK(cluster_->tablet_server(0)->WaitForCrash(MonoDelta::FromSeconds(10)));
@@ -127,14 +131,20 @@ TEST_F(DiskReservationITest, TestWalWriteToFullDiskAborts) {
   TestWorkload workload(cluster_.get());
   workload.set_num_replicas(1);
   workload.set_timeout_allowed(true); // Allow timeouts because we expect the server to crash.
-  workload.set_write_timeout_millis(100); // Keep test time low after crash.
+  workload.set_write_timeout_millis(200); // Keep test time low after crash.
   // Write lots of data to quickly fill up our 1mb log segment size.
-  workload.set_num_write_threads(8);
-  workload.set_write_batch_size(1024);
-  workload.set_payload_bytes(128);
+  workload.set_num_write_threads(4);
+  workload.set_write_batch_size(10);
+  workload.set_payload_bytes(1000);
   workload.Setup();
   workload.Start();
 
+  // Ensure the cluster is running, the client was able to look up the tablet
+  // locations, etc.
+  while (workload.rows_inserted() < 10) {
+    SleepFor(MonoDelta::FromMilliseconds(10));
+  }
+
   // Set the disk to "nearly full" which should eventually cause a crash at WAL
   // preallocation time.
   ASSERT_OK(cluster_->SetFlag(cluster_->tablet_server(0),

http://git-wip-us.apache.org/repos/asf/incubator-kudu/blob/2521554b/src/kudu/util/env_util.cc
----------------------------------------------------------------------
diff --git a/src/kudu/util/env_util.cc b/src/kudu/util/env_util.cc
index e2bb489..5218c33 100644
--- a/src/kudu/util/env_util.cc
+++ b/src/kudu/util/env_util.cc
@@ -39,11 +39,31 @@ DEFINE_int64(disk_reserved_bytes_free_for_testing, -1,
 TAG_FLAG(disk_reserved_bytes_free_for_testing, runtime);
 TAG_FLAG(disk_reserved_bytes_free_for_testing, unsafe);
 
-DEFINE_string(disk_reserved_prefixes_with_bytes_free_for_testing, "",
-             "For testing only! Syntax: '/path/a:5,/path/b:7' means a has 5 bytes free, "
-             "b has 7 bytes free. Set to empty string to disable this test-specific override.");
-TAG_FLAG(disk_reserved_prefixes_with_bytes_free_for_testing, runtime);
-TAG_FLAG(disk_reserved_prefixes_with_bytes_free_for_testing, unsafe);
+// We define some flags for testing purposes: Two prefixes and their associated
+// "bytes free" overrides.
+DEFINE_string(disk_reserved_override_prefix_1_path_for_testing, "",
+              "For testing only! Specifies a prefix to override the visible 'bytes free' on. "
+              "Use --disk_reserved_override_prefix_1_bytes_free_for_testing to set the number of "
+              "bytes free for this path prefix. Set to empty string to disable.");
+DEFINE_int64(disk_reserved_override_prefix_1_bytes_free_for_testing, -1,
+             "For testing only! Set number of bytes free on the path prefix specified by "
+             "--disk_reserved_override_prefix_1_path_for_testing. Set to -1 to disable.");
+DEFINE_string(disk_reserved_override_prefix_2_path_for_testing, "",
+              "For testing only! Specifies a prefix to override the visible 'bytes free' on. "
+              "Use --disk_reserved_override_prefix_2_bytes_free_for_testing to set the number of "
+              "bytes free for this path prefix. Set to empty string to disable.");
+DEFINE_int64(disk_reserved_override_prefix_2_bytes_free_for_testing, -1,
+             "For testing only! Set number of bytes free on the path prefix specified by "
+             "--disk_reserved_override_prefix_2_path_for_testing. Set to -1 to disable.");
+//DEFINE_string(disk_reserved_prefixes_with_bytes_free_for_testing, "",
+//             "For testing only! Syntax: '/path/a:5,/path/b:7' means a has 5 bytes free, "
+//             "b has 7 bytes free. Set to empty string to disable this test-specific override.");
+TAG_FLAG(disk_reserved_override_prefix_1_path_for_testing, unsafe);
+TAG_FLAG(disk_reserved_override_prefix_2_path_for_testing, unsafe);
+TAG_FLAG(disk_reserved_override_prefix_1_bytes_free_for_testing, unsafe);
+TAG_FLAG(disk_reserved_override_prefix_2_bytes_free_for_testing, unsafe);
+TAG_FLAG(disk_reserved_override_prefix_1_bytes_free_for_testing, runtime);
+TAG_FLAG(disk_reserved_override_prefix_2_bytes_free_for_testing, runtime);
 
 using std::shared_ptr;
 using strings::Substitute;
@@ -81,16 +101,17 @@ Status OpenFileForSequential(Env *env, const string &path,
   return Status::OK();
 }
 
-// If we can parse the flag value, and the flag specifies an override for the
-// given path, then override the free bytes to match what is specified in the
-// flag. See definition of disk_reserved_prefixes_with_bytes_free_for_testing.
-static void OverrideBytesFree(const string& path, const string& flag, int64_t* bytes_free) {
-  for (const auto& str : strings::Split(flag, ",")) {
-    pair<string, string> p = strings::Split(str, ":");
-    if (HasPrefixString(path, p.first)) {
-      int64_t free_override;
-      if (!safe_strto64(p.second.c_str(), p.second.size(), &free_override)) return;
-      *bytes_free = free_override;
+// If any of the override gflags specifies an override for the given path, then
+// override the free bytes to match what is specified in the flag. See the
+// definitions of these test-only flags for more information.
+static void OverrideBytesFreeWithTestingFlags(const string& path, int64_t* bytes_free) {
+  const string* prefixes[] = { &FLAGS_disk_reserved_override_prefix_1_path_for_testing,
+                               &FLAGS_disk_reserved_override_prefix_2_path_for_testing };
+  const int64_t* overrides[] = { &FLAGS_disk_reserved_override_prefix_1_bytes_free_for_testing,
+                                 &FLAGS_disk_reserved_override_prefix_2_bytes_free_for_testing };
+  for (int i = 0; i < arraysize(prefixes); i++) {
+    if (*overrides[i] != -1 && !prefixes[i]->empty() && HasPrefixString(path, *prefixes[i])) {
+      *bytes_free = *overrides[i];
       return;
     }
   }
@@ -107,8 +128,9 @@ Status VerifySufficientDiskSpace(Env *env, const std::string& path,
   if (PREDICT_FALSE(FLAGS_disk_reserved_bytes_free_for_testing > -1)) {
     bytes_free = FLAGS_disk_reserved_bytes_free_for_testing;
   }
-  if (PREDICT_FALSE(!FLAGS_disk_reserved_prefixes_with_bytes_free_for_testing.empty())) {
-    OverrideBytesFree(path, FLAGS_disk_reserved_prefixes_with_bytes_free_for_testing, &bytes_free);
+  if (PREDICT_FALSE(FLAGS_disk_reserved_override_prefix_1_bytes_free_for_testing != -1 ||
+                    FLAGS_disk_reserved_override_prefix_2_bytes_free_for_testing != -1)) {
+    OverrideBytesFreeWithTestingFlags(path, &bytes_free);
   }
 
   if (bytes_free - requested_bytes < reserved_bytes) {