You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kudu.apache.org by to...@apache.org on 2016/02/02 08:09:52 UTC

incubator-kudu git commit: Fix flakiness in create-table-itest

Repository: incubator-kudu
Updated Branches:
  refs/heads/master 0b5545bc3 -> 266d214b1


Fix flakiness in create-table-itest

This fixes a flaky test introduced in c5d748198df48007b2a6fe07c2a214a885fc41ee.
The test was flaky because tablet deletion is asynchronous and can actually
take a substantial amount of time since it does file system operations. Since
RPCs aren't counted in a metric until they're complete, it was possible for
multiple replicas to get created before any of them got deleted.

This patch just changes the verification mechanism to ensure that the tablet
server eventually converges to the correct state.

I looped the new test 100 times and it passed:
http://dist-test.cloudera.org/job?job_id=todd.1454117343.9201

Change-Id: I25ed27a9cf0b4539b6e7a056a2d5c8872e3c1860
Reviewed-on: http://gerrit.cloudera.org:8080/1971
Tested-by: Todd Lipcon <to...@apache.org>
Reviewed-by: Binglin Chang <de...@gmail.com>
Reviewed-by: Todd Lipcon <to...@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/incubator-kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-kudu/commit/266d214b
Tree: http://git-wip-us.apache.org/repos/asf/incubator-kudu/tree/266d214b
Diff: http://git-wip-us.apache.org/repos/asf/incubator-kudu/diff/266d214b

Branch: refs/heads/master
Commit: 266d214b123dfe2db610ec41de00908495eae6ba
Parents: 0b5545b
Author: Todd Lipcon <to...@apache.org>
Authored: Fri Jan 29 18:19:09 2016 -0800
Committer: Todd Lipcon <to...@apache.org>
Committed: Tue Feb 2 07:04:24 2016 +0000

----------------------------------------------------------------------
 .../integration-tests/create-table-itest.cc     | 28 ++++++++++----------
 .../external_mini_cluster_fs_inspector.cc       |  9 +++++++
 .../external_mini_cluster_fs_inspector.h        | 10 +++++++
 3 files changed, 33 insertions(+), 14 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-kudu/blob/266d214b/src/kudu/integration-tests/create-table-itest.cc
----------------------------------------------------------------------
diff --git a/src/kudu/integration-tests/create-table-itest.cc b/src/kudu/integration-tests/create-table-itest.cc
index 3583329..f70ee42 100644
--- a/src/kudu/integration-tests/create-table-itest.cc
+++ b/src/kudu/integration-tests/create-table-itest.cc
@@ -16,6 +16,7 @@
 // under the License.
 
 #include <gflags/gflags.h>
+#include <glog/stl_logging.h>
 #include <gtest/gtest.h>
 #include <map>
 #include <memory>
@@ -82,20 +83,6 @@ TEST_F(CreateTableITest, TestCreateWhenMajorityOfReplicasFailCreation) {
     LOG(INFO) << "Waiting for the master to retry creating the tablet 3 times... "
               << num_create_attempts << " RPCs seen so far";
 
-    int64_t num_delete_tablet_rpc;
-    ASSERT_OK(cluster_->tablet_server(0)->GetInt64Metric(
-        &METRIC_ENTITY_server,
-        "kudu.tabletserver",
-        &METRIC_handler_latency_kudu_tserver_TabletServerAdminService_DeleteTablet,
-        "total_count",
-        &num_delete_tablet_rpc));
-    // When attempting to replace old tablet and create new tablet,
-    // master should also send delete tablet rpc to tablet servers.
-    // There may be race for async create/delete rpcs, but there
-    // absolute difference should be less than or equal to 1.
-    ASSERT_GE(num_delete_tablet_rpc - num_create_attempts, -1);
-    ASSERT_LE(num_delete_tablet_rpc - num_create_attempts, 1);
-
     // The CreateTable operation should still be considered in progress, even though
     // we'll be successful at creating a single replica.
     bool in_progress = false;
@@ -115,6 +102,19 @@ TEST_F(CreateTableITest, TestCreateWhenMajorityOfReplicasFailCreation) {
     ASSERT_OK(client_->IsCreateTableInProgress(kTableName, &in_progress));
     SleepFor(MonoDelta::FromMilliseconds(100));
   }
+
+  // The server that was up from the beginning should be left with only
+  // one tablet, eventually, since the tablets which failed to get created
+  // properly should get deleted.
+  vector<string> tablets;
+  int wait_iter = 0;
+  while (tablets.size() != 1 && wait_iter++ < 100) {
+    LOG(INFO) << "Waiting for only one tablet to be left on TS 0. Currently have: "
+              << tablets;
+    SleepFor(MonoDelta::FromMilliseconds(100));
+    tablets = inspect_->ListTabletsWithDataOnTS(0);
+  }
+  ASSERT_EQ(1, tablets.size()) << "Tablets on TS0: " << tablets;
 }
 
 // Regression test for KUDU-1317. Ensure that, when a table is created,

http://git-wip-us.apache.org/repos/asf/incubator-kudu/blob/266d214b/src/kudu/integration-tests/external_mini_cluster_fs_inspector.cc
----------------------------------------------------------------------
diff --git a/src/kudu/integration-tests/external_mini_cluster_fs_inspector.cc b/src/kudu/integration-tests/external_mini_cluster_fs_inspector.cc
index 41c4f24..e1b5ca6 100644
--- a/src/kudu/integration-tests/external_mini_cluster_fs_inspector.cc
+++ b/src/kudu/integration-tests/external_mini_cluster_fs_inspector.cc
@@ -91,6 +91,7 @@ vector<string> ExternalMiniClusterFsInspector::ListTablets() {
   }
   return vector<string>(tablets.begin(), tablets.end());
 }
+
 vector<string> ExternalMiniClusterFsInspector::ListTabletsOnTS(int index) {
   string data_dir = cluster_->tablet_server(index)->data_dir();
   string meta_dir = JoinPathSegments(data_dir, FsManager::kTabletMetadataDirName);
@@ -99,6 +100,14 @@ vector<string> ExternalMiniClusterFsInspector::ListTabletsOnTS(int index) {
   return tablets;
 }
 
+vector<string> ExternalMiniClusterFsInspector::ListTabletsWithDataOnTS(int index) {
+  string data_dir = cluster_->tablet_server(index)->data_dir();
+  string wal_dir = JoinPathSegments(data_dir, FsManager::kWalDirName);
+  vector<string> tablets;
+  CHECK_OK(ListFilesInDir(wal_dir, &tablets));
+  return tablets;
+}
+
 int ExternalMiniClusterFsInspector::CountWALSegmentsForTabletOnTS(int index,
                                                                   const string& tablet_id) {
   string data_dir = cluster_->tablet_server(index)->data_dir();

http://git-wip-us.apache.org/repos/asf/incubator-kudu/blob/266d214b/src/kudu/integration-tests/external_mini_cluster_fs_inspector.h
----------------------------------------------------------------------
diff --git a/src/kudu/integration-tests/external_mini_cluster_fs_inspector.h b/src/kudu/integration-tests/external_mini_cluster_fs_inspector.h
index ebe038b..ff160da 100644
--- a/src/kudu/integration-tests/external_mini_cluster_fs_inspector.h
+++ b/src/kudu/integration-tests/external_mini_cluster_fs_inspector.h
@@ -52,8 +52,18 @@ class ExternalMiniClusterFsInspector {
   Status ListFilesInDir(const std::string& path, std::vector<std::string>* entries);
   int CountFilesInDir(const std::string& path);
   int CountWALSegmentsOnTS(int index);
+
+  // List all of the tablets with tablet metadata in the cluster.
   std::vector<std::string> ListTablets();
+
+  // List all of the tablets with tablet metadata on the given tablet server index.
+  // This may include tablets that are tombstoned and not running.
   std::vector<std::string> ListTabletsOnTS(int index);
+
+  // List the tablet IDs on the given tablet which actually have data (as
+  // evidenced by their having a WAL). This excludes those that are tombstoned.
+  std::vector<std::string> ListTabletsWithDataOnTS(int index);
+
   int CountWALSegmentsForTabletOnTS(int index, const std::string& tablet_id);
   bool DoesConsensusMetaExistForTabletOnTS(int index, const std::string& tablet_id);