You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kudu.apache.org by ad...@apache.org on 2017/01/04 01:10:26 UTC

kudu git commit: Fix flakiness in ToolTest.TestRemoteReplicaCopy

Repository: kudu
Updated Branches:
  refs/heads/master bf2e6015d -> ddb2c16bf


Fix flakiness in ToolTest.TestRemoteReplicaCopy

This test created a table, and then immediately asserted on the error
message returned when trying to copy a tablet on top of an existing
tablet. In fact, the destination tablet might still be in the 'creating'
transition, in which case the copy attempt failed with an unexpected
error message.

This patch fixes the issue by waiting for the tablet to reach the RUNNING
state on both the source and destination tablet servers before starting
any copies.

Tested by running the test in a loop 500x with 4 stress threads in a RELEASE
build (where it seemed to be most flaky according to the dashboard).

Change-Id: I740f6b469561a17792a315caf4a10c2627b55149
Reviewed-on: http://gerrit.cloudera.org:8080/5543
Tested-by: Kudu Jenkins
Reviewed-by: Dinesh Bhat <di...@cloudera.com>
Reviewed-by: Adar Dembo <ad...@cloudera.com>


Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/ddb2c16b
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/ddb2c16b
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/ddb2c16b

Branch: refs/heads/master
Commit: ddb2c16bf0e20f84da1778124efa673f55b43bde
Parents: bf2e601
Author: Todd Lipcon <to...@apache.org>
Authored: Mon Dec 19 17:37:44 2016 +0700
Committer: Adar Dembo <ad...@cloudera.com>
Committed: Wed Jan 4 01:09:14 2017 +0000

----------------------------------------------------------------------
 src/kudu/tools/kudu-tool-test.cc | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kudu/blob/ddb2c16b/src/kudu/tools/kudu-tool-test.cc
----------------------------------------------------------------------
diff --git a/src/kudu/tools/kudu-tool-test.cc b/src/kudu/tools/kudu-tool-test.cc
index 625d341..55672f0 100644
--- a/src/kudu/tools/kudu-tool-test.cc
+++ b/src/kudu/tools/kudu-tool-test.cc
@@ -982,6 +982,11 @@ TEST_F(ToolTest, TestRemoteReplicaCopy) {
   ASSERT_OK(WaitForNumTabletsOnTS(src_ts, kNumTablets, kTimeout, &tablets));
   TServerDetails* dst_ts = ts_map_[cluster_->tablet_server(kDstTsIndex)->uuid()];
   ASSERT_OK(WaitForNumTabletsOnTS(dst_ts, kNumTablets, kTimeout, &tablets));
+  const string& healthy_tablet_id = tablets[0].tablet_status().tablet_id();
+
+  // Wait until the tablets are RUNNING before we start any copies.
+  ASSERT_OK(WaitUntilTabletInState(src_ts, healthy_tablet_id, tablet::RUNNING, kTimeout));
+  ASSERT_OK(WaitUntilTabletInState(dst_ts, healthy_tablet_id, tablet::RUNNING, kTimeout));
 
   // Test 1: Test when the destination replica is healthy with and without --force_copy flag.
   // This is an 'online tablet copy'. i.e, when the tool initiates a copy,
@@ -992,7 +997,6 @@ TEST_F(ToolTest, TestRemoteReplicaCopy) {
   string stderr;
   const string& src_ts_addr = cluster_->tablet_server(kSrcTsIndex)->bound_rpc_addr().ToString();
   const string& dst_ts_addr = cluster_->tablet_server(kDstTsIndex)->bound_rpc_addr().ToString();
-  const string& healthy_tablet_id = tablets[0].tablet_status().tablet_id();
   Status s = RunTool(
       Substitute("remote_replica copy $0 $1 $2",
                  healthy_tablet_id, src_ts_addr, dst_ts_addr),