You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kudu.apache.org by to...@apache.org on 2016/03/16 04:42:36 UTC
incubator-kudu git commit: client-test: fix several sources of
TestScanFaultTolerance flakiness
Repository: incubator-kudu
Updated Branches:
refs/heads/master 158c26991 -> 75c277b3b
client-test: fix several sources of TestScanFaultTolerance flakiness
This fixes a couple of flakes that were seen in this test:
1) sometimes we would scan the tablet and end up scanning a minority replica
that hadn't yet replicated the inserts made at the beginning of the test.
This changes to using only two replicas, so that the majority is 2/2 and
we are much more likely to see the writes as soon as the test starts.
It's still not a hard guarantee, because we lack all the features for
proper cross-server consistency, but after looping many times I didn't
see this failure pop up after this fix.
2) fixes MiniTabletServer so that if we Shutdown() and then Start() a server,
it will still retain its old bound port numbers. This fixes a source of
flakiness where, when we restarted a server, it would come back with a
different port, and then be inaccessible for the rest of the test, due to
bugs in the client's handling of host/port changes (KUDU-418).
I have a sense that this will also fix some other flaky tests that relied
on similar shutdown/start sequences.
With this patch, the test case passed 498/500 in ASAN. The remaining two
failures were due to KUDU-1369. Without the patch, I couldn't reproduce
the flakiness easily on GCE machines, but investigating the logs from the
flaky test dashboard make me think this should fix it.
Change-Id: I775a597d396ecdc43ebe826240a77245957070ea
Reviewed-on: http://gerrit.cloudera.org:8080/2510
Tested-by: Kudu Jenkins
Reviewed-by: Adar Dembo <ad...@cloudera.com>
Project: http://git-wip-us.apache.org/repos/asf/incubator-kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-kudu/commit/75c277b3
Tree: http://git-wip-us.apache.org/repos/asf/incubator-kudu/tree/75c277b3
Diff: http://git-wip-us.apache.org/repos/asf/incubator-kudu/diff/75c277b3
Branch: refs/heads/master
Commit: 75c277b3b4afce89a9691b9dedf58e0d455077b0
Parents: 158c269
Author: Todd Lipcon <to...@apache.org>
Authored: Wed Mar 9 15:45:42 2016 -0800
Committer: Todd Lipcon <to...@apache.org>
Committed: Wed Mar 16 03:42:18 2016 +0000
----------------------------------------------------------------------
src/kudu/client/client-test.cc | 10 ++++++++--
src/kudu/tserver/mini_tablet_server.cc | 7 +++++--
2 files changed, 13 insertions(+), 4 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-kudu/blob/75c277b3/src/kudu/client/client-test.cc
----------------------------------------------------------------------
diff --git a/src/kudu/client/client-test.cc b/src/kudu/client/client-test.cc
index b1fa896..a49aaf8 100644
--- a/src/kudu/client/client-test.cc
+++ b/src/kudu/client/client-test.cc
@@ -948,7 +948,13 @@ TEST_F(ClientTest, TestScanFaultTolerance) {
// Create test table and insert test rows.
const string kScanTable = "TestScanFaultTolerance";
shared_ptr<KuduTable> table;
- ASSERT_NO_FATAL_FAILURE(CreateTable(kScanTable, 3, vector<const KuduPartialRow*>(), &table));
+
+ // We use only two replicas in this test so that every write is fully replicated to both
+ // servers (the Raft majority is 2/2). This reduces potential flakiness if the scanner tries
+ // to read from a replica that is lagging for some reason. This won't be necessary once
+ // we implement full support for snapshot consistency (KUDU-430).
+ const int kNumReplicas = 2;
+ ASSERT_NO_FATAL_FAILURE(CreateTable(kScanTable, kNumReplicas, {}, &table));
ASSERT_NO_FATAL_FAILURE(InsertTestRows(table.get(), FLAGS_test_scan_num_rows));
// Do an initial scan to determine the expected rows for later verification.
@@ -961,7 +967,7 @@ TEST_F(ClientTest, TestScanFaultTolerance) {
// disk.
if (with_flush) {
string tablet_id = GetFirstTabletId(table.get());
- for (int i = 0; i < 3; i++) {
+ for (int i = 0; i < cluster_->num_tablet_servers(); i++) {
scoped_refptr<TabletPeer> tablet_peer;
ASSERT_TRUE(cluster_->mini_tablet_server(i)->server()->tablet_manager()->LookupTablet(
tablet_id, &tablet_peer));
http://git-wip-us.apache.org/repos/asf/incubator-kudu/blob/75c277b3/src/kudu/tserver/mini_tablet_server.cc
----------------------------------------------------------------------
diff --git a/src/kudu/tserver/mini_tablet_server.cc b/src/kudu/tserver/mini_tablet_server.cc
index 6f835ad..7c042bd 100644
--- a/src/kudu/tserver/mini_tablet_server.cc
+++ b/src/kudu/tserver/mini_tablet_server.cc
@@ -90,6 +90,11 @@ Status MiniTabletServer::WaitStarted() {
void MiniTabletServer::Shutdown() {
if (started_) {
+ // Save the bound ports back into the options structure so that, if we restart the
+ // server, it will come back on the same address. This is necessary since we don't
+ // currently support tablet servers re-registering on different ports (KUDU-418).
+ opts_.webserver_opts.port = bound_http_addr().port();
+ opts_.rpc_opts.rpc_bind_addresses = Substitute("127.0.0.1:$0", bound_rpc_addr().port());
server_->Shutdown();
server_.reset();
}
@@ -98,8 +103,6 @@ void MiniTabletServer::Shutdown() {
Status MiniTabletServer::Restart() {
CHECK(started_);
- opts_.rpc_opts.rpc_bind_addresses = Substitute("127.0.0.1:$0", bound_rpc_addr().port());
- opts_.webserver_opts.port = bound_http_addr().port();
Shutdown();
RETURN_NOT_OK(Start());
return Status::OK();