You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kudu.apache.org by mp...@apache.org on 2016/12/08 00:17:38 UTC
kudu git commit: KUDU-1785. Fix potential crash in
TabletCopySourceSession
Repository: kudu
Updated Branches:
refs/heads/master 3fa9db761 -> 1e957ae61
KUDU-1785. Fix potential crash in TabletCopySourceSession
This patch fixes a crash in TabletCopySourceSession that can result from
dereferencing a null pointer. This may occur when a follower initiates a
TabletCopy session with a source server while the source server is
bootstrapping the given tablet.
Added a stress test in tablet_copy_client_session-itest to trigger the
issue.
I looped the new test 300x each on RELEASE and TSAN modes with 8 stress
threads and got no failures:
- http://dist-test.cloudera.org/job?job_id=mpercy.1481118736.17656
- http://dist-test.cloudera.org/job?job_id=mpercy.1481118870.18180
I disabled the "fix" and ran the test again 300 times without stress
threads. I got a 100% failure rate on TSAN and a >90% failure rate in
RELEASE mode, making this a pretty reliable test for the race we are
trying to trigger:
- http://dist-test.cloudera.org/job?job_id=mpercy.1481119582.18826
- http://dist-test.cloudera.org/job?job_id=mpercy.1481119710.19325
Change-Id: I6f3ec35885dbf1a81c23ac10b1c9556dfddbd4b7
Reviewed-on: http://gerrit.cloudera.org:8080/5363
Tested-by: Kudu Jenkins
Reviewed-by: Adar Dembo <ad...@cloudera.com>
Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/1e957ae6
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/1e957ae6
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/1e957ae6
Branch: refs/heads/master
Commit: 1e957ae61eee748b53cd6e3d630ac7fb3abf4744
Parents: 3fa9db7
Author: Mike Percy <mp...@apache.org>
Authored: Mon Dec 5 16:34:31 2016 +0000
Committer: Mike Percy <mp...@apache.org>
Committed: Thu Dec 8 00:17:16 2016 +0000
----------------------------------------------------------------------
src/kudu/integration-tests/CMakeLists.txt | 1 +
.../integration-tests/cluster_itest_util.cc | 48 ++++--
src/kudu/integration-tests/cluster_itest_util.h | 11 ++
.../tablet_copy_client_session-itest.cc | 167 +++++++++++++++++++
src/kudu/tserver/tablet_copy_service.cc | 4 +-
src/kudu/tserver/tablet_copy_source_session.cc | 4 +-
6 files changed, 214 insertions(+), 21 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/kudu/blob/1e957ae6/src/kudu/integration-tests/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/src/kudu/integration-tests/CMakeLists.txt b/src/kudu/integration-tests/CMakeLists.txt
index b799328..fdea290 100644
--- a/src/kudu/integration-tests/CMakeLists.txt
+++ b/src/kudu/integration-tests/CMakeLists.txt
@@ -71,6 +71,7 @@ ADD_KUDU_TEST(raft_consensus-itest RUN_SERIAL true)
ADD_KUDU_TEST(registration-test RESOURCE_LOCK "master-web-port")
ADD_KUDU_TEST(table_locations-itest)
ADD_KUDU_TEST(tablet_copy-itest)
+ADD_KUDU_TEST(tablet_copy_client_session-itest)
ADD_KUDU_TEST(tablet_history_gc-itest)
ADD_KUDU_TEST(tablet_replacement-itest)
ADD_KUDU_TEST(ts_recovery-itest)
http://git-wip-us.apache.org/repos/asf/kudu/blob/1e957ae6/src/kudu/integration-tests/cluster_itest_util.cc
----------------------------------------------------------------------
diff --git a/src/kudu/integration-tests/cluster_itest_util.cc b/src/kudu/integration-tests/cluster_itest_util.cc
index 0368546..b700782 100644
--- a/src/kudu/integration-tests/cluster_itest_util.cc
+++ b/src/kudu/integration-tests/cluster_itest_util.cc
@@ -695,31 +695,43 @@ Status WaitForNumTabletsOnTS(TServerDetails* ts,
return Status::OK();
}
+Status CheckIfTabletInState(TServerDetails* ts,
+ const std::string& tablet_id,
+ tablet::TabletStatePB expected_state,
+ const MonoDelta& timeout) {
+ vector<ListTabletsResponsePB::StatusAndSchemaPB> tablets;
+ RETURN_NOT_OK(ListTablets(ts, timeout, &tablets));
+ for (const ListTabletsResponsePB::StatusAndSchemaPB& t : tablets) {
+ if (t.tablet_status().tablet_id() == tablet_id) {
+ tablet::TabletStatePB current_state = t.tablet_status().state();
+ if (current_state != expected_state) {
+ return Status::IllegalState(Substitute("Tablet not in expected state $0 (state = $1)",
+ TabletStatePB_Name(expected_state),
+ TabletStatePB_Name(current_state)));
+ }
+ return Status::OK();
+ }
+ }
+ return Status::NotFound("Tablet " + tablet_id + " not found");
+}
+
+Status CheckIfTabletRunning(TServerDetails* ts,
+ const std::string& tablet_id,
+ const MonoDelta& timeout) {
+ return CheckIfTabletInState(ts, tablet_id, tablet::RUNNING, timeout);
+}
+
Status WaitUntilTabletInState(TServerDetails* ts,
const std::string& tablet_id,
tablet::TabletStatePB state,
const MonoDelta& timeout) {
MonoTime start = MonoTime::Now();
MonoTime deadline = start + timeout;
- vector<ListTabletsResponsePB::StatusAndSchemaPB> tablets;
Status s;
- tablet::TabletStatePB last_state = tablet::UNKNOWN;
while (true) {
- s = ListTablets(ts, MonoDelta::FromSeconds(10), &tablets);
+ s = CheckIfTabletInState(ts, tablet_id, state, deadline - MonoTime::Now());
if (s.ok()) {
- bool seen = false;
- for (const ListTabletsResponsePB::StatusAndSchemaPB& t : tablets) {
- if (t.tablet_status().tablet_id() == tablet_id) {
- seen = true;
- last_state = t.tablet_status().state();
- if (last_state == state) {
- return Status::OK();
- }
- }
- }
- if (!seen) {
- s = Status::NotFound("Tablet " + tablet_id + " not found");
- }
+ return Status::OK();
}
if (MonoTime::Now() > deadline) {
break;
@@ -727,11 +739,11 @@ Status WaitUntilTabletInState(TServerDetails* ts,
SleepFor(MonoDelta::FromMilliseconds(10));
}
return Status::TimedOut(Substitute("T $0 P $1: Tablet not in $2 state after $3: "
- "Tablet state: $4, Status message: $5",
+ "Status message: $4",
tablet_id, ts->uuid(),
tablet::TabletStatePB_Name(state),
(MonoTime::Now() - start).ToString(),
- tablet::TabletStatePB_Name(last_state), s.ToString()));
+ s.ToString()));
}
// Wait until the specified tablet is in RUNNING state.
http://git-wip-us.apache.org/repos/asf/kudu/blob/1e957ae6/src/kudu/integration-tests/cluster_itest_util.h
----------------------------------------------------------------------
diff --git a/src/kudu/integration-tests/cluster_itest_util.h b/src/kudu/integration-tests/cluster_itest_util.h
index 8f8571e..2810186 100644
--- a/src/kudu/integration-tests/cluster_itest_util.h
+++ b/src/kudu/integration-tests/cluster_itest_util.h
@@ -269,6 +269,17 @@ Status WaitForNumTabletsOnTS(
const MonoDelta& timeout,
std::vector<tserver::ListTabletsResponsePB::StatusAndSchemaPB>* tablets);
+// Check if the tablet is in the specified state.
+Status CheckIfTabletInState(TServerDetails* ts,
+ const std::string& tablet_id,
+ tablet::TabletStatePB expected_state,
+ const MonoDelta& timeout);
+
+// Check if the given tablet is RUNNING.
+Status CheckIfTabletRunning(TServerDetails* ts,
+ const std::string& tablet_id,
+ const MonoDelta& timeout);
+
// Wait until the specified replica is in the specified state.
Status WaitUntilTabletInState(TServerDetails* ts,
const std::string& tablet_id,
http://git-wip-us.apache.org/repos/asf/kudu/blob/1e957ae6/src/kudu/integration-tests/tablet_copy_client_session-itest.cc
----------------------------------------------------------------------
diff --git a/src/kudu/integration-tests/tablet_copy_client_session-itest.cc b/src/kudu/integration-tests/tablet_copy_client_session-itest.cc
new file mode 100644
index 0000000..0f7fd4a
--- /dev/null
+++ b/src/kudu/integration-tests/tablet_copy_client_session-itest.cc
@@ -0,0 +1,167 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <boost/optional.hpp>
+
+#include "kudu/common/wire_protocol.h"
+#include "kudu/gutil/macros.h"
+#include "kudu/integration-tests/cluster_itest_util.h"
+#include "kudu/integration-tests/external_mini_cluster-itest-base.h"
+#include "kudu/integration-tests/test_workload.h"
+
+using kudu::itest::StartTabletCopy;
+using kudu::itest::TServerDetails;
+using kudu::itest::WaitUntilTabletRunning;
+using kudu::tablet::TabletDataState;
+using kudu::tserver::ListTabletsResponsePB;
+using std::string;
+using std::vector;
+
+namespace kudu {
+
+// Integration test for StartTabletCopy() and related functionality.
+class TabletCopyClientSessionITest : public ExternalMiniClusterITestBase {
+ protected:
+ // Bring up two tablet servers. Load tablet(s) onto TS 0, while TS 1 is left
+ // blank. Prevent the master from tombstoning evicted replicas.
+ void PrepareClusterForTabletCopy(const vector<string>& extra_tserver_flags = {},
+ vector<string> extra_master_flags = {},
+ int num_tablets = kDefaultNumTablets);
+
+ static const int kDefaultNumTablets;
+ const MonoDelta kDefaultTimeout = MonoDelta::FromSeconds(30);
+};
+
+const int TabletCopyClientSessionITest::kDefaultNumTablets = 1;
+
+void TabletCopyClientSessionITest::PrepareClusterForTabletCopy(
+ const vector<string>& extra_tserver_flags,
+ vector<string> extra_master_flags,
+ int num_tablets) {
+ const int kNumTabletServers = 2;
+ // We don't want the master to interfere when we manually make copies of
+ // tablets onto servers it doesn't know about.
+ extra_master_flags.push_back("--master_tombstone_evicted_tablet_replicas=false");
+ NO_FATALS(StartCluster(extra_tserver_flags, extra_master_flags, kNumTabletServers));
+ // Shut down the 2nd tablet server; we'll create tablets on the first one.
+ cluster_->tablet_server(1)->Shutdown();
+
+ // Restart the Master so it doesn't try to assign tablets to the dead TS.
+ cluster_->master()->Shutdown();
+ ASSERT_OK(cluster_->master()->Restart());
+ ASSERT_OK(cluster_->WaitForTabletServerCount(kNumTabletServers - 1, kDefaultTimeout));
+
+ // Write a bunch of data to the first tablet server.
+ TestWorkload workload(cluster_.get());
+ workload.set_num_replicas(1);
+ workload.set_num_tablets(num_tablets);
+ workload.Setup();
+ workload.Start();
+
+ // Get some data written to the write ahead log.
+ while (workload.rows_inserted() < 10000) {
+ SleepFor(MonoDelta::FromMilliseconds(10));
+ }
+
+ vector<ListTabletsResponsePB::StatusAndSchemaPB> tablets;
+ TServerDetails* ts0 = ts_map_[cluster_->tablet_server(0)->uuid()];
+ ASSERT_OK(WaitForNumTabletsOnTS(ts0, num_tablets, kDefaultTimeout, &tablets));
+
+ workload.StopAndJoin();
+
+ // Restart TS 1 so we can use it in our tests.
+ ASSERT_OK(cluster_->tablet_server(1)->Restart());
+}
+
+// Regression test for KUDU-1785. Ensure that starting a tablet copy session
+// while a tablet is bootstrapping will result in a simple failure, not a crash.
+TEST_F(TabletCopyClientSessionITest, TestStartTabletCopyWhileSourceBootstrapping) {
+ const MonoDelta kTimeout = MonoDelta::FromSeconds(60);
+ NO_FATALS(PrepareClusterForTabletCopy());
+
+ TServerDetails* ts0 = ts_map_[cluster_->tablet_server(0)->uuid()];
+ TServerDetails* ts1 = ts_map_[cluster_->tablet_server(1)->uuid()];
+ vector<ListTabletsResponsePB::StatusAndSchemaPB> tablets;
+ ASSERT_OK(WaitForNumTabletsOnTS(ts0, kDefaultNumTablets, kTimeout, &tablets));
+ ASSERT_EQ(kDefaultNumTablets, tablets.size());
+ const string& tablet_id = tablets[0].tablet_status().tablet_id();
+ HostPort src_addr;
+ ASSERT_OK(HostPortFromPB(ts0->registration.rpc_addresses(0), &src_addr));
+
+ // Repeat the restart process several times.
+ for (int i = 0; i < 3; i++) {
+ LOG(INFO) << "Starting loop " << i;
+ // Shut down the source tablet server (TS 0) so we can catch it with our
+ // StartTabletCopy requests during startup.
+ cluster_->tablet_server(0)->Shutdown();
+
+ const int kNumStartTabletThreads = 4;
+ vector<scoped_refptr<Thread>> threads;
+ for (int j = 0; j < kNumStartTabletThreads; j++) {
+ scoped_refptr<Thread> t;
+ CHECK_OK(Thread::Create("test", "start-tablet-copy", [&]() {
+ // Retry until it succeeds (we will intially race against TS 0 startup).
+ MonoTime deadline = MonoTime::Now() + kTimeout;
+ Status s;
+ while (true) {
+ if (MonoTime::Now() > deadline) {
+ FAIL() << "Timed out waiting for bootstrap: " << s.ToString();
+ }
+ s = StartTabletCopy(ts1, tablet_id, ts0->uuid(), src_addr, 0,
+ deadline - MonoTime::Now());
+ if (s.ok()) {
+ break;
+ }
+ s = CheckIfTabletRunning(ts1, tablet_id, deadline - MonoTime::Now());
+ if (s.ok()) {
+ break;
+ }
+ SleepFor(MonoDelta::FromMilliseconds(10));
+ continue;
+ }
+ LOG(INFO) << "Waiting until tablet running...";
+ EXPECT_OK(WaitUntilTabletRunning(ts1, tablet_id, deadline - MonoTime::Now()));
+ }, &t));
+ threads.push_back(t);
+ }
+
+ // Restart the source tablet server (TS 0).
+ ASSERT_OK(cluster_->tablet_server(0)->Restart());
+
+ // Wait for one of the threads to succeed with its tablet copy and for the
+ // tablet to be running on TS 1.
+ EXPECT_OK(WaitUntilTabletRunning(ts1, tablet_id, kTimeout));
+
+ for (auto& t : threads) {
+ t->Join();
+ }
+
+ NO_FATALS(cluster_->AssertNoCrashes());
+
+ // There is a delay between the tablet marking itself as RUNNING and the
+ // tablet copy operation completing, so we retry tablet deletion attempts.
+ // We want a clean deletion so only one thread wins, then we have to
+ // restart TS 1 to clear knowledge of the replica from memory.
+ NO_FATALS(DeleteTabletWithRetries(ts1, tablet_id,
+ TabletDataState::TABLET_DATA_DELETED,
+ boost::none, kTimeout));
+ cluster_->tablet_server(1)->Shutdown();
+ ASSERT_OK(cluster_->tablet_server(1)->Restart());
+ }
+}
+
+} // namespace kudu
http://git-wip-us.apache.org/repos/asf/kudu/blob/1e957ae6/src/kudu/tserver/tablet_copy_service.cc
----------------------------------------------------------------------
diff --git a/src/kudu/tserver/tablet_copy_service.cc b/src/kudu/tserver/tablet_copy_service.cc
index a8b54f6..fc31259 100644
--- a/src/kudu/tserver/tablet_copy_service.cc
+++ b/src/kudu/tserver/tablet_copy_service.cc
@@ -78,7 +78,7 @@ static void SetupErrorAndRespond(rpc::RpcContext* context,
const string& message,
const Status& s) {
LOG(WARNING) << "Error handling TabletCopyService RPC request from "
- << context->requestor_string() << ": "
+ << context->requestor_string() << ": " << message << ": "
<< s.ToString();
TabletCopyErrorPB error;
StatusToPB(s, error.mutable_status());
@@ -129,7 +129,7 @@ void TabletCopyServiceImpl::BeginTabletCopySession(
requestor_uuid, fs_manager_));
RPC_RETURN_NOT_OK(session->Init(),
TabletCopyErrorPB::UNKNOWN_ERROR,
- Substitute("Error initializing tablet copy session for tablet $0",
+ Substitute("Error beginning tablet copy session for tablet $0",
tablet_id));
InsertOrDie(&sessions_, session_id, session);
} else {
http://git-wip-us.apache.org/repos/asf/kudu/blob/1e957ae6/src/kudu/tserver/tablet_copy_source_session.cc
----------------------------------------------------------------------
diff --git a/src/kudu/tserver/tablet_copy_source_session.cc b/src/kudu/tserver/tablet_copy_source_session.cc
index 597714b..caa22d1 100644
--- a/src/kudu/tserver/tablet_copy_source_session.cc
+++ b/src/kudu/tserver/tablet_copy_source_session.cc
@@ -69,6 +69,8 @@ Status TabletCopySourceSession::Init() {
MutexLock l(session_lock_);
CHECK(!initted_);
+ RETURN_NOT_OK(tablet_peer_->CheckRunning());
+
const string& tablet_id = tablet_peer_->tablet_id();
// Prevent log GC while we grab log segments and Tablet metadata.
@@ -94,7 +96,7 @@ Status TabletCopySourceSession::Init() {
// Get the latest opid in the log at this point in time so we can re-anchor.
OpId last_logged_opid;
- tablet_peer_->log()->GetLatestEntryOpId(&last_logged_opid);
+ CHECK_NOTNULL(tablet_peer_->log())->GetLatestEntryOpId(&last_logged_opid);
// Get the current segments from the log, including the active segment.
// The Log doesn't add the active segment to the log reader's list until