You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kudu.apache.org by mp...@apache.org on 2016/12/08 00:17:38 UTC

kudu git commit: KUDU-1785. Fix potential crash in TabletCopySourceSession

Repository: kudu
Updated Branches:
  refs/heads/master 3fa9db761 -> 1e957ae61


KUDU-1785. Fix potential crash in TabletCopySourceSession

This patch fixes a crash in TabletCopySourceSession that can result from
dereferencing a null pointer. This may occur when a follower initiates a
TabletCopy session with a source server while the source server is
bootstrapping the given tablet.

Added a stress test in tablet_copy_client_session-itest to trigger the
issue.

I looped the new test 300x each on RELEASE and TSAN modes with 8 stress
threads and got no failures:

- http://dist-test.cloudera.org/job?job_id=mpercy.1481118736.17656
- http://dist-test.cloudera.org/job?job_id=mpercy.1481118870.18180

I disabled the "fix" and ran the test again 300 times without stress
threads. I got a 100% failure rate on TSAN and a >90% failure rate in
RELEASE mode, making this a pretty reliable test for the race we are
trying to trigger:

- http://dist-test.cloudera.org/job?job_id=mpercy.1481119582.18826
- http://dist-test.cloudera.org/job?job_id=mpercy.1481119710.19325

Change-Id: I6f3ec35885dbf1a81c23ac10b1c9556dfddbd4b7
Reviewed-on: http://gerrit.cloudera.org:8080/5363
Tested-by: Kudu Jenkins
Reviewed-by: Adar Dembo <ad...@cloudera.com>


Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/1e957ae6
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/1e957ae6
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/1e957ae6

Branch: refs/heads/master
Commit: 1e957ae61eee748b53cd6e3d630ac7fb3abf4744
Parents: 3fa9db7
Author: Mike Percy <mp...@apache.org>
Authored: Mon Dec 5 16:34:31 2016 +0000
Committer: Mike Percy <mp...@apache.org>
Committed: Thu Dec 8 00:17:16 2016 +0000

----------------------------------------------------------------------
 src/kudu/integration-tests/CMakeLists.txt       |   1 +
 .../integration-tests/cluster_itest_util.cc     |  48 ++++--
 src/kudu/integration-tests/cluster_itest_util.h |  11 ++
 .../tablet_copy_client_session-itest.cc         | 167 +++++++++++++++++++
 src/kudu/tserver/tablet_copy_service.cc         |   4 +-
 src/kudu/tserver/tablet_copy_source_session.cc  |   4 +-
 6 files changed, 214 insertions(+), 21 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kudu/blob/1e957ae6/src/kudu/integration-tests/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/src/kudu/integration-tests/CMakeLists.txt b/src/kudu/integration-tests/CMakeLists.txt
index b799328..fdea290 100644
--- a/src/kudu/integration-tests/CMakeLists.txt
+++ b/src/kudu/integration-tests/CMakeLists.txt
@@ -71,6 +71,7 @@ ADD_KUDU_TEST(raft_consensus-itest RUN_SERIAL true)
 ADD_KUDU_TEST(registration-test RESOURCE_LOCK "master-web-port")
 ADD_KUDU_TEST(table_locations-itest)
 ADD_KUDU_TEST(tablet_copy-itest)
+ADD_KUDU_TEST(tablet_copy_client_session-itest)
 ADD_KUDU_TEST(tablet_history_gc-itest)
 ADD_KUDU_TEST(tablet_replacement-itest)
 ADD_KUDU_TEST(ts_recovery-itest)

http://git-wip-us.apache.org/repos/asf/kudu/blob/1e957ae6/src/kudu/integration-tests/cluster_itest_util.cc
----------------------------------------------------------------------
diff --git a/src/kudu/integration-tests/cluster_itest_util.cc b/src/kudu/integration-tests/cluster_itest_util.cc
index 0368546..b700782 100644
--- a/src/kudu/integration-tests/cluster_itest_util.cc
+++ b/src/kudu/integration-tests/cluster_itest_util.cc
@@ -695,31 +695,43 @@ Status WaitForNumTabletsOnTS(TServerDetails* ts,
   return Status::OK();
 }
 
+Status CheckIfTabletInState(TServerDetails* ts,
+                            const std::string& tablet_id,
+                            tablet::TabletStatePB expected_state,
+                            const MonoDelta& timeout) {
+  vector<ListTabletsResponsePB::StatusAndSchemaPB> tablets;
+  RETURN_NOT_OK(ListTablets(ts, timeout, &tablets));
+  for (const ListTabletsResponsePB::StatusAndSchemaPB& t : tablets) {
+    if (t.tablet_status().tablet_id() == tablet_id) {
+      tablet::TabletStatePB current_state = t.tablet_status().state();
+      if (current_state != expected_state) {
+        return Status::IllegalState(Substitute("Tablet not in expected state $0 (state = $1)",
+                                               TabletStatePB_Name(expected_state),
+                                               TabletStatePB_Name(current_state)));
+      }
+      return Status::OK();
+    }
+  }
+  return Status::NotFound("Tablet " + tablet_id + " not found");
+}
+
+Status CheckIfTabletRunning(TServerDetails* ts,
+                            const std::string& tablet_id,
+                            const MonoDelta& timeout) {
+  return CheckIfTabletInState(ts, tablet_id, tablet::RUNNING, timeout);
+}
+
 Status WaitUntilTabletInState(TServerDetails* ts,
                               const std::string& tablet_id,
                               tablet::TabletStatePB state,
                               const MonoDelta& timeout) {
   MonoTime start = MonoTime::Now();
   MonoTime deadline = start + timeout;
-  vector<ListTabletsResponsePB::StatusAndSchemaPB> tablets;
   Status s;
-  tablet::TabletStatePB last_state = tablet::UNKNOWN;
   while (true) {
-    s = ListTablets(ts, MonoDelta::FromSeconds(10), &tablets);
+    s = CheckIfTabletInState(ts, tablet_id, state, deadline - MonoTime::Now());
     if (s.ok()) {
-      bool seen = false;
-      for (const ListTabletsResponsePB::StatusAndSchemaPB& t : tablets) {
-        if (t.tablet_status().tablet_id() == tablet_id) {
-          seen = true;
-          last_state = t.tablet_status().state();
-          if (last_state == state) {
-            return Status::OK();
-          }
-        }
-      }
-      if (!seen) {
-        s = Status::NotFound("Tablet " + tablet_id + " not found");
-      }
+      return Status::OK();
     }
     if (MonoTime::Now() > deadline) {
       break;
@@ -727,11 +739,11 @@ Status WaitUntilTabletInState(TServerDetails* ts,
     SleepFor(MonoDelta::FromMilliseconds(10));
   }
   return Status::TimedOut(Substitute("T $0 P $1: Tablet not in $2 state after $3: "
-                                     "Tablet state: $4, Status message: $5",
+                                     "Status message: $4",
                                      tablet_id, ts->uuid(),
                                      tablet::TabletStatePB_Name(state),
                                      (MonoTime::Now() - start).ToString(),
-                                     tablet::TabletStatePB_Name(last_state), s.ToString()));
+                                     s.ToString()));
 }
 
 // Wait until the specified tablet is in RUNNING state.

http://git-wip-us.apache.org/repos/asf/kudu/blob/1e957ae6/src/kudu/integration-tests/cluster_itest_util.h
----------------------------------------------------------------------
diff --git a/src/kudu/integration-tests/cluster_itest_util.h b/src/kudu/integration-tests/cluster_itest_util.h
index 8f8571e..2810186 100644
--- a/src/kudu/integration-tests/cluster_itest_util.h
+++ b/src/kudu/integration-tests/cluster_itest_util.h
@@ -269,6 +269,17 @@ Status WaitForNumTabletsOnTS(
     const MonoDelta& timeout,
     std::vector<tserver::ListTabletsResponsePB::StatusAndSchemaPB>* tablets);
 
+// Check if the tablet is in the specified state.
+Status CheckIfTabletInState(TServerDetails* ts,
+                            const std::string& tablet_id,
+                            tablet::TabletStatePB expected_state,
+                            const MonoDelta& timeout);
+
+// Check if the given tablet is RUNNING.
+Status CheckIfTabletRunning(TServerDetails* ts,
+                            const std::string& tablet_id,
+                            const MonoDelta& timeout);
+
 // Wait until the specified replica is in the specified state.
 Status WaitUntilTabletInState(TServerDetails* ts,
                               const std::string& tablet_id,

http://git-wip-us.apache.org/repos/asf/kudu/blob/1e957ae6/src/kudu/integration-tests/tablet_copy_client_session-itest.cc
----------------------------------------------------------------------
diff --git a/src/kudu/integration-tests/tablet_copy_client_session-itest.cc b/src/kudu/integration-tests/tablet_copy_client_session-itest.cc
new file mode 100644
index 0000000..0f7fd4a
--- /dev/null
+++ b/src/kudu/integration-tests/tablet_copy_client_session-itest.cc
@@ -0,0 +1,167 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <boost/optional.hpp>
+
+#include "kudu/common/wire_protocol.h"
+#include "kudu/gutil/macros.h"
+#include "kudu/integration-tests/cluster_itest_util.h"
+#include "kudu/integration-tests/external_mini_cluster-itest-base.h"
+#include "kudu/integration-tests/test_workload.h"
+
+using kudu::itest::StartTabletCopy;
+using kudu::itest::TServerDetails;
+using kudu::itest::WaitUntilTabletRunning;
+using kudu::tablet::TabletDataState;
+using kudu::tserver::ListTabletsResponsePB;
+using std::string;
+using std::vector;
+
+namespace kudu {
+
+// Integration test for StartTabletCopy() and related functionality.
+class TabletCopyClientSessionITest : public ExternalMiniClusterITestBase {
+ protected:
+  // Bring up two tablet servers. Load tablet(s) onto TS 0, while TS 1 is left
+  // blank. Prevent the master from tombstoning evicted replicas.
+  void PrepareClusterForTabletCopy(const vector<string>& extra_tserver_flags = {},
+                                   vector<string> extra_master_flags = {},
+                                   int num_tablets = kDefaultNumTablets);
+
+  static const int kDefaultNumTablets;
+  const MonoDelta kDefaultTimeout = MonoDelta::FromSeconds(30);
+};
+
+const int TabletCopyClientSessionITest::kDefaultNumTablets = 1;
+
+void TabletCopyClientSessionITest::PrepareClusterForTabletCopy(
+    const vector<string>& extra_tserver_flags,
+    vector<string> extra_master_flags,
+    int num_tablets) {
+  const int kNumTabletServers = 2;
+  // We don't want the master to interfere when we manually make copies of
+  // tablets onto servers it doesn't know about.
+  extra_master_flags.push_back("--master_tombstone_evicted_tablet_replicas=false");
+  NO_FATALS(StartCluster(extra_tserver_flags, extra_master_flags, kNumTabletServers));
+  // Shut down the 2nd tablet server; we'll create tablets on the first one.
+  cluster_->tablet_server(1)->Shutdown();
+
+  // Restart the Master so it doesn't try to assign tablets to the dead TS.
+  cluster_->master()->Shutdown();
+  ASSERT_OK(cluster_->master()->Restart());
+  ASSERT_OK(cluster_->WaitForTabletServerCount(kNumTabletServers - 1, kDefaultTimeout));
+
+  // Write a bunch of data to the first tablet server.
+  TestWorkload workload(cluster_.get());
+  workload.set_num_replicas(1);
+  workload.set_num_tablets(num_tablets);
+  workload.Setup();
+  workload.Start();
+
+  // Get some data written to the write ahead log.
+  while (workload.rows_inserted() < 10000) {
+    SleepFor(MonoDelta::FromMilliseconds(10));
+  }
+
+  vector<ListTabletsResponsePB::StatusAndSchemaPB> tablets;
+  TServerDetails* ts0 = ts_map_[cluster_->tablet_server(0)->uuid()];
+  ASSERT_OK(WaitForNumTabletsOnTS(ts0, num_tablets, kDefaultTimeout, &tablets));
+
+  workload.StopAndJoin();
+
+  // Restart TS 1 so we can use it in our tests.
+  ASSERT_OK(cluster_->tablet_server(1)->Restart());
+}
+
+// Regression test for KUDU-1785. Ensure that starting a tablet copy session
+// while a tablet is bootstrapping will result in a simple failure, not a crash.
+TEST_F(TabletCopyClientSessionITest, TestStartTabletCopyWhileSourceBootstrapping) {
+  const MonoDelta kTimeout = MonoDelta::FromSeconds(60);
+  NO_FATALS(PrepareClusterForTabletCopy());
+
+  TServerDetails* ts0 = ts_map_[cluster_->tablet_server(0)->uuid()];
+  TServerDetails* ts1 = ts_map_[cluster_->tablet_server(1)->uuid()];
+  vector<ListTabletsResponsePB::StatusAndSchemaPB> tablets;
+  ASSERT_OK(WaitForNumTabletsOnTS(ts0, kDefaultNumTablets, kTimeout, &tablets));
+  ASSERT_EQ(kDefaultNumTablets, tablets.size());
+  const string& tablet_id = tablets[0].tablet_status().tablet_id();
+  HostPort src_addr;
+  ASSERT_OK(HostPortFromPB(ts0->registration.rpc_addresses(0), &src_addr));
+
+  // Repeat the restart process several times.
+  for (int i = 0; i < 3; i++) {
+    LOG(INFO) << "Starting loop " << i;
+    // Shut down the source tablet server (TS 0) so we can catch it with our
+    // StartTabletCopy requests during startup.
+    cluster_->tablet_server(0)->Shutdown();
+
+    const int kNumStartTabletThreads = 4;
+    vector<scoped_refptr<Thread>> threads;
+    for (int j = 0; j < kNumStartTabletThreads; j++) {
+      scoped_refptr<Thread> t;
+      CHECK_OK(Thread::Create("test", "start-tablet-copy", [&]() {
+        // Retry until it succeeds (we will intially race against TS 0 startup).
+        MonoTime deadline = MonoTime::Now() + kTimeout;
+        Status s;
+        while (true) {
+          if (MonoTime::Now() > deadline) {
+            FAIL() << "Timed out waiting for bootstrap: " << s.ToString();
+          }
+          s = StartTabletCopy(ts1, tablet_id, ts0->uuid(), src_addr, 0,
+                              deadline - MonoTime::Now());
+          if (s.ok()) {
+            break;
+          }
+          s = CheckIfTabletRunning(ts1, tablet_id, deadline - MonoTime::Now());
+          if (s.ok()) {
+            break;
+          }
+          SleepFor(MonoDelta::FromMilliseconds(10));
+          continue;
+        }
+        LOG(INFO) << "Waiting until tablet running...";
+        EXPECT_OK(WaitUntilTabletRunning(ts1, tablet_id, deadline - MonoTime::Now()));
+      }, &t));
+      threads.push_back(t);
+    }
+
+    // Restart the source tablet server (TS 0).
+    ASSERT_OK(cluster_->tablet_server(0)->Restart());
+
+    // Wait for one of the threads to succeed with its tablet copy and for the
+    // tablet to be running on TS 1.
+    EXPECT_OK(WaitUntilTabletRunning(ts1, tablet_id, kTimeout));
+
+    for (auto& t : threads) {
+      t->Join();
+    }
+
+    NO_FATALS(cluster_->AssertNoCrashes());
+
+    // There is a delay between the tablet marking itself as RUNNING and the
+    // tablet copy operation completing, so we retry tablet deletion attempts.
+    // We want a clean deletion so only one thread wins, then we have to
+    // restart TS 1 to clear knowledge of the replica from memory.
+    NO_FATALS(DeleteTabletWithRetries(ts1, tablet_id,
+                                      TabletDataState::TABLET_DATA_DELETED,
+                                      boost::none, kTimeout));
+    cluster_->tablet_server(1)->Shutdown();
+    ASSERT_OK(cluster_->tablet_server(1)->Restart());
+  }
+}
+
+} // namespace kudu

http://git-wip-us.apache.org/repos/asf/kudu/blob/1e957ae6/src/kudu/tserver/tablet_copy_service.cc
----------------------------------------------------------------------
diff --git a/src/kudu/tserver/tablet_copy_service.cc b/src/kudu/tserver/tablet_copy_service.cc
index a8b54f6..fc31259 100644
--- a/src/kudu/tserver/tablet_copy_service.cc
+++ b/src/kudu/tserver/tablet_copy_service.cc
@@ -78,7 +78,7 @@ static void SetupErrorAndRespond(rpc::RpcContext* context,
                                  const string& message,
                                  const Status& s) {
   LOG(WARNING) << "Error handling TabletCopyService RPC request from "
-               << context->requestor_string() << ": "
+               << context->requestor_string() << ": " << message << ": "
                << s.ToString();
   TabletCopyErrorPB error;
   StatusToPB(s, error.mutable_status());
@@ -129,7 +129,7 @@ void TabletCopyServiceImpl::BeginTabletCopySession(
                                                requestor_uuid, fs_manager_));
       RPC_RETURN_NOT_OK(session->Init(),
                         TabletCopyErrorPB::UNKNOWN_ERROR,
-                        Substitute("Error initializing tablet copy session for tablet $0",
+                        Substitute("Error beginning tablet copy session for tablet $0",
                                    tablet_id));
       InsertOrDie(&sessions_, session_id, session);
     } else {

http://git-wip-us.apache.org/repos/asf/kudu/blob/1e957ae6/src/kudu/tserver/tablet_copy_source_session.cc
----------------------------------------------------------------------
diff --git a/src/kudu/tserver/tablet_copy_source_session.cc b/src/kudu/tserver/tablet_copy_source_session.cc
index 597714b..caa22d1 100644
--- a/src/kudu/tserver/tablet_copy_source_session.cc
+++ b/src/kudu/tserver/tablet_copy_source_session.cc
@@ -69,6 +69,8 @@ Status TabletCopySourceSession::Init() {
   MutexLock l(session_lock_);
   CHECK(!initted_);
 
+  RETURN_NOT_OK(tablet_peer_->CheckRunning());
+
   const string& tablet_id = tablet_peer_->tablet_id();
 
   // Prevent log GC while we grab log segments and Tablet metadata.
@@ -94,7 +96,7 @@ Status TabletCopySourceSession::Init() {
 
   // Get the latest opid in the log at this point in time so we can re-anchor.
   OpId last_logged_opid;
-  tablet_peer_->log()->GetLatestEntryOpId(&last_logged_opid);
+  CHECK_NOTNULL(tablet_peer_->log())->GetLatestEntryOpId(&last_logged_opid);
 
   // Get the current segments from the log, including the active segment.
   // The Log doesn't add the active segment to the log reader's list until