You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kudu.apache.org by to...@apache.org on 2016/07/20 18:28:32 UTC

incubator-kudu git commit: tablet_peer: update status message on failure to start

Repository: incubator-kudu
Updated Branches:
  refs/heads/master 3ee54af7b -> c637abe1c


tablet_peer: update status message on failure to start

If the tablet peer failed to start up, we called SetFailed(), but
that did not update the status message which is later reported as
part of the TabletStatusPB. This made such failures confusing to
debug. SetFailed() now also passes the error string to the status
listener, so the failure is surfaced in the reported status.

Change-Id: I6b6e53a33fde296d99be7027dbe75ac057920c20
Reviewed-on: http://gerrit.cloudera.org:8080/3682
Tested-by: Kudu Jenkins
Reviewed-by: Jean-Daniel Cryans <jd...@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/incubator-kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-kudu/commit/c637abe1
Tree: http://git-wip-us.apache.org/repos/asf/incubator-kudu/tree/c637abe1
Diff: http://git-wip-us.apache.org/repos/asf/incubator-kudu/diff/c637abe1

Branch: refs/heads/master
Commit: c637abe1ce3646360f272c39c30959ea23626a93
Parents: 3ee54af
Author: Todd Lipcon <to...@apache.org>
Authored: Tue Jul 19 18:32:52 2016 -0700
Committer: Todd Lipcon <to...@apache.org>
Committed: Wed Jul 20 18:28:15 2016 +0000

----------------------------------------------------------------------
 src/kudu/tablet/tablet_peer.cc             |  7 +++++++
 src/kudu/tablet/tablet_peer.h              |  8 ++------
 src/kudu/tserver/tablet_server-test-base.h | 10 ++++++++--
 src/kudu/tserver/tablet_server-test.cc     |  5 +++++
 4 files changed, 22 insertions(+), 8 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-kudu/blob/c637abe1/src/kudu/tablet/tablet_peer.cc
----------------------------------------------------------------------
diff --git a/src/kudu/tablet/tablet_peer.cc b/src/kudu/tablet/tablet_peer.cc
index 44d3b30..013653e 100644
--- a/src/kudu/tablet/tablet_peer.cc
+++ b/src/kudu/tablet/tablet_peer.cc
@@ -375,6 +375,13 @@ Status TabletPeer::RunLogGC() {
   return Status::OK();
 }
 
+void TabletPeer::SetFailed(const Status& error) {
+  std::lock_guard<simple_spinlock> lock(lock_);
+  state_ = FAILED;
+  error_ = error;
+  status_listener_->StatusMessage(error.ToString());
+}
+
 string TabletPeer::HumanReadableState() const {
   std::lock_guard<simple_spinlock> lock(lock_);
   TabletDataState data_state = meta_->tablet_data_state();

http://git-wip-us.apache.org/repos/asf/incubator-kudu/blob/c637abe1/src/kudu/tablet/tablet_peer.h
----------------------------------------------------------------------
diff --git a/src/kudu/tablet/tablet_peer.h b/src/kudu/tablet/tablet_peer.h
index 2ced0cb..101c183 100644
--- a/src/kudu/tablet/tablet_peer.h
+++ b/src/kudu/tablet/tablet_peer.h
@@ -168,13 +168,9 @@ class TabletPeer : public RefCountedThreadSafe<TabletPeer>,
     state_ = BOOTSTRAPPING;
   }
 
-  // sets the tablet state to FAILED additionally setting the error to the provided
+  // Sets the tablet state to FAILED additionally setting the error to the provided
   // one.
-  void SetFailed(const Status& error) {
-    std::lock_guard<simple_spinlock> lock(lock_);
-    state_ = FAILED;
-    error_ = error;
-  }
+  void SetFailed(const Status& error);
 
   // Returns the error that occurred, when state is FAILED.
   Status error() const {

http://git-wip-us.apache.org/repos/asf/incubator-kudu/blob/c637abe1/src/kudu/tserver/tablet_server-test-base.h
----------------------------------------------------------------------
diff --git a/src/kudu/tserver/tablet_server-test-base.h b/src/kudu/tserver/tablet_server-test-base.h
index 3bcf34a..b60caba 100644
--- a/src/kudu/tserver/tablet_server-test-base.h
+++ b/src/kudu/tserver/tablet_server-test-base.h
@@ -357,11 +357,17 @@ class TabletServerTestBase : public KuduTest {
     mini_server_->options()->master_addresses.push_back(HostPort("255.255.255.255", 1));
     // this should open the tablet created on StartTabletServer()
     RETURN_NOT_OK(mini_server_->Start());
-    RETURN_NOT_OK(mini_server_->WaitStarted());
 
-    if (!mini_server_->server()->tablet_manager()->LookupTablet(kTabletId, &tablet_peer_)) {
+    // Don't RETURN_NOT_OK immediately -- even if we fail, we may still get a TabletPeer object
+    // which has information about the failure.
+    Status wait_status = mini_server_->WaitStarted();
+    bool found_peer = mini_server_->server()->tablet_manager()->LookupTablet(
+        kTabletId, &tablet_peer_);
+    RETURN_NOT_OK(wait_status);
+    if (!found_peer) {
       return Status::NotFound("Tablet was not found");
     }
+
     // Connect to it.
     ResetClientProxies();
 

http://git-wip-us.apache.org/repos/asf/incubator-kudu/blob/c637abe1/src/kudu/tserver/tablet_server-test.cc
----------------------------------------------------------------------
diff --git a/src/kudu/tserver/tablet_server-test.cc b/src/kudu/tserver/tablet_server-test.cc
index 18ef0c5..40bbd19 100644
--- a/src/kudu/tserver/tablet_server-test.cc
+++ b/src/kudu/tserver/tablet_server-test.cc
@@ -23,6 +23,7 @@
 #include "kudu/server/hybrid_clock.h"
 #include "kudu/server/server_base.pb.h"
 #include "kudu/server/server_base.proxy.h"
+#include "kudu/tablet/tablet_bootstrap.h"
 #include "kudu/util/crc.h"
 #include "kudu/util/curl_util.h"
 #include "kudu/util/url-coding.h"
@@ -973,6 +974,10 @@ TEST_F(TabletServerTest, TestClientGetsErrorBackWhenRecoveryFailed) {
   ASSERT_OK(DCHECK_NOTNULL(proxy_.get())->Write(req, &resp, &controller));
   ASSERT_EQ(TabletServerErrorPB::TABLET_NOT_RUNNING, resp.error().code());
   ASSERT_STR_CONTAINS(resp.error().status().message(), "Tablet not RUNNING: FAILED");
+
+  // Check that the tablet peer's status message is updated with the failure.
+  ASSERT_STR_CONTAINS(tablet_peer_->status_listener()->last_status(),
+                      "Log file corruption detected");
 }
 
 TEST_F(TabletServerTest, TestScan) {