You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kudu.apache.org by ad...@apache.org on 2016/09/01 22:44:26 UTC
kudu git commit: ts_tablet_manager: set status message after failed
tablet copy
Repository: kudu
Updated Branches:
refs/heads/master e1f54cbe7 -> 680cec6bb
ts_tablet_manager: set status message after failed tablet copy
I noticed in a test cluster that a tablet copy had failed, and ksck was
reporting the state as:
b314b10216d74c07ab9da974671137db (ve0136.halxg.cloudera.com:7050): bad state
State: NOT_STARTED
Data state: TABLET_DATA_TOMBSTONED
Last status: TabletCopy: Downloading block 4611685870251245460 (2342/33956)
This was rather confusing, because it claimed it was downloading a
block, and also claimed to be tombstoned at the same time (rather than
being in the COPYING state).
This fixes the failure case to set the status message properly.
Change-Id: I36ecd45dacec4ea329a0f0dd236e52bc603f9b93
Reviewed-on: http://gerrit.cloudera.org:8080/4194
Tested-by: Kudu Jenkins
Reviewed-by: Adar Dembo <ad...@cloudera.com>
Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/680cec6b
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/680cec6b
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/680cec6b
Branch: refs/heads/master
Commit: 680cec6bbf104dcba0d3d350e34af98756a5d974
Parents: e1f54cb
Author: Todd Lipcon <to...@apache.org>
Authored: Wed Aug 31 21:27:16 2016 -0700
Committer: Adar Dembo <ad...@cloudera.com>
Committed: Thu Sep 1 22:44:18 2016 +0000
----------------------------------------------------------------------
src/kudu/integration-tests/delete_table-test.cc | 8 ++++++++
src/kudu/tserver/ts_tablet_manager.cc | 18 +++++++++---------
src/kudu/tserver/ts_tablet_manager.h | 2 +-
3 files changed, 18 insertions(+), 10 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/kudu/blob/680cec6b/src/kudu/integration-tests/delete_table-test.cc
----------------------------------------------------------------------
diff --git a/src/kudu/integration-tests/delete_table-test.cc b/src/kudu/integration-tests/delete_table-test.cc
index 437730d..4c8de2a 100644
--- a/src/kudu/integration-tests/delete_table-test.cc
+++ b/src/kudu/integration-tests/delete_table-test.cc
@@ -550,6 +550,14 @@ TEST_F(DeleteTableTest, TestAutoTombstoneAfterTabletCopyRemoteFails) {
cluster_->tablet_server(2)->Shutdown();
NO_FATALS(WaitForTabletTombstonedOnTS(kTsIndex, tablet_id, CMETA_NOT_EXPECTED));
+ // Check the textual status message for the failed copy.
+ {
+ vector<ListTabletsResponsePB::StatusAndSchemaPB> status_pbs;
+ ASSERT_OK(WaitForNumTabletsOnTS(ts, 1, kTimeout, &status_pbs));
+ ASSERT_STR_CONTAINS(status_pbs[0].tablet_status().last_status(),
+ "Tombstoned tablet: Tablet Copy: Unable to fetch data from remote peer");
+ }
+
// Now bring the other replicas back, re-elect the previous leader (TS-1),
// and wait for the leader to Tablet Copy the tombstoned replica. This
// will have replaced a tablet with no consensus metadata.
http://git-wip-us.apache.org/repos/asf/kudu/blob/680cec6b/src/kudu/tserver/ts_tablet_manager.cc
----------------------------------------------------------------------
diff --git a/src/kudu/tserver/ts_tablet_manager.cc b/src/kudu/tserver/ts_tablet_manager.cc
index 0e8b52a..8b8eb85 100644
--- a/src/kudu/tserver/ts_tablet_manager.cc
+++ b/src/kudu/tserver/ts_tablet_manager.cc
@@ -311,11 +311,11 @@ Status TSTabletManager::CreateNewTablet(const string& table_id,
// If 'expr' fails, log a message, tombstone the given tablet, and return the
// error status.
-#define TOMBSTONE_NOT_OK(expr, meta, msg) \
+#define TOMBSTONE_NOT_OK(expr, peer, msg) \
do { \
Status _s = (expr); \
if (PREDICT_FALSE(!_s.ok())) { \
- LogAndTombstone((meta), (msg), _s); \
+ LogAndTombstone((peer), (msg), _s); \
return _s; \
} \
} while (0)
@@ -436,7 +436,7 @@ Status TSTabletManager::StartTabletCopy(
// Download all of the remote files.
Status s = tc_client.FetchAll(implicit_cast<TabletStatusListener*>(tablet_peer.get()));
- TOMBSTONE_NOT_OK(s, meta,
+ TOMBSTONE_NOT_OK(s, tablet_peer,
"Tablet Copy: Unable to fetch data from remote peer " +
copy_source_uuid + " (" + copy_source_addr.ToString() + ")");
@@ -444,7 +444,7 @@ Status TSTabletManager::StartTabletCopy(
// Write out the last files to make the new replica visible and update the
// TabletDataState in the superblock to TABLET_DATA_READY.
- TOMBSTONE_NOT_OK(tc_client.Finish(), meta, "Tablet Copy: Failure calling Finish()");
+ TOMBSTONE_NOT_OK(tc_client.Finish(), tablet_peer, "Tablet Copy: Failure calling Finish()");
// We run this asynchronously. We don't tombstone the tablet if this fails,
// because if we were to fail to open the tablet, on next startup, it's in a
@@ -941,21 +941,21 @@ Status TSTabletManager::DeleteTabletData(const scoped_refptr<TabletMetadata>& me
return meta->DeleteSuperBlock();
}
-void TSTabletManager::LogAndTombstone(const scoped_refptr<TabletMetadata>& meta,
+void TSTabletManager::LogAndTombstone(const scoped_refptr<TabletPeer>& peer,
const std::string& msg,
const Status& s) {
- const string& tablet_id = meta->tablet_id();
+ const string& tablet_id = peer->tablet_id();
const string kLogPrefix = "T " + tablet_id + " P " + fs_manager_->uuid() + ": ";
LOG(WARNING) << kLogPrefix << msg << ": " << s.ToString();
- // Tombstone the tablet when tablet copy fails.
- LOG(INFO) << kLogPrefix << "Tombstoning tablet after failed tablet copy";
- Status delete_status = DeleteTabletData(meta, TABLET_DATA_TOMBSTONED, boost::optional<OpId>());
+ Status delete_status = DeleteTabletData(
+ peer->tablet_metadata(), TABLET_DATA_TOMBSTONED, boost::optional<OpId>());
if (PREDICT_FALSE(!delete_status.ok())) {
// This failure should only either indicate a bug or an IO error.
LOG(FATAL) << kLogPrefix << "Failed to tombstone tablet after tablet copy: "
<< delete_status.ToString();
}
+ peer->StatusMessage(Substitute("Tombstoned tablet: $0 ($1)", msg, s.ToString()));
}
TransitionInProgressDeleter::TransitionInProgressDeleter(
http://git-wip-us.apache.org/repos/asf/kudu/blob/680cec6b/src/kudu/tserver/ts_tablet_manager.h
----------------------------------------------------------------------
diff --git a/src/kudu/tserver/ts_tablet_manager.h b/src/kudu/tserver/ts_tablet_manager.h
index 8005683..dcdc76e 100644
--- a/src/kudu/tserver/ts_tablet_manager.h
+++ b/src/kudu/tserver/ts_tablet_manager.h
@@ -267,7 +267,7 @@ class TSTabletManager : public tserver::TabletPeerLookupIf {
// Print a log message using the given info and tombstone the specified
// tablet. If tombstoning the tablet fails, a FATAL error is logged, resulting
// in a crash.
- void LogAndTombstone(const scoped_refptr<tablet::TabletMetadata>& meta,
+ void LogAndTombstone(const scoped_refptr<tablet::TabletPeer>& peer,
const std::string& msg,
const Status& s);