You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kudu.apache.org by jd...@apache.org on 2016/12/08 16:14:38 UTC
[6/6] kudu git commit: KUDU-1796: Use last REPLICATE OpId instead of
last COMMIT OpId to tombstone a tablet replica
KUDU-1796: Use last REPLICATE OpId instead of last COMMIT OpId to tombstone a tablet replica
The kudu-tool-test has been flaky because of the following failure:
kudu-tool-test.cc:1203: Failure
Value of: tombstoned_opid.index()
Actual: 205
Expected: last_logged_opid.index()
Which is: 206
Issue here is, offline cli tool "local_replica delete" stored
last COMMIT OpId on the tablet superblock instead of last REPLICATE
OpId while tombstoning the tablet replica. Hence kudu-tool-test
ToolTest.TestLocalReplicaTombstoneDelete observed that sometimes
tombstoned_opid (COMMIT) lagged behind last_logged_opid (REPLICATE).
This patch fixes the tool to use the REPLICATE OpId from the WAL.
Testing: Ran 5000 iterations of current failing test.
Change-Id: I02e95f05fab80e94b1afd078b23e5e03eca6e42a
Reviewed-on: http://gerrit.cloudera.org:8080/5416
Reviewed-by: Mike Percy <mp...@apache.org>
Tested-by: Kudu Jenkins
Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/839bd6f9
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/839bd6f9
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/839bd6f9
Branch: refs/heads/master
Commit: 839bd6f9abe3dedf439628e80fd7111763c11b52
Parents: 4d8fe6c
Author: Dinesh Bhat <di...@cloudera.com>
Authored: Thu Dec 8 13:03:41 2016 +0530
Committer: Mike Percy <mp...@apache.org>
Committed: Thu Dec 8 15:49:41 2016 +0000
----------------------------------------------------------------------
src/kudu/tools/tool_action_local_replica.cc | 30 ++++++++++++------------
1 file changed, 15 insertions(+), 15 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/kudu/blob/839bd6f9/src/kudu/tools/tool_action_local_replica.cc
----------------------------------------------------------------------
diff --git a/src/kudu/tools/tool_action_local_replica.cc b/src/kudu/tools/tool_action_local_replica.cc
index b788748..7e0cd22 100644
--- a/src/kudu/tools/tool_action_local_replica.cc
+++ b/src/kudu/tools/tool_action_local_replica.cc
@@ -162,18 +162,18 @@ Status ParseHostPortString(const string& hostport_str, HostPort* hostport) {
return Status::OK();
}
-// Find the last committed OpId for the tablet_id from the WAL.
-Status FindLastCommittedOpId(FsManager* fs, const string& tablet_id,
- OpId* last_committed_opid) {
+// Find the last replicated OpId for the tablet_id from the WAL.
+Status FindLastLoggedOpId(FsManager* fs, const string& tablet_id,
+ OpId* last_logged_opid) {
shared_ptr<LogReader> reader;
RETURN_NOT_OK(LogReader::Open(fs, scoped_refptr<log::LogIndex>(), tablet_id,
scoped_refptr<MetricEntity>(), &reader));
SegmentSequence segs;
RETURN_NOT_OK(reader->GetSegmentsSnapshot(&segs));
- // Reverse iterate segments to find the first 'last committed' entry.
+ // Reverse iterate the segments to find the 'last replicated' entry quickly.
// Note that we still read the entries within a segment in sequential
- // fashion, so the last entry within the 'found' segment will
- // give us the last_committed_opid.
+ // fashion, so the last entry within the first 'found' segment will
+ // give us the last_logged_opid.
vector<scoped_refptr<ReadableLogSegment>>::reverse_iterator seg;
bool found = false;
for (seg = segs.rbegin(); seg != segs.rend(); ++seg) {
@@ -183,13 +183,13 @@ Status FindLastCommittedOpId(FsManager* fs, const string& tablet_id,
Status s = reader.ReadNextEntry(&entry);
if (s.IsEndOfFile()) break;
RETURN_NOT_OK_PREPEND(s, "Error in log segment");
- if (entry.type() != log::COMMIT) continue;
- *last_committed_opid = entry.commit().commited_op_id();
+ if (entry.type() != log::REPLICATE) continue;
+ *last_logged_opid = entry.replicate().id();
found = true;
}
if (found) return Status::OK();
}
- return Status::NotFound("Committed OpId not found in the log");
+ return Status::NotFound("No entries found in the write-ahead log");
}
// Parses a colon-delimited string containing a uuid, hostname or IP address,
@@ -300,7 +300,7 @@ Status DeleteLocalReplica(const RunnerContext& context) {
const string& tablet_id = FindOrDie(context.required_args, kTabletIdArg);
FsManager fs_manager(Env::Default(), FsManagerOpts());
RETURN_NOT_OK(fs_manager.Open());
- boost::optional<OpId> last_committed_opid = boost::none;
+ boost::optional<OpId> last_logged_opid = boost::none;
TabletDataState state = TabletDataState::TABLET_DATA_DELETED;
if (!FLAGS_clean_unsafe) {
state = TabletDataState::TABLET_DATA_TOMBSTONED;
@@ -308,14 +308,14 @@ Status DeleteLocalReplica(const RunnerContext& context) {
// the log, it's not an error. But if we receive any other error,
// indicate the user to delete with --clean_unsafe flag.
OpId opid;
- Status s = FindLastCommittedOpId(&fs_manager, tablet_id, &opid);
+ Status s = FindLastLoggedOpId(&fs_manager, tablet_id, &opid);
if (s.ok()) {
- last_committed_opid = opid;
+ last_logged_opid = opid;
} else if (s.IsNotFound()) {
- LOG(INFO) << "Could not find last committed OpId from WAL, "
+ LOG(INFO) << "Could not find any replicated OpId from WAL, "
<< "but proceeding with tablet tombstone: " << s.ToString();
} else {
- LOG(ERROR) << "Error attempting to find last committed OpId in WAL: " << s.ToString();
+ LOG(ERROR) << "Error attempting to find last replicated OpId from WAL: " << s.ToString();
LOG(ERROR) << "Cannot delete (tombstone) the tablet, use --clean_unsafe to delete"
<< " the tablet permanently from this server.";
return s;
@@ -325,7 +325,7 @@ Status DeleteLocalReplica(const RunnerContext& context) {
// Force the specified tablet on this node to be in 'state'.
scoped_refptr<TabletMetadata> meta;
RETURN_NOT_OK(TabletMetadata::Load(&fs_manager, tablet_id, &meta));
- RETURN_NOT_OK(TSTabletManager::DeleteTabletData(meta, state, last_committed_opid));
+ RETURN_NOT_OK(TSTabletManager::DeleteTabletData(meta, state, last_logged_opid));
return Status::OK();
}