You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kudu.apache.org by ad...@apache.org on 2017/10/26 16:12:36 UTC
[2/4] kudu git commit: data_dirs: be permissive if RefreshIsFull fails
data_dirs: be permissive if RefreshIsFull fails
In testing disk failures, at the DataDirManager-level, RefreshIsFull()
is a point of failure that can fail various functions, e.g. for block
placement. This doesn't need to be the case; it should be sufficient to
instead just not include the failed directory candidate.
This is favorable to returning early because RefreshIsFull() may cause
these functions to return before any error-handling has happened, and
any disk-failure handling will be triggered regardless the next time a
block is written to the bad disk.
Change-Id: Iffd0342e52e9f6a76ba17c179a36a8a971b94786
Reviewed-on: http://gerrit.cloudera.org:8080/8389
Reviewed-by: Adar Dembo <ad...@cloudera.com>
Tested-by: Adar Dembo <ad...@cloudera.com>
Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/e03b8e44
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/e03b8e44
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/e03b8e44
Branch: refs/heads/master
Commit: e03b8e4437920239ca5542143d36c7bd5a496eb2
Parents: 69b82b5
Author: Andrew Wong <aw...@cloudera.com>
Authored: Wed Oct 25 14:04:29 2017 -0700
Committer: Adar Dembo <ad...@cloudera.com>
Committed: Thu Oct 26 16:11:42 2017 +0000
----------------------------------------------------------------------
src/kudu/fs/data_dirs.cc | 27 ++++++++++++++-------------
src/kudu/fs/data_dirs.h | 2 +-
2 files changed, 15 insertions(+), 14 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/kudu/blob/e03b8e44/src/kudu/fs/data_dirs.cc
----------------------------------------------------------------------
diff --git a/src/kudu/fs/data_dirs.cc b/src/kudu/fs/data_dirs.cc
index 5e9b0d0..1ae0e9f 100644
--- a/src/kudu/fs/data_dirs.cc
+++ b/src/kudu/fs/data_dirs.cc
@@ -670,7 +670,7 @@ Status DataDirManager::CreateDataDirGroup(const string& tablet_id,
return Status::IOError("No healthy data directories available", "", ENODEV);
}
}
- RETURN_NOT_OK(GetDirsForGroupUnlocked(group_target_size, &group_indices));
+ GetDirsForGroupUnlocked(group_target_size, &group_indices);
if (PREDICT_FALSE(group_indices.empty())) {
return Status::IOError("All healthy data directories are full", "", ENOSPC);
}
@@ -724,8 +724,9 @@ Status DataDirManager::GetNextDataDir(const CreateBlockOptions& opts, DataDir**
for (int i : random_indices) {
int uuid_idx = (*group_uuid_indices)[i];
DataDir* candidate = FindOrDie(data_dir_by_uuid_idx_, uuid_idx);
- RETURN_NOT_OK(candidate->RefreshIsFull(DataDir::RefreshMode::EXPIRED_ONLY));
- if (!candidate->is_full()) {
+ Status s = candidate->RefreshIsFull(DataDir::RefreshMode::EXPIRED_ONLY);
+ WARN_NOT_OK(s, Substitute("failed to refresh fullness of $0", candidate->dir()));
+ if (s.ok() && !candidate->is_full()) {
*dir = candidate;
return Status::OK();
}
@@ -740,8 +741,8 @@ Status DataDirManager::GetNextDataDir(const CreateBlockOptions& opts, DataDir**
metrics_->data_dirs_full->value(), dirs_state_str);
}
return Status::IOError(Substitute("No directories available to add to $0directory group ($1 "
- "dirs total, $2).", tablet_id_str, data_dirs_.size(), dirs_state_str),
- "", ENOSPC);
+ "dirs total, $2).", tablet_id_str, data_dirs_.size(), dirs_state_str),
+ "", ENOSPC);
}
void DataDirManager::DeleteDataDirGroup(const std::string& tablet_id) {
@@ -768,19 +769,20 @@ bool DataDirManager::GetDataDirGroupPB(const std::string& tablet_id,
return false;
}
-Status DataDirManager::GetDirsForGroupUnlocked(int target_size,
- vector<int>* group_indices) {
+void DataDirManager::GetDirsForGroupUnlocked(int target_size,
+ vector<int>* group_indices) {
DCHECK(dir_group_lock_.is_locked());
vector<int> candidate_indices;
for (auto& e : data_dir_by_uuid_idx_) {
if (ContainsKey(failed_data_dirs_, e.first)) {
continue;
}
- RETURN_NOT_OK(e.second->RefreshIsFull(DataDir::RefreshMode::ALWAYS));
- // TODO(awong): If a disk is unhealthy at the time of group creation, the
- // resulting group may be below targeted size. Add functionality to resize
- // groups. See KUDU-2040 for more details.
- if (!e.second->is_full()) {
+ Status s = e.second->RefreshIsFull(DataDir::RefreshMode::ALWAYS);
+ WARN_NOT_OK(s, Substitute("failed to refresh fullness of $0", e.second->dir()));
+ if (s.ok() && !e.second->is_full()) {
+ // TODO(awong): If a disk is unhealthy at the time of group creation, the
+ // resulting group may be below targeted size. Add functionality to
+ // resize groups. See KUDU-2040 for more details.
candidate_indices.push_back(e.first);
}
}
@@ -796,7 +798,6 @@ Status DataDirManager::GetDirsForGroupUnlocked(int target_size,
candidate_indices.erase(candidate_indices.begin() + 1);
}
}
- return Status::OK();
}
DataDir* DataDirManager::FindDataDirByUuidIndex(int uuid_idx) const {
http://git-wip-us.apache.org/repos/asf/kudu/blob/e03b8e44/src/kudu/fs/data_dirs.h
----------------------------------------------------------------------
diff --git a/src/kudu/fs/data_dirs.h b/src/kudu/fs/data_dirs.h
index 1dca128..0ed4389 100644
--- a/src/kudu/fs/data_dirs.h
+++ b/src/kudu/fs/data_dirs.h
@@ -414,7 +414,7 @@ class DataDirManager {
// added. Although this function does not itself change DataDirManager state,
// its expected usage warrants that it is called within the scope of a
// lock_guard of dir_group_lock_.
- Status GetDirsForGroupUnlocked(int target_size, std::vector<int>* group_indices);
+ void GetDirsForGroupUnlocked(int target_size, std::vector<int>* group_indices);
// Goes through the data dirs in 'uuid_indices' and populates
// 'healthy_indices' with those that haven't failed.