You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kudu.apache.org by ad...@apache.org on 2017/10/26 16:12:36 UTC

[2/4] kudu git commit: data_dirs: be permissive if RefreshIsFull fails

data_dirs: be permissive if RefreshIsFull fails

In testing disk failures, at the DataDirManager-level, RefreshIsFull()
is a point of failure that can fail various functions, e.g. for block
placement. This doesn't need to be the case; it should be sufficient to
instead just not include the failed directory candidate.

This is favorable to returning early because RefreshIsFull() may cause
these functions to return before any error-handling has happened, and
any disk-failure handling will be triggered regardless the next time a
block is written to the bad disk.

Change-Id: Iffd0342e52e9f6a76ba17c179a36a8a971b94786
Reviewed-on: http://gerrit.cloudera.org:8080/8389
Reviewed-by: Adar Dembo <ad...@cloudera.com>
Tested-by: Adar Dembo <ad...@cloudera.com>


Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/e03b8e44
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/e03b8e44
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/e03b8e44

Branch: refs/heads/master
Commit: e03b8e4437920239ca5542143d36c7bd5a496eb2
Parents: 69b82b5
Author: Andrew Wong <aw...@cloudera.com>
Authored: Wed Oct 25 14:04:29 2017 -0700
Committer: Adar Dembo <ad...@cloudera.com>
Committed: Thu Oct 26 16:11:42 2017 +0000

----------------------------------------------------------------------
 src/kudu/fs/data_dirs.cc | 27 ++++++++++++++-------------
 src/kudu/fs/data_dirs.h  |  2 +-
 2 files changed, 15 insertions(+), 14 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kudu/blob/e03b8e44/src/kudu/fs/data_dirs.cc
----------------------------------------------------------------------
diff --git a/src/kudu/fs/data_dirs.cc b/src/kudu/fs/data_dirs.cc
index 5e9b0d0..1ae0e9f 100644
--- a/src/kudu/fs/data_dirs.cc
+++ b/src/kudu/fs/data_dirs.cc
@@ -670,7 +670,7 @@ Status DataDirManager::CreateDataDirGroup(const string& tablet_id,
         return Status::IOError("No healthy data directories available", "", ENODEV);
       }
     }
-    RETURN_NOT_OK(GetDirsForGroupUnlocked(group_target_size, &group_indices));
+    GetDirsForGroupUnlocked(group_target_size, &group_indices);
     if (PREDICT_FALSE(group_indices.empty())) {
       return Status::IOError("All healthy data directories are full", "", ENOSPC);
     }
@@ -724,8 +724,9 @@ Status DataDirManager::GetNextDataDir(const CreateBlockOptions& opts, DataDir**
   for (int i : random_indices) {
     int uuid_idx = (*group_uuid_indices)[i];
     DataDir* candidate = FindOrDie(data_dir_by_uuid_idx_, uuid_idx);
-    RETURN_NOT_OK(candidate->RefreshIsFull(DataDir::RefreshMode::EXPIRED_ONLY));
-    if (!candidate->is_full()) {
+    Status s = candidate->RefreshIsFull(DataDir::RefreshMode::EXPIRED_ONLY);
+    WARN_NOT_OK(s, Substitute("failed to refresh fullness of $0", candidate->dir()));
+    if (s.ok() && !candidate->is_full()) {
       *dir = candidate;
       return Status::OK();
     }
@@ -740,8 +741,8 @@ Status DataDirManager::GetNextDataDir(const CreateBlockOptions& opts, DataDir**
                                 metrics_->data_dirs_full->value(), dirs_state_str);
   }
   return Status::IOError(Substitute("No directories available to add to $0directory group ($1 "
-                        "dirs total, $2).", tablet_id_str, data_dirs_.size(), dirs_state_str),
-                        "", ENOSPC);
+                         "dirs total, $2).", tablet_id_str, data_dirs_.size(), dirs_state_str),
+                         "", ENOSPC);
 }
 
 void DataDirManager::DeleteDataDirGroup(const std::string& tablet_id) {
@@ -768,19 +769,20 @@ bool DataDirManager::GetDataDirGroupPB(const std::string& tablet_id,
   return false;
 }
 
-Status DataDirManager::GetDirsForGroupUnlocked(int target_size,
-                                               vector<int>* group_indices) {
+void DataDirManager::GetDirsForGroupUnlocked(int target_size,
+                                             vector<int>* group_indices) {
   DCHECK(dir_group_lock_.is_locked());
   vector<int> candidate_indices;
   for (auto& e : data_dir_by_uuid_idx_) {
     if (ContainsKey(failed_data_dirs_, e.first)) {
       continue;
     }
-    RETURN_NOT_OK(e.second->RefreshIsFull(DataDir::RefreshMode::ALWAYS));
-    // TODO(awong): If a disk is unhealthy at the time of group creation, the
-    // resulting group may be below targeted size. Add functionality to resize
-    // groups. See KUDU-2040 for more details.
-    if (!e.second->is_full()) {
+    Status s = e.second->RefreshIsFull(DataDir::RefreshMode::ALWAYS);
+    WARN_NOT_OK(s, Substitute("failed to refresh fullness of $0", e.second->dir()));
+    if (s.ok() && !e.second->is_full()) {
+      // TODO(awong): If a disk is unhealthy at the time of group creation, the
+      // resulting group may be below targeted size. Add functionality to
+      // resize groups. See KUDU-2040 for more details.
       candidate_indices.push_back(e.first);
     }
   }
@@ -796,7 +798,6 @@ Status DataDirManager::GetDirsForGroupUnlocked(int target_size,
       candidate_indices.erase(candidate_indices.begin() + 1);
     }
   }
-  return Status::OK();
 }
 
 DataDir* DataDirManager::FindDataDirByUuidIndex(int uuid_idx) const {

http://git-wip-us.apache.org/repos/asf/kudu/blob/e03b8e44/src/kudu/fs/data_dirs.h
----------------------------------------------------------------------
diff --git a/src/kudu/fs/data_dirs.h b/src/kudu/fs/data_dirs.h
index 1dca128..0ed4389 100644
--- a/src/kudu/fs/data_dirs.h
+++ b/src/kudu/fs/data_dirs.h
@@ -414,7 +414,7 @@ class DataDirManager {
   // added. Although this function does not itself change DataDirManager state,
   // its expected usage warrants that it is called within the scope of a
   // lock_guard of dir_group_lock_.
-  Status GetDirsForGroupUnlocked(int target_size, std::vector<int>* group_indices);
+  void GetDirsForGroupUnlocked(int target_size, std::vector<int>* group_indices);
 
   // Goes through the data dirs in 'uuid_indices' and populates
   // 'healthy_indices' with those that haven't failed.