You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kudu.apache.org by al...@apache.org on 2021/12/17 00:15:08 UTC

[kudu] branch master updated: [consensus] force fsync metadata on XFS

This is an automated email from the ASF dual-hosted git repository.

alexey pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git


The following commit(s) were added to refs/heads/master by this push:
     new 6908620  [consensus] force fsync metadata on XFS
6908620 is described below

commit 6908620821ab93cb964f0f7a3674abf9843a9b5f
Author: Andrew Wong <aw...@cloudera.com>
AuthorDate: Tue Dec 14 11:48:44 2021 -0800

    [consensus] force fsync metadata on XFS
    
    We've seen issues like KUDU-2195 rear their heads more prominently on
    XFS, sometimes requiring costly surgery to fix by restoring metadata if
    all replicas lose their consensus metadata.
    
    This patch adds to the band-aid flag --cmeta_force_fsync, checking if
    the metadata is on an XFS mount, and if so, always fsyncing. This new
    behavior is enabled by default with the --cmeta_fsync_override_on_xfs
    flag.
    
    Change-Id: Ie97138e3522f20325be104d3665d887693b79a10
    Reviewed-on: http://gerrit.cloudera.org:8080/18101
    Reviewed-by: Alexey Serbin <as...@cloudera.com>
    Tested-by: Kudu Jenkins
    Reviewed-by: Attila Bukor <ab...@apache.org>
---
 src/kudu/consensus/consensus_meta.cc |  6 +++++-
 src/kudu/fs/fs_manager.cc            | 26 ++++++++++++++++++++++++--
 src/kudu/fs/fs_manager.h             |  7 +++++++
 3 files changed, 36 insertions(+), 3 deletions(-)

diff --git a/src/kudu/consensus/consensus_meta.cc b/src/kudu/consensus/consensus_meta.cc
index 18a8f47..60d3758 100644
--- a/src/kudu/consensus/consensus_meta.cc
+++ b/src/kudu/consensus/consensus_meta.cc
@@ -50,6 +50,8 @@ DEFINE_bool(cmeta_force_fsync, false,
             "Whether fsync() should be called when consensus metadata files are updated");
 TAG_FLAG(cmeta_force_fsync, advanced);
 
+DECLARE_bool(cmeta_fsync_override_on_xfs);
+
 using std::string;
 using strings::Substitute;
 
@@ -302,6 +304,8 @@ Status ConsensusMetadata::Flush(FlushMode flush_mode) {
                           "Unable to fsync consensus parent dir " + parent_dir);
   }
 
+  const bool cmeta_force_fsync =
+      FLAGS_cmeta_force_fsync || (FLAGS_cmeta_fsync_override_on_xfs && fs_manager_->meta_on_xfs());
   string meta_file_path = fs_manager_->GetConsensusMetadataPath(tablet_id_);
   RETURN_NOT_OK_PREPEND(pb_util::WritePBContainerToPath(
       fs_manager_->env(), meta_file_path, pb_,
@@ -315,7 +319,7 @@ Status ConsensusMetadata::Flush(FlushMode flush_mode) {
       // fsync() due to periodic commit with default settings, whereas other
       // filesystems such as XFS will not commit as often and need the fsync to
       // avoid significant data loss when a crash happens.
-      FLAGS_log_force_fsync_all || FLAGS_cmeta_force_fsync ? pb_util::SYNC : pb_util::NO_SYNC),
+      FLAGS_log_force_fsync_all || cmeta_force_fsync ? pb_util::SYNC : pb_util::NO_SYNC),
           Substitute("Unable to write consensus meta file for tablet $0 to path $1",
                      tablet_id_, meta_file_path));
   return UpdateOnDiskSize();
diff --git a/src/kudu/fs/fs_manager.cc b/src/kudu/fs/fs_manager.cc
index 03fd2cd..d84c751 100644
--- a/src/kudu/fs/fs_manager.cc
+++ b/src/kudu/fs/fs_manager.cc
@@ -60,6 +60,14 @@
 #include "kudu/util/stopwatch.h"
 #include "kudu/util/timer.h"
 
+DEFINE_bool(cmeta_fsync_override_on_xfs, true,
+            "Whether to ignore --cmeta_force_fsync and instead always flush if Kudu detects "
+            "the server is on XFS. This can prevent consensus metadata corruption in the "
+            "event of sudden server failure. Disabling this flag may cause data loss in "
+            "the event of a system crash. See KUDU-2195 for more details.");
+TAG_FLAG(cmeta_fsync_override_on_xfs, experimental);
+TAG_FLAG(cmeta_fsync_override_on_xfs, advanced);
+
 DEFINE_bool(enable_data_block_fsync, true,
             "Whether to enable fsync() of data blocks, metadata, and their parent directories. "
             "Disabling this flag may cause data loss in the event of a system crash.");
@@ -175,7 +183,8 @@ FsManager::FsManager(Env* env, FsManagerOpts opts)
   : env_(DCHECK_NOTNULL(env)),
     opts_(std::move(opts)),
     error_manager_(new FsErrorManager()),
-    initted_(false) {
+    initted_(false),
+    meta_on_xfs_(false) {
   DCHECK(opts_.update_instances == UpdateInstanceBehavior::DONT_UPDATE ||
          !opts_.read_only) << "FsManager can only be for updated if not in read-only mode";
 }
@@ -361,10 +370,23 @@ Status FsManager::PartialOpen(CanonicalizedRootsList* missing_roots) {
           reference_instance_path, metadata_->uuid() , root.path, pb->uuid()));
     }
   }
-
   if (!metadata_) {
     return Status::NotFound("could not find a healthy instance file");
   }
+  const auto& meta_root_path = canonicalized_metadata_fs_root_.path;
+  const auto bad_meta_fs_msg =
+      Substitute("Could not determine file system of metadata directory $0",
+                 meta_root_path);
+  Status s = env_->IsOnXfsFilesystem(meta_root_path, &meta_on_xfs_);
+  if (FLAGS_cmeta_fsync_override_on_xfs) {
+    // Err on the side of visibility if we expect a behavior change based on
+    // the file system.
+    RETURN_NOT_OK_PREPEND(s, bad_meta_fs_msg);
+  } else {
+    WARN_NOT_OK(s, bad_meta_fs_msg);
+  }
+  VLOG(1) << Substitute("Detected metadata directory is $0on an XFS mount: $1",
+                        meta_on_xfs_ ? "" : "not ", meta_root_path);
   return Status::OK();
 }
 
diff --git a/src/kudu/fs/fs_manager.h b/src/kudu/fs/fs_manager.h
index ff04514..9243e26 100644
--- a/src/kudu/fs/fs_manager.h
+++ b/src/kudu/fs/fs_manager.h
@@ -311,6 +311,10 @@ class FsManager {
   // Prints the file system trees under the file system roots.
   void DumpFileSystemTree(std::ostream& out);
 
+  bool meta_on_xfs() const {
+    return meta_on_xfs_;
+  }
+
  private:
   FRIEND_TEST(fs::FsManagerTestBase, TestDuplicatePaths);
   FRIEND_TEST(fs::FsManagerTestBase, TestEIOWhileRunningUpdateDirsTool);
@@ -407,6 +411,9 @@ class FsManager {
 
   bool initted_;
 
+  // Cache whether or not the metadata directory is on an XFS directory.
+  bool meta_on_xfs_;
+
   DISALLOW_COPY_AND_ASSIGN(FsManager);
 };