You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kudu.apache.org by ad...@apache.org on 2017/05/13 01:40:30 UTC

kudu git commit: block manager: require file cache

Repository: kudu
Updated Branches:
  refs/heads/master c26e09ef5 -> 4af33ba13


block manager: require file cache

The file cache has been in Kudu since 1.2 and we've yet to see any major
issues. So let's make it required; doing so lets us clean up some branches.

As for block_manager_max_open_files, I changed the meaning of the value 0
from "no limit" to "error"; this seemed net less disruptive than changing
the meaning of -1 from "40%" to "error" and 0 from "no limit" to "40%".

Change-Id: I9946e537e0b4abd66a9a90fc05df04232db68bc0
Reviewed-on: http://gerrit.cloudera.org:8080/6880
Reviewed-by: Todd Lipcon <to...@apache.org>
Tested-by: Kudu Jenkins


Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/4af33ba1
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/4af33ba1
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/4af33ba1

Branch: refs/heads/master
Commit: 4af33ba134372f6887bff288b882dd00c4fd8b43
Parents: c26e09e
Author: Adar Dembo <ad...@cloudera.com>
Authored: Wed May 10 16:17:57 2017 -0700
Committer: Adar Dembo <ad...@cloudera.com>
Committed: Sat May 13 01:38:37 2017 +0000

----------------------------------------------------------------------
 src/kudu/fs/block_manager.cc      |  17 ++++--
 src/kudu/fs/file_block_manager.cc |  27 ++-------
 src/kudu/fs/file_block_manager.h  |   5 +-
 src/kudu/fs/log_block_manager.cc  | 100 +++++++++++----------------------
 src/kudu/fs/log_block_manager.h   |   5 +-
 5 files changed, 54 insertions(+), 100 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kudu/blob/4af33ba1/src/kudu/fs/block_manager.cc
----------------------------------------------------------------------
diff --git a/src/kudu/fs/block_manager.cc b/src/kudu/fs/block_manager.cc
index 856e1a7..8034fd4 100644
--- a/src/kudu/fs/block_manager.cc
+++ b/src/kudu/fs/block_manager.cc
@@ -38,11 +38,21 @@ TAG_FLAG(block_manager_lock_dirs, unsafe);
 
 DEFINE_int64(block_manager_max_open_files, -1,
              "Maximum number of open file descriptors to be used for data "
-             "blocks. If 0, there is no limit. If -1, Kudu will use 40% of "
-             "its resource limit as per getrlimit(). This is a soft limit.");
+             "blocks. If -1, Kudu will use 40% of its resource limit as per "
+             "getrlimit(). This is a soft limit. It is an error to use a "
+             "value of 0.");
 TAG_FLAG(block_manager_max_open_files, advanced);
 TAG_FLAG(block_manager_max_open_files, evolving);
 
+static bool ValidateMaxOpenFiles(const char* /*flagname*/, int64_t value) {
+  if (value == 0) {
+    LOG(ERROR) << "Invalid max open files: cannot be 0";
+    return false;
+  }
+  return true;
+}
+DEFINE_validator(block_manager_max_open_files, &ValidateMaxOpenFiles);
+
 using strings::Substitute;
 
 namespace kudu {
@@ -66,9 +76,6 @@ int64_t GetFileCacheCapacityForBlockManager(Env* env) {
   if (FLAGS_block_manager_max_open_files == -1) {
     return (2 * env->GetOpenFileLimit()) / 5;
   }
-  if (FLAGS_block_manager_max_open_files == 0) {
-    return kint64max;
-  }
   int64_t file_limit = env->GetOpenFileLimit();
   LOG_IF(FATAL, FLAGS_block_manager_max_open_files > file_limit) <<
       Substitute(

http://git-wip-us.apache.org/repos/asf/kudu/blob/4af33ba1/src/kudu/fs/file_block_manager.cc
----------------------------------------------------------------------
diff --git a/src/kudu/fs/file_block_manager.cc b/src/kudu/fs/file_block_manager.cc
index 45ced1e..c0c268a 100644
--- a/src/kudu/fs/file_block_manager.cc
+++ b/src/kudu/fs/file_block_manager.cc
@@ -532,20 +532,13 @@ FileBlockManager::FileBlockManager(Env* env, const BlockManagerOptions& opts)
   : env_(DCHECK_NOTNULL(env)),
     read_only_(opts.read_only),
     dd_manager_(env, opts.metric_entity, kBlockManagerType, opts.root_paths),
+    file_cache_("fbm", env_, GetFileCacheCapacityForBlockManager(env_),
+                opts.metric_entity),
     rand_(GetRandomSeed32()),
     next_block_id_(rand_.Next64()),
     mem_tracker_(MemTracker::CreateTracker(-1,
                                            "file_block_manager",
                                            opts.parent_mem_tracker)) {
-
-  int64_t file_cache_capacity = GetFileCacheCapacityForBlockManager(env_);
-  if (file_cache_capacity != kint64max) {
-    file_cache_.reset(new FileCache<RandomAccessFile>("fbm",
-                                                      env_,
-                                                      file_cache_capacity,
-                                                      opts.metric_entity));
-  }
-
   if (opts.metric_entity) {
     metrics_.reset(new internal::BlockManagerMetrics(opts.metric_entity));
   }
@@ -571,9 +564,7 @@ Status FileBlockManager::Open(FsReport* report) {
   }
   RETURN_NOT_OK(dd_manager_.Open(kMaxPaths, mode));
 
-  if (file_cache_) {
-    RETURN_NOT_OK(file_cache_->Init());
-  }
+  RETURN_NOT_OK(file_cache_.Init());
 
   // Prepare the filesystem report and either return or log it.
   FsReport local_report;
@@ -661,11 +652,7 @@ Status FileBlockManager::OpenBlock(const BlockId& block_id,
   VLOG(1) << "Opening block with id " << block_id.ToString() << " at " << path;
 
   shared_ptr<RandomAccessFile> reader;
-  if (file_cache_) {
-    RETURN_NOT_OK(file_cache_->OpenExistingFile(path, &reader));
-  } else {
-    RETURN_NOT_OK(env_util::OpenFileForRandom(env_, path, &reader));
-  }
+  RETURN_NOT_OK(file_cache_.OpenExistingFile(path, &reader));
   block->reset(new internal::FileReadableBlock(this, block_id, reader));
   return Status::OK();
 }
@@ -678,11 +665,7 @@ Status FileBlockManager::DeleteBlock(const BlockId& block_id) {
     return Status::NotFound(
         Substitute("Block $0 not found", block_id.ToString()));
   }
-  if (file_cache_) {
-    RETURN_NOT_OK(file_cache_->DeleteFile(path));
-  } else {
-    RETURN_NOT_OK(env_->DeleteFile(path));
-  }
+  RETURN_NOT_OK(file_cache_.DeleteFile(path));
 
   // We don't bother fsyncing the parent directory as there's nothing to be
   // gained by ensuring that the deletion is made durable. Even if we did

http://git-wip-us.apache.org/repos/asf/kudu/blob/4af33ba1/src/kudu/fs/file_block_manager.h
----------------------------------------------------------------------
diff --git a/src/kudu/fs/file_block_manager.h b/src/kudu/fs/file_block_manager.h
index 5ceab34..ebedfb1 100644
--- a/src/kudu/fs/file_block_manager.h
+++ b/src/kudu/fs/file_block_manager.h
@@ -27,14 +27,13 @@
 #include "kudu/fs/block_manager.h"
 #include "kudu/fs/data_dirs.h"
 #include "kudu/util/atomic.h"
+#include "kudu/util/file_cache.h"
 #include "kudu/util/locks.h"
 #include "kudu/util/random.h"
 
 namespace kudu {
 
 class Env;
-template <class FileType>
-class FileCache;
 class MemTracker;
 class MetricEntity;
 class RandomAccessFile;
@@ -122,7 +121,7 @@ class FileBlockManager : public BlockManager {
   DataDirManager dd_manager_;
 
   // Manages files opened for reading.
-  std::unique_ptr<FileCache<RandomAccessFile>> file_cache_;
+  FileCache<RandomAccessFile> file_cache_;
 
   // For generating block IDs.
   ThreadSafeRandom rand_;

http://git-wip-us.apache.org/repos/asf/kudu/blob/4af33ba1/src/kudu/fs/log_block_manager.cc
----------------------------------------------------------------------
diff --git a/src/kudu/fs/log_block_manager.cc b/src/kudu/fs/log_block_manager.cc
index 2efdc09..445b9d2 100644
--- a/src/kudu/fs/log_block_manager.cc
+++ b/src/kudu/fs/log_block_manager.cc
@@ -518,22 +518,16 @@ Status LogBlockContainer::Create(LogBlockManager* block_manager,
     unique_ptr<WritablePBContainerFile> metadata_file;
     shared_ptr<RWFile> cached_data_file;
 
-    if (block_manager->file_cache_) {
-      metadata_writer.reset();
-      shared_ptr<RWFile> cached_metadata_writer;
-      RETURN_NOT_OK(block_manager->file_cache_->OpenExistingFile(
-          metadata_path, &cached_metadata_writer));
-      metadata_file.reset(new WritablePBContainerFile(
-          std::move(cached_metadata_writer)));
-
-      data_file.reset();
-      RETURN_NOT_OK(block_manager->file_cache_->OpenExistingFile(
-          data_path, &cached_data_file));
-    } else {
-      metadata_file.reset(new WritablePBContainerFile(
-          std::move(metadata_writer)));
-      cached_data_file = std::move(data_file);
-    }
+    metadata_writer.reset();
+    shared_ptr<RWFile> cached_metadata_writer;
+    RETURN_NOT_OK(block_manager->file_cache_.OpenExistingFile(
+        metadata_path, &cached_metadata_writer));
+    metadata_file.reset(new WritablePBContainerFile(
+        std::move(cached_metadata_writer)));
+
+    data_file.reset();
+    RETURN_NOT_OK(block_manager->file_cache_.OpenExistingFile(
+        data_path, &cached_data_file));
     RETURN_NOT_OK(metadata_file->Init(BlockRecordPB()));
 
     container->reset(new LogBlockContainer(block_manager,
@@ -582,36 +576,17 @@ Status LogBlockContainer::Open(LogBlockManager* block_manager,
   }
 
   // Open the existing metadata and data files for writing.
+  shared_ptr<RWFile> metadata_file;
+  RETURN_NOT_OK(block_manager->file_cache_.OpenExistingFile(
+      metadata_path, &metadata_file));
   unique_ptr<WritablePBContainerFile> metadata_pb_writer;
-  shared_ptr<RWFile> data_file;
-
-  if (block_manager->file_cache_) {
-    shared_ptr<RWFile> metadata_writer;
-    RETURN_NOT_OK(block_manager->file_cache_->OpenExistingFile(
-        metadata_path, &metadata_writer));
-    metadata_pb_writer.reset(new WritablePBContainerFile(
-        std::move(metadata_writer)));
+  metadata_pb_writer.reset(new WritablePBContainerFile(
+      std::move(metadata_file)));
+  RETURN_NOT_OK(metadata_pb_writer->Reopen());
 
-    RETURN_NOT_OK(block_manager->file_cache_->OpenExistingFile(
+  shared_ptr<RWFile> data_file;
+  RETURN_NOT_OK(block_manager->file_cache_.OpenExistingFile(
         data_path, &data_file));
-  } else {
-    RWFileOptions wr_opts;
-    wr_opts.mode = Env::OPEN_EXISTING;
-
-    unique_ptr<RWFile> metadata_writer;
-    RETURN_NOT_OK(block_manager->env_->NewRWFile(wr_opts,
-                                                 metadata_path,
-                                                 &metadata_writer));
-    metadata_pb_writer.reset(new WritablePBContainerFile(
-        std::move(metadata_writer)));
-
-    unique_ptr<RWFile> uw;
-    RETURN_NOT_OK(block_manager->env_->NewRWFile(wr_opts,
-                                                 data_path,
-                                                 &uw));
-    data_file = std::move(uw);
-  }
-  RETURN_NOT_OK(metadata_pb_writer->Reopen());
 
   uint64_t data_file_size;
   RETURN_NOT_OK(data_file->Size(&data_file_size));
@@ -878,7 +853,16 @@ Status LogBlockContainer::SyncMetadata() {
 }
 
 Status LogBlockContainer::ReopenMetadataWriter() {
-  return metadata_file_->Reopen();
+  shared_ptr<RWFile> f;
+  RETURN_NOT_OK(block_manager_->file_cache_.OpenExistingFile(
+      metadata_file_->filename(), &f));
+  unique_ptr<WritablePBContainerFile> w;
+  w.reset(new WritablePBContainerFile(std::move(f)));
+  RETURN_NOT_OK(w->Reopen());
+
+  RETURN_NOT_OK(metadata_file_->Close());
+  metadata_file_.swap(w);
+  return Status::OK();
 }
 
 Status LogBlockContainer::EnsurePreallocated(int64_t block_start_offset,
@@ -1394,6 +1378,8 @@ LogBlockManager::LogBlockManager(Env* env, const BlockManagerOptions& opts)
                                            "log_block_manager",
                                            opts.parent_mem_tracker)),
     dd_manager_(env, opts.metric_entity, kBlockManagerType, opts.root_paths),
+    file_cache_("lbm", env, GetFileCacheCapacityForBlockManager(env),
+                opts.metric_entity),
     blocks_by_block_id_(10,
                         BlockMap::hasher(),
                         BlockMap::key_equal(),
@@ -1404,14 +1390,6 @@ LogBlockManager::LogBlockManager(Env* env, const BlockManagerOptions& opts)
     next_block_id_(1) {
   blocks_by_block_id_.set_deleted_key(BlockId());
 
-  int64_t file_cache_capacity = GetFileCacheCapacityForBlockManager(env_);
-  if (file_cache_capacity != kint64max) {
-    file_cache_.reset(new FileCache<RWFile>("lbm",
-                                            env_,
-                                            file_cache_capacity,
-                                            opts.metric_entity));
-  }
-
   // HACK: when running in a test environment, we often instantiate many
   // LogBlockManagers in the same process, eg corresponding to different
   // tablet servers in a minicluster, or due to running many separate test
@@ -1466,9 +1444,7 @@ Status LogBlockManager::Open(FsReport* report) {
   }
   RETURN_NOT_OK(dd_manager_.Open(kuint32max, mode));
 
-  if (file_cache_) {
-    RETURN_NOT_OK(file_cache_->Init());
-  }
+  RETURN_NOT_OK(file_cache_.Init());
 
   // Establish (and log) block limits for each data directory using kernel,
   // filesystem, and gflags information.
@@ -2088,19 +2064,9 @@ Status LogBlockManager::Repair(
     string data_file_name = StrCat(d, kContainerDataFileSuffix);
     string metadata_file_name = StrCat(d, kContainerMetadataFileSuffix);
 
-    Status data_file_status;
-    Status metadata_file_status;
-    if (file_cache_) {
-      data_file_status = file_cache_->DeleteFile(data_file_name);
-      metadata_file_status = file_cache_->DeleteFile(metadata_file_name);
-    } else {
-      data_file_status = env_->DeleteFile(data_file_name);
-      metadata_file_status = env_->DeleteFile(metadata_file_name);
-    }
-
-    WARN_NOT_OK(data_file_status,
+    WARN_NOT_OK(file_cache_.DeleteFile(data_file_name),
                 "Could not delete dead container data file " + data_file_name);
-    WARN_NOT_OK(metadata_file_status,
+    WARN_NOT_OK(file_cache_.DeleteFile(metadata_file_name),
                 "Could not delete dead container metadata file " + metadata_file_name);
   }
   if (!dead_containers.empty()) {

http://git-wip-us.apache.org/repos/asf/kudu/blob/4af33ba1/src/kudu/fs/log_block_manager.h
----------------------------------------------------------------------
diff --git a/src/kudu/fs/log_block_manager.h b/src/kudu/fs/log_block_manager.h
index 5a23f37..71ba0c3 100644
--- a/src/kudu/fs/log_block_manager.h
+++ b/src/kudu/fs/log_block_manager.h
@@ -37,14 +37,13 @@
 #include "kudu/fs/fs.pb.h"
 #include "kudu/gutil/ref_counted.h"
 #include "kudu/util/atomic.h"
+#include "kudu/util/file_cache.h"
 #include "kudu/util/mem_tracker.h"
 #include "kudu/util/oid_generator.h"
 #include "kudu/util/random.h"
 
 namespace kudu {
 class Env;
-template <class FileType>
-class FileCache;
 class MetricEntity;
 class RWFile;
 class ThreadPool;
@@ -324,7 +323,7 @@ class LogBlockManager : public BlockManager {
                      boost::optional<int64_t>> block_limits_by_data_dir_;
 
   // Manages files opened for reading.
-  std::unique_ptr<FileCache<RWFile>> file_cache_;
+  FileCache<RWFile> file_cache_;
 
   // Maps block IDs to blocks that are now readable, either because they
   // already existed on disk when the block manager was opened, or because