You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kudu.apache.org by ad...@apache.org on 2020/01/21 22:41:12 UTC

[kudu] 04/04: file cache: evict open fd when descriptor goes out of scope

This is an automated email from the ASF dual-hosted git repository.

adar pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git

commit 0f22f1268c4fda855456d6245c28024daf881d00
Author: Adar Dembo <ad...@cloudera.com>
AuthorDate: Sun Jan 19 15:54:25 2020 -0800

    file cache: evict open fd when descriptor goes out of scope
    
    Previously, an open fd remained in cache when its descriptor was destroyed
    unless the file was marked for deletion. This avoided an extra cache lookup
    at destruction time and provided faster access to a file if it was closed
    and then reopened (though that second benefit was irrelevant to the block
    managers where files were kept open permanently unless they were deleted).
    
    With this change, an open fd is forcefully evicted when its descriptor is
    destroyed. This provides better semantics if the goal is to close a file
    without deleting it and test that the fd is indeed closed. It also prevents
    "false positive" cache hits when the user closes the file, renames it, and
    creates a new file with the old file name. This is an access pattern used by
    the WAL when a replica fails and it is brought back to life via tablet copy
    from a healthy server.
    
    Change-Id: Iea5317add630753716ef538cc8a198c9b3547822
    Reviewed-on: http://gerrit.cloudera.org:8080/15080
    Reviewed-by: Andrew Wong <aw...@cloudera.com>
    Tested-by: Adar Dembo <ad...@cloudera.com>
---
 src/kudu/util/file_cache-test.cc | 12 ++++++------
 src/kudu/util/file_cache.cc      | 18 +++++++++++++-----
 src/kudu/util/file_cache.h       | 10 +++++-----
 3 files changed, 24 insertions(+), 16 deletions(-)

diff --git a/src/kudu/util/file_cache-test.cc b/src/kudu/util/file_cache-test.cc
index 0c2dea8..1fbfc38 100644
--- a/src/kudu/util/file_cache-test.cc
+++ b/src/kudu/util/file_cache-test.cc
@@ -175,10 +175,10 @@ TYPED_TEST(FileCacheTest, TestBasicOperations) {
     }
   }
 
-  // The descriptors are all out of scope, but the open fds remain in the cache.
-  NO_FATALS(this->AssertFdsAndDescriptors(1, 0));
+  // The descriptors are all out of scope, and so are the cached fds.
+  NO_FATALS(this->AssertFdsAndDescriptors(0, 0));
 
-  // With the cache gone, so are the cached fds.
+  // With the cache gone, nothing changes.
   this->cache_.reset();
   ASSERT_EQ(this->initial_open_fds_, this->CountOpenFds());
 }
@@ -222,8 +222,8 @@ TYPED_TEST(FileCacheTest, TestDeletion) {
   ASSERT_EQ(this->initial_open_fds_, this->CountOpenFds());
 
   // Create a test file, open it, and let it go out of scope before
-  // deleting it. The deletion should evict the fd and close it, despite
-  // happening after the descriptor is gone.
+  // deleting it. The fd is already evicted and closed; this is effectively a
+  // non-cached deletion.
   const string kFile3 = this->GetTestPath("baz");
   const string kData3 = "test data 3";
   ASSERT_OK(this->WriteTestFile(kFile3, kData3));
@@ -232,7 +232,7 @@ TYPED_TEST(FileCacheTest, TestDeletion) {
     ASSERT_OK(this->cache_->OpenExistingFile(kFile3, &f3));
   }
   ASSERT_TRUE(this->env_->FileExists(kFile3));
-  ASSERT_EQ(this->initial_open_fds_ + 1, this->CountOpenFds());
+  ASSERT_EQ(this->initial_open_fds_, this->CountOpenFds());
   ASSERT_OK(this->cache_->DeleteFile(kFile3));
   ASSERT_FALSE(this->env_->FileExists(kFile3));
   ASSERT_EQ(this->initial_open_fds_, this->CountOpenFds());
diff --git a/src/kudu/util/file_cache.cc b/src/kudu/util/file_cache.cc
index 06e8965..8fb0adf 100644
--- a/src/kudu/util/file_cache.cc
+++ b/src/kudu/util/file_cache.cc
@@ -100,16 +100,24 @@ class BaseDescriptor {
   ~BaseDescriptor() {
     VLOG(2) << "Out of scope descriptor with file name: " << filename();
 
-    // The (now expired) weak_ptr remains in 'descriptors_', to be removed by
-    // the next call to RunDescriptorExpiry(). Removing it here would risk a
-    // deadlock on recursive acquisition of 'lock_'.
+    // The destruction of the descriptor indicates that there's no active user
+    // of the file at the moment. However, if the fd is still open in the LRU
+    // cache, should we leave it there, or should we evict it?
+    //
+    // We opt for eviction: your typical filesystem user is more likely to want
+    // the underlying resource released when they're done working with a file
+    // than they are to want the resource to be quickly accessible should the
+    // file be reopened.
+    cache()->Erase(filename());
 
     if (deleted()) {
-      cache()->Erase(filename());
-
       VLOG(1) << "Deleting file: " << filename();
       WARN_NOT_OK(env()->DeleteFile(filename()), "");
     }
+
+    // The (now expired) weak_ptr remains in 'descriptors_', to be removed by
+    // the next call to RunDescriptorExpiry(). Removing it here would risk a
+    // deadlock on recursive acquisition of 'lock_'.
   }
 
   // Insert a pointer to an open file object into the file cache with the
diff --git a/src/kudu/util/file_cache.h b/src/kudu/util/file_cache.h
index e8c3250..ed3663c 100644
--- a/src/kudu/util/file_cache.h
+++ b/src/kudu/util/file_cache.h
@@ -63,7 +63,7 @@ class Thread;
 // The core of the client-facing API is the cache descriptor. A descriptor
 // uniquely identifies an opened file. To a client, a descriptor is just an
 // open file interface of the variety defined in util/env.h. Clients open
-// descriptors via the OpenExisting*() cache methods.
+// descriptors via the OpenExistingFile() cache methods.
 //
 // Descriptors are shared objects; an existing descriptor is handed back to a
 // client if a file with the same name is already opened. To facilitate
@@ -118,9 +118,9 @@ class FileCache {
   // to a file-like interface but interfaces with the cache under the hood to
   // reopen a file as needed during file operations.
   //
-  // The descriptor is opened immediately to verify that the on-disk file can
-  // be opened, but may be closed later if the cache reaches its upper bound on
-  // the number of open files.
+  // The underlying file is opened immediately to verify that it indeed exists,
+  // but may be closed later if the cache reaches its upper bound on the number
+  // of open files. It is also closed when the descriptor's last reference is dropped.
   template <class FileType>
   Status OpenExistingFile(const std::string& file_name,
                           std::shared_ptr<FileType>* file);
@@ -128,7 +128,7 @@ class FileCache {
   // Deletes a file by name through the cache.
   //
   // If there is an outstanding descriptor for the file, the deletion will be
-  // deferred until the last referent is dropped. Otherwise, the file is
+  // deferred until the last reference is dropped. Otherwise, the file is
   // deleted immediately.
   Status DeleteFile(const std::string& file_name);