You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by GitBox <gi...@apache.org> on 2022/12/19 04:44:55 UTC

[GitHub] [doris] luozenglin commented on a diff in pull request #15114: [enhancement](gc) sub_file_cache checks the directory files when gc

luozenglin commented on code in PR #15114:
URL: https://github.com/apache/doris/pull/15114#discussion_r1051788040


##########
be/src/io/cache/file_cache.cpp:
##########
@@ -87,32 +89,87 @@ Status FileCache::download_cache_to_local(const Path& cache_file, const Path& ca
     return Status::OK();
 }
 
-Status FileCache::_remove_file(const Path& cache_file, const Path& cache_done_file,
-                               size_t* cleaned_size) {
-    bool done_file_exist = false;
-    RETURN_NOT_OK_STATUS_WITH_WARN(
-            io::global_local_filesystem()->exists(cache_done_file, &done_file_exist),
-            "Check local done file exist failed.");
-    if (done_file_exist) {
-        RETURN_NOT_OK_STATUS_WITH_WARN(
-                io::global_local_filesystem()->delete_file(cache_done_file),
-                fmt::format("Delete local done file failed: {}", cache_done_file.native()));
-    }
+Status FileCache::_remove_file(const Path& file, size_t* cleaned_size) {
     bool cache_file_exist = false;
-    RETURN_NOT_OK_STATUS_WITH_WARN(
-            io::global_local_filesystem()->exists(cache_file, &cache_file_exist),
-            "Check local cache file exist failed.");
+    RETURN_NOT_OK_STATUS_WITH_WARN(io::global_local_filesystem()->exists(file, &cache_file_exist),
+                                   "Check local cache file exist failed.");
     if (cache_file_exist) {
         if (cleaned_size) {
             RETURN_NOT_OK_STATUS_WITH_WARN(
-                    io::global_local_filesystem()->file_size(cache_file, cleaned_size),
-                    fmt::format("get local cache file size failed: {}", cache_file.native()));
+                    io::global_local_filesystem()->file_size(file, cleaned_size),
+                    fmt::format("get local cache file size failed: {}", file.native()));
         }
         RETURN_NOT_OK_STATUS_WITH_WARN(
-                io::global_local_filesystem()->delete_file(cache_file),
-                fmt::format("Delete local cache file failed: {}", cache_file.native()));
+                io::global_local_filesystem()->delete_file(file),
+                fmt::format("Delete local cache file failed: {}", file.native()));
+    }
+    LOG(INFO) << "Delete local cache file successfully: " << file.native();
+    return Status::OK();
+}
+
+Status FileCache::_remove_cache_and_done(const Path& cache_file, const Path& cache_done_file,
+                                         size_t* cleaned_size) {
+    RETURN_IF_ERROR(_remove_file(cache_done_file, nullptr));
+    RETURN_IF_ERROR(_remove_file(cache_file, cleaned_size));
+    return Status::OK();
+}
+
+Status FileCache::_get_dir_file(const Path& cache_dir, std::vector<Path>& cache_names,
+                                std::vector<Path>& unfinished_files) {
+    // list all files
+    std::vector<Path> cache_file_names;
+    RETURN_NOT_OK_STATUS_WITH_WARN(
+            io::global_local_filesystem()->list(cache_dir, &cache_file_names),
+            fmt::format("List dir failed: {}", cache_dir.native()))
+
+    // separate DATA file and DONE file
+    std::set<Path> cache_names_temp;
+    std::list<Path> done_names_temp;
+    for (auto& cache_file_name : cache_file_names) {
+        if (ends_with(cache_file_name.native(), CACHE_DONE_FILE_SUFFIX)) {
+            done_names_temp.push_back(std::move(cache_file_name));
+        } else {
+            cache_names_temp.insert(std::move(cache_file_name));
+        }
+    }
+
+    // match DONE file with DATA file
+    for (auto done_file : done_names_temp) {
+        Path cache_filename = StringReplace(done_file.native(), CACHE_DONE_FILE_SUFFIX, "", true);
+        if (auto cache_iter = cache_names_temp.find(cache_filename);
+            cache_iter != cache_names_temp.end()) {
+            cache_names_temp.erase(cache_iter);
+            cache_names.push_back(std::move(cache_filename));
+        } else {
+            // not data file, but with DONE file
+            unfinished_files.push_back(std::move(done_file));
+        }
+    }
+    // data file without DONE file
+    for (auto& file : cache_names_temp) {

Review Comment:
   Downloading a file with a write lock, while `_get_dir_file` requires a read lock, I don't think this will happen as described above.



##########
be/src/io/cache/file_cache.cpp:
##########
@@ -87,32 +89,87 @@ Status FileCache::download_cache_to_local(const Path& cache_file, const Path& ca
     return Status::OK();
 }
 
-Status FileCache::_remove_file(const Path& cache_file, const Path& cache_done_file,
-                               size_t* cleaned_size) {
-    bool done_file_exist = false;
-    RETURN_NOT_OK_STATUS_WITH_WARN(
-            io::global_local_filesystem()->exists(cache_done_file, &done_file_exist),
-            "Check local done file exist failed.");
-    if (done_file_exist) {
-        RETURN_NOT_OK_STATUS_WITH_WARN(
-                io::global_local_filesystem()->delete_file(cache_done_file),
-                fmt::format("Delete local done file failed: {}", cache_done_file.native()));
-    }
+Status FileCache::_remove_file(const Path& file, size_t* cleaned_size) {
     bool cache_file_exist = false;
-    RETURN_NOT_OK_STATUS_WITH_WARN(
-            io::global_local_filesystem()->exists(cache_file, &cache_file_exist),
-            "Check local cache file exist failed.");
+    RETURN_NOT_OK_STATUS_WITH_WARN(io::global_local_filesystem()->exists(file, &cache_file_exist),
+                                   "Check local cache file exist failed.");
     if (cache_file_exist) {
         if (cleaned_size) {
             RETURN_NOT_OK_STATUS_WITH_WARN(
-                    io::global_local_filesystem()->file_size(cache_file, cleaned_size),
-                    fmt::format("get local cache file size failed: {}", cache_file.native()));
+                    io::global_local_filesystem()->file_size(file, cleaned_size),
+                    fmt::format("get local cache file size failed: {}", file.native()));
         }
         RETURN_NOT_OK_STATUS_WITH_WARN(
-                io::global_local_filesystem()->delete_file(cache_file),
-                fmt::format("Delete local cache file failed: {}", cache_file.native()));
+                io::global_local_filesystem()->delete_file(file),
+                fmt::format("Delete local cache file failed: {}", file.native()));
+    }
+    LOG(INFO) << "Delete local cache file successfully: " << file.native();
+    return Status::OK();
+}
+
+Status FileCache::_remove_cache_and_done(const Path& cache_file, const Path& cache_done_file,
+                                         size_t* cleaned_size) {
+    RETURN_IF_ERROR(_remove_file(cache_done_file, nullptr));
+    RETURN_IF_ERROR(_remove_file(cache_file, cleaned_size));
+    return Status::OK();
+}
+
+Status FileCache::_get_dir_file(const Path& cache_dir, std::vector<Path>& cache_names,

Review Comment:
   What is the reason for using pointers instead of references, please? It seems to me that references are better because you need to check if a pointer is a null pointer, while references are not.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org