You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kudu.apache.org by ad...@apache.org on 2017/01/20 21:11:12 UTC

[2/2] kudu git commit: env_util: Factor out helper DeleteExcessFilesByPattern()

env_util: Factor out helper DeleteExcessFilesByPattern()

The logic contained in here is generally useful for log rotation
purposes, and we can reuse it for minidump rotation in a later patch.

Change-Id: I6e76911b0a68f7c1397d93eb027b6a5c99e2fbed
Reviewed-on: http://gerrit.cloudera.org:8080/5740
Tested-by: Kudu Jenkins
Reviewed-by: Adar Dembo <ad...@cloudera.com>


Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/fb3bbbc8
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/fb3bbbc8
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/fb3bbbc8

Branch: refs/heads/master
Commit: fb3bbbc8ce8527a63144b268cbad4aee92da4c1f
Parents: 6985a54
Author: Mike Percy <mp...@apache.org>
Authored: Wed Jan 18 17:49:05 2017 -0800
Committer: Adar Dembo <ad...@cloudera.com>
Committed: Fri Jan 20 21:10:47 2017 +0000

----------------------------------------------------------------------
 src/kudu/util/env_util-test.cc | 44 ++++++++++++++++++++++++++++++++++---
 src/kudu/util/env_util.cc      | 31 ++++++++++++++++++++++++++
 src/kudu/util/env_util.h       |  7 ++++++
 src/kudu/util/logging.cc       | 25 ++-------------------
 4 files changed, 81 insertions(+), 26 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kudu/blob/fb3bbbc8/src/kudu/util/env_util-test.cc
----------------------------------------------------------------------
diff --git a/src/kudu/util/env_util-test.cc b/src/kudu/util/env_util-test.cc
index 90b308d..7c79068 100644
--- a/src/kudu/util/env_util-test.cc
+++ b/src/kudu/util/env_util-test.cc
@@ -15,15 +15,20 @@
 // specific language governing permissions and limitations
 // under the License.
 
+#include <sys/statvfs.h>
+#include <sys/time.h>
 #include <unistd.h>
 
-#include "kudu/util/env_util.h"
+#include <memory>
+#include <unordered_set>
 
 #include <gflags/gflags.h>
-#include <memory>
-#include <sys/statvfs.h>
+#include <glog/stl_logging.h>
 
+#include "kudu/gutil/map-util.h"
 #include "kudu/gutil/strings/substitute.h"
+#include "kudu/gutil/walltime.h"
+#include "kudu/util/env_util.h"
 #include "kudu/util/path_util.h"
 #include "kudu/util/test_macros.h"
 #include "kudu/util/test_util.h"
@@ -33,6 +38,7 @@ DECLARE_int64(disk_reserved_bytes_free_for_testing);
 
 using std::string;
 using std::unique_ptr;
+using std::unordered_set;
 using strings::Substitute;
 
 namespace kudu {
@@ -108,5 +114,37 @@ TEST_F(EnvUtilTest, TestCreateDirsRecursively) {
   ASSERT_TRUE(is_dir);
 }
 
+// Ensure that DeleteExcessFilesByPattern() works.
+// We ensure that the number of files remaining after running it is the number
+// expected, and we manually set the modification times on the relevant files
+// to allow us to test that files are deleted oldest-first.
+TEST_F(EnvUtilTest, TestDeleteExcessFilesByPattern) {
+  string dir = JoinPathSegments(test_dir_, "excess");
+  ASSERT_OK(env_->CreateDir(dir));
+  vector<string> filenames = {"a", "b", "c", "d"};
+  int now_sec = GetCurrentTimeMicros() / 1000;
+  for (int i = 0; i < filenames.size(); i++) {
+    const string& filename = filenames[i];
+    string path = JoinPathSegments(dir, filename);
+    unique_ptr<WritableFile> file;
+    ASSERT_OK(env_->NewWritableFile(path, &file));
+    ASSERT_OK(file->Close());
+
+    // Set the last-modified time of the file.
+    struct timeval target_time { .tv_sec = now_sec + (i * 2), .tv_usec = 0 };
+    struct timeval times[2] = { target_time, target_time };
+    ASSERT_EQ(0, utimes(path.c_str(), times)) << errno;
+  }
+  vector<string> children;
+  ASSERT_OK(env_->GetChildren(dir, &children));
+  ASSERT_EQ(6, children.size()); // 4 files plus "." and "..".
+  ASSERT_OK(DeleteExcessFilesByPattern(env_, dir + "/*", 2));
+  ASSERT_OK(env_->GetChildren(dir, &children));
+  ASSERT_EQ(4, children.size()); // 2 files plus "." and "..".
+  unordered_set<string> children_set(children.begin(), children.end());
+  unordered_set<string> expected_set({".", "..", "c", "d"});
+  ASSERT_EQ(expected_set, children_set) << children;
+}
+
 } // namespace env_util
 } // namespace kudu

http://git-wip-us.apache.org/repos/asf/kudu/blob/fb3bbbc8/src/kudu/util/env_util.cc
----------------------------------------------------------------------
diff --git a/src/kudu/util/env_util.cc b/src/kudu/util/env_util.cc
index e398831..178d391 100644
--- a/src/kudu/util/env_util.cc
+++ b/src/kudu/util/env_util.cc
@@ -236,6 +236,37 @@ Status CopyFile(Env* env, const string& source_path, const string& dest_path,
   return Status::OK();
 }
 
+Status DeleteExcessFilesByPattern(Env* env, const string& pattern, int max_matches) {
+  // Negative numbers don't make sense for our interface.
+  DCHECK_GE(max_matches, 0);
+
+  vector<string> matching_files;
+  RETURN_NOT_OK(env->Glob(pattern, &matching_files));
+
+  if (matching_files.size() <= max_matches) {
+    return Status::OK();
+  }
+
+  vector<pair<time_t, string>> matching_file_mtimes;
+  for (string& matching_file_path : matching_files) {
+    int64_t mtime;
+    RETURN_NOT_OK(env->GetFileModifiedTime(matching_file_path, &mtime));
+    matching_file_mtimes.emplace_back(mtime, std::move(matching_file_path));
+  }
+
+  // Use mtime to determine which matching files to delete. This could
+  // potentially be ambiguous, depending on the resolution of last-modified
+  // timestamp in the filesystem, but that is part of the contract.
+  std::sort(matching_file_mtimes.begin(), matching_file_mtimes.end());
+  matching_file_mtimes.resize(matching_file_mtimes.size() - max_matches);
+
+  for (const auto& matching_file : matching_file_mtimes) {
+    RETURN_NOT_OK(env->DeleteFile(matching_file.second));
+  }
+
+  return Status::OK();
+}
+
 ScopedFileDeleter::ScopedFileDeleter(Env* env, std::string path)
     : env_(DCHECK_NOTNULL(env)), path_(std::move(path)), should_delete_(true) {}
 

http://git-wip-us.apache.org/repos/asf/kudu/blob/fb3bbbc8/src/kudu/util/env_util.h
----------------------------------------------------------------------
diff --git a/src/kudu/util/env_util.h b/src/kudu/util/env_util.h
index c54bfa8..884b17c 100644
--- a/src/kudu/util/env_util.h
+++ b/src/kudu/util/env_util.h
@@ -81,6 +81,13 @@ Status CreateDirsRecursively(Env* env, const std::string& path);
 Status CopyFile(Env* env, const std::string& source_path, const std::string& dest_path,
                 WritableFileOptions opts);
 
+// Deletes files matching 'pattern' in excess of 'max_matches' files.
+// 'max_matches' must be greater than or equal to 0.
+// The oldest files are deleted first, as determined by last modified time.
+// In the case that multiple files have the same last modified time, it is not
+// defined which file will be deleted first.
+Status DeleteExcessFilesByPattern(Env* env, const std::string& pattern, int max_matches);
+
 // Deletes a file or directory when this object goes out of scope.
 //
 // The deletion may be cancelled by calling .Cancel().

http://git-wip-us.apache.org/repos/asf/kudu/blob/fb3bbbc8/src/kudu/util/logging.cc
----------------------------------------------------------------------
diff --git a/src/kudu/util/logging.cc b/src/kudu/util/logging.cc
index 4438380..8332349 100644
--- a/src/kudu/util/logging.cc
+++ b/src/kudu/util/logging.cc
@@ -38,6 +38,7 @@
 #include "kudu/util/debug-util.h"
 #include "kudu/util/debug/leakcheck_disabler.h"
 #include "kudu/util/env.h"
+#include "kudu/util/env_util.h"
 #include "kudu/util/flag_tags.h"
 #include "kudu/util/status.h"
 
@@ -359,23 +360,10 @@ Status DeleteExcessLogFiles(Env* env) {
   if (max_log_files <= 0) return Status::OK();
 
   for (int severity = 0; severity < google::NUM_SEVERITIES; ++severity) {
-    vector<string> logfiles;
     // Build glob pattern for input
     // e.g. /var/log/kudu/kudu-master.*.INFO.*
     string pattern = strings::Substitute("$0/$1.*.$2.*", FLAGS_log_dir, FLAGS_log_filename,
                                          google::GetLogSeverityName(severity));
-    RETURN_NOT_OK(env->Glob(pattern, &logfiles));
-
-    if (logfiles.size() <= max_log_files) {
-      continue;
-    }
-
-    vector<pair<time_t, string>> logfile_mtimes;
-    for (string& logfile : logfiles) {
-      int64_t mtime;
-      RETURN_NOT_OK(env->GetFileModifiedTime(logfile, &mtime));
-      logfile_mtimes.emplace_back(mtime, std::move(logfile));
-    }
 
     // Keep the 'max_log_files' most recent log files, as compared by
     // modification time. Glog files contain a second-granularity timestamp in
@@ -383,16 +371,7 @@ Status DeleteExcessLogFiles(Env* env) {
     // guaranteed by glob, however this code has been adapted from Impala which
     // uses mtime to determine which files to delete, and there haven't been any
     // issues in production settings.
-    std::sort(logfile_mtimes.begin(), logfile_mtimes.end());
-    logfile_mtimes.resize(logfile_mtimes.size() - max_log_files);
-
-    VLOG(2) << "Deleting " << logfile_mtimes.size()
-            << " excess glog files at " << google::GetLogSeverityName(severity)
-            << " severity";
-
-    for (const auto& logfile: logfile_mtimes) {
-      RETURN_NOT_OK(env->DeleteFile(logfile.second));
-    }
+    RETURN_NOT_OK(env_util::DeleteExcessFilesByPattern(env, pattern, max_log_files));
   }
   return Status::OK();
 }