You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kudu.apache.org by da...@apache.org on 2016/12/06 04:46:02 UTC

kudu git commit: KUDU-766: limit number of glog files

Repository: kudu
Updated Branches:
  refs/heads/master e98e4b6d8 -> 35d1b0142


KUDU-766: limit number of glog files

This commit introduces a new flag, 'max_log_files', that limits the
maximum number of rolled glog files to keep around at each severity
level. This flag matches an Apache Impala flag of the same name, and the
implementation is partly borrowed from it.

I initially went the route of making a new maintenance manager operation
type to schedule the file cleanup, but it proved difficult to shoehorn
this usecase into that abstraction.  Instead, a thead is spawned on
startup that checks for excess files every 60 seconds.

Change-Id: Ia3f81638a6dfcf134665322f091ff26d29640fd9
Reviewed-on: http://gerrit.cloudera.org:8080/5340
Tested-by: Kudu Jenkins
Reviewed-by: Dan Burkert <da...@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/35d1b014
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/35d1b014
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/35d1b014

Branch: refs/heads/master
Commit: 35d1b014251bb541f9cb8f87ea27098e4b89c79f
Parents: e98e4b6
Author: Dan Burkert <da...@apache.org>
Authored: Fri Dec 2 11:42:35 2016 -0800
Committer: Dan Burkert <da...@apache.org>
Committed: Tue Dec 6 04:45:42 2016 +0000

----------------------------------------------------------------------
 src/kudu/integration-tests/CMakeLists.txt       |   1 +
 .../integration-tests/external_mini_cluster.cc  | 111 +++++++++++++------
 .../integration-tests/external_mini_cluster.h   |  54 ++++++---
 src/kudu/integration-tests/log-rolling-itest.cc |  61 ++++++++++
 src/kudu/server/server_base.cc                  |  37 ++++++-
 src/kudu/server/server_base.h                   |   6 +-
 src/kudu/util/logging.cc                        |  72 ++++++++++--
 src/kudu/util/logging.h                         |  12 +-
 src/kudu/util/metrics.h                         |   2 +-
 9 files changed, 291 insertions(+), 65 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kudu/blob/35d1b014/src/kudu/integration-tests/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/src/kudu/integration-tests/CMakeLists.txt b/src/kudu/integration-tests/CMakeLists.txt
index 811ec36..113edf9 100644
--- a/src/kudu/integration-tests/CMakeLists.txt
+++ b/src/kudu/integration-tests/CMakeLists.txt
@@ -58,6 +58,7 @@ ADD_KUDU_TEST(exactly_once_writes-itest)
 ADD_KUDU_TEST(external_mini_cluster-test RESOURCE_LOCK "master-rpc-ports")
 ADD_KUDU_TEST(fuzz-itest)
 ADD_KUDU_TEST(linked_list-test RESOURCE_LOCK "master-rpc-ports")
+ADD_KUDU_TEST(log-rolling-itest)
 ADD_KUDU_TEST(master_failover-itest RESOURCE_LOCK "master-rpc-ports")
 ADD_KUDU_TEST(master_migration-itest RESOURCE_LOCK "master-rpc-ports")
 ADD_KUDU_TEST_DEPENDENCIES(master_migration-itest

http://git-wip-us.apache.org/repos/asf/kudu/blob/35d1b014/src/kudu/integration-tests/external_mini_cluster.cc
----------------------------------------------------------------------
diff --git a/src/kudu/integration-tests/external_mini_cluster.cc b/src/kudu/integration-tests/external_mini_cluster.cc
index 9bf09b3..7665ca7 100644
--- a/src/kudu/integration-tests/external_mini_cluster.cc
+++ b/src/kudu/integration-tests/external_mini_cluster.cc
@@ -85,7 +85,8 @@ ExternalMiniClusterOptions::ExternalMiniClusterOptions()
     : num_masters(1),
       num_tablet_servers(1),
       bind_to_unique_loopback_addresses(kBindToUniqueLoopbackAddress),
-      enable_kerberos(false) {
+      enable_kerberos(false),
+      logtostderr(true) {
 }
 
 ExternalMiniClusterOptions::~ExternalMiniClusterOptions() {
@@ -227,6 +228,13 @@ string ExternalMiniCluster::GetDataPath(const string& daemon_id) const {
   return JoinPathSegments(data_root_, daemon_id);
 }
 
+boost::optional<string> ExternalMiniCluster::GetLogPath(const string& daemon_id) const {
+  if (opts_.logtostderr) {
+    return boost::none;
+  }
+  return GetDataPath(daemon_id) + "-logs";
+}
+
 namespace {
 vector<string> SubstituteInFlags(const vector<string>& orig_flags,
                                  int index) {
@@ -241,12 +249,13 @@ vector<string> SubstituteInFlags(const vector<string>& orig_flags,
 } // anonymous namespace
 
 Status ExternalMiniCluster::StartSingleMaster() {
-  string exe = GetBinaryPath(kMasterBinaryName);
-  vector<string> flags = SubstituteInFlags(opts_.extra_master_flags, 0);
-
+  string daemon_id = "master-0";
   scoped_refptr<ExternalMaster> master =
-      new ExternalMaster(messenger_, exe, GetDataPath("master-0"), flags);
-
+      new ExternalMaster(messenger_,
+                        GetBinaryPath(kMasterBinaryName),
+                        GetDataPath(daemon_id),
+                        GetLogPath(daemon_id),
+                        SubstituteInFlags(opts_.extra_master_flags, 0));
   if (opts_.enable_kerberos) {
     RETURN_NOT_OK_PREPEND(master->EnableKerberos(kdc_.get(), "127.0.0.1"),
                           "could not enable Kerberos");
@@ -276,10 +285,12 @@ Status ExternalMiniCluster::StartDistributedMasters() {
 
   // Start the masters.
   for (int i = 0; i < num_masters; i++) {
+    string daemon_id = Substitute("master-$0", i);
     scoped_refptr<ExternalMaster> peer =
         new ExternalMaster(messenger_,
                            exe,
-                           GetDataPath(Substitute("master-$0", i)),
+                           GetDataPath(daemon_id),
+                           GetLogPath(daemon_id),
                            peer_addrs[i],
                            SubstituteInFlags(flags, i));
     if (opts_.enable_kerberos) {
@@ -309,18 +320,21 @@ Status ExternalMiniCluster::AddTabletServer() {
       << "Must have started at least 1 master before adding tablet servers";
 
   int idx = tablet_servers_.size();
+  string daemon_id = Substitute("ts-$0", idx);
 
-  string exe = GetBinaryPath(kTabletServerBinaryName);
   vector<HostPort> master_hostports;
   for (int i = 0; i < num_masters(); i++) {
     master_hostports.push_back(DCHECK_NOTNULL(master(i))->bound_rpc_hostport());
   }
   string bind_host = GetBindIpForTabletServer(idx);
   scoped_refptr<ExternalTabletServer> ts =
-    new ExternalTabletServer(messenger_, exe, GetDataPath(Substitute("ts-$0", idx)),
-                             bind_host,
-                             master_hostports,
-                             SubstituteInFlags(opts_.extra_tserver_flags, idx));
+      new ExternalTabletServer(messenger_,
+                               GetBinaryPath(kTabletServerBinaryName),
+                               GetDataPath(daemon_id),
+                               GetLogPath(daemon_id),
+                               bind_host,
+                               master_hostports,
+                               SubstituteInFlags(opts_.extra_tserver_flags, idx));
   if (opts_.enable_kerberos) {
     RETURN_NOT_OK_PREPEND(ts->EnableKerberos(kdc_.get(), bind_host),
                           "could not enable Kerberos");
@@ -567,10 +581,13 @@ Status ExternalMiniCluster::SetFlag(ExternalDaemon* daemon,
 //------------------------------------------------------------
 
 ExternalDaemon::ExternalDaemon(std::shared_ptr<rpc::Messenger> messenger,
-                               string exe, string data_dir,
+                               string exe,
+                               string data_dir,
+                               boost::optional<string> log_dir,
                                vector<string> extra_flags)
     : messenger_(std::move(messenger)),
       data_dir_(std::move(data_dir)),
+      log_dir_(std::move(log_dir)),
       exe_(std::move(exe)),
       extra_flags_(std::move(extra_flags)) {}
 
@@ -600,10 +617,22 @@ Status ExternalDaemon::StartProcess(const vector<string>& user_flags) {
   argv.insert(argv.end(), user_flags.begin(), user_flags.end());
 
   // Enable metrics logging.
-  // Even though we set -logtostderr down below, metrics logs end up being written
-  // based on -log_dir. So, we have to set that too.
   argv.push_back("--metrics_log_interval_ms=1000");
-  argv.push_back("--log_dir=" + data_dir_);
+
+  if (log_dir_) {
+    argv.push_back("--log_dir=" + *log_dir_);
+    Env* env = Env::Default();
+    if (!env->FileExists(*log_dir_)) {
+      RETURN_NOT_OK(env->CreateDir(*log_dir_));
+    }
+  } else {
+    // Ensure that logging goes to the test output and doesn't get buffered.
+    argv.push_back("--logtostderr");
+    argv.push_back("--logbuflevel=-1");
+    // Even though we are logging to stderr, metrics logs end up being written
+    // based on -log_dir. So, we have to set that too.
+    argv.push_back("--log_dir=" + data_dir_);
+  }
 
   // Then the "extra flags" passed into the ctor (from the ExternalMiniCluster
   // options struct). These come at the end so they can override things like
@@ -623,10 +652,6 @@ Status ExternalDaemon::StartProcess(const vector<string>& user_flags) {
   // the previous info file if it's there.
   ignore_result(Env::Default()->DeleteFile(info_path));
 
-  // Ensure that logging goes to the test output and doesn't get buffered.
-  argv.push_back("--logtostderr");
-  argv.push_back("--logbuflevel=-1");
-
   // Allow unsafe and experimental flags from tests, since we often use
   // fault injection, etc.
   argv.push_back("--unlock_experimental_flags");
@@ -912,19 +937,30 @@ ScopedResumeExternalDaemon::~ScopedResumeExternalDaemon() {
 // ExternalMaster
 //------------------------------------------------------------
 
-ExternalMaster::ExternalMaster(const std::shared_ptr<rpc::Messenger>& messenger,
-                               const string& exe,
-                               const string& data_dir,
-                               const vector<string>& extra_flags)
-    : ExternalDaemon(messenger, exe, data_dir, extra_flags),
+ExternalMaster::ExternalMaster(std::shared_ptr<rpc::Messenger> messenger,
+                               string exe,
+                               string data_dir,
+                               boost::optional<string> log_dir,
+                               vector<string> extra_flags)
+    : ExternalDaemon(std::move(messenger),
+                     std::move(exe),
+                     std::move(data_dir),
+                     std::move(log_dir),
+                     std::move(extra_flags)),
       rpc_bind_address_("127.0.0.1:0") {
 }
 
-ExternalMaster::ExternalMaster(const std::shared_ptr<rpc::Messenger>& messenger,
-                               const string& exe, const string& data_dir,
+ExternalMaster::ExternalMaster(std::shared_ptr<rpc::Messenger> messenger,
+                               string exe,
+                               string data_dir,
+                               boost::optional<string> log_dir,
                                string rpc_bind_address,
-                               const std::vector<string>& extra_flags)
-    : ExternalDaemon(messenger, exe, data_dir, extra_flags),
+                               std::vector<string> extra_flags)
+    : ExternalDaemon(std::move(messenger),
+                     std::move(exe),
+                     std::move(data_dir),
+                     std::move(log_dir),
+                     std::move(extra_flags)),
       rpc_bind_address_(std::move(rpc_bind_address)) {}
 
 ExternalMaster::~ExternalMaster() {
@@ -1003,11 +1039,18 @@ Status ExternalMaster::WaitForCatalogManager() {
 // ExternalTabletServer
 //------------------------------------------------------------
 
-ExternalTabletServer::ExternalTabletServer(
-    const std::shared_ptr<rpc::Messenger>& messenger, const string& exe,
-    const string& data_dir, string bind_host,
-    const vector<HostPort>& master_addrs, const vector<string>& extra_flags)
-    : ExternalDaemon(messenger, exe, data_dir, extra_flags),
+ExternalTabletServer::ExternalTabletServer(std::shared_ptr<rpc::Messenger> messenger,
+                                           string exe,
+                                           string data_dir,
+                                           boost::optional<string> log_dir,
+                                           string bind_host,
+                                           vector<HostPort> master_addrs,
+                                           vector<string> extra_flags)
+    : ExternalDaemon(std::move(messenger),
+                     std::move(exe),
+                     std::move(data_dir),
+                     std::move(log_dir),
+                     std::move(extra_flags)),
       master_addrs_(HostPort::ToCommaSeparatedString(master_addrs)),
       bind_host_(std::move(bind_host)) {}
 

http://git-wip-us.apache.org/repos/asf/kudu/blob/35d1b014/src/kudu/integration-tests/external_mini_cluster.h
----------------------------------------------------------------------
diff --git a/src/kudu/integration-tests/external_mini_cluster.h b/src/kudu/integration-tests/external_mini_cluster.h
index d7b0fe2..e3ed95c 100644
--- a/src/kudu/integration-tests/external_mini_cluster.h
+++ b/src/kudu/integration-tests/external_mini_cluster.h
@@ -24,6 +24,8 @@
 #include <sys/types.h>
 #include <vector>
 
+#include <boost/optional.hpp>
+
 #include "kudu/client/client.h"
 #include "kudu/gutil/gscoped_ptr.h"
 #include "kudu/gutil/macros.h"
@@ -122,6 +124,10 @@ struct ExternalMiniClusterOptions {
   // test process will be modified to include Kerberos credentials for
   // a principal named 'testuser'.
   bool enable_kerberos;
+
+  // If true, sends logging output to stderr instead of a log file. Defaults to
+  // true.
+  bool logtostderr;
 };
 
 // A mini-cluster made up of subprocesses running each of the daemons
@@ -279,6 +285,12 @@ class ExternalMiniCluster : public MiniClusterBase {
   // standard Kudu test directory otherwise.
   std::string GetDataPath(const std::string& daemon_id) const;
 
+  // Returns the path where 'daemon_id' is expected to store its logs, or none
+  // if it will log to stderr. Based on ExternalMiniClusterOptions.logtostderr
+  // and ExternalMiniClusterOptions.data_root, or on the standard Kudu test
+  // directory otherwise.
+  boost::optional<std::string> GetLogPath(const std::string& daemon_id) const;
+
  private:
   FRIEND_TEST(MasterFailoverTest, TestKillAnyMaster);
 
@@ -307,8 +319,11 @@ class ExternalMiniCluster : public MiniClusterBase {
 
 class ExternalDaemon : public RefCountedThreadSafe<ExternalDaemon> {
  public:
-  ExternalDaemon(std::shared_ptr<rpc::Messenger> messenger, std::string exe,
-                 std::string data_dir, std::vector<std::string> extra_flags);
+  ExternalDaemon(std::shared_ptr<rpc::Messenger> messenger,
+                 std::string exe,
+                 std::string data_dir,
+                 boost::optional<std::string> log_dir,
+                 std::vector<std::string> extra_flags);
 
   HostPort bound_rpc_hostport() const;
   Sockaddr bound_rpc_addr() const;
@@ -366,6 +381,12 @@ class ExternalDaemon : public RefCountedThreadSafe<ExternalDaemon> {
 
   const std::string& data_dir() const { return data_dir_; }
 
+  // Returns the log dir of the external daemon, or none if the daemon is
+  // configured to log to stderr.
+  const boost::optional<std::string>& log_dir() const {
+    return log_dir_;
+  }
+
   // Return a pointer to the flags used for this server on restart.
   // Modifying these flags will only take effect on the next restart.
   std::vector<std::string>* mutable_flags() { return &extra_flags_; }
@@ -408,6 +429,7 @@ class ExternalDaemon : public RefCountedThreadSafe<ExternalDaemon> {
 
   const std::shared_ptr<rpc::Messenger> messenger_;
   const std::string data_dir_;
+  const boost::optional<std::string> log_dir_;
   std::string exe_;
   std::vector<std::string> extra_flags_;
   std::map<std::string, std::string> extra_env_;
@@ -444,14 +466,18 @@ class ScopedResumeExternalDaemon {
 
 class ExternalMaster : public ExternalDaemon {
  public:
-  ExternalMaster(const std::shared_ptr<rpc::Messenger>& messenger,
-                 const std::string& exe, const std::string& data_dir,
-                 const std::vector<std::string>& extra_flags);
-
-  ExternalMaster(const std::shared_ptr<rpc::Messenger>& messenger,
-                 const std::string& exe, const std::string& data_dir,
+  ExternalMaster(std::shared_ptr<rpc::Messenger> messenger,
+                 std::string exe,
+                 std::string data_dir,
+                 boost::optional<std::string> log_dir,
+                 std::vector<std::string> extra_flags);
+
+  ExternalMaster(std::shared_ptr<rpc::Messenger> messenger,
+                 std::string exe,
+                 std::string data_dir,
+                 boost::optional<std::string> log_dir,
                  std::string rpc_bind_address,
-                 const std::vector<std::string>& extra_flags);
+                 std::vector<std::string> extra_flags);
 
   Status Start();
 
@@ -472,11 +498,13 @@ class ExternalMaster : public ExternalDaemon {
 
 class ExternalTabletServer : public ExternalDaemon {
  public:
-  ExternalTabletServer(const std::shared_ptr<rpc::Messenger>& messenger,
-                       const std::string& exe, const std::string& data_dir,
+  ExternalTabletServer(std::shared_ptr<rpc::Messenger> messenger,
+                       std::string exe,
+                       std::string data_dir,
+                       boost::optional<std::string> log_dir,
                        std::string bind_host,
-                       const std::vector<HostPort>& master_addrs,
-                       const std::vector<std::string>& extra_flags);
+                       std::vector<HostPort> master_addrs,
+                       std::vector<std::string> extra_flags);
 
   Status Start();
 

http://git-wip-us.apache.org/repos/asf/kudu/blob/35d1b014/src/kudu/integration-tests/log-rolling-itest.cc
----------------------------------------------------------------------
diff --git a/src/kudu/integration-tests/log-rolling-itest.cc b/src/kudu/integration-tests/log-rolling-itest.cc
new file mode 100644
index 0000000..4b6172f
--- /dev/null
+++ b/src/kudu/integration-tests/log-rolling-itest.cc
@@ -0,0 +1,61 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// Tests that log rolling and excess logfile cleanup logic works correctly.
+
+#include <algorithm>
+#include <string>
+#include <vector>
+
+#include <gtest/gtest.h>
+
+#include "kudu/gutil/strings/substitute.h"
+#include "kudu/integration-tests/external_mini_cluster.h"
+#include "kudu/util/env.h"
+#include "kudu/util/test_util.h"
+
+using std::string;
+using std::vector;
+using strings::Substitute;
+
+namespace kudu {
+
+static int64_t CountInfoLogs(const string& log_dir) {
+    vector<string> logfiles;
+    string pattern = Substitute("$0/*.$1.*", log_dir, "INFO");
+    CHECK_OK(Env::Default()->Glob(pattern, &logfiles));
+    return logfiles.size();
+}
+
+// Tests that logs roll on startup, and get cleaned up appropriately.
+TEST(LogRollingITest, TestLogCleanupOnStartup) {
+  ExternalMiniClusterOptions opts;
+  opts.num_masters = 1;
+  opts.num_tablet_servers = 0;
+  opts.extra_master_flags = { "--max_log_files=3", };
+  opts.logtostderr = false;
+  opts.bind_to_unique_loopback_addresses = true;
+  ExternalMiniCluster cluster(opts);
+  ASSERT_OK(cluster.Start());
+
+  for (int i = 1; i <= 10; i++) {
+    ASSERT_EQ(std::min(3, i), CountInfoLogs(*cluster.master()->log_dir()));
+    cluster.master()->Shutdown();
+    ASSERT_OK(cluster.master()->Restart());
+  }
+}
+} // namespace kudu

http://git-wip-us.apache.org/repos/asf/kudu/blob/35d1b014/src/kudu/server/server_base.cc
----------------------------------------------------------------------
diff --git a/src/kudu/server/server_base.cc b/src/kudu/server/server_base.cc
index 463fc59..1508c1f 100644
--- a/src/kudu/server/server_base.cc
+++ b/src/kudu/server/server_base.cc
@@ -37,16 +37,17 @@
 #include "kudu/server/hybrid_clock.h"
 #include "kudu/server/logical_clock.h"
 #include "kudu/server/rpc_server.h"
-#include "kudu/server/tcmalloc_metrics.h"
-#include "kudu/server/webserver.h"
 #include "kudu/server/rpcz-path-handler.h"
-#include "kudu/server/server_base_options.h"
 #include "kudu/server/server_base.pb.h"
+#include "kudu/server/server_base_options.h"
+#include "kudu/server/tcmalloc_metrics.h"
 #include "kudu/server/tracing-path-handlers.h"
+#include "kudu/server/webserver.h"
 #include "kudu/util/atomic.h"
 #include "kudu/util/env.h"
 #include "kudu/util/flag_tags.h"
 #include "kudu/util/jsonwriter.h"
+#include "kudu/util/logging.h"
 #include "kudu/util/mem_tracker.h"
 #include "kudu/util/metrics.h"
 #include "kudu/util/monotime.h"
@@ -106,7 +107,7 @@ ServerBase::ServerBase(string name, const ServerBaseOptions& options,
           MemTracker::CreateTracker(-1, "result-tracker", mem_tracker_)))),
       is_first_run_(false),
       options_(options),
-      stop_metrics_logging_latch_(1) {
+      stop_background_threads_latch_(1) {
   FsManagerOpts fs_opts;
   fs_opts.metric_entity = metric_entity_;
   fs_opts.parent_mem_tracker = mem_tracker_;
@@ -198,6 +199,7 @@ Status ServerBase::Init() {
   RETURN_NOT_OK_PREPEND(StartMetricsLogging(), "Could not enable metrics logging");
 
   result_tracker_->StartGCThread();
+  RETURN_NOT_OK(StartExcessGlogDeleterThread());
 
   return Status::OK();
 }
@@ -273,7 +275,7 @@ void ServerBase::MetricsLoggingThread() {
 
 
   MonoTime next_log = MonoTime::Now();
-  while (!stop_metrics_logging_latch_.WaitUntil(next_log)) {
+  while (!stop_background_threads_latch_.WaitUntil(next_log)) {
     next_log = MonoTime::Now() +
         MonoDelta::FromMilliseconds(options_.metrics_log_interval_ms);
 
@@ -307,6 +309,26 @@ void ServerBase::MetricsLoggingThread() {
   WARN_NOT_OK(log.Close(), "Unable to close metric log");
 }
 
+Status ServerBase::StartExcessGlogDeleterThread() {
+  if (FLAGS_logtostderr) {
+    return Status::OK();
+  }
+  // Try synchronously deleting excess log files once at startup to make sure it
+  // works, then start a background thread to continue deleting them in the
+  // future.
+  RETURN_NOT_OK_PREPEND(DeleteExcessLogFiles(options_.env), "Unable to delete excess log files");
+  return Thread::Create("server", "excess-glog-deleter", &ServerBase::ExcessGlogDeleterThread,
+                        this, &excess_glog_deleter_thread_);
+}
+
+void ServerBase::ExcessGlogDeleterThread() {
+  // How often to attempt to clean up excess glog files.
+  const MonoDelta kWait = MonoDelta::FromSeconds(60);
+  while (!stop_background_threads_latch_.WaitUntil(MonoTime::Now() + kWait)) {
+    WARN_NOT_OK(DeleteExcessLogFiles(options_.env), "Unable to delete excess log files");
+  }
+}
+
 std::string ServerBase::FooterHtml() const {
   return Substitute("<pre>$0\nserver uuid $1</pre>",
                     VersionInfo::GetShortVersionString(),
@@ -337,10 +359,13 @@ Status ServerBase::Start() {
 }
 
 void ServerBase::Shutdown() {
+  stop_background_threads_latch_.CountDown();
   if (metrics_logging_thread_) {
-    stop_metrics_logging_latch_.CountDown();
     metrics_logging_thread_->Join();
   }
+  if (excess_glog_deleter_thread_) {
+    excess_glog_deleter_thread_->Join();
+  }
   web_server_->Stop();
   rpc_server_->Shutdown();
 }

http://git-wip-us.apache.org/repos/asf/kudu/blob/35d1b014/src/kudu/server/server_base.h
----------------------------------------------------------------------
diff --git a/src/kudu/server/server_base.h b/src/kudu/server/server_base.h
index 7a5e435..2ad6978 100644
--- a/src/kudu/server/server_base.h
+++ b/src/kudu/server/server_base.h
@@ -124,10 +124,14 @@ class ServerBase {
   void MetricsLoggingThread();
   std::string FooterHtml() const;
 
+  Status StartExcessGlogDeleterThread();
+  void ExcessGlogDeleterThread();
+
   ServerBaseOptions options_;
 
   scoped_refptr<Thread> metrics_logging_thread_;
-  CountDownLatch stop_metrics_logging_latch_;
+  scoped_refptr<Thread> excess_glog_deleter_thread_;
+  CountDownLatch stop_background_threads_latch_;
 
   gscoped_ptr<ScopedGLogMetrics> glog_metrics_;
 

http://git-wip-us.apache.org/repos/asf/kudu/blob/35d1b014/src/kudu/util/logging.cc
----------------------------------------------------------------------
diff --git a/src/kudu/util/logging.cc b/src/kudu/util/logging.cc
index d89988c..1cc0dda 100644
--- a/src/kudu/util/logging.cc
+++ b/src/kudu/util/logging.cc
@@ -16,22 +16,30 @@
 // under the License.
 #include "kudu/util/logging.h"
 
-#include <boost/uuid/uuid.hpp>
-#include <boost/uuid/uuid_io.hpp>
-#include <boost/uuid/uuid_generators.hpp>
-#include <mutex>
-#include <sstream>
 #include <stdio.h>
-#include <iostream>
+
+#include <algorithm>
 #include <fstream>
+#include <iostream>
+#include <mutex>
+#include <sstream>
+#include <utility>
+#include <vector>
+
+#include <boost/uuid/uuid.hpp>
+#include <boost/uuid/uuid_generators.hpp>
+#include <boost/uuid/uuid_io.hpp>
 #include <glog/logging.h>
 
 #include "kudu/gutil/callback.h"
 #include "kudu/gutil/spinlock.h"
+#include "kudu/gutil/strings/substitute.h"
 #include "kudu/util/async_logger.h"
 #include "kudu/util/debug-util.h"
 #include "kudu/util/debug/leakcheck_disabler.h"
+#include "kudu/util/env.h"
 #include "kudu/util/flag_tags.h"
+#include "kudu/util/status.h"
 
 DEFINE_string(log_filename, "",
     "Prefix of log filename - "
@@ -48,6 +56,12 @@ DEFINE_int32(log_async_buffer_bytes_per_level, 2 * 1024 * 1024,
              "level. Only relevant when --log_async is enabled.");
 TAG_FLAG(log_async_buffer_bytes_per_level, hidden);
 
+DEFINE_int32(max_log_files, 10,
+    "Maximum number of log files to retain per severity level. The most recent "
+    "log files are retained. If set to 0, all log files are retained.");
+TAG_FLAG(max_log_files, runtime);
+TAG_FLAG(max_log_files, experimental);
+
 #define PROJ_NAME "kudu"
 
 bool logging_initialized = false;
@@ -331,8 +345,52 @@ void LogCommandLineFlags() {
             << google::CommandlineFlagsIntoString();
 }
 
+Status DeleteExcessLogFiles(Env* env) {
+  int32_t max_log_files = FLAGS_max_log_files;
+  // Ignore bad input or disable log rotation.
+  if (max_log_files <= 0) return Status::OK();
+
+  for (int severity = 0; severity < google::NUM_SEVERITIES; ++severity) {
+    vector<string> logfiles;
+    // Build glob pattern for input
+    // e.g. /var/log/kudu/kudu-master.*.INFO.*
+    string pattern = strings::Substitute("$0/$1.*.$2.*", FLAGS_log_dir, FLAGS_log_filename,
+                                         google::GetLogSeverityName(severity));
+    RETURN_NOT_OK(env->Glob(pattern, &logfiles));
+
+    if (logfiles.size() <= max_log_files) {
+      continue;
+    }
+
+    vector<pair<time_t, string>> logfile_mtimes;
+    for (string& logfile : logfiles) {
+      int64_t mtime;
+      RETURN_NOT_OK(env->GetFileModifiedTime(logfile, &mtime));
+      logfile_mtimes.emplace_back(mtime, std::move(logfile));
+    }
+
+    // Keep the 'max_log_files' most recent log files, as compared by
+    // modification time. Glog files contain a second-granularity timestamp in
+    // the name, so this could potentially use the filename sort order as
+    // guaranteed by glob, however this code has been adapted from Impala which
+    // uses mtime to determine which files to delete, and there haven't been any
+    // issues in production settings.
+    std::sort(logfile_mtimes.begin(), logfile_mtimes.end());
+    logfile_mtimes.resize(logfile_mtimes.size() - max_log_files);
+
+    VLOG(2) << "Deleting " << logfile_mtimes.size()
+            << " excess glog files at " << google::GetLogSeverityName(severity)
+            << " severity";
+
+    for (const auto& logfile: logfile_mtimes) {
+      RETURN_NOT_OK(env->DeleteFile(logfile.second));
+    }
+  }
+  return Status::OK();
+}
+
 // Support for the special THROTTLE_MSG token in a log message stream.
-ostream& operator<<(ostream &os, const PRIVATE_ThrottleMsg&) {
+ostream& operator<<(ostream &os, const PRIVATE_ThrottleMsg& /*unused*/) {
   using google::LogMessage;
 #ifdef DISABLE_RTTI
   LogMessage::LogStream *log = static_cast<LogMessage::LogStream*>(&os);

http://git-wip-us.apache.org/repos/asf/kudu/blob/35d1b014/src/kudu/util/logging.h
----------------------------------------------------------------------
diff --git a/src/kudu/util/logging.h b/src/kudu/util/logging.h
index 48234be..5f8769c 100644
--- a/src/kudu/util/logging.h
+++ b/src/kudu/util/logging.h
@@ -25,6 +25,7 @@
 #include "kudu/gutil/macros.h"
 #include "kudu/gutil/walltime.h"
 #include "kudu/util/logging_callback.h"
+#include "kudu/util/status.h"
 
 ////////////////////////////////////////////////////////////////////////////////
 // Throttled logging support
@@ -169,11 +170,10 @@ enum PRIVATE_ThrottleMsg {THROTTLE_MSG};
 #define LOG_IF_EVERY_N(severity, condition, n) \
   GOOGLE_GLOG_COMPILE_ASSERT(false, "LOG_IF_EVERY_N is deprecated. Please use KLOG_IF_EVERY_N.")
 
-
-
-
 namespace kudu {
 
+class Env;
+
 // glog doesn't allow multiple invocations of InitGoogleLogging. This method conditionally
 // calls InitGoogleLogging only if it hasn't been called before.
 //
@@ -212,6 +212,12 @@ void ShutdownLoggingSafe();
 // Writes all command-line flags to the log at level INFO.
 void LogCommandLineFlags();
 
+// Deletes excess rotated log files.
+//
+// Keeps at most 'FLAG_max_log_files' of the most recent log files at every
+// severity level, using the file's modified time to determine recency.
+Status DeleteExcessLogFiles(Env* env);
+
 namespace logging {
 
 // A LogThrottler instance tracks the throttling state for a particular

http://git-wip-us.apache.org/repos/asf/kudu/blob/35d1b014/src/kudu/util/metrics.h
----------------------------------------------------------------------
diff --git a/src/kudu/util/metrics.h b/src/kudu/util/metrics.h
index 875bcd1..8aeaf93 100644
--- a/src/kudu/util/metrics.h
+++ b/src/kudu/util/metrics.h
@@ -24,7 +24,7 @@
 // Summary
 // ------------------------------------------------------------
 //
-// This API provides a basic set of metrics primitives along the lines of the Code Hale's
+// This API provides a basic set of metrics primitives along the lines of the Coda Hale's
 // metrics library along with JSON formatted output of running metrics.
 //
 // The metrics system has a few main concepts in its data model: