You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kudu.apache.org by al...@apache.org on 2018/02/23 05:37:28 UTC

[2/5] kudu git commit: KUDU-2297 (part 1): refactor metrics log out of ServerBase

KUDU-2297 (part 1): refactor metrics log out of ServerBase

This is preparing for putting more information into the metrics log
instead of just metrics snapshots. The logging code will get more
complex as it gains features, so this makes a new class for it.

This is slightly incompatible since I also changed the name on disk to
'diagnostics' instead of 'metrics' and updated the documentation, but I
am not aware of people using this in the wild. So long as we
release-note it, I think it's reasonable to expect any sophisticated
operators to adjust their scripting accordingly.

Change-Id: I1f7bc7335b1f4c2a5c62fc5fe672289d5027a10d
Reviewed-on: http://gerrit.cloudera.org:8080/9326
Tested-by: Todd Lipcon <to...@apache.org>
Reviewed-by: Mike Percy <mp...@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/a8b51b29
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/a8b51b29
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/a8b51b29

Branch: refs/heads/master
Commit: a8b51b291883c0ab4fbfa5ee86525d4b9cc3a66a
Parents: a3ecd1a
Author: Todd Lipcon <to...@apache.org>
Authored: Wed Feb 14 13:06:29 2018 -0800
Committer: Todd Lipcon <to...@apache.org>
Committed: Fri Feb 23 02:37:30 2018 +0000

----------------------------------------------------------------------
 docs/administration.adoc           |  12 +--
 src/kudu/server/CMakeLists.txt     |   1 +
 src/kudu/server/diagnostics_log.cc | 155 ++++++++++++++++++++++++++++++++
 src/kudu/server/diagnostics_log.h  |  70 +++++++++++++++
 src/kudu/server/server_base.cc     |  73 ++-------------
 src/kudu/server/server_base.h      |   3 +-
 6 files changed, 241 insertions(+), 73 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kudu/blob/a8b51b29/docs/administration.adoc
----------------------------------------------------------------------
diff --git a/docs/administration.adoc b/docs/administration.adoc
index 076fa99..78a70cc 100644
--- a/docs/administration.adoc
+++ b/docs/administration.adoc
@@ -182,10 +182,11 @@ NOTE: All histograms and counters are measured since the server start time, and
 
 Kudu may be configured to periodically dump all of its metrics to a local log file using the
 `--metrics_log_interval_ms` flag. Set this flag to the interval at which metrics should be written
-to a log file.
+to a `diagnostics` log file.
 
-The metrics log will be written to the same directory as the other Kudu log files, with the same
-naming format. After any metrics log file reaches 64MB uncompressed, the log will be rolled and
+The diagnostics log will be written to the same directory as the other Kudu log files, with a
+similar naming format, substituting `diagnostics` instead of a log level like `INFO`.
+After any diagnostics log file reaches 64MB uncompressed, the log will be rolled and
 the previous file will be gzip-compressed.
 
 The log file generated has three space-separated fields. The first field is the word
@@ -193,11 +194,6 @@ The log file generated has three space-separated fields. The first field is the
 The third is the current value of all metrics on the server, using a compact JSON encoding.
 The encoding is the same as the metrics fetched via HTTP described above.
 
-WARNING: Although metrics logging automatically rolls and compresses previous log files, it does
-not remove old ones. Since metrics logging can use significant amounts of disk space,
-consider setting up a system utility to monitor space in the log directory and archive or
-delete old segments.
-
 == Common Kudu workflows
 
 [[migrate_to_multi_master]]

http://git-wip-us.apache.org/repos/asf/kudu/blob/a8b51b29/src/kudu/server/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/src/kudu/server/CMakeLists.txt b/src/kudu/server/CMakeLists.txt
index eb2a0e0..859fa32 100644
--- a/src/kudu/server/CMakeLists.txt
+++ b/src/kudu/server/CMakeLists.txt
@@ -40,6 +40,7 @@ target_link_libraries(server_base_proto
 
 set(SERVER_PROCESS_SRCS
   default_path_handlers.cc
+  diagnostics_log.cc
   generic_service.cc
   glog_metrics.cc
   pprof_path_handlers.cc

http://git-wip-us.apache.org/repos/asf/kudu/blob/a8b51b29/src/kudu/server/diagnostics_log.cc
----------------------------------------------------------------------
diff --git a/src/kudu/server/diagnostics_log.cc b/src/kudu/server/diagnostics_log.cc
new file mode 100644
index 0000000..862fd81
--- /dev/null
+++ b/src/kudu/server/diagnostics_log.cc
@@ -0,0 +1,155 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <cstdint>
+#include <memory>
+#include <ostream>
+#include <string>
+#include <utility>
+
+#include <glog/logging.h>
+
+#include "kudu/gutil/ref_counted.h"
+#include "kudu/gutil/strings/substitute.h"
+#include "kudu/gutil/walltime.h"
+#include "kudu/server/diagnostics_log.h"
+#include "kudu/util/condition_variable.h"
+#include "kudu/util/env.h"
+#include "kudu/util/jsonwriter.h"
+#include "kudu/util/metrics.h"
+#include "kudu/util/monotime.h"
+#include "kudu/util/mutex.h"
+#include "kudu/util/rolling_log.h"
+#include "kudu/util/status.h"
+#include "kudu/util/thread.h"
+
+
+using std::string;
+using std::unique_ptr;
+using strings::Substitute;
+
+namespace kudu {
+namespace server {
+
+DiagnosticsLog::DiagnosticsLog(string log_dir,
+                               MetricRegistry* metric_registry) :
+    log_dir_(std::move(log_dir)),
+    metric_registry_(metric_registry),
+    wake_(&lock_),
+    metrics_log_interval_(MonoDelta::FromSeconds(60)) {
+}
+DiagnosticsLog::~DiagnosticsLog() {
+  Stop();
+}
+
+void DiagnosticsLog::SetMetricsLogInterval(MonoDelta interval) {
+  MutexLock l(lock_);
+  metrics_log_interval_ = interval;
+}
+
+
+Status DiagnosticsLog::Start() {
+  unique_ptr<RollingLog> l(new RollingLog(Env::Default(), log_dir_, "diagnostics"));
+  RETURN_NOT_OK_PREPEND(l->Open(), "unable to open diagnostics log");
+  log_ = std::move(l);
+  Status s = Thread::Create("server", "diag-logger",
+                            &DiagnosticsLog::RunThread,
+                            this, &thread_);
+  if (!s.ok()) {
+    // Don't leave the log open if we failed to start our thread.
+    log_.reset();
+  }
+  return s;
+}
+
+void DiagnosticsLog::Stop() {
+  if (!thread_) return;
+
+  {
+    MutexLock l(lock_);
+    stop_ = true;
+    wake_.Signal();
+  }
+  thread_->Join();
+  thread_.reset();
+  stop_ = false;
+  WARN_NOT_OK(log_->Close(), "Unable to close diagnostics log");
+}
+
+void DiagnosticsLog::RunThread() {
+  // How long to wait before trying again if we experience a failure
+  // logging metrics.
+  const MonoDelta kWaitBetweenFailures = MonoDelta::FromSeconds(60);
+
+  MutexLock l(lock_);
+
+  MonoTime next_log = MonoTime::Now();
+  while (!stop_) {
+    wake_.TimedWait(next_log - MonoTime::Now());
+    if (MonoTime::Now() > next_log) {
+      Status s = LogMetrics();
+      if (!s.ok()) {
+        WARN_NOT_OK(s, Substitute(
+            "Unable to collect metrics to diagnostics log. Will try again in $0",
+            kWaitBetweenFailures.ToString()));
+        next_log = MonoTime::Now() + kWaitBetweenFailures;
+      } else {
+        next_log = MonoTime::Now() + metrics_log_interval_;
+      }
+    }
+  }
+}
+
+Status DiagnosticsLog::LogMetrics() {
+  MetricJsonOptions opts;
+  opts.include_raw_histograms = true;
+
+  opts.only_modified_in_or_after_epoch = metrics_epoch_;
+
+  // We don't output any metrics which have never been incremented. Though
+  // this seems redundant with the "only include changed metrics" above, it
+  // also ensures that we don't dump a bunch of zero data on startup.
+  opts.include_untouched_metrics = false;
+
+  // Entity attributes aren't that useful in the context of this log. We can
+  // always grab the entity attributes separately if necessary.
+  opts.include_entity_attributes = false;
+
+  std::ostringstream buf;
+  buf << "metrics " << GetCurrentTimeMicros() << " ";
+
+  // Collect the metrics JSON string.
+  int64_t this_log_epoch = Metric::current_epoch();
+  Metric::IncrementEpoch();
+  JsonWriter writer(&buf, JsonWriter::COMPACT);
+  RETURN_NOT_OK(metric_registry_->WriteAsJson(&writer, {"*"}, opts));
+  buf << "\n";
+
+  RETURN_NOT_OK(log_->Append(buf.str()));
+
+  // Next time we fetch, only show those that changed after the epoch
+  // we just logged.
+  //
+  // NOTE: we only bump this in the successful log case so that if we failed to
+  // write above, we wouldn't skip any changes.
+  metrics_epoch_ = this_log_epoch + 1;
+  return Status::OK();
+}
+
+
+} // namespace server
+} // namespace kudu

http://git-wip-us.apache.org/repos/asf/kudu/blob/a8b51b29/src/kudu/server/diagnostics_log.h
----------------------------------------------------------------------
diff --git a/src/kudu/server/diagnostics_log.h b/src/kudu/server/diagnostics_log.h
new file mode 100644
index 0000000..75e1a69
--- /dev/null
+++ b/src/kudu/server/diagnostics_log.h
@@ -0,0 +1,70 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+#pragma once
+
+#include <cstdint>
+#include <memory>
+#include <string>
+
+#include "kudu/gutil/macros.h"
+#include "kudu/gutil/ref_counted.h"
+#include "kudu/util/condition_variable.h"
+#include "kudu/util/monotime.h"
+#include "kudu/util/mutex.h"
+
+namespace kudu {
+
+class MetricRegistry;
+class RollingLog;
+class Thread;
+class Status;
+
+namespace server {
+
+class DiagnosticsLog {
+ public:
+  DiagnosticsLog(std::string log_dir, MetricRegistry* metric_registry);
+  ~DiagnosticsLog();
+
+  void SetMetricsLogInterval(MonoDelta interval);
+
+  Status Start();
+  void Stop();
+
+ private:
+  void RunThread();
+  Status LogMetrics();
+
+  const std::string log_dir_;
+  const MetricRegistry* metric_registry_;
+
+  scoped_refptr<Thread> thread_;
+  std::unique_ptr<RollingLog> log_;
+
+  Mutex lock_;
+  ConditionVariable wake_;
+  bool stop_ = false;
+
+  MonoDelta metrics_log_interval_;
+
+  int64_t metrics_epoch_ = 0;
+
+  DISALLOW_COPY_AND_ASSIGN(DiagnosticsLog);
+};
+
+} // namespace server
+} // namespace kudu

http://git-wip-us.apache.org/repos/asf/kudu/blob/a8b51b29/src/kudu/server/server_base.cc
----------------------------------------------------------------------
diff --git a/src/kudu/server/server_base.cc b/src/kudu/server/server_base.cc
index 5d63cdd..c996f70 100644
--- a/src/kudu/server/server_base.cc
+++ b/src/kudu/server/server_base.cc
@@ -42,7 +42,6 @@
 #include "kudu/gutil/move.h"
 #include "kudu/gutil/strings/strcat.h"
 #include "kudu/gutil/strings/substitute.h"
-#include "kudu/gutil/walltime.h"
 #include "kudu/rpc/messenger.h"
 #include "kudu/rpc/remote_user.h"
 #include "kudu/rpc/result_tracker.h"
@@ -51,6 +50,7 @@
 #include "kudu/security/init.h"
 #include "kudu/security/security_flags.h"
 #include "kudu/server/default_path_handlers.h"
+#include "kudu/server/diagnostics_log.h"
 #include "kudu/server/generic_service.h"
 #include "kudu/server/glog_metrics.h"
 #include "kudu/server/rpc_server.h"
@@ -74,7 +74,6 @@
 #include "kudu/util/net/net_util.h"
 #include "kudu/util/net/sockaddr.h"
 #include "kudu/util/pb_util.h"
-#include "kudu/util/rolling_log.h"
 #include "kudu/util/slice.h"
 #include "kudu/util/spinlock_profiling.h"
 #include "kudu/util/thread.h"
@@ -210,6 +209,7 @@ using kudu::security::RpcEncryption;
 using std::ostringstream;
 using std::shared_ptr;
 using std::string;
+using std::unique_ptr;
 using std::vector;
 using strings::Substitute;
 
@@ -605,68 +605,13 @@ Status ServerBase::StartMetricsLogging() {
     LOG(INFO) << "Not starting metrics log since no log directory was specified.";
     return Status::OK();
   }
-
-  return Thread::Create("server", "metrics-logger", &ServerBase::MetricsLoggingThread,
-                        this, &metrics_logging_thread_);
+  unique_ptr<DiagnosticsLog> l(new DiagnosticsLog(FLAGS_log_dir, metric_registry_.get()));
+  l->SetMetricsLogInterval(MonoDelta::FromMilliseconds(options_.metrics_log_interval_ms));
+  RETURN_NOT_OK(l->Start());
+  diag_log_ = std::move(l);
+  return Status::OK();
 }
 
-void ServerBase::MetricsLoggingThread() {
-  RollingLog log(Env::Default(), FLAGS_log_dir, "metrics");
-
-  // How long to wait before trying again if we experience a failure
-  // logging metrics.
-  const MonoDelta kWaitBetweenFailures = MonoDelta::FromSeconds(60);
-
-  MetricJsonOptions opts;
-  opts.include_raw_histograms = true;
-
-  // We don't output any metrics which have never been incremented. Though
-  // this seems redundant with the "only include changed metrics" above, it
-  // also ensures that we don't dump a bunch of zero data on startup.
-  opts.include_untouched_metrics = false;
-
-  // Entity attributes aren't that useful in the context of this log. We can
-  // always grab the entity attributes separately if necessary.
-  opts.include_entity_attributes = false;
-
-  MonoTime next_log = MonoTime::Now();
-  while (!stop_background_threads_latch_.WaitUntil(next_log)) {
-    next_log = MonoTime::Now() +
-        MonoDelta::FromMilliseconds(options_.metrics_log_interval_ms);
-
-    std::ostringstream buf;
-    buf << "metrics " << GetCurrentTimeMicros() << " ";
-
-    // Collect the metrics JSON string.
-    int64_t this_log_epoch = Metric::current_epoch();
-    Metric::IncrementEpoch();
-    JsonWriter writer(&buf, JsonWriter::COMPACT);
-    Status s = metric_registry_->WriteAsJson(&writer, {"*"}, opts);
-    if (!s.ok()) {
-      WARN_NOT_OK(s, "Unable to collect metrics to log");
-      next_log += kWaitBetweenFailures;
-      continue;
-    }
-
-    buf << "\n";
-
-    s = log.Append(buf.str());
-    if (!s.ok()) {
-      WARN_NOT_OK(s, "Unable to write metrics to log");
-      next_log += kWaitBetweenFailures;
-      continue;
-    }
-
-    // Next time we fetch, only show those that changed after the epoch
-    // we just logged.
-    //
-    // NOTE: we only bump this in the successful log case so that if we failed to
-    // write above, we wouldn't skip any changes.
-    opts.only_modified_in_or_after_epoch = this_log_epoch + 1;
-  }
-
-  WARN_NOT_OK(log.Close(), "Unable to close metric log");
-}
 
 Status ServerBase::StartExcessLogFileDeleterThread() {
   // Try synchronously deleting excess log files once at startup to make sure it
@@ -739,8 +684,8 @@ void ServerBase::Shutdown() {
 
   // Next, shut down remaining server components.
   stop_background_threads_latch_.CountDown();
-  if (metrics_logging_thread_) {
-    metrics_logging_thread_->Join();
+  if (diag_log_) {
+    diag_log_->Stop();
   }
   if (excess_log_deleter_thread_) {
     excess_log_deleter_thread_->Join();

http://git-wip-us.apache.org/repos/asf/kudu/blob/a8b51b29/src/kudu/server/server_base.h
----------------------------------------------------------------------
diff --git a/src/kudu/server/server_base.h b/src/kudu/server/server_base.h
index 701b47f..4ac06eb 100644
--- a/src/kudu/server/server_base.h
+++ b/src/kudu/server/server_base.h
@@ -60,6 +60,7 @@ class TokenVerifier;
 } // namespace security
 
 namespace server {
+class DiagnosticsLog;
 class ServerStatusPB;
 
 // Base class for tablet server and master.
@@ -200,7 +201,7 @@ class ServerBase {
 
   ServerBaseOptions options_;
 
-  scoped_refptr<Thread> metrics_logging_thread_;
+  std::unique_ptr<DiagnosticsLog> diag_log_;
   scoped_refptr<Thread> excess_log_deleter_thread_;
   CountDownLatch stop_background_threads_latch_;