You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by he...@apache.org on 2016/09/26 22:08:37 UTC

[3/3] incubator-impala git commit: IMPALA-4187: Switch RPC latency metrics to histograms

IMPALA-4187: Switch RPC latency metrics to histograms

It's usually better to measure latency distributions with histograms,
not avg / min / max. This change switches out the metrics that measure
the latency of RPC processing time to histograms.

Add HistogramMetric::Reset() to allow histogram to remove all
entries. Added spinlock around histogram access.

On a 8-core i7 @ 3.4GHz, the following throughputs were observed:

1 thread -> 25M updates/sec
4 threads -> 7M updates/sec
16 threads -> 5M updates/sec

Each histogram takes ~108KB of storage for its buckets. This can be
reduced by reducing the maximum value, currently 60 minutes.

The new metrics have the following text representation:

Count: 148, 25th %-ile: 0, 50th %-ile: 0, 75th %-ile: 0, 90th %-ile: 0,
95th %-ile: 0, 99.9th %-ile: 1ms

Which changes from the old:

count: 345, last: 6ms, min: 0, max: 12ms, mean: 1ms, stddev: 1ms

Change-Id: I9ba6d4270dd5676eeeff35ad8d9dc5dcddd95e34
Reviewed-on: http://gerrit.cloudera.org:8080/4516
Reviewed-by: Henry Robinson <he...@cloudera.com>
Tested-by: Internal Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/872d5462
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/872d5462
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/872d5462

Branch: refs/heads/master
Commit: 872d5462b9a17d9f2d9edeb08728e29d6eb6eca6
Parents: 435aec5
Author: Henry Robinson <he...@cloudera.com>
Authored: Wed Sep 21 16:29:57 2016 -0700
Committer: Internal Jenkins <cl...@gerrit.cloudera.org>
Committed: Mon Sep 26 22:07:17 2016 +0000

----------------------------------------------------------------------
 be/src/rpc/rpc-trace.cc        | 22 +++++++----
 be/src/rpc/rpc-trace.h         |  6 +--
 be/src/util/histogram-metric.h | 73 ++++++++++++++++++++++++-------------
 common/thrift/metrics.json     |  2 +-
 4 files changed, 65 insertions(+), 38 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/872d5462/be/src/rpc/rpc-trace.cc
----------------------------------------------------------------------
diff --git a/be/src/rpc/rpc-trace.cc b/be/src/rpc/rpc-trace.cc
index 407c570..f4da19b 100644
--- a/be/src/rpc/rpc-trace.cc
+++ b/be/src/rpc/rpc-trace.cc
@@ -33,7 +33,7 @@ using namespace rapidjson;
 using namespace strings;
 
 // Metric key format for rpc call duration metrics.
-const string RPC_TIME_STATS_METRIC_KEY = "rpc-method.$0.call_duration";
+const string RPC_PROCESSING_TIME_DISTRIBUTION_METRIC_KEY = "rpc-method.$0.call_duration";
 
 // Singleton class to keep track of all RpcEventHandlers, and to render them to a
 // web-based summary page.
@@ -123,14 +123,14 @@ void RpcEventHandler::Reset(const string& method_name) {
   lock_guard<mutex> l(method_map_lock_);
   MethodMap::iterator it = method_map_.find(method_name);
   if (it == method_map_.end()) return;
-  it->second->time_stats->Reset();
+  it->second->processing_time_distribution->Reset();
   it->second->num_in_flight.Store(0L);
 }
 
 void RpcEventHandler::ResetAll() {
   lock_guard<mutex> l(method_map_lock_);
   for (const MethodMap::value_type& method: method_map_) {
-    method.second->time_stats->Reset();
+    method.second->processing_time_distribution->Reset();
     method.second->num_in_flight.Store(0L);
   }
 }
@@ -149,7 +149,8 @@ void RpcEventHandler::ToJson(Value* server, Document* document) {
     Value method(kObjectType);
     Value method_name(rpc.first.c_str(), document->GetAllocator());
     method.AddMember("name", method_name, document->GetAllocator());
-    const string& human_readable = rpc.second->time_stats->ToHumanReadable();
+    const string& human_readable =
+        rpc.second->processing_time_distribution->ToHumanReadable();
     Value summary(human_readable.c_str(), document->GetAllocator());
     method.AddMember("summary", summary, document->GetAllocator());
     method.AddMember("in_flight", rpc.second->num_in_flight.Load(),
@@ -171,9 +172,14 @@ void* RpcEventHandler::getContext(const char* fn_name, void* server_context) {
     if (it == method_map_.end()) {
       MethodDescriptor* descriptor = new MethodDescriptor();
       descriptor->name = fn_name;
-      const string& rpc_name = Substitute("$0.$1", server_name_, descriptor->name);
-      descriptor->time_stats = StatsMetric<double>::CreateAndRegister(metrics_,
-          RPC_TIME_STATS_METRIC_KEY, rpc_name);
+      const string& rpc_name = Substitute(RPC_PROCESSING_TIME_DISTRIBUTION_METRIC_KEY,
+          Substitute("$0.$1", server_name_, descriptor->name));
+      const TMetricDef& def =
+          MakeTMetricDef(rpc_name, TMetricKind::HISTOGRAM, TUnit::TIME_MS);
+      constexpr int32_t SIXTY_MINUTES_IN_MS = 60 * 1000 * 60;
+      // Store processing times of up to 60 minutes with 3 sig. fig.
+      descriptor->processing_time_distribution =
+          metrics_->RegisterMetric(new HistogramMetric(def, SIXTY_MINUTES_IN_MS, 3));
       it = method_map_.insert(make_pair(descriptor->name, descriptor)).first;
     }
   }
@@ -197,5 +203,5 @@ void RpcEventHandler::postWrite(void* ctx, const char* fn_name, uint32_t bytes)
   MethodDescriptor* descriptor = rpc_ctx->method_descriptor;
   delete rpc_ctx;
   descriptor->num_in_flight.Add(-1);
-  descriptor->time_stats->Update(elapsed_time);
+  descriptor->processing_time_distribution->Update(elapsed_time);
 }

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/872d5462/be/src/rpc/rpc-trace.h
----------------------------------------------------------------------
diff --git a/be/src/rpc/rpc-trace.h b/be/src/rpc/rpc-trace.h
index 3ee2c6e..b2861af 100644
--- a/be/src/rpc/rpc-trace.h
+++ b/be/src/rpc/rpc-trace.h
@@ -18,8 +18,8 @@
 #ifndef IMPALA_RPC_RPC_TRACE_H
 #define IMPALA_RPC_RPC_TRACE_H
 
-#include "util/collection-metrics.h"
 #include "rpc/thrift-server.h"
+#include "util/histogram-metric.h"
 #include "util/internal-queue.h"
 
 #include <thrift/TProcessor.h>
@@ -83,8 +83,8 @@ class RpcEventHandler : public apache::thrift::TProcessorEventHandler {
     /// Name of the method
     std::string name;
 
-    /// Summary statistics for the time taken to respond to this method
-    StatsMetric<double>* time_stats;
+    /// Distribution of the time taken to process this RPC.
+    HistogramMetric* processing_time_distribution;
 
     /// Number of invocations in flight
     AtomicInt32 num_in_flight;

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/872d5462/be/src/util/histogram-metric.h
----------------------------------------------------------------------
diff --git a/be/src/util/histogram-metric.h b/be/src/util/histogram-metric.h
index 211deee..5a61648 100644
--- a/be/src/util/histogram-metric.h
+++ b/be/src/util/histogram-metric.h
@@ -20,10 +20,12 @@
 
 #include "util/hdr-histogram.h"
 #include "util/metrics.h"
+#include "util/spinlock.h"
 
 namespace impala {
 
 /// Metric which constructs (using HdrHistogram) a histogram of a set of values.
+/// Thread-safe: histogram access protected by a spin lock.
 class HistogramMetric : public Metric {
  public:
   /// Constructs a new histogram metric. `highest_trackable_value` is the maximum value
@@ -31,27 +33,33 @@ class HistogramMetric : public Metric {
   /// that values must be stored with.
   HistogramMetric(
       const TMetricDef& def, uint64_t highest_trackable_value, int num_significant_digits)
-      : Metric(def), histogram_(highest_trackable_value, num_significant_digits),
-        unit_(def.units) { }
+    : Metric(def),
+      histogram_(new HdrHistogram(highest_trackable_value, num_significant_digits)),
+      unit_(def.units) {
+    DCHECK_EQ(TMetricKind::HISTOGRAM, def.kind);
+  }
 
   virtual void ToJson(rapidjson::Document* document, rapidjson::Value* value) {
     rapidjson::Value container(rapidjson::kObjectType);
     AddStandardFields(document, &container);
 
-    container.AddMember("25th %-ile", histogram_.ValueAtPercentile(25),
-        document->GetAllocator());
-    container.AddMember("50th %-ile", histogram_.ValueAtPercentile(50),
-        document->GetAllocator());
-    container.AddMember("75th %-ile", histogram_.ValueAtPercentile(75),
-        document->GetAllocator());
-    container.AddMember("90th %-ile", histogram_.ValueAtPercentile(90),
-        document->GetAllocator());
-    container.AddMember("95th %-ile", histogram_.ValueAtPercentile(95),
-        document->GetAllocator());
-    container.AddMember("99.9th %-ile", histogram_.ValueAtPercentile(99.9),
-        document->GetAllocator());
-    container.AddMember("count", histogram_.TotalCount(), document->GetAllocator());
-
+    {
+      boost::lock_guard<SpinLock> l(lock_);
+
+      container.AddMember(
+          "25th %-ile", histogram_->ValueAtPercentile(25), document->GetAllocator());
+      container.AddMember(
+          "50th %-ile", histogram_->ValueAtPercentile(50), document->GetAllocator());
+      container.AddMember(
+          "75th %-ile", histogram_->ValueAtPercentile(75), document->GetAllocator());
+      container.AddMember(
+          "90th %-ile", histogram_->ValueAtPercentile(90), document->GetAllocator());
+      container.AddMember(
+          "95th %-ile", histogram_->ValueAtPercentile(95), document->GetAllocator());
+      container.AddMember(
+          "99.9th %-ile", histogram_->ValueAtPercentile(99.9), document->GetAllocator());
+      container.AddMember("count", histogram_->TotalCount(), document->GetAllocator());
+    }
     rapidjson::Value type_value(PrintTMetricKind(TMetricKind::HISTOGRAM).c_str(),
         document->GetAllocator());
     container.AddMember("kind", type_value, document->GetAllocator());
@@ -61,33 +69,46 @@ class HistogramMetric : public Metric {
     *value = container;
   }
 
-  void Update(int64_t val) { histogram_.Increment(val); }
+  void Update(int64_t val) {
+    boost::lock_guard<SpinLock> l(lock_);
+    histogram_->Increment(val);
+  }
+
+  /// Reset the histogram by removing all previous entries.
+  void Reset() {
+    boost::lock_guard<SpinLock> l(lock_);
+    uint64_t highest = histogram_->highest_trackable_value();
+    int digits = histogram_->num_significant_digits();
+    histogram_.reset(new HdrHistogram(highest, digits));
+  }
 
   virtual void ToLegacyJson(rapidjson::Document*) { }
 
   const TUnit::type& unit() const { return unit_; }
 
   virtual std::string ToHumanReadable() {
+    boost::lock_guard<SpinLock> l(lock_);
     std::stringstream out;
-    out <<"Count: " << histogram_.TotalCount() << ", "
+    out << "Count: " << histogram_->TotalCount() << ", "
         << "25th %-ile: "
-        << PrettyPrinter::Print(histogram_.ValueAtPercentile(25), unit_) << ", "
+        << PrettyPrinter::Print(histogram_->ValueAtPercentile(25), unit_) << ", "
         << "50th %-ile: "
-        << PrettyPrinter::Print(histogram_.ValueAtPercentile(50), unit_) << ", "
+        << PrettyPrinter::Print(histogram_->ValueAtPercentile(50), unit_) << ", "
         << "75th %-ile: "
-        << PrettyPrinter::Print(histogram_.ValueAtPercentile(75), unit_) << ", "
+        << PrettyPrinter::Print(histogram_->ValueAtPercentile(75), unit_) << ", "
         << "90th %-ile: "
-        << PrettyPrinter::Print(histogram_.ValueAtPercentile(90), unit_) << ", "
+        << PrettyPrinter::Print(histogram_->ValueAtPercentile(90), unit_) << ", "
         << "95th %-ile: "
-        << PrettyPrinter::Print(histogram_.ValueAtPercentile(95), unit_) << ", "
+        << PrettyPrinter::Print(histogram_->ValueAtPercentile(95), unit_) << ", "
         << "99.9th %-ile: "
-        << PrettyPrinter::Print(histogram_.ValueAtPercentile(99.9), unit_);
+        << PrettyPrinter::Print(histogram_->ValueAtPercentile(99.9), unit_);
     return out.str();
   }
 
  private:
-  HdrHistogram histogram_;
-
+  // Protects histogram_ pointer itself.
+  SpinLock lock_;
+  boost::scoped_ptr<HdrHistogram> histogram_;
   const TUnit::type unit_;
 };
 

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/872d5462/common/thrift/metrics.json
----------------------------------------------------------------------
diff --git a/common/thrift/metrics.json b/common/thrift/metrics.json
index b600f80..70dbb12 100644
--- a/common/thrift/metrics.json
+++ b/common/thrift/metrics.json
@@ -748,7 +748,7 @@
     ],
     "label": "$0 RPC Call Duration",
     "units": "TIME_MS",
-    "kind": "STATS",
+    "kind": "HISTOGRAM",
     "key": "rpc-method.$0.call_duration"
   },
   {