You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by he...@apache.org on 2016/09/26 22:08:37 UTC
[3/3] incubator-impala git commit: IMPALA-4187: Switch RPC latency
metrics to histograms
IMPALA-4187: Switch RPC latency metrics to histograms
It's usually better to measure latency distributions with histograms,
not avg / min / max. This change switches out the metrics that measure
the latency of RPC processing time to histograms.
Add HistogramMetric::Reset() to allow histogram to remove all
entries. Added spinlock around histogram access.
On a 8-core i7 @ 3.4GHz, the following throughputs were observed:
1 thread -> 25M updates/sec
4 threads -> 7M updates/sec
16 threads -> 5M updates/sec
Each histogram takes ~108KB of storage for its buckets. This can be
reduced by reducing the maximum value, currently 60 minutes.
The new metrics have the following text representation:
Count: 148, 25th %-ile: 0, 50th %-ile: 0, 75th %-ile: 0, 90th %-ile: 0,
95th %-ile: 0, 99.9th %-ile: 1ms
Which changes from the old:
count: 345, last: 6ms, min: 0, max: 12ms, mean: 1ms, stddev: 1ms
Change-Id: I9ba6d4270dd5676eeeff35ad8d9dc5dcddd95e34
Reviewed-on: http://gerrit.cloudera.org:8080/4516
Reviewed-by: Henry Robinson <he...@cloudera.com>
Tested-by: Internal Jenkins
Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/872d5462
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/872d5462
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/872d5462
Branch: refs/heads/master
Commit: 872d5462b9a17d9f2d9edeb08728e29d6eb6eca6
Parents: 435aec5
Author: Henry Robinson <he...@cloudera.com>
Authored: Wed Sep 21 16:29:57 2016 -0700
Committer: Internal Jenkins <cl...@gerrit.cloudera.org>
Committed: Mon Sep 26 22:07:17 2016 +0000
----------------------------------------------------------------------
be/src/rpc/rpc-trace.cc | 22 +++++++----
be/src/rpc/rpc-trace.h | 6 +--
be/src/util/histogram-metric.h | 73 ++++++++++++++++++++++++-------------
common/thrift/metrics.json | 2 +-
4 files changed, 65 insertions(+), 38 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/872d5462/be/src/rpc/rpc-trace.cc
----------------------------------------------------------------------
diff --git a/be/src/rpc/rpc-trace.cc b/be/src/rpc/rpc-trace.cc
index 407c570..f4da19b 100644
--- a/be/src/rpc/rpc-trace.cc
+++ b/be/src/rpc/rpc-trace.cc
@@ -33,7 +33,7 @@ using namespace rapidjson;
using namespace strings;
// Metric key format for rpc call duration metrics.
-const string RPC_TIME_STATS_METRIC_KEY = "rpc-method.$0.call_duration";
+const string RPC_PROCESSING_TIME_DISTRIBUTION_METRIC_KEY = "rpc-method.$0.call_duration";
// Singleton class to keep track of all RpcEventHandlers, and to render them to a
// web-based summary page.
@@ -123,14 +123,14 @@ void RpcEventHandler::Reset(const string& method_name) {
lock_guard<mutex> l(method_map_lock_);
MethodMap::iterator it = method_map_.find(method_name);
if (it == method_map_.end()) return;
- it->second->time_stats->Reset();
+ it->second->processing_time_distribution->Reset();
it->second->num_in_flight.Store(0L);
}
void RpcEventHandler::ResetAll() {
lock_guard<mutex> l(method_map_lock_);
for (const MethodMap::value_type& method: method_map_) {
- method.second->time_stats->Reset();
+ method.second->processing_time_distribution->Reset();
method.second->num_in_flight.Store(0L);
}
}
@@ -149,7 +149,8 @@ void RpcEventHandler::ToJson(Value* server, Document* document) {
Value method(kObjectType);
Value method_name(rpc.first.c_str(), document->GetAllocator());
method.AddMember("name", method_name, document->GetAllocator());
- const string& human_readable = rpc.second->time_stats->ToHumanReadable();
+ const string& human_readable =
+ rpc.second->processing_time_distribution->ToHumanReadable();
Value summary(human_readable.c_str(), document->GetAllocator());
method.AddMember("summary", summary, document->GetAllocator());
method.AddMember("in_flight", rpc.second->num_in_flight.Load(),
@@ -171,9 +172,14 @@ void* RpcEventHandler::getContext(const char* fn_name, void* server_context) {
if (it == method_map_.end()) {
MethodDescriptor* descriptor = new MethodDescriptor();
descriptor->name = fn_name;
- const string& rpc_name = Substitute("$0.$1", server_name_, descriptor->name);
- descriptor->time_stats = StatsMetric<double>::CreateAndRegister(metrics_,
- RPC_TIME_STATS_METRIC_KEY, rpc_name);
+ const string& rpc_name = Substitute(RPC_PROCESSING_TIME_DISTRIBUTION_METRIC_KEY,
+ Substitute("$0.$1", server_name_, descriptor->name));
+ const TMetricDef& def =
+ MakeTMetricDef(rpc_name, TMetricKind::HISTOGRAM, TUnit::TIME_MS);
+ constexpr int32_t SIXTY_MINUTES_IN_MS = 60 * 1000 * 60;
+ // Store processing times of up to 60 minutes with 3 sig. fig.
+ descriptor->processing_time_distribution =
+ metrics_->RegisterMetric(new HistogramMetric(def, SIXTY_MINUTES_IN_MS, 3));
it = method_map_.insert(make_pair(descriptor->name, descriptor)).first;
}
}
@@ -197,5 +203,5 @@ void RpcEventHandler::postWrite(void* ctx, const char* fn_name, uint32_t bytes)
MethodDescriptor* descriptor = rpc_ctx->method_descriptor;
delete rpc_ctx;
descriptor->num_in_flight.Add(-1);
- descriptor->time_stats->Update(elapsed_time);
+ descriptor->processing_time_distribution->Update(elapsed_time);
}
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/872d5462/be/src/rpc/rpc-trace.h
----------------------------------------------------------------------
diff --git a/be/src/rpc/rpc-trace.h b/be/src/rpc/rpc-trace.h
index 3ee2c6e..b2861af 100644
--- a/be/src/rpc/rpc-trace.h
+++ b/be/src/rpc/rpc-trace.h
@@ -18,8 +18,8 @@
#ifndef IMPALA_RPC_RPC_TRACE_H
#define IMPALA_RPC_RPC_TRACE_H
-#include "util/collection-metrics.h"
#include "rpc/thrift-server.h"
+#include "util/histogram-metric.h"
#include "util/internal-queue.h"
#include <thrift/TProcessor.h>
@@ -83,8 +83,8 @@ class RpcEventHandler : public apache::thrift::TProcessorEventHandler {
/// Name of the method
std::string name;
- /// Summary statistics for the time taken to respond to this method
- StatsMetric<double>* time_stats;
+ /// Distribution of the time taken to process this RPC.
+ HistogramMetric* processing_time_distribution;
/// Number of invocations in flight
AtomicInt32 num_in_flight;
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/872d5462/be/src/util/histogram-metric.h
----------------------------------------------------------------------
diff --git a/be/src/util/histogram-metric.h b/be/src/util/histogram-metric.h
index 211deee..5a61648 100644
--- a/be/src/util/histogram-metric.h
+++ b/be/src/util/histogram-metric.h
@@ -20,10 +20,12 @@
#include "util/hdr-histogram.h"
#include "util/metrics.h"
+#include "util/spinlock.h"
namespace impala {
/// Metric which constructs (using HdrHistogram) a histogram of a set of values.
+/// Thread-safe: histogram access protected by a spin lock.
class HistogramMetric : public Metric {
public:
/// Constructs a new histogram metric. `highest_trackable_value` is the maximum value
@@ -31,27 +33,33 @@ class HistogramMetric : public Metric {
/// that values must be stored with.
HistogramMetric(
const TMetricDef& def, uint64_t highest_trackable_value, int num_significant_digits)
- : Metric(def), histogram_(highest_trackable_value, num_significant_digits),
- unit_(def.units) { }
+ : Metric(def),
+ histogram_(new HdrHistogram(highest_trackable_value, num_significant_digits)),
+ unit_(def.units) {
+ DCHECK_EQ(TMetricKind::HISTOGRAM, def.kind);
+ }
virtual void ToJson(rapidjson::Document* document, rapidjson::Value* value) {
rapidjson::Value container(rapidjson::kObjectType);
AddStandardFields(document, &container);
- container.AddMember("25th %-ile", histogram_.ValueAtPercentile(25),
- document->GetAllocator());
- container.AddMember("50th %-ile", histogram_.ValueAtPercentile(50),
- document->GetAllocator());
- container.AddMember("75th %-ile", histogram_.ValueAtPercentile(75),
- document->GetAllocator());
- container.AddMember("90th %-ile", histogram_.ValueAtPercentile(90),
- document->GetAllocator());
- container.AddMember("95th %-ile", histogram_.ValueAtPercentile(95),
- document->GetAllocator());
- container.AddMember("99.9th %-ile", histogram_.ValueAtPercentile(99.9),
- document->GetAllocator());
- container.AddMember("count", histogram_.TotalCount(), document->GetAllocator());
-
+ {
+ boost::lock_guard<SpinLock> l(lock_);
+
+ container.AddMember(
+ "25th %-ile", histogram_->ValueAtPercentile(25), document->GetAllocator());
+ container.AddMember(
+ "50th %-ile", histogram_->ValueAtPercentile(50), document->GetAllocator());
+ container.AddMember(
+ "75th %-ile", histogram_->ValueAtPercentile(75), document->GetAllocator());
+ container.AddMember(
+ "90th %-ile", histogram_->ValueAtPercentile(90), document->GetAllocator());
+ container.AddMember(
+ "95th %-ile", histogram_->ValueAtPercentile(95), document->GetAllocator());
+ container.AddMember(
+ "99.9th %-ile", histogram_->ValueAtPercentile(99.9), document->GetAllocator());
+ container.AddMember("count", histogram_->TotalCount(), document->GetAllocator());
+ }
rapidjson::Value type_value(PrintTMetricKind(TMetricKind::HISTOGRAM).c_str(),
document->GetAllocator());
container.AddMember("kind", type_value, document->GetAllocator());
@@ -61,33 +69,46 @@ class HistogramMetric : public Metric {
*value = container;
}
- void Update(int64_t val) { histogram_.Increment(val); }
+ void Update(int64_t val) {
+ boost::lock_guard<SpinLock> l(lock_);
+ histogram_->Increment(val);
+ }
+
+ /// Reset the histogram by removing all previous entries.
+ void Reset() {
+ boost::lock_guard<SpinLock> l(lock_);
+ uint64_t highest = histogram_->highest_trackable_value();
+ int digits = histogram_->num_significant_digits();
+ histogram_.reset(new HdrHistogram(highest, digits));
+ }
virtual void ToLegacyJson(rapidjson::Document*) { }
const TUnit::type& unit() const { return unit_; }
virtual std::string ToHumanReadable() {
+ boost::lock_guard<SpinLock> l(lock_);
std::stringstream out;
- out <<"Count: " << histogram_.TotalCount() << ", "
+ out << "Count: " << histogram_->TotalCount() << ", "
<< "25th %-ile: "
- << PrettyPrinter::Print(histogram_.ValueAtPercentile(25), unit_) << ", "
+ << PrettyPrinter::Print(histogram_->ValueAtPercentile(25), unit_) << ", "
<< "50th %-ile: "
- << PrettyPrinter::Print(histogram_.ValueAtPercentile(50), unit_) << ", "
+ << PrettyPrinter::Print(histogram_->ValueAtPercentile(50), unit_) << ", "
<< "75th %-ile: "
- << PrettyPrinter::Print(histogram_.ValueAtPercentile(75), unit_) << ", "
+ << PrettyPrinter::Print(histogram_->ValueAtPercentile(75), unit_) << ", "
<< "90th %-ile: "
- << PrettyPrinter::Print(histogram_.ValueAtPercentile(90), unit_) << ", "
+ << PrettyPrinter::Print(histogram_->ValueAtPercentile(90), unit_) << ", "
<< "95th %-ile: "
- << PrettyPrinter::Print(histogram_.ValueAtPercentile(95), unit_) << ", "
+ << PrettyPrinter::Print(histogram_->ValueAtPercentile(95), unit_) << ", "
<< "99.9th %-ile: "
- << PrettyPrinter::Print(histogram_.ValueAtPercentile(99.9), unit_);
+ << PrettyPrinter::Print(histogram_->ValueAtPercentile(99.9), unit_);
return out.str();
}
private:
- HdrHistogram histogram_;
-
+ // Protects histogram_ pointer itself.
+ SpinLock lock_;
+ boost::scoped_ptr<HdrHistogram> histogram_;
const TUnit::type unit_;
};
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/872d5462/common/thrift/metrics.json
----------------------------------------------------------------------
diff --git a/common/thrift/metrics.json b/common/thrift/metrics.json
index b600f80..70dbb12 100644
--- a/common/thrift/metrics.json
+++ b/common/thrift/metrics.json
@@ -748,7 +748,7 @@
],
"label": "$0 RPC Call Duration",
"units": "TIME_MS",
- "kind": "STATS",
+ "kind": "HISTOGRAM",
"key": "rpc-method.$0.call_duration"
},
{