You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by ta...@apache.org on 2019/05/29 15:34:07 UTC
[impala] 01/02: IMPALA-8560: Prometheus metrics support in Impala
This is an automated email from the ASF dual-hosted git repository.
tarmstrong pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git
commit c2aeb93c4f5269e2a0ad2f027ef239767abd32dd
Author: Harshil <ha...@cloudera.com>
AuthorDate: Thu May 23 18:37:48 2019 -0700
IMPALA-8560: Prometheus metrics support in Impala
-- This change adds metric generation in the Prometheus text exposition
format.
-- More details can be found below:
-- https://prometheus.io/docs/instrumenting/exposition_formats
-- Added unit test to test this change
Tests:
-- Fed all of these metrics to a Prometheus instance running on localhost.
-- Also ran the output through "./promtool" to check for any errors in
the Prometheus metrics format.
Change-Id: I5349085a2007b568cb97f9b8130804ea64d7bb08
Reviewed-on: http://gerrit.cloudera.org:8080/13345
Reviewed-by: Tim Armstrong <ta...@cloudera.com>
Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
be/src/util/collection-metrics.h | 62 ++++++
be/src/util/histogram-metric.h | 77 +++++++
be/src/util/metrics-test.cc | 411 ++++++++++++++++++++++++++++++++++++++
be/src/util/metrics.cc | 55 +++++
be/src/util/metrics.h | 68 +++++++
tests/webserver/test_web_pages.py | 8 +
6 files changed, 681 insertions(+)
diff --git a/be/src/util/collection-metrics.h b/be/src/util/collection-metrics.h
index dc7fb54..4cf3330 100644
--- a/be/src/util/collection-metrics.h
+++ b/be/src/util/collection-metrics.h
@@ -74,6 +74,12 @@ class SetMetric : public Metric {
void Reset() { value_.clear(); }
+ /// Prometheus rendering for set metrics: writes nothing to 'val' or 'metric_kind' and
+ /// returns TMetricKind::SET.
+ virtual TMetricKind::type ToPrometheus(
+ std::string name, std::stringstream* val, std::stringstream* metric_kind) {
+ // Sets are not a supported Prometheus type; MetricGroup::ToPrometheus skips SET metrics.
+ return TMetricKind::SET;
+ }
+
virtual void ToJson(rapidjson::Document* document, rapidjson::Value* value) {
rapidjson::Value container(rapidjson::kObjectType);
AddStandardFields(document, &container);
@@ -157,6 +163,62 @@ class StatsMetric : public Metric {
acc_ = Accumulator();
}
+ /// Renders this metric as Prometheus samples, one per line, into 'val':
+ /// '<name>_total' (number of recorded values) and, once at least one value has been
+ /// recorded, '<name>_last' plus whichever of '_min', '_max', '_mean' and '_stddev' are
+ /// enabled in 'StatsSelection'. Values with a TIME_MS, TIME_US or TIME_NS unit are
+ /// converted to seconds. The "# TYPE" line (no trailing newline) goes to
+ /// 'metric_kind'. Always returns TMetricKind::STATS.
+ virtual TMetricKind::type ToPrometheus(
+     std::string name, std::stringstream* val, std::stringstream* metric_kind) {
+   boost::lock_guard<boost::mutex> l(lock_);
+   const bool to_secs = IsUnitTimeBased(unit_);
+   const auto num_samples = boost::accumulators::count(acc_);
+
+   *metric_kind << "# TYPE " << name << " counter";
+   *val << name << "_total " << num_samples << "\n";
+   // The remaining statistics are only emitted once something has been recorded.
+   if (num_samples == 0) return TMetricKind::STATS;
+
+   *val << name << "_last ";
+   if (to_secs) {
+     *val << ConvertToPrometheusSecs(value_, unit_);
+   } else {
+     *val << value_;
+   }
+   *val << "\n";
+
+   if (StatsSelection & StatsType::MIN) {
+     *val << name << "_min ";
+     if (to_secs) {
+       *val << ConvertToPrometheusSecs(boost::accumulators::min(acc_), unit_);
+     } else {
+       *val << boost::accumulators::min(acc_);
+     }
+     *val << "\n";
+   }
+
+   if (StatsSelection & StatsType::MAX) {
+     *val << name << "_max ";
+     if (to_secs) {
+       *val << ConvertToPrometheusSecs(boost::accumulators::max(acc_), unit_);
+     } else {
+       *val << boost::accumulators::max(acc_);
+     }
+     *val << "\n";
+   }
+
+   if (StatsSelection & StatsType::MEAN) {
+     *val << name << "_mean ";
+     if (to_secs) {
+       *val << ConvertToPrometheusSecs(boost::accumulators::mean(acc_), unit_);
+     } else {
+       *val << boost::accumulators::mean(acc_);
+     }
+     *val << "\n";
+   }
+
+   if (StatsSelection & StatsType::STDDEV) {
+     *val << name << "_stddev ";
+     if (to_secs) {
+       *val << ConvertToPrometheusSecs(
+           std::sqrt(boost::accumulators::variance(acc_)), unit_);
+     } else {
+       *val << std::sqrt(boost::accumulators::variance(acc_));
+     }
+     *val << "\n";
+   }
+   return TMetricKind::STATS;
+ }
+
virtual void ToJson(rapidjson::Document* document, rapidjson::Value* val) {
boost::lock_guard<boost::mutex> l(lock_);
rapidjson::Value container(rapidjson::kObjectType);
diff --git a/be/src/util/histogram-metric.h b/be/src/util/histogram-metric.h
index 43d4eaf..ca4499f 100644
--- a/be/src/util/histogram-metric.h
+++ b/be/src/util/histogram-metric.h
@@ -71,6 +71,83 @@ class HistogramMetric : public Metric {
*value = container;
}
+ /// Renders this histogram as Prometheus samples into 'value': the values at the 25th,
+ /// 50th, 75th, 90th, 95th and 99.9th percentiles (under the 'le' labels 0.2, 0.5, 0.7,
+ /// 0.9, 0.95 and 0.999), followed by '<name>_max', '<name>_min' and '<name>_count'.
+ /// Values with a TIME_MS, TIME_US or TIME_NS unit are converted to seconds because
+ /// prometheus only supports time in seconds. The last sample has no trailing newline.
+ /// The "# TYPE" line goes to 'metric_kind'. Always returns TMetricKind::HISTOGRAM.
+ virtual TMetricKind::type ToPrometheus(std::string name, std::stringstream* value,
+     std::stringstream* metric_kind) override {
+   // Label text emitted after the metric name, and the percentile reported under it.
+   struct PercentileSample {
+     const char* label;
+     double percentile;
+   };
+   const PercentileSample samples[] = {
+       {"{le=\"0.2\"} ", 25},
+       {"{le=\"0.5\"} ", 50},
+       {"{le=\"0.7\"} ", 75},
+       {"{le=\"0.9\"} ", 90},
+       {"{le=\"0.95\"} ", 95},
+       {"{le=\"0.999\"} ", 99.9},
+   };
+
+   {
+     boost::lock_guard<SpinLock> l(lock_);
+     const bool to_secs = IsUnitTimeBased(unit_);
+
+     for (const PercentileSample& sample : samples) {
+       *value << name << sample.label;
+       if (to_secs) {
+         *value << ConvertToPrometheusSecs(
+             histogram_->ValueAtPercentile(sample.percentile), unit_);
+       } else {
+         *value << histogram_->ValueAtPercentile(sample.percentile);
+       }
+       *value << "\n";
+     }
+
+     *value << name << "_max ";
+     if (to_secs) {
+       *value << ConvertToPrometheusSecs(histogram_->MaxValue(), unit_);
+     } else {
+       *value << histogram_->MaxValue();
+     }
+     *value << "\n";
+
+     *value << name << "_min ";
+     if (to_secs) {
+       *value << ConvertToPrometheusSecs(histogram_->MinValue(), unit_);
+     } else {
+       *value << histogram_->MinValue();
+     }
+     *value << "\n";
+
+     // The count is a number of samples, so it is never converted to seconds.
+     *value << name << "_count " << histogram_->TotalCount();
+   }
+
+   *metric_kind << "# TYPE " << name << " histogram";
+   return TMetricKind::HISTOGRAM;
+ }
+
void Update(int64_t val) {
boost::lock_guard<SpinLock> l(lock_);
histogram_->Increment(val);
diff --git a/be/src/util/metrics-test.cc b/be/src/util/metrics-test.cc
index accbdcd..3302fb3 100644
--- a/be/src/util/metrics-test.cc
+++ b/be/src/util/metrics-test.cc
@@ -463,5 +463,416 @@ TEST_F(MetricsTest, MetricGroupJson) {
EXPECT_EQ(val2["name"].GetString(), string("child1"));
}
+/// Checks that 'val' holds exactly one rendered metric: a "# HELP" line with 'desc', a
+/// "# TYPE" line with 'kind' and then the sample text. For "stats_metric" and
+/// "histogram_metric" 'value' already contains the complete sample lines; for every
+/// other metric it is only the number, which is expected after the metric name.
+void AssertPrometheus(const std::stringstream& val, const string& name,
+    const string& value, const string& desc, const string& kind = "") {
+  const bool has_full_samples = (name == "stats_metric" || name == "histogram_metric");
+
+  std::stringstream expected;
+  expected << "# HELP " << name << " " << desc << "\n";
+  expected << "# TYPE " << name << " " << kind << "\n";
+  if (!has_full_samples) expected << name << " ";
+  expected << value << "\n";
+
+  EXPECT_EQ(val.str(), expected.str());
+}
+
+// A counter with unit UNIT is rendered with its raw value.
+TEST_F(MetricsTest, CountersPrometheus) {
+  MetricGroup group("CounterMetrics");
+  AddMetricDef("counter", TMetricKind::COUNTER, TUnit::UNIT, "description");
+  group.AddCounter("counter", 0);
+
+  std::stringstream rendered;
+  group.ToPrometheus(true, &rendered);
+  AssertPrometheus(rendered, "counter", "0", "description", "counter");
+}
+
+// A counter with unit BYTES is rendered with its raw value.
+TEST_F(MetricsTest, CountersBytesPrometheus) {
+  MetricGroup group("CounterMetrics");
+  AddMetricDef("counter", TMetricKind::COUNTER, TUnit::BYTES, "description");
+  group.AddCounter("counter", 555);
+
+  std::stringstream rendered;
+  group.ToPrometheus(true, &rendered);
+  AssertPrometheus(rendered, "counter", "555", "description", "counter");
+}
+
+// A counter with unit NONE is rendered with its raw value.
+TEST_F(MetricsTest, CountersNonePrometheus) {
+  MetricGroup group("CounterMetrics");
+  AddMetricDef("counter", TMetricKind::COUNTER, TUnit::NONE, "description");
+  group.AddCounter("counter", 0);
+
+  std::stringstream rendered;
+  group.ToPrometheus(true, &rendered);
+  AssertPrometheus(rendered, "counter", "0", "description", "counter");
+}
+
+// A counter in milliseconds is rendered in seconds.
+TEST_F(MetricsTest, CountersTimeMSPrometheus) {
+  MetricGroup group("CounterMetrics");
+  AddMetricDef("counter", TMetricKind::COUNTER, TUnit::TIME_MS, "description");
+  group.AddCounter("counter", 4354364);
+
+  std::stringstream rendered;
+  group.ToPrometheus(true, &rendered);
+  AssertPrometheus(rendered, "counter", "4354.36", "description", "counter");
+}
+
+// A counter in nanoseconds is rendered in seconds.
+TEST_F(MetricsTest, CountersTimeNSPrometheus) {
+  MetricGroup group("CounterMetrics");
+  AddMetricDef("counter", TMetricKind::COUNTER, TUnit::TIME_NS, "description");
+  group.AddCounter("counter", 4354364234);
+
+  std::stringstream rendered;
+  group.ToPrometheus(true, &rendered);
+  AssertPrometheus(rendered, "counter", "4.35436", "description", "counter");
+}
+
+// A counter that is already in seconds is rendered unchanged.
+TEST_F(MetricsTest, CountersTimeSPrometheus) {
+  MetricGroup group("CounterMetrics");
+  AddMetricDef("counter", TMetricKind::COUNTER, TUnit::TIME_S, "description");
+  group.AddCounter("counter", 120);
+
+  std::stringstream rendered;
+  group.ToPrometheus(true, &rendered);
+  AssertPrometheus(rendered, "counter", "120", "description", "counter");
+}
+
+// A gauge with unit NONE is rendered with its raw value.
+TEST_F(MetricsTest, GaugesPrometheus) {
+  MetricGroup group("GaugeMetrics");
+  AddMetricDef("gauge", TMetricKind::GAUGE, TUnit::NONE);
+  group.AddGauge("gauge", 10);
+
+  std::stringstream rendered;
+  group.ToPrometheus(true, &rendered);
+  AssertPrometheus(rendered, "gauge", "10", "", "gauge");
+}
+
+// A gauge with unit BYTES is rendered with its raw value.
+TEST_F(MetricsTest, GaugesBytesPrometheus) {
+  MetricGroup group("GaugeMetrics");
+  AddMetricDef("gauge", TMetricKind::GAUGE, TUnit::BYTES);
+  group.AddGauge("gauge", 150000);
+
+  std::stringstream rendered;
+  group.ToPrometheus(true, &rendered);
+  AssertPrometheus(rendered, "gauge", "150000", "", "gauge");
+}
+
+// A gauge in milliseconds is rendered in seconds.
+TEST_F(MetricsTest, GaugesTimeMSPrometheus) {
+  MetricGroup group("GaugeMetrics");
+  AddMetricDef("gauge", TMetricKind::GAUGE, TUnit::TIME_MS);
+  group.AddGauge("gauge", 10000);
+
+  std::stringstream rendered;
+  group.ToPrometheus(true, &rendered);
+  AssertPrometheus(rendered, "gauge", "10", "", "gauge");
+}
+
+// A gauge in nanoseconds is rendered in seconds.
+TEST_F(MetricsTest, GaugesTimeNSPrometheus) {
+  MetricGroup group("GaugeMetrics");
+  AddMetricDef("gauge", TMetricKind::GAUGE, TUnit::TIME_NS);
+  group.AddGauge("gauge", 2334123456);
+
+  std::stringstream rendered;
+  group.ToPrometheus(true, &rendered);
+  AssertPrometheus(rendered, "gauge", "2.33412", "", "gauge");
+}
+
+// A gauge that is already in seconds is rendered unchanged.
+TEST_F(MetricsTest, GaugesTimeSPrometheus) {
+  MetricGroup group("GaugeMetrics");
+  AddMetricDef("gauge", TMetricKind::GAUGE, TUnit::TIME_S);
+  group.AddGauge("gauge", 1500);
+
+  std::stringstream rendered;
+  group.ToPrometheus(true, &rendered);
+  AssertPrometheus(rendered, "gauge", "1500", "", "gauge");
+}
+
+// A gauge with unit UNIT is rendered with its raw value.
+TEST_F(MetricsTest, GaugesUnitPrometheus) {
+  MetricGroup group("GaugeMetrics");
+  AddMetricDef("gauge", TMetricKind::GAUGE, TUnit::UNIT);
+  group.AddGauge("gauge", 111);
+
+  std::stringstream rendered;
+  group.ToPrometheus(true, &rendered);
+  AssertPrometheus(rendered, "gauge", "111", "", "gauge");
+}
+
+// Stats over the samples 10 and 20 with unit UNIT are rendered unscaled.
+TEST_F(MetricsTest, StatsMetricsPrometheus) {
+  MetricGroup group("StatsMetrics");
+  AddMetricDef("stats_metric", TMetricKind::STATS, TUnit::UNIT);
+  StatsMetric<double>* stats =
+      StatsMetric<double>::CreateAndRegister(&group, "stats_metric");
+  stats->Update(10.0);
+  stats->Update(20.0);
+
+  const string expected_samples =
+      "stats_metric_total 2\n"
+      "stats_metric_last 20\n"
+      "stats_metric_min 10\n"
+      "stats_metric_max 20\n"
+      "stats_metric_mean 15\n"
+      "stats_metric_stddev 5\n";
+  std::stringstream rendered;
+  group.ToPrometheus(true, &rendered);
+  AssertPrometheus(rendered, "stats_metric", expected_samples, "", "counter");
+}
+
+// Stats with unit BYTES are rendered unscaled, using the stream's default precision.
+TEST_F(MetricsTest, StatsMetricsBytesPrometheus) {
+  MetricGroup group("StatsMetrics");
+  AddMetricDef("stats_metric", TMetricKind::STATS, TUnit::BYTES);
+  StatsMetric<double>* stats =
+      StatsMetric<double>::CreateAndRegister(&group, "stats_metric");
+  stats->Update(10.0);
+  stats->Update(2230.1234567);
+
+  const string expected_samples =
+      "stats_metric_total 2\n"
+      "stats_metric_last 2230.12\n"
+      "stats_metric_min 10\n"
+      "stats_metric_max 2230.12\n"
+      "stats_metric_mean 1120.06\n"
+      "stats_metric_stddev 1110.06\n";
+  std::stringstream rendered;
+  group.ToPrometheus(true, &rendered);
+  AssertPrometheus(rendered, "stats_metric", expected_samples, "", "counter");
+}
+
+// Stats over the samples 10 and 20 with unit NONE are rendered unscaled.
+TEST_F(MetricsTest, StatsMetricsNonePrometheus) {
+  MetricGroup group("StatsMetrics");
+  AddMetricDef("stats_metric", TMetricKind::STATS, TUnit::NONE);
+  StatsMetric<double>* stats =
+      StatsMetric<double>::CreateAndRegister(&group, "stats_metric");
+  stats->Update(10.0);
+  stats->Update(20.0);
+
+  const string expected_samples =
+      "stats_metric_total 2\n"
+      "stats_metric_last 20\n"
+      "stats_metric_min 10\n"
+      "stats_metric_max 20\n"
+      "stats_metric_mean 15\n"
+      "stats_metric_stddev 5\n";
+  std::stringstream rendered;
+  group.ToPrometheus(true, &rendered);
+  AssertPrometheus(rendered, "stats_metric", expected_samples, "", "counter");
+}
+
+// Stats in milliseconds are rendered in seconds; the sample count is not scaled.
+TEST_F(MetricsTest, StatsMetricsTimeMSPrometheus) {
+  MetricGroup group("StatsMetrics");
+  AddMetricDef("stats_metric", TMetricKind::STATS, TUnit::TIME_MS);
+  StatsMetric<double>* stats =
+      StatsMetric<double>::CreateAndRegister(&group, "stats_metric");
+  stats->Update(10.0);
+  stats->Update(20.0);
+
+  const string expected_samples =
+      "stats_metric_total 2\n"
+      "stats_metric_last 0.02\n"
+      "stats_metric_min 0.01\n"
+      "stats_metric_max 0.02\n"
+      "stats_metric_mean 0.015\n"
+      "stats_metric_stddev 0.005\n";
+  std::stringstream rendered;
+  group.ToPrometheus(true, &rendered);
+  AssertPrometheus(rendered, "stats_metric", expected_samples, "", "counter");
+}
+
+// Stats in nanoseconds are rendered in seconds; the sample count is not scaled.
+TEST_F(MetricsTest, StatsMetricsTimeNSPrometheus) {
+  MetricGroup group("StatsMetrics");
+  AddMetricDef("stats_metric", TMetricKind::STATS, TUnit::TIME_NS);
+  StatsMetric<double>* stats =
+      StatsMetric<double>::CreateAndRegister(&group, "stats_metric");
+  stats->Update(10.12345);
+  stats->Update(20.567);
+
+  const string expected_samples =
+      "stats_metric_total 2\n"
+      "stats_metric_last 2.0567e-08\n"
+      "stats_metric_min 1.01235e-08\n"
+      "stats_metric_max 2.0567e-08\n"
+      "stats_metric_mean 1.53452e-08\n"
+      "stats_metric_stddev 5.22178e-09\n";
+  std::stringstream rendered;
+  group.ToPrometheus(true, &rendered);
+  AssertPrometheus(rendered, "stats_metric", expected_samples, "", "counter");
+}
+
+// Stats that are already in seconds are rendered unchanged.
+TEST_F(MetricsTest, StatsMetricsTimeSPrometheus) {
+  MetricGroup group("StatsMetrics");
+  AddMetricDef("stats_metric", TMetricKind::STATS, TUnit::TIME_S);
+  StatsMetric<double>* stats =
+      StatsMetric<double>::CreateAndRegister(&group, "stats_metric");
+  stats->Update(10.22);
+  stats->Update(20.22);
+
+  const string expected_samples =
+      "stats_metric_total 2\n"
+      "stats_metric_last 20.22\n"
+      "stats_metric_min 10.22\n"
+      "stats_metric_max 20.22\n"
+      "stats_metric_mean 15.22\n"
+      "stats_metric_stddev 5\n";
+  std::stringstream rendered;
+  group.ToPrometheus(true, &rendered);
+  AssertPrometheus(rendered, "stats_metric", expected_samples, "", "counter");
+}
+
+// A histogram in milliseconds is rendered in seconds; the count is not scaled.
+TEST_F(MetricsTest, HistogramPrometheus) {
+  MetricGroup group("HistoMetrics");
+  TMetricDef metric_def =
+      MakeTMetricDef("histogram-metric", TMetricKind::HISTOGRAM, TUnit::TIME_MS);
+  constexpr int MAX_VALUE = 10000;
+  HistogramMetric* histogram =
+      group.RegisterMetric(new HistogramMetric(metric_def, MAX_VALUE, 3));
+
+  // Add value beyond limit to make sure it's recorded accurately.
+  for (int sample = 0; sample <= MAX_VALUE + 1; ++sample) {
+    histogram->Update(sample);
+  }
+
+  const string expected_samples =
+      "histogram_metric{le=\"0.2\"} 2.5\n"
+      "histogram_metric{le=\"0.5\"} 5\n"
+      "histogram_metric{le=\"0.7\"} 7.5\n"
+      "histogram_metric{le=\"0.9\"} 9\n"
+      "histogram_metric{le=\"0.95\"} 9.496\n"
+      "histogram_metric{le=\"0.999\"} 9.984\n"
+      "histogram_metric_max 10.001\n"
+      "histogram_metric_min 0\n"
+      "histogram_metric_count 10002";
+  std::stringstream rendered;
+  group.ToPrometheus(true, &rendered);
+  AssertPrometheus(rendered, "histogram_metric", expected_samples, "", "histogram");
+}
+
+// A histogram in nanoseconds is rendered in seconds; the count is not scaled.
+TEST_F(MetricsTest, HistogramTimeNSPrometheus) {
+  MetricGroup group("HistoMetrics");
+  TMetricDef metric_def =
+      MakeTMetricDef("histogram-metric", TMetricKind::HISTOGRAM, TUnit::TIME_NS);
+  constexpr int MAX_VALUE = 10000;
+  HistogramMetric* histogram =
+      group.RegisterMetric(new HistogramMetric(metric_def, MAX_VALUE, 3));
+
+  // Add value beyond limit to make sure it's recorded accurately.
+  for (int sample = 0; sample <= MAX_VALUE + 1; ++sample) {
+    histogram->Update(sample);
+  }
+
+  const string expected_samples =
+      "histogram_metric{le=\"0.2\"} 2.5e-06\n"
+      "histogram_metric{le=\"0.5\"} 5e-06\n"
+      "histogram_metric{le=\"0.7\"} 7.5e-06\n"
+      "histogram_metric{le=\"0.9\"} 9e-06\n"
+      "histogram_metric{le=\"0.95\"} 9.496e-06\n"
+      "histogram_metric{le=\"0.999\"} 9.984e-06\n"
+      "histogram_metric_max 1.0001e-05\n"
+      "histogram_metric_min 0\n"
+      "histogram_metric_count 10002";
+  std::stringstream rendered;
+  group.ToPrometheus(true, &rendered);
+  AssertPrometheus(rendered, "histogram_metric", expected_samples, "", "histogram");
+}
+
+// A histogram that is already in seconds is rendered unchanged.
+TEST_F(MetricsTest, HistogramTimeSPrometheus) {
+  MetricGroup group("HistoMetrics");
+  TMetricDef metric_def =
+      MakeTMetricDef("histogram-metric", TMetricKind::HISTOGRAM, TUnit::TIME_S);
+  constexpr int MAX_VALUE = 10000;
+  HistogramMetric* histogram =
+      group.RegisterMetric(new HistogramMetric(metric_def, MAX_VALUE, 3));
+
+  // Add value beyond limit to make sure it's recorded accurately.
+  for (int sample = 0; sample <= MAX_VALUE + 1; ++sample) {
+    histogram->Update(sample);
+  }
+
+  const string expected_samples =
+      "histogram_metric{le=\"0.2\"} 2500\n"
+      "histogram_metric{le=\"0.5\"} 5000\n"
+      "histogram_metric{le=\"0.7\"} 7500\n"
+      "histogram_metric{le=\"0.9\"} 9000\n"
+      "histogram_metric{le=\"0.95\"} 9496\n"
+      "histogram_metric{le=\"0.999\"} 9984\n"
+      "histogram_metric_max 10001\n"
+      "histogram_metric_min 0\n"
+      "histogram_metric_count 10002";
+  std::stringstream rendered;
+  group.ToPrometheus(true, &rendered);
+  AssertPrometheus(rendered, "histogram_metric", expected_samples, "", "histogram");
+}
+
+// A histogram with unit BYTES is rendered unscaled.
+TEST_F(MetricsTest, HistogramBytesPrometheus) {
+  MetricGroup group("HistoMetrics");
+  TMetricDef metric_def =
+      MakeTMetricDef("histogram-metric", TMetricKind::HISTOGRAM, TUnit::BYTES);
+  constexpr int MAX_VALUE = 10000;
+  HistogramMetric* histogram =
+      group.RegisterMetric(new HistogramMetric(metric_def, MAX_VALUE, 3));
+
+  // Add value beyond limit to make sure it's recorded accurately.
+  for (int sample = 0; sample <= MAX_VALUE + 1; ++sample) {
+    histogram->Update(sample);
+  }
+
+  const string expected_samples =
+      "histogram_metric{le=\"0.2\"} 2500\n"
+      "histogram_metric{le=\"0.5\"} 5000\n"
+      "histogram_metric{le=\"0.7\"} 7500\n"
+      "histogram_metric{le=\"0.9\"} 9000\n"
+      "histogram_metric{le=\"0.95\"} 9496\n"
+      "histogram_metric{le=\"0.999\"} 9984\n"
+      "histogram_metric_max 10001\n"
+      "histogram_metric_min 0\n"
+      "histogram_metric_count 10002";
+  std::stringstream rendered;
+  group.ToPrometheus(true, &rendered);
+  AssertPrometheus(rendered, "histogram_metric", expected_samples, "", "histogram");
+}
+
+// A histogram with unit UNIT is rendered unscaled.
+TEST_F(MetricsTest, HistogramUnitPrometheus) {
+  MetricGroup group("HistoMetrics");
+  TMetricDef metric_def =
+      MakeTMetricDef("histogram-metric", TMetricKind::HISTOGRAM, TUnit::UNIT);
+  constexpr int MAX_VALUE = 10000;
+  HistogramMetric* histogram =
+      group.RegisterMetric(new HistogramMetric(metric_def, MAX_VALUE, 3));
+
+  // Add value beyond limit to make sure it's recorded accurately.
+  for (int sample = 0; sample <= MAX_VALUE + 1; ++sample) {
+    histogram->Update(sample);
+  }
+
+  const string expected_samples =
+      "histogram_metric{le=\"0.2\"} 2500\n"
+      "histogram_metric{le=\"0.5\"} 5000\n"
+      "histogram_metric{le=\"0.7\"} 7500\n"
+      "histogram_metric{le=\"0.9\"} 9000\n"
+      "histogram_metric{le=\"0.95\"} 9496\n"
+      "histogram_metric{le=\"0.999\"} 9984\n"
+      "histogram_metric_max 10001\n"
+      "histogram_metric_min 0\n"
+      "histogram_metric_count 10002";
+  std::stringstream rendered;
+  group.ToPrometheus(true, &rendered);
+  AssertPrometheus(rendered, "histogram_metric", expected_samples, "", "histogram");
+}
+
+// Rendering a group with 'include_children' set emits the group's own metrics followed
+// by the metrics of its child groups.
+TEST_F(MetricsTest, MetricGroupPrometheus) {
+  const string expected =
+      "# HELP counter1 description\n"
+      "# TYPE counter1 counter\n"
+      "counter1 2048\n"
+      "# HELP counter2 description\n"
+      "# TYPE counter2 counter\n"
+      "counter2 2048\n"
+      "# HELP child_counter description\n"
+      "# TYPE child_counter counter\n"
+      "child_counter 0\n";
+
+  MetricGroup group("PrometheusTest");
+  AddMetricDef("counter1", TMetricKind::COUNTER, TUnit::BYTES, "description");
+  AddMetricDef("counter2", TMetricKind::COUNTER, TUnit::BYTES, "description");
+  group.AddCounter("counter1", 2048);
+  group.AddCounter("counter2", 2048);
+
+  // No child group exists until one is created.
+  MetricGroup* missing_child = group.FindChildGroup("child1");
+  EXPECT_EQ(missing_child, static_cast<MetricGroup*>(nullptr));
+
+  group.GetOrCreateChildGroup("child1");
+  AddMetricDef("child_counter", TMetricKind::COUNTER, TUnit::BYTES, "description");
+  group.GetOrCreateChildGroup("child2")->AddCounter("child_counter", 0);
+
+  IntCounter* child_counter =
+      group.FindMetricForTesting<IntCounter>(string("child_counter"));
+  ASSERT_NE(child_counter, static_cast<IntCounter*>(nullptr));
+
+  std::stringstream rendered;
+  group.ToPrometheus(true, &rendered);
+  EXPECT_EQ(rendered.str(), expected);
+}
+
+// Groups that have metric definitions but no registered metrics render as an empty
+// string.
+TEST_F(MetricsTest, StatsMetricsNullPrometheus) {
+  std::stringstream rendered;
+
+  MetricGroup unnamed_def_group("StatsMetrics");
+  AddMetricDef("", TMetricKind::STATS, TUnit::TIME_S);
+  unnamed_def_group.ToPrometheus(true, &rendered);
+  EXPECT_EQ("", rendered.str());
+
+  // The same stream is reused, so it must still be empty after the second group.
+  MetricGroup named_def_group("Metrics");
+  AddMetricDef("test", TMetricKind::STATS, TUnit::TIME_S);
+  named_def_group.ToPrometheus(true, &rendered);
+  EXPECT_EQ("", rendered.str());
+}
}
diff --git a/be/src/util/metrics.cc b/be/src/util/metrics.cc
index 34e5018..2a9cbf6 100644
--- a/be/src/util/metrics.cc
+++ b/be/src/util/metrics.cc
@@ -31,6 +31,7 @@
#include "common/names.h"
+using boost::algorithm::replace_all_copy;
using namespace impala;
using namespace rapidjson;
using namespace strings;
@@ -91,6 +92,10 @@ Status MetricGroup::Init(Webserver* webserver) {
Webserver::UrlCallback json_callback =
bind<void>(mem_fn(&MetricGroup::TemplateCallback), this, _1, _2);
webserver->RegisterUrlCallback("/metrics", "metrics.tmpl", json_callback, true);
+
+ Webserver::RawUrlCallback prometheus_callback =
+ bind<void>(mem_fn(&MetricGroup::PrometheusCallback), this, _1, _2);
+ webserver->RegisterUrlCallback("/metrics_prometheus", prometheus_callback);
}
return Status::OK();
@@ -172,6 +177,20 @@ void MetricGroup::TemplateCallback(const Webserver::WebRequest& req,
}
}
+/// Webserver callback for /metrics_prometheus. Writes this metric group and all of its
+/// children to 'data' in the Prometheus text format. If the request carries a
+/// 'metric_group' argument nothing is written, because rendering a single named group
+/// is not implemented here.
+void MetricGroup::PrometheusCallback(
+    const Webserver::WebRequest& req, stringstream* data) {
+  const auto& args = req.parsed_args;
+  Webserver::ArgumentMap::const_iterator metric_group = args.find("metric_group");
+
+  lock_guard<SpinLock> l(lock_);
+  // If no particular metric group is requested, render this metric group (and all its
+  // children).
+  if (metric_group == args.end()) {
+    ToPrometheus(true, data);
+  }
+}
+
void MetricGroup::ToJson(bool include_children, Document* document, Value* out_val) {
Value metric_list(kArrayType);
for (const MetricMap::value_type& m: metric_map_) {
@@ -197,6 +216,42 @@ void MetricGroup::ToJson(bool include_children, Document* document, Value* out_v
*out_val = container;
}
+/// Appends every metric of this group to 'out_val' in the Prometheus text exposition
+/// format: a "# HELP" line, the "# TYPE" line produced by the metric, then the sample
+/// line(s). Metrics that report kind SET or PROPERTY are skipped because Prometheus
+/// cannot represent them. If 'include_children' is true, all child groups are rendered
+/// recursively after this group's own metrics.
+void MetricGroup::ToPrometheus(bool include_children, stringstream* out_val) {
+  for (auto const& m : metric_map_) {
+    stringstream metric_value;
+    stringstream metric_kind;
+
+    // '.' and '-' are not valid in Prometheus metric names, so map both to '_'.
+    string name = replace_all_copy(m.first, ".", "_");
+    name = replace_all_copy(name, "-", "_");
+    TMetricKind::type metric_type =
+        m.second->ToPrometheus(name, &metric_value, &metric_kind);
+    if (metric_type == TMetricKind::SET || metric_type == TMetricKind::PROPERTY) {
+      // not supported in prometheus
+      continue;
+    }
+
+    // The exposition format requires backslash and line feed in HELP text to be escaped
+    // as "\\" and "\n". Backslashes go first so that the backslash introduced for a
+    // line feed is not escaped a second time.
+    string help = replace_all_copy(m.second->description_, "\\", "\\\\");
+    help = replace_all_copy(help, "\n", "\\n");
+
+    *out_val << "# HELP " << name << " " << help << "\n";
+    *out_val << metric_kind.str() << "\n";
+    // Histogram and stats metrics emit complete sample lines themselves; every other
+    // kind emits only the value, so it has to be prefixed with the metric name.
+    if (metric_type != TMetricKind::HISTOGRAM && metric_type != TMetricKind::STATS) {
+      *out_val << name << " ";
+    }
+    *out_val << metric_value.str() << "\n";
+  }
+
+  if (include_children) {
+    for (const ChildGroupMap::value_type& child : children_) {
+      child.second->ToPrometheus(true, out_val);
+    }
+  }
+}
+
MetricGroup* MetricGroup::GetOrCreateChildGroup(const string& name) {
lock_guard<SpinLock> l(lock_);
ChildGroupMap::iterator it = children_.find(name);
diff --git a/be/src/util/metrics.h b/be/src/util/metrics.h
index 80f899d..b319f35 100644
--- a/be/src/util/metrics.h
+++ b/be/src/util/metrics.h
@@ -96,6 +96,16 @@ class Metric {
/// This method is kept for backwards-compatibility with CM5.0.
virtual void ToLegacyJson(rapidjson::Document* document) = 0;
+ /// Renders this metric in the Prometheus text exposition format.
+ /// Details of this format can be found below:
+ /// https://github.com/prometheus/docs/blob/master/content/docs/instrumenting/
+ /// exposition_formats.md
+ /// 'name' is the metric name to use in the output. Implementations write the sample
+ /// text to 'val' and the "# TYPE ..." line (without a trailing newline) to
+ /// 'metric_kind', and return the kind of the metric.
+ virtual TMetricKind::type ToPrometheus(
+ string name, std::stringstream* val, std::stringstream* metric_kind) = 0;
+
/// Writes a human-readable representation of this metric to 'out'. This is the
/// representation that is often displayed in webpages etc.
virtual std::string ToHumanReadable() = 0;
@@ -103,6 +113,11 @@ class Metric {
const std::string& key() const { return key_; }
const std::string& description() const { return description_; }
+ /// Returns true if 'type' is one of the sub-second time units (TIME_MS, TIME_US or
+ /// TIME_NS) and false for every other unit.
+ bool IsUnitTimeBased(TUnit::type type) {
+   switch (type) {
+     case TUnit::type::TIME_MS:
+     case TUnit::type::TIME_US:
+     case TUnit::type::TIME_NS:
+       return true;
+     default:
+       return false;
+   }
+ }
+
protected:
/// Unique key identifying this metric
const std::string key_;
@@ -120,6 +135,26 @@ class Metric {
void AddStandardFields(rapidjson::Document* document, rapidjson::Value* val);
};
+/// Converts 'val', expressed in 'unit', to seconds when 'unit' is TIME_MS, TIME_US or
+/// TIME_NS. For any other unit 'val' is returned as a double without scaling.
+template <typename T>
+inline double ConvertToPrometheusSecs(const T& val, TUnit::type unit) {
+  double secs = val;
+  switch (unit) {
+    case TUnit::type::TIME_MS:
+      secs /= 1000;
+      break;
+    case TUnit::type::TIME_US:
+      secs /= 1000000;
+      break;
+    case TUnit::type::TIME_NS:
+      secs /= 1000000000;
+      break;
+    default:
+      break;
+  }
+  return secs;
+}
+
+/// String values have no numeric representation, so this specialization must never be
+/// reached: it trips a DCHECK and returns 0.
+template <>
+inline double ConvertToPrometheusSecs<std::string>(
+    const std::string& val, TUnit::type unit) {
+  DCHECK(false) << "Should not be called for string metrics";
+  return 0.0;
+}
+
/// A ScalarMetric has a value which is a simple primitive type: e.g. integers, strings
/// and floats. It is parameterised not only by the type of its value, but by both the
/// unit (e.g. bytes/s), drawn from TUnit and the 'kind' of the metric itself.
@@ -160,6 +195,30 @@ class ScalarMetric: public Metric {
*val = container;
}
+ /// Renders this metric for Prometheus: writes the current value to 'val' (converted
+ /// to seconds when the unit is TIME_MS, TIME_US or TIME_NS) and the "# TYPE" line,
+ /// with the kind in lower case, to 'metric_kind'. Property metrics are not supported
+ /// by prometheus; for them nothing is written. Returns the kind of this metric.
+ virtual TMetricKind::type ToPrometheus(
+     std::string name, std::stringstream* val, std::stringstream* metric_kind) override {
+   // Compare the enum itself rather than its printed name: the printed name is only
+   // lower-cased further down, so matching it against "property" here is unreliable.
+   if (kind() == TMetricKind::PROPERTY) {
+     return TMetricKind::PROPERTY;
+   }
+
+   if (IsUnitTimeBased(unit())) {
+     // check if unit its 'TIME_MS','TIME_US' or 'TIME_NS' and convert it to seconds,
+     // this is because prometheus only supports time format in seconds
+     *val << ConvertToPrometheusSecs(GetValue(), unit());
+   } else {
+     *val << GetValue();
+   }
+
+   // convert metric type to lower case, that's what prometheus expects
+   std::string metric_type = PrintThriftEnum(kind()).c_str();
+   std::transform(
+       metric_type.begin(), metric_type.end(), metric_type.begin(), ::tolower);
+
+   *metric_kind << "# TYPE " << name << " " << metric_type;
+   return kind();
+ }
+
virtual std::string ToHumanReadable() override {
return PrettyPrinter::Print(GetValue(), unit());
}
@@ -440,6 +499,9 @@ class MetricGroup {
void ToJson(bool include_children, rapidjson::Document* document,
rapidjson::Value* out_val);
+ /// Converts this metric group (and optionally all of its children recursively) to the
+ /// Prometheus text exposition format, appending the result to 'out_val'.
+ void ToPrometheus(bool include_children, std::stringstream* out_val);
+
/// Creates or returns an already existing child metric group.
MetricGroup* GetOrCreateChildGroup(const std::string& name);
@@ -476,6 +538,12 @@ class MetricGroup {
void TemplateCallback(const Webserver::WebRequest& req,
rapidjson::Document* document);
+ /// Webserver callback for /metrics_prometheus. Writes this metric group and all of
+ /// its children to 'data' in the Prometheus text exposition format. If the request
+ /// contains a 'metric_group' argument, nothing is written.
+ void PrometheusCallback(const Webserver::WebRequest& req, std::stringstream* data);
+
/// Legacy webpage callback for CM 5.0 and earlier. Produces a flattened map of (key,
/// value) pairs for all metrics in this hierarchy.
/// If args contains a paramater 'metric', only the json for that metric is returned.
diff --git a/tests/webserver/test_web_pages.py b/tests/webserver/test_web_pages.py
index 581df17..9dd3405 100644
--- a/tests/webserver/test_web_pages.py
+++ b/tests/webserver/test_web_pages.py
@@ -46,6 +46,7 @@ class TestWebPage(ImpalaTestSuite):
ADMISSION_URL = "http://localhost:{0}/admission"
RESET_RESOURCE_POOL_STATS_URL = "http://localhost:{0}/resource_pool_reset"
BACKENDS_URL = "http://localhost:{0}/backends"
+ PROMETHEUS_METRICS_URL = "http://localhost:{0}/metrics_prometheus"
# log4j changes do not apply to the statestore since it doesn't
# have an embedded JVM. So we make two sets of ports to test the
@@ -564,3 +565,10 @@ class TestWebPage(ImpalaTestSuite):
# Check the query id is in the content of the reponse.
assert len(responses) == 1
assert query_id in responses[0].text
+
+ def test_prometheus_metrics(self):
+   """Fetches the Prometheus metrics page and checks that a known metric is present."""
+   responses = self.get_and_check_status(self.PROMETHEUS_METRICS_URL)
+   assert len(responses) == 3
+   # check if metric shows up
+   first_page = responses[0].text
+   assert 'statestore_subscriber_heartbeat_interval_time_min' in first_page