You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by ta...@apache.org on 2019/05/29 15:34:07 UTC

[impala] 01/02: IMPALA-8560: Prometheus metrics support in Impala

This is an automated email from the ASF dual-hosted git repository.

tarmstrong pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit c2aeb93c4f5269e2a0ad2f027ef239767abd32dd
Author: Harshil <ha...@cloudera.com>
AuthorDate: Thu May 23 18:37:48 2019 -0700

    IMPALA-8560: Prometheus metrics support in Impala
    
        -- This change adds metric generation in the Prometheus text
           exposition format.
        -- More details can be found below:
        -- https://prometheus.io/docs/instrumenting/exposition_formats
        -- Added unit test to test this change
    
    Tests:
        -- Fed all of these metrics to a Prometheus instance running on localhost
        -- Also ran the output through "./promtool" to check for any errors in
           the Prometheus metrics format.
    Change-Id: I5349085a2007b568cb97f9b8130804ea64d7bb08
    Reviewed-on: http://gerrit.cloudera.org:8080/13345
    Reviewed-by: Tim Armstrong <ta...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 be/src/util/collection-metrics.h  |  62 ++++++
 be/src/util/histogram-metric.h    |  77 +++++++
 be/src/util/metrics-test.cc       | 411 ++++++++++++++++++++++++++++++++++++++
 be/src/util/metrics.cc            |  55 +++++
 be/src/util/metrics.h             |  68 +++++++
 tests/webserver/test_web_pages.py |   8 +
 6 files changed, 681 insertions(+)

diff --git a/be/src/util/collection-metrics.h b/be/src/util/collection-metrics.h
index dc7fb54..4cf3330 100644
--- a/be/src/util/collection-metrics.h
+++ b/be/src/util/collection-metrics.h
@@ -74,6 +74,12 @@ class SetMetric : public Metric {
 
   void Reset() { value_.clear(); }
 
+  virtual TMetricKind::type ToPrometheus(
+      std::string name, std::stringstream* val, std::stringstream* metric_kind) {
+    // Sets have no Prometheus representation, so nothing is written to the streams.
+    return TMetricKind::SET;
+  }
+
   virtual void ToJson(rapidjson::Document* document, rapidjson::Value* value) {
     rapidjson::Value container(rapidjson::kObjectType);
     AddStandardFields(document, &container);
@@ -157,6 +163,62 @@ class StatsMetric : public Metric {
     acc_ = Accumulator();
   }
 
+  virtual TMetricKind::type ToPrometheus(
+      std::string name, std::stringstream* val, std::stringstream* metric_kind) {
+    boost::lock_guard<boost::mutex> l(lock_);
+    // Always writes "<name>_total"; the other series need at least one recorded sample.
+    *val << name << "_total " << boost::accumulators::count(acc_) << "\n";
+    // NOTE(review): TYPE below is "counter", but last/min/mean can decrease - confirm.
+    if (boost::accumulators::count(acc_) > 0) {
+      if (IsUnitTimeBased(unit_)) {
+        *val << name << "_last " << ConvertToPrometheusSecs(value_, unit_) << "\n";
+      } else {
+        *val << name << "_last " << value_ << "\n";
+      }
+      // Each statistic below is written only if its bit is set in 'StatsSelection'.
+      if (StatsSelection & StatsType::MIN) {
+        if (IsUnitTimeBased(unit_)) {
+          *val << name << "_min "
+               << ConvertToPrometheusSecs(boost::accumulators::min(acc_), unit_) << "\n";
+        } else {
+          *val << name << "_min " << boost::accumulators::min(acc_) << "\n";
+        }
+      }
+
+      if (StatsSelection & StatsType::MAX) {
+        if (IsUnitTimeBased(unit_)) {
+          *val << name << "_max "
+               << ConvertToPrometheusSecs(boost::accumulators::max(acc_), unit_) << "\n";
+        } else {
+          *val << name << "_max " << boost::accumulators::max(acc_) << "\n";
+        }
+      }
+
+      if (StatsSelection & StatsType::MEAN) {
+        if (IsUnitTimeBased(unit_)) {
+          *val << name << "_mean "
+               << ConvertToPrometheusSecs(boost::accumulators::mean(acc_), unit_) << "\n";
+        } else {
+          *val << name << "_mean " << boost::accumulators::mean(acc_) << "\n";
+        }
+      }
+      // Standard deviation is derived as the square root of the accumulated variance.
+      if (StatsSelection & StatsType::STDDEV) {
+        if (IsUnitTimeBased(unit_)) {
+          *val << name << "_stddev "
+               << ConvertToPrometheusSecs(
+                      std::sqrt(boost::accumulators::variance(acc_)), unit_)
+               << "\n";
+        } else {
+          *val << name << "_stddev " << std::sqrt(boost::accumulators::variance(acc_))
+               << "\n";
+        }
+      }
+    }
+    *metric_kind << "# TYPE " << name << " counter";
+    return TMetricKind::STATS;
+  }
+
   virtual void ToJson(rapidjson::Document* document, rapidjson::Value* val) {
     boost::lock_guard<boost::mutex> l(lock_);
     rapidjson::Value container(rapidjson::kObjectType);
diff --git a/be/src/util/histogram-metric.h b/be/src/util/histogram-metric.h
index 43d4eaf..ca4499f 100644
--- a/be/src/util/histogram-metric.h
+++ b/be/src/util/histogram-metric.h
@@ -71,6 +71,83 @@ class HistogramMetric : public Metric {
     *value = container;
   }
 
+  virtual TMetricKind::type ToPrometheus(std::string name, std::stringstream* value,
+      std::stringstream* metric_kind) override {
+    {
+      boost::lock_guard<SpinLock> l(lock_);
+      // Writes fixed percentiles of the histogram, then its max, min and sample count.
+      // Values in 'TIME_MS', 'TIME_US' or 'TIME_NS' are converted to seconds before
+      // being written; values in any other unit are written unchanged.
+      if (IsUnitTimeBased(unit_)) {
+        *value << name << "{le=\"0.2\"} "
+               << ConvertToPrometheusSecs(histogram_->ValueAtPercentile(25), unit_)
+               << "\n";
+      } else {
+        *value << name << "{le=\"0.2\"} " << histogram_->ValueAtPercentile(25) << "\n";
+      }
+      // NOTE(review): "0.2" is paired with the 25th percentile and "0.7" with the 75th.
+      if (IsUnitTimeBased(unit_)) {
+        *value << name << "{le=\"0.5\"} "
+               << ConvertToPrometheusSecs(histogram_->ValueAtPercentile(50), unit_)
+               << "\n";
+      } else {
+        *value << name << "{le=\"0.5\"} " << histogram_->ValueAtPercentile(50) << "\n";
+      }
+
+      if (IsUnitTimeBased(unit_)) {
+        *value << name << "{le=\"0.7\"} "
+               << ConvertToPrometheusSecs(histogram_->ValueAtPercentile(75), unit_)
+               << "\n";
+      } else {
+        *value << name << "{le=\"0.7\"} " << histogram_->ValueAtPercentile(75) << "\n";
+      }
+
+      if (IsUnitTimeBased(unit_)) {
+        *value << name << "{le=\"0.9\"} "
+               << ConvertToPrometheusSecs(histogram_->ValueAtPercentile(90), unit_)
+               << "\n";
+      } else {
+        *value << name << "{le=\"0.9\"} " << histogram_->ValueAtPercentile(90) << "\n";
+      }
+
+      if (IsUnitTimeBased(unit_)) {
+        *value << name << "{le=\"0.95\"} "
+               << ConvertToPrometheusSecs(histogram_->ValueAtPercentile(95), unit_)
+               << "\n";
+      } else {
+        *value << name << "{le=\"0.95\"} " << histogram_->ValueAtPercentile(95) << "\n";
+      }
+
+      if (IsUnitTimeBased(unit_)) {
+        *value << name << "{le=\"0.999\"} "
+               << ConvertToPrometheusSecs(histogram_->ValueAtPercentile(99.9), unit_)
+               << "\n";
+      } else {
+        *value << name << "{le=\"0.999\"} " << histogram_->ValueAtPercentile(99.9)
+               << "\n";
+      }
+      // Maximum and minimum recorded values, followed by the number of samples.
+      if (IsUnitTimeBased(unit_)) {
+        *value << name << "_max "
+               << ConvertToPrometheusSecs(histogram_->MaxValue(), unit_) << "\n";
+      } else {
+        *value << name << "_max " << histogram_->MaxValue() << "\n";
+      }
+
+      if (IsUnitTimeBased(unit_)) {
+        *value << name << "_min "
+               << ConvertToPrometheusSecs(histogram_->MinValue(), unit_) << "\n";
+      } else {
+        *value << name << "_min " << histogram_->MinValue() << "\n";
+      }
+      // No trailing newline here: the caller appends one after the metric's value.
+      *value << name << "_count " << histogram_->TotalCount();
+    }
+
+    *metric_kind << "# TYPE " << name << " histogram";
+    return TMetricKind::HISTOGRAM;
+  }
+
   void Update(int64_t val) {
     boost::lock_guard<SpinLock> l(lock_);
     histogram_->Increment(val);
diff --git a/be/src/util/metrics-test.cc b/be/src/util/metrics-test.cc
index accbdcd..3302fb3 100644
--- a/be/src/util/metrics-test.cc
+++ b/be/src/util/metrics-test.cc
@@ -463,5 +463,416 @@ TEST_F(MetricsTest, MetricGroupJson) {
   EXPECT_EQ(val2["name"].GetString(), string("child1"));
 }
 
+void AssertPrometheus(const std::stringstream& val, const string& name,
+    const string& value, const string& desc, const string& kind = "") {
+  std::stringstream exp_val;
+  // Build the expected output: "# HELP" and "# TYPE" lines followed by the samples.
+  exp_val << "# HELP " << name << " " << desc << "\n"
+          << "# TYPE " << name << " " << kind << "\n";
+  if (name == "stats_metric" || name == "histogram_metric") {
+    exp_val << value + "\n";
+  } else {
+    exp_val << name << " " << value + "\n";
+  }
+  EXPECT_EQ(val.str(), exp_val.str());
+}
+
+TEST_F(MetricsTest, CountersPrometheus) {
+  MetricGroup metrics("CounterMetrics");
+  AddMetricDef("counter", TMetricKind::COUNTER, TUnit::UNIT, "description");
+  metrics.AddCounter("counter", 0);
+  std::stringstream counter_val;
+  metrics.ToPrometheus(true, &counter_val);
+  AssertPrometheus(counter_val, "counter", "0", "description", "counter");
+}
+
+TEST_F(MetricsTest, CountersBytesPrometheus) {
+  MetricGroup metrics("CounterMetrics");
+  AddMetricDef("counter", TMetricKind::COUNTER, TUnit::BYTES, "description");
+  metrics.AddCounter("counter", 555);
+  std::stringstream counter_val;
+  metrics.ToPrometheus(true, &counter_val);
+  AssertPrometheus(counter_val, "counter", "555", "description", "counter");
+}
+
+TEST_F(MetricsTest, CountersNonePrometheus) {
+  MetricGroup metrics("CounterMetrics");
+  AddMetricDef("counter", TMetricKind::COUNTER, TUnit::NONE, "description");
+  metrics.AddCounter("counter", 0);
+  std::stringstream counter_val;
+  metrics.ToPrometheus(true, &counter_val);
+  AssertPrometheus(counter_val, "counter", "0", "description", "counter");
+}
+
+TEST_F(MetricsTest, CountersTimeMSPrometheus) {
+  MetricGroup metrics("CounterMetrics");
+  AddMetricDef("counter", TMetricKind::COUNTER, TUnit::TIME_MS, "description");
+  metrics.AddCounter("counter", 4354364);
+  std::stringstream counter_val;
+  metrics.ToPrometheus(true, &counter_val);
+  AssertPrometheus(counter_val, "counter", "4354.36", "description", "counter");
+}
+
+TEST_F(MetricsTest, CountersTimeNSPrometheus) {
+  MetricGroup metrics("CounterMetrics");
+  AddMetricDef("counter", TMetricKind::COUNTER, TUnit::TIME_NS, "description");
+  metrics.AddCounter("counter", 4354364234);
+  std::stringstream counter_val;
+  metrics.ToPrometheus(true, &counter_val);
+  AssertPrometheus(counter_val, "counter", "4.35436", "description", "counter");
+}
+
+TEST_F(MetricsTest, CountersTimeSPrometheus) {
+  MetricGroup metrics("CounterMetrics");
+  AddMetricDef("counter", TMetricKind::COUNTER, TUnit::TIME_S, "description");
+  metrics.AddCounter("counter", 120);
+  std::stringstream counter_val;
+  metrics.ToPrometheus(true, &counter_val);
+  AssertPrometheus(counter_val, "counter", "120", "description", "counter");
+}
+
+TEST_F(MetricsTest, GaugesPrometheus) {
+  MetricGroup metrics("GaugeMetrics");
+  AddMetricDef("gauge", TMetricKind::GAUGE, TUnit::NONE);
+  metrics.AddGauge("gauge", 10);
+  std::stringstream gauge_val;
+  metrics.ToPrometheus(true, &gauge_val);
+  AssertPrometheus(gauge_val, "gauge", "10", "", "gauge");
+}
+
+TEST_F(MetricsTest, GaugesBytesPrometheus) {
+  MetricGroup metrics("GaugeMetrics");
+  AddMetricDef("gauge", TMetricKind::GAUGE, TUnit::BYTES);
+  metrics.AddGauge("gauge", 150000);
+  std::stringstream gauge_val;
+  metrics.ToPrometheus(true, &gauge_val);
+  AssertPrometheus(gauge_val, "gauge", "150000", "", "gauge");
+}
+
+TEST_F(MetricsTest, GaugesTimeMSPrometheus) {
+  MetricGroup metrics("GaugeMetrics");
+  AddMetricDef("gauge", TMetricKind::GAUGE, TUnit::TIME_MS);
+  metrics.AddGauge("gauge", 10000);
+  std::stringstream gauge_val;
+  metrics.ToPrometheus(true, &gauge_val);
+  AssertPrometheus(gauge_val, "gauge", "10", "", "gauge");
+}
+
+TEST_F(MetricsTest, GaugesTimeNSPrometheus) {
+  MetricGroup metrics("GaugeMetrics");
+  AddMetricDef("gauge", TMetricKind::GAUGE, TUnit::TIME_NS);
+  metrics.AddGauge("gauge", 2334123456);
+  std::stringstream gauge_val;
+  metrics.ToPrometheus(true, &gauge_val);
+  AssertPrometheus(gauge_val, "gauge", "2.33412", "", "gauge");
+}
+
+TEST_F(MetricsTest, GaugesTimeSPrometheus) {
+  MetricGroup metrics("GaugeMetrics");
+  AddMetricDef("gauge", TMetricKind::GAUGE, TUnit::TIME_S);
+  metrics.AddGauge("gauge", 1500);
+  std::stringstream gauge_val;
+  metrics.ToPrometheus(true, &gauge_val);
+  AssertPrometheus(gauge_val, "gauge", "1500", "", "gauge");
+}
+
+TEST_F(MetricsTest, GaugesUnitPrometheus) {
+  MetricGroup metrics("GaugeMetrics");
+  AddMetricDef("gauge", TMetricKind::GAUGE, TUnit::UNIT);
+  metrics.AddGauge("gauge", 111);
+  std::stringstream gauge_val;
+  metrics.ToPrometheus(true, &gauge_val);
+  AssertPrometheus(gauge_val, "gauge", "111", "", "gauge");
+}
+
+TEST_F(MetricsTest, StatsMetricsPrometheus) {
+  MetricGroup metrics("StatsMetrics");
+  AddMetricDef("stats_metric", TMetricKind::STATS, TUnit::UNIT);
+  StatsMetric<double>* metric =
+      StatsMetric<double>::CreateAndRegister(&metrics, "stats_metric");
+  metric->Update(10.0);
+  metric->Update(20.0);
+  std::stringstream stats_val;
+  metrics.ToPrometheus(true, &stats_val);
+  AssertPrometheus(stats_val, "stats_metric",
+      "stats_metric_total 2\n"
+      "stats_metric_last 20\n"
+      "stats_metric_min 10\n"
+      "stats_metric_max 20\n"
+      "stats_metric_mean 15\n"
+      "stats_metric_stddev 5\n",
+      "", "counter");
+}
+
+TEST_F(MetricsTest, StatsMetricsBytesPrometheus) {
+  MetricGroup metrics("StatsMetrics");
+  AddMetricDef("stats_metric", TMetricKind::STATS, TUnit::BYTES);
+  StatsMetric<double>* metric =
+      StatsMetric<double>::CreateAndRegister(&metrics, "stats_metric");
+  metric->Update(10.0);
+  metric->Update(2230.1234567);
+  std::stringstream stats_val;
+  metrics.ToPrometheus(true, &stats_val);
+  AssertPrometheus(stats_val, "stats_metric",
+      "stats_metric_total 2\n"
+      "stats_metric_last 2230.12\n"
+      "stats_metric_min 10\n"
+      "stats_metric_max 2230.12\n"
+      "stats_metric_mean 1120.06\n"
+      "stats_metric_stddev 1110.06\n",
+      "", "counter");
+}
+
+TEST_F(MetricsTest, StatsMetricsNonePrometheus) {
+  MetricGroup metrics("StatsMetrics");
+  AddMetricDef("stats_metric", TMetricKind::STATS, TUnit::NONE);
+  StatsMetric<double>* metric =
+      StatsMetric<double>::CreateAndRegister(&metrics, "stats_metric");
+  metric->Update(10.0);
+  metric->Update(20.0);
+  std::stringstream stats_val;
+  metrics.ToPrometheus(true, &stats_val);
+  AssertPrometheus(stats_val, "stats_metric",
+      "stats_metric_total 2\n"
+      "stats_metric_last 20\n"
+      "stats_metric_min 10\n"
+      "stats_metric_max 20\n"
+      "stats_metric_mean 15\n"
+      "stats_metric_stddev 5\n",
+      "", "counter");
+}
+
+TEST_F(MetricsTest, StatsMetricsTimeMSPrometheus) {
+  MetricGroup metrics("StatsMetrics");
+  AddMetricDef("stats_metric", TMetricKind::STATS, TUnit::TIME_MS);
+  StatsMetric<double>* metric =
+      StatsMetric<double>::CreateAndRegister(&metrics, "stats_metric");
+  metric->Update(10.0);
+  metric->Update(20.0);
+  std::stringstream stats_val;
+  metrics.ToPrometheus(true, &stats_val);
+  AssertPrometheus(stats_val, "stats_metric",
+      "stats_metric_total 2\n"
+      "stats_metric_last 0.02\n"
+      "stats_metric_min 0.01\n"
+      "stats_metric_max 0.02\n"
+      "stats_metric_mean 0.015\n"
+      "stats_metric_stddev 0.005\n",
+      "", "counter");
+}
+
+TEST_F(MetricsTest, StatsMetricsTimeNSPrometheus) {
+  MetricGroup metrics("StatsMetrics");
+  AddMetricDef("stats_metric", TMetricKind::STATS, TUnit::TIME_NS);
+  StatsMetric<double>* metric =
+      StatsMetric<double>::CreateAndRegister(&metrics, "stats_metric");
+  metric->Update(10.12345);
+  metric->Update(20.567);
+  std::stringstream stats_val;
+  metrics.ToPrometheus(true, &stats_val);
+  AssertPrometheus(stats_val, "stats_metric",
+      "stats_metric_total 2\n"
+      "stats_metric_last 2.0567e-08\n"
+      "stats_metric_min 1.01235e-08\n"
+      "stats_metric_max 2.0567e-08\n"
+      "stats_metric_mean 1.53452e-08\n"
+      "stats_metric_stddev 5.22178e-09\n",
+      "", "counter");
+}
+
+TEST_F(MetricsTest, StatsMetricsTimeSPrometheus) {
+  MetricGroup metrics("StatsMetrics");
+  AddMetricDef("stats_metric", TMetricKind::STATS, TUnit::TIME_S);
+  StatsMetric<double>* metric =
+      StatsMetric<double>::CreateAndRegister(&metrics, "stats_metric");
+  metric->Update(10.22);
+  metric->Update(20.22);
+  std::stringstream stats_val;
+  metrics.ToPrometheus(true, &stats_val);
+  AssertPrometheus(stats_val, "stats_metric",
+      "stats_metric_total 2\n"
+      "stats_metric_last 20.22\n"
+      "stats_metric_min 10.22\n"
+      "stats_metric_max 20.22\n"
+      "stats_metric_mean 15.22\n"
+      "stats_metric_stddev 5\n",
+      "", "counter");
+}
+
+TEST_F(MetricsTest, HistogramPrometheus) {
+  MetricGroup metrics("HistoMetrics");
+  TMetricDef metric_def =
+      MakeTMetricDef("histogram-metric", TMetricKind::HISTOGRAM, TUnit::TIME_MS);
+  constexpr int MAX_VALUE = 10000;
+  HistogramMetric* metric =
+      metrics.RegisterMetric(new HistogramMetric(metric_def, MAX_VALUE, 3));
+
+  // Add value beyond limit to make sure it's recorded accurately.
+  for (int i = 0; i <= MAX_VALUE + 1; ++i) metric->Update(i);
+
+  std::stringstream val;
+  metrics.ToPrometheus(true, &val);
+  AssertPrometheus(val, "histogram_metric",
+      "histogram_metric{le=\"0.2\"} 2.5\n"
+      "histogram_metric{le=\"0.5\"} 5\n"
+      "histogram_metric{le=\"0.7\"} 7.5\n"
+      "histogram_metric{le=\"0.9\"} 9\n"
+      "histogram_metric{le=\"0.95\"} 9.496\n"
+      "histogram_metric{le=\"0.999\"} 9.984\n"
+      "histogram_metric_max 10.001\n"
+      "histogram_metric_min 0\n"
+      "histogram_metric_count 10002",
+      "", "histogram");
+}
+
+TEST_F(MetricsTest, HistogramTimeNSPrometheus) {
+  MetricGroup metrics("HistoMetrics");
+  TMetricDef metric_def =
+      MakeTMetricDef("histogram-metric", TMetricKind::HISTOGRAM, TUnit::TIME_NS);
+  constexpr int MAX_VALUE = 10000;
+  HistogramMetric* metric =
+      metrics.RegisterMetric(new HistogramMetric(metric_def, MAX_VALUE, 3));
+
+  // Add value beyond limit to make sure it's recorded accurately.
+  for (int i = 0; i <= MAX_VALUE + 1; ++i) metric->Update(i);
+
+  std::stringstream val;
+  metrics.ToPrometheus(true, &val);
+  AssertPrometheus(val, "histogram_metric",
+      "histogram_metric{le=\"0.2\"} 2.5e-06\n"
+      "histogram_metric{le=\"0.5\"} 5e-06\n"
+      "histogram_metric{le=\"0.7\"} 7.5e-06\n"
+      "histogram_metric{le=\"0.9\"} 9e-06\n"
+      "histogram_metric{le=\"0.95\"} 9.496e-06\n"
+      "histogram_metric{le=\"0.999\"} 9.984e-06\n"
+      "histogram_metric_max 1.0001e-05\n"
+      "histogram_metric_min 0\n"
+      "histogram_metric_count 10002",
+      "", "histogram");
+}
+
+TEST_F(MetricsTest, HistogramTimeSPrometheus) {
+  MetricGroup metrics("HistoMetrics");
+  TMetricDef metric_def =
+      MakeTMetricDef("histogram-metric", TMetricKind::HISTOGRAM, TUnit::TIME_S);
+  constexpr int MAX_VALUE = 10000;
+  HistogramMetric* metric =
+      metrics.RegisterMetric(new HistogramMetric(metric_def, MAX_VALUE, 3));
+
+  // Add value beyond limit to make sure it's recorded accurately.
+  for (int i = 0; i <= MAX_VALUE + 1; ++i) metric->Update(i);
+
+  std::stringstream val;
+  metrics.ToPrometheus(true, &val);
+  AssertPrometheus(val, "histogram_metric",
+      "histogram_metric{le=\"0.2\"} 2500\n"
+      "histogram_metric{le=\"0.5\"} 5000\n"
+      "histogram_metric{le=\"0.7\"} 7500\n"
+      "histogram_metric{le=\"0.9\"} 9000\n"
+      "histogram_metric{le=\"0.95\"} 9496\n"
+      "histogram_metric{le=\"0.999\"} 9984\n"
+      "histogram_metric_max 10001\n"
+      "histogram_metric_min 0\n"
+      "histogram_metric_count 10002",
+      "", "histogram");
+}
+
+TEST_F(MetricsTest, HistogramBytesPrometheus) {
+  MetricGroup metrics("HistoMetrics");
+  TMetricDef metric_def =
+      MakeTMetricDef("histogram-metric", TMetricKind::HISTOGRAM, TUnit::BYTES);
+  constexpr int MAX_VALUE = 10000;
+  HistogramMetric* metric =
+      metrics.RegisterMetric(new HistogramMetric(metric_def, MAX_VALUE, 3));
+
+  // Add value beyond limit to make sure it's recorded accurately.
+  for (int i = 0; i <= MAX_VALUE + 1; ++i) metric->Update(i);
+
+  std::stringstream val;
+  metrics.ToPrometheus(true, &val);
+  AssertPrometheus(val, "histogram_metric",
+      "histogram_metric{le=\"0.2\"} 2500\n"
+      "histogram_metric{le=\"0.5\"} 5000\n"
+      "histogram_metric{le=\"0.7\"} 7500\n"
+      "histogram_metric{le=\"0.9\"} 9000\n"
+      "histogram_metric{le=\"0.95\"} 9496\n"
+      "histogram_metric{le=\"0.999\"} 9984\n"
+      "histogram_metric_max 10001\n"
+      "histogram_metric_min 0\n"
+      "histogram_metric_count 10002",
+      "", "histogram");
+}
+
+TEST_F(MetricsTest, HistogramUnitPrometheus) {
+  MetricGroup metrics("HistoMetrics");
+  TMetricDef metric_def =
+      MakeTMetricDef("histogram-metric", TMetricKind::HISTOGRAM, TUnit::UNIT);
+  constexpr int MAX_VALUE = 10000;
+  HistogramMetric* metric =
+      metrics.RegisterMetric(new HistogramMetric(metric_def, MAX_VALUE, 3));
+
+  // Add value beyond limit to make sure it's recorded accurately.
+  for (int i = 0; i <= MAX_VALUE + 1; ++i) metric->Update(i);
+
+  std::stringstream val;
+  metrics.ToPrometheus(true, &val);
+  AssertPrometheus(val, "histogram_metric",
+      "histogram_metric{le=\"0.2\"} 2500\n"
+      "histogram_metric{le=\"0.5\"} 5000\n"
+      "histogram_metric{le=\"0.7\"} 7500\n"
+      "histogram_metric{le=\"0.9\"} 9000\n"
+      "histogram_metric{le=\"0.95\"} 9496\n"
+      "histogram_metric{le=\"0.999\"} 9984\n"
+      "histogram_metric_max 10001\n"
+      "histogram_metric_min 0\n"
+      "histogram_metric_count 10002",
+      "", "histogram");
+}
+
+TEST_F(MetricsTest, MetricGroupPrometheus) {
+  std::stringstream exp_val;
+  exp_val << "# HELP counter1 description\n"
+             "# TYPE counter1 counter\n"
+             "counter1 2048\n"
+             "# HELP counter2 description\n"
+             "# TYPE counter2 counter\n"
+             "counter2 2048\n"
+             "# HELP child_counter description\n"
+             "# TYPE child_counter counter\n"
+             "child_counter 0\n";
+  MetricGroup metrics("PrometheusTest");
+  AddMetricDef("counter1", TMetricKind::COUNTER, TUnit::BYTES, "description");
+  AddMetricDef("counter2", TMetricKind::COUNTER, TUnit::BYTES, "description");
+  metrics.AddCounter("counter1", 2048);
+  metrics.AddCounter("counter2", 2048);
+
+  MetricGroup* find_result = metrics.FindChildGroup("child1");
+  EXPECT_EQ(find_result, reinterpret_cast<MetricGroup*>(NULL));
+
+  metrics.GetOrCreateChildGroup("child1");
+  AddMetricDef("child_counter", TMetricKind::COUNTER, TUnit::BYTES, "description");
+  metrics.GetOrCreateChildGroup("child2")->AddCounter("child_counter", 0);
+
+  IntCounter* counter = metrics.FindMetricForTesting<IntCounter>(string("child_counter"));
+  ASSERT_NE(counter, reinterpret_cast<IntCounter*>(NULL));
+
+  std::stringstream val;
+  metrics.ToPrometheus(true, &val);
+  EXPECT_EQ(val.str(), exp_val.str());
+}
+
+// Metric groups on which no metric was registered must render as an empty string.
+TEST_F(MetricsTest, StatsMetricsNullPrometheus) {
+  MetricGroup nullMetrics("StatsMetrics");
+  AddMetricDef("", TMetricKind::STATS, TUnit::TIME_S);
+  std::stringstream stats_val;
+  nullMetrics.ToPrometheus(true, &stats_val);
+  EXPECT_EQ("", stats_val.str());
+  // A second empty group appends nothing either: 'stats_val' is reused and stays empty.
+  MetricGroup metrics("Metrics");
+  AddMetricDef("test", TMetricKind::STATS, TUnit::TIME_S);
+  metrics.ToPrometheus(true, &stats_val);
+  EXPECT_EQ("", stats_val.str());
+}
 }
 
diff --git a/be/src/util/metrics.cc b/be/src/util/metrics.cc
index 34e5018..2a9cbf6 100644
--- a/be/src/util/metrics.cc
+++ b/be/src/util/metrics.cc
@@ -31,6 +31,7 @@
 
 #include "common/names.h"
 
+using boost::algorithm::replace_all_copy;
 using namespace impala;
 using namespace rapidjson;
 using namespace strings;
@@ -91,6 +92,10 @@ Status MetricGroup::Init(Webserver* webserver) {
     Webserver::UrlCallback json_callback =
         bind<void>(mem_fn(&MetricGroup::TemplateCallback), this, _1, _2);
     webserver->RegisterUrlCallback("/metrics", "metrics.tmpl", json_callback, true);
+
+    Webserver::RawUrlCallback prometheus_callback =
+        bind<void>(mem_fn(&MetricGroup::PrometheusCallback), this, _1, _2);
+    webserver->RegisterUrlCallback("/metrics_prometheus", prometheus_callback);
   }
 
   return Status::OK();
@@ -172,6 +177,20 @@ void MetricGroup::TemplateCallback(const Webserver::WebRequest& req,
   }
 }
 
+void MetricGroup::PrometheusCallback(
+    const Webserver::WebRequest& req, stringstream* data) {
+  const auto& args = req.parsed_args;
+  Webserver::ArgumentMap::const_iterator metric_group = args.find("metric_group");
+
+  lock_guard<SpinLock> l(lock_);
+  // If no particular metric group is requested, render this metric group (and all its
+  // children) into 'data'. A request that names a 'metric_group' currently leaves
+  // 'data' untouched.
+  if (metric_group == args.end()) {
+    ToPrometheus(true, data);
+  }
+}
+
 void MetricGroup::ToJson(bool include_children, Document* document, Value* out_val) {
   Value metric_list(kArrayType);
   for (const MetricMap::value_type& m: metric_map_) {
@@ -197,6 +216,42 @@ void MetricGroup::ToJson(bool include_children, Document* document, Value* out_v
   *out_val = container;
 }
 
+void MetricGroup::ToPrometheus(bool include_children, stringstream* out_val) {
+  // Appends, for each metric in this group, a "# HELP" line, a "# TYPE" line and the
+  // metric's sample line(s) to 'out_val'; child groups follow if 'include_children'.
+  for (auto const& m : metric_map_) {
+    stringstream metric_value;
+    stringstream metric_kind;
+
+    // Prometheus metric names may not contain '.' or '-', so replace both with '_'.
+    string name = replace_all_copy(m.first, ".", "_");
+    name = replace_all_copy(name, "-", "_");
+    TMetricKind::type metric_type =
+        m.second->ToPrometheus(name, &metric_value, &metric_kind);
+    if (metric_type == TMetricKind::SET || metric_type == TMetricKind::PROPERTY) {
+      // not supported in prometheus
+      continue;
+    }
+
+    *out_val << "# HELP " << name << " " << m.second->description_ << "\n";
+    *out_val << metric_kind.str() << "\n";
+    // STATS and HISTOGRAM metrics write complete "<name>... <value>" lines themselves;
+    // every other kind writes only its value, so the name is prepended here.
+    if (metric_type != TMetricKind::HISTOGRAM && metric_type != TMetricKind::STATS) {
+      *out_val << name << " ";
+    }
+    *out_val << metric_value.str();
+    *out_val << "\n";
+  }
+
+  if (include_children) {
+    // Child groups are always rendered together with all of their own descendants.
+    for (const ChildGroupMap::value_type& child : children_) {
+      child.second->ToPrometheus(true, out_val);
+    }
+  }
+}
+
 MetricGroup* MetricGroup::GetOrCreateChildGroup(const string& name) {
   lock_guard<SpinLock> l(lock_);
   ChildGroupMap::iterator it = children_.find(name);
diff --git a/be/src/util/metrics.h b/be/src/util/metrics.h
index 80f899d..b319f35 100644
--- a/be/src/util/metrics.h
+++ b/be/src/util/metrics.h
@@ -96,6 +96,16 @@ class Metric {
   /// This method is kept for backwards-compatibility with CM5.0.
   virtual void ToLegacyJson(rapidjson::Document* document) = 0;
 
+  /// Renders this metric in the Prometheus text exposition format. Details of the
+  /// format can be found at:
+  /// https://prometheus.io/docs/instrumenting/exposition_formats
+  /// 'name' is the metric name to print. The sample value(s) are written to 'val' and
+  /// the "# TYPE" line (without a trailing newline) is written to 'metric_kind'.
+  /// Returns the kind of this metric; implementations for kinds that Prometheus does
+  /// not support return that kind and the caller discards whatever was written.
+  virtual TMetricKind::type ToPrometheus(
+      string name, std::stringstream* val, std::stringstream* metric_kind) = 0;
+
   /// Writes a human-readable representation of this metric to 'out'. This is the
   /// representation that is often displayed in webpages etc.
   virtual std::string ToHumanReadable() = 0;
@@ -103,6 +113,11 @@ class Metric {
   const std::string& key() const { return key_; }
   const std::string& description() const { return description_; }
 
+  /// True iff 'type' is a sub-second time unit that needs conversion to seconds.
+  static bool IsUnitTimeBased(TUnit::type type) {
+    return type == TUnit::TIME_MS || type == TUnit::TIME_US || type == TUnit::TIME_NS;
+  }
+
  protected:
   /// Unique key identifying this metric
   const std::string key_;
@@ -120,6 +135,26 @@ class Metric {
   void AddStandardFields(rapidjson::Document* document, rapidjson::Value* val);
 };
 
+/// Converts 'val' from 'unit' to seconds; units other than TIME_MS/US/NS pass through.
+template <typename T>
+inline double ConvertToPrometheusSecs(const T& val, TUnit::type unit) {
+  const double value = val;
+  switch (unit) {
+    case TUnit::TIME_MS: return value / 1000;
+    case TUnit::TIME_US: return value / 1000000;
+    case TUnit::TIME_NS: return value / 1000000000;
+    default: return value;
+  }
+}
+
+/// String metrics have no numeric value, so this specialization must never be called.
+template <>
+inline double ConvertToPrometheusSecs<std::string>(
+    const std::string& val, TUnit::type unit) {
+  DCHECK(false) << "Should not be called for string metrics";
+  return 0.0;
+}
+
 /// A ScalarMetric has a value which is a simple primitive type: e.g. integers, strings
 /// and floats. It is parameterised not only by the type of its value, but by both the
 /// unit (e.g. bytes/s), drawn from TUnit and the 'kind' of the metric itself.
@@ -160,6 +195,30 @@ class ScalarMetric: public Metric {
     *val = container;
   }
 
+  /// Writes the current value to 'val' and the "# TYPE" line to 'metric_kind'. Values
+  /// in sub-second time units are converted to seconds. Returns the metric kind.
+  virtual TMetricKind::type ToPrometheus(
+      std::string name, std::stringstream* val, std::stringstream* metric_kind) override {
+    // Prometheus has no 'property' type, so write nothing. Compare the enum directly:
+    // the printed enum name is only lower-cased further down.
+    if (kind() == TMetricKind::PROPERTY) return TMetricKind::PROPERTY;
+
+    if (IsUnitTimeBased(unit())) {
+      // Prometheus expects times in seconds, so convert TIME_MS, TIME_US and TIME_NS.
+      *val << ConvertToPrometheusSecs(GetValue(), unit());
+    } else {
+      *val << GetValue();
+    }
+
+    // convert metric type to lower case, that's what prometheus expects
+    std::string metric_type = PrintThriftEnum(kind()).c_str();
+    std::transform(
+        metric_type.begin(), metric_type.end(), metric_type.begin(), ::tolower);
+
+    *metric_kind << "# TYPE " << name << " " << metric_type;
+    return kind();
+  }
+
   virtual std::string ToHumanReadable() override {
     return PrettyPrinter::Print(GetValue(), unit());
   }
@@ -440,6 +499,9 @@ class MetricGroup {
   void ToJson(bool include_children, rapidjson::Document* document,
       rapidjson::Value* out_val);
 
+  /// Renders this group (and optionally all children, recursively) as Prometheus text.
+  void ToPrometheus(bool include_children, std::stringstream* out_val);
+
   /// Creates or returns an already existing child metric group.
   MetricGroup* GetOrCreateChildGroup(const std::string& name);
 
@@ -476,6 +538,12 @@ class MetricGroup {
   void TemplateCallback(const Webserver::WebRequest& req,
       rapidjson::Document* document);
 
+  /// Webserver callback for /metrics_prometheus. Writes every metric in this group and
+  /// in all of its child groups to 'data' in the Prometheus text exposition format.
+  /// Nothing is written when the request carries a 'metric_group' argument; filtering
+  /// by metric group is not implemented.
+  void PrometheusCallback(const Webserver::WebRequest& req, std::stringstream* data);
+
   /// Legacy webpage callback for CM 5.0 and earlier. Produces a flattened map of (key,
   /// value) pairs for all metrics in this hierarchy.
   /// If args contains a paramater 'metric', only the json for that metric is returned.
diff --git a/tests/webserver/test_web_pages.py b/tests/webserver/test_web_pages.py
index 581df17..9dd3405 100644
--- a/tests/webserver/test_web_pages.py
+++ b/tests/webserver/test_web_pages.py
@@ -46,6 +46,7 @@ class TestWebPage(ImpalaTestSuite):
   ADMISSION_URL = "http://localhost:{0}/admission"
   RESET_RESOURCE_POOL_STATS_URL = "http://localhost:{0}/resource_pool_reset"
   BACKENDS_URL = "http://localhost:{0}/backends"
+  PROMETHEUS_METRICS_URL = "http://localhost:{0}/metrics_prometheus"
 
   # log4j changes do not apply to the statestore since it doesn't
   # have an embedded JVM. So we make two sets of ports to test the
@@ -564,3 +565,10 @@ class TestWebPage(ImpalaTestSuite):
     # Check the query id is in the content of the reponse.
     assert len(responses) == 1
     assert query_id in responses[0].text
+
+  def test_prometheus_metrics(self):
+    """Checks that /metrics_prometheus is served and lists a known metric."""
+    resp = self.get_and_check_status(self.PROMETHEUS_METRICS_URL)
+    assert len(resp) == 3
+    # Metric names appear with '.' and '-' replaced by '_' in the Prometheus output.
+    assert 'statestore_subscriber_heartbeat_interval_time_min' in resp[0].text