You are viewing a plain text version of this content. The canonical link for it is here.
Posted to notifications@apisix.apache.org by to...@apache.org on 2020/12/29 10:39:15 UTC
[apisix-ingress-controller] branch master updated: chore: add some
metrics (#143)
This is an automated email from the ASF dual-hosted git repository.
tokers pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/apisix-ingress-controller.git
The following commit(s) were added to refs/heads/master by this push:
new e0658f4 chore: add some metrics (#143)
e0658f4 is described below
commit e0658f4fb8c54530dc9c6dd2f5d66633b2ae6bad
Author: Alex Zhang <zc...@gmail.com>
AuthorDate: Tue Dec 29 18:39:04 2020 +0800
chore: add some metrics (#143)
---
cmd/ingress/ingress.go | 13 ++++
go.mod | 1 +
pkg/metrics/prometheus.go | 159 +++++++++++++++++++++++++++++++++++++++++
pkg/metrics/prometheus_test.go | 140 ++++++++++++++++++++++++++++++++++++
4 files changed, 313 insertions(+)
diff --git a/cmd/ingress/ingress.go b/cmd/ingress/ingress.go
index fb41553..b633d3f 100644
--- a/cmd/ingress/ingress.go
+++ b/cmd/ingress/ingress.go
@@ -23,6 +23,7 @@ import (
"syscall"
"time"
+ "github.com/api7/ingress-controller/pkg/metrics"
api6Informers "github.com/gxthrj/apisix-ingress-types/pkg/client/informers/externalversions"
"github.com/spf13/cobra"
@@ -107,6 +108,18 @@ the apisix cluster and others are created`,
if err := kube.InitInformer(cfg); err != nil {
dief("failed to initialize kube informers: %s", err)
}
+
+ // TODO: the metrics logic should be moved inside the ingress controller
+ // once the controller has been refactored.
+ podName := os.Getenv("POD_NAME")
+ podNamespace := os.Getenv("POD_NAMESPACE")
+ if podNamespace == "" {
+ podNamespace = "default"
+ }
+
+ collector := metrics.NewPrometheusCollector(podName, podNamespace)
+ collector.ResetLeader(true)
+
kubeClientSet := kube.GetKubeClient()
apisixClientset := kube.GetApisixClient()
sharedInformerFactory := api6Informers.NewSharedInformerFactory(apisixClientset, 0)
diff --git a/go.mod b/go.mod
index f4195b0..906ec2b 100644
--- a/go.mod
+++ b/go.mod
@@ -14,6 +14,7 @@ require (
github.com/onsi/ginkgo v1.11.0 // indirect
github.com/onsi/gomega v1.8.1 // indirect
github.com/prometheus/client_golang v0.9.3
+ github.com/prometheus/client_model v0.0.0-20190129233127-fd36f4220a90
github.com/sergi/go-diff v1.1.0 // indirect
github.com/spf13/cobra v1.1.1
github.com/stretchr/testify v1.4.0
diff --git a/pkg/metrics/prometheus.go b/pkg/metrics/prometheus.go
new file mode 100644
index 0000000..5f3fbfc
--- /dev/null
+++ b/pkg/metrics/prometheus.go
@@ -0,0 +1,159 @@
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+package metrics
+
+import (
+ "strconv"
+ "time"
+
+ "github.com/prometheus/client_golang/prometheus"
+)
+
+const (
+ _namespace = "apisix_ingress_controller" // passed as Namespace to every metric created in this package
+)
+
+// Collector is the set of metric-recording operations exposed by this package.
+type Collector interface {
+ // ResetLeader records the instance's role: true for leader, false for follower.
+ ResetLeader(bool)
+ // RecordAPISIXCode records a status code returned by APISIX (first argument)
+ // for the given resource type, e.g. "route" or "upstream" (second argument).
+ RecordAPISIXCode(int, string)
+ // RecordAPISIXLatency records the latency of one round trip from the
+ // ingress controller to APISIX.
+ RecordAPISIXLatency(time.Duration)
+ // IncrAPISIXRequest counts one more request to APISIX for the given resource type.
+ IncrAPISIXRequest(string)
+}
+
+// collector implements Collector on top of Prometheus metric primitives.
+type collector struct {
+ isLeader prometheus.Gauge // set to 1 when this instance is the leader, 0 otherwise
+ apisixLatency prometheus.Summary // round-trip latencies to APISIX, observed in nanoseconds
+ apisixRequests *prometheus.CounterVec // requests sent to APISIX, labelled by "resource"
+ apisixCodes *prometheus.GaugeVec // status codes from APISIX, labelled by "resource" and "status_code"
+}
+
+// NewPrometheusCollector creates the Prometheus metrics collector.
+//
+// podName and podNamespace are attached to every metric as the constant
+// labels "controller_pod" and "controller_namespace".
+//
+// The function also registers all internal metrics with the default
+// Prometheus registerer, so do not call it repeatedly outside of tests.
+// It panics if the registration fails.
+func NewPrometheusCollector(podName, podNamespace string) Collector {
+	constLabels := prometheus.Labels{
+		"controller_pod":       podName,
+		"controller_namespace": podNamespace,
+	}
+
+	c := &collector{
+		isLeader: prometheus.NewGauge(
+			prometheus.GaugeOpts{
+				Name:        "is_leader",
+				Namespace:   _namespace,
+				Help:        "Whether the role of controller instance is leader",
+				ConstLabels: constLabels,
+			},
+		),
+		apisixCodes: prometheus.NewGaugeVec(
+			prometheus.GaugeOpts{
+				Name:        "apisix_bad_status_codes",
+				Namespace:   _namespace,
+				Help:        "Status codes of requests to APISIX",
+				ConstLabels: constLabels,
+			},
+			[]string{"resource", "status_code"},
+		),
+		apisixLatency: prometheus.NewSummary(
+			prometheus.SummaryOpts{
+				Namespace:   _namespace,
+				Name:        "apisix_request_latencies",
+				Help:        "Request latencies with APISIX",
+				ConstLabels: constLabels,
+			},
+		),
+		apisixRequests: prometheus.NewCounterVec(
+			prometheus.CounterOpts{
+				Namespace:   _namespace,
+				Name:        "apisix_requests",
+				Help:        "Number of requests to APISIX",
+				ConstLabels: constLabels,
+			},
+			[]string{"resource"},
+		),
+	}
+
+	// The DefaultRegisterer is shared process-wide, so an earlier call
+	// (typically from another test case) may already have registered
+	// equal metrics. Unregister them first; otherwise MustRegister below
+	// would panic on the duplicate registration.
+	prometheus.Unregister(c.isLeader)
+	prometheus.Unregister(c.apisixCodes)
+	prometheus.Unregister(c.apisixLatency)
+	prometheus.Unregister(c.apisixRequests)
+
+	prometheus.MustRegister(
+		c.isLeader,
+		c.apisixCodes,
+		c.apisixLatency,
+		c.apisixRequests,
+	)
+
+	return c
+}
+
+// ResetLeader resets the leader role.
+func (c *collector) ResetLeader(leader bool) {
+ if leader {
+ c.isLeader.Set(1)
+ } else {
+ c.isLeader.Set(0)
+ }
+}
+
+// RecordAPISIXCode increments the series identified by the resource kind
+// (e.g. Route, Upstream) and the status code that APISIX returned.
+func (c *collector) RecordAPISIXCode(code int, resource string) {
+	statusCode := strconv.Itoa(code)
+	// Label values are positional and follow the order the vector was
+	// declared with: "resource" first, then "status_code".
+	c.apisixCodes.WithLabelValues(resource, statusCode).Inc()
+}
+
+// RecordAPISIXLatency observes, in nanoseconds, the latency of one complete
+// round trip from the controller to APISIX.
+func (c *collector) RecordAPISIXLatency(latency time.Duration) {
+	c.apisixLatency.Observe(float64(latency / time.Nanosecond))
+}
+
+// IncrAPISIXRequest adds one to the counter of requests sent to APISIX
+// for the given resource.
+func (c *collector) IncrAPISIXRequest(resource string) {
+	byResource := prometheus.Labels{"resource": resource}
+	c.apisixRequests.With(byResource).Inc()
+}
+
+// Collect implements the Collect method of prometheus.Collector by
+// forwarding the channel to every wrapped metric exactly once.
+func (c *collector) Collect(ch chan<- prometheus.Metric) {
+	c.isLeader.Collect(ch)
+	c.apisixLatency.Collect(ch)
+	c.apisixRequests.Collect(ch)
+	c.apisixCodes.Collect(ch)
+}
+
+// Describe implements the Describe method of prometheus.Collector by
+// forwarding the channel to every wrapped metric exactly once.
+func (c *collector) Describe(ch chan<- *prometheus.Desc) {
+	c.isLeader.Describe(ch)
+	c.apisixLatency.Describe(ch)
+	c.apisixRequests.Describe(ch)
+	c.apisixCodes.Describe(ch)
+}
diff --git a/pkg/metrics/prometheus_test.go b/pkg/metrics/prometheus_test.go
new file mode 100644
index 0000000..b8d4192
--- /dev/null
+++ b/pkg/metrics/prometheus_test.go
@@ -0,0 +1,140 @@
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+package metrics
+
+import (
+ "testing"
+ "time"
+
+ "github.com/prometheus/client_golang/prometheus"
+ io_prometheus_client "github.com/prometheus/client_model/go"
+ "github.com/stretchr/testify/assert"
+)
+
+func apisixBadStatusCodesTestHandler(t *testing.T, metrics []*io_prometheus_client.MetricFamily) func(*testing.T) {
+ return func(t *testing.T) {
+ metric := findMetric("apisix_ingress_controller_apisix_bad_status_codes", metrics)
+ assert.NotNil(t, metric)
+ assert.Equal(t, metric.Type.String(), "GAUGE")
+ m := metric.GetMetric()
+ assert.Len(t, m, 2)
+ assert.Equal(t, *m[0].Gauge.Value, float64(1))
+ assert.Equal(t, *m[0].Label[0].Name, "controller_namespace")
+ assert.Equal(t, *m[0].Label[0].Value, "default")
+ assert.Equal(t, *m[0].Label[1].Name, "controller_pod")
+ assert.Equal(t, *m[0].Label[1].Value, "test")
+ assert.Equal(t, *m[0].Label[2].Name, "resource")
+ assert.Equal(t, *m[0].Label[2].Value, "route")
+ assert.Equal(t, *m[0].Label[3].Name, "status_code")
+ assert.Equal(t, *m[0].Label[3].Value, "404")
+
+ assert.Equal(t, *m[1].Gauge.Value, float64(1))
+ assert.Equal(t, *m[1].Label[0].Name, "controller_namespace")
+ assert.Equal(t, *m[1].Label[0].Value, "default")
+ assert.Equal(t, *m[1].Label[1].Name, "controller_pod")
+ assert.Equal(t, *m[1].Label[1].Value, "test")
+ assert.Equal(t, *m[1].Label[2].Name, "resource")
+ assert.Equal(t, *m[1].Label[2].Value, "upstream")
+ assert.Equal(t, *m[1].Label[3].Name, "status_code")
+ assert.Equal(t, *m[1].Label[3].Value, "500")
+ }
+}
+
+func isLeaderTestHandler(t *testing.T, metrics []*io_prometheus_client.MetricFamily) func(*testing.T) {
+ return func(t *testing.T) {
+ metric := findMetric("apisix_ingress_controller_is_leader", metrics)
+ assert.NotNil(t, metric)
+ assert.Equal(t, metric.Type.String(), "GAUGE")
+ m := metric.GetMetric()
+ assert.Len(t, m, 1)
+
+ assert.Equal(t, *m[0].Gauge.Value, float64(1))
+ assert.Equal(t, *m[0].Label[0].Name, "controller_namespace")
+ assert.Equal(t, *m[0].Label[0].Value, "default")
+ assert.Equal(t, *m[0].Label[1].Name, "controller_pod")
+ assert.Equal(t, *m[0].Label[1].Value, "test")
+ }
+}
+
+func apisixLatencyTestHandler(t *testing.T, metrics []*io_prometheus_client.MetricFamily) func(t *testing.T) {
+ return func(t *testing.T) {
+ metric := findMetric("apisix_ingress_controller_apisix_request_latencies", metrics)
+ assert.NotNil(t, metric)
+ assert.Equal(t, metric.Type.String(), "SUMMARY")
+ m := metric.GetMetric()
+ assert.Len(t, m, 1)
+
+ assert.Equal(t, *m[0].Summary.SampleCount, uint64(1))
+ assert.Equal(t, *m[0].Summary.SampleSum, float64((500 * time.Millisecond).Nanoseconds()))
+ assert.Equal(t, *m[0].Label[0].Name, "controller_namespace")
+ assert.Equal(t, *m[0].Label[0].Value, "default")
+ assert.Equal(t, *m[0].Label[1].Name, "controller_pod")
+ assert.Equal(t, *m[0].Label[1].Value, "test")
+ }
+}
+
+func apisixRequestTestHandler(t *testing.T, metrics []*io_prometheus_client.MetricFamily) func(t *testing.T) {
+ return func(t *testing.T) {
+ metric := findMetric("apisix_ingress_controller_apisix_requests", metrics)
+ assert.NotNil(t, metric)
+ assert.Equal(t, metric.Type.String(), "COUNTER")
+ m := metric.GetMetric()
+ assert.Len(t, m, 2)
+
+ assert.Equal(t, *m[0].Counter.Value, float64(2))
+ assert.Equal(t, *m[0].Label[0].Name, "controller_namespace")
+ assert.Equal(t, *m[0].Label[0].Value, "default")
+ assert.Equal(t, *m[0].Label[1].Name, "controller_pod")
+ assert.Equal(t, *m[0].Label[1].Value, "test")
+ assert.Equal(t, *m[0].Label[2].Name, "resource")
+ assert.Equal(t, *m[0].Label[2].Value, "route")
+
+ assert.Equal(t, *m[1].Counter.Value, float64(1))
+ assert.Equal(t, *m[1].Label[0].Name, "controller_namespace")
+ assert.Equal(t, *m[1].Label[0].Value, "default")
+ assert.Equal(t, *m[1].Label[1].Name, "controller_pod")
+ assert.Equal(t, *m[1].Label[1].Value, "test")
+ assert.Equal(t, *m[1].Label[2].Name, "resource")
+ assert.Equal(t, *m[1].Label[2].Value, "upstream")
+ }
+}
+
+// TestPrometheusCollector records samples through every Collector method,
+// gathers from the default Prometheus gatherer and verifies each metric
+// family in its own subtest.
+func TestPrometheusCollector(t *testing.T) {
+	c := NewPrometheusCollector("test", "default")
+	c.ResetLeader(true)
+	c.RecordAPISIXCode(404, "route")
+	c.RecordAPISIXCode(500, "upstream")
+	c.RecordAPISIXLatency(500 * time.Millisecond)
+	c.IncrAPISIXRequest("route")
+	c.IncrAPISIXRequest("route")
+	c.IncrAPISIXRequest("upstream")
+
+	metrics, err := prometheus.DefaultGatherer.Gather()
+	// A gathering failure is the root cause; running the subtests on top
+	// of it would only add follow-up failures.
+	if !assert.NoError(t, err) {
+		return
+	}
+
+	t.Run("apisix_bad_status_codes", apisixBadStatusCodesTestHandler(t, metrics))
+	t.Run("is_leader", isLeaderTestHandler(t, metrics))
+	t.Run("apisix_request_latencies", apisixLatencyTestHandler(t, metrics))
+	t.Run("apisix_requests", apisixRequestTestHandler(t, metrics))
+}
+
+// findMetric returns the first metric family in metrics whose name equals
+// name, or nil when there is none.
+func findMetric(name string, metrics []*io_prometheus_client.MetricFamily) *io_prometheus_client.MetricFamily {
+	for _, m := range metrics {
+		// GetName is nil-safe, unlike dereferencing m.Name directly.
+		if m.GetName() == name {
+			return m
+		}
+	}
+	return nil
+}