You are viewing a plain text version of this content. The canonical link for it is here.
Posted to notifications@apisix.apache.org by to...@apache.org on 2020/12/29 10:39:15 UTC

[apisix-ingress-controller] branch master updated: chore: add some metrics (#143)

This is an automated email from the ASF dual-hosted git repository.

tokers pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/apisix-ingress-controller.git


The following commit(s) were added to refs/heads/master by this push:
     new e0658f4  chore: add some metrics (#143)
e0658f4 is described below

commit e0658f4fb8c54530dc9c6dd2f5d66633b2ae6bad
Author: Alex Zhang <zc...@gmail.com>
AuthorDate: Tue Dec 29 18:39:04 2020 +0800

    chore: add some metrics (#143)
---
 cmd/ingress/ingress.go         |  13 ++++
 go.mod                         |   1 +
 pkg/metrics/prometheus.go      | 159 +++++++++++++++++++++++++++++++++++++++++
 pkg/metrics/prometheus_test.go | 140 ++++++++++++++++++++++++++++++++++++
 4 files changed, 313 insertions(+)

diff --git a/cmd/ingress/ingress.go b/cmd/ingress/ingress.go
index fb41553..b633d3f 100644
--- a/cmd/ingress/ingress.go
+++ b/cmd/ingress/ingress.go
@@ -23,6 +23,7 @@ import (
 	"syscall"
 	"time"
 
+	"github.com/api7/ingress-controller/pkg/metrics"
 	api6Informers "github.com/gxthrj/apisix-ingress-types/pkg/client/informers/externalversions"
 	"github.com/spf13/cobra"
 
@@ -107,6 +108,18 @@ the apisix cluster and others are created`,
 			if err := kube.InitInformer(cfg); err != nil {
 				dief("failed to initialize kube informers: %s", err)
 			}
+
+			// TODO: the metrics logic should be moved inside the ingress controller
+			// after we refactor it.
+			podName := os.Getenv("POD_NAME")
+			podNamespace := os.Getenv("POD_NAMESPACE")
+			if podNamespace == "" {
+				podNamespace = "default"
+			}
+
+			collector := metrics.NewPrometheusCollector(podName, podNamespace)
+			collector.ResetLeader(true)
+
 			kubeClientSet := kube.GetKubeClient()
 			apisixClientset := kube.GetApisixClient()
 			sharedInformerFactory := api6Informers.NewSharedInformerFactory(apisixClientset, 0)
diff --git a/go.mod b/go.mod
index f4195b0..906ec2b 100644
--- a/go.mod
+++ b/go.mod
@@ -14,6 +14,7 @@ require (
 	github.com/onsi/ginkgo v1.11.0 // indirect
 	github.com/onsi/gomega v1.8.1 // indirect
 	github.com/prometheus/client_golang v0.9.3
+	github.com/prometheus/client_model v0.0.0-20190129233127-fd36f4220a90
 	github.com/sergi/go-diff v1.1.0 // indirect
 	github.com/spf13/cobra v1.1.1
 	github.com/stretchr/testify v1.4.0
diff --git a/pkg/metrics/prometheus.go b/pkg/metrics/prometheus.go
new file mode 100644
index 0000000..5f3fbfc
--- /dev/null
+++ b/pkg/metrics/prometheus.go
@@ -0,0 +1,159 @@
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements.  See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License.  You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+package metrics
+
+import (
+	"strconv"
+	"time"
+
+	"github.com/prometheus/client_golang/prometheus"
+)
+
+const (
+	_namespace = "apisix_ingress_controller" // Namespace (name prefix) shared by every metric created in this file
+)
+
+// Collector is the set of metric-recording operations exposed by this package.
+type Collector interface {
+	// ResetLeader sets the role of this instance: true for leader, false for follower.
+	ResetLeader(leader bool)
+	// RecordAPISIXCode records one occurrence of the status code that APISIX
+	// returned, labelled with the given resource type.
+	RecordAPISIXCode(code int, resource string)
+	// RecordAPISIXLatency observes the duration of one round trip from the
+	// ingress controller to APISIX.
+	RecordAPISIXLatency(latency time.Duration)
+	// IncrAPISIXRequest counts one request sent to APISIX for the given resource type.
+	IncrAPISIXRequest(resource string)
+}
+
+// collector holds the Prometheus metrics that back the Collector interface.
+type collector struct {
+	isLeader       prometheus.Gauge // set to 1 or 0 by ResetLeader
+	apisixLatency  prometheus.Summary // observed in nanoseconds by RecordAPISIXLatency
+	apisixRequests *prometheus.CounterVec // labelled by "resource"
+	apisixCodes    *prometheus.GaugeVec // labelled by "resource" and "status_code"
+}
+
+// NewPrometheusCollector creates the Prometheus metrics collector.
+// It also registers every internal metric with the default Prometheus
+// registerer, so avoid calling this function more than once.
+func NewPrometheusCollector(podName, podNamespace string) Collector {
+	constLabels := prometheus.Labels{
+		"controller_pod":       podName,
+		"controller_namespace": podNamespace,
+	}
+
+	c := &collector{
+		isLeader: prometheus.NewGauge(
+			prometheus.GaugeOpts{
+				Name:        "is_leader",
+				Namespace:   _namespace,
+				Help:        "Whether the role of controller instance is leader",
+				ConstLabels: constLabels,
+			},
+		),
+		apisixCodes: prometheus.NewGaugeVec(
+			prometheus.GaugeOpts{
+				Name:        "apisix_bad_status_codes",
+				Namespace:   _namespace,
+				Help:        "Status codes of requests to APISIX",
+				ConstLabels: constLabels,
+			},
+			[]string{"resource", "status_code"},
+		),
+		apisixLatency: prometheus.NewSummary(
+			prometheus.SummaryOpts{
+				Namespace:   _namespace,
+				Name:        "apisix_request_latencies",
+				Help:        "Request latencies with APISIX",
+				ConstLabels: constLabels,
+			},
+		),
+		apisixRequests: prometheus.NewCounterVec(
+			prometheus.CounterOpts{
+				Namespace:   _namespace,
+				Name:        "apisix_requests",
+				Help:        "Number of requests to APISIX",
+				ConstLabels: constLabels,
+			},
+			[]string{"resource"},
+		),
+	}
+
+	// All metrics live in the DefaultRegisterer, so a repeated call (e.g. in
+	// test cases) would make MustRegister panic; unregister them first.
+	prometheus.Unregister(c.isLeader)
+	prometheus.Unregister(c.apisixCodes)
+	prometheus.Unregister(c.apisixLatency)
+	prometheus.Unregister(c.apisixRequests)
+
+	prometheus.MustRegister(
+		c.isLeader,
+		c.apisixCodes,
+		c.apisixLatency,
+		c.apisixRequests,
+	)
+
+	return c
+}
+
+// ResetLeader sets the is_leader gauge to 1 for a leader and to 0 otherwise.
+func (c *collector) ResetLeader(leader bool) {
+	var role float64 // zero value stands for "not leader"
+	if leader {
+		role = 1
+	}
+	c.isLeader.Set(role)
+}
+
+// RecordAPISIXCode increments the series of the status code (returned by
+// APISIX) for the specific resource (e.g. Route, Upstream and so on).
+func (c *collector) RecordAPISIXCode(code int, resource string) {
+	// Label values are positional: they follow the order in which the vector
+	// was declared, i.e. "resource" first and "status_code" second.
+	statusCode := strconv.Itoa(code)
+	c.apisixCodes.WithLabelValues(resource, statusCode).Inc()
+}
+
+// RecordAPISIXLatency observes, in nanoseconds, the time taken by one
+// complete round trip from the controller to APISIX.
+func (c *collector) RecordAPISIXLatency(latency time.Duration) {
+	c.apisixLatency.Observe(float64(latency / time.Nanosecond))
+}
+
+// IncrAPISIXRequest adds one to the counter of requests sent to APISIX
+// for the specific resource.
+func (c *collector) IncrAPISIXRequest(resource string) {
+	c.apisixRequests.With(prometheus.Labels{"resource": resource}).Inc()
+}
+
+// Collect implements prometheus.Collector by forwarding ch to every metric.
+// Each metric is collected exactly once: a duplicated sample makes Gather fail.
+func (c *collector) Collect(ch chan<- prometheus.Metric) {
+	c.isLeader.Collect(ch)
+	c.apisixLatency.Collect(ch)
+	c.apisixRequests.Collect(ch)
+	c.apisixCodes.Collect(ch)
+}
+
+// Describe implements prometheus.Collector by forwarding ch to every metric,
+// so that the descriptors of each metric are sent exactly once.
+func (c *collector) Describe(ch chan<- *prometheus.Desc) {
+	c.isLeader.Describe(ch)
+	c.apisixLatency.Describe(ch)
+	c.apisixRequests.Describe(ch)
+	c.apisixCodes.Describe(ch)
+}
diff --git a/pkg/metrics/prometheus_test.go b/pkg/metrics/prometheus_test.go
new file mode 100644
index 0000000..b8d4192
--- /dev/null
+++ b/pkg/metrics/prometheus_test.go
@@ -0,0 +1,140 @@
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements.  See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License.  You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+package metrics
+
+import (
+	"testing"
+	"time"
+
+	"github.com/prometheus/client_golang/prometheus"
+	io_prometheus_client "github.com/prometheus/client_model/go"
+	"github.com/stretchr/testify/assert"
+)
+
+// apisixBadStatusCodesTestHandler returns the subtest for the bad status codes metric; the outer t is unused.
+func apisixBadStatusCodesTestHandler(t *testing.T, metrics []*io_prometheus_client.MetricFamily) func(*testing.T) {
+	return func(t *testing.T) {
+		metric := findMetric("apisix_ingress_controller_apisix_bad_status_codes", metrics)
+		assert.NotNil(t, metric)
+		assert.Equal(t, "GAUGE", metric.GetType().String())
+		m := metric.GetMetric()
+		assert.Len(t, m, 2)
+		assert.Equal(t, float64(1), m[0].GetGauge().GetValue())
+		assert.Equal(t, "controller_namespace", m[0].GetLabel()[0].GetName())
+		assert.Equal(t, "default", m[0].GetLabel()[0].GetValue())
+		assert.Equal(t, "controller_pod", m[0].GetLabel()[1].GetName())
+		assert.Equal(t, "test", m[0].GetLabel()[1].GetValue())
+		assert.Equal(t, "resource", m[0].GetLabel()[2].GetName())
+		assert.Equal(t, "route", m[0].GetLabel()[2].GetValue())
+		assert.Equal(t, "status_code", m[0].GetLabel()[3].GetName())
+		assert.Equal(t, "404", m[0].GetLabel()[3].GetValue())
+		assert.Equal(t, float64(1), m[1].GetGauge().GetValue())
+		assert.Equal(t, "controller_namespace", m[1].GetLabel()[0].GetName())
+		assert.Equal(t, "default", m[1].GetLabel()[0].GetValue())
+		assert.Equal(t, "controller_pod", m[1].GetLabel()[1].GetName())
+		assert.Equal(t, "test", m[1].GetLabel()[1].GetValue())
+		assert.Equal(t, "resource", m[1].GetLabel()[2].GetName())
+		assert.Equal(t, "upstream", m[1].GetLabel()[2].GetValue())
+		assert.Equal(t, "status_code", m[1].GetLabel()[3].GetName())
+		assert.Equal(t, "500", m[1].GetLabel()[3].GetValue())
+	}
+}
+
+// isLeaderTestHandler returns the subtest for the is_leader metric; the outer t is unused.
+func isLeaderTestHandler(t *testing.T, metrics []*io_prometheus_client.MetricFamily) func(*testing.T) {
+	return func(t *testing.T) {
+		metric := findMetric("apisix_ingress_controller_is_leader", metrics)
+		assert.NotNil(t, metric)
+		assert.Equal(t, "GAUGE", metric.GetType().String())
+		m := metric.GetMetric()
+		assert.Len(t, m, 1)
+		assert.Equal(t, float64(1), m[0].GetGauge().GetValue())
+		assert.Equal(t, "controller_namespace", m[0].GetLabel()[0].GetName())
+		assert.Equal(t, "default", m[0].GetLabel()[0].GetValue())
+		assert.Equal(t, "controller_pod", m[0].GetLabel()[1].GetName())
+		assert.Equal(t, "test", m[0].GetLabel()[1].GetValue())
+	}
+}
+
+// apisixLatencyTestHandler returns the subtest for the request latencies metric; the outer t is unused.
+func apisixLatencyTestHandler(t *testing.T, metrics []*io_prometheus_client.MetricFamily) func(t *testing.T) {
+	return func(t *testing.T) {
+		metric := findMetric("apisix_ingress_controller_apisix_request_latencies", metrics)
+		assert.NotNil(t, metric)
+		assert.Equal(t, "SUMMARY", metric.GetType().String())
+		m := metric.GetMetric()
+		assert.Len(t, m, 1)
+		assert.Equal(t, uint64(1), m[0].GetSummary().GetSampleCount())
+		assert.Equal(t, float64((500 * time.Millisecond).Nanoseconds()), m[0].GetSummary().GetSampleSum())
+		assert.Equal(t, "controller_namespace", m[0].GetLabel()[0].GetName())
+		assert.Equal(t, "default", m[0].GetLabel()[0].GetValue())
+		assert.Equal(t, "controller_pod", m[0].GetLabel()[1].GetName())
+		assert.Equal(t, "test", m[0].GetLabel()[1].GetValue())
+	}
+}
+
+// apisixRequestTestHandler returns the subtest for the requests counter metric; the outer t is unused.
+func apisixRequestTestHandler(t *testing.T, metrics []*io_prometheus_client.MetricFamily) func(t *testing.T) {
+	return func(t *testing.T) {
+		metric := findMetric("apisix_ingress_controller_apisix_requests", metrics)
+		assert.NotNil(t, metric)
+		assert.Equal(t, "COUNTER", metric.GetType().String())
+		m := metric.GetMetric()
+		assert.Len(t, m, 2)
+		assert.Equal(t, float64(2), m[0].GetCounter().GetValue())
+		assert.Equal(t, "controller_namespace", m[0].GetLabel()[0].GetName())
+		assert.Equal(t, "default", m[0].GetLabel()[0].GetValue())
+		assert.Equal(t, "controller_pod", m[0].GetLabel()[1].GetName())
+		assert.Equal(t, "test", m[0].GetLabel()[1].GetValue())
+		assert.Equal(t, "resource", m[0].GetLabel()[2].GetName())
+		assert.Equal(t, "route", m[0].GetLabel()[2].GetValue())
+
+		assert.Equal(t, float64(1), m[1].GetCounter().GetValue())
+		assert.Equal(t, "controller_namespace", m[1].GetLabel()[0].GetName())
+		assert.Equal(t, "default", m[1].GetLabel()[0].GetValue())
+		assert.Equal(t, "controller_pod", m[1].GetLabel()[1].GetName())
+		assert.Equal(t, "test", m[1].GetLabel()[1].GetValue())
+		assert.Equal(t, "resource", m[1].GetLabel()[2].GetName())
+		assert.Equal(t, "upstream", m[1].GetLabel()[2].GetValue())
+	}
+}
+
+// TestPrometheusCollector feeds every metric once, then checks what the default gatherer reports.
+func TestPrometheusCollector(t *testing.T) {
+	c := NewPrometheusCollector("test", "default")
+	c.ResetLeader(true)
+	c.RecordAPISIXLatency(500 * time.Millisecond)
+	c.RecordAPISIXCode(404, "route")
+	c.RecordAPISIXCode(500, "upstream")
+	for _, resource := range []string{"route", "route", "upstream"} {
+		c.IncrAPISIXRequest(resource)
+	}
+
+	families, err := prometheus.DefaultGatherer.Gather()
+	assert.Nil(t, err)
+	t.Run("apisix_bad_status_codes", apisixBadStatusCodesTestHandler(t, families))
+	t.Run("is_leader", isLeaderTestHandler(t, families))
+	t.Run("apisix_request_latencies", apisixLatencyTestHandler(t, families))
+	t.Run("apisix_requests", apisixRequestTestHandler(t, families))
+}
+
+func findMetric(name string, metrics []*io_prometheus_client.MetricFamily) *io_prometheus_client.MetricFamily {
+	for _, family := range metrics {
+		if family.GetName() == name { // GetName is nil-safe, unlike dereferencing family.Name
+			return family
+		}
+	}
+	return nil // no family in metrics is called name
+}