You are viewing a plain text version of this content. The canonical link was not preserved in this plain text copy.
Posted to commits@camel.apache.org by as...@apache.org on 2020/11/10 10:20:14 UTC

[camel-k] 14/25: chore: Add build attempts per build histogram metric

This is an automated email from the ASF dual-hosted git repository.

astefanutti pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/camel-k.git

commit 62bdfa464ec746bbe3092f9dbc6b39c2b2750305
Author: Antonin Stefanutti <an...@stefanutti.fr>
AuthorDate: Wed Oct 21 16:39:58 2020 +0200

    chore: Add build attempts per build histogram metric
---
 pkg/controller/build/metrics.go          | 14 ++++++++------
 pkg/controller/build/monitor_pod.go      | 19 +++++++++++++++----
 pkg/controller/build/monitor_routine.go  |  1 -
 pkg/controller/build/recovery.go         | 12 +++++++++++-
 pkg/controller/build/schedule_routine.go | 20 ++++++++++++++++----
 5 files changed, 50 insertions(+), 16 deletions(-)

diff --git a/pkg/controller/build/metrics.go b/pkg/controller/build/metrics.go
index 64286ce..59961cc 100644
--- a/pkg/controller/build/metrics.go
+++ b/pkg/controller/build/metrics.go
@@ -28,14 +28,16 @@ import (
 const buildResultLabel = "result"
 
 var (
-	buildAttempt = prometheus.NewCounterVec(
-		prometheus.CounterOpts{
-			Name: "camel_k_build_attempt",
-			Help: "Camel K build attempt",
+	buildAttempts = prometheus.NewHistogramVec(
+		prometheus.HistogramOpts{
+			Name:    "camel_k_build_attempts",
+			Help:    "Camel K build attempts",
+			Buckets: []float64{1, 2, 3, 4, 5},
 		},
 		[]string{
 			buildResultLabel,
-		})
+		},
+	)
 
 	buildDuration = prometheus.NewHistogramVec(
 		prometheus.HistogramOpts{
@@ -72,5 +74,5 @@ var (
 
 func init() {
 	// Register custom metrics with the global prometheus registry
-	metrics.Registry.MustRegister(buildAttempt, buildDuration, queueDuration)
+	metrics.Registry.MustRegister(buildAttempts, buildDuration, queueDuration)
 }
diff --git a/pkg/controller/build/monitor_pod.go b/pkg/controller/build/monitor_pod.go
index cead6f4..a24673f 100644
--- a/pkg/controller/build/monitor_pod.go
+++ b/pkg/controller/build/monitor_pod.go
@@ -69,9 +69,15 @@ func (action *monitorPodAction) Handle(ctx context.Context, build *v1.Build) (*v
 		build.Status.Phase = v1.BuildPhaseSucceeded
 		duration := metav1.Now().Sub(build.Status.StartedAt.Time)
 		build.Status.Duration = duration.String()
+
 		// Account for the Build metrics
-		buildAttempt.WithLabelValues(build.Status.Phase.String()).Inc()
-		buildDuration.WithLabelValues(build.Status.Phase.String()).Observe(duration.Seconds())
+		buildAttempts.
+			WithLabelValues(build.Status.Phase.String()).
+			Observe(float64(getBuildAttemptsFor(build)))
+		buildDuration.
+			WithLabelValues(build.Status.Phase.String()).
+			Observe(duration.Seconds())
+
 		for _, task := range build.Spec.Tasks {
 			if task.Image != nil {
 				build.Status.Image = task.Image.BuiltImage
@@ -90,9 +96,14 @@ func (action *monitorPodAction) Handle(ctx context.Context, build *v1.Build) (*v
 		build.Status.Phase = v1.BuildPhaseFailed
 		duration := metav1.Now().Sub(build.Status.StartedAt.Time)
 		build.Status.Duration = duration.String()
+
 		// Account for the Build metrics
-		buildAttempt.WithLabelValues(build.Status.Phase.String()).Inc()
-		buildDuration.WithLabelValues(build.Status.Phase.String()).Observe(duration.Seconds())
+		buildAttempts.
+			WithLabelValues(build.Status.Phase.String()).
+			Observe(float64(getBuildAttemptsFor(build)))
+		buildDuration.
+			WithLabelValues(build.Status.Phase.String()).
+			Observe(duration.Seconds())
 	}
 
 	return build, nil
diff --git a/pkg/controller/build/monitor_routine.go b/pkg/controller/build/monitor_routine.go
index 8d15aa9..89a47bd 100644
--- a/pkg/controller/build/monitor_routine.go
+++ b/pkg/controller/build/monitor_routine.go
@@ -53,7 +53,6 @@ func (action *monitorRoutineAction) Handle(ctx context.Context, build *v1.Build)
 		// and recover the build if it's missing. This can happen when the operator
 		// stops abruptly and restarts or the build status update fails.
 		build.Status.Phase = v1.BuildPhaseFailed
-		buildAttempt.WithLabelValues(build.Status.Phase.String()).Inc()
 		return build, nil
 	}
 
diff --git a/pkg/controller/build/recovery.go b/pkg/controller/build/recovery.go
index 5320809..5b44662 100644
--- a/pkg/controller/build/recovery.go
+++ b/pkg/controller/build/recovery.go
@@ -69,7 +69,9 @@ func (action *errorRecoveryAction) Handle(ctx context.Context, build *v1.Build)
 
 	if build.Status.Failure.Recovery.Attempt >= build.Status.Failure.Recovery.AttemptMax {
 		build.Status.Phase = v1.BuildPhaseError
-		buildAttempt.WithLabelValues(build.Status.Phase.String()).Inc()
+		buildAttempts.
+			WithLabelValues(build.Status.Phase.String()).
+			Observe(float64(getBuildAttemptsFor(build)))
 		return build, nil
 	}
 
@@ -96,3 +98,11 @@ func (action *errorRecoveryAction) Handle(ctx context.Context, build *v1.Build)
 
 	return build, nil
 }
+
+func getBuildAttemptsFor(build *v1.Build) int {
+	attempts := 1
+	if build.Status.Failure != nil {
+		attempts += build.Status.Failure.Recovery.Attempt
+	}
+	return attempts
+}
diff --git a/pkg/controller/build/schedule_routine.go b/pkg/controller/build/schedule_routine.go
index eccddbf..2a87b20 100644
--- a/pkg/controller/build/schedule_routine.go
+++ b/pkg/controller/build/schedule_routine.go
@@ -128,9 +128,15 @@ func (action *scheduleRoutineAction) runBuild(ctx context.Context, build *v1.Bui
 					task.GetName()),
 				Duration: duration.String(),
 			}
+
 			// Account for the Build metrics
-			buildAttempt.WithLabelValues(status.Phase.String()).Inc()
-			buildDuration.WithLabelValues(status.Phase.String()).Observe(duration.Seconds())
+			buildAttempts.
+				WithLabelValues(status.Phase.String()).
+				Observe(float64(getBuildAttemptsFor(build)))
+			buildDuration.
+				WithLabelValues(status.Phase.String()).
+				Observe(duration.Seconds())
+
 			_ = action.updateBuildStatus(ctx, build, status)
 			break
 		}
@@ -144,10 +150,16 @@ func (action *scheduleRoutineAction) runBuild(ctx context.Context, build *v1.Bui
 		if lastTask || taskFailed {
 			duration := metav1.Now().Sub(build.Status.StartedAt.Time)
 			status.Duration = duration.String()
+
 			// Account for the Build metrics
-			buildAttempt.WithLabelValues(status.Phase.String()).Inc()
-			buildDuration.WithLabelValues(status.Phase.String()).Observe(duration.Seconds())
+			buildAttempts.
+				WithLabelValues(status.Phase.String()).
+				Observe(float64(getBuildAttemptsFor(build)))
+			buildDuration.
+				WithLabelValues(status.Phase.String()).
+				Observe(duration.Seconds())
 		}
+
 		err := action.updateBuildStatus(ctx, build, status)
 		if err != nil || taskFailed {
 			break