You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@camel.apache.org by as...@apache.org on 2020/11/10 10:20:00 UTC
[camel-k] branch master updated (c42b640 -> 6afbdc2)
This is an automated email from the ASF dual-hosted git repository.
astefanutti pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/camel-k.git.
from c42b640 Update .asf.yaml
new c5b45f2 feat: Add build duration histogram metric
new 3159bb7 feat: Add build queue duration histogram metric
new 2572ff7 feat: Add build attempt counter metric
new 5a6dcef chore: Add BuildPhase.String() helper method
new 0bb2ade feat: Add Camel K controllers reconcile loop histogram metric
new 01eb712 chore: Declare metrics container port
new dcc4347 feat(cli): Add an option to install a default PodMonitor resource
new 6266dfb feat: Install alerting rule for reconciliation request duration SLO
new 2456f03 feat: Install alerting rule for reconciliation request failure SLO
new b37d644 chore: Reformat reconciliation duration alerting rule
new 71c947a feat: Add default alerting rules for build duration SLOs
new 8c53d37 feat: Add default alerting rule for build failure SLO
new ae92b0e feat: Add default alerting rules for build queue duration SLOs
new 62bdfa4 chore: Add build attempts per build histogram metric
new b2313c5 chore: Factorise metrics observations
new e40b5f1 chore: Rename build recovery attempts metric
new 45ba6a0 feat: Add default alerting rule for build error SLO
new 2998f3a fix: Use attempt time to compute queuing duration on recovery
new 4cca10c feat: Add time to first integration readiness metric
new 05a11fb chore: Factorize integration re-initialization logic
new f70182e feat: Add an option to configure the operator metrics endpoint port
new 5142dc9 feat: Add an install option to configure the operator metrics endpoint port
new ceee5e3 feat: Enable operator liveness health check
new 35cf0cc feat: Add an install option to configure the health endpoint port
new 6afbdc2 chore: Rebuild resources
The 25 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails. The revisions
listed as "add" were already present in the repository and have only
been added to this reference.
Summary of changes:
deploy/crd-integration.yaml | 9 ++
...el-k.v1.3.0-snapshot.clusterserviceversion.yaml | 9 ++
.../integrations.camel.apache.org.crd.yaml | 9 ++
deploy/operator-deployment.yaml | 15 ++-
deploy/operator-pod-monitor.yaml | 15 +++
deploy/operator-prometheus-rule.yaml | 128 +++++++++++++++++++++
deploy/resources.go | 24 +++-
go.mod | 1 +
go.sum | 7 ++
helm/camel-k/crds/crd-integration.yaml | 9 ++
helm/camel-k/templates/operator.yaml | 11 +-
pkg/apis/camel/v1/build_types_support.go | 4 +
pkg/apis/camel/v1/integration_types.go | 4 +
pkg/apis/camel/v1/integration_types_support.go | 64 +++++++----
pkg/apis/camel/v1/zz_generated.deepcopy.go | 8 ++
pkg/client/fastmapper.go | 1 +
pkg/cmd/install.go | 39 +++++--
pkg/cmd/operator.go | 35 ++++--
pkg/cmd/operator/operator.go | 21 +++-
pkg/cmd/root.go | 7 +-
pkg/controller/build/build_controller.go | 37 +++---
pkg/controller/build/metrics.go | 112 ++++++++++++++++++
pkg/controller/build/monitor_pod.go | 13 ++-
pkg/controller/build/monitor_routine.go | 1 -
pkg/controller/build/schedule_pod.go | 4 +
pkg/controller/build/schedule_routine.go | 30 +++--
pkg/controller/integration/error.go | 2 +-
pkg/controller/integration/initialize.go | 6 +
.../integration/integration_controller.go | 41 ++++---
.../s2i.go => controller/integration/metrics.go} | 42 ++++---
pkg/controller/integration/monitor.go | 25 ++--
pkg/controller/integration/platform_setup.go | 6 +-
.../integrationkit/integrationkit_controller.go | 47 ++++----
.../integrationplatform_controller.go | 33 ++++--
pkg/controller/kamelet/kamelet_controller.go | 39 ++++---
.../kameletbinding/kamelet_binding_controller.go | 35 +++---
pkg/install/operator.go | 51 +++++++-
pkg/util/monitoring/controller.go | 128 +++++++++++++++++++++
pkg/util/monitoring/{register.go => timer.go} | 39 +++----
39 files changed, 893 insertions(+), 218 deletions(-)
create mode 100644 deploy/operator-pod-monitor.yaml
create mode 100644 deploy/operator-prometheus-rule.yaml
create mode 100644 pkg/controller/build/metrics.go
copy pkg/{builder/s2i/s2i.go => controller/integration/metrics.go} (55%)
create mode 100644 pkg/util/monitoring/controller.go
copy pkg/util/monitoring/{register.go => timer.go} (55%)
[camel-k] 20/25: chore: Factorize integration re-initialization
logic
Posted by as...@apache.org.
This is an automated email from the ASF dual-hosted git repository.
astefanutti pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/camel-k.git
commit 05a11fb2d9902439cf37880737cee76abad6eb46
Author: Antonin Stefanutti <an...@stefanutti.fr>
AuthorDate: Tue Oct 27 17:12:56 2020 +0100
chore: Factorize integration re-initialization logic
---
pkg/apis/camel/v1/integration_types_support.go | 12 ++++++++++++
pkg/controller/integration/error.go | 3 +--
pkg/controller/integration/monitor.go | 9 ++-------
pkg/controller/integration/platform_setup.go | 6 +++---
4 files changed, 18 insertions(+), 12 deletions(-)
diff --git a/pkg/apis/camel/v1/integration_types_support.go b/pkg/apis/camel/v1/integration_types_support.go
index 591afda..12479ae 100644
--- a/pkg/apis/camel/v1/integration_types_support.go
+++ b/pkg/apis/camel/v1/integration_types_support.go
@@ -52,6 +52,18 @@ func NewIntegrationList() IntegrationList {
}
// Sources return a new slice containing all the sources associated to the integration
+func (in *Integration) Initialize() {
+ profile := in.Status.Profile
+ if in.Spec.Profile != "" {
+ profile = in.Spec.Profile
+ }
+ in.Status = IntegrationStatus{
+ Phase: IntegrationPhaseInitialization,
+ Profile: profile,
+ }
+}
+
+// Sources return a new slice containing all the sources associated to the integration
func (in *Integration) Sources() []SourceSpec {
sources := make([]SourceSpec, 0, len(in.Spec.Sources)+len(in.Status.GeneratedSources))
sources = append(sources, in.Spec.Sources...)
diff --git a/pkg/controller/integration/error.go b/pkg/controller/integration/error.go
index 9e3ad90..5f19458 100644
--- a/pkg/controller/integration/error.go
+++ b/pkg/controller/integration/error.go
@@ -50,9 +50,8 @@ func (action *errorAction) Handle(ctx context.Context, integration *v1.Integrati
if hash != integration.Status.Digest {
action.L.Info("Integration needs a rebuild")
+ integration.Initialize()
integration.Status.Digest = hash
- integration.Status.Phase = v1.IntegrationPhaseInitialization
- integration.Status.InitializationTimestamp = nil
return integration, nil
}
diff --git a/pkg/controller/integration/monitor.go b/pkg/controller/integration/monitor.go
index 654d78b..dbfcbd6 100644
--- a/pkg/controller/integration/monitor.go
+++ b/pkg/controller/integration/monitor.go
@@ -22,11 +22,11 @@ import (
appsv1 "k8s.io/api/apps/v1"
corev1 "k8s.io/api/core/v1"
+
k8sclient "sigs.k8s.io/controller-runtime/pkg/client"
v1 "github.com/apache/camel-k/pkg/apis/camel/v1"
"github.com/apache/camel-k/pkg/trait"
- "github.com/apache/camel-k/pkg/util/defaults"
"github.com/apache/camel-k/pkg/util/digest"
"github.com/apache/camel-k/pkg/util/kubernetes"
)
@@ -57,13 +57,8 @@ func (action *monitorAction) Handle(ctx context.Context, integration *v1.Integra
if hash != integration.Status.Digest {
action.L.Info("Integration needs a rebuild")
+ integration.Initialize()
integration.Status.Digest = hash
- integration.Status.Phase = v1.IntegrationPhaseInitialization
- if integration.Spec.Profile != "" {
- integration.Status.Profile = integration.Spec.Profile
- }
- integration.Status.Version = defaults.Version
- integration.Status.InitializationTimestamp = nil
return integration, nil
}
diff --git a/pkg/controller/integration/platform_setup.go b/pkg/controller/integration/platform_setup.go
index fa4f40c..51c5a5b 100644
--- a/pkg/controller/integration/platform_setup.go
+++ b/pkg/controller/integration/platform_setup.go
@@ -20,13 +20,13 @@ package integration
import (
"context"
- "github.com/apache/camel-k/pkg/client"
- "github.com/apache/camel-k/pkg/platform"
- "github.com/apache/camel-k/pkg/util/knative"
k8serrors "k8s.io/apimachinery/pkg/api/errors"
v1 "github.com/apache/camel-k/pkg/apis/camel/v1"
+ "github.com/apache/camel-k/pkg/client"
+ "github.com/apache/camel-k/pkg/platform"
"github.com/apache/camel-k/pkg/trait"
+ "github.com/apache/camel-k/pkg/util/knative"
)
// NewPlatformSetupAction creates a new platform-setup action
[camel-k] 21/25: feat: Add an option to configure the operator
metrics endpoint port
Posted by as...@apache.org.
This is an automated email from the ASF dual-hosted git repository.
astefanutti pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/camel-k.git
commit f70182e2e055fd4eeb40fef658e7bdffff2c8e53
Author: Antonin Stefanutti <an...@stefanutti.fr>
AuthorDate: Wed Oct 28 12:22:27 2020 +0100
feat: Add an option to configure the operator metrics endpoint port
---
pkg/cmd/operator.go | 33 +++++++++++++++++++++++----------
pkg/cmd/operator/operator.go | 9 +++++----
pkg/cmd/root.go | 7 ++++---
3 files changed, 32 insertions(+), 17 deletions(-)
diff --git a/pkg/cmd/operator.go b/pkg/cmd/operator.go
index 87361fd..2d69d01 100644
--- a/pkg/cmd/operator.go
+++ b/pkg/cmd/operator.go
@@ -18,20 +18,33 @@ limitations under the License.
package cmd
import (
- "github.com/apache/camel-k/pkg/cmd/operator"
"github.com/spf13/cobra"
+
+ "github.com/apache/camel-k/pkg/cmd/operator"
)
-func newCmdOperator() *cobra.Command {
+func newCmdOperator() (*cobra.Command, *operatorCmdOptions) {
+ options := operatorCmdOptions{
+ }
+
cmd := cobra.Command{
- Use: "operator",
- Short: "Run the Camel K operator",
- Long: `Run the Camel K operator`,
- Hidden: true,
- Run: func(cmd *cobra.Command, args []string) {
- operator.Run()
- },
+ Use: "operator",
+ Short: "Run the Camel K operator",
+ Long: `Run the Camel K operator`,
+ Hidden: true,
+ PreRunE: decode(&options),
+ Run: options.run,
}
- return &cmd
+ cmd.Flags().Int32("monitoring-port", 8080, "The port of the metrics endpoint")
+
+ return &cmd, &options
+}
+
+type operatorCmdOptions struct {
+ MonitoringPort int32 `mapstructure:"monitoring-port"`
+}
+
+func (o *operatorCmdOptions) run(_ *cobra.Command, _ []string) {
+ operator.Run(o.MonitoringPort)
}
diff --git a/pkg/cmd/operator/operator.go b/pkg/cmd/operator/operator.go
index a858b30..ec59ea1 100644
--- a/pkg/cmd/operator/operator.go
+++ b/pkg/cmd/operator/operator.go
@@ -24,6 +24,7 @@ import (
"math/rand"
"os"
"runtime"
+ "strconv"
"time"
"github.com/apache/camel-k/pkg/platform"
@@ -63,7 +64,7 @@ func printVersion() {
}
// Run starts the Camel K operator
-func Run() {
+func Run(monitoringPort int32) {
rand.Seed(time.Now().UTC().UnixNano())
flag.Parse()
@@ -123,9 +124,9 @@ func Run() {
// Create a new Cmd to provide shared dependencies and start components
mgr, err := ctrl.NewManager(cfg, ctrl.Options{
- Namespace: namespace,
- EventBroadcaster: eventBroadcaster,
- MetricsBindAddress: ":8080",
+ Namespace: namespace,
+ EventBroadcaster: eventBroadcaster,
+ MetricsBindAddress: ":" + strconv.Itoa(int(monitoringPort)),
})
if err != nil {
log.Error(err, "")
diff --git a/pkg/cmd/root.go b/pkg/cmd/root.go
index 2eab1ef..2899a30 100644
--- a/pkg/cmd/root.go
+++ b/pkg/cmd/root.go
@@ -22,11 +22,12 @@ import (
"os"
"strings"
- "github.com/apache/camel-k/pkg/client"
- camelv1 "github.com/apache/camel-k/pkg/client/camel/clientset/versioned/typed/camel/v1"
"github.com/pkg/errors"
"github.com/spf13/cobra"
"github.com/spf13/viper"
+
+ "github.com/apache/camel-k/pkg/client"
+ camelv1 "github.com/apache/camel-k/pkg/client/camel/clientset/versioned/typed/camel/v1"
)
const kamelCommandLongDescription = `Apache Camel K is a lightweight integration platform, born on Kubernetes, with serverless
@@ -133,7 +134,7 @@ func addKamelSubcommands(cmd *cobra.Command, options *RootCmdOptions) {
cmd.AddCommand(cmdOnly(newCmdReset(options)))
cmd.AddCommand(newCmdDescribe(options))
cmd.AddCommand(cmdOnly(newCmdRebuild(options)))
- cmd.AddCommand(newCmdOperator())
+ cmd.AddCommand(cmdOnly(newCmdOperator()))
cmd.AddCommand(cmdOnly(newCmdBuilder(options)))
cmd.AddCommand(cmdOnly(newCmdInit(options)))
cmd.AddCommand(cmdOnly(newCmdDebug(options)))
[camel-k] 15/25: chore: Factorise metrics observations
Posted by as...@apache.org.
This is an automated email from the ASF dual-hosted git repository.
astefanutti pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/camel-k.git
commit b2313c53e185804d6bbffaf6278d2e7ca0d6ad57
Author: Antonin Stefanutti <an...@stefanutti.fr>
AuthorDate: Wed Oct 21 17:52:10 2020 +0200
chore: Factorise metrics observations
---
pkg/controller/build/metrics.go | 26 ++++++++++++++++++++++++++
pkg/controller/build/monitor_pod.go | 14 ++------------
pkg/controller/build/recovery.go | 11 -----------
pkg/controller/build/schedule_routine.go | 14 ++------------
4 files changed, 30 insertions(+), 35 deletions(-)
diff --git a/pkg/controller/build/metrics.go b/pkg/controller/build/metrics.go
index 59961cc..2b9950b 100644
--- a/pkg/controller/build/metrics.go
+++ b/pkg/controller/build/metrics.go
@@ -18,11 +18,14 @@ limitations under the License.
package build
import (
+ "math"
"time"
"sigs.k8s.io/controller-runtime/pkg/metrics"
"github.com/prometheus/client_golang/prometheus"
+
+ v1 "github.com/apache/camel-k/pkg/apis/camel/v1"
)
const buildResultLabel = "result"
@@ -76,3 +79,26 @@ func init() {
// Register custom metrics with the global prometheus registry
metrics.Registry.MustRegister(buildAttempts, buildDuration, queueDuration)
}
+
+func observeBuildResult(build *v1.Build, phase v1.BuildPhase, duration time.Duration) {
+ attempt, attemptMax := getBuildAttemptFor(build)
+
+ if phase == v1.BuildPhaseFailed && attempt >= attemptMax {
+ // The phase will be updated in the recovery action,
+ // so let's account for it right now.
+ phase = v1.BuildPhaseError
+ }
+
+ buildAttempts.WithLabelValues(phase.String()).Observe(float64(attempt))
+ buildDuration.WithLabelValues(phase.String()).Observe(duration.Seconds())
+}
+
+func getBuildAttemptFor(build *v1.Build) (int, int) {
+ attempt := 0
+ attemptMax := math.MaxInt32
+ if failure := build.Status.Failure; failure != nil {
+ attempt += failure.Recovery.Attempt
+ attemptMax = failure.Recovery.AttemptMax
+ }
+ return attempt, attemptMax
+}
diff --git a/pkg/controller/build/monitor_pod.go b/pkg/controller/build/monitor_pod.go
index a24673f..10ee4b1 100644
--- a/pkg/controller/build/monitor_pod.go
+++ b/pkg/controller/build/monitor_pod.go
@@ -71,12 +71,7 @@ func (action *monitorPodAction) Handle(ctx context.Context, build *v1.Build) (*v
build.Status.Duration = duration.String()
// Account for the Build metrics
- buildAttempts.
- WithLabelValues(build.Status.Phase.String()).
- Observe(float64(getBuildAttemptsFor(build)))
- buildDuration.
- WithLabelValues(build.Status.Phase.String()).
- Observe(duration.Seconds())
+ observeBuildResult(build, build.Status.Phase, duration)
for _, task := range build.Spec.Tasks {
if task.Image != nil {
@@ -98,12 +93,7 @@ func (action *monitorPodAction) Handle(ctx context.Context, build *v1.Build) (*v
build.Status.Duration = duration.String()
// Account for the Build metrics
- buildAttempts.
- WithLabelValues(build.Status.Phase.String()).
- Observe(float64(getBuildAttemptsFor(build)))
- buildDuration.
- WithLabelValues(build.Status.Phase.String()).
- Observe(duration.Seconds())
+ observeBuildResult(build, build.Status.Phase, duration)
}
return build, nil
diff --git a/pkg/controller/build/recovery.go b/pkg/controller/build/recovery.go
index 5b44662..0c6cc9c 100644
--- a/pkg/controller/build/recovery.go
+++ b/pkg/controller/build/recovery.go
@@ -69,9 +69,6 @@ func (action *errorRecoveryAction) Handle(ctx context.Context, build *v1.Build)
if build.Status.Failure.Recovery.Attempt >= build.Status.Failure.Recovery.AttemptMax {
build.Status.Phase = v1.BuildPhaseError
- buildAttempts.
- WithLabelValues(build.Status.Phase.String()).
- Observe(float64(getBuildAttemptsFor(build)))
return build, nil
}
@@ -98,11 +95,3 @@ func (action *errorRecoveryAction) Handle(ctx context.Context, build *v1.Build)
return build, nil
}
-
-func getBuildAttemptsFor(build *v1.Build) int {
- attempts := 1
- if build.Status.Failure != nil {
- attempts += build.Status.Failure.Recovery.Attempt
- }
- return attempts
-}
diff --git a/pkg/controller/build/schedule_routine.go b/pkg/controller/build/schedule_routine.go
index 2a87b20..56d0c6f 100644
--- a/pkg/controller/build/schedule_routine.go
+++ b/pkg/controller/build/schedule_routine.go
@@ -130,12 +130,7 @@ func (action *scheduleRoutineAction) runBuild(ctx context.Context, build *v1.Bui
}
// Account for the Build metrics
- buildAttempts.
- WithLabelValues(status.Phase.String()).
- Observe(float64(getBuildAttemptsFor(build)))
- buildDuration.
- WithLabelValues(status.Phase.String()).
- Observe(duration.Seconds())
+ observeBuildResult(build, status.Phase, duration)
_ = action.updateBuildStatus(ctx, build, status)
break
@@ -152,12 +147,7 @@ func (action *scheduleRoutineAction) runBuild(ctx context.Context, build *v1.Bui
status.Duration = duration.String()
// Account for the Build metrics
- buildAttempts.
- WithLabelValues(status.Phase.String()).
- Observe(float64(getBuildAttemptsFor(build)))
- buildDuration.
- WithLabelValues(status.Phase.String()).
- Observe(duration.Seconds())
+ observeBuildResult(build, status.Phase, duration)
}
err := action.updateBuildStatus(ctx, build, status)
[camel-k] 17/25: feat: Add default alerting rule for build error SLO
Posted by as...@apache.org.
This is an automated email from the ASF dual-hosted git repository.
astefanutti pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/camel-k.git
commit 45ba6a097d8b204f60764091745412ec8c777010
Author: Antonin Stefanutti <an...@stefanutti.fr>
AuthorDate: Wed Oct 21 18:39:23 2020 +0200
feat: Add default alerting rule for build error SLO
---
deploy/operator-prometheus-rule.yaml | 15 ++++++++++++++-
1 file changed, 14 insertions(+), 1 deletion(-)
diff --git a/deploy/operator-prometheus-rule.yaml b/deploy/operator-prometheus-rule.yaml
index ab4e837..83f056a 100644
--- a/deploy/operator-prometheus-rule.yaml
+++ b/deploy/operator-prometheus-rule.yaml
@@ -70,7 +70,7 @@ spec:
for {{ $labels.job }} have their duration above 5m.
- alert: CamelKBuildFailure
expr: |
- sum(rate(camel_k_build_duration_seconds_count{result="Error"}[5m])) by (job)
+ sum(rate(camel_k_build_duration_seconds_count{result="Failed"}[5m])) by (job)
/
sum(rate(camel_k_build_duration_seconds_count[5m])) by (job)
* 100
@@ -81,6 +81,19 @@ spec:
annotations:
message: |
{{ printf "%0.0f" $value }}% of the builds for {{ $labels.job }} have failed.
+ - alert: CamelKBuildError
+ expr: |
+ sum(rate(camel_k_build_duration_seconds_count{result="Error"}[5m])) by (job)
+ /
+ sum(rate(camel_k_build_duration_seconds_count[5m])) by (job)
+ * 100
+ > 1
+ for: 10m
+ labels:
+ severity: critical
+ annotations:
+ message: |
+ {{ printf "%0.0f" $value }}% of the builds for {{ $labels.job }} have errored.
- alert: CamelKBuildQueueDuration1m
expr: |
(
[camel-k] 10/25: chore: Reformat reconciliation duration alerting
rule
Posted by as...@apache.org.
This is an automated email from the ASF dual-hosted git repository.
astefanutti pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/camel-k.git
commit b37d64412b67b2227d57cfc0d80c4c90edfe7296
Author: Antonin Stefanutti <an...@stefanutti.fr>
AuthorDate: Tue Oct 20 16:26:06 2020 +0200
chore: Reformat reconciliation duration alerting rule
---
deploy/operator-prometheus-rule.yaml | 12 ++++++++++--
1 file changed, 10 insertions(+), 2 deletions(-)
diff --git a/deploy/operator-prometheus-rule.yaml b/deploy/operator-prometheus-rule.yaml
index 73b8722..6947505 100644
--- a/deploy/operator-prometheus-rule.yaml
+++ b/deploy/operator-prometheus-rule.yaml
@@ -7,13 +7,21 @@ spec:
- name: camel-k-operator
rules:
- alert: CamelKReconciliationDuration
- expr: 100 * (1 - sum(rate(camel_k_reconciliation_duration_seconds_bucket{le="0.5"}[5m])) by (job) / sum(rate(camel_k_reconciliation_duration_seconds_count[5m])) by (job)) > 10
+ expr: |
+ (
+ 1 - sum(rate(camel_k_reconciliation_duration_seconds_bucket{le="0.5"}[5m])) by (job)
+ /
+ sum(rate(camel_k_reconciliation_duration_seconds_count[5m])) by (job)
+ )
+ * 100
+ > 10
for: 1m
labels:
severity: warning
annotations:
message: |
- {{ printf "%0.0f" $value }}% of the reconciliation requests for {{ $labels.job }} have their duration above 0.5s.
+ {{ printf "%0.0f" $value }}% of the reconciliation requests
+ for {{ $labels.job }} have their duration above 0.5s.
- alert: CamelKReconciliationFailure
expr: |
sum(rate(camel_k_reconciliation_duration_seconds_count{result="Errored"}[5m])) by (job)
[camel-k] 14/25: chore: Add build attempts per build histogram
metric
Posted by as...@apache.org.
This is an automated email from the ASF dual-hosted git repository.
astefanutti pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/camel-k.git
commit 62bdfa464ec746bbe3092f9dbc6b39c2b2750305
Author: Antonin Stefanutti <an...@stefanutti.fr>
AuthorDate: Wed Oct 21 16:39:58 2020 +0200
chore: Add build attempts per build histogram metric
---
pkg/controller/build/metrics.go | 14 ++++++++------
pkg/controller/build/monitor_pod.go | 19 +++++++++++++++----
pkg/controller/build/monitor_routine.go | 1 -
pkg/controller/build/recovery.go | 12 +++++++++++-
pkg/controller/build/schedule_routine.go | 20 ++++++++++++++++----
5 files changed, 50 insertions(+), 16 deletions(-)
diff --git a/pkg/controller/build/metrics.go b/pkg/controller/build/metrics.go
index 64286ce..59961cc 100644
--- a/pkg/controller/build/metrics.go
+++ b/pkg/controller/build/metrics.go
@@ -28,14 +28,16 @@ import (
const buildResultLabel = "result"
var (
- buildAttempt = prometheus.NewCounterVec(
- prometheus.CounterOpts{
- Name: "camel_k_build_attempt",
- Help: "Camel K build attempt",
+ buildAttempts = prometheus.NewHistogramVec(
+ prometheus.HistogramOpts{
+ Name: "camel_k_build_attempts",
+ Help: "Camel K build attempts",
+ Buckets: []float64{1, 2, 3, 4, 5},
},
[]string{
buildResultLabel,
- })
+ },
+ )
buildDuration = prometheus.NewHistogramVec(
prometheus.HistogramOpts{
@@ -72,5 +74,5 @@ var (
func init() {
// Register custom metrics with the global prometheus registry
- metrics.Registry.MustRegister(buildAttempt, buildDuration, queueDuration)
+ metrics.Registry.MustRegister(buildAttempts, buildDuration, queueDuration)
}
diff --git a/pkg/controller/build/monitor_pod.go b/pkg/controller/build/monitor_pod.go
index cead6f4..a24673f 100644
--- a/pkg/controller/build/monitor_pod.go
+++ b/pkg/controller/build/monitor_pod.go
@@ -69,9 +69,15 @@ func (action *monitorPodAction) Handle(ctx context.Context, build *v1.Build) (*v
build.Status.Phase = v1.BuildPhaseSucceeded
duration := metav1.Now().Sub(build.Status.StartedAt.Time)
build.Status.Duration = duration.String()
+
// Account for the Build metrics
- buildAttempt.WithLabelValues(build.Status.Phase.String()).Inc()
- buildDuration.WithLabelValues(build.Status.Phase.String()).Observe(duration.Seconds())
+ buildAttempts.
+ WithLabelValues(build.Status.Phase.String()).
+ Observe(float64(getBuildAttemptsFor(build)))
+ buildDuration.
+ WithLabelValues(build.Status.Phase.String()).
+ Observe(duration.Seconds())
+
for _, task := range build.Spec.Tasks {
if task.Image != nil {
build.Status.Image = task.Image.BuiltImage
@@ -90,9 +96,14 @@ func (action *monitorPodAction) Handle(ctx context.Context, build *v1.Build) (*v
build.Status.Phase = v1.BuildPhaseFailed
duration := metav1.Now().Sub(build.Status.StartedAt.Time)
build.Status.Duration = duration.String()
+
// Account for the Build metrics
- buildAttempt.WithLabelValues(build.Status.Phase.String()).Inc()
- buildDuration.WithLabelValues(build.Status.Phase.String()).Observe(duration.Seconds())
+ buildAttempts.
+ WithLabelValues(build.Status.Phase.String()).
+ Observe(float64(getBuildAttemptsFor(build)))
+ buildDuration.
+ WithLabelValues(build.Status.Phase.String()).
+ Observe(duration.Seconds())
}
return build, nil
diff --git a/pkg/controller/build/monitor_routine.go b/pkg/controller/build/monitor_routine.go
index 8d15aa9..89a47bd 100644
--- a/pkg/controller/build/monitor_routine.go
+++ b/pkg/controller/build/monitor_routine.go
@@ -53,7 +53,6 @@ func (action *monitorRoutineAction) Handle(ctx context.Context, build *v1.Build)
// and recover the build if it's missing. This can happen when the operator
// stops abruptly and restarts or the build status update fails.
build.Status.Phase = v1.BuildPhaseFailed
- buildAttempt.WithLabelValues(build.Status.Phase.String()).Inc()
return build, nil
}
diff --git a/pkg/controller/build/recovery.go b/pkg/controller/build/recovery.go
index 5320809..5b44662 100644
--- a/pkg/controller/build/recovery.go
+++ b/pkg/controller/build/recovery.go
@@ -69,7 +69,9 @@ func (action *errorRecoveryAction) Handle(ctx context.Context, build *v1.Build)
if build.Status.Failure.Recovery.Attempt >= build.Status.Failure.Recovery.AttemptMax {
build.Status.Phase = v1.BuildPhaseError
- buildAttempt.WithLabelValues(build.Status.Phase.String()).Inc()
+ buildAttempts.
+ WithLabelValues(build.Status.Phase.String()).
+ Observe(float64(getBuildAttemptsFor(build)))
return build, nil
}
@@ -96,3 +98,11 @@ func (action *errorRecoveryAction) Handle(ctx context.Context, build *v1.Build)
return build, nil
}
+
+func getBuildAttemptsFor(build *v1.Build) int {
+ attempts := 1
+ if build.Status.Failure != nil {
+ attempts += build.Status.Failure.Recovery.Attempt
+ }
+ return attempts
+}
diff --git a/pkg/controller/build/schedule_routine.go b/pkg/controller/build/schedule_routine.go
index eccddbf..2a87b20 100644
--- a/pkg/controller/build/schedule_routine.go
+++ b/pkg/controller/build/schedule_routine.go
@@ -128,9 +128,15 @@ func (action *scheduleRoutineAction) runBuild(ctx context.Context, build *v1.Bui
task.GetName()),
Duration: duration.String(),
}
+
// Account for the Build metrics
- buildAttempt.WithLabelValues(status.Phase.String()).Inc()
- buildDuration.WithLabelValues(status.Phase.String()).Observe(duration.Seconds())
+ buildAttempts.
+ WithLabelValues(status.Phase.String()).
+ Observe(float64(getBuildAttemptsFor(build)))
+ buildDuration.
+ WithLabelValues(status.Phase.String()).
+ Observe(duration.Seconds())
+
_ = action.updateBuildStatus(ctx, build, status)
break
}
@@ -144,10 +150,16 @@ func (action *scheduleRoutineAction) runBuild(ctx context.Context, build *v1.Bui
if lastTask || taskFailed {
duration := metav1.Now().Sub(build.Status.StartedAt.Time)
status.Duration = duration.String()
+
// Account for the Build metrics
- buildAttempt.WithLabelValues(status.Phase.String()).Inc()
- buildDuration.WithLabelValues(status.Phase.String()).Observe(duration.Seconds())
+ buildAttempts.
+ WithLabelValues(status.Phase.String()).
+ Observe(float64(getBuildAttemptsFor(build)))
+ buildDuration.
+ WithLabelValues(status.Phase.String()).
+ Observe(duration.Seconds())
}
+
err := action.updateBuildStatus(ctx, build, status)
if err != nil || taskFailed {
break
[camel-k] 05/25: feat: Add Camel K controllers reconcile loop
histogram metric
Posted by as...@apache.org.
This is an automated email from the ASF dual-hosted git repository.
astefanutti pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/camel-k.git
commit 0bb2ade6cb66fb66a3ac134fbd249d75f81186dc
Author: Antonin Stefanutti <an...@stefanutti.fr>
AuthorDate: Wed Oct 14 14:58:21 2020 +0200
feat: Add Camel K controllers reconcile loop histogram metric
---
pkg/controller/build/build_controller.go | 37 +++---
.../integration/integration_controller.go | 41 ++++---
.../integrationkit/integrationkit_controller.go | 47 ++++----
.../integrationplatform_controller.go | 33 ++++--
pkg/controller/kamelet/kamelet_controller.go | 39 ++++---
.../kameletbinding/kamelet_binding_controller.go | 35 +++---
pkg/util/monitoring/controller.go | 128 +++++++++++++++++++++
pkg/util/monitoring/timer.go | 46 ++++++++
8 files changed, 315 insertions(+), 91 deletions(-)
diff --git a/pkg/controller/build/build_controller.go b/pkg/controller/build/build_controller.go
index 30a0e04..2e09188 100644
--- a/pkg/controller/build/build_controller.go
+++ b/pkg/controller/build/build_controller.go
@@ -22,12 +22,12 @@ import (
"sync"
"time"
- camelevent "github.com/apache/camel-k/pkg/event"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/client-go/tools/record"
+ "k8s.io/apimachinery/pkg/runtime/schema"
k8sclient "sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/controller"
"sigs.k8s.io/controller-runtime/pkg/event"
@@ -40,7 +40,9 @@ import (
v1 "github.com/apache/camel-k/pkg/apis/camel/v1"
"github.com/apache/camel-k/pkg/builder"
"github.com/apache/camel-k/pkg/client"
+ camelevent "github.com/apache/camel-k/pkg/event"
"github.com/apache/camel-k/pkg/platform"
+ "github.com/apache/camel-k/pkg/util/monitoring"
)
// Add creates a new Build Controller and adds it to the Manager. The Manager will set fields on the Controller
@@ -55,13 +57,20 @@ func Add(mgr manager.Manager) error {
// newReconciler returns a new reconcile.Reconciler
func newReconciler(mgr manager.Manager, c client.Client) reconcile.Reconciler {
- return &ReconcileBuild{
- client: c,
- reader: mgr.GetAPIReader(),
- scheme: mgr.GetScheme(),
- builder: builder.New(c),
- recorder: mgr.GetEventRecorderFor("camel-k-build-controller"),
- }
+ return monitoring.NewInstrumentedReconciler(
+ &reconcileBuild{
+ client: c,
+ reader: mgr.GetAPIReader(),
+ scheme: mgr.GetScheme(),
+ builder: builder.New(c),
+ recorder: mgr.GetEventRecorderFor("camel-k-build-controller"),
+ },
+ schema.GroupVersionKind{
+ Group: v1.SchemeGroupVersion.Group,
+ Version: v1.SchemeGroupVersion.Version,
+ Kind: v1.BuildKind,
+ },
+ )
}
// add adds a new Controller to mgr with r as the reconcile.Reconciler
@@ -111,12 +120,12 @@ func add(mgr manager.Manager, r reconcile.Reconciler) error {
return nil
}
-var _ reconcile.Reconciler = &ReconcileBuild{}
+var _ reconcile.Reconciler = &reconcileBuild{}
-// ReconcileBuild reconciles a Build object
-type ReconcileBuild struct {
+// reconcileBuild reconciles a Build object
+type reconcileBuild struct {
// This client, initialized using mgr.Client() above, is a split client
- // that reads objects from the cache and writes to the apiserver
+ // that reads objects from the cache and writes to the API server
client client.Client
// Non-caching client to be used whenever caching may cause race conditions,
// like in the builds scheduling critical section
@@ -132,7 +141,7 @@ type ReconcileBuild struct {
// Note:
// The Controller will requeue the Request to be processed again if the returned error is non-nil or
// Result.Requeue is true, otherwise upon completion it will remove the work from the queue.
-func (r *ReconcileBuild) Reconcile(request reconcile.Request) (reconcile.Result, error) {
+func (r *reconcileBuild) Reconcile(request reconcile.Request) (reconcile.Result, error) {
rlog := Log.WithValues("request-namespace", request.Namespace, "request-name", request.Name)
rlog.Info("Reconciling Build")
@@ -255,7 +264,7 @@ func (r *ReconcileBuild) Reconcile(request reconcile.Request) (reconcile.Result,
return reconcile.Result{}, nil
}
-func (r *ReconcileBuild) update(ctx context.Context, base *v1.Build, target *v1.Build) (reconcile.Result, error) {
+func (r *reconcileBuild) update(ctx context.Context, base *v1.Build, target *v1.Build) (reconcile.Result, error) {
err := r.client.Status().Patch(ctx, target, k8sclient.MergeFrom(base))
return reconcile.Result{}, err
diff --git a/pkg/controller/integration/integration_controller.go b/pkg/controller/integration/integration_controller.go
index 3d94b87..5a1b5fe 100644
--- a/pkg/controller/integration/integration_controller.go
+++ b/pkg/controller/integration/integration_controller.go
@@ -20,14 +20,14 @@ package integration
import (
"context"
- camelevent "github.com/apache/camel-k/pkg/event"
- "github.com/apache/camel-k/pkg/platform"
appsv1 "k8s.io/api/apps/v1"
"k8s.io/api/batch/v1beta1"
k8serrors "k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/runtime"
+ "k8s.io/apimachinery/pkg/runtime/schema"
"k8s.io/apimachinery/pkg/types"
"k8s.io/client-go/tools/record"
+
k8sclient "sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/controller"
"sigs.k8s.io/controller-runtime/pkg/event"
@@ -39,15 +39,13 @@ import (
v1 "github.com/apache/camel-k/pkg/apis/camel/v1"
"github.com/apache/camel-k/pkg/client"
+ camelevent "github.com/apache/camel-k/pkg/event"
+ "github.com/apache/camel-k/pkg/platform"
"github.com/apache/camel-k/pkg/util/digest"
"github.com/apache/camel-k/pkg/util/log"
+ "github.com/apache/camel-k/pkg/util/monitoring"
)
-/**
-* USER ACTION REQUIRED: This is a scaffold file intended for the user to modify with their own Controller
-* business logic. Delete these comments after modifying this file.*
- */
-
// Add creates a new Integration Controller and adds it to the Manager. The Manager will set fields on the Controller
// and Start it when the Manager is Started.
func Add(mgr manager.Manager) error {
@@ -60,11 +58,18 @@ func Add(mgr manager.Manager) error {
// newReconciler returns a new reconcile.Reconciler
func newReconciler(mgr manager.Manager, c client.Client) reconcile.Reconciler {
- return &ReconcileIntegration{
- client: c,
- scheme: mgr.GetScheme(),
- recorder: mgr.GetEventRecorderFor("camel-k-integration-controller"),
- }
+ return monitoring.NewInstrumentedReconciler(
+ &reconcileIntegration{
+ client: c,
+ scheme: mgr.GetScheme(),
+ recorder: mgr.GetEventRecorderFor("camel-k-integration-controller"),
+ },
+ schema.GroupVersionKind{
+ Group: v1.SchemeGroupVersion.Group,
+ Version: v1.SchemeGroupVersion.Version,
+ Kind: v1.IntegrationKind,
+ },
+ )
}
// add adds a new Controller to mgr with r as the reconcile.Reconciler
@@ -215,12 +220,12 @@ func add(mgr manager.Manager, r reconcile.Reconciler) error {
return nil
}
-var _ reconcile.Reconciler = &ReconcileIntegration{}
+var _ reconcile.Reconciler = &reconcileIntegration{}
-// ReconcileIntegration reconciles a Integration object
-type ReconcileIntegration struct {
+// reconcileIntegration reconciles an Integration object
+type reconcileIntegration struct {
// This client, initialized using mgr.Client() above, is a split client
- // that reads objects from the cache and writes to the apiserver
+ // that reads objects from the cache and writes to the API server
client client.Client
scheme *runtime.Scheme
recorder record.EventRecorder
@@ -231,7 +236,7 @@ type ReconcileIntegration struct {
// Note:
// The Controller will requeue the Request to be processed again if the returned error is non-nil or
// Result.Requeue is true, otherwise upon completion it will remove the work from the queue.
-func (r *ReconcileIntegration) Reconcile(request reconcile.Request) (reconcile.Result, error) {
+func (r *reconcileIntegration) Reconcile(request reconcile.Request) (reconcile.Result, error) {
rlog := Log.WithValues("request-namespace", request.Namespace, "request-name", request.Name)
rlog.Info("Reconciling Integration")
@@ -310,7 +315,7 @@ func (r *ReconcileIntegration) Reconcile(request reconcile.Request) (reconcile.R
return reconcile.Result{}, nil
}
-func (r *ReconcileIntegration) update(ctx context.Context, base *v1.Integration, target *v1.Integration) (reconcile.Result, error) {
+func (r *reconcileIntegration) update(ctx context.Context, base *v1.Integration, target *v1.Integration) (reconcile.Result, error) {
dgst, err := digest.ComputeForIntegration(target)
if err != nil {
return reconcile.Result{}, err
diff --git a/pkg/controller/integrationkit/integrationkit_controller.go b/pkg/controller/integrationkit/integrationkit_controller.go
index aaa87fe..a525725 100644
--- a/pkg/controller/integrationkit/integrationkit_controller.go
+++ b/pkg/controller/integrationkit/integrationkit_controller.go
@@ -20,18 +20,13 @@ package integrationkit
import (
"context"
- camelevent "github.com/apache/camel-k/pkg/event"
- "github.com/apache/camel-k/pkg/platform"
- "k8s.io/client-go/tools/record"
-
- "github.com/apache/camel-k/pkg/util/digest"
- "github.com/apache/camel-k/pkg/util/log"
-
"k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/runtime"
+ "k8s.io/apimachinery/pkg/runtime/schema"
"k8s.io/apimachinery/pkg/types"
- k8sclient "sigs.k8s.io/controller-runtime/pkg/client"
+ "k8s.io/client-go/tools/record"
+ k8sclient "sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/controller"
"sigs.k8s.io/controller-runtime/pkg/event"
"sigs.k8s.io/controller-runtime/pkg/handler"
@@ -42,6 +37,11 @@ import (
v1 "github.com/apache/camel-k/pkg/apis/camel/v1"
"github.com/apache/camel-k/pkg/client"
+ camelevent "github.com/apache/camel-k/pkg/event"
+ "github.com/apache/camel-k/pkg/platform"
+ "github.com/apache/camel-k/pkg/util/digest"
+ "github.com/apache/camel-k/pkg/util/log"
+ "github.com/apache/camel-k/pkg/util/monitoring"
)
// Add creates a new IntegrationKit Controller and adds it to the Manager. The Manager will set fields on the Controller
@@ -56,11 +56,18 @@ func Add(mgr manager.Manager) error {
// newReconciler returns a new reconcile.Reconciler
func newReconciler(mgr manager.Manager, c client.Client) reconcile.Reconciler {
- return &ReconcileIntegrationKit{
- client: c,
- scheme: mgr.GetScheme(),
- recorder: mgr.GetEventRecorderFor("camel-k-integration-kit-controller"),
- }
+ return monitoring.NewInstrumentedReconciler(
+ &reconcileIntegrationKit{
+ client: c,
+ scheme: mgr.GetScheme(),
+ recorder: mgr.GetEventRecorderFor("camel-k-integration-kit-controller"),
+ },
+ schema.GroupVersionKind{
+ Group: v1.SchemeGroupVersion.Group,
+ Version: v1.SchemeGroupVersion.Version,
+ Kind: v1.IntegrationKitKind,
+ },
+ )
}
// add adds a new Controller to mgr with r as the reconcile.Reconciler
@@ -112,7 +119,7 @@ func add(mgr manager.Manager, r reconcile.Reconciler) error {
}
// Watch for IntegrationPlatform phase transitioning to ready and enqueue
- // requests for any integrationkits that are in phase waiting for platform
+ // requests for any integration kits that are in phase waiting for platform
err = c.Watch(&source.Kind{Type: &v1.IntegrationPlatform{}}, &handler.EnqueueRequestsFromMapFunc{
ToRequests: handler.ToRequestsFunc(func(a handler.MapObject) []reconcile.Request {
platform := a.Object.(*v1.IntegrationPlatform)
@@ -149,12 +156,12 @@ func add(mgr manager.Manager, r reconcile.Reconciler) error {
return nil
}
-var _ reconcile.Reconciler = &ReconcileIntegrationKit{}
+var _ reconcile.Reconciler = &reconcileIntegrationKit{}
-// ReconcileIntegrationKit reconciles a IntegrationKit object
-type ReconcileIntegrationKit struct {
+// reconcileIntegrationKit reconciles an IntegrationKit object
+type reconcileIntegrationKit struct {
// This client, initialized using mgr.Client() above, is a split client
- // that reads objects from the cache and writes to the apiserver
+ // that reads objects from the cache and writes to the API server
client client.Client
scheme *runtime.Scheme
recorder record.EventRecorder
@@ -165,7 +172,7 @@ type ReconcileIntegrationKit struct {
// Note:
// The Controller will requeue the Request to be processed again if the returned error is non-nil or
// Result.Requeue is true, otherwise upon completion it will remove the work from the queue.
-func (r *ReconcileIntegrationKit) Reconcile(request reconcile.Request) (reconcile.Result, error) {
+func (r *reconcileIntegrationKit) Reconcile(request reconcile.Request) (reconcile.Result, error) {
rlog := Log.WithValues("request-namespace", request.Namespace, "request-name", request.Name)
rlog.Info("Reconciling IntegrationKit")
@@ -264,7 +271,7 @@ func (r *ReconcileIntegrationKit) Reconcile(request reconcile.Request) (reconcil
return reconcile.Result{}, nil
}
-func (r *ReconcileIntegrationKit) update(ctx context.Context, base *v1.IntegrationKit, target *v1.IntegrationKit) (reconcile.Result, error) {
+func (r *reconcileIntegrationKit) update(ctx context.Context, base *v1.IntegrationKit, target *v1.IntegrationKit) (reconcile.Result, error) {
dgst, err := digest.ComputeForIntegrationKit(target)
if err != nil {
return reconcile.Result{}, err
diff --git a/pkg/controller/integrationplatform/integrationplatform_controller.go b/pkg/controller/integrationplatform/integrationplatform_controller.go
index 9a0d99d..71a8711 100644
--- a/pkg/controller/integrationplatform/integrationplatform_controller.go
+++ b/pkg/controller/integrationplatform/integrationplatform_controller.go
@@ -21,10 +21,9 @@ import (
"context"
"time"
- camelevent "github.com/apache/camel-k/pkg/event"
- "github.com/apache/camel-k/pkg/platform"
"k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/runtime"
+ "k8s.io/apimachinery/pkg/runtime/schema"
"k8s.io/client-go/tools/record"
k8sclient "sigs.k8s.io/controller-runtime/pkg/client"
@@ -38,6 +37,9 @@ import (
v1 "github.com/apache/camel-k/pkg/apis/camel/v1"
"github.com/apache/camel-k/pkg/client"
+ camelevent "github.com/apache/camel-k/pkg/event"
+ "github.com/apache/camel-k/pkg/platform"
+ "github.com/apache/camel-k/pkg/util/monitoring"
)
// Add creates a new IntegrationPlatform Controller and adds it to the Manager. The Manager will set fields on the Controller
@@ -52,11 +54,18 @@ func Add(mgr manager.Manager) error {
// newReconciler returns a new reconcile.Reconciler
func newReconciler(mgr manager.Manager, c client.Client) reconcile.Reconciler {
- return &ReconcileIntegrationPlatform{
- client: c,
- scheme: mgr.GetScheme(),
- recorder: mgr.GetEventRecorderFor("camel-k-integration-platform-controller"),
- }
+ return monitoring.NewInstrumentedReconciler(
+ &reconcileIntegrationPlatform{
+ client: c,
+ scheme: mgr.GetScheme(),
+ recorder: mgr.GetEventRecorderFor("camel-k-integration-platform-controller"),
+ },
+ schema.GroupVersionKind{
+ Group: v1.SchemeGroupVersion.Group,
+ Version: v1.SchemeGroupVersion.Version,
+ Kind: v1.IntegrationPlatformKind,
+ },
+ )
}
// add adds a new Controller to mgr with r as the reconcile.Reconciler
@@ -90,12 +99,12 @@ func add(mgr manager.Manager, r reconcile.Reconciler) error {
return nil
}
-var _ reconcile.Reconciler = &ReconcileIntegrationPlatform{}
+var _ reconcile.Reconciler = &reconcileIntegrationPlatform{}
-// ReconcileIntegrationPlatform reconciles a IntegrationPlatform object
-type ReconcileIntegrationPlatform struct {
+// reconcileIntegrationPlatform reconciles an IntegrationPlatform object
+type reconcileIntegrationPlatform struct {
// This client, initialized using mgr.Client() above, is a split client
- // that reads objects from the cache and writes to the apiserver
+ // that reads objects from the cache and writes to the API server
client client.Client
scheme *runtime.Scheme
recorder record.EventRecorder
@@ -106,7 +115,7 @@ type ReconcileIntegrationPlatform struct {
// Note:
// The Controller will requeue the Request to be processed again if the returned error is non-nil or
// Result.Requeue is true, otherwise upon completion it will remove the work from the queue.
-func (r *ReconcileIntegrationPlatform) Reconcile(request reconcile.Request) (reconcile.Result, error) {
+func (r *reconcileIntegrationPlatform) Reconcile(request reconcile.Request) (reconcile.Result, error) {
rlog := Log.WithValues("request-namespace", request.Namespace, "request-name", request.Name)
rlog.Info("Reconciling IntegrationPlatform")
diff --git a/pkg/controller/kamelet/kamelet_controller.go b/pkg/controller/kamelet/kamelet_controller.go
index 22d7548..a9acf50 100644
--- a/pkg/controller/kamelet/kamelet_controller.go
+++ b/pkg/controller/kamelet/kamelet_controller.go
@@ -21,13 +21,11 @@ import (
"context"
"time"
- "github.com/apache/camel-k/pkg/apis/camel/v1alpha1"
- "github.com/apache/camel-k/pkg/client"
- camelevent "github.com/apache/camel-k/pkg/event"
- "github.com/apache/camel-k/pkg/platform"
"k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/runtime"
+ "k8s.io/apimachinery/pkg/runtime/schema"
"k8s.io/client-go/tools/record"
+
k8sclient "sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/controller"
"sigs.k8s.io/controller-runtime/pkg/event"
@@ -36,6 +34,12 @@ import (
"sigs.k8s.io/controller-runtime/pkg/predicate"
"sigs.k8s.io/controller-runtime/pkg/reconcile"
"sigs.k8s.io/controller-runtime/pkg/source"
+
+ "github.com/apache/camel-k/pkg/apis/camel/v1alpha1"
+ "github.com/apache/camel-k/pkg/client"
+ camelevent "github.com/apache/camel-k/pkg/event"
+ "github.com/apache/camel-k/pkg/platform"
+ "github.com/apache/camel-k/pkg/util/monitoring"
)
// Add creates a new Kamelet Controller and adds it to the Manager. The Manager will set fields on the Controller
@@ -50,11 +54,18 @@ func Add(mgr manager.Manager) error {
// newReconciler returns a new reconcile.Reconciler
func newReconciler(mgr manager.Manager, c client.Client) reconcile.Reconciler {
- return &ReconcileKamelet{
- client: c,
- scheme: mgr.GetScheme(),
- recorder: mgr.GetEventRecorderFor("camel-k-kamelet-controller"),
- }
+ return monitoring.NewInstrumentedReconciler(
+ &reconcileKamelet{
+ client: c,
+ scheme: mgr.GetScheme(),
+ recorder: mgr.GetEventRecorderFor("camel-k-kamelet-controller"),
+ },
+ schema.GroupVersionKind{
+ Group: v1alpha1.SchemeGroupVersion.Group,
+ Version: v1alpha1.SchemeGroupVersion.Version,
+ Kind: v1alpha1.KameletKind,
+ },
+ )
}
// add adds a new Controller to mgr with r as the reconcile.Reconciler
@@ -88,12 +99,12 @@ func add(mgr manager.Manager, r reconcile.Reconciler) error {
return nil
}
-var _ reconcile.Reconciler = &ReconcileKamelet{}
+var _ reconcile.Reconciler = &reconcileKamelet{}
-// ReconcileKamelet reconciles a Kamelet object
-type ReconcileKamelet struct {
+// reconcileKamelet reconciles a Kamelet object
+type reconcileKamelet struct {
// This client, initialized using mgr.Client() above, is a split client
- // that reads objects from the cache and writes to the apiserver
+ // that reads objects from the cache and writes to the API server
client client.Client
scheme *runtime.Scheme
recorder record.EventRecorder
@@ -104,7 +115,7 @@ type ReconcileKamelet struct {
// Note:
// The Controller will requeue the Request to be processed again if the returned error is non-nil or
// Result.Requeue is true, otherwise upon completion it will remove the work from the queue.
-func (r *ReconcileKamelet) Reconcile(request reconcile.Request) (reconcile.Result, error) {
+func (r *reconcileKamelet) Reconcile(request reconcile.Request) (reconcile.Result, error) {
rlog := Log.WithValues("request-namespace", request.Namespace, "request-name", request.Name)
rlog.Info("Reconciling Kamelet")
diff --git a/pkg/controller/kameletbinding/kamelet_binding_controller.go b/pkg/controller/kameletbinding/kamelet_binding_controller.go
index bbc84cb..ffde023 100644
--- a/pkg/controller/kameletbinding/kamelet_binding_controller.go
+++ b/pkg/controller/kameletbinding/kamelet_binding_controller.go
@@ -21,14 +21,11 @@ import (
"context"
"time"
- v1 "github.com/apache/camel-k/pkg/apis/camel/v1"
- "github.com/apache/camel-k/pkg/apis/camel/v1alpha1"
- "github.com/apache/camel-k/pkg/client"
- camelevent "github.com/apache/camel-k/pkg/event"
- "github.com/apache/camel-k/pkg/platform"
"k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/runtime"
+ "k8s.io/apimachinery/pkg/runtime/schema"
"k8s.io/client-go/tools/record"
+
k8sclient "sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/controller"
"sigs.k8s.io/controller-runtime/pkg/event"
@@ -37,6 +34,13 @@ import (
"sigs.k8s.io/controller-runtime/pkg/predicate"
"sigs.k8s.io/controller-runtime/pkg/reconcile"
"sigs.k8s.io/controller-runtime/pkg/source"
+
+ v1 "github.com/apache/camel-k/pkg/apis/camel/v1"
+ "github.com/apache/camel-k/pkg/apis/camel/v1alpha1"
+ "github.com/apache/camel-k/pkg/client"
+ camelevent "github.com/apache/camel-k/pkg/event"
+ "github.com/apache/camel-k/pkg/platform"
+ "github.com/apache/camel-k/pkg/util/monitoring"
)
// Add creates a new KameletBinding Controller and adds it to the Manager. The Manager will set fields on the Controller
@@ -49,16 +53,21 @@ func Add(mgr manager.Manager) error {
return add(mgr, newReconciler(mgr, c))
}
-// newReconciler returns a new reconcile.Reconciler
func newReconciler(mgr manager.Manager, c client.Client) reconcile.Reconciler {
- return &ReconcileKameletBinding{
- client: c,
- scheme: mgr.GetScheme(),
- recorder: mgr.GetEventRecorderFor("camel-k-kamelet-binding-controller"),
- }
+ return monitoring.NewInstrumentedReconciler(
+ &ReconcileKameletBinding{
+ client: c,
+ scheme: mgr.GetScheme(),
+ recorder: mgr.GetEventRecorderFor("camel-k-kamelet-binding-controller"),
+ },
+ schema.GroupVersionKind{
+ Group: v1alpha1.SchemeGroupVersion.Group,
+ Version: v1alpha1.SchemeGroupVersion.Version,
+ Kind: v1alpha1.KameletBindingKind,
+ },
+ )
}
-// add adds a new Controller to mgr with r as the reconcile.Reconciler
func add(mgr manager.Manager, r reconcile.Reconciler) error {
// Create a new controller
c, err := controller.New("kamelet-binding-controller", mgr, controller.Options{Reconciler: r})
@@ -103,7 +112,7 @@ var _ reconcile.Reconciler = &ReconcileKameletBinding{}
// ReconcileKameletBinding reconciles a KameletBinding object
type ReconcileKameletBinding struct {
// This client, initialized using mgr.Client() above, is a split client
- // that reads objects from the cache and writes to the apiserver
+ // that reads objects from the cache and writes to the API server
client client.Client
scheme *runtime.Scheme
recorder record.EventRecorder
diff --git a/pkg/util/monitoring/controller.go b/pkg/util/monitoring/controller.go
new file mode 100644
index 0000000..c9c3136
--- /dev/null
+++ b/pkg/util/monitoring/controller.go
@@ -0,0 +1,128 @@
+/*
+Licensed to the Apache Software Foundation (ASF) under one or more
+contributor license agreements. See the NOTICE file distributed with
+this work for additional information regarding copyright ownership.
+The ASF licenses this file to You under the Apache License, Version 2.0
+(the "License"); you may not use this file except in compliance with
+the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package monitoring
+
+import (
+ "time"
+
+ "k8s.io/apimachinery/pkg/runtime/schema"
+
+ "sigs.k8s.io/controller-runtime/pkg/metrics"
+ "sigs.k8s.io/controller-runtime/pkg/reconcile"
+
+ "github.com/prometheus/client_golang/prometheus"
+)
+
+type resultLabelValue string
+
+const (
+ reconciled resultLabelValue = "Reconciled"
+ errored resultLabelValue = "Errored"
+ requeued resultLabelValue = "Requeued"
+
+ namespaceLabel = "namespace"
+ groupLabel = "group"
+ versionLabel = "version"
+ kindLabel = "kind"
+ resultLabel = "result"
+ tagLabel = "tag"
+)
+
+type tagLabelValue string
+
+const (
+ platformError tagLabelValue = "PlatformError"
+ userError tagLabelValue = "UserError"
+)
+
+type instrumentedReconciler struct {
+ reconciler reconcile.Reconciler
+ gvk schema.GroupVersionKind
+}
+
+var _ reconcile.Reconciler = &instrumentedReconciler{}
+
+func NewInstrumentedReconciler(reconciler reconcile.Reconciler, gvk schema.GroupVersionKind) reconcile.Reconciler {
+ return &instrumentedReconciler{
+ reconciler: reconciler,
+ gvk: gvk,
+ }
+}
+
+func (r *instrumentedReconciler) Reconcile(request reconcile.Request) (reconcile.Result, error) {
+ timer := NewTimer()
+
+ res, err := r.reconciler.Reconcile(request)
+
+ labels := prometheus.Labels{
+ namespaceLabel: request.Namespace,
+ groupLabel: r.gvk.Group,
+ versionLabel: r.gvk.Version,
+ kindLabel: r.gvk.Kind,
+ resultLabel: resultLabelFor(res, err),
+ tagLabel: "",
+ }
+ if err != nil {
+ // Controller errors are tagged as platform errors
+ labels[tagLabel] = string(platformError)
+ }
+
+ timer.ObserveDurationInSeconds(loopDuration.With(labels))
+
+ return res, err
+}
+
+func resultLabelFor(res reconcile.Result, err error) string {
+ var label resultLabelValue
+ if err != nil {
+ label = errored
+ } else if res.Requeue || res.RequeueAfter > 0 {
+ label = requeued
+ } else {
+ label = reconciled
+ }
+ return string(label)
+}
+
+var (
+ loopDuration = prometheus.NewHistogramVec(
+ prometheus.HistogramOpts{
+ Name: "camel_k_reconciliation_duration_seconds",
+ Help: "Camel K reconciliation loop duration",
+ Buckets: []float64{
+ 0.25 * time.Second.Seconds(),
+ 0.5 * time.Second.Seconds(),
+ 1 * time.Second.Seconds(),
+ 5 * time.Second.Seconds(),
+ },
+ },
+ []string{
+ namespaceLabel,
+ groupLabel,
+ versionLabel,
+ kindLabel,
+ resultLabel,
+ tagLabel,
+ },
+ )
+)
+
+func init() {
+ // Register custom metrics with the global prometheus registry
+ metrics.Registry.MustRegister(loopDuration)
+}
diff --git a/pkg/util/monitoring/timer.go b/pkg/util/monitoring/timer.go
new file mode 100644
index 0000000..215e8dd
--- /dev/null
+++ b/pkg/util/monitoring/timer.go
@@ -0,0 +1,46 @@
+/*
+Licensed to the Apache Software Foundation (ASF) under one or more
+contributor license agreements. See the NOTICE file distributed with
+this work for additional information regarding copyright ownership.
+The ASF licenses this file to You under the Apache License, Version 2.0
+(the "License"); you may not use this file except in compliance with
+the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package monitoring
+
+import (
+ "time"
+
+ "github.com/prometheus/client_golang/prometheus"
+)
+
+// Timer is a helper type to time functions. Use NewTimer to create new
+// instances.
+type Timer struct {
+ begin time.Time
+}
+
+// NewTimer creates a new Timer.
+func NewTimer() *Timer {
+ return &Timer{
+ begin: time.Now(),
+ }
+}
+
+// ObserveDurationInSeconds records the duration passed since the Timer was created
+// with NewTimer. It calls the Observe method of the provided Observer. The observed
+// duration is also returned.
+func (t *Timer) ObserveDurationInSeconds(o prometheus.Observer) time.Duration {
+ d := time.Since(t.begin)
+ o.Observe(d.Seconds())
+ return d
+}
[camel-k] 13/25: feat: Add default alerting rules for build queue
duration SLOs
Posted by as...@apache.org.
This is an automated email from the ASF dual-hosted git repository.
astefanutti pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/camel-k.git
commit ae92b0eed63e5b31b74f14670df6ffa0332c05ac
Author: Antonin Stefanutti <an...@stefanutti.fr>
AuthorDate: Wed Oct 21 14:38:30 2020 +0200
feat: Add default alerting rules for build queue duration SLOs
---
deploy/operator-prometheus-rule.yaml | 32 ++++++++++++++++++++++++++++++++
1 file changed, 32 insertions(+)
diff --git a/deploy/operator-prometheus-rule.yaml b/deploy/operator-prometheus-rule.yaml
index 0d6ca96..ab4e837 100644
--- a/deploy/operator-prometheus-rule.yaml
+++ b/deploy/operator-prometheus-rule.yaml
@@ -81,3 +81,35 @@ spec:
annotations:
message: |
{{ printf "%0.0f" $value }}% of the builds for {{ $labels.job }} have failed.
+ - alert: CamelKBuildQueueDuration1m
+ expr: |
+ (
+ 1 - sum(rate(camel_k_build_queue_duration_seconds_bucket{le="60"}[5m])) by (job)
+ /
+ sum(rate(camel_k_build_queue_duration_seconds_count[5m])) by (job)
+ )
+ * 100
+ > 1
+ for: 1m
+ labels:
+ severity: warning
+ annotations:
+ message: |
+ {{ printf "%0.0f" $value }}% of the builds for {{ $labels.job }}
+ have been queued for more than 1m.
+ - alert: CamelKBuildQueueDuration5m
+ expr: |
+ (
+ 1 - sum(rate(camel_k_build_queue_duration_seconds_bucket{le="300"}[5m])) by (job)
+ /
+ sum(rate(camel_k_build_queue_duration_seconds_count[5m])) by (job)
+ )
+ * 100
+ > 1
+ for: 1m
+ labels:
+ severity: critical
+ annotations:
+ message: |
+ {{ printf "%0.0f" $value }}% of the builds for {{ $labels.job }}
+ have been queued for more than 5m.
[camel-k] 04/25: chore: Add BuildPhase.String() helper method
Posted by as...@apache.org.
This is an automated email from the ASF dual-hosted git repository.
astefanutti pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/camel-k.git
commit 5a6dcef6740a5e51c8e8ea12b770c10d98dd9df5
Author: Antonin Stefanutti <an...@stefanutti.fr>
AuthorDate: Tue Oct 13 12:13:44 2020 +0200
chore: Add BuildPhase.String() helper method
---
pkg/apis/camel/v1/build_types_support.go | 4 ++++
pkg/controller/build/monitor_pod.go | 8 ++++----
pkg/controller/build/monitor_routine.go | 2 +-
pkg/controller/build/recovery.go | 2 +-
pkg/controller/build/schedule_routine.go | 8 ++++----
5 files changed, 14 insertions(+), 10 deletions(-)
diff --git a/pkg/apis/camel/v1/build_types_support.go b/pkg/apis/camel/v1/build_types_support.go
index 2c8243b..a1519b0 100644
--- a/pkg/apis/camel/v1/build_types_support.go
+++ b/pkg/apis/camel/v1/build_types_support.go
@@ -56,6 +56,10 @@ func NewBuildList() BuildList {
}
}
+func(buildPhase *BuildPhase) String() string {
+ return string(*buildPhase)
+}
+
// SetIntegrationPlatform --
func (in *Build) SetIntegrationPlatform(platform *IntegrationPlatform) {
cs := corev1.ConditionTrue
diff --git a/pkg/controller/build/monitor_pod.go b/pkg/controller/build/monitor_pod.go
index 15b3f87..cead6f4 100644
--- a/pkg/controller/build/monitor_pod.go
+++ b/pkg/controller/build/monitor_pod.go
@@ -70,8 +70,8 @@ func (action *monitorPodAction) Handle(ctx context.Context, build *v1.Build) (*v
duration := metav1.Now().Sub(build.Status.StartedAt.Time)
build.Status.Duration = duration.String()
// Account for the Build metrics
- buildAttempt.WithLabelValues(string(build.Status.Phase)).Inc()
- buildDuration.WithLabelValues(string(build.Status.Phase)).Observe(duration.Seconds())
+ buildAttempt.WithLabelValues(build.Status.Phase.String()).Inc()
+ buildDuration.WithLabelValues(build.Status.Phase.String()).Observe(duration.Seconds())
for _, task := range build.Spec.Tasks {
if task.Image != nil {
build.Status.Image = task.Image.BuiltImage
@@ -91,8 +91,8 @@ func (action *monitorPodAction) Handle(ctx context.Context, build *v1.Build) (*v
duration := metav1.Now().Sub(build.Status.StartedAt.Time)
build.Status.Duration = duration.String()
// Account for the Build metrics
- buildAttempt.WithLabelValues(string(build.Status.Phase)).Inc()
- buildDuration.WithLabelValues(string(build.Status.Phase)).Observe(duration.Seconds())
+ buildAttempt.WithLabelValues(build.Status.Phase.String()).Inc()
+ buildDuration.WithLabelValues(build.Status.Phase.String()).Observe(duration.Seconds())
}
return build, nil
diff --git a/pkg/controller/build/monitor_routine.go b/pkg/controller/build/monitor_routine.go
index c0ccd65..8d15aa9 100644
--- a/pkg/controller/build/monitor_routine.go
+++ b/pkg/controller/build/monitor_routine.go
@@ -53,7 +53,7 @@ func (action *monitorRoutineAction) Handle(ctx context.Context, build *v1.Build)
// and recover the build if it's missing. This can happen when the operator
// stops abruptly and restarts or the build status update fails.
build.Status.Phase = v1.BuildPhaseFailed
- buildAttempt.WithLabelValues(string(build.Status.Phase)).Inc()
+ buildAttempt.WithLabelValues(build.Status.Phase.String()).Inc()
return build, nil
}
diff --git a/pkg/controller/build/recovery.go b/pkg/controller/build/recovery.go
index b8d7bb8..5320809 100644
--- a/pkg/controller/build/recovery.go
+++ b/pkg/controller/build/recovery.go
@@ -69,7 +69,7 @@ func (action *errorRecoveryAction) Handle(ctx context.Context, build *v1.Build)
if build.Status.Failure.Recovery.Attempt >= build.Status.Failure.Recovery.AttemptMax {
build.Status.Phase = v1.BuildPhaseError
- buildAttempt.WithLabelValues(string(build.Status.Phase)).Inc()
+ buildAttempt.WithLabelValues(build.Status.Phase.String()).Inc()
return build, nil
}
diff --git a/pkg/controller/build/schedule_routine.go b/pkg/controller/build/schedule_routine.go
index f0e22a0..eccddbf 100644
--- a/pkg/controller/build/schedule_routine.go
+++ b/pkg/controller/build/schedule_routine.go
@@ -129,8 +129,8 @@ func (action *scheduleRoutineAction) runBuild(ctx context.Context, build *v1.Bui
Duration: duration.String(),
}
// Account for the Build metrics
- buildAttempt.WithLabelValues(string(status.Phase)).Inc()
- buildDuration.WithLabelValues(string(status.Phase)).Observe(duration.Seconds())
+ buildAttempt.WithLabelValues(status.Phase.String()).Inc()
+ buildDuration.WithLabelValues(status.Phase.String()).Observe(duration.Seconds())
_ = action.updateBuildStatus(ctx, build, status)
break
}
@@ -145,8 +145,8 @@ func (action *scheduleRoutineAction) runBuild(ctx context.Context, build *v1.Bui
duration := metav1.Now().Sub(build.Status.StartedAt.Time)
status.Duration = duration.String()
// Account for the Build metrics
- buildAttempt.WithLabelValues(string(status.Phase)).Inc()
- buildDuration.WithLabelValues(string(status.Phase)).Observe(duration.Seconds())
+ buildAttempt.WithLabelValues(status.Phase.String()).Inc()
+ buildDuration.WithLabelValues(status.Phase.String()).Observe(duration.Seconds())
}
err := action.updateBuildStatus(ctx, build, status)
if err != nil || taskFailed {
[camel-k] 02/25: feat: Add build queue duration histogram metric
Posted by as...@apache.org.
This is an automated email from the ASF dual-hosted git repository.
astefanutti pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/camel-k.git
commit 3159bb71cde167ff4d75c9e638cb2891f377340f
Author: Antonin Stefanutti <an...@stefanutti.fr>
AuthorDate: Mon Oct 12 18:14:15 2020 +0200
feat: Add build queue duration histogram metric
---
pkg/controller/build/metrics.go | 16 +++++++++++++++-
pkg/controller/build/schedule_pod.go | 4 ++++
pkg/controller/build/schedule_routine.go | 5 +++++
3 files changed, 24 insertions(+), 1 deletion(-)
diff --git a/pkg/controller/build/metrics.go b/pkg/controller/build/metrics.go
index cac2012..b61034c 100644
--- a/pkg/controller/build/metrics.go
+++ b/pkg/controller/build/metrics.go
@@ -45,9 +45,23 @@ var (
buildResult,
},
)
+
+ queueDuration = prometheus.NewHistogram(
+ prometheus.HistogramOpts{
+ Name: "camel_k_build_queue_duration_seconds",
+ Help: "Camel K build queue duration",
+ Buckets: []float64{
+ 5 * time.Second.Seconds(),
+ 15 * time.Second.Seconds(),
+ 30 * time.Second.Seconds(),
+ 1 * time.Minute.Seconds(),
+ 5 * time.Minute.Seconds(),
+ },
+ },
+ )
)
func init() {
// Register custom metrics with the global prometheus registry
- metrics.Registry.MustRegister(buildDuration)
+ metrics.Registry.MustRegister(buildDuration, queueDuration)
}
diff --git a/pkg/controller/build/schedule_pod.go b/pkg/controller/build/schedule_pod.go
index 82c134d..e2bf606 100644
--- a/pkg/controller/build/schedule_pod.go
+++ b/pkg/controller/build/schedule_pod.go
@@ -20,6 +20,7 @@ package build
import (
"context"
"sync"
+ "time"
"github.com/pkg/errors"
@@ -103,6 +104,9 @@ func (action *schedulePodAction) Handle(ctx context.Context, build *v1.Build) (*
if err := action.client.Create(ctx, pod); err != nil {
return nil, errors.Wrap(err, "cannot create build pod")
}
+
+ // Report the duration the Build has been waiting in the build queue
+ queueDuration.Observe(time.Now().Sub(build.CreationTimestamp.Time).Seconds())
}
build.Status.Phase = v1.BuildPhasePending
diff --git a/pkg/controller/build/schedule_routine.go b/pkg/controller/build/schedule_routine.go
index 86edee6..a48f346 100644
--- a/pkg/controller/build/schedule_routine.go
+++ b/pkg/controller/build/schedule_routine.go
@@ -21,6 +21,7 @@ import (
"context"
"fmt"
"sync"
+ "time"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"
@@ -91,6 +92,10 @@ func (action *scheduleRoutineAction) Handle(ctx context.Context, build *v1.Build
if err != nil {
return nil, err
}
+
+ // Report the duration the Build has been waiting in the build queue
+ queueDuration.Observe(time.Now().Sub(build.CreationTimestamp.Time).Seconds())
+
camelevent.NotifyBuildUpdated(ctx, action.client, action.recorder, build, target)
// Start the build asynchronously to avoid blocking the reconcile loop
[camel-k] 25/25: chore: Rebuild resources
Posted by as...@apache.org.
This is an automated email from the ASF dual-hosted git repository.
astefanutti pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/camel-k.git
commit 6afbdc264190a49429712f1938cb740f6ba4c8d9
Author: Antonin Stefanutti <an...@stefanutti.fr>
AuthorDate: Tue Nov 3 10:53:57 2020 +0100
chore: Rebuild resources
---
deploy/resources.go | 24 ++++++++++++++++++++----
1 file changed, 20 insertions(+), 4 deletions(-)
diff --git a/deploy/resources.go b/deploy/resources.go
index 162979f..a47f1d4 100644
--- a/deploy/resources.go
+++ b/deploy/resources.go
@@ -140,9 +140,9 @@ var assets = func() http.FileSystem {
"/crd-integration.yaml": &vfsgen۰CompressedFileInfo{
name: "crd-integration.yaml",
modTime: time.Time{},
- uncompressedSize: 12466,
+ uncompressedSize: 12872,
- compressedContent: []byte("\x1f\x8b\x08\x00\x00\x00\x00\x00\x00\xff\xec\x1a\x4d\x73\xdb\xb8\xf5\xce\x5f\xf1\xc6\x3a\x24\x99\xb1\x28\xc7\x69\x76\x76\xd5\x93\xea\xc4\x53\x35\x89\xed\xb1\x94\xdd\xd9\x23\x04\x3e\x51\xa8\x41\x80\x05\x40\x29\x6e\xa7\xff\xbd\xf3\x00\x52\x22\x25\x52\x96\x65\x6f\x0f\x3b\xc2\x49\x02\xf1\xf0\xbe\x3f\xc9\x1e\xf4\x5f\x6f\x45\x3d\xf8\x2a\x38\x2a\x8b\x09\x38\x0d\x6e\x81\x30\xca\x19\x5f\x20\x4c\xf4\xdc\xad\x98\x41\xb8\xd6\x85\x4a\x98\x13\x5a\xc1\xdb\xd1\xe4\xfa\x1d\x [...]
+ compressedContent: []byte("\x1f\x8b\x08\x00\x00\x00\x00\x00\x00\xff\xec\x1a\x5d\x73\xdb\xb8\xf1\x9d\xbf\x62\xc7\x7a\xb8\x64\xc6\xa2\x1c\xa7\xb9\xb9\x53\x9f\x54\x27\x9e\xaa\x71\x6c\x8f\xa5\xdc\xcd\x3d\x42\xe4\x8a\x42\x0d\x02\x2c\x00\x4a\xd1\x75\xfa\xdf\x3b\x0b\x90\x22\x29\x91\xb2\x4c\x27\x7d\xe8\x98\x4f\x22\xb8\x8b\xfd\x5e\xec\xae\x30\x80\xe1\xf7\x7b\x82\x01\xdc\xf0\x08\xa5\xc1\x18\xac\x02\xbb\x42\x98\x64\x2c\x5a\x21\xcc\xd4\xd2\x6e\x98\x46\xb8\x56\xb9\x8c\x99\xe5\x4a\xc2\x9b\xc9\xec\x [...]
},
"/crd-kamelet-binding.yaml": &vfsgen۰CompressedFileInfo{
name: "crd-kamelet-binding.yaml",
@@ -161,9 +161,23 @@ var assets = func() http.FileSystem {
"/operator-deployment.yaml": &vfsgen۰CompressedFileInfo{
name: "operator-deployment.yaml",
modTime: time.Time{},
- uncompressedSize: 2148,
+ uncompressedSize: 2395,
- compressedContent: []byte("\x1f\x8b\x08\x00\x00\x00\x00\x00\x00\xff\xbc\x54\x41\x6f\xe3\x36\x13\xbd\xeb\x57\x3c\x58\x97\x5d\x20\xb6\x37\xdf\x77\x53\x4f\x6a\xe2\x20\x46\x53\xc9\xb0\xbc\x0d\xf6\x54\x4c\xa8\x91\x44\x84\x22\x55\x92\x8a\x56\xff\xbe\xa0\x6c\x27\x76\x36\x9b\xf6\x10\x94\x27\x9b\x33\xf3\xe6\xbd\x79\x23\xc6\x98\x7f\xdc\x89\x62\xdc\x49\xc1\xda\x71\x09\x6f\xe0\x1b\x46\xda\x91\x68\x18\x85\xa9\xfc\x40\x96\x71\x63\x7a\x5d\x92\x97\x46\xe3\x53\x5a\xdc\x7c\x46\xaf\x4b\xb6\x30\x9a\x61\x [...]
+ compressedContent: []byte("\x1f\x8b\x08\x00\x00\x00\x00\x00\x00\xff\xc4\x55\xc1\x6e\xe3\x36\x10\xbd\xeb\x2b\x1e\xac\xcb\x2e\x10\xdb\x49\x8b\x02\x0b\xf5\xa4\x26\x4e\x63\x34\xb5\x0d\xcb\xdb\x60\x4f\x05\x4d\x8d\x25\x22\x14\x47\x25\x29\x7b\xd5\xaf\x2f\x28\x5b\x8e\xed\x4d\xd3\x1e\x02\x2c\x4f\x96\x66\xe6\xcd\x7b\x33\x4f\x74\x8c\xe1\xfb\x9d\x28\xc6\xa3\x92\x64\x1c\xe5\xf0\x0c\x5f\x12\xd2\x5a\xc8\x92\x90\xf1\xc6\xef\x84\x25\xdc\x73\x63\x72\xe1\x15\x1b\x7c\x48\xb3\xfb\x8f\x68\x4c\x4e\x16\x6c\x [...]
+ },
+ "/operator-pod-monitor.yaml": &vfsgen۰CompressedFileInfo{
+ name: "operator-pod-monitor.yaml",
+ modTime: time.Time{},
+ uncompressedSize: 301,
+
+ compressedContent: []byte("\x1f\x8b\x08\x00\x00\x00\x00\x00\x00\xff\x8c\xcd\xbd\x4e\x43\x31\x0c\x86\xe1\x3d\x57\x61\x75\x3f\xa9\x58\xb3\xb3\x51\x89\x89\xdd\x38\x56\x1b\x35\xfe\x91\x63\x71\xfd\xe8\xf4\x54\x88\x81\x81\x31\xf1\xab\xe7\x2b\xe8\xe3\x83\x63\x0d\xd3\x06\x62\x3a\xd2\x62\xe8\xb5\x92\x05\xdb\xaa\x64\x72\xfe\x7a\x29\xf7\xa1\xbd\xc1\xbb\xf5\xcb\x51\x14\xe1\xc4\x8e\x89\xad\x00\x28\x0a\x37\x20\x14\x9e\xdb\x7d\x33\xe7\xc0\x3d\x01\x98\xf8\xc9\x73\xed\x09\x00\xba\x37\x38\x3d\xa3\xd3\x [...]
+ },
+ "/operator-prometheus-rule.yaml": &vfsgen۰CompressedFileInfo{
+ name: "operator-prometheus-rule.yaml",
+ modTime: time.Time{},
+ uncompressedSize: 4469,
+
+ compressedContent: []byte("\x1f\x8b\x08\x00\x00\x00\x00\x00\x00\xff\xdc\x97\x5d\x6f\xda\x3e\x14\xc6\xef\xf9\x14\x47\x51\x2b\xd1\xbf\xfe\xa4\x09\x13\xbb\x88\xd4\x5d\xec\xa5\x37\xbb\xd9\x3a\x69\x37\xd3\x84\x1c\xe7\x04\x5c\xfc\x92\x1e\xdb\x6c\x15\xe3\xbb\x4f\x31\x65\x22\x6c\xbc\x15\xda\x0d\x7a\x53\xec\x38\x8f\xad\xf3\xfc\x78\x7c\x60\x95\xf8\x8c\x64\x85\xd1\x19\x28\xa3\x85\x33\x24\xf4\x20\xe6\x86\xd0\xd8\x98\x1b\x75\x39\x4e\x5b\x23\xa1\x8b\x0c\x3e\x90\x51\xe8\x86\xe8\xed\x8d\x97\xd8\x52\x [...]
},
"/operator-role-binding-events.yaml": &vfsgen۰CompressedFileInfo{
name: "operator-role-binding-events.yaml",
@@ -355,6 +369,8 @@ var assets = func() http.FileSystem {
fs["/crd-kamelet-binding.yaml"].(os.FileInfo),
fs["/crd-kamelet.yaml"].(os.FileInfo),
fs["/operator-deployment.yaml"].(os.FileInfo),
+ fs["/operator-pod-monitor.yaml"].(os.FileInfo),
+ fs["/operator-prometheus-rule.yaml"].(os.FileInfo),
fs["/operator-role-binding-events.yaml"].(os.FileInfo),
fs["/operator-role-binding-knative.yaml"].(os.FileInfo),
fs["/operator-role-binding-servicemonitors.yaml"].(os.FileInfo),
[camel-k] 07/25: feat(cli): Add an option to install a default
PodMonitor resource
Posted by as...@apache.org.
This is an automated email from the ASF dual-hosted git repository.
astefanutti pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/camel-k.git
commit dcc4347887f839a438bc0fbcc1099249815d65ea
Author: Antonin Stefanutti <an...@stefanutti.fr>
AuthorDate: Mon Oct 19 15:20:37 2020 +0200
feat(cli): Add an option to install a default PodMonitor resource
---
deploy/operator-pod-monitor.yaml | 15 +++++++++++++++
pkg/client/fastmapper.go | 1 +
pkg/cmd/install.go | 7 +++++++
pkg/install/operator.go | 29 +++++++++++++++++++++++++----
4 files changed, 48 insertions(+), 4 deletions(-)
diff --git a/deploy/operator-pod-monitor.yaml b/deploy/operator-pod-monitor.yaml
new file mode 100644
index 0000000..07ed843
--- /dev/null
+++ b/deploy/operator-pod-monitor.yaml
@@ -0,0 +1,15 @@
+
+apiVersion: monitoring.coreos.com/v1
+kind: PodMonitor
+metadata:
+ name: camel-k-operator
+ labels:
+ app: "camel-k"
+ camel.apache.org/component: operator
+spec:
+ selector:
+ matchLabels:
+ app: "camel-k"
+ camel.apache.org/component: operator
+ podMetricsEndpoints:
+ - port: metrics
\ No newline at end of file
diff --git a/pkg/client/fastmapper.go b/pkg/client/fastmapper.go
index 82b33b8..7055154 100644
--- a/pkg/client/fastmapper.go
+++ b/pkg/client/fastmapper.go
@@ -39,6 +39,7 @@ var FastMapperAllowedAPIGroups = map[string]bool{
"rbac.authorization.k8s.io": true,
"console.openshift.io": true, // OpenShift console resources
"operators.coreos.com": true, // Operator SDK OLM
+ "monitoring.coreos.com": true, // Prometheus resources
}
// newFastDiscoveryRESTMapper comes from https://github.com/kubernetes-sigs/controller-runtime/pull/592.
diff --git a/pkg/cmd/install.go b/pkg/cmd/install.go
index 3df7108..1ce3cce 100644
--- a/pkg/cmd/install.go
+++ b/pkg/cmd/install.go
@@ -118,6 +118,9 @@ func newCmdInstall(rootCmdOptions *RootCmdOptions) (*cobra.Command, *installCmdO
cmd.Flags().String("maven-settings", "", "Configure the source of the maven settings (configmap|secret:name[/key])")
cmd.Flags().StringArray("maven-repository", nil, "Add a maven repository")
+ // monitoring
+ cmd.Flags().Bool("monitoring", false, "To enable or disable the operator monitoring")
+
// save
cmd.Flags().Bool("save", false, "Save the install parameters into the default kamel configuration file (kamel-config.yaml)")
@@ -157,6 +160,7 @@ type installCmdOptions struct {
BuildTimeout string `mapstructure:"build-timeout"`
MavenRepositories []string `mapstructure:"maven-repositories"`
MavenSettings string `mapstructure:"maven-settings"`
+ Monitoring bool `mapstructure:"monitoring"`
Properties []string `mapstructure:"properties"`
TraitProfile string `mapstructure:"trait-profile"`
HTTPProxySecret string `mapstructure:"http-proxy-secret"`
@@ -246,6 +250,9 @@ func (o *installCmdOptions) install(cobraCmd *cobra.Command, _ []string) error {
Namespace: namespace,
Global: o.Global,
ClusterType: o.ClusterType,
+ Monitoring: install.OperatorMonitoringConfiguration{
+ Enabled: o.Monitoring,
+ },
}
err = install.OperatorOrCollect(o.Context, c, cfg, collection, o.Force)
if err != nil {
diff --git a/pkg/install/operator.go b/pkg/install/operator.go
index bfaa4d3..2b1ee78 100644
--- a/pkg/install/operator.go
+++ b/pkg/install/operator.go
@@ -19,14 +19,16 @@ package install
import (
"context"
- "errors"
"fmt"
"strings"
+ "github.com/pkg/errors"
+
appsv1 "k8s.io/api/apps/v1"
corev1 "k8s.io/api/core/v1"
rbacv1 "k8s.io/api/rbac/v1"
k8serrors "k8s.io/apimachinery/pkg/api/errors"
+ "k8s.io/apimachinery/pkg/api/meta"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
@@ -46,11 +48,12 @@ type OperatorConfiguration struct {
Namespace string
Global bool
ClusterType string
+ Monitoring OperatorMonitoringConfiguration
}
-// Operator installs the operator resources in the given namespace
-func Operator(ctx context.Context, c client.Client, cfg OperatorConfiguration, force bool) error {
- return OperatorOrCollect(ctx, c, cfg, nil, force)
+// OperatorMonitoringConfiguration --
+type OperatorMonitoringConfiguration struct {
+ Enabled bool
}
// OperatorOrCollect installs the operator resources or adds them to the collector if present
@@ -166,6 +169,18 @@ func OperatorOrCollect(ctx context.Context, c client.Client, cfg OperatorConfigu
fmt.Println("Warning: the operator will not be able to lookup strimzi kafka resources. Try installing as cluster-admin to allow the lookup of strimzi kafka resources.")
}
+ if cfg.Monitoring.Enabled {
+ if err := installPodMonitor(ctx, c, cfg.Namespace, customizer, collection, force); err != nil {
+ if k8serrors.IsForbidden(err) {
+ fmt.Println("Warning: the creation of PodMonitor resources is not allowed. Try installing as cluster-admin to allow the creation of PodMonitor resources.")
+ } else if meta.IsNoMatchError(errors.Cause(err)) {
+ fmt.Println("Warning: the creation of the PodMonitor resource has failed: ", err)
+ } else {
+ return err
+ }
+ }
+ }
+
return nil
}
@@ -215,6 +230,12 @@ func installStrimziBindings(ctx context.Context, c client.Client, namespace stri
)
}
+func installPodMonitor(ctx context.Context, c client.Client, namespace string, customizer ResourceCustomizer, collection *kubernetes.Collection, force bool) error {
+ return ResourcesOrCollect(ctx, c, namespace, collection, force, customizer,
+ "operator-pod-monitor.yaml",
+ )
+}
+
// Platform installs the platform custom resource
// nolint: lll
func Platform(ctx context.Context, c client.Client, clusterType string, namespace string, registry v1.IntegrationPlatformRegistrySpec) (*v1.IntegrationPlatform, error) {
[camel-k] 24/25: feat: Add an install option to configure the
health endpoint port
Posted by as...@apache.org.
This is an automated email from the ASF dual-hosted git repository.
astefanutti pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/camel-k.git
commit 35cf0cca9e6340b0fee93fb38a1dd605a6e65442
Author: Antonin Stefanutti <an...@stefanutti.fr>
AuthorDate: Tue Nov 3 10:43:12 2020 +0100
feat: Add an install option to configure the health endpoint port
---
pkg/cmd/install.go | 11 +++++++++--
pkg/install/operator.go | 12 ++++++++++++
2 files changed, 21 insertions(+), 2 deletions(-)
diff --git a/pkg/cmd/install.go b/pkg/cmd/install.go
index ac4ff34..4f43f2f 100644
--- a/pkg/cmd/install.go
+++ b/pkg/cmd/install.go
@@ -101,7 +101,7 @@ func newCmdInstall(rootCmdOptions *RootCmdOptions) (*cobra.Command, *installCmdO
cmd.Flags().String("http-proxy-secret", "", "Configure the source of the secret holding HTTP proxy server details "+
"(HTTP_PROXY|HTTPS_PROXY|NO_PROXY)")
- // olm
+ // OLM
cmd.Flags().Bool("olm", true, "Try to install everything via OLM (Operator Lifecycle Manager) if available")
cmd.Flags().String("olm-operator-name", olm.DefaultOperatorName, "Name of the Camel K operator in the OLM source or marketplace")
cmd.Flags().String("olm-package", olm.DefaultPackage, "Name of the Camel K package in the OLM source or marketplace")
@@ -113,11 +113,14 @@ func newCmdInstall(rootCmdOptions *RootCmdOptions) (*cobra.Command, *installCmdO
cmd.Flags().String("olm-global-namespace", olm.DefaultGlobalNamespace, "A namespace containing an OperatorGroup that defines global scope for the "+
"operator (used in combination with the --global flag)")
- // maven settings
+ // Maven settings
cmd.Flags().String("local-repository", "", "Location of the local maven repository")
cmd.Flags().String("maven-settings", "", "Configure the source of the maven settings (configmap|secret:name[/key])")
cmd.Flags().StringArray("maven-repository", nil, "Add a maven repository")
+ // health
+ cmd.Flags().Int("health-port", 8081, "The port of the health endpoint")
+
// monitoring
cmd.Flags().Bool("monitoring", false, "To enable or disable the operator monitoring")
cmd.Flags().Int("monitoring-port", 8080, "The port of the metrics endpoint")
@@ -161,6 +164,7 @@ type installCmdOptions struct {
BuildTimeout string `mapstructure:"build-timeout"`
MavenRepositories []string `mapstructure:"maven-repositories"`
MavenSettings string `mapstructure:"maven-settings"`
+ HealthPort int32 `mapstructure:"health-port"`
Monitoring bool `mapstructure:"monitoring"`
MonitoringPort int32 `mapstructure:"monitoring-port"`
Properties []string `mapstructure:"properties"`
@@ -252,6 +256,9 @@ func (o *installCmdOptions) install(cobraCmd *cobra.Command, _ []string) error {
Namespace: namespace,
Global: o.Global,
ClusterType: o.ClusterType,
+ Health: install.OperatorHealthConfiguration{
+ Port: o.HealthPort,
+ },
Monitoring: install.OperatorMonitoringConfiguration{
Enabled: o.Monitoring,
Port: o.MonitoringPort,
diff --git a/pkg/install/operator.go b/pkg/install/operator.go
index 2999144..7eb21c3 100644
--- a/pkg/install/operator.go
+++ b/pkg/install/operator.go
@@ -31,6 +31,7 @@ import (
"k8s.io/apimachinery/pkg/api/meta"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
+ "k8s.io/apimachinery/pkg/util/intstr"
"github.com/apache/camel-k/deploy"
v1 "github.com/apache/camel-k/pkg/apis/camel/v1"
@@ -48,9 +49,15 @@ type OperatorConfiguration struct {
Namespace string
Global bool
ClusterType string
+ Health OperatorHealthConfiguration
Monitoring OperatorMonitoringConfiguration
}
+// OperatorHealthConfiguration --
+type OperatorHealthConfiguration struct {
+ Port int32
+}
+
// OperatorMonitoringConfiguration --
type OperatorMonitoringConfiguration struct {
Enabled bool
@@ -78,9 +85,14 @@ func OperatorOrCollect(ctx context.Context, c client.Client, cfg OperatorConfigu
if d, ok := o.(*appsv1.Deployment); ok {
if d.Labels["camel.apache.org/component"] == "operator" {
+ // Metrics endpoint port
d.Spec.Template.Spec.Containers[0].Args = append(d.Spec.Template.Spec.Containers[0].Args,
fmt.Sprintf("--monitoring-port=%d", cfg.Monitoring.Port))
d.Spec.Template.Spec.Containers[0].Ports[0].ContainerPort = cfg.Monitoring.Port
+ // Health endpoint port
+ d.Spec.Template.Spec.Containers[0].Args = append(d.Spec.Template.Spec.Containers[0].Args,
+ fmt.Sprintf("--health-port=%d", cfg.Health.Port))
+ d.Spec.Template.Spec.Containers[0].LivenessProbe.HTTPGet.Port = intstr.FromInt(int(cfg.Health.Port))
}
}
[camel-k] 18/25: fix: Use attempt time to compute queuing duration
on recovery
Posted by as...@apache.org.
This is an automated email from the ASF dual-hosted git repository.
astefanutti pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/camel-k.git
commit 2998f3a9bdeab6b0493185e4208296cb66b6d195
Author: Antonin Stefanutti <an...@stefanutti.fr>
AuthorDate: Wed Oct 21 18:40:31 2020 +0200
fix: Use attempt time to compute queuing duration on recovery
---
pkg/controller/build/metrics.go | 8 ++++++++
pkg/controller/build/schedule_pod.go | 2 +-
pkg/controller/build/schedule_routine.go | 2 +-
3 files changed, 10 insertions(+), 2 deletions(-)
diff --git a/pkg/controller/build/metrics.go b/pkg/controller/build/metrics.go
index 20b052f..16fd330 100644
--- a/pkg/controller/build/metrics.go
+++ b/pkg/controller/build/metrics.go
@@ -102,3 +102,11 @@ func getBuildAttemptFor(build *v1.Build) (int, int) {
}
return attempt, attemptMax
}
+
+func getBuildQueuingTime(build *v1.Build) time.Time {
+ queuingTime := build.CreationTimestamp.Time
+ if failure := build.Status.Failure; failure != nil {
+ queuingTime = failure.Recovery.AttemptTime.Time
+ }
+ return queuingTime
+}
diff --git a/pkg/controller/build/schedule_pod.go b/pkg/controller/build/schedule_pod.go
index e2bf606..0e17694 100644
--- a/pkg/controller/build/schedule_pod.go
+++ b/pkg/controller/build/schedule_pod.go
@@ -106,7 +106,7 @@ func (action *schedulePodAction) Handle(ctx context.Context, build *v1.Build) (*
}
// Report the duration the Build has been waiting in the build queue
- queueDuration.Observe(time.Now().Sub(build.CreationTimestamp.Time).Seconds())
+ queueDuration.Observe(time.Now().Sub(getBuildQueuingTime(build)).Seconds())
}
build.Status.Phase = v1.BuildPhasePending
diff --git a/pkg/controller/build/schedule_routine.go b/pkg/controller/build/schedule_routine.go
index 56d0c6f..7797991 100644
--- a/pkg/controller/build/schedule_routine.go
+++ b/pkg/controller/build/schedule_routine.go
@@ -94,7 +94,7 @@ func (action *scheduleRoutineAction) Handle(ctx context.Context, build *v1.Build
}
// Report the duration the Build has been waiting in the build queue
- queueDuration.Observe(time.Now().Sub(build.CreationTimestamp.Time).Seconds())
+ queueDuration.Observe(time.Now().Sub(getBuildQueuingTime(build)).Seconds())
camelevent.NotifyBuildUpdated(ctx, action.client, action.recorder, build, target)
[camel-k] 03/25: feat: Add build attempt counter metric
Posted by as...@apache.org.
This is an automated email from the ASF dual-hosted git repository.
astefanutti pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/camel-k.git
commit 2572ff7485b8bb46538716c728544209b420f3f4
Author: Antonin Stefanutti <an...@stefanutti.fr>
AuthorDate: Tue Oct 13 11:53:28 2020 +0200
feat: Add build attempt counter metric
---
pkg/controller/build/metrics.go | 15 ++++++++++++---
pkg/controller/build/monitor_pod.go | 4 ++++
pkg/controller/build/monitor_routine.go | 2 +-
pkg/controller/build/recovery.go | 1 +
pkg/controller/build/schedule_routine.go | 4 ++++
5 files changed, 22 insertions(+), 4 deletions(-)
diff --git a/pkg/controller/build/metrics.go b/pkg/controller/build/metrics.go
index b61034c..64286ce 100644
--- a/pkg/controller/build/metrics.go
+++ b/pkg/controller/build/metrics.go
@@ -25,9 +25,18 @@ import (
"github.com/prometheus/client_golang/prometheus"
)
-const buildResult = "result"
+const buildResultLabel = "result"
var (
+ buildAttempt = prometheus.NewCounterVec(
+ prometheus.CounterOpts{
+ Name: "camel_k_build_attempt",
+ Help: "Camel K build attempt",
+ },
+ []string{
+ buildResultLabel,
+ })
+
buildDuration = prometheus.NewHistogramVec(
prometheus.HistogramOpts{
Name: "camel_k_build_duration_seconds",
@@ -42,7 +51,7 @@ var (
},
},
[]string{
- buildResult,
+ buildResultLabel,
},
)
@@ -63,5 +72,5 @@ var (
func init() {
// Register custom metrics with the global prometheus registry
- metrics.Registry.MustRegister(buildDuration, queueDuration)
+ metrics.Registry.MustRegister(buildAttempt, buildDuration, queueDuration)
}
diff --git a/pkg/controller/build/monitor_pod.go b/pkg/controller/build/monitor_pod.go
index 23ebbee..15b3f87 100644
--- a/pkg/controller/build/monitor_pod.go
+++ b/pkg/controller/build/monitor_pod.go
@@ -69,6 +69,8 @@ func (action *monitorPodAction) Handle(ctx context.Context, build *v1.Build) (*v
build.Status.Phase = v1.BuildPhaseSucceeded
duration := metav1.Now().Sub(build.Status.StartedAt.Time)
build.Status.Duration = duration.String()
+ // Account for the Build metrics
+ buildAttempt.WithLabelValues(string(build.Status.Phase)).Inc()
buildDuration.WithLabelValues(string(build.Status.Phase)).Observe(duration.Seconds())
for _, task := range build.Spec.Tasks {
if task.Image != nil {
@@ -88,6 +90,8 @@ func (action *monitorPodAction) Handle(ctx context.Context, build *v1.Build) (*v
build.Status.Phase = v1.BuildPhaseFailed
duration := metav1.Now().Sub(build.Status.StartedAt.Time)
build.Status.Duration = duration.String()
+ // Account for the Build metrics
+ buildAttempt.WithLabelValues(string(build.Status.Phase)).Inc()
buildDuration.WithLabelValues(string(build.Status.Phase)).Observe(duration.Seconds())
}
diff --git a/pkg/controller/build/monitor_routine.go b/pkg/controller/build/monitor_routine.go
index 7bdf769..c0ccd65 100644
--- a/pkg/controller/build/monitor_routine.go
+++ b/pkg/controller/build/monitor_routine.go
@@ -53,7 +53,7 @@ func (action *monitorRoutineAction) Handle(ctx context.Context, build *v1.Build)
// and recover the build if it's missing. This can happen when the operator
// stops abruptly and restarts or the build status update fails.
build.Status.Phase = v1.BuildPhaseFailed
-
+ buildAttempt.WithLabelValues(string(build.Status.Phase)).Inc()
return build, nil
}
diff --git a/pkg/controller/build/recovery.go b/pkg/controller/build/recovery.go
index 0c6cc9c..b8d7bb8 100644
--- a/pkg/controller/build/recovery.go
+++ b/pkg/controller/build/recovery.go
@@ -69,6 +69,7 @@ func (action *errorRecoveryAction) Handle(ctx context.Context, build *v1.Build)
if build.Status.Failure.Recovery.Attempt >= build.Status.Failure.Recovery.AttemptMax {
build.Status.Phase = v1.BuildPhaseError
+ buildAttempt.WithLabelValues(string(build.Status.Phase)).Inc()
return build, nil
}
diff --git a/pkg/controller/build/schedule_routine.go b/pkg/controller/build/schedule_routine.go
index a48f346..f0e22a0 100644
--- a/pkg/controller/build/schedule_routine.go
+++ b/pkg/controller/build/schedule_routine.go
@@ -128,6 +128,8 @@ func (action *scheduleRoutineAction) runBuild(ctx context.Context, build *v1.Bui
task.GetName()),
Duration: duration.String(),
}
+ // Account for the Build metrics
+ buildAttempt.WithLabelValues(string(status.Phase)).Inc()
buildDuration.WithLabelValues(string(status.Phase)).Observe(duration.Seconds())
_ = action.updateBuildStatus(ctx, build, status)
break
@@ -142,6 +144,8 @@ func (action *scheduleRoutineAction) runBuild(ctx context.Context, build *v1.Bui
if lastTask || taskFailed {
duration := metav1.Now().Sub(build.Status.StartedAt.Time)
status.Duration = duration.String()
+ // Account for the Build metrics
+ buildAttempt.WithLabelValues(string(status.Phase)).Inc()
buildDuration.WithLabelValues(string(status.Phase)).Observe(duration.Seconds())
}
err := action.updateBuildStatus(ctx, build, status)
[camel-k] 11/25: feat: Add default alerting rules for build
duration SLOs
Posted by as...@apache.org.
This is an automated email from the ASF dual-hosted git repository.
astefanutti pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/camel-k.git
commit 71c947a18146fdbf0f742160bbd7143164ad4465
Author: Antonin Stefanutti <an...@stefanutti.fr>
AuthorDate: Wed Oct 21 11:28:06 2020 +0200
feat: Add default alerting rules for build duration SLOs
---
deploy/operator-prometheus-rule.yaml | 32 ++++++++++++++++++++++++++++++++
1 file changed, 32 insertions(+)
diff --git a/deploy/operator-prometheus-rule.yaml b/deploy/operator-prometheus-rule.yaml
index 6947505..6a009dd 100644
--- a/deploy/operator-prometheus-rule.yaml
+++ b/deploy/operator-prometheus-rule.yaml
@@ -36,3 +36,35 @@ spec:
message: |
{{ printf "%0.0f" $value }}% of the reconciliation requests
for {{ $labels.job }} have failed.
+ - alert: CamelKSuccessBuildDuration2m
+ expr: |
+ (
+ 1 - sum(rate(camel_k_build_duration_seconds_bucket{le="120",result="Succeeded"}[5m])) by (job)
+ /
+ sum(rate(camel_k_build_duration_seconds_count{result="Succeeded"}[5m])) by (job)
+ )
+ * 100
+ > 10
+ for: 1m
+ labels:
+ severity: warning
+ annotations:
+ message: |
+ {{ printf "%0.0f" $value }}% of the successful builds
+ for {{ $labels.job }} have their duration above 2m.
+ - alert: CamelKSuccessBuildDuration5m
+ expr: |
+ (
+ 1 - sum(rate(camel_k_build_duration_seconds_bucket{le="300",result="Succeeded"}[5m])) by (job)
+ /
+ sum(rate(camel_k_build_duration_seconds_count{result="Succeeded"}[5m])) by (job)
+ )
+ * 100
+ > 1
+ for: 1m
+ labels:
+ severity: critical
+ annotations:
+ message: |
+ {{ printf "%0.0f" $value }}% of the successful builds
+ for {{ $labels.job }} have their duration above 5m.
[camel-k] 19/25: feat: Add time to first integration readiness
metric
Posted by as...@apache.org.
This is an automated email from the ASF dual-hosted git repository.
astefanutti pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/camel-k.git
commit 4cca10c96fc34961278eb6405c23a90b92badfaf
Author: Antonin Stefanutti <an...@stefanutti.fr>
AuthorDate: Tue Oct 27 12:22:28 2020 +0100
feat: Add time to first integration readiness metric
---
deploy/crd-integration.yaml | 9 ++++
.../integrations.camel.apache.org.crd.yaml | 9 ++++
helm/camel-k/crds/crd-integration.yaml | 9 ++++
pkg/apis/camel/v1/integration_types.go | 4 ++
pkg/apis/camel/v1/integration_types_support.go | 52 +++++++++++++---------
pkg/apis/camel/v1/zz_generated.deepcopy.go | 8 ++++
pkg/controller/integration/error.go | 1 +
pkg/controller/integration/initialize.go | 6 +++
pkg/controller/integration/metrics.go | 47 +++++++++++++++++++
pkg/controller/integration/monitor.go | 18 ++++++--
10 files changed, 139 insertions(+), 24 deletions(-)
diff --git a/deploy/crd-integration.yaml b/deploy/crd-integration.yaml
index 73753c7..a6ebe04 100644
--- a/deploy/crd-integration.yaml
+++ b/deploy/crd-integration.yaml
@@ -194,6 +194,10 @@ spec:
description: IntegrationCondition describes the state of a resource
at a certain point.
properties:
+ firstTruthyTime:
+ description: First time the condition status transitioned to True.
+ format: date-time
+ type: string
lastTransitionTime:
description: Last time the condition transitioned from one status
to another.
@@ -331,6 +335,11 @@ spec:
type: string
kit:
type: string
+ lastInitTimestamp:
+ description: The timestamp representing the last time when this integration
+ was initialized.
+ format: date-time
+ type: string
phase:
description: IntegrationPhase --
type: string
diff --git a/deploy/olm-catalog/camel-k-dev/1.3.0-snapshot/integrations.camel.apache.org.crd.yaml b/deploy/olm-catalog/camel-k-dev/1.3.0-snapshot/integrations.camel.apache.org.crd.yaml
index 73753c7..a6ebe04 100644
--- a/deploy/olm-catalog/camel-k-dev/1.3.0-snapshot/integrations.camel.apache.org.crd.yaml
+++ b/deploy/olm-catalog/camel-k-dev/1.3.0-snapshot/integrations.camel.apache.org.crd.yaml
@@ -194,6 +194,10 @@ spec:
description: IntegrationCondition describes the state of a resource
at a certain point.
properties:
+ firstTruthyTime:
+ description: First time the condition status transitioned to True.
+ format: date-time
+ type: string
lastTransitionTime:
description: Last time the condition transitioned from one status
to another.
@@ -331,6 +335,11 @@ spec:
type: string
kit:
type: string
+ lastInitTimestamp:
+ description: The timestamp representing the last time when this integration
+ was initialized.
+ format: date-time
+ type: string
phase:
description: IntegrationPhase --
type: string
diff --git a/helm/camel-k/crds/crd-integration.yaml b/helm/camel-k/crds/crd-integration.yaml
index 73753c7..a6ebe04 100644
--- a/helm/camel-k/crds/crd-integration.yaml
+++ b/helm/camel-k/crds/crd-integration.yaml
@@ -194,6 +194,10 @@ spec:
description: IntegrationCondition describes the state of a resource
at a certain point.
properties:
+ firstTruthyTime:
+ description: First time the condition status transitioned to True.
+ format: date-time
+ type: string
lastTransitionTime:
description: Last time the condition transitioned from one status
to another.
@@ -331,6 +335,11 @@ spec:
type: string
kit:
type: string
+ lastInitTimestamp:
+ description: The timestamp representing the last time when this integration
+ was initialized.
+ format: date-time
+ type: string
phase:
description: IntegrationPhase --
type: string
diff --git a/pkg/apis/camel/v1/integration_types.go b/pkg/apis/camel/v1/integration_types.go
index 91d8c57..b5a57cd 100644
--- a/pkg/apis/camel/v1/integration_types.go
+++ b/pkg/apis/camel/v1/integration_types.go
@@ -60,6 +60,8 @@ type IntegrationStatus struct {
Replicas *int32 `json:"replicas,omitempty"`
Selector string `json:"selector,omitempty"`
Capabilities []string `json:"capabilities,omitempty"`
+ // The timestamp representing the last time when this integration was initialized.
+ InitializationTimestamp *metav1.Time `json:"lastInitTimestamp,omitempty"`
}
// +genclient
@@ -274,6 +276,8 @@ type IntegrationCondition struct {
LastUpdateTime metav1.Time `json:"lastUpdateTime,omitempty"`
// Last time the condition transitioned from one status to another.
LastTransitionTime metav1.Time `json:"lastTransitionTime,omitempty"`
+ // First time the condition status transitioned to True.
+ FirstTruthyTime *metav1.Time `json:"firstTruthyTime,omitempty"`
// The reason for the condition's last transition.
Reason string `json:"reason,omitempty"`
// A human readable message indicating details about the transition.
diff --git a/pkg/apis/camel/v1/integration_types_support.go b/pkg/apis/camel/v1/integration_types_support.go
index 2e807d3..591afda 100644
--- a/pkg/apis/camel/v1/integration_types_support.go
+++ b/pkg/apis/camel/v1/integration_types_support.go
@@ -282,24 +282,20 @@ func (in *IntegrationStatus) GetCondition(condType IntegrationConditionType) *In
// SetCondition --
func (in *IntegrationStatus) SetCondition(condType IntegrationConditionType, status corev1.ConditionStatus, reason string, message string) {
in.SetConditions(IntegrationCondition{
- Type: condType,
- Status: status,
- LastUpdateTime: metav1.Now(),
- LastTransitionTime: metav1.Now(),
- Reason: reason,
- Message: message,
+ Type: condType,
+ Status: status,
+ Reason: reason,
+ Message: message,
})
}
// SetErrorCondition --
func (in *IntegrationStatus) SetErrorCondition(condType IntegrationConditionType, reason string, err error) {
in.SetConditions(IntegrationCondition{
- Type: condType,
- Status: corev1.ConditionFalse,
- LastUpdateTime: metav1.Now(),
- LastTransitionTime: metav1.Now(),
- Reason: reason,
- Message: err.Error(),
+ Type: condType,
+ Status: corev1.ConditionFalse,
+ Reason: reason,
+ Message: err.Error(),
})
}
@@ -308,22 +304,36 @@ func (in *IntegrationStatus) SetErrorCondition(condType IntegrationConditionType
// If a condition that we are about to add already exists and has the same status and
// reason then we are not going to update.
func (in *IntegrationStatus) SetConditions(conditions ...IntegrationCondition) {
+ now := metav1.Now()
for _, condition := range conditions {
+ currentCond := in.GetCondition(condition.Type)
+
+ if currentCond != nil && currentCond.Status == condition.Status && currentCond.Reason == condition.Reason {
+ return
+ }
+
if condition.LastUpdateTime.IsZero() {
- condition.LastUpdateTime = metav1.Now()
+ condition.LastUpdateTime = now
}
+
if condition.LastTransitionTime.IsZero() {
- condition.LastTransitionTime = metav1.Now()
+ // We may want not to set it when the current condition is nil
+ condition.LastTransitionTime = now
}
- currentCond := in.GetCondition(condition.Type)
-
- if currentCond != nil && currentCond.Status == condition.Status && currentCond.Reason == condition.Reason {
- return
+ if (condition.FirstTruthyTime == nil || condition.FirstTruthyTime.IsZero()) && condition.Status == corev1.ConditionTrue {
+ condition.FirstTruthyTime = &now
}
- // Do not update lastTransitionTime if the status of the condition doesn't change.
- if currentCond != nil && currentCond.Status == condition.Status {
- condition.LastTransitionTime = currentCond.LastTransitionTime
+
+ if currentCond != nil {
+ if currentCond.Status == condition.Status {
+ // Do not update LastTransitionTime if the status of the condition doesn't change
+ condition.LastTransitionTime = currentCond.LastTransitionTime
+ }
+ if !(currentCond.FirstTruthyTime == nil || currentCond.FirstTruthyTime.IsZero()) {
+ // Preserve FirstTruthyTime
+ condition.FirstTruthyTime = currentCond.FirstTruthyTime.DeepCopy()
+ }
}
in.RemoveCondition(condition.Type)
diff --git a/pkg/apis/camel/v1/zz_generated.deepcopy.go b/pkg/apis/camel/v1/zz_generated.deepcopy.go
index e734bde..a38ad74 100644
--- a/pkg/apis/camel/v1/zz_generated.deepcopy.go
+++ b/pkg/apis/camel/v1/zz_generated.deepcopy.go
@@ -663,6 +663,10 @@ func (in *IntegrationCondition) DeepCopyInto(out *IntegrationCondition) {
*out = *in
in.LastUpdateTime.DeepCopyInto(&out.LastUpdateTime)
in.LastTransitionTime.DeepCopyInto(&out.LastTransitionTime)
+ if in.FirstTruthyTime != nil {
+ in, out := &in.FirstTruthyTime, &out.FirstTruthyTime
+ *out = (*in).DeepCopy()
+ }
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new IntegrationCondition.
@@ -1152,6 +1156,10 @@ func (in *IntegrationStatus) DeepCopyInto(out *IntegrationStatus) {
*out = make([]string, len(*in))
copy(*out, *in)
}
+ if in.InitializationTimestamp != nil {
+ in, out := &in.InitializationTimestamp, &out.InitializationTimestamp
+ *out = (*in).DeepCopy()
+ }
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new IntegrationStatus.
diff --git a/pkg/controller/integration/error.go b/pkg/controller/integration/error.go
index 0e18538..9e3ad90 100644
--- a/pkg/controller/integration/error.go
+++ b/pkg/controller/integration/error.go
@@ -52,6 +52,7 @@ func (action *errorAction) Handle(ctx context.Context, integration *v1.Integrati
integration.Status.Digest = hash
integration.Status.Phase = v1.IntegrationPhaseInitialization
+ integration.Status.InitializationTimestamp = nil
return integration, nil
}
diff --git a/pkg/controller/integration/initialize.go b/pkg/controller/integration/initialize.go
index ed98d88..4698b71 100644
--- a/pkg/controller/integration/initialize.go
+++ b/pkg/controller/integration/initialize.go
@@ -20,6 +20,8 @@ package integration
import (
"context"
+ metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+
v1 "github.com/apache/camel-k/pkg/apis/camel/v1"
"github.com/apache/camel-k/pkg/trait"
"github.com/apache/camel-k/pkg/util/defaults"
@@ -55,6 +57,10 @@ func (action *initializeAction) Handle(ctx context.Context, integration *v1.Inte
integration.Status.Phase = v1.IntegrationPhaseBuildingKit
integration.SetIntegrationKit(&kit)
integration.Status.Version = defaults.Version
+ if timestamp := integration.Status.InitializationTimestamp; timestamp == nil || timestamp.IsZero() {
+ now := metav1.Now()
+ integration.Status.InitializationTimestamp = &now
+ }
return integration, nil
}
diff --git a/pkg/controller/integration/metrics.go b/pkg/controller/integration/metrics.go
new file mode 100644
index 0000000..fb6fff3
--- /dev/null
+++ b/pkg/controller/integration/metrics.go
@@ -0,0 +1,47 @@
+/*
+Licensed to the Apache Software Foundation (ASF) under one or more
+contributor license agreements. See the NOTICE file distributed with
+this work for additional information regarding copyright ownership.
+The ASF licenses this file to You under the Apache License, Version 2.0
+(the "License"); you may not use this file except in compliance with
+the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package integration
+
+import (
+ "time"
+
+ "sigs.k8s.io/controller-runtime/pkg/metrics"
+
+ "github.com/prometheus/client_golang/prometheus"
+)
+
+var (
+ timeToFirstReadiness = prometheus.NewHistogram(
+ prometheus.HistogramOpts{
+ Name: "camel_k_integration_first_readiness_seconds",
+ Help: "Camel K integration time to first readiness",
+ Buckets: []float64{
+ 5 * time.Second.Seconds(),
+ 10 * time.Second.Seconds(),
+ 30 * time.Second.Seconds(),
+ 1 * time.Minute.Seconds(),
+ 2 * time.Minute.Seconds(),
+ },
+ },
+ )
+)
+
+func init() {
+ // Register custom metrics with the global prometheus registry
+ metrics.Registry.MustRegister(timeToFirstReadiness)
+}
diff --git a/pkg/controller/integration/monitor.go b/pkg/controller/integration/monitor.go
index 2b55f59..654d78b 100644
--- a/pkg/controller/integration/monitor.go
+++ b/pkg/controller/integration/monitor.go
@@ -21,6 +21,7 @@ import (
"context"
appsv1 "k8s.io/api/apps/v1"
+ corev1 "k8s.io/api/core/v1"
k8sclient "sigs.k8s.io/controller-runtime/pkg/client"
v1 "github.com/apache/camel-k/pkg/apis/camel/v1"
@@ -62,6 +63,7 @@ func (action *monitorAction) Handle(ctx context.Context, integration *v1.Integra
integration.Status.Profile = integration.Spec.Profile
}
integration.Status.Version = defaults.Version
+ integration.Status.InitializationTimestamp = nil
return integration, nil
}
@@ -72,8 +74,9 @@ func (action *monitorAction) Handle(ctx context.Context, integration *v1.Integra
return nil, err
}
- // Enforce the scale sub-resource label selector
- // It is used by the HPA that queries the scale sub-resource endpoint to list the pods owned by the integration
+ // Enforce the scale sub-resource label selector.
+ // It is used by the HPA that queries the scale sub-resource endpoint,
+ // to list the pods owned by the integration.
integration.Status.Selector = v1.IntegrationLabel + "=" + integration.Name
// Check replicas
@@ -96,9 +99,18 @@ func (action *monitorAction) Handle(ctx context.Context, integration *v1.Integra
}
}
- // Mirror ready condition from the sub resource (e.g.ReplicaSet, Deployment, CronJob, ...) to the integration
+ // Mirror ready condition from the owned resource (e.g.ReplicaSet, Deployment, CronJob, ...)
+ // into the owning integration
+ previous := integration.Status.GetCondition(v1.IntegrationConditionReady)
kubernetes.MirrorReadyCondition(ctx, action.client, integration)
+ if next := integration.Status.GetCondition(v1.IntegrationConditionReady);
+ (previous == nil || previous.FirstTruthyTime == nil || previous.FirstTruthyTime.IsZero()) &&
+ next != nil && next.Status == corev1.ConditionTrue && !(next.FirstTruthyTime == nil || next.FirstTruthyTime.IsZero()) {
+ // Observe the time to first readiness metric
+ timeToFirstReadiness.Observe(next.FirstTruthyTime.Time.Sub(integration.Status.InitializationTimestamp.Time).Seconds())
+ }
+
return integration, nil
}
[camel-k] 09/25: feat: Install alerting rule for reconciliation
request failure SLO
Posted by as...@apache.org.
This is an automated email from the ASF dual-hosted git repository.
astefanutti pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/camel-k.git
commit 2456f03a271ae536a47809d0faeb2f7f059686e3
Author: Antonin Stefanutti <an...@stefanutti.fr>
AuthorDate: Tue Oct 20 16:18:43 2020 +0200
feat: Install alerting rule for reconciliation request failure SLO
---
deploy/operator-prometheus-rule.yaml | 16 +++++++++++++++-
1 file changed, 15 insertions(+), 1 deletion(-)
diff --git a/deploy/operator-prometheus-rule.yaml b/deploy/operator-prometheus-rule.yaml
index f88a35e..73b8722 100644
--- a/deploy/operator-prometheus-rule.yaml
+++ b/deploy/operator-prometheus-rule.yaml
@@ -13,4 +13,18 @@ spec:
severity: warning
annotations:
message: |
- {{ printf "%0.0f" $value }}% of the reconciliation requests for {{ $labels.job }} have their duration above 0.5s.
\ No newline at end of file
+ {{ printf "%0.0f" $value }}% of the reconciliation requests for {{ $labels.job }} have their duration above 0.5s.
+ - alert: CamelKReconciliationFailure
+ expr: |
+ sum(rate(camel_k_reconciliation_duration_seconds_count{result="Errored"}[5m])) by (job)
+ /
+ sum(rate(camel_k_reconciliation_duration_seconds_count[5m])) by (job)
+ * 100
+ > 1
+ for: 10m
+ labels:
+ severity: warning
+ annotations:
+ message: |
+ {{ printf "%0.0f" $value }}% of the reconciliation requests
+ for {{ $labels.job }} have failed.
[camel-k] 23/25: feat: Enable operator liveness health check
Posted by as...@apache.org.
This is an automated email from the ASF dual-hosted git repository.
astefanutti pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/camel-k.git
commit ceee5e3a881bc552fbe5d2877b655be80c1e52a2
Author: Antonin Stefanutti <an...@stefanutti.fr>
AuthorDate: Tue Nov 3 10:38:18 2020 +0100
feat: Enable operator liveness health check
---
.../camel-k.v1.3.0-snapshot.clusterserviceversion.yaml | 6 ++++++
deploy/operator-deployment.yaml | 10 ++++++++--
helm/camel-k/templates/operator.yaml | 6 ++++++
pkg/cmd/operator.go | 4 +++-
pkg/cmd/operator/operator.go | 17 ++++++++++++-----
5 files changed, 35 insertions(+), 8 deletions(-)
diff --git a/deploy/olm-catalog/camel-k-dev/1.3.0-snapshot/camel-k.v1.3.0-snapshot.clusterserviceversion.yaml b/deploy/olm-catalog/camel-k-dev/1.3.0-snapshot/camel-k.v1.3.0-snapshot.clusterserviceversion.yaml
index fdf069e..1178c34 100644
--- a/deploy/olm-catalog/camel-k-dev/1.3.0-snapshot/camel-k.v1.3.0-snapshot.clusterserviceversion.yaml
+++ b/deploy/olm-catalog/camel-k-dev/1.3.0-snapshot/camel-k.v1.3.0-snapshot.clusterserviceversion.yaml
@@ -314,6 +314,12 @@ spec:
fieldPath: metadata.namespace
image: docker.io/apache/camel-k:1.3.0-SNAPSHOT
imagePullPolicy: IfNotPresent
+ livenessProbe:
+ httpGet:
+ path: /healthz
+ port: 8081
+ initialDelaySeconds: 5
+ periodSeconds: 5
name: camel-k-operator
ports:
- containerPort: 8080
diff --git a/deploy/operator-deployment.yaml b/deploy/operator-deployment.yaml
index ce8e9e1..52e3046 100644
--- a/deploy/operator-deployment.yaml
+++ b/deploy/operator-deployment.yaml
@@ -42,8 +42,8 @@ spec:
image: docker.io/apache/camel-k:1.3.0-SNAPSHOT
imagePullPolicy: IfNotPresent
command:
- - kamel
- - operator
+ - kamel
+ - operator
ports:
- containerPort: 8080
name: metrics
@@ -63,3 +63,9 @@ spec:
valueFrom:
fieldRef:
fieldPath: metadata.namespace
+ livenessProbe:
+ httpGet:
+ path: /healthz
+ port: 8081
+ initialDelaySeconds: 5
+ periodSeconds: 5
diff --git a/helm/camel-k/templates/operator.yaml b/helm/camel-k/templates/operator.yaml
index 3a870ed..9a68ab0 100644
--- a/helm/camel-k/templates/operator.yaml
+++ b/helm/camel-k/templates/operator.yaml
@@ -58,6 +58,12 @@ spec:
fieldPath: metadata.namespace
image: {{ .Values.operator.image }}
imagePullPolicy: IfNotPresent
+ livenessProbe:
+ httpGet:
+ path: /healthz
+ port: 8081
+ initialDelaySeconds: 5
+ periodSeconds: 5
name: camel-k-operator
ports:
- containerPort: 8080
diff --git a/pkg/cmd/operator.go b/pkg/cmd/operator.go
index 2d69d01..276a1d4 100644
--- a/pkg/cmd/operator.go
+++ b/pkg/cmd/operator.go
@@ -36,15 +36,17 @@ func newCmdOperator() (*cobra.Command, *operatorCmdOptions) {
Run: options.run,
}
+ cmd.Flags().Int32("health-port", 8081, "The port of the health endpoint")
cmd.Flags().Int32("monitoring-port", 8080, "The port of the metrics endpoint")
return &cmd, &options
}
type operatorCmdOptions struct {
+ HealthPort int32 `mapstructure:"health-port"`
MonitoringPort int32 `mapstructure:"monitoring-port"`
}
func (o *operatorCmdOptions) run(_ *cobra.Command, _ []string) {
- operator.Run(o.MonitoringPort)
+ operator.Run(o.HealthPort, o.MonitoringPort)
}
diff --git a/pkg/cmd/operator/operator.go b/pkg/cmd/operator/operator.go
index ec59ea1..2b1cc54 100644
--- a/pkg/cmd/operator/operator.go
+++ b/pkg/cmd/operator/operator.go
@@ -34,6 +34,7 @@ import (
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/client/config"
+ "sigs.k8s.io/controller-runtime/pkg/healthz"
logf "sigs.k8s.io/controller-runtime/pkg/log"
"sigs.k8s.io/controller-runtime/pkg/log/zap"
"sigs.k8s.io/controller-runtime/pkg/manager/signals"
@@ -64,8 +65,7 @@ func printVersion() {
}
// Run starts the Camel K operator
-func Run(monitoringPort int32) {
- rand.Seed(time.Now().UTC().UnixNano())
+func Run(healthPort, monitoringPort int32) {rand.Seed(time.Now().UTC().UnixNano())
flag.Parse()
@@ -124,15 +124,22 @@ func Run(monitoringPort int32) {
// Create a new Cmd to provide shared dependencies and start components
mgr, err := ctrl.NewManager(cfg, ctrl.Options{
- Namespace: namespace,
- EventBroadcaster: eventBroadcaster,
- MetricsBindAddress: ":" + strconv.Itoa(int(monitoringPort)),
+ Namespace: namespace,
+ EventBroadcaster: eventBroadcaster,
+ HealthProbeBindAddress: ":" + strconv.Itoa(int(healthPort)),
+ MetricsBindAddress: ":" + strconv.Itoa(int(monitoringPort)),
})
if err != nil {
log.Error(err, "")
os.Exit(1)
}
+ // Add health check
+ if err := mgr.AddHealthzCheck("health-probe", healthz.Ping); err != nil {
+ log.Error(err, "Unable add liveness check")
+ os.Exit(1)
+ }
+
log.Info("Registering Components.")
// Setup Scheme for all resources
[camel-k] 22/25: feat: Add an install option to configure the
operator metrics endpoint port
Posted by as...@apache.org.
This is an automated email from the ASF dual-hosted git repository.
astefanutti pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/camel-k.git
commit 5142dc9f1a892baaa5258cf340fd20b591ae4172
Author: Antonin Stefanutti <an...@stefanutti.fr>
AuthorDate: Wed Oct 28 12:25:21 2020 +0100
feat: Add an install option to configure the operator metrics endpoint port
---
pkg/cmd/install.go | 21 ++++++++++++---------
pkg/install/operator.go | 9 +++++++++
2 files changed, 21 insertions(+), 9 deletions(-)
diff --git a/pkg/cmd/install.go b/pkg/cmd/install.go
index 1ce3cce..ac4ff34 100644
--- a/pkg/cmd/install.go
+++ b/pkg/cmd/install.go
@@ -24,9 +24,14 @@ import (
"strings"
"time"
- "github.com/apache/camel-k/pkg/util/olm"
- "github.com/apache/camel-k/pkg/util/registry"
"go.uber.org/multierr"
+
+ "github.com/pkg/errors"
+ "github.com/spf13/cobra"
+ "github.com/spf13/viper"
+
+ corev1 "k8s.io/api/core/v1"
+ k8serrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
@@ -36,14 +41,9 @@ import (
"github.com/apache/camel-k/pkg/client"
"github.com/apache/camel-k/pkg/install"
"github.com/apache/camel-k/pkg/util/kubernetes"
+ "github.com/apache/camel-k/pkg/util/olm"
+ "github.com/apache/camel-k/pkg/util/registry"
"github.com/apache/camel-k/pkg/util/watch"
-
- "github.com/pkg/errors"
- "github.com/spf13/cobra"
- "github.com/spf13/viper"
-
- corev1 "k8s.io/api/core/v1"
- k8serrors "k8s.io/apimachinery/pkg/api/errors"
)
func newCmdInstall(rootCmdOptions *RootCmdOptions) (*cobra.Command, *installCmdOptions) {
@@ -120,6 +120,7 @@ func newCmdInstall(rootCmdOptions *RootCmdOptions) (*cobra.Command, *installCmdO
// monitoring
cmd.Flags().Bool("monitoring", false, "To enable or disable the operator monitoring")
+ cmd.Flags().Int("monitoring-port", 8080, "The port of the metrics endpoint")
// save
cmd.Flags().Bool("save", false, "Save the install parameters into the default kamel configuration file (kamel-config.yaml)")
@@ -161,6 +162,7 @@ type installCmdOptions struct {
MavenRepositories []string `mapstructure:"maven-repositories"`
MavenSettings string `mapstructure:"maven-settings"`
Monitoring bool `mapstructure:"monitoring"`
+ MonitoringPort int32 `mapstructure:"monitoring-port"`
Properties []string `mapstructure:"properties"`
TraitProfile string `mapstructure:"trait-profile"`
HTTPProxySecret string `mapstructure:"http-proxy-secret"`
@@ -252,6 +254,7 @@ func (o *installCmdOptions) install(cobraCmd *cobra.Command, _ []string) error {
ClusterType: o.ClusterType,
Monitoring: install.OperatorMonitoringConfiguration{
Enabled: o.Monitoring,
+ Port: o.MonitoringPort,
},
}
err = install.OperatorOrCollect(o.Context, c, cfg, collection, o.Force)
diff --git a/pkg/install/operator.go b/pkg/install/operator.go
index ca6cd3e..2999144 100644
--- a/pkg/install/operator.go
+++ b/pkg/install/operator.go
@@ -54,6 +54,7 @@ type OperatorConfiguration struct {
// OperatorMonitoringConfiguration --
type OperatorMonitoringConfiguration struct {
Enabled bool
+ Port int32
}
// OperatorOrCollect installs the operator resources or adds them to the collector if present
@@ -75,6 +76,14 @@ func OperatorOrCollect(ctx context.Context, c client.Client, cfg OperatorConfigu
}
}
+ if d, ok := o.(*appsv1.Deployment); ok {
+ if d.Labels["camel.apache.org/component"] == "operator" {
+ d.Spec.Template.Spec.Containers[0].Args = append(d.Spec.Template.Spec.Containers[0].Args,
+ fmt.Sprintf("--monitoring-port=%d", cfg.Monitoring.Port))
+ d.Spec.Template.Spec.Containers[0].Ports[0].ContainerPort = cfg.Monitoring.Port
+ }
+ }
+
if cfg.Global {
if d, ok := o.(*appsv1.Deployment); ok {
if d.Labels["camel.apache.org/component"] == "operator" {
[camel-k] 06/25: chore: Declare metrics container port
Posted by as...@apache.org.
This is an automated email from the ASF dual-hosted git repository.
astefanutti pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/camel-k.git
commit 01eb712e6fcb48898ac63a28eb7182ac95b538ed
Author: Antonin Stefanutti <an...@stefanutti.fr>
AuthorDate: Fri Oct 16 11:30:05 2020 +0200
chore: Declare metrics container port
---
.../camel-k.v1.3.0-snapshot.clusterserviceversion.yaml | 3 +++
deploy/operator-deployment.yaml | 5 ++++-
helm/camel-k/templates/operator.yaml | 5 +++--
pkg/cmd/operator/operator.go | 5 +++--
4 files changed, 13 insertions(+), 5 deletions(-)
diff --git a/deploy/olm-catalog/camel-k-dev/1.3.0-snapshot/camel-k.v1.3.0-snapshot.clusterserviceversion.yaml b/deploy/olm-catalog/camel-k-dev/1.3.0-snapshot/camel-k.v1.3.0-snapshot.clusterserviceversion.yaml
index af3bdc4..fdf069e 100644
--- a/deploy/olm-catalog/camel-k-dev/1.3.0-snapshot/camel-k.v1.3.0-snapshot.clusterserviceversion.yaml
+++ b/deploy/olm-catalog/camel-k-dev/1.3.0-snapshot/camel-k.v1.3.0-snapshot.clusterserviceversion.yaml
@@ -315,6 +315,9 @@ spec:
image: docker.io/apache/camel-k:1.3.0-SNAPSHOT
imagePullPolicy: IfNotPresent
name: camel-k-operator
+ ports:
+ - containerPort: 8080
+ name: metrics
resources: {}
serviceAccountName: camel-k-operator
permissions:
diff --git a/deploy/operator-deployment.yaml b/deploy/operator-deployment.yaml
index ea88763..ce8e9e1 100644
--- a/deploy/operator-deployment.yaml
+++ b/deploy/operator-deployment.yaml
@@ -40,10 +40,13 @@ spec:
containers:
- name: camel-k-operator
image: docker.io/apache/camel-k:1.3.0-SNAPSHOT
+ imagePullPolicy: IfNotPresent
command:
- kamel
- operator
- imagePullPolicy: IfNotPresent
+ ports:
+ - containerPort: 8080
+ name: metrics
env:
- name: WATCH_NAMESPACE
valueFrom:
diff --git a/helm/camel-k/templates/operator.yaml b/helm/camel-k/templates/operator.yaml
index cee272f..3a870ed 100644
--- a/helm/camel-k/templates/operator.yaml
+++ b/helm/camel-k/templates/operator.yaml
@@ -18,7 +18,6 @@
apiVersion: apps/v1
kind: Deployment
metadata:
- creationTimestamp: null
labels:
app: camel-k
camel.apache.org/component: operator
@@ -33,7 +32,6 @@ spec:
type: Recreate
template:
metadata:
- creationTimestamp: null
labels:
app: camel-k
camel.apache.org/component: operator
@@ -61,5 +59,8 @@ spec:
image: {{ .Values.operator.image }}
imagePullPolicy: IfNotPresent
name: camel-k-operator
+ ports:
+ - containerPort: 8080
+ name: metrics
resources: {}
serviceAccountName: camel-k-operator
diff --git a/pkg/cmd/operator/operator.go b/pkg/cmd/operator/operator.go
index b11c021..a858b30 100644
--- a/pkg/cmd/operator/operator.go
+++ b/pkg/cmd/operator/operator.go
@@ -31,10 +31,10 @@ import (
typedcorev1 "k8s.io/client-go/kubernetes/typed/core/v1"
"k8s.io/client-go/tools/record"
+ ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/client/config"
logf "sigs.k8s.io/controller-runtime/pkg/log"
"sigs.k8s.io/controller-runtime/pkg/log/zap"
- "sigs.k8s.io/controller-runtime/pkg/manager"
"sigs.k8s.io/controller-runtime/pkg/manager/signals"
"github.com/operator-framework/operator-lib/leader"
@@ -122,9 +122,10 @@ func Run() {
}
// Create a new Cmd to provide shared dependencies and start components
- mgr, err := manager.New(cfg, manager.Options{
+ mgr, err := ctrl.NewManager(cfg, ctrl.Options{
Namespace: namespace,
EventBroadcaster: eventBroadcaster,
+ MetricsBindAddress: ":8080",
})
if err != nil {
log.Error(err, "")
[camel-k] 08/25: feat: Install alerting rule for reconciliation
request duration SLO
Posted by as...@apache.org.
This is an automated email from the ASF dual-hosted git repository.
astefanutti pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/camel-k.git
commit 6266dfbfce0bbe21233ae5d57ec4bcabe00cccc6
Author: Antonin Stefanutti <an...@stefanutti.fr>
AuthorDate: Tue Oct 20 15:12:43 2020 +0200
feat: Install alerting rule for reconciliation request duration SLO
---
deploy/operator-prometheus-rule.yaml | 16 ++++++++++++++++
pkg/install/operator.go | 9 +++++----
2 files changed, 21 insertions(+), 4 deletions(-)
diff --git a/deploy/operator-prometheus-rule.yaml b/deploy/operator-prometheus-rule.yaml
new file mode 100644
index 0000000..f88a35e
--- /dev/null
+++ b/deploy/operator-prometheus-rule.yaml
@@ -0,0 +1,16 @@
+apiVersion: monitoring.coreos.com/v1
+kind: PrometheusRule
+metadata:
+ name: camel-k-operator
+spec:
+ groups:
+ - name: camel-k-operator
+ rules:
+ - alert: CamelKReconciliationDuration
+ expr: 100 * (1 - sum(rate(camel_k_reconciliation_duration_seconds_bucket{le="0.5"}[5m])) by (job) / sum(rate(camel_k_reconciliation_duration_seconds_count[5m])) by (job)) > 10
+ for: 1m
+ labels:
+ severity: warning
+ annotations:
+ message: |
+ {{ printf "%0.0f" $value }}% of the reconciliation requests for {{ $labels.job }} have their duration above 0.5s.
\ No newline at end of file
diff --git a/pkg/install/operator.go b/pkg/install/operator.go
index 2b1ee78..ca6cd3e 100644
--- a/pkg/install/operator.go
+++ b/pkg/install/operator.go
@@ -170,11 +170,11 @@ func OperatorOrCollect(ctx context.Context, c client.Client, cfg OperatorConfigu
}
if cfg.Monitoring.Enabled {
- if err := installPodMonitor(ctx, c, cfg.Namespace, customizer, collection, force); err != nil {
+ if err := installMonitoringResources(ctx, c, cfg.Namespace, customizer, collection, force); err != nil {
if k8serrors.IsForbidden(err) {
- fmt.Println("Warning: the creation of PodMonitor resources is not allowed. Try installing as cluster-admin to allow the creation of PodMonitor resources.")
+ fmt.Println("Warning: the creation of monitoring resources is not allowed. Try installing as cluster-admin to allow the creation of monitoring resources.")
} else if meta.IsNoMatchError(errors.Cause(err)) {
- fmt.Println("Warning: the creation of the PodMonitor resource has failed: ", err)
+ fmt.Println("Warning: the creation of the monitoring resources failed: ", err)
} else {
return err
}
@@ -230,9 +230,10 @@ func installStrimziBindings(ctx context.Context, c client.Client, namespace stri
)
}
-func installPodMonitor(ctx context.Context, c client.Client, namespace string, customizer ResourceCustomizer, collection *kubernetes.Collection, force bool) error {
+func installMonitoringResources(ctx context.Context, c client.Client, namespace string, customizer ResourceCustomizer, collection *kubernetes.Collection, force bool) error {
return ResourcesOrCollect(ctx, c, namespace, collection, force, customizer,
"operator-pod-monitor.yaml",
+ "operator-prometheus-rule.yaml",
)
}
[camel-k] 16/25: chore: Rename build recovery attempts metric
Posted by as...@apache.org.
This is an automated email from the ASF dual-hosted git repository.
astefanutti pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/camel-k.git
commit e40b5f11bb26cf734f9186dcdcc708477a93e9dc
Author: Antonin Stefanutti <an...@stefanutti.fr>
AuthorDate: Wed Oct 21 17:54:41 2020 +0200
chore: Rename build recovery attempts metric
---
pkg/controller/build/metrics.go | 26 +++++++++++++-------------
1 file changed, 13 insertions(+), 13 deletions(-)
diff --git a/pkg/controller/build/metrics.go b/pkg/controller/build/metrics.go
index 2b9950b..20b052f 100644
--- a/pkg/controller/build/metrics.go
+++ b/pkg/controller/build/metrics.go
@@ -31,17 +31,6 @@ import (
const buildResultLabel = "result"
var (
- buildAttempts = prometheus.NewHistogramVec(
- prometheus.HistogramOpts{
- Name: "camel_k_build_attempts",
- Help: "Camel K build attempts",
- Buckets: []float64{1, 2, 3, 4, 5},
- },
- []string{
- buildResultLabel,
- },
- )
-
buildDuration = prometheus.NewHistogramVec(
prometheus.HistogramOpts{
Name: "camel_k_build_duration_seconds",
@@ -60,6 +49,17 @@ var (
},
)
+ buildRecovery = prometheus.NewHistogramVec(
+ prometheus.HistogramOpts{
+ Name: "camel_k_build_recovery_attempts",
+ Help: "Camel K build recovery attempts",
+ Buckets: []float64{0, 1, 2, 3, 4, 5},
+ },
+ []string{
+ buildResultLabel,
+ },
+ )
+
queueDuration = prometheus.NewHistogram(
prometheus.HistogramOpts{
Name: "camel_k_build_queue_duration_seconds",
@@ -77,7 +77,7 @@ var (
func init() {
// Register custom metrics with the global prometheus registry
- metrics.Registry.MustRegister(buildAttempts, buildDuration, queueDuration)
+ metrics.Registry.MustRegister(buildDuration, buildRecovery, queueDuration)
}
func observeBuildResult(build *v1.Build, phase v1.BuildPhase, duration time.Duration) {
@@ -89,7 +89,7 @@ func observeBuildResult(build *v1.Build, phase v1.BuildPhase, duration time.Dura
phase = v1.BuildPhaseError
}
- buildAttempts.WithLabelValues(phase.String()).Observe(float64(attempt))
+ buildRecovery.WithLabelValues(phase.String()).Observe(float64(attempt))
buildDuration.WithLabelValues(phase.String()).Observe(duration.Seconds())
}
[camel-k] 01/25: feat: Add build duration histogram metric
Posted by as...@apache.org.
This is an automated email from the ASF dual-hosted git repository.
astefanutti pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/camel-k.git
commit c5b45f2f6080e7096487cd695baa0a2a3987cdba
Author: Antonin Stefanutti <an...@stefanutti.fr>
AuthorDate: Mon Oct 12 15:55:02 2020 +0200
feat: Add build duration histogram metric
---
go.mod | 1 +
go.sum | 7 +++++
pkg/controller/build/metrics.go | 53 ++++++++++++++++++++++++++++++++
pkg/controller/build/monitor_pod.go | 8 +++--
pkg/controller/build/schedule_routine.go | 19 +++++++-----
5 files changed, 79 insertions(+), 9 deletions(-)
diff --git a/go.mod b/go.mod
index 12c7fe1..d0ea343 100644
--- a/go.mod
+++ b/go.mod
@@ -24,6 +24,7 @@ require (
github.com/operator-framework/operator-lifecycle-manager v0.0.0-20200321030439-57b580e57e88
github.com/pkg/errors v0.9.1
github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring v0.42.1
+ github.com/prometheus/client_golang v1.7.1
github.com/radovskyb/watcher v1.0.6
github.com/rs/xid v1.2.1
github.com/scylladb/go-set v1.0.2
diff --git a/go.sum b/go.sum
index 2a6ab78..b399f5e 100644
--- a/go.sum
+++ b/go.sum
@@ -1012,6 +1012,8 @@ github.com/prometheus/client_golang v1.5.0 h1:Ctq0iGpCmr3jeP77kbF2UxgvRwzWWz+4Bh
github.com/prometheus/client_golang v1.5.0/go.mod h1:e9GMxYsXl05ICDXkRhurwBS4Q3OK1iX/F2sw+iXX5zU=
github.com/prometheus/client_golang v1.6.0 h1:YVPodQOcK15POxhgARIvnDRVpLcuK8mglnMrWfyrw6A=
github.com/prometheus/client_golang v1.6.0/go.mod h1:ZLOG9ck3JLRdB5MgO8f+lLTe83AXG6ro35rLTxvnIl4=
+github.com/prometheus/client_golang v1.7.1 h1:NTGy1Ja9pByO+xAeH/qiWnLrKtr3hJPNjaVUwnjpdpA=
+github.com/prometheus/client_golang v1.7.1/go.mod h1:PY5Wy2awLA44sXw4AOSfFBetzPP4j5+D6mVACh+pe2M=
github.com/prometheus/client_model v0.0.0-20170216185247-6f3806018612/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo=
github.com/prometheus/client_model v0.0.0-20171117100541-99fa1f4be8e5/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo=
github.com/prometheus/client_model v0.0.0-20180712105110-5c3871d89910/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo=
@@ -1037,6 +1039,8 @@ github.com/prometheus/common v0.7.0 h1:L+1lyG48J1zAQXA3RBX/nG/B3gjlHq0zTt2tlbJLy
github.com/prometheus/common v0.7.0/go.mod h1:DjGbpBbp5NYNiECxcL/VnbXCCaQpKd3tt26CguLLsqA=
github.com/prometheus/common v0.9.1 h1:KOMtN28tlbam3/7ZKEYKHhKoJZYYj3gMH4uc62x7X7U=
github.com/prometheus/common v0.9.1/go.mod h1:yhUN8i9wzaXS3w1O07YhxHEBxD+W35wd8bs7vj7HSQ4=
+github.com/prometheus/common v0.10.0 h1:RyRA7RzGXQZiW+tGMr7sxa85G1z0yOpM1qq5c8lNawc=
+github.com/prometheus/common v0.10.0/go.mod h1:Tlit/dnDKsSWFlCLTWaA1cyBgKHSMdTB80sz/V91rCo=
github.com/prometheus/procfs v0.0.0-20180125133057-cb4147076ac7/go.mod h1:c3At6R/oaqEKCNdg8wHV1ftS6bRYblBhIjjI8uT2IGk=
github.com/prometheus/procfs v0.0.0-20180612222113-7d6f385de8be/go.mod h1:c3At6R/oaqEKCNdg8wHV1ftS6bRYblBhIjjI8uT2IGk=
github.com/prometheus/procfs v0.0.0-20180725123919-05ee40e3a273/go.mod h1:c3At6R/oaqEKCNdg8wHV1ftS6bRYblBhIjjI8uT2IGk=
@@ -1056,6 +1060,8 @@ github.com/prometheus/procfs v0.0.8/go.mod h1:7Qr8sr6344vo1JqZ6HhLceV9o3AJ1Ff+Gx
github.com/prometheus/procfs v0.0.10/go.mod h1:7Qr8sr6344vo1JqZ6HhLceV9o3AJ1Ff+GxbHq6oeK9A=
github.com/prometheus/procfs v0.0.11 h1:DhHlBtkHWPYi8O2y31JkK0TF+DGM+51OopZjH/Ia5qI=
github.com/prometheus/procfs v0.0.11/go.mod h1:lV6e/gmhEcM9IjHGsFOCxxuZ+z1YqCvr4OA4YeYWdaU=
+github.com/prometheus/procfs v0.1.3 h1:F0+tqvhOksq22sc6iCHF5WGlWjdwj92p0udFh1VFBS8=
+github.com/prometheus/procfs v0.1.3/go.mod h1:lV6e/gmhEcM9IjHGsFOCxxuZ+z1YqCvr4OA4YeYWdaU=
github.com/prometheus/statsd_exporter v0.15.0 h1:UiwC1L5HkxEPeapXdm2Ye0u1vUJfTj7uwT5yydYpa1E=
github.com/prometheus/statsd_exporter v0.15.0/go.mod h1:Dv8HnkoLQkeEjkIE4/2ndAA7WL1zHKK7WMqFQqu72rw=
github.com/prometheus/tsdb v0.7.1/go.mod h1:qhTCs0VvXwvX/y3TZrWD7rabWM+ijKTux40TwIPHuXU=
@@ -1492,6 +1498,7 @@ golang.org/x/sys v0.0.0-20200511232937-7e40ca221e25/go.mod h1:h1NjWce9XRLGQEsW7w
golang.org/x/sys v0.0.0-20200515095857-1151b9dac4a9/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200523222454-059865788121/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200610111108-226ff32320da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20200615200032-f1bc736245b1/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200625212154-ddb9806d33ae/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200802091954-4b90ce9b60b3/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200803210538-64077c9b5642/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
diff --git a/pkg/controller/build/metrics.go b/pkg/controller/build/metrics.go
new file mode 100644
index 0000000..cac2012
--- /dev/null
+++ b/pkg/controller/build/metrics.go
@@ -0,0 +1,53 @@
+/*
+Licensed to the Apache Software Foundation (ASF) under one or more
+contributor license agreements. See the NOTICE file distributed with
+this work for additional information regarding copyright ownership.
+The ASF licenses this file to You under the Apache License, Version 2.0
+(the "License"); you may not use this file except in compliance with
+the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package build
+
+import (
+ "time"
+
+ "sigs.k8s.io/controller-runtime/pkg/metrics"
+
+ "github.com/prometheus/client_golang/prometheus"
+)
+
+const buildResult = "result" // label name of the build duration histogram; its values are build phases
+
+var (
+ buildDuration = prometheus.NewHistogramVec( // build durations, partitioned by the buildResult label
+ prometheus.HistogramOpts{
+ Name: "camel_k_build_duration_seconds",
+ Help: "Camel K build duration",
+ Buckets: []float64{ // bucket upper bounds, expressed in seconds (30s up to 10m)
+ 30 * time.Second.Seconds(),
+ 1 * time.Minute.Seconds(),
+ 1.5 * time.Minute.Seconds(),
+ 2 * time.Minute.Seconds(),
+ 5 * time.Minute.Seconds(),
+ 10 * time.Minute.Seconds(),
+ },
+ },
+ []string{
+ buildResult,
+ },
+ )
+)
+
+func init() {
+ // Register custom metrics with the controller-runtime metrics registry (metrics.Registry)
+ metrics.Registry.MustRegister(buildDuration)
+}
diff --git a/pkg/controller/build/monitor_pod.go b/pkg/controller/build/monitor_pod.go
index cd6aca1..23ebbee 100644
--- a/pkg/controller/build/monitor_pod.go
+++ b/pkg/controller/build/monitor_pod.go
@@ -67,7 +67,9 @@ func (action *monitorPodAction) Handle(ctx context.Context, build *v1.Build) (*v
case pod.Status.Phase == corev1.PodSucceeded:
build.Status.Phase = v1.BuildPhaseSucceeded
- build.Status.Duration = metav1.Now().Sub(build.Status.StartedAt.Time).String()
+ duration := metav1.Now().Sub(build.Status.StartedAt.Time)
+ build.Status.Duration = duration.String()
+ buildDuration.WithLabelValues(string(build.Status.Phase)).Observe(duration.Seconds())
for _, task := range build.Spec.Tasks {
if task.Image != nil {
build.Status.Image = task.Image.BuiltImage
@@ -84,7 +86,9 @@ func (action *monitorPodAction) Handle(ctx context.Context, build *v1.Build) (*v
case pod.Status.Phase == corev1.PodFailed:
build.Status.Phase = v1.BuildPhaseFailed
- build.Status.Duration = metav1.Now().Sub(build.Status.StartedAt.Time).String()
+ duration := metav1.Now().Sub(build.Status.StartedAt.Time)
+ build.Status.Duration = duration.String()
+ buildDuration.WithLabelValues(string(build.Status.Phase)).Observe(duration.Seconds())
}
return build, nil
diff --git a/pkg/controller/build/schedule_routine.go b/pkg/controller/build/schedule_routine.go
index 6edc0f7..86edee6 100644
--- a/pkg/controller/build/schedule_routine.go
+++ b/pkg/controller/build/schedule_routine.go
@@ -22,13 +22,14 @@ import (
"fmt"
"sync"
+ metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+ "k8s.io/apimachinery/pkg/types"
+ "sigs.k8s.io/controller-runtime/pkg/client"
+
v1 "github.com/apache/camel-k/pkg/apis/camel/v1"
"github.com/apache/camel-k/pkg/builder"
camelevent "github.com/apache/camel-k/pkg/event"
"github.com/apache/camel-k/pkg/util/patch"
- metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
- "k8s.io/apimachinery/pkg/types"
- "sigs.k8s.io/controller-runtime/pkg/client"
)
// NewScheduleRoutineAction creates a new schedule routine action
@@ -114,13 +115,15 @@ func (action *scheduleRoutineAction) runBuild(ctx context.Context, build *v1.Bui
for i, task := range build.Spec.Tasks {
if task.Builder == nil {
+ duration := metav1.Now().Sub(build.Status.StartedAt.Time)
status := v1.BuildStatus{
// Error the build directly as we know recovery won't work over ill-defined tasks
Phase: v1.BuildPhaseError,
Error: fmt.Sprintf("task cannot be executed using the routine strategy: %s",
task.GetName()),
- Duration: metav1.Now().Sub(build.Status.StartedAt.Time).String(),
+ Duration: duration.String(),
}
+ buildDuration.WithLabelValues(string(status.Phase)).Observe(duration.Seconds())
_ = action.updateBuildStatus(ctx, build, status)
break
}
@@ -128,12 +131,14 @@ func (action *scheduleRoutineAction) runBuild(ctx context.Context, build *v1.Bui
status := action.builder.Run(*task.Builder)
lastTask := i == len(build.Spec.Tasks)-1
taskFailed := status.Phase == v1.BuildPhaseFailed
- if lastTask || taskFailed {
- status.Duration = metav1.Now().Sub(build.Status.StartedAt.Time).String()
- }
if lastTask && !taskFailed {
status.Phase = v1.BuildPhaseSucceeded
}
+ if lastTask || taskFailed {
+ duration := metav1.Now().Sub(build.Status.StartedAt.Time)
+ status.Duration = duration.String()
+ buildDuration.WithLabelValues(string(status.Phase)).Observe(duration.Seconds())
+ }
err := action.updateBuildStatus(ctx, build, status)
if err != nil || taskFailed {
break
[camel-k] 12/25: feat: Add default alerting rule for build failure
SLO
Posted by as...@apache.org.
This is an automated email from the ASF dual-hosted git repository.
astefanutti pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/camel-k.git
commit 8c53d37466227ad5b20f3a6e52c48ffc22fc1c7b
Author: Antonin Stefanutti <an...@stefanutti.fr>
AuthorDate: Wed Oct 21 12:18:26 2020 +0200
feat: Add default alerting rule for build failure SLO
---
deploy/operator-prometheus-rule.yaml | 13 +++++++++++++
1 file changed, 13 insertions(+)
diff --git a/deploy/operator-prometheus-rule.yaml b/deploy/operator-prometheus-rule.yaml
index 6a009dd..0d6ca96 100644
--- a/deploy/operator-prometheus-rule.yaml
+++ b/deploy/operator-prometheus-rule.yaml
@@ -68,3 +68,16 @@ spec:
message: |
{{ printf "%0.0f" $value }}% of the successful builds
for {{ $labels.job }} have their duration above 5m.
+ - alert: CamelKBuildFailure # share of builds ending in the Failed phase, per job
+ expr: |
+ sum(rate(camel_k_build_duration_seconds_count{result="Failed"}[5m])) by (job)
+ /
+ sum(rate(camel_k_build_duration_seconds_count[5m])) by (job)
+ * 100
+ > 1
+ for: 10m
+ labels:
+ severity: warning
+ annotations:
+ message: |
+ {{ printf "%0.0f" $value }}% of the builds for {{ $labels.job }} have failed.