You are viewing a plain text version of this content. The canonical link for it is here.
Posted to notifications@skywalking.apache.org by wu...@apache.org on 2020/06/11 08:58:44 UTC
[skywalking] branch master updated: Update prometheus fetcher for
so11y (#4902)
This is an automated email from the ASF dual-hosted git repository.
wusheng pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/skywalking.git
The following commit(s) were added to refs/heads/master by this push:
new 5038f07 Update prometheus fetcher for so11y (#4902)
5038f07 is described below
commit 5038f07094afffb63e2f6f83ae2dd4ce19ba18f4
Author: Gao Hongtao <ha...@gmail.com>
AuthorDate: Thu Jun 11 16:58:28 2020 +0800
Update prometheus fetcher for so11y (#4902)
---
docs/en/setup/backend/backend-telemetry.md | 46 +++++
.../main/resources/fetcher-prom-rules/self.yaml | 189 ++++++++++++++++++---
.../main/resources/ui-initialized-templates.yml | 175 +++++++++++++++++++
.../core/analysis/meter/function/AvgFunction.java | 48 +++++-
.../function/AvgHistogramPercentileFunction.java | 6 +-
.../provider/PrometheusFetcherProvider.java | 20 ++-
.../prometheus/provider/counter/Window.java | 2 +-
.../{PrometheusMetric.java => LabelMatchRule.java} | 9 +-
.../prometheus/provider/rule/PrometheusMetric.java | 2 +
.../library/util/prometheus/metrics/Summary.java | 2 +-
.../library/util/prometheus/parser/Context.java | 49 ++++--
11 files changed, 493 insertions(+), 55 deletions(-)
diff --git a/docs/en/setup/backend/backend-telemetry.md b/docs/en/setup/backend/backend-telemetry.md
index dd6e9dc..b2b428d 100644
--- a/docs/en/setup/backend/backend-telemetry.md
+++ b/docs/en/setup/backend/backend-telemetry.md
@@ -62,3 +62,49 @@ prometheus-fetcher:
```
3. Make sure `config/fetcher-prom-rules/self.yaml` exists.
+
+Once you deploy an oap-server cluster, the target host should be replaced with a dedicated IP or hostname. For instance,
+if there are three OAP servers in your cluster whose hosts are `service1`, `service2` and `service3` respectively, you should
+update each `self.yaml` to set its own target host.
+
+service1:
+```yaml
+fetcherInterval: PT15S
+fetcherTimeout: PT10S
+metricsPath: /metrics
+staticConfig:
+ # targets will be labeled as "instance"
+ targets:
+ - service1:1234
+ labels:
+ service: oap-server
+...
+```
+
+service2:
+```yaml
+fetcherInterval: PT15S
+fetcherTimeout: PT10S
+metricsPath: /metrics
+staticConfig:
+ # targets will be labeled as "instance"
+ targets:
+ - service2:1234
+ labels:
+ service: oap-server
+...
+```
+
+service3:
+```yaml
+fetcherInterval: PT15S
+fetcherTimeout: PT10S
+metricsPath: /metrics
+staticConfig:
+ # targets will be labeled as "instance"
+ targets:
+ - service3:1234
+ labels:
+ service: oap-server
+...
+```
diff --git a/oap-server/server-bootstrap/src/main/resources/fetcher-prom-rules/self.yaml b/oap-server/server-bootstrap/src/main/resources/fetcher-prom-rules/self.yaml
index 76cfe62..2a703c0 100644
--- a/oap-server/server-bootstrap/src/main/resources/fetcher-prom-rules/self.yaml
+++ b/oap-server/server-bootstrap/src/main/resources/fetcher-prom-rules/self.yaml
@@ -61,80 +61,217 @@ metricsRules:
- service
instance:
- instance
- - name: instance_jvm_gc_collection_seconds
+ - name: instance_jvm_young_gc_count
+ scope: SERVICE_INSTANCE
+ operation: avg
+ sources:
+ jvm_gc_collection_seconds_count:
+ counterFunction: INCREASE
+ range: PT1M
+ labelFilter:
+ - key: gc
+ options:
+ - "PS Scavenge"
+ - "Copy"
+ - "ParNew"
+ - "G1 Young Generation"
+ relabel:
+ service:
+ - service
+ instance:
+ - instance
+ - name: instance_jvm_young_gc_time
scope: SERVICE_INSTANCE
operation: avg
sources:
jvm_gc_collection_seconds:
+ labelFilter:
+ - key: gc
+ options:
+ - "PS Scavenge"
+ - "Copy"
+ - "ParNew"
+ - "G1 Young Generation"
relabel:
service:
- service
instance:
- instance
-
- - name: instance_persistence_timer_bulk_execute_latency_heatmap
+ - name: instance_jvm_old_gc_count
scope: SERVICE_INSTANCE
- operation: avgHistogram
+ operation: avg
sources:
- persistence_timer_bulk_execute_latency:
+ jvm_gc_collection_seconds_count:
counterFunction: INCREASE
- range: PT5M
+ range: PT1M
+ labelFilter:
+ - key: gc
+ options:
+ - "PS MarkSweep"
+ - "MarkSweepCompact"
+ - "ConcurrentMarkSweep"
+ - "G1 Old Generation"
+ relabel:
+ service:
+ - service
+ instance:
+ - instance
+ - name: instance_jvm_old_gc_time
+ scope: SERVICE_INSTANCE
+ operation: avg
+ sources:
+ jvm_gc_collection_seconds:
+ labelFilter:
+ - key: gc
+ options:
+ - "PS MarkSweep"
+ - "MarkSweepCompact"
+ - "ConcurrentMarkSweep"
+ - "G1 Old Generation"
+ relabel:
+ service:
+ - service
+ instance:
+ - instance
+ - name: instance_trace_count
+ scope: SERVICE_INSTANCE
+ operation: avg
+ sources:
+ trace_in_latency_count:
+ counterFunction: INCREASE
+ range: PT1M
relabel:
service:
- service
instance:
- instance
- - name: instance_persistence_timer_bulk_execute_latency_percentile
+ - name: instance_trace_latency_percentile
scope: SERVICE_INSTANCE
operation: avgHistogramPercentile
percentiles: [50, 70, 90, 99]
sources:
- persistence_timer_bulk_execute_latency:
+ trace_in_latency:
counterFunction: INCREASE
- range: PT5M
+ range: PT1M
relabel:
service:
- service
instance:
- instance
- - name: instance_persistence_timer_bulk_execute_latency
+ - name: instance_trace_analysis_error_count
scope: SERVICE_INSTANCE
operation: avg
sources:
- persistence_timer_bulk_execute_latency:
+ trace_analysis_error_count:
+ counterFunction: INCREASE
+ range: PT1M
+ relabel:
+ service:
+ - service
+ instance:
+ - instance
+ - name: instance_mesh_count
+ scope: SERVICE_INSTANCE
+ operation: avg
+ sources:
+ mesh_analysis_latency_count:
+ counterFunction: INCREASE
+ range: PT1M
+ relabel:
+ service:
+ - service
+ instance:
+ - instance
+ - name: instance_mesh_latency_percentile
+ scope: SERVICE_INSTANCE
+ operation: avgHistogramPercentile
+ percentiles: [50, 70, 90, 99]
+ sources:
+ mesh_analysis_latency:
+ counterFunction: INCREASE
+ range: PT10M
relabel:
service:
- service
instance:
- instance
- - name: instance_persistence_timer_bulk_prepare_latency
+ - name: instance_mesh_analysis_error_count
scope: SERVICE_INSTANCE
operation: avg
sources:
- persistence_timer_bulk_prepare_latency:
+ mesh_analysis_error_count:
+ counterFunction: INCREASE
+ range: PT1M
relabel:
service:
- service
instance:
- instance
-
- - name: instance_persistence_timer_bulk_error_count
+ - name: instance_metrics_first_aggregation
scope: SERVICE_INSTANCE
operation: avg
sources:
- persistence_timer_bulk_error_count:
+ metrics_aggregation:
counterFunction: INCREASE
range: PT1M
+ labelFilter:
+ - key: dimensionality
+ options: ["min"]
+ - key: level
+ options: ["1"]
relabel:
service:
- service
instance:
- instance
- - name: instance_persistence_timer_bulk_execute_latency_count
+ - name: instance_metrics_second_aggregation
scope: SERVICE_INSTANCE
operation: avg
sources:
- persistence_timer_bulk_execute_latency_count:
+ metrics_aggregation:
+ counterFunction: INCREASE
+ range: PT1M
+ labelFilter:
+ - key: dimensionality
+ options: ["min"]
+ - key: level
+ options: ["2"]
+ relabel:
+ service:
+ - service
+ instance:
+ - instance
+ - name: instance_persistence_timer_execute_latency_percentile
+ scope: SERVICE_INSTANCE
+ operation: avgHistogramPercentile
+ percentiles: [50, 70, 90, 99]
+ sources:
+ persistence_timer_bulk_execute_latency:
+ counterFunction: INCREASE
+ range: PT5M
+ relabel:
+ service:
+ - service
+ instance:
+ - instance
+ - name: instance_persistence_timer_prepare_latency_percentile
+ scope: SERVICE_INSTANCE
+ operation: avgHistogramPercentile
+ percentiles: [50, 70, 90, 99]
+ sources:
+ persistence_timer_bulk_prepare_latency:
+ counterFunction: INCREASE
+ range: PT5M
+ relabel:
+ service:
+ - service
+ instance:
+ - instance
+ - name: instance_persistence_timer_bulk_error_count
+ scope: SERVICE_INSTANCE
+ operation: avg
+ sources:
+ persistence_timer_bulk_error_count:
counterFunction: INCREASE
range: PT1M
relabel:
@@ -142,11 +279,11 @@ metricsRules:
- service
instance:
- instance
- - name: instance_persistence_timer_bulk_prepare_latency_count
+ - name: instance_persistence_timer_execute_count
scope: SERVICE_INSTANCE
operation: avg
sources:
- persistence_timer_bulk_prepare_latency_count:
+ persistence_timer_bulk_execute_latency_count:
counterFunction: INCREASE
range: PT1M
relabel:
@@ -154,17 +291,15 @@ metricsRules:
- service
instance:
- instance
- - name: endpoint_metrics_aggregation
- scope: ENDPOINT
+ - name: instance_persistence_timer_prepare_count
+ scope: SERVICE_INSTANCE
operation: avg
sources:
- metrics_aggregation:
+ persistence_timer_bulk_prepare_latency_count:
counterFunction: INCREASE
range: PT1M
relabel:
service:
- service
- endpoint:
- - dimensionality
- - level
-
+ instance:
+ - instance
diff --git a/oap-server/server-bootstrap/src/main/resources/ui-initialized-templates.yml b/oap-server/server-bootstrap/src/main/resources/ui-initialized-templates.yml
index f1c5e1f..c36261c 100644
--- a/oap-server/server-bootstrap/src/main/resources/ui-initialized-templates.yml
+++ b/oap-server/server-bootstrap/src/main/resources/ui-initialized-templates.yml
@@ -893,3 +893,178 @@ templates:
activated: true
# True means wouldn't show up on the dashboard. Only keeps the definition in the storage.
disabled: false
+ - name: SelfObservability
+ type: "DASHBOARD"
+ configuration: |-
+ [
+ {
+ "name": "SelfObservability",
+ "type": "service",
+ "children": [
+ {
+ "name": "oap-server",
+ "children": [
+ {
+ "width": "3",
+ "title": "CPU",
+ "height": "200",
+ "entityType": "ServiceInstance",
+ "independentSelector": false,
+ "metricType": "REGULAR_VALUE",
+ "metricName": "meter_instance_cpu_percentage",
+ "queryMetricType": "readMetricsValues",
+ "chartType": "ChartLine",
+ "unit": "%"
+ },
+ {
+ "width": "3",
+ "title": "Memory",
+ "height": "200",
+ "entityType": "ServiceInstance",
+ "independentSelector": false,
+ "metricType": "REGULAR_VALUE",
+ "metricName": "meter_instance_jvm_memory_bytes_used",
+ "queryMetricType": "readMetricsValues",
+ "chartType": "ChartLine",
+ "unit": "MB",
+ "aggregation": "/",
+ "aggregationNum": "1000000"
+ },
+ {
+ "width": "3",
+ "title": "GC Count",
+ "height": "200",
+ "entityType": "ServiceInstance",
+ "independentSelector": false,
+ "metricType": "REGULAR_VALUE",
+ "metricName": "meter_instance_jvm_young_gc_count,meter_instance_jvm_old_gc_count",
+ "queryMetricType": "readMetricsValues",
+ "chartType": "ChartLine",
+ "unit": "Per Minute"
+ },
+ {
+ "width": 3,
+ "title": "GC Time",
+ "height": "200",
+ "entityType": "ServiceInstance",
+ "independentSelector": false,
+ "metricType": "REGULAR_VALUE",
+ "metricName": "meter_instance_jvm_young_gc_time,meter_instance_jvm_old_gc_time",
+ "queryMetricType": "readMetricsValues",
+ "chartType": "ChartLine",
+ "unit": "Millisecond"
+ },
+ {
+ "width": 3,
+ "title": "Trace Analysis Count",
+ "height": "200",
+ "entityType": "ServiceInstance",
+ "independentSelector": false,
+ "metricType": "REGULAR_VALUE",
+ "metricName": "meter_instance_trace_count,meter_instance_trace_analysis_error_count",
+ "queryMetricType": "readMetricsValues",
+ "chartType": "ChartLine",
+ "unit": "Per Minute"
+ },
+ {
+ "width": 3,
+ "title": "Trace Analysis Latency",
+ "height": "200",
+ "entityType": "ServiceInstance",
+ "independentSelector": false,
+ "metricType": "LABELED_VALUE",
+ "metricName": "meter_instance_trace_latency_percentile",
+ "queryMetricType": "readLabeledMetricsValues",
+ "chartType": "ChartLine",
+ "labelsIndex": "50,70,90,99",
+ "metricLabels": "50,70,90,99",
+ "unit": "Millisecond"
+ },
+ {
+ "width": 3,
+ "title": "Mesh Analysis Count",
+ "height": "200",
+ "entityType": "ServiceInstance",
+ "independentSelector": false,
+ "metricType": "REGULAR_VALUE",
+ "metricName": "meter_instance_mesh_count,meter_instance_mesh_analysis_error_count",
+ "queryMetricType": "readMetricsValues",
+ "chartType": "ChartLine",
+ "unit": "Per Minute"
+ },
+ {
+ "width": 3,
+ "title": "Mesh Analysis Latency",
+ "height": "200",
+ "entityType": "ServiceInstance",
+ "independentSelector": false,
+ "metricType": "LABELED_VALUE",
+ "metricName": "meter_instance_mesh_latency_percentile",
+ "queryMetricType": "readLabeledMetricsValues",
+ "chartType": "ChartLine",
+ "metricLabels": "50,70,90,99",
+ "labelsIndex": "50,70,90,99",
+ "unit": "Millisecond"
+ },
+ {
+ "width": "3",
+ "title": "Aggregation",
+ "height": "200",
+ "entityType": "ServiceInstance",
+ "independentSelector": false,
+ "metricType": "REGULAR_VALUE",
+ "metricName": "meter_instance_metrics_first_aggregation,meter_instance_metrics_second_aggregation",
+ "queryMetricType": "readMetricsValues",
+ "chartType": "ChartLine",
+ "unit": "Per Minute"
+ },
+ {
+ "width": 3,
+ "title": "Persistence Count",
+ "height": "200",
+ "entityType": "ServiceInstance",
+ "independentSelector": false,
+ "metricType": "REGULAR_VALUE",
+ "metricName": "meter_instance_persistence_timer_prepare_count,meter_instance_persistence_timer_execute_count,meter_instance_persistence_timer_bulk_error_count",
+ "queryMetricType": "readMetricsValues",
+ "chartType": "ChartLine",
+ "unit": "Per 5 Minutes"
+ },
+ {
+ "width": 3,
+ "title": "Persistence Preparing Latency ",
+ "height": "200",
+ "entityType": "ServiceInstance",
+ "independentSelector": false,
+ "metricType": "LABELED_VALUE",
+ "unit": "Millisecond",
+ "queryMetricType": "readLabeledMetricsValues",
+ "chartType": "ChartLine",
+ "metricName": "meter_instance_persistence_timer_prepare_latency_percentile",
+ "metricLabels": "50,70,90,99",
+ "labelsIndex": "50,70,90,99"
+ },
+ {
+ "width": 3,
+ "title": "Persistence Execution Latency ",
+ "height": "200",
+ "entityType": "ServiceInstance",
+ "independentSelector": false,
+ "metricType": "LABELED_VALUE",
+ "metricName": "meter_instance_persistence_timer_execute_latency_percentile",
+ "queryMetricType": "readLabeledMetricsValues",
+ "chartType": "ChartLine",
+ "metricLabels": "50,70,90,99",
+ "labelsIndex": "50,70,90,99",
+ "unit": "Millisecond"
+ }
+ ]
+ }
+ ]
+ }
+ ]
+ # Activated as the DASHBOARD type, makes this templates added into the UI page automatically.
+ # False means providing a basic template, user needs to add it manually.
+ activated: true
+ # True means wouldn't show up on the dashboard. Only keeps the definition in the storage.
+ disabled: false
\ No newline at end of file
diff --git a/oap-server/server-core/src/main/java/org/apache/skywalking/oap/server/core/analysis/meter/function/AvgFunction.java b/oap-server/server-core/src/main/java/org/apache/skywalking/oap/server/core/analysis/meter/function/AvgFunction.java
index 92fae1e..173de19 100644
--- a/oap-server/server-core/src/main/java/org/apache/skywalking/oap/server/core/analysis/meter/function/AvgFunction.java
+++ b/oap-server/server-core/src/main/java/org/apache/skywalking/oap/server/core/analysis/meter/function/AvgFunction.java
@@ -28,19 +28,27 @@ import org.apache.skywalking.oap.server.core.Const;
import org.apache.skywalking.oap.server.core.UnexpectedException;
import org.apache.skywalking.oap.server.core.analysis.manual.instance.InstanceTraffic;
import org.apache.skywalking.oap.server.core.analysis.meter.MeterEntity;
-import org.apache.skywalking.oap.server.core.analysis.metrics.LongAvgMetrics;
import org.apache.skywalking.oap.server.core.analysis.metrics.Metrics;
+import org.apache.skywalking.oap.server.core.analysis.metrics.annotation.ConstOne;
+import org.apache.skywalking.oap.server.core.analysis.metrics.annotation.Entrance;
+import org.apache.skywalking.oap.server.core.analysis.metrics.annotation.SourceFrom;
+import org.apache.skywalking.oap.server.core.query.sql.Function;
import org.apache.skywalking.oap.server.core.remote.grpc.proto.RemoteData;
import org.apache.skywalking.oap.server.core.storage.StorageBuilder;
import org.apache.skywalking.oap.server.core.storage.annotation.Column;
@MeterFunction(functionName = "avg")
@ToString
-public abstract class AvgFunction extends LongAvgMetrics implements AcceptableValue<Long> {
+public abstract class AvgFunction extends Metrics implements AcceptableValue<Long> {
+ protected static final String SUMMATION = "summation";
+ protected static final String COUNT = "count";
+ protected static final String VALUE = "value";
+
@Setter
@Getter
@Column(columnName = ENTITY_ID)
private String entityId;
+
/**
* Service ID is required for sort query.
*/
@@ -49,6 +57,42 @@ public abstract class AvgFunction extends LongAvgMetrics implements AcceptableVa
@Column(columnName = InstanceTraffic.SERVICE_ID)
private String serviceId;
+ @Getter
+ @Setter
+ @Column(columnName = SUMMATION, storageOnly = true)
+ protected long summation;
+ @Getter
+ @Setter
+ @Column(columnName = COUNT, storageOnly = true)
+ protected long count;
+ @Getter
+ @Setter
+ @Column(columnName = VALUE, dataType = Column.ValueDataType.COMMON_VALUE, function = Function.Avg)
+ private long value;
+
+ @Entrance
+ public final void combine(@SourceFrom long summation, @ConstOne long count) {
+ this.summation += summation;
+ this.count += count;
+ }
+
+ @Override
+ public final void combine(Metrics metrics) {
+ AvgFunction longAvgMetrics = (AvgFunction) metrics;
+ combine(longAvgMetrics.summation, longAvgMetrics.count);
+ }
+
+ @Override
+ public final void calculate() {
+ long result = this.summation / this.count;
+ // The minimum of avg result is 1, that means once there's some data in a duration user can get "1" instead of
+ // "0".
+ if (result == 0 && this.summation > 0) {
+ result = 1;
+ }
+ this.value = result;
+ }
+
@Override
public Metrics toHour() {
AvgFunction metrics = (AvgFunction) createNew();
diff --git a/oap-server/server-core/src/main/java/org/apache/skywalking/oap/server/core/analysis/meter/function/AvgHistogramPercentileFunction.java b/oap-server/server-core/src/main/java/org/apache/skywalking/oap/server/core/analysis/meter/function/AvgHistogramPercentileFunction.java
index d88e46e..19de02c 100644
--- a/oap-server/server-core/src/main/java/org/apache/skywalking/oap/server/core/analysis/meter/function/AvgHistogramPercentileFunction.java
+++ b/oap-server/server-core/src/main/java/org/apache/skywalking/oap/server/core/analysis/meter/function/AvgHistogramPercentileFunction.java
@@ -183,7 +183,8 @@ public abstract class AvgHistogramPercentileFunction extends Metrics implements
int count = 0;
int loopIndex = 0;
- for (String key : sortedKeys) {
+ for (int i = 0; i < sortedKeys.size(); i++) {
+ String key = sortedKeys.get(i);
final Long value = dataset.get(key);
count += value;
@@ -191,7 +192,8 @@ public abstract class AvgHistogramPercentileFunction extends Metrics implements
int roof = roofs[rankIdx];
if (count >= roof) {
- percentileValues.put(String.valueOf(ranks.get(rankIdx)), Long.parseLong(key));
+ long latency = (i + 1 == sortedKeys.size()) ? Long.MAX_VALUE : Long.parseLong(sortedKeys.get(i + 1));
+ percentileValues.put(String.valueOf(ranks.get(rankIdx)), latency);
loopIndex++;
} else {
break;
diff --git a/oap-server/server-fetcher-plugin/prometheus-fetcher-plugin/src/main/java/org/apache/skywalking/oap/server/fetcher/prometheus/provider/PrometheusFetcherProvider.java b/oap-server/server-fetcher-plugin/prometheus-fetcher-plugin/src/main/java/org/apache/skywalking/oap/server/fetcher/prometheus/provider/PrometheusFetcherProvider.java
index 7e9d40a..b67b93e 100644
--- a/oap-server/server-fetcher-plugin/prometheus-fetcher-plugin/src/main/java/org/apache/skywalking/oap/server/fetcher/prometheus/provider/PrometheusFetcherProvider.java
+++ b/oap-server/server-fetcher-plugin/prometheus-fetcher-plugin/src/main/java/org/apache/skywalking/oap/server/fetcher/prometheus/provider/PrometheusFetcherProvider.java
@@ -28,6 +28,7 @@ import io.vavr.control.Try;
import java.math.BigDecimal;
import java.time.Duration;
import java.util.Collection;
+import java.util.Comparator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
@@ -36,6 +37,7 @@ import java.util.StringJoiner;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicReference;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import okhttp3.OkHttpClient;
@@ -56,8 +58,9 @@ import org.apache.skywalking.oap.server.core.analysis.meter.function.BucketedVal
import org.apache.skywalking.oap.server.core.analysis.worker.MetricsStreamProcessor;
import org.apache.skywalking.oap.server.fetcher.prometheus.module.PrometheusFetcherModule;
import org.apache.skywalking.oap.server.fetcher.prometheus.provider.counter.Window;
-import org.apache.skywalking.oap.server.fetcher.prometheus.provider.operation.Operation;
import org.apache.skywalking.oap.server.fetcher.prometheus.provider.operation.MetricSource;
+import org.apache.skywalking.oap.server.fetcher.prometheus.provider.operation.Operation;
+import org.apache.skywalking.oap.server.fetcher.prometheus.provider.rule.MetricsRule;
import org.apache.skywalking.oap.server.fetcher.prometheus.provider.rule.Rule;
import org.apache.skywalking.oap.server.fetcher.prometheus.provider.rule.Rules;
import org.apache.skywalking.oap.server.fetcher.prometheus.provider.rule.StaticConfig;
@@ -142,8 +145,14 @@ public class PrometheusFetcherProvider extends ModuleProvider {
final MeterSystem service = getManager().find(CoreModule.NAME).provider().getService(MeterSystem.class);
rules.forEach(r -> {
- r.getMetricsRules().forEach(rule -> {
+ final AtomicReference<String> lastRuleName = new AtomicReference<>();
+ r.getMetricsRules().stream().sorted(Comparator.comparing(MetricsRule::getName)).forEach(rule -> {
+ if (rule.getName().equals(lastRuleName.get())) {
+ lastRuleName.set(rule.getName());
+ return;
+ }
service.create(formatMetricName(rule.getName()), rule.getOperation(), rule.getScope());
+ lastRuleName.set(rule.getName());
});
ses.scheduleAtFixedRate(new Runnable() {
@@ -198,6 +207,13 @@ public class PrometheusFetcherProvider extends ModuleProvider {
r.getMetricsRules().stream()
.flatMap(rule -> rule.getSources().entrySet().stream().map(source -> Tuple.of(rule, source.getKey(), source.getValue())))
.filter(rule -> rule._2.equals(metric.getName()))
+ .filter(rule -> {
+ if (Objects.isNull(rule._3.getLabelFilter())) {
+ return true;
+ }
+ return rule._3.getLabelFilter().stream()
+ .allMatch(matchRule -> matchRule.getOptions().contains(metric.getLabels().get(matchRule.getKey())));
+ })
.map(rule -> Tuple.of(rule._1, rule._2, rule._3, metric))
)
.peek(tuple -> LOG.debug("Mapped rules to metrics: {}", tuple))
diff --git a/oap-server/server-fetcher-plugin/prometheus-fetcher-plugin/src/main/java/org/apache/skywalking/oap/server/fetcher/prometheus/provider/counter/Window.java b/oap-server/server-fetcher-plugin/prometheus-fetcher-plugin/src/main/java/org/apache/skywalking/oap/server/fetcher/prometheus/provider/counter/Window.java
index 35edb53..5e1a2a7 100644
--- a/oap-server/server-fetcher-plugin/prometheus-fetcher-plugin/src/main/java/org/apache/skywalking/oap/server/fetcher/prometheus/provider/counter/Window.java
+++ b/oap-server/server-fetcher-plugin/prometheus-fetcher-plugin/src/main/java/org/apache/skywalking/oap/server/fetcher/prometheus/provider/counter/Window.java
@@ -79,7 +79,7 @@ public class Window {
}
Queue<Tuple2<Long, Double>> window = windows.get(id);
long now = System.currentTimeMillis();
- window.offer(Tuple.of(System.currentTimeMillis(), value));
+ window.offer(Tuple.of(now, value));
Tuple2<Long, Double> ps = window.element();
if ((now - ps._1) >= windowSize) {
window.remove();
diff --git a/oap-server/server-fetcher-plugin/prometheus-fetcher-plugin/src/main/java/org/apache/skywalking/oap/server/fetcher/prometheus/provider/rule/PrometheusMetric.java b/oap-server/server-fetcher-plugin/prometheus-fetcher-plugin/src/main/java/org/apache/skywalking/oap/server/fetcher/prometheus/provider/rule/LabelMatchRule.java
similarity index 85%
copy from oap-server/server-fetcher-plugin/prometheus-fetcher-plugin/src/main/java/org/apache/skywalking/oap/server/fetcher/prometheus/provider/rule/PrometheusMetric.java
copy to oap-server/server-fetcher-plugin/prometheus-fetcher-plugin/src/main/java/org/apache/skywalking/oap/server/fetcher/prometheus/provider/rule/LabelMatchRule.java
index 622cb4b..742ba7c 100644
--- a/oap-server/server-fetcher-plugin/prometheus-fetcher-plugin/src/main/java/org/apache/skywalking/oap/server/fetcher/prometheus/provider/rule/PrometheusMetric.java
+++ b/oap-server/server-fetcher-plugin/prometheus-fetcher-plugin/src/main/java/org/apache/skywalking/oap/server/fetcher/prometheus/provider/rule/LabelMatchRule.java
@@ -18,14 +18,13 @@
package org.apache.skywalking.oap.server.fetcher.prometheus.provider.rule;
+import java.util.List;
import lombok.Data;
import lombok.NoArgsConstructor;
@Data
@NoArgsConstructor
-public class PrometheusMetric {
- private CounterFunction counterFunction;
- private String range;
- private Relabel relabel;
- private int scale = 0;
+public class LabelMatchRule {
+ private String key;
+ private List<String> options;
}
diff --git a/oap-server/server-fetcher-plugin/prometheus-fetcher-plugin/src/main/java/org/apache/skywalking/oap/server/fetcher/prometheus/provider/rule/PrometheusMetric.java b/oap-server/server-fetcher-plugin/prometheus-fetcher-plugin/src/main/java/org/apache/skywalking/oap/server/fetcher/prometheus/provider/rule/PrometheusMetric.java
index 622cb4b..51c2865 100644
--- a/oap-server/server-fetcher-plugin/prometheus-fetcher-plugin/src/main/java/org/apache/skywalking/oap/server/fetcher/prometheus/provider/rule/PrometheusMetric.java
+++ b/oap-server/server-fetcher-plugin/prometheus-fetcher-plugin/src/main/java/org/apache/skywalking/oap/server/fetcher/prometheus/provider/rule/PrometheusMetric.java
@@ -18,6 +18,7 @@
package org.apache.skywalking.oap.server.fetcher.prometheus.provider.rule;
+import java.util.List;
import lombok.Data;
import lombok.NoArgsConstructor;
@@ -26,6 +27,7 @@ import lombok.NoArgsConstructor;
public class PrometheusMetric {
private CounterFunction counterFunction;
private String range;
+ private List<LabelMatchRule> labelFilter;
private Relabel relabel;
private int scale = 0;
}
diff --git a/oap-server/server-library/library-util/src/main/java/org/apache/skywalking/oap/server/library/util/prometheus/metrics/Summary.java b/oap-server/server-library/library-util/src/main/java/org/apache/skywalking/oap/server/library/util/prometheus/metrics/Summary.java
index 1088336..d12633f 100644
--- a/oap-server/server-library/library-util/src/main/java/org/apache/skywalking/oap/server/library/util/prometheus/metrics/Summary.java
+++ b/oap-server/server-library/library-util/src/main/java/org/apache/skywalking/oap/server/library/util/prometheus/metrics/Summary.java
@@ -47,7 +47,7 @@ public class Summary extends Metric {
Summary s = (Summary) m;
this.sampleCount = this.sampleCount + s.getSampleCount();
this.sampleSum = this.sampleSum + s.getSampleSum();
- return null;
+ return this;
}
@Override public Double value() {
diff --git a/oap-server/server-library/library-util/src/main/java/org/apache/skywalking/oap/server/library/util/prometheus/parser/Context.java b/oap-server/server-library/library-util/src/main/java/org/apache/skywalking/oap/server/library/util/prometheus/parser/Context.java
index 01c3e9d..63320e7 100644
--- a/oap-server/server-library/library-util/src/main/java/org/apache/skywalking/oap/server/library/util/prometheus/parser/Context.java
+++ b/oap-server/server-library/library-util/src/main/java/org/apache/skywalking/oap/server/library/util/prometheus/parser/Context.java
@@ -18,8 +18,11 @@
package org.apache.skywalking.oap.server.library.util.prometheus.parser;
+import com.google.common.collect.Maps;
import java.util.ArrayList;
import java.util.List;
+import java.util.Map;
+import org.apache.commons.lang3.tuple.Pair;
import org.apache.skywalking.oap.server.library.util.prometheus.metrics.Counter;
import org.apache.skywalking.oap.server.library.util.prometheus.metrics.Gauge;
import org.apache.skywalking.oap.server.library.util.prometheus.metrics.Histogram;
@@ -30,6 +33,10 @@ import org.apache.skywalking.oap.server.library.util.prometheus.parser.sample.Te
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+import static java.util.stream.Collectors.groupingBy;
+import static java.util.stream.Collectors.mapping;
+import static java.util.stream.Collectors.toList;
+
public class Context {
private static final Logger LOG = LoggerFactory.getLogger(Context.class);
public MetricFamily metricFamily;
@@ -117,21 +124,33 @@ public class Context {
metricFamilyBuilder.addMetric(hBuilder.build());
break;
case SUMMARY:
- Summary.SummaryBuilder sBuilder = Summary.builder();
- sBuilder.name(name);
- samples.forEach(textSample -> {
- if (textSample.getName().endsWith("_count")) {
- sBuilder.sampleCount((long) convertStringToDouble(textSample.getValue()));
- } else if (textSample.getName().endsWith("_sum")) {
- sBuilder.sampleSum(convertStringToDouble(textSample.getValue()));
- } else if (textSample.getLabels().containsKey("quantile")) {
- sBuilder.quantile(
- convertStringToDouble(textSample.getLabels().remove("quantile")),
- convertStringToDouble(textSample.getValue())
- );
- }
- });
- metricFamilyBuilder.addMetric(sBuilder.build());
+
+ samples.stream()
+ .map(sample -> {
+ Map<String, String> labels = Maps.newHashMap(sample.getLabels());
+ labels.remove("quantile");
+ return Pair.of(labels, sample);
+ })
+ .collect(groupingBy(Pair::getLeft, mapping(Pair::getRight, toList())))
+ .forEach((labels, samples) -> {
+ Summary.SummaryBuilder sBuilder = Summary.builder();
+ sBuilder.name(name);
+ sBuilder.labels(labels);
+ samples.forEach(textSample -> {
+ if (textSample.getName().endsWith("_count")) {
+ sBuilder.sampleCount((long) convertStringToDouble(textSample.getValue()));
+ } else if (textSample.getName().endsWith("_sum")) {
+ sBuilder.sampleSum(convertStringToDouble(textSample.getValue()));
+ } else if (textSample.getLabels().containsKey("quantile")) {
+ sBuilder.quantile(
+ convertStringToDouble(textSample.getLabels().remove("quantile")),
+ convertStringToDouble(textSample.getValue())
+ );
+ }
+ });
+ metricFamilyBuilder.addMetric(sBuilder.build());
+ });
+
break;
}
metricFamily = metricFamilyBuilder.build();