You are viewing a plain text version of this content. The canonical link for it is here.
Posted to notifications@skywalking.apache.org by wu...@apache.org on 2021/04/06 06:11:17 UTC
[skywalking] branch master updated: Support k8s monitoring (#6479)
This is an automated email from the ASF dual-hosted git repository.
wusheng pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/skywalking.git
The following commit(s) were added to refs/heads/master by this push:
new f9096f5 Support k8s monitoring (#6479)
f9096f5 is described below
commit f9096f508673e72d1c255cad95fba2a3505e1a46
Author: wankai123 <wa...@foxmail.com>
AuthorDate: Tue Apr 6 14:11:02 2021 +0800
Support k8s monitoring (#6479)
---
CHANGES.md | 2 +
docs/en/setup/backend/backend-receivers.md | 3 +
.../meter/analyzer/dsl/tagOpt/K8sRetagType.java | 10 +-
.../oap/meter/analyzer/dsl/tagOpt/Retag.java | 1 +
.../oap/meter/analyzer/dsl/K8sTagTest.java | 5 +-
.../main/resources/otel-oc-rules/k8s-cluster.yaml | 89 ++++
.../src/main/resources/otel-oc-rules/k8s-node.yaml | 74 +++
.../main/resources/otel-oc-rules/k8s-service.yaml | 66 +++
.../resources/ui-initialized-templates/k8s.yml | 512 +++++++++++++++++++++
9 files changed, 755 insertions(+), 7 deletions(-)
diff --git a/CHANGES.md b/CHANGES.md
index 35e8874..4e41fc7 100644
--- a/CHANGES.md
+++ b/CHANGES.md
@@ -68,6 +68,8 @@ Release Notes.
* Optimize the self monitoring grafana dashboard.
* Enhance the export service.
* Add function `retagByK8sMeta` and opt type `K8sRetagType.Pod2Service` in MAL for k8s to relate pods and services.
+* Using "service.istio.io/canonical-name" to replace "app" label to resolve Envoy ALS service name.
+* Support k8s monitoring.
* Make the flushing metrics operation concurrent.
* Fix ALS K8SServiceRegistry didn't remove the correct entry.
* Using "service.istio.io/canonical-name" to replace "app" label to resolve Envoy ALS service name.
diff --git a/docs/en/setup/backend/backend-receivers.md b/docs/en/setup/backend/backend-receivers.md
index 4512aed..b02b14b 100644
--- a/docs/en/setup/backend/backend-receivers.md
+++ b/docs/en/setup/backend/backend-receivers.md
@@ -132,6 +132,9 @@ to be the identification of the metric data.
|istio-controlplane| Metrics of Istio control plane | otel-oc-rules/istio-controlplane.yaml | Istio Control Plane -> OpenTelemetry Collector --OC format--> SkyWalking OAP Server |
|oap| Metrics of SkyWalking OAP server itself | otel-oc-rules/oap.yaml | SkyWalking OAP Server(SelfObservability) -> OpenTelemetry Collector --OC format--> SkyWalking OAP Server |
|vm| Metrics of VMs | otel-oc-rules/vm.yaml | Prometheus node-exporter(VMs) -> OpenTelemetry Collector --OC format--> SkyWalking OAP Server |
+|k8s-cluster| Metrics of K8s cluster | otel-oc-rules/k8s-cluster.yaml | K8s kube-state-metrics -> OpenTelemetry Collector --OC format--> SkyWalking OAP Server |
+|k8s-node| Metrics of K8s cluster nodes | otel-oc-rules/k8s-node.yaml | cAdvisor & K8s kube-state-metrics -> OpenTelemetry Collector --OC format--> SkyWalking OAP Server |
+|k8s-service| Metrics of K8s services | otel-oc-rules/k8s-service.yaml | cAdvisor & K8s kube-state-metrics -> OpenTelemetry Collector --OC format--> SkyWalking OAP Server |
## Meter receiver
diff --git a/oap-server/analyzer/meter-analyzer/src/main/java/org/apache/skywalking/oap/meter/analyzer/dsl/tagOpt/K8sRetagType.java b/oap-server/analyzer/meter-analyzer/src/main/java/org/apache/skywalking/oap/meter/analyzer/dsl/tagOpt/K8sRetagType.java
index 5b179f6..df12ed6 100644
--- a/oap-server/analyzer/meter-analyzer/src/main/java/org/apache/skywalking/oap/meter/analyzer/dsl/tagOpt/K8sRetagType.java
+++ b/oap-server/analyzer/meter-analyzer/src/main/java/org/apache/skywalking/oap/meter/analyzer/dsl/tagOpt/K8sRetagType.java
@@ -27,7 +27,6 @@ import org.apache.skywalking.oap.meter.analyzer.dsl.Sample;
import org.apache.skywalking.oap.meter.analyzer.k8s.K8sInfoRegistry;
public enum K8sRetagType implements Retag {
-
Pod2Service {
@Override
public Sample[] execute(final Sample[] ss,
@@ -39,11 +38,12 @@ public enum K8sRetagType implements Retag {
String namespace = sample.getLabels().get(namespaceLabelName);
if (!Strings.isNullOrEmpty(podName) && !Strings.isNullOrEmpty(namespace)) {
String serviceName = K8sInfoRegistry.getInstance().findServiceName(namespace, podName);
- if (!Strings.isNullOrEmpty(serviceName)) {
- Map<String, String> labels = Maps.newHashMap(sample.getLabels());
- labels.put(newLabelName, serviceName);
- return sample.toBuilder().labels(ImmutableMap.copyOf(labels)).build();
+ if (Strings.isNullOrEmpty(serviceName)) {
+ serviceName = BLANK;
}
+ Map<String, String> labels = Maps.newHashMap(sample.getLabels());
+ labels.put(newLabelName, serviceName);
+ return sample.toBuilder().labels(ImmutableMap.copyOf(labels)).build();
}
return sample;
}).toArray(Sample[]::new);
diff --git a/oap-server/analyzer/meter-analyzer/src/main/java/org/apache/skywalking/oap/meter/analyzer/dsl/tagOpt/Retag.java b/oap-server/analyzer/meter-analyzer/src/main/java/org/apache/skywalking/oap/meter/analyzer/dsl/tagOpt/Retag.java
index e02a971..070b9c3 100644
--- a/oap-server/analyzer/meter-analyzer/src/main/java/org/apache/skywalking/oap/meter/analyzer/dsl/tagOpt/Retag.java
+++ b/oap-server/analyzer/meter-analyzer/src/main/java/org/apache/skywalking/oap/meter/analyzer/dsl/tagOpt/Retag.java
@@ -21,5 +21,6 @@ package org.apache.skywalking.oap.meter.analyzer.dsl.tagOpt;
import org.apache.skywalking.oap.meter.analyzer.dsl.Sample;
public interface Retag {
+ String BLANK = "";
Sample[] execute(Sample[] ss, String newLabelName, String existingLabelName, String namespaceLabelName);
}
diff --git a/oap-server/analyzer/meter-analyzer/src/test/java/org/apache/skywalking/oap/meter/analyzer/dsl/K8sTagTest.java b/oap-server/analyzer/meter-analyzer/src/test/java/org/apache/skywalking/oap/meter/analyzer/dsl/K8sTagTest.java
index fbdf1ad..7a4e75e 100644
--- a/oap-server/analyzer/meter-analyzer/src/test/java/org/apache/skywalking/oap/meter/analyzer/dsl/K8sTagTest.java
+++ b/oap-server/analyzer/meter-analyzer/src/test/java/org/apache/skywalking/oap/meter/analyzer/dsl/K8sTagTest.java
@@ -28,6 +28,7 @@ import java.util.Collection;
import java.util.Map;
import lombok.SneakyThrows;
import lombok.extern.slf4j.Slf4j;
+import org.apache.skywalking.oap.meter.analyzer.dsl.tagOpt.Retag;
import org.apache.skywalking.oap.meter.analyzer.k8s.K8sInfoRegistry;
import org.junit.Before;
import org.junit.Test;
@@ -133,7 +134,7 @@ public class K8sTagTest {
.labels(
of(
"namespace", "default", "container", "my-nginx", "cpu", "total", "pod",
- "my-nginx-5dc4865748-no-pod"
+ "my-nginx-5dc4865748-no-pod" , "service", Retag.BLANK
))
.value(2)
.build(),
@@ -175,7 +176,7 @@ public class K8sTagTest {
.labels(
of(
"namespace", "default", "container", "my-nginx", "cpu", "total", "pod",
- "my-nginx-5dc4865748-no-service"
+ "my-nginx-5dc4865748-no-service" , "service", Retag.BLANK
))
.value(2)
.build(),
diff --git a/oap-server/server-bootstrap/src/main/resources/otel-oc-rules/k8s-cluster.yaml b/oap-server/server-bootstrap/src/main/resources/otel-oc-rules/k8s-cluster.yaml
new file mode 100644
index 0000000..f3ed97c
--- /dev/null
+++ b/oap-server/server-bootstrap/src/main/resources/otel-oc-rules/k8s-cluster.yaml
@@ -0,0 +1,89 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# This will parse a textual representation of a duration. The formats
+# accepted are based on the ISO-8601 duration format {@code PnDTnHnMn.nS}
+# with days considered to be exactly 24 hours.
+# <p>
+# Examples:
+# <pre>
+# "PT20.345S" -- parses as "20.345 seconds"
+# "PT15M" -- parses as "15 minutes" (where a minute is 60 seconds)
+# "PT10H" -- parses as "10 hours" (where an hour is 3600 seconds)
+# "P2D" -- parses as "2 days" (where a day is 24 hours or 86400 seconds)
+# "P2DT3H4M" -- parses as "2 days, 3 hours and 4 minutes"
+# "P-6H3M" -- parses as "-6 hours and +3 minutes"
+# "-P6H3M" -- parses as "-6 hours and -3 minutes"
+# "-P-6H+3M" -- parses as "+6 hours and -3 minutes"
+# </pre>
+expSuffix: tag({tags -> tags.cluster = 'k8s-cluster::' + tags.cluster}).service(['cluster'])
+metricPrefix: k8s_cluster
+metricsRules:
+
+
+ - name: cpu_cores
+ exp: (kube_node_status_capacity * 1000).tagEqual('resource' , 'cpu').sum(['cluster'])
+ - name: cpu_cores_allocatable
+ exp: (kube_node_status_allocatable * 1000).tagEqual('resource' , 'cpu').sum(['cluster'])
+ - name: cpu_cores_requests
+ exp: (kube_pod_container_resource_requests * 1000).tagEqual('resource' , 'cpu').sum(['cluster'])
+ - name: cpu_cores_limits
+ exp: (kube_pod_container_resource_limits * 1000).tagEqual('resource' , 'cpu').sum(['cluster'])
+
+ - name: memory_total
+ exp: kube_node_status_capacity.tagEqual('resource' , 'memory').sum(['cluster'])
+ - name: memory_allocatable
+ exp: kube_node_status_allocatable.tagEqual('resource' , 'memory').sum(['cluster'])
+ - name: memory_requests
+ exp: kube_pod_container_resource_requests.tagEqual('resource' , 'memory').sum(['cluster'])
+ - name: memory_limits
+ exp: kube_pod_container_resource_limits.tagEqual('resource' , 'memory').sum(['cluster'])
+
+ - name: storage_total
+ exp: kube_node_status_capacity.tagEqual('resource' , 'ephemeral_storage').sum(['cluster'])
+ - name: storage_allocatable
+ exp: kube_node_status_allocatable.tagEqual('resource' , 'ephemeral_storage').sum(['cluster'])
+
+ - name: node_total
+ exp: kube_node_info.sum(['cluster'])
+ - name: node_status
+ exp: kube_node_status_condition.valueEqual(1).tagMatch('status' , 'true|unknown').sum(['cluster' , 'node' ,'condition'])
+
+ - name: namespace_total
+ exp: kube_namespace_labels.sum(['cluster'])
+
+ - name: deployment_total
+ exp: kube_deployment_labels.sum(['cluster'])
+ - name: deployment_status
+ exp: kube_deployment_status_condition.valueEqual(1).tagMatch('condition' , 'Available').sum(['cluster' , 'deployment' ,'condition' , 'status']).tag({tags -> tags.remove('condition')})
+ - name: deployment_spec_replicas
+ exp: kube_deployment_spec_replicas.sum(['cluster' , 'deployment'])
+
+ - name: service_total
+ exp: kube_service_info.sum(['cluster'])
+ - name: service_pod_status
+ exp: kube_pod_status_phase.retagByK8sMeta('service' , K8sRetagType.Pod2Service , 'pod' , 'namespace').tagNotEqual('service' , '').valueEqual(1).sum(['cluster' , 'service' , 'phase'])
+
+ - name: pod_total
+ exp: kube_pod_info.sum(['cluster'])
+ - name: pod_status_not_running
+ exp: kube_pod_status_phase.valueEqual(1).tagNotMatch('phase' , 'Running').sum(['cluster' , 'pod' , 'phase'])
+
+ - name: container_total
+ exp: kube_pod_container_info.sum(['cluster'])
+ - name: pod_status_waiting
+ exp: kube_pod_container_status_waiting_reason.valueEqual(1).sum(['cluster' , 'pod' , 'container' , 'reason'])
+ - name: pod_status_terminated
+ exp: kube_pod_container_status_terminated_reason.valueEqual(1).sum(['cluster' , 'pod' , 'container' , 'reason'])
diff --git a/oap-server/server-bootstrap/src/main/resources/otel-oc-rules/k8s-node.yaml b/oap-server/server-bootstrap/src/main/resources/otel-oc-rules/k8s-node.yaml
new file mode 100644
index 0000000..4f32102
--- /dev/null
+++ b/oap-server/server-bootstrap/src/main/resources/otel-oc-rules/k8s-node.yaml
@@ -0,0 +1,74 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# This will parse a textual representation of a duration. The formats
+# accepted are based on the ISO-8601 duration format {@code PnDTnHnMn.nS}
+# with days considered to be exactly 24 hours.
+# <p>
+# Examples:
+# <pre>
+# "PT20.345S" -- parses as "20.345 seconds"
+# "PT15M" -- parses as "15 minutes" (where a minute is 60 seconds)
+# "PT10H" -- parses as "10 hours" (where an hour is 3600 seconds)
+# "P2D" -- parses as "2 days" (where a day is 24 hours or 86400 seconds)
+# "P2DT3H4M" -- parses as "2 days, 3 hours and 4 minutes"
+# "P-6H3M" -- parses as "-6 hours and +3 minutes"
+# "-P6H3M" -- parses as "-6 hours and -3 minutes"
+# "-P-6H+3M" -- parses as "+6 hours and -3 minutes"
+# </pre>
+
+expSuffix: tag({tags -> tags.cluster = 'k8s-cluster::' + tags.cluster}).instance(['cluster'] , ['node'])
+metricPrefix: k8s_node
+metricsRules:
+
+ - name: cpu_cores
+ exp: (kube_node_status_capacity * 1000).tagEqual('resource' , 'cpu').sum(['cluster' , 'node'])
+ - name: cpu_usage
+ exp: (container_cpu_usage_seconds_total * 1000).tagEqual('id' , '/').sum(['cluster' , 'node']).rate('PT1M')
+ - name: cpu_cores_allocatable
+ exp: (kube_node_status_allocatable * 1000).tagEqual('resource' , 'cpu').sum(['cluster' , 'node'])
+ - name: cpu_cores_requests
+ exp: (kube_pod_container_resource_requests * 1000).tagEqual('resource' , 'cpu').sum(['cluster' , 'node'])
+ - name: cpu_cores_limits
+ exp: (kube_pod_container_resource_limits * 1000).tagEqual('resource' , 'cpu').sum(['cluster' , 'node'])
+
+ - name: memory_total
+ exp: kube_node_status_capacity.tagEqual('resource' , 'memory').sum(['cluster' , 'node'])
+ - name: memory_allocatable
+ exp: kube_node_status_allocatable.tagEqual('resource' , 'memory').sum(['cluster' , 'node'])
+ - name: memory_requests
+ exp: kube_pod_container_resource_requests.tagEqual('resource' , 'memory').sum(['cluster' , 'node'])
+ - name: memory_limits
+ exp: kube_pod_container_resource_limits.tagEqual('resource' , 'memory').sum(['cluster' , 'node'])
+
+ - name: memory_usage
+ exp: container_memory_working_set_bytes.tagEqual('id' , '/').sum(['cluster' , 'node'])
+
+
+ - name: storage_total
+ exp: kube_node_status_capacity.tagEqual('resource' , 'ephemeral_storage').sum(['cluster' , 'node'])
+ - name: storage_allocatable
+ exp: kube_node_status_allocatable.tagEqual('resource' , 'ephemeral_storage').sum(['cluster' , 'node'])
+
+ - name: node_status
+ exp: kube_node_status_condition.valueEqual(1).tagMatch('status' , 'true|unknown').sum(['cluster' , 'node' ,'condition'])
+
+ - name: pod_total
+ exp: kube_pod_info.sum(['cluster' , 'node'])
+
+ - name: network_receive
+ exp: container_network_receive_bytes_total.sum(['cluster' , 'node']).irate()
+ - name: network_transmit
+ exp: container_network_transmit_bytes_total.sum(['cluster' , 'node']).irate()
diff --git a/oap-server/server-bootstrap/src/main/resources/otel-oc-rules/k8s-service.yaml b/oap-server/server-bootstrap/src/main/resources/otel-oc-rules/k8s-service.yaml
new file mode 100644
index 0000000..79b86c2
--- /dev/null
+++ b/oap-server/server-bootstrap/src/main/resources/otel-oc-rules/k8s-service.yaml
@@ -0,0 +1,66 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# This will parse a textual representation of a duration. The formats
+# accepted are based on the ISO-8601 duration format {@code PnDTnHnMn.nS}
+# with days considered to be exactly 24 hours.
+# <p>
+# Examples:
+# <pre>
+# "PT20.345S" -- parses as "20.345 seconds"
+# "PT15M" -- parses as "15 minutes" (where a minute is 60 seconds)
+# "PT10H" -- parses as "10 hours" (where an hour is 3600 seconds)
+# "P2D" -- parses as "2 days" (where a day is 24 hours or 86400 seconds)
+# "P2DT3H4M" -- parses as "2 days, 3 hours and 4 minutes"
+# "P-6H3M" -- parses as "-6 hours and +3 minutes"
+# "-P6H3M" -- parses as "-6 hours and -3 minutes"
+# "-P-6H+3M" -- parses as "+6 hours and -3 minutes"
+# </pre>
+expSuffix: tag({tags -> tags.cluster = 'k8s-cluster::' + tags.cluster}).endpoint(['cluster'] , ['service'])
+metricPrefix: k8s_service
+metricsRules:
+
+ - name: pod_total
+ exp: kube_pod_info.retagByK8sMeta('service' , K8sRetagType.Pod2Service , 'pod' , 'namespace').tagNotEqual('service' , '').sum(['cluster' , 'service'])
+
+ - name: cpu_cores_requests
+ exp: (kube_pod_container_resource_requests * 1000).retagByK8sMeta('service' , K8sRetagType.Pod2Service , 'pod' , 'namespace').tagNotEqual('service' , '').tagEqual('resource' , 'cpu').sum(['cluster' , 'service'])
+ - name: cpu_cores_limits
+ exp: (kube_pod_container_resource_limits * 1000).retagByK8sMeta('service' , K8sRetagType.Pod2Service , 'pod' , 'namespace').tagNotEqual('service' , '').tagEqual('resource' , 'cpu').sum(['cluster' , 'service'])
+ - name: memory_requests
+ exp: kube_pod_container_resource_requests.retagByK8sMeta('service' , K8sRetagType.Pod2Service , 'pod' , 'namespace').tagNotEqual('service' , '').tagEqual('resource' , 'memory').sum(['cluster' , 'service'])
+ - name: memory_limits
+ exp: kube_pod_container_resource_limits.retagByK8sMeta('service' , K8sRetagType.Pod2Service , 'pod' , 'namespace').tagNotEqual('service' , '').tagEqual('resource' , 'memory').sum(['cluster' , 'service'])
+
+ - name: pod_status
+ exp: kube_pod_status_phase.retagByK8sMeta('service' , K8sRetagType.Pod2Service , 'pod' , 'namespace').tagNotEqual('service' , '').valueEqual(1).sum(['cluster' , 'service' , 'pod' , 'phase'])
+ - name: pod_status_waiting
+ exp: kube_pod_container_status_waiting_reason.retagByK8sMeta('service' , K8sRetagType.Pod2Service , 'pod' , 'namespace').tagNotEqual('service' , '').valueEqual(1).sum(['cluster' , 'service' , 'pod' , 'container' , 'reason'])
+ - name: pod_status_terminated
+ exp: kube_pod_container_status_terminated_reason.retagByK8sMeta('service' , K8sRetagType.Pod2Service , 'pod' , 'namespace').tagNotEqual('service' , '').valueEqual(1).sum(['cluster' , 'service' , 'pod' , 'container' , 'reason'])
+ - name: pod_status_restarts_total
+ exp: kube_pod_container_status_restarts_total.retagByK8sMeta('service' , K8sRetagType.Pod2Service , 'pod' , 'namespace').tagNotEqual('service' , '').sum(['cluster' , 'service' , 'pod'])
+
+ - name: pod_cpu_usage
+ exp: (container_cpu_usage_seconds_total * 1000).tagNotEqual('pod' , '').retagByK8sMeta('service' , K8sRetagType.Pod2Service , 'pod' , 'namespace').tagNotEqual('service' , '').sum(['cluster' , 'service' , 'pod']).rate('PT1M')
+ - name: pod_memory_usage
+ exp: container_memory_working_set_bytes.retagByK8sMeta('service' , K8sRetagType.Pod2Service , 'pod' , 'namespace').tagNotEqual('service' , '').sum(['cluster' , 'service' , 'pod'])
+
+ - name: pod_network_receive
+ exp: container_network_receive_bytes_total.tagNotEqual('pod' , '').retagByK8sMeta('service' , K8sRetagType.Pod2Service , 'pod' , 'namespace').tagNotEqual('service' , '').sum(['cluster' , 'service' , 'pod']).irate()
+ - name: pod_network_transmit
+ exp: container_network_transmit_bytes_total.tagNotEqual('pod' , '').retagByK8sMeta('service' , K8sRetagType.Pod2Service , 'pod' , 'namespace').tagNotEqual('service' , '').sum(['cluster' , 'service' , 'pod']).irate()
+ - name: pod_fs_usage
+ exp: container_fs_usage_bytes.tagNotEqual('pod' , '').retagByK8sMeta('service' , K8sRetagType.Pod2Service , 'pod' , 'namespace').tagNotEqual('service' , '').sum(['cluster' , 'service' , 'pod'])
diff --git a/oap-server/server-bootstrap/src/main/resources/ui-initialized-templates/k8s.yml b/oap-server/server-bootstrap/src/main/resources/ui-initialized-templates/k8s.yml
new file mode 100644
index 0000000..9745d0d
--- /dev/null
+++ b/oap-server/server-bootstrap/src/main/resources/ui-initialized-templates/k8s.yml
@@ -0,0 +1,512 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# The UI template initialization file provides the default templates used when the SkyWalking OAP starts up for the first time.
+#
+# Also, SkyWalking checks for the existing templates in the database; if they are missing, all templates in this file
+# would be added automatically.
+
+templates:
+ - name: "K8s"
+ # The type includes DASHBOARD, TOPOLOGY_INSTANCE, TOPOLOGY_ENDPOINT.
+ # DASHBOARD type templates could have multiple definitions, by using different names.
+ # TOPOLOGY_INSTANCE, TOPOLOGY_ENDPOINT type templates should be defined once, as they are used in the topology page only.
+ type: "DASHBOARD"
+ # Configuration could be defined through UI, and use `export` to format in the standard JSON.
+ configuration: |-
+ [
+ {
+ "name": "K8s",
+ "type": "service",
+ "serviceGroup": "k8s-cluster",
+ "children": [
+ {
+ "name": "Cluster",
+ "children": [
+ {
+ "width": "2",
+ "title": "Node Total",
+ "height": "100",
+ "entityType": "Service",
+ "independentSelector": false,
+ "metricType": "REGULAR_VALUE",
+ "metricName": "k8s_cluster_node_total",
+ "queryMetricType": "readMetricsValue",
+ "chartType": "ChartNum"
+ },
+ {
+ "width": "2",
+ "title": "Namespace Total",
+ "height": "100",
+ "entityType": "Service",
+ "independentSelector": false,
+ "metricType": "REGULAR_VALUE",
+ "metricName": "k8s_cluster_namespace_total",
+ "queryMetricType": "readMetricsValue",
+ "chartType": "ChartNum"
+ },
+ {
+ "width": "2",
+ "title": "Deployment Total",
+ "height": "100",
+ "entityType": "Service",
+ "independentSelector": false,
+ "metricType": "REGULAR_VALUE",
+ "metricName": "k8s_cluster_deployment_total",
+ "queryMetricType": "readMetricsValue",
+ "chartType": "ChartNum"
+ },
+ {
+ "width": "2",
+ "title": "Service Total",
+ "height": "100",
+ "entityType": "Service",
+ "independentSelector": false,
+ "metricType": "REGULAR_VALUE",
+ "metricName": "k8s_cluster_service_total",
+ "queryMetricType": "readMetricsValue",
+ "chartType": "ChartNum"
+ },
+ {
+ "width": "2",
+ "title": "Pod Total",
+ "height": "100",
+ "entityType": "Service",
+ "independentSelector": false,
+ "metricType": "REGULAR_VALUE",
+ "metricName": "k8s_cluster_pod_total",
+ "queryMetricType": "readMetricsValue",
+ "chartType": "ChartNum"
+ },
+ {
+ "width": 2,
+ "title": "Container Total",
+ "height": "100",
+ "entityType": "Service",
+ "independentSelector": false,
+ "metricType": "REGULAR_VALUE",
+ "metricName": "k8s_cluster_container_total",
+ "queryMetricType": "readMetricsValue",
+ "chartType": "ChartNum"
+ },
+ {
+ "width": "4",
+ "title": "CPU Resources",
+ "height": 200,
+ "entityType": "Service",
+ "independentSelector": false,
+ "metricType": "REGULAR_VALUE",
+ "metricName": "k8s_cluster_cpu_cores,k8s_cluster_cpu_cores_requests,k8s_cluster_cpu_cores_limits,k8s_cluster_cpu_cores_allocatable",
+ "queryMetricType": "readMetricsValues",
+ "chartType": "ChartArea",
+ "unit": "m"
+ },
+ {
+ "width": "4",
+ "title": "Memory Resources",
+ "height": 200,
+ "entityType": "Service",
+ "independentSelector": false,
+ "metricType": "REGULAR_VALUE",
+ "metricName": "k8s_cluster_memory_total,k8s_cluster_memory_requests,k8s_cluster_memory_limits,k8s_cluster_memory_allocatable",
+ "queryMetricType": "readMetricsValues",
+ "chartType": "ChartArea",
+ "aggregation": "/",
+ "aggregationNum": "1073741824",
+ "unit": "GB"
+ },
+ {
+ "width": "4",
+ "title": "Storage Resources",
+ "height": 200,
+ "entityType": "Service",
+ "independentSelector": false,
+ "metricType": "REGULAR_VALUE",
+ "metricName": "k8s_cluster_storage_total,k8s_cluster_storage_allocatable",
+ "queryMetricType": "readMetricsValues",
+ "chartType": "ChartArea",
+ "aggregation": "/",
+ "aggregationNum": "1073741824",
+ "unit": "GB"
+ },
+ {
+ "width": "4",
+ "title": "Node Status",
+ "height": "200",
+ "entityType": "Service",
+ "independentSelector": false,
+ "metricType": "LABELED_VALUE",
+ "metricName": "k8s_cluster_node_status",
+ "queryMetricType": "readLabeledMetricsValues",
+ "chartType": "ChartTable",
+ "tableHeaderCol1": "Status-Node",
+ "showTableValues": "false"
+ },
+ {
+ "width": "4",
+ "title": "Deployment Status",
+ "height": 200,
+ "entityType": "Service",
+ "independentSelector": false,
+ "metricType": "LABELED_VALUE",
+ "metricName": "k8s_cluster_deployment_status",
+ "queryMetricType": "readLabeledMetricsValues",
+ "chartType": "ChartTable",
+ "tableHeaderCol1": "Deployment-Available",
+ "showTableValues": "false"
+ },
+ {
+ "width": "4",
+ "title": "Deployment Spec Replicas",
+ "height": 200,
+ "entityType": "Service",
+ "independentSelector": false,
+ "metricType": "LABELED_VALUE",
+ "queryMetricType": "readLabeledMetricsValues",
+ "chartType": "ChartTable",
+ "metricName": "k8s_cluster_deployment_spec_replicas",
+ "showTableValues": "true",
+ "tableHeaderCol1": "Deployment",
+ "tableHeaderCol2": "Replicas"
+ },
+ {
+ "width": "4",
+ "title": "Service Status",
+ "height": 200,
+ "entityType": "Service",
+ "independentSelector": false,
+ "metricType": "LABELED_VALUE",
+ "metricName": "k8s_cluster_service_pod_status",
+ "queryMetricType": "readLabeledMetricsValues",
+ "chartType": "ChartTable",
+ "showTableValues": "false",
+ "tableHeaderCol1": "Status-Service"
+ },
+ {
+ "width": "4",
+ "title": "Pod Status Not Running",
+ "height": 200,
+ "entityType": "Service",
+ "independentSelector": false,
+ "metricType": "LABELED_VALUE",
+ "metricName": "k8s_cluster_pod_status_not_running",
+ "queryMetricType": "readLabeledMetricsValues",
+ "chartType": "ChartTable",
+ "tableHeaderCol1": "Status-Pod"
+ },
+ {
+ "width": "4",
+ "title": "Pod Status Waiting",
+ "height": 200,
+ "entityType": "Service",
+ "independentSelector": false,
+ "metricType": "LABELED_VALUE",
+ "metricName": "k8s_cluster_pod_status_waiting",
+ "queryMetricType": "readLabeledMetricsValues",
+ "chartType": "ChartTable",
+ "tableHeaderCol1": "Container-Pod-Waiting Reason",
+ "showTableValues": "false"
+ },
+ {
+ "width": "4",
+ "title": "Pod Status Terminated",
+ "height": 200,
+ "entityType": "Service",
+ "independentSelector": false,
+ "metricType": "LABELED_VALUE",
+ "metricName": "k8s_cluster_pod_status_terminated",
+ "queryMetricType": "readLabeledMetricsValues",
+ "chartType": "ChartBar"
+ }
+ ]
+ },
+ {
+ "name": "Node",
+ "children": [
+ {
+ "width": "3",
+ "title": "Pod Total",
+ "height": 350,
+ "entityType": "ServiceInstance",
+ "independentSelector": false,
+ "metricType": "REGULAR_VALUE",
+ "metricName": "k8s_node_pod_total",
+ "queryMetricType": "readMetricsValue",
+ "chartType": "ChartNum"
+ },
+ {
+ "width": "3",
+ "title": "Node Status",
+ "height": 350,
+ "entityType": "ServiceInstance",
+ "independentSelector": false,
+ "metricType": "LABELED_VALUE",
+ "metricName": "k8s_node_node_status",
+ "queryMetricType": "readLabeledMetricsValues",
+ "chartType": "ChartTable",
+ "tableHeaderCol1": "Status",
+ "showTableValues": "false"
+ },
+ {
+ "width": "3",
+ "title": "CPU Resources",
+ "height": "350",
+ "entityType": "ServiceInstance",
+ "independentSelector": false,
+ "metricType": "REGULAR_VALUE",
+ "metricName": "k8s_node_cpu_cores,k8s_node_cpu_cores_allocatable,k8s_node_cpu_cores_requests,k8s_node_cpu_cores_limits",
+ "queryMetricType": "readMetricsValues",
+ "chartType": "ChartArea",
+ "unit": "m"
+ },
+ {
+ "width": "3",
+ "title": "Memory Resources",
+ "height": "350",
+ "entityType": "ServiceInstance",
+ "independentSelector": false,
+ "metricType": "REGULAR_VALUE",
+ "unit": "GB",
+ "queryMetricType": "readMetricsValues",
+ "chartType": "ChartArea",
+ "metricName": "k8s_node_memory_total,k8s_node_memory_allocatable,k8s_node_memory_requests,k8s_node_memory_limits",
+ "aggregation": "/",
+ "aggregationNum": "1073741824"
+ },
+ {
+ "width": "3",
+ "title": "Storage Resources",
+ "height": 350,
+ "entityType": "ServiceInstance",
+ "independentSelector": false,
+ "metricType": "REGULAR_VALUE",
+ "unit": "GB",
+ "queryMetricType": "readMetricsValues",
+ "chartType": "ChartArea",
+ "metricName": "k8s_node_storage_total,k8s_node_storage_allocatable",
+ "aggregation": "/",
+ "aggregationNum": "1073741824"
+ },
+ {
+ "width": 3,
+ "title": "CPU Usage",
+ "height": 350,
+ "entityType": "ServiceInstance",
+ "independentSelector": false,
+ "metricType": "REGULAR_VALUE",
+ "metricName": "k8s_node_cpu_usage",
+ "queryMetricType": "readMetricsValues",
+ "chartType": "ChartLine",
+ "unit": "m"
+ },
+ {
+ "width": 3,
+ "title": "Memory Usage",
+ "height": 350,
+ "entityType": "ServiceInstance",
+ "independentSelector": false,
+ "metricType": "REGULAR_VALUE",
+ "metricName": "k8s_node_memory_usage",
+ "queryMetricType": "readMetricsValues",
+ "chartType": "ChartLine",
+ "aggregation": "/",
+ "aggregationNum": "1073741824",
+ "unit": "GB"
+ },
+ {
+ "width": "3",
+ "title": "Network I/O",
+ "height": 350,
+ "entityType": "ServiceInstance",
+ "independentSelector": false,
+ "metricType": "REGULAR_VALUE",
+ "metricName": "k8s_node_network_receive,k8s_node_network_transmit",
+ "queryMetricType": "readMetricsValues",
+ "chartType": "ChartLine",
+ "unit": "KB/s",
+ "aggregation": "/",
+ "aggregationNum": "1024"
+ }
+ ]
+ },
+ {
+ "name": "Service",
+ "children": [
+ {
+ "width": 3,
+ "title": "Service Pod Total",
+ "height": "248",
+ "entityType": "Endpoint",
+ "independentSelector": false,
+ "metricType": "REGULAR_VALUE",
+ "metricName": "k8s_service_pod_total",
+ "queryMetricType": "readMetricsValue",
+ "chartType": "ChartNum"
+ },
+ {
+ "width": 3,
+ "title": "Service Pod Status",
+ "height": "248",
+ "entityType": "Endpoint",
+ "independentSelector": false,
+ "metricType": "LABELED_VALUE",
+ "metricName": "k8s_service_pod_status",
+ "queryMetricType": "readLabeledMetricsValues",
+ "chartType": "ChartTable",
+ "tableHeaderCol1": "Status-Pod",
+ "showTableValues": "false"
+ },
+ {
+ "width": 3,
+ "title": "Service CPU Resources",
+ "height": "248",
+ "entityType": "Endpoint",
+ "independentSelector": false,
+ "metricType": "REGULAR_VALUE",
+ "metricName": "k8s_service_cpu_cores_requests,k8s_service_cpu_cores_limits",
+ "queryMetricType": "readMetricsValues",
+ "chartType": "ChartArea",
+ "unit": "m"
+ },
+ {
+ "width": 3,
+ "title": "Service Memory Resources",
+ "height": "248",
+ "entityType": "Endpoint",
+ "independentSelector": false,
+ "metricType": "REGULAR_VALUE",
+ "metricName": "k8s_service_memory_requests,k8s_service_memory_limits",
+ "queryMetricType": "readMetricsValues",
+ "chartType": "ChartArea",
+ "aggregation": "/",
+ "aggregationNum": "1048576",
+ "unit": "MB"
+ },
+ {
+ "width": 3,
+ "title": "Pod CPU Usage",
+ "height": "248",
+ "entityType": "Endpoint",
+ "independentSelector": false,
+ "metricType": "LABELED_VALUE",
+ "metricName": "k8s_service_pod_cpu_usage",
+ "queryMetricType": "readLabeledMetricsValues",
+ "chartType": "ChartLine",
+ "unit": "m"
+ },
+ {
+ "width": 3,
+ "title": "Pod Memory Usage",
+ "height": "248",
+ "entityType": "Endpoint",
+ "independentSelector": false,
+ "metricType": "LABELED_VALUE",
+ "metricName": "k8s_service_pod_memory_usage",
+ "queryMetricType": "readLabeledMetricsValues",
+ "chartType": "ChartLine",
+ "aggregation": "/",
+ "aggregationNum": "1048576",
+ "unit": "MB"
+ },
+ {
+ "width": 3,
+ "title": "Pod Waiting",
+ "height": "248",
+ "entityType": "Endpoint",
+ "independentSelector": false,
+ "metricType": "LABELED_VALUE",
+ "metricName": "k8s_service_pod_status_waiting",
+ "queryMetricType": "readLabeledMetricsValues",
+ "chartType": "ChartTable",
+ "showTableValues": "false",
+ "tableHeaderCol1": "Container-Pod-Waiting Reason"
+ },
+ {
+ "width": 3,
+ "title": "Pod Terminated",
+ "height": "248",
+ "entityType": "Endpoint",
+ "independentSelector": false,
+ "metricType": "LABELED_VALUE",
+ "metricName": "k8s_service_pod_status_terminated",
+ "queryMetricType": "readLabeledMetricsValues",
+ "chartType": "ChartBar"
+ },
+ {
+ "width": 3,
+ "title": "Pod Restarts",
+ "height": "248",
+ "entityType": "Endpoint",
+ "independentSelector": false,
+ "metricType": "LABELED_VALUE",
+ "metricName": "k8s_service_pod_status_restarts_total",
+ "queryMetricType": "readLabeledMetricsValues",
+ "chartType": "ChartTable",
+ "tableHeaderCol1": "Pod",
+ "showTableValues": "true",
+ "tableHeaderCol2": "Restarts Total"
+ },
+ {
+ "width": 3,
+ "title": "Pod Network Receive",
+ "height": "248",
+ "entityType": "Endpoint",
+ "independentSelector": false,
+ "metricType": "LABELED_VALUE",
+ "metricName": "k8s_service_pod_network_receive",
+ "queryMetricType": "readLabeledMetricsValues",
+ "chartType": "ChartLine",
+ "aggregation": "/",
+ "aggregationNum": "1024",
+ "unit": "KB/s"
+ },
+ {
+ "width": 3,
+ "title": "Pod Network Transmit",
+ "height": "248",
+ "entityType": "Endpoint",
+ "independentSelector": false,
+ "metricType": "LABELED_VALUE",
+ "metricName": "k8s_service_pod_network_transmit",
+ "queryMetricType": "readLabeledMetricsValues",
+ "chartType": "ChartLine",
+ "aggregationNum": "1024",
+ "aggregation": "/",
+ "unit": "KB/s"
+ },
+ {
+ "width": 3,
+ "title": "Pod Storage Usage",
+ "height": "248",
+ "entityType": "Endpoint",
+ "independentSelector": false,
+ "metricType": "LABELED_VALUE",
+ "metricName": "k8s_service_pod_fs_usage",
+ "queryMetricType": "readLabeledMetricsValues",
+ "chartType": "ChartArea",
+ "aggregation": "/",
+ "aggregationNum": "1048576",
+ "unit": "MB"
+ }
+ ]
+ }
+ ]
+ }
+ ]
+ # When activated, a DASHBOARD type template is added to the UI page automatically.
+ # False means this is only a basic template, which the user needs to add manually.
+ activated: true
+ # True means the template does not show up on the dashboard; only its definition is kept in the storage.
+ disabled: false