You are viewing a plain text version of this content. The canonical link for it is here.
Posted to notifications@skywalking.apache.org by wu...@apache.org on 2021/04/06 06:11:17 UTC

[skywalking] branch master updated: Support k8s monitoring (#6479)

This is an automated email from the ASF dual-hosted git repository.

wusheng pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/skywalking.git


The following commit(s) were added to refs/heads/master by this push:
     new f9096f5  Support k8s monitoring (#6479)
f9096f5 is described below

commit f9096f508673e72d1c255cad95fba2a3505e1a46
Author: wankai123 <wa...@foxmail.com>
AuthorDate: Tue Apr 6 14:11:02 2021 +0800

    Support k8s monitoring (#6479)
---
 CHANGES.md                                         |   2 +
 docs/en/setup/backend/backend-receivers.md         |   3 +
 .../meter/analyzer/dsl/tagOpt/K8sRetagType.java    |  10 +-
 .../oap/meter/analyzer/dsl/tagOpt/Retag.java       |   1 +
 .../oap/meter/analyzer/dsl/K8sTagTest.java         |   5 +-
 .../main/resources/otel-oc-rules/k8s-cluster.yaml  |  89 ++++
 .../src/main/resources/otel-oc-rules/k8s-node.yaml |  74 +++
 .../main/resources/otel-oc-rules/k8s-service.yaml  |  66 +++
 .../resources/ui-initialized-templates/k8s.yml     | 512 +++++++++++++++++++++
 9 files changed, 755 insertions(+), 7 deletions(-)

diff --git a/CHANGES.md b/CHANGES.md
index 35e8874..4e41fc7 100644
--- a/CHANGES.md
+++ b/CHANGES.md
@@ -68,6 +68,8 @@ Release Notes.
 * Optimize the self monitoring grafana dashboard.
 * Enhance the export service.
 * Add function `retagByK8sMeta` and opt type `K8sRetagType.Pod2Service` in MAL for k8s to relate pods and services.
+* Using "service.istio.io/canonical-name" to replace "app" label to resolve Envoy ALS service name.
+* Support k8s monitoring.
 * Make the flushing metrics operation concurrent.
 * Fix ALS K8SServiceRegistry didn't remove the correct entry.
 * Using "service.istio.io/canonical-name" to replace "app" label to resolve Envoy ALS service name.
diff --git a/docs/en/setup/backend/backend-receivers.md b/docs/en/setup/backend/backend-receivers.md
index 4512aed..b02b14b 100644
--- a/docs/en/setup/backend/backend-receivers.md
+++ b/docs/en/setup/backend/backend-receivers.md
@@ -132,6 +132,9 @@ to be the identification of the metric data.
 |istio-controlplane| Metrics of Istio control panel | otel-oc-rules/istio-controlplane.yaml | Istio Control Panel -> OpenTelemetry Collector --OC format--> SkyWalking OAP Server |
 |oap| Metrics of SkyWalking OAP server itself | otel-oc-rules/oap.yaml | SkyWalking OAP Server(SelfObservability) -> OpenTelemetry Collector --OC format--> SkyWalking OAP Server |
 |vm| Metrics of VMs | otel-oc-rules/vm.yaml | Prometheus node-exporter(VMs) -> OpenTelemetry Collector --OC format--> SkyWalking OAP Server |
+|k8s-cluster| Metrics of K8s cluster | otel-oc-rules/k8s-cluster.yaml | K8s kube-state-metrics -> OpenTelemetry Collector --OC format--> SkyWalking OAP Server |
+|k8s-node| Metrics of K8s cluster nodes | otel-oc-rules/k8s-node.yaml | cAdvisor & K8s kube-state-metrics -> OpenTelemetry Collector --OC format--> SkyWalking OAP Server |
+|k8s-service| Metrics of K8s cluster services | otel-oc-rules/k8s-service.yaml | cAdvisor & K8s kube-state-metrics -> OpenTelemetry Collector --OC format--> SkyWalking OAP Server |
 
 ## Meter receiver
 
diff --git a/oap-server/analyzer/meter-analyzer/src/main/java/org/apache/skywalking/oap/meter/analyzer/dsl/tagOpt/K8sRetagType.java b/oap-server/analyzer/meter-analyzer/src/main/java/org/apache/skywalking/oap/meter/analyzer/dsl/tagOpt/K8sRetagType.java
index 5b179f6..df12ed6 100644
--- a/oap-server/analyzer/meter-analyzer/src/main/java/org/apache/skywalking/oap/meter/analyzer/dsl/tagOpt/K8sRetagType.java
+++ b/oap-server/analyzer/meter-analyzer/src/main/java/org/apache/skywalking/oap/meter/analyzer/dsl/tagOpt/K8sRetagType.java
@@ -27,7 +27,6 @@ import org.apache.skywalking.oap.meter.analyzer.dsl.Sample;
 import org.apache.skywalking.oap.meter.analyzer.k8s.K8sInfoRegistry;
 
 public enum K8sRetagType implements Retag {
-
     Pod2Service {
         @Override
         public Sample[] execute(final Sample[] ss,
@@ -39,11 +38,12 @@ public enum K8sRetagType implements Retag {
                 String namespace = sample.getLabels().get(namespaceLabelName);
                 if (!Strings.isNullOrEmpty(podName) && !Strings.isNullOrEmpty(namespace)) {
                     String serviceName = K8sInfoRegistry.getInstance().findServiceName(namespace, podName);
-                    if (!Strings.isNullOrEmpty(serviceName)) {
-                        Map<String, String> labels = Maps.newHashMap(sample.getLabels());
-                        labels.put(newLabelName, serviceName);
-                        return sample.toBuilder().labels(ImmutableMap.copyOf(labels)).build();
+                    if (Strings.isNullOrEmpty(serviceName)) {
+                        serviceName = BLANK;
                     }
+                    Map<String, String> labels = Maps.newHashMap(sample.getLabels());
+                    labels.put(newLabelName, serviceName);
+                    return sample.toBuilder().labels(ImmutableMap.copyOf(labels)).build();
                 }
                 return sample;
             }).toArray(Sample[]::new);
diff --git a/oap-server/analyzer/meter-analyzer/src/main/java/org/apache/skywalking/oap/meter/analyzer/dsl/tagOpt/Retag.java b/oap-server/analyzer/meter-analyzer/src/main/java/org/apache/skywalking/oap/meter/analyzer/dsl/tagOpt/Retag.java
index e02a971..070b9c3 100644
--- a/oap-server/analyzer/meter-analyzer/src/main/java/org/apache/skywalking/oap/meter/analyzer/dsl/tagOpt/Retag.java
+++ b/oap-server/analyzer/meter-analyzer/src/main/java/org/apache/skywalking/oap/meter/analyzer/dsl/tagOpt/Retag.java
@@ -21,5 +21,6 @@ package org.apache.skywalking.oap.meter.analyzer.dsl.tagOpt;
 import org.apache.skywalking.oap.meter.analyzer.dsl.Sample;
 
 public interface Retag {
+    String BLANK = "";
     Sample[] execute(Sample[] ss, String newLabelName, String existingLabelName, String namespaceLabelName);
 }
diff --git a/oap-server/analyzer/meter-analyzer/src/test/java/org/apache/skywalking/oap/meter/analyzer/dsl/K8sTagTest.java b/oap-server/analyzer/meter-analyzer/src/test/java/org/apache/skywalking/oap/meter/analyzer/dsl/K8sTagTest.java
index fbdf1ad..7a4e75e 100644
--- a/oap-server/analyzer/meter-analyzer/src/test/java/org/apache/skywalking/oap/meter/analyzer/dsl/K8sTagTest.java
+++ b/oap-server/analyzer/meter-analyzer/src/test/java/org/apache/skywalking/oap/meter/analyzer/dsl/K8sTagTest.java
@@ -28,6 +28,7 @@ import java.util.Collection;
 import java.util.Map;
 import lombok.SneakyThrows;
 import lombok.extern.slf4j.Slf4j;
+import org.apache.skywalking.oap.meter.analyzer.dsl.tagOpt.Retag;
 import org.apache.skywalking.oap.meter.analyzer.k8s.K8sInfoRegistry;
 import org.junit.Before;
 import org.junit.Test;
@@ -133,7 +134,7 @@ public class K8sTagTest {
                           .labels(
                               of(
                                   "namespace", "default", "container", "my-nginx", "cpu", "total", "pod",
-                                  "my-nginx-5dc4865748-no-pod"
+                                  "my-nginx-5dc4865748-no-pod" , "service", Retag.BLANK
                               ))
                           .value(2)
                           .build(),
@@ -175,7 +176,7 @@ public class K8sTagTest {
                           .labels(
                               of(
                                   "namespace", "default", "container", "my-nginx", "cpu", "total", "pod",
-                                  "my-nginx-5dc4865748-no-service"
+                                  "my-nginx-5dc4865748-no-service" , "service", Retag.BLANK
                               ))
                           .value(2)
                           .build(),
diff --git a/oap-server/server-bootstrap/src/main/resources/otel-oc-rules/k8s-cluster.yaml b/oap-server/server-bootstrap/src/main/resources/otel-oc-rules/k8s-cluster.yaml
new file mode 100644
index 0000000..f3ed97c
--- /dev/null
+++ b/oap-server/server-bootstrap/src/main/resources/otel-oc-rules/k8s-cluster.yaml
@@ -0,0 +1,89 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# This will parse a textual representation of a duration. The formats
+# accepted are based on the ISO-8601 duration format {@code PnDTnHnMn.nS}
+# with days considered to be exactly 24 hours.
+# <p>
+# Examples:
+# <pre>
+#    "PT20.345S" -- parses as "20.345 seconds"
+#    "PT15M"     -- parses as "15 minutes" (where a minute is 60 seconds)
+#    "PT10H"     -- parses as "10 hours" (where an hour is 3600 seconds)
+#    "P2D"       -- parses as "2 days" (where a day is 24 hours or 86400 seconds)
+#    "P2DT3H4M"  -- parses as "2 days, 3 hours and 4 minutes"
+#    "PT-6H3M"   -- parses as "-6 hours and +3 minutes"
+#    "-PT6H3M"   -- parses as "-6 hours and -3 minutes"
+#    "-PT-6H+3M" -- parses as "+6 hours and -3 minutes"
+# </pre>
+expSuffix: tag({tags -> tags.cluster = 'k8s-cluster::' + tags.cluster}).service(['cluster'])
+metricPrefix: k8s_cluster
+metricsRules:
+
+
+  - name: cpu_cores
+    exp: (kube_node_status_capacity * 1000).tagEqual('resource' , 'cpu').sum(['cluster'])
+  - name: cpu_cores_allocatable
+    exp: (kube_node_status_allocatable * 1000).tagEqual('resource' , 'cpu').sum(['cluster'])
+  - name: cpu_cores_requests
+    exp: (kube_pod_container_resource_requests * 1000).tagEqual('resource' , 'cpu').sum(['cluster'])
+  - name: cpu_cores_limits
+    exp: (kube_pod_container_resource_limits * 1000).tagEqual('resource' , 'cpu').sum(['cluster'])
+
+  - name: memory_total
+    exp: kube_node_status_capacity.tagEqual('resource' , 'memory').sum(['cluster'])
+  - name: memory_allocatable
+    exp: kube_node_status_allocatable.tagEqual('resource' , 'memory').sum(['cluster'])
+  - name: memory_requests
+    exp: kube_pod_container_resource_requests.tagEqual('resource' , 'memory').sum(['cluster'])
+  - name: memory_limits
+    exp: kube_pod_container_resource_limits.tagEqual('resource' , 'memory').sum(['cluster'])
+
+  - name: storage_total
+    exp: kube_node_status_capacity.tagEqual('resource' , 'ephemeral_storage').sum(['cluster'])
+  - name: storage_allocatable
+    exp: kube_node_status_allocatable.tagEqual('resource' , 'ephemeral_storage').sum(['cluster'])
+
+  - name: node_total
+    exp: kube_node_info.sum(['cluster'])
+  - name: node_status
+    exp: kube_node_status_condition.valueEqual(1).tagMatch('status' , 'true|unknown').sum(['cluster' , 'node' ,'condition'])
+
+  - name: namespace_total
+    exp: kube_namespace_labels.sum(['cluster'])
+
+  - name: deployment_total
+    exp: kube_deployment_labels.sum(['cluster'])
+  - name: deployment_status
+    exp: kube_deployment_status_condition.valueEqual(1).tagMatch('condition' , 'Available').sum(['cluster' , 'deployment' ,'condition' , 'status']).tag({tags -> tags.remove('condition')})
+  - name: deployment_spec_replicas
+    exp: kube_deployment_spec_replicas.sum(['cluster' , 'deployment'])
+
+  - name: service_total
+    exp: kube_service_info.sum(['cluster'])
+  - name: service_pod_status
+    exp: kube_pod_status_phase.retagByK8sMeta('service' , K8sRetagType.Pod2Service , 'pod' , 'namespace').tagNotEqual('service' , '').valueEqual(1).sum(['cluster' , 'service' , 'phase'])
+
+  - name: pod_total
+    exp: kube_pod_info.sum(['cluster'])
+  - name: pod_status_not_running
+    exp: kube_pod_status_phase.valueEqual(1).tagNotMatch('phase' , 'Running').sum(['cluster' , 'pod' , 'phase'])
+
+  - name: container_total
+    exp: kube_pod_container_info.sum(['cluster'])
+  - name: pod_status_waiting
+    exp: kube_pod_container_status_waiting_reason.valueEqual(1).sum(['cluster' , 'pod' , 'container' , 'reason'])
+  - name: pod_status_terminated
+    exp: kube_pod_container_status_terminated_reason.valueEqual(1).sum(['cluster' , 'pod' , 'container' , 'reason'])
diff --git a/oap-server/server-bootstrap/src/main/resources/otel-oc-rules/k8s-node.yaml b/oap-server/server-bootstrap/src/main/resources/otel-oc-rules/k8s-node.yaml
new file mode 100644
index 0000000..4f32102
--- /dev/null
+++ b/oap-server/server-bootstrap/src/main/resources/otel-oc-rules/k8s-node.yaml
@@ -0,0 +1,74 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# This will parse a textual representation of a duration. The formats
+# accepted are based on the ISO-8601 duration format {@code PnDTnHnMn.nS}
+# with days considered to be exactly 24 hours.
+# <p>
+# Examples:
+# <pre>
+#    "PT20.345S" -- parses as "20.345 seconds"
+#    "PT15M"     -- parses as "15 minutes" (where a minute is 60 seconds)
+#    "PT10H"     -- parses as "10 hours" (where an hour is 3600 seconds)
+#    "P2D"       -- parses as "2 days" (where a day is 24 hours or 86400 seconds)
+#    "P2DT3H4M"  -- parses as "2 days, 3 hours and 4 minutes"
+#    "PT-6H3M"   -- parses as "-6 hours and +3 minutes"
+#    "-PT6H3M"   -- parses as "-6 hours and -3 minutes"
+#    "-PT-6H+3M" -- parses as "+6 hours and -3 minutes"
+# </pre>
+
+expSuffix: tag({tags -> tags.cluster = 'k8s-cluster::' + tags.cluster}).instance(['cluster'] , ['node'])
+metricPrefix: k8s_node
+metricsRules:
+
+  - name: cpu_cores
+    exp: (kube_node_status_capacity * 1000).tagEqual('resource' , 'cpu').sum(['cluster' , 'node'])
+  - name: cpu_usage
+    exp: (container_cpu_usage_seconds_total * 1000).tagEqual('id' , '/').sum(['cluster' , 'node']).rate('PT1M')
+  - name: cpu_cores_allocatable
+    exp: (kube_node_status_allocatable * 1000).tagEqual('resource' , 'cpu').sum(['cluster' , 'node'])
+  - name: cpu_cores_requests
+    exp: (kube_pod_container_resource_requests * 1000).tagEqual('resource' , 'cpu').sum(['cluster' , 'node'])
+  - name: cpu_cores_limits
+    exp: (kube_pod_container_resource_limits * 1000).tagEqual('resource' , 'cpu').sum(['cluster' , 'node'])
+
+  - name: memory_total
+    exp: kube_node_status_capacity.tagEqual('resource' , 'memory').sum(['cluster' , 'node'])
+  - name: memory_allocatable
+    exp: kube_node_status_allocatable.tagEqual('resource' , 'memory').sum(['cluster' , 'node'])
+  - name: memory_requests
+    exp: kube_pod_container_resource_requests.tagEqual('resource' , 'memory').sum(['cluster' , 'node'])
+  - name: memory_limits
+    exp: kube_pod_container_resource_limits.tagEqual('resource' , 'memory').sum(['cluster' , 'node'])
+
+  - name: memory_usage
+    exp: container_memory_working_set_bytes.tagEqual('id' , '/').sum(['cluster' , 'node'])
+
+
+  - name: storage_total
+    exp: kube_node_status_capacity.tagEqual('resource' , 'ephemeral_storage').sum(['cluster' , 'node'])
+  - name: storage_allocatable
+    exp: kube_node_status_allocatable.tagEqual('resource' , 'ephemeral_storage').sum(['cluster' , 'node'])
+
+  - name: node_status
+    exp: kube_node_status_condition.valueEqual(1).tagMatch('status' , 'true|unknown').sum(['cluster' , 'node' ,'condition'])
+
+  - name: pod_total
+    exp: kube_pod_info.sum(['cluster' , 'node'])
+
+  - name: network_receive
+    exp: container_network_receive_bytes_total.sum(['cluster' , 'node']).irate()
+  - name: network_transmit
+    exp: container_network_transmit_bytes_total.sum(['cluster' , 'node']).irate()
diff --git a/oap-server/server-bootstrap/src/main/resources/otel-oc-rules/k8s-service.yaml b/oap-server/server-bootstrap/src/main/resources/otel-oc-rules/k8s-service.yaml
new file mode 100644
index 0000000..79b86c2
--- /dev/null
+++ b/oap-server/server-bootstrap/src/main/resources/otel-oc-rules/k8s-service.yaml
@@ -0,0 +1,66 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# This will parse a textual representation of a duration. The formats
+# accepted are based on the ISO-8601 duration format {@code PnDTnHnMn.nS}
+# with days considered to be exactly 24 hours.
+# <p>
+# Examples:
+# <pre>
+#    "PT20.345S" -- parses as "20.345 seconds"
+#    "PT15M"     -- parses as "15 minutes" (where a minute is 60 seconds)
+#    "PT10H"     -- parses as "10 hours" (where an hour is 3600 seconds)
+#    "P2D"       -- parses as "2 days" (where a day is 24 hours or 86400 seconds)
+#    "P2DT3H4M"  -- parses as "2 days, 3 hours and 4 minutes"
+#    "PT-6H3M"   -- parses as "-6 hours and +3 minutes"
+#    "-PT6H3M"   -- parses as "-6 hours and -3 minutes"
+#    "-PT-6H+3M" -- parses as "+6 hours and -3 minutes"
+# </pre>
+expSuffix: tag({tags -> tags.cluster = 'k8s-cluster::' + tags.cluster}).endpoint(['cluster'] , ['service'])
+metricPrefix: k8s_service
+metricsRules:
+
+  - name: pod_total
+    exp: kube_pod_info.retagByK8sMeta('service' , K8sRetagType.Pod2Service , 'pod' , 'namespace').tagNotEqual('service' , '').sum(['cluster' , 'service'])
+
+  - name: cpu_cores_requests
+    exp: (kube_pod_container_resource_requests * 1000).retagByK8sMeta('service' , K8sRetagType.Pod2Service , 'pod' , 'namespace').tagNotEqual('service' , '').tagEqual('resource' , 'cpu').sum(['cluster' , 'service'])
+  - name: cpu_cores_limits
+    exp: (kube_pod_container_resource_limits * 1000).retagByK8sMeta('service' , K8sRetagType.Pod2Service , 'pod' , 'namespace').tagNotEqual('service' , '').tagEqual('resource' , 'cpu').sum(['cluster' , 'service'])
+  - name: memory_requests
+    exp: kube_pod_container_resource_requests.retagByK8sMeta('service' , K8sRetagType.Pod2Service , 'pod' , 'namespace').tagNotEqual('service' , '').tagEqual('resource' , 'memory').sum(['cluster' , 'service'])
+  - name: memory_limits
+    exp: kube_pod_container_resource_limits.retagByK8sMeta('service' , K8sRetagType.Pod2Service , 'pod' , 'namespace').tagNotEqual('service' , '').tagEqual('resource' , 'memory').sum(['cluster' , 'service'])
+
+  - name: pod_status
+    exp: kube_pod_status_phase.retagByK8sMeta('service' , K8sRetagType.Pod2Service , 'pod' , 'namespace').tagNotEqual('service' , '').valueEqual(1).sum(['cluster' , 'service' , 'pod' , 'phase'])
+  - name: pod_status_waiting
+    exp: kube_pod_container_status_waiting_reason.retagByK8sMeta('service' , K8sRetagType.Pod2Service , 'pod' , 'namespace').tagNotEqual('service' , '').valueEqual(1).sum(['cluster' , 'service' , 'pod' ,  'container' , 'reason'])
+  - name: pod_status_terminated
+    exp: kube_pod_container_status_terminated_reason.retagByK8sMeta('service' , K8sRetagType.Pod2Service , 'pod' , 'namespace').tagNotEqual('service' , '').valueEqual(1).sum(['cluster' , 'service' , 'pod' ,  'container' , 'reason'])
+  - name: pod_status_restarts_total
+    exp: kube_pod_container_status_restarts_total.retagByK8sMeta('service' , K8sRetagType.Pod2Service , 'pod' , 'namespace').tagNotEqual('service' , '').sum(['cluster' , 'service' , 'pod'])
+
+  - name: pod_cpu_usage
+    exp: (container_cpu_usage_seconds_total * 1000).tagNotEqual('pod' , '').retagByK8sMeta('service' , K8sRetagType.Pod2Service , 'pod' , 'namespace').tagNotEqual('service' , '').sum(['cluster' , 'service' , 'pod']).rate('PT1M')
+  - name: pod_memory_usage
+    exp: container_memory_working_set_bytes.retagByK8sMeta('service' , K8sRetagType.Pod2Service , 'pod' , 'namespace').tagNotEqual('service' , '').sum(['cluster' , 'service' , 'pod'])
+
+  - name: pod_network_receive
+    exp: container_network_receive_bytes_total.tagNotEqual('pod' , '').retagByK8sMeta('service' , K8sRetagType.Pod2Service , 'pod' , 'namespace').tagNotEqual('service' , '').sum(['cluster' , 'service' , 'pod']).irate()
+  - name: pod_network_transmit
+    exp: container_network_transmit_bytes_total.tagNotEqual('pod' , '').retagByK8sMeta('service' , K8sRetagType.Pod2Service , 'pod' , 'namespace').tagNotEqual('service' , '').sum(['cluster' , 'service' , 'pod']).irate()
+  - name: pod_fs_usage
+    exp: container_fs_usage_bytes.tagNotEqual('pod' , '').retagByK8sMeta('service' , K8sRetagType.Pod2Service , 'pod' , 'namespace').tagNotEqual('service' , '').sum(['cluster' , 'service' , 'pod'])
diff --git a/oap-server/server-bootstrap/src/main/resources/ui-initialized-templates/k8s.yml b/oap-server/server-bootstrap/src/main/resources/ui-initialized-templates/k8s.yml
new file mode 100644
index 0000000..9745d0d
--- /dev/null
+++ b/oap-server/server-bootstrap/src/main/resources/ui-initialized-templates/k8s.yml
@@ -0,0 +1,512 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# This UI template initialization file contains the default templates used when the SkyWalking OAP starts up for the first time.
+#
+# SkyWalking also checks the existing templates in the database; if they are missing, all templates in this file
+# can be added automatically.
+
+templates:
+  - name: "K8s"
+    # The type includes DASHBOARD, TOPOLOGY_INSTANCE, TOPOLOGY_ENDPOINT.
+    # DASHBOARD type templates could have multiple definitions, by using different names.
+    # TOPOLOGY_INSTANCE, TOPOLOGY_ENDPOINT type templates should be defined once, as they are used in the topology page only.
+    type: "DASHBOARD"
+    # The configuration can be defined through the UI; use `export` to format it as standard JSON.
+    configuration: |-
+      [
+          {
+              "name": "K8s",
+              "type": "service",
+              "serviceGroup": "k8s-cluster",
+              "children": [
+                  {
+                      "name": "Cluster",
+                      "children": [
+                          {
+                              "width": "2",
+                              "title": "Node Total",
+                              "height": "100",
+                              "entityType": "Service",
+                              "independentSelector": false,
+                              "metricType": "REGULAR_VALUE",
+                              "metricName": "k8s_cluster_node_total",
+                              "queryMetricType": "readMetricsValue",
+                              "chartType": "ChartNum"
+                          },
+                          {
+                              "width": "2",
+                              "title": "Namespace Total",
+                              "height": "100",
+                              "entityType": "Service",
+                              "independentSelector": false,
+                              "metricType": "REGULAR_VALUE",
+                              "metricName": "k8s_cluster_namespace_total",
+                              "queryMetricType": "readMetricsValue",
+                              "chartType": "ChartNum"
+                          },
+                          {
+                              "width": "2",
+                              "title": "Deployment Total",
+                              "height": "100",
+                              "entityType": "Service",
+                              "independentSelector": false,
+                              "metricType": "REGULAR_VALUE",
+                              "metricName": "k8s_cluster_deployment_total",
+                              "queryMetricType": "readMetricsValue",
+                              "chartType": "ChartNum"
+                          },
+                          {
+                              "width": "2",
+                              "title": "Service Total",
+                              "height": "100",
+                              "entityType": "Service",
+                              "independentSelector": false,
+                              "metricType": "REGULAR_VALUE",
+                              "metricName": "k8s_cluster_service_total",
+                              "queryMetricType": "readMetricsValue",
+                              "chartType": "ChartNum"
+                          },
+                          {
+                              "width": "2",
+                              "title": "Pod Total",
+                              "height": "100",
+                              "entityType": "Service",
+                              "independentSelector": false,
+                              "metricType": "REGULAR_VALUE",
+                              "metricName": "k8s_cluster_pod_total",
+                              "queryMetricType": "readMetricsValue",
+                              "chartType": "ChartNum"
+                          },
+                          {
+                              "width": 2,
+                              "title": "Container Total",
+                              "height": "100",
+                              "entityType": "Service",
+                              "independentSelector": false,
+                              "metricType": "REGULAR_VALUE",
+                              "metricName": "k8s_cluster_container_total",
+                              "queryMetricType": "readMetricsValue",
+                              "chartType": "ChartNum"
+                          },
+                          {
+                              "width": "4",
+                              "title": "CPU Resources",
+                              "height": 200,
+                              "entityType": "Service",
+                              "independentSelector": false,
+                              "metricType": "REGULAR_VALUE",
+                              "metricName": "k8s_cluster_cpu_cores,k8s_cluster_cpu_cores_requests,k8s_cluster_cpu_cores_limits,k8s_cluster_cpu_cores_allocatable",
+                              "queryMetricType": "readMetricsValues",
+                              "chartType": "ChartArea",
+                              "unit": "m"
+                          },
+                          {
+                              "width": "4",
+                              "title": "Memory Resources",
+                              "height": 200,
+                              "entityType": "Service",
+                              "independentSelector": false,
+                              "metricType": "REGULAR_VALUE",
+                              "metricName": "k8s_cluster_memory_total,k8s_cluster_memory_requests,k8s_cluster_memory_limits,k8s_cluster_memory_allocatable",
+                              "queryMetricType": "readMetricsValues",
+                              "chartType": "ChartArea",
+                              "aggregation": "/",
+                              "aggregationNum": "1073741824",
+                              "unit": "GB"
+                          },
+                          {
+                              "width": "4",
+                              "title": "Storage Resources",
+                              "height": 200,
+                              "entityType": "Service",
+                              "independentSelector": false,
+                              "metricType": "REGULAR_VALUE",
+                              "metricName": "k8s_cluster_storage_total,k8s_cluster_storage_allocatable",
+                              "queryMetricType": "readMetricsValues",
+                              "chartType": "ChartArea",
+                              "aggregation": "/",
+                              "aggregationNum": "1073741824",
+                              "unit": "GB"
+                          },
+                          {
+                              "width": "4",
+                              "title": "Node Status",
+                              "height": "200",
+                              "entityType": "Service",
+                              "independentSelector": false,
+                              "metricType": "LABELED_VALUE",
+                              "metricName": "k8s_cluster_node_status",
+                              "queryMetricType": "readLabeledMetricsValues",
+                              "chartType": "ChartTable",
+                              "tableHeaderCol1": "Status-Node",
+                              "showTableValues": "false"
+                          },
+                          {
+                              "width": "4",
+                              "title": "Deployment Status",
+                              "height": 200,
+                              "entityType": "Service",
+                              "independentSelector": false,
+                              "metricType": "LABELED_VALUE",
+                              "metricName": "k8s_cluster_deployment_status",
+                              "queryMetricType": "readLabeledMetricsValues",
+                              "chartType": "ChartTable",
+                              "tableHeaderCol1": "Deployment-Available",
+                              "showTableValues": "false"
+                          },
+                          {
+                              "width": "4",
+                              "title": "Deployment Spec Replicas",
+                              "height": 200,
+                              "entityType": "Service",
+                              "independentSelector": false,
+                              "metricType": "LABELED_VALUE",
+                              "queryMetricType": "readLabeledMetricsValues",
+                              "chartType": "ChartTable",
+                              "metricName": "k8s_cluster_deployment_spec_replicas",
+                              "showTableValues": "true",
+                              "tableHeaderCol1": "Deployment",
+                              "tableHeaderCol2": "Replicas"
+                          },
+                          {
+                              "width": "4",
+                              "title": "Service Status",
+                              "height": 200,
+                              "entityType": "Service",
+                              "independentSelector": false,
+                              "metricType": "LABELED_VALUE",
+                              "metricName": "k8s_cluster_service_pod_status",
+                              "queryMetricType": "readLabeledMetricsValues",
+                              "chartType": "ChartTable",
+                              "showTableValues": "false",
+                              "tableHeaderCol1": "Status-Service"
+                          },
+                          {
+                              "width": "4",
+                              "title": "Pod Status Not Running",
+                              "height": 200,
+                              "entityType": "Service",
+                              "independentSelector": false,
+                              "metricType": "LABELED_VALUE",
+                              "metricName": "k8s_cluster_pod_status_not_running",
+                              "queryMetricType": "readLabeledMetricsValues",
+                              "chartType": "ChartTable",
+                              "tableHeaderCol1": "Status-Pod"
+                          },
+                          {
+                              "width": "4",
+                              "title": "Pod Status Waiting",
+                              "height": 200,
+                              "entityType": "Service",
+                              "independentSelector": false,
+                              "metricType": "LABELED_VALUE",
+                              "metricName": "k8s_cluster_pod_status_waiting",
+                              "queryMetricType": "readLabeledMetricsValues",
+                              "chartType": "ChartTable",
+                              "tableHeaderCol1": "Container-Pod-Waiting Reason",
+                              "showTableValues": "false"
+                          },
+                          {
+                              "width": "4",
+                              "title": "Pod Status Terminated",
+                              "height": 200,
+                              "entityType": "Service",
+                              "independentSelector": false,
+                              "metricType": "LABELED_VALUE",
+                              "metricName": "k8s_cluster_container_status_terminated",
+                              "queryMetricType": "readLabeledMetricsValues",
+                              "chartType": "ChartBar"
+                          }
+                      ]
+                  },
+                  {
+                      "name": "Node",
+                      "children": [
+                          {
+                              "width": "3",
+                              "title": "Pod Total",
+                              "height": 350,
+                              "entityType": "ServiceInstance",
+                              "independentSelector": false,
+                              "metricType": "REGULAR_VALUE",
+                              "metricName": "k8s_node_pod_total",
+                              "queryMetricType": "readMetricsValue",
+                              "chartType": "ChartNum"
+                          },
+                          {
+                              "width": "3",
+                              "title": "Node Status",
+                              "height": 350,
+                              "entityType": "ServiceInstance",
+                              "independentSelector": false,
+                              "metricType": "LABELED_VALUE",
+                              "metricName": "k8s_node_node_status",
+                              "queryMetricType": "readLabeledMetricsValues",
+                              "chartType": "ChartTable",
+                              "tableHeaderCol1": "Status",
+                              "showTableValues": "false"
+                          },
+                          {
+                              "width": "3",
+                              "title": "CPU Resources",
+                              "height": "350",
+                              "entityType": "ServiceInstance",
+                              "independentSelector": false,
+                              "metricType": "REGULAR_VALUE",
+                              "metricName": "k8s_node_cpu_cores,k8s_node_cpu_cores_allocatable,k8s_node_cpu_cores_requests,k8s_node_cpu_cores_limits",
+                              "queryMetricType": "readMetricsValues",
+                              "chartType": "ChartArea",
+                              "unit": "m"
+                          },
+                          {
+                              "width": "3",
+                              "title": "Memory Resources",
+                              "height": "350",
+                              "entityType": "ServiceInstance",
+                              "independentSelector": false,
+                              "metricType": "REGULAR_VALUE",
+                              "unit": "GB",
+                              "queryMetricType": "readMetricsValues",
+                              "chartType": "ChartArea",
+                              "metricName": "k8s_node_memory_total,k8s_node_memory_allocatable,k8s_node_memory_requests,k8s_node_memory_limits",
+                              "aggregation": "/",
+                              "aggregationNum": "1073741824"
+                          },
+                          {
+                              "width": "3",
+                              "title": "Storage Resources",
+                              "height": 350,
+                              "entityType": "ServiceInstance",
+                              "independentSelector": false,
+                              "metricType": "REGULAR_VALUE",
+                              "unit": "GB",
+                              "queryMetricType": "readMetricsValues",
+                              "chartType": "ChartArea",
+                              "metricName": "k8s_node_storage_total,k8s_node_storage_allocatable",
+                              "aggregation": "/",
+                              "aggregationNum": "1073741824"
+                          },
+                          {
+                              "width": 3,
+                              "title": "CPU Usage",
+                              "height": 350,
+                              "entityType": "ServiceInstance",
+                              "independentSelector": false,
+                              "metricType": "REGULAR_VALUE",
+                              "metricName": "k8s_node_cpu_usage",
+                              "queryMetricType": "readMetricsValues",
+                              "chartType": "ChartLine",
+                              "unit": "m"
+                          },
+                          {
+                              "width": 3,
+                              "title": "Memory Usage",
+                              "height": 350,
+                              "entityType": "ServiceInstance",
+                              "independentSelector": false,
+                              "metricType": "REGULAR_VALUE",
+                              "metricName": "k8s_node_memory_usage",
+                              "queryMetricType": "readMetricsValues",
+                              "chartType": "ChartLine",
+                              "aggregation": "/",
+                              "aggregationNum": "1073741824",
+                              "unit": "GB"
+                          },
+                          {
+                              "width": "3",
+                              "title": "Network I/O",
+                              "height": 350,
+                              "entityType": "ServiceInstance",
+                              "independentSelector": false,
+                              "metricType": "REGULAR_VALUE",
+                              "metricName": "k8s_node_network_receive,k8s_node_network_transmit",
+                              "queryMetricType": "readMetricsValues",
+                              "chartType": "ChartLine",
+                              "unit": "KB/s",
+                              "aggregation": "/",
+                              "aggregationNum": "1024"
+                          }
+                      ]
+                  },
+                  {
+                      "name": "Service",
+                      "children": [
+                          {
+                              "width": 3,
+                              "title": "Service Pod Total",
+                              "height": "248",
+                              "entityType": "Endpoint",
+                              "independentSelector": false,
+                              "metricType": "REGULAR_VALUE",
+                              "metricName": "k8s_service_pod_total",
+                              "queryMetricType": "readMetricsValue",
+                              "chartType": "ChartNum"
+                          },
+                          {
+                              "width": 3,
+                              "title": "Service Pod Status",
+                              "height": "248",
+                              "entityType": "Endpoint",
+                              "independentSelector": false,
+                              "metricType": "LABELED_VALUE",
+                              "metricName": "k8s_service_pod_status",
+                              "queryMetricType": "readLabeledMetricsValues",
+                              "chartType": "ChartTable",
+                              "tableHeaderCol1": "Status-Pod",
+                              "showTableValues": "false"
+                          },
+                          {
+                              "width": 3,
+                              "title": "Service CPU Resources",
+                              "height": "248",
+                              "entityType": "Endpoint",
+                              "independentSelector": false,
+                              "metricType": "REGULAR_VALUE",
+                              "metricName": "k8s_service_cpu_cores_requests,k8s_service_cpu_cores_limits",
+                              "queryMetricType": "readMetricsValues",
+                              "chartType": "ChartArea",
+                              "unit": "m"
+                          },
+                          {
+                              "width": 3,
+                              "title": "Service Memory Resources",
+                              "height": "248",
+                              "entityType": "Endpoint",
+                              "independentSelector": false,
+                              "metricType": "REGULAR_VALUE",
+                              "metricName": "k8s_service_memory_requests,k8s_service_memory_limits",
+                              "queryMetricType": "readMetricsValues",
+                              "chartType": "ChartArea",
+                              "aggregation": "/",
+                              "aggregationNum": "1048576",
+                              "unit": "MB"
+                          },
+                          {
+                              "width": 3,
+                              "title": "Pod CPU Usage",
+                              "height": "248",
+                              "entityType": "Endpoint",
+                              "independentSelector": false,
+                              "metricType": "LABELED_VALUE",
+                              "metricName": "k8s_service_pod_cpu_usage",
+                              "queryMetricType": "readLabeledMetricsValues",
+                              "chartType": "ChartLine",
+                              "unit": "m"
+                          },
+                          {
+                              "width": 3,
+                              "title": "Pod Memory Usage",
+                              "height": "248",
+                              "entityType": "Endpoint",
+                              "independentSelector": false,
+                              "metricType": "LABELED_VALUE",
+                              "metricName": "k8s_service_pod_memory_usage",
+                              "queryMetricType": "readLabeledMetricsValues",
+                              "chartType": "ChartLine",
+                              "aggregation": "/",
+                              "aggregationNum": "1048576",
+                              "unit": "MB"
+                          },
+                          {
+                              "width": 3,
+                              "title": "Pod Waiting",
+                              "height": "248",
+                              "entityType": "Endpoint",
+                              "independentSelector": false,
+                              "metricType": "LABELED_VALUE",
+                              "metricName": "k8s_service_pod_status_waiting",
+                              "queryMetricType": "readLabeledMetricsValues",
+                              "chartType": "ChartTable",
+                              "showTableValues": "false",
+                              "tableHeaderCol1": "Container-Pod-Waiting Reason"
+                          },
+                          {
+                              "width": 3,
+                              "title": "Pod Terminated",
+                              "height": "248",
+                              "entityType": "Endpoint",
+                              "independentSelector": false,
+                              "metricType": "LABELED_VALUE",
+                              "metricName": "k8s_service_pod_status_terminated",
+                              "queryMetricType": "readLabeledMetricsValues",
+                              "chartType": "ChartBar"
+                          },
+                          {
+                              "width": 3,
+                              "title": "Pod Restarts",
+                              "height": "248",
+                              "entityType": "Endpoint",
+                              "independentSelector": false,
+                              "metricType": "LABELED_VALUE",
+                              "metricName": "k8s_service_pod_status_restarts_total",
+                              "queryMetricType": "readLabeledMetricsValues",
+                              "chartType": "ChartTable",
+                              "tableHeaderCol1": "Pod",
+                              "showTableValues": "true",
+                              "tableHeaderCol2": "Restarts Total"
+                          },
+                          {
+                              "width": 3,
+                              "title": "Pod Network Receive",
+                              "height": "248",
+                              "entityType": "Endpoint",
+                              "independentSelector": false,
+                              "metricType": "LABELED_VALUE",
+                              "metricName": "k8s_service_pod_network_receive",
+                              "queryMetricType": "readLabeledMetricsValues",
+                              "chartType": "ChartLine",
+                              "aggregation": "/",
+                              "aggregationNum": "1024",
+                              "unit": "KB/s"
+                          },
+                          {
+                              "width": 3,
+                              "title": "Pod Network Transmit",
+                              "height": "248",
+                              "entityType": "Endpoint",
+                              "independentSelector": false,
+                              "metricType": "LABELED_VALUE",
+                              "metricName": "k8s_service_pod_network_transmit",
+                              "queryMetricType": "readLabeledMetricsValues",
+                              "chartType": "ChartLine",
+                              "aggregationNum": "1024",
+                              "aggregation": "/",
+                              "unit": "KB/s"
+                          },
+                          {
+                              "width": 3,
+                              "title": "Pod Storage Usage",
+                              "height": "248",
+                              "entityType": "Endpoint",
+                              "independentSelector": false,
+                              "metricType": "LABELED_VALUE",
+                              "metricName": "k8s_service_pod_fs_usage",
+                              "queryMetricType": "readLabeledMetricsValues",
+                              "chartType": "ChartArea",
+                              "aggregation": "/",
+                              "aggregationNum": "1048576",
+                              "unit": "MB"
+                          }
+                      ]
+                  }
+              ]
+          }
+      ]
+    # If activated, this DASHBOARD-type template is added to the UI page automatically.
+    # False means it is only provided as a basic template; the user needs to add it manually.
+    activated: true
+    # True means the template won't show up on the dashboard; only its definition is kept in the storage.
+    disabled: false