You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@iotdb.apache.org by qi...@apache.org on 2022/11/26 05:02:39 UTC

[iotdb] branch master updated: [IOTDB-5053] Update Metric Doc and fix. (#8179)

This is an automated email from the ASF dual-hosted git repository.

qiaojialin pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/iotdb.git


The following commit(s) were added to refs/heads/master by this push:
     new b18b7bfdd0 [IOTDB-5053] Update Metric Doc and fix. (#8179)
b18b7bfdd0 is described below

commit b18b7bfdd033d971852d87a55f06257fc9e5c55c
Author: ZhangHongYin <46...@users.noreply.github.com>
AuthorDate: Sat Nov 26 13:02:34 2022 +0800

    [IOTDB-5053] Update Metric Doc and fix. (#8179)
---
 .../resources/conf/iotdb-confignode.properties     |   4 -
 .../persistence/metric/PartitionInfoMetrics.java   |   4 +-
 .../confignode1conf/iotdb-confignode.properties    |   3 +-
 .../confignode2conf/iotdb-confignode.properties    |   1 -
 .../confignode3conf/iotdb-confignode.properties    |   1 -
 docs/UserGuide/Monitor-Alert/Metric-Tool.md        | 559 ++++++++--------
 docs/zh/UserGuide/Monitor-Alert/Metric-Tool.md     | 740 ++++++++++-----------
 .../apache/iotdb/metrics/config/MetricConfig.java  |  16 +-
 .../metrics/config/MetricConfigDescriptor.java     |  12 +-
 .../metricsets/jvm/JvmClassLoaderMetrics.java      |   8 +-
 .../metrics/metricsets/jvm/JvmCompileMetrics.java  |   4 +-
 .../iotdb/metrics/metricsets/jvm/JvmGcMetrics.java |  24 +-
 .../metrics/metricsets/jvm/JvmMemoryMetrics.java   |  24 +-
 .../metrics/metricsets/jvm/JvmThreadMetrics.java   |  16 +-
 .../metrics/metricsets/logback/LogbackMetrics.java |  10 +-
 .../metricsets/logback/MetricsTurboFilter.java     |  10 +-
 .../iotdb/metrics/config/MetricConfigTest.java     |   3 -
 .../iotdb/commons/service/metric/enums/Metric.java |   1 +
 .../resources/conf/iotdb-datanode.properties       |   4 -
 .../db/engine/storagegroup/DataRegionMetrics.java  |   4 +-
 .../iotdb/db/service/metrics/SystemMetrics.java    |   2 +-
 .../service/thrift/impl/ClientRPCServiceImpl.java  |  20 +-
 .../db/service/thrift/impl/TSServiceImpl.java      |  20 +-
 .../datanode1conf/iotdb-datanode.properties        |   1 -
 .../datanode2conf/iotdb-datanode.properties        |   1 -
 .../datanode3conf/iotdb-datanode.properties        |   1 -
 26 files changed, 703 insertions(+), 790 deletions(-)

diff --git a/confignode/src/assembly/resources/conf/iotdb-confignode.properties b/confignode/src/assembly/resources/conf/iotdb-confignode.properties
index 24d62ecd29..eed5d144b5 100644
--- a/confignode/src/assembly/resources/conf/iotdb-confignode.properties
+++ b/confignode/src/assembly/resources/conf/iotdb-confignode.properties
@@ -107,10 +107,6 @@ cn_target_config_node_list=127.0.0.1:22277
 ### Metric Configuration
 ####################
 
-# Whether statistic operation performance
-# Datatype: boolean
-# cn_enable_performance_stat=false
-
 # The reporters of metric module to report metrics
 # If there are more than one reporter, please separate them by commas ",".
 # Options: [JMX, PROMETHEUS, IOTDB]
diff --git a/confignode/src/main/java/org/apache/iotdb/confignode/persistence/metric/PartitionInfoMetrics.java b/confignode/src/main/java/org/apache/iotdb/confignode/persistence/metric/PartitionInfoMetrics.java
index b30555098f..59dc871c02 100644
--- a/confignode/src/main/java/org/apache/iotdb/confignode/persistence/metric/PartitionInfoMetrics.java
+++ b/confignode/src/main/java/org/apache/iotdb/confignode/persistence/metric/PartitionInfoMetrics.java
@@ -46,7 +46,7 @@ public class PartitionInfoMetrics implements IMetricSet {
         partitionInfo,
         PartitionInfo::getStorageGroupPartitionTableSize,
         Tag.NAME.toString(),
-        "storageGroup");
+        "database");
     metricService.createAutoGauge(
         Metric.REGION.toString(),
         MetricLevel.IMPORTANT,
@@ -70,7 +70,7 @@ public class PartitionInfoMetrics implements IMetricSet {
   @Override
   public void unbindFrom(AbstractMetricService metricService) {
     metricService.remove(
-        MetricType.AUTO_GAUGE, Metric.QUANTITY.toString(), Tag.NAME.toString(), "storageGroup");
+        MetricType.AUTO_GAUGE, Metric.QUANTITY.toString(), Tag.NAME.toString(), "database");
     metricService.remove(
         MetricType.AUTO_GAUGE,
         Metric.REGION.toString(),
diff --git a/confignode/src/test/resources/confignode1conf/iotdb-confignode.properties b/confignode/src/test/resources/confignode1conf/iotdb-confignode.properties
index e6e6d0ba07..3c4e761ffd 100644
--- a/confignode/src/test/resources/confignode1conf/iotdb-confignode.properties
+++ b/confignode/src/test/resources/confignode1conf/iotdb-confignode.properties
@@ -25,8 +25,7 @@ cn_system_dir=target/confignode1/system
 cn_data_dirs=target/confignode1/data
 cn_consensus_dir=target/confignode1/consensus
 
-cn_enable_performance_stat=false
-cn_metric_reporter_list=JMX,PROMETHEUS
+cn_metric_reporter_list=PROMETHEUS
 cn_metric_frame_type=MICROMETER
 cn_metric_level=IMPORTANT
 cn_metric_async_collect_period=5
diff --git a/confignode/src/test/resources/confignode2conf/iotdb-confignode.properties b/confignode/src/test/resources/confignode2conf/iotdb-confignode.properties
index c8150e9179..a0d4e4ef98 100644
--- a/confignode/src/test/resources/confignode2conf/iotdb-confignode.properties
+++ b/confignode/src/test/resources/confignode2conf/iotdb-confignode.properties
@@ -25,7 +25,6 @@ cn_system_dir=target/confignode2/system
 cn_data_dirs=target/confignode2/data
 cn_consensus_dir=target/confignode2/consensus
 
-cn_enable_performance_stat=false
 cn_metric_reporter_list=PROMETHEUS
 cn_metric_frame_type=MICROMETER
 cn_metric_level=IMPORTANT
diff --git a/confignode/src/test/resources/confignode3conf/iotdb-confignode.properties b/confignode/src/test/resources/confignode3conf/iotdb-confignode.properties
index 08e811c91a..b95a94e5d8 100644
--- a/confignode/src/test/resources/confignode3conf/iotdb-confignode.properties
+++ b/confignode/src/test/resources/confignode3conf/iotdb-confignode.properties
@@ -25,7 +25,6 @@ cn_system_dir=target/confignode3/system
 cn_data_dirs=target/confignode3/data
 cn_consensus_dir=target/confignode3/consensus
 
-cn_enable_performance_stat=false
 cn_metric_reporter_list=PROMETHEUS
 cn_metric_frame_type=MICROMETER
 cn_metric_level=IMPORTANT
diff --git a/docs/UserGuide/Monitor-Alert/Metric-Tool.md b/docs/UserGuide/Monitor-Alert/Metric-Tool.md
index 7f4bd41753..f35ce9d50d 100644
--- a/docs/UserGuide/Monitor-Alert/Metric-Tool.md
+++ b/docs/UserGuide/Monitor-Alert/Metric-Tool.md
@@ -19,26 +19,21 @@
 
 -->
 
+While IoTDB is running, we hope to observe its status, so as to troubleshoot system problems or discover potential system risks in time. System monitoring metrics are a series of metrics that can **reflect the operating status of the system**.
 
-Currently, users can use various methods to monitor the running IoTDB process, including using Java's Jconsole tool to monitor the system status of the running IoTDB process, using the interface developed by IoTDB for users to view data statistics, and using the monitor framework to monitor the system status of the running IoTDB process.
-
-# 1. Metric Framework
-
-Along with IoTDB running, some metrics reflecting current system's status will be collected continuously, which will provide some useful information helping us resolving system problems and detecting potential system risks.
-
-## 1.1. When to use monitor framework?
+# 1. When to use metric framework?
 
 Belows are some typical application scenarios
 
 1. System is running slowly
 
-   When system is running slowly, we always hope to have information about system's running status as detail as possible, such as
+   When system is running slowly, we always hope to have information about system's running status as detail as possible, such as:
 
-   - JVM:Is there FGC?How long does it cost?How much does  the memory usage decreased after GC?Are there lots of threads?
+   - JVM:Is there FGC? How long does it cost? How much does the memory usage decrease after GC? Are there lots of threads?
    - System:Is the CPU usage too hi?Are there many disk IOs?
    - Connections:How many connections are there in the current time?
    - Interface:What is the TPS and latency of every interface?
-   - ThreadPool:Are there many pending tasks?
+   - Thread Pool:Are there many pending tasks?
    - Cache Hit Ratio
 
 2. No space left on device
@@ -49,241 +44,282 @@ Belows are some typical application scenarios
 
    We could use the count of error logs、the alive status of nodes in cluster, etc, to determine whether the system is running abnormally.
 
-## 1.2. Who will use monitor framework?
-
+# 2. Who will use metric framework?
 Any person cares about the system's status, including but not limited to RD, QA, SRE, DBA, can use the metrics to work more efficiently.
 
-## 1.3. What metrics does IoTDB have?
-
-For now, we have provided some metrics for several core modules of IoTDB, and more metrics will be added or updated along with the development of new features and optimization or refactoring of architecture.
-
-### 1.3.1. Key Concept
-
-Before step into next, we'd better stop to have a look into some key concepts about metrics.
-
-Every metric data has two properties
-
-- Metric Name
-
-  The name of this metric,for example, ```logback_events_total``` indicates the total count of log events。
-
-- Tag
+# 3. What are metrics?
+
+## 3.1. Key Concept
+
+In IoTDB's metric module, each metric is uniquely identified by `Metric Name` and `Tags`.
+
+- `Metric Name`: The name of metric's category, for example, ```logback_events``` indicates log events.
+- `Tags`: **Metric classification**, in the form of Key-Value pairs, each metric can have 0 or more categories, common Key-Value pairs:
+  - `name = xxx`: The name of the metric. For example, for the monitoring item `entry_seconds_count`, the meaning of name is the name of the monitored interface.
+  - `status = xxx`: The status of the metric is subdivided. For example, the monitoring item of the monitoring task can use this parameter to separate the running task and the stopped task.
+  - `user = xxx`: The metric is related to a specific user, such as the total number of writes by the root user.
+  - Customize for the situation: For example, there is a ```level``` classification under logback_events, which is used to indicate the number of logs under a specific level
+- `Metric Level`: The management level of the metric. The default startup level is `Core`, the recommended startup level is `Important`, and the audit strictness is `Core > Important > Normal > All`
+  - `Core`: Core metrics of the system, used by the **operation and maintenance personnel**, which is related to the **performance, stability, and security** of the system, such as the status of the instance, the load of the system, etc.
+  - `Important`: Important metrics of the module, which is used by **operation and maintenance and testers**, and is directly related to **the running status of each module**, such as the number of merged files, execution status, etc.
+  - `Normal`: Normal metrics of the module, used by **developers** to facilitate **locating the module** when problems occur, such as specific key operation situations in the merger.
+  - `All`: All metrics of the module, used by **module developers**, often used when the problem is reproduced, so as to solve the problem quickly.
+
+### 3.1.1. External data format for metrics
+- IoTDB provides metrics in JMX, Prometheus and IoTDB formats:
+   - For JMX, metrics can be obtained through ```org.apache.iotdb.metrics```.
+   - For Prometheus, the value of the metrics can be obtained through the externally exposed port
+   - External exposure in IoTDB mode: metrics can be obtained by executing IoTDB queries
+
+# 4. The detail of metrics
+
+Currently, IoTDB provides metrics for some main modules externally, and with the development of new functions and system optimization or refactoring, metrics will be added and updated synchronously.
+
+If you want to add your own metrics data in IoTDB, please see the [IoTDB Metric Framework](https://github.com/apache/iotdb/tree/master/metrics) document.
+
+## 4.1. Core level metrics
+Core-level metrics are enabled by default during system operation. The addition of each Core-level metric needs to be carefully evaluated. The current Core-level metrics are as follows:
+
+### 4.1.1. Cluster
+| Metric      | Tags                                            | Type      | Description                                         |
+| ----------- | ----------------------------------------------- | --------- | --------------------------------------------------- |
+| config_node | name="total",status="Registered/Online/Unknown" | AutoGauge | The number of registered/online/unknown confignodes |
+| data_node   | name="total",status="Registered/Online/Unknown" | AutoGauge | The number of registered/online/unknown datanodes   |
+
+### 4.1.2. IoTDB process
+| Metric            | Tags          | Type      | Description                                            |
+| ----------------- | ------------- | --------- | ------------------------------------------------------ |
+| process_cpu_load  | name="cpu"    | AutoGauge | The current CPU usage of IoTDB process, Unit: %        |
+| process_cpu_time  | name="cpu"    | AutoGauge | The total CPU time occupied of IoTDB process, Unit: ns |
+| process_max_mem   | name="memory" | AutoGauge | The maximum available memory of IoTDB process          |
+| process_total_mem | name="memory" | AutoGauge | The current requested memory for IoTDB process         |
+| process_free_mem  | name="memory" | AutoGauge | The free available memory of IoTDB process             |
+
+### 4.1.3. System
+| Metric                         | Tags          | Type      | Description                                                |
+| ------------------------------ | ------------- | --------- | ---------------------------------------------------------- |
+| sys_cpu_load                   | name="cpu"    | AutoGauge | The current CPU usage of system, Unit: %                   |
+| sys_cpu_cores                  | name="cpu"    | Gauge     | The available number of CPU cores                          |
+| sys_total_physical_memory_size | name="memory" | Gauge     | The maximum physical memory of system                      |
+| sys_free_physical_memory_size  | name="memory" | AutoGauge | The current available memory of system                     |
+| sys_total_swap_space_size      | name="memory" | AutoGauge | The maximum swap space of system                           |
+| sys_free_swap_space_size       | name="memory" | AutoGauge | The available swap space of system                         |
+| sys_committed_vm_size          | name="memory" | AutoGauge | The space of virtual memory available to running processes |
+| sys_disk_total_space           | name="disk"   | AutoGauge | The total disk space                                       |
+| sys_disk_free_space            | name="disk"   | AutoGauge | The available disk space                                   |
+
+## 4.2. Important level metrics
+
+### 4.2.1. Cluster
+| Metric                    | Tags                                              | Type  | Description                                      |
+| ------------------------- | ------------------------------------------------- | ----- | ------------------------------------------------ |
+| cluster_node_leader_count | name="{{ip}}:{{port}}"                            | Gauge | The count of consensus group leader on each node |
+| cluster_node_status       | name="{{ip}}:{{port}}",type="ConfigNode/DataNode" | Gauge | The current node status, 0=Unknown 1=online      |
+
+### 4.2.2. Node
+| Metric   | Tags                                       | Type      | Description                                                   |
+| -------- | ------------------------------------------ | --------- | ------------------------------------------------------------- |
+| quantity | name="database"                            | AutoGauge | The number of database                                        |
+| quantity | name="timeSeries"                          | AutoGauge | The number of timeseries                                      |
+| quantity | name="pointsIn"                            | Counter   | The number of write points                                    |
+| region   | name="total",type="SchemaRegion"           | AutoGauge | The total number of SchemaRegion in PartitionTable            |
+| region   | name="total",type="DataRegion"             | AutoGauge | The total number of DataRegion in PartitionTable              |
+| region   | name="{{ip}}:{{port}}",type="SchemaRegion" | Gauge     | The number of SchemaRegion in PartitionTable of specific node |
+| region   | name="{{ip}}:{{port}}",type="DataRegion"   | Gauge     | The number of DataRegion in PartitionTable of specific node   |
+
+### 4.2.3. MultiLeader
+| Metric       | Tags                                                                                         | Type      | Description                                                           |
+| ------------ | -------------------------------------------------------------------------------------------- | --------- | --------------------------------------------------------------------- |
+| mutli_leader | name="logDispatcher-{{IP}}:{{Port}}", region="{{region}}", type="currentSyncIndex"           | AutoGauge | The sync index of synchronization thread in replica group             |
+| mutli_leader | name="logDispatcher-{{IP}}:{{Port}}", region="{{region}}", type="cachedRequestInMemoryQueue" | AutoGauge | The size of cache requests of synchronization thread in replica group |
+| mutli_leader | name="multiLeaderServerImpl", region="{{region}}", type="searchIndex"                        | AutoGauge | The write process of main process in replica group                    |
+| mutli_leader | name="multiLeaderServerImpl", region="{{region}}", type="safeIndex"                          | AutoGauge | The sync index of replica group                                       |
+| stage        | name="multi_leader", region="{{region}}", type="getStateMachineLock"                         | Histogram | The time consumed to get statemachine lock in main process            |
+| stage        | name="multi_leader", region="{{region}}", type="checkingBeforeWrite"                         | Histogram | The time consumed to precheck before write in main process            |
+| stage        | name="multi_leader", region="{{region}}", type="writeStateMachine"                           | Histogram | The time consumed to write statemachine in main process               |
+| stage        | name="multi_leader", region="{{region}}", type="offerRequestToQueue"                         | Histogram | The time consumed to try to offer request to queue in main process    |
+| stage        | name="multi_leader", region="{{region}}", type="consensusWrite"                              | Histogram | The time consumed to the whole write in main process                  |
+| stage        | name="multi_leader", region="{{region}}", type="constructBatch"                              | Histogram | The time consumed to construct batch in synchronization thread        |
+| stage        | name="multi_leader", region="{{region}}", type="syncLogTimePerRequest"                       | Histogram | The time consumed to sync log in asynchronous callback process        |
+
+### 4.2.4. Cache
+| Metric    | Tags                               | Type      | Description                                                              |
+| --------- | ---------------------------------- | --------- | ------------------------------------------------------------------------ |
+| cache_hit | name="chunk"                       | AutoGauge | The cache hit ratio of ChunkCache, Unit: %                               |
+| cache_hit | name="schema"                      | AutoGauge | The cache hit ratio of SchemaCache, Unit: %                              |
+| cache_hit | name="timeSeriesMeta"              | AutoGauge | The cache hit ratio of TimeseriesMetadataCache, Unit: %                  |
+| cache_hit | name="bloomFilter"                 | AutoGauge | The interception rate of bloomFilter in TimeseriesMetadataCache, Unit: % |
+| cache     | name="StorageGroup", type="hit"    | Counter   | The hit number of StorageGroup Cache                                     |
+| cache     | name="StorageGroup", type="all"    | Counter   | The access number of StorageGroup Cache                                  |
+| cache     | name="SchemaPartition", type="hit" | Counter   | The hit number of SchemaPartition Cache                                  |
+| cache     | name="SchemaPartition", type="all" | Counter   | The access number of SchemaPartition Cache                               |
+| cache     | name="DataPartition", type="hit"   | Counter   | The hit number of DataPartition Cache                                    |
+| cache     | name="DataPartition", type="all"   | Counter   | The access number of DataPartition Cache                                 |
+
+### 4.2.5. Interface
+| Metric                | Tags                               | Type      | Description                                                    |
+| --------------------- | ---------------------------------- | --------- | -------------------------------------------------------------- |
+| operation             | name = "{{name}}"                  | Histogram | The time consumed of operations in client                      |
+| entry                 | name="{{interface}}"               | Timer     | The time consumed of thrift operations                         |
+| thrift_connections    | name="ConfigNodeRPC"               | AutoGauge | The number of thrift internal connections in ConfigNode        |
+| thrift_connections    | name="Internal"                    | AutoGauge | The number of thrift internal connections in DataNode          |
+| thrift_connections    | name="MPPDataExchange"             | AutoGauge | The number of thrift internal connections in MPP               |
+| thrift_connections    | name="RPC"                         | AutoGauge | The number of thrift connections of Client                     |
+| thrift_active_threads | name="ConfigNodeRPC-Service"       | AutoGauge | The number of thrift active internal connections in ConfigNode |
+| thrift_active_threads | name="DataNodeInternalRPC-Service" | AutoGauge | The number of thrift active internal connections in DataNode   |
+| thrift_active_threads | name="MPPDataExchangeRPC-Service"  | AutoGauge | The number of thrift active internal connections in MPP        |
+| thrift_active_threads | name="ClientRPC-Service"           | AutoGauge | The number of thrift active connections of client              |
+
+### 4.2.6. Memory
+| Metric | Tags                          | Type      | Description                                                        |
+| ------ | ----------------------------- | --------- | ------------------------------------------------------------------ |
+| mem    | name="database_{{name}}"      | AutoGauge | The memory usage of DataRegion in DataNode, Unit: byte             |
+| mem    | name="chunkMetaData_{{name}}" | AutoGauge | The memory usage of chunkMetaData when writing TsFile, Unit: byte  |
+| mem    | name="MultiLeaderConsensus"   | AutoGauge | The memory usage of MultiLeader consensus, Unit: byte              |
+
+### 4.2.7. Task
+| Metric    | Tags                                              | Type      | Description                           |
+| --------- | ------------------------------------------------- | --------- | ------------------------------------- |
+| queue     | name="compaction_inner", status="running/waiting" | Gauge     | The number of inner compaction tasks  |
+| queue     | name="compaction_cross", status="running/waiting" | Gauge     | The number of cross compaction tasks  |
+| cost_task | name="inner_compaction/cross_compaction/flush"    | Gauge     | The time consumed of compaction tasks |
+| queue     | name="flush",status="running/waiting"             | AutoGauge | The number of flush tasks             |
+| queue     | name="Sub_RawQuery",status="running/waiting"      | AutoGauge | The number of Sub_RawQuery            |
+
+
+### 4.2.8. Compaction
+| Metric                | Tags                                                | Type    | Description                               |
+| --------------------- | --------------------------------------------------- | ------- | ----------------------------------------- |
+| data_written          | name="compaction", type="aligned/not-aligned/total" | Counter | The written size of compaction            |
+| data_read             | name="compaction"                                   | Counter | The read size of compaction               |
+| compaction_task_count | name = "inner_compaction", type="sequence"          | Counter | The number of inner sequence compaction   |
+| compaction_task_count | name = "inner_compaction", type="unsequence"        | Counter | The number of inner unsequence compaction |
+| compaction_task_count | name = "cross_compaction", type="cross"             | Counter | The number of cross compaction            |
+
+### 4.2.9. File
+| Metric     | Tags         | Type      | Description                               |
+| ---------- | ------------ | --------- | ----------------------------------------- |
+| file_size  | name="wal"   | AutoGauge | The size of WAL file, Unit: byte          |
+| file_size  | name="seq"   | AutoGauge | The size of sequence TsFile, Unit: byte   |
+| file_size  | name="unseq" | AutoGauge | The size of unsequence TsFile, Unit: byte |
+| file_count | name="wal"   | AutoGauge | The count of WAL file                     |
+| file_count | name="seq"   | AutoGauge | The count of sequence TsFile              |
+| file_count | name="unseq" | AutoGauge | The count of unsequence TsFile            |
+
+### 4.2.10. IoTDB Process
+| Metric                | Tags           | Type      | Description                                 |
+| --------------------- | -------------- | --------- | ------------------------------------------- |
+| process_used_mem      | name="memory"  | AutoGauge | The used memory of IoTDB process            |
+| process_mem_ratio     | name="memory"  | AutoGauge | The used memory ratio of IoTDB process      |
+| process_threads_count | name="process" | AutoGauge | The number of thread of IoTDB process       |
+| process_status        | name="process" | AutoGauge | The status of IoTDB process, 1=live, 0=dead |
+
+### 4.2.11. Log
+| Metric         | Tags                                | Type    | Description              |
+| -------------- | ----------------------------------- | ------- | ------------------------ |
+| logback_events | level="trace/debug/info/warn/error" | Counter | The number of log events |
+
+### 4.2.12. JVM Thread
+
+| Metric                     | Tags                                                          | Type      | Description                              |
+| -------------------------- | ------------------------------------------------------------- | --------- | ---------------------------------------- |
+| jvm_threads_live_threads   |                                                               | AutoGauge | The number of live thread                |
+| jvm_threads_daemon_threads |                                                               | AutoGauge | The number of daemon thread              |
+| jvm_threads_peak_threads   |                                                               | AutoGauge | The number of peak thread                |
+| jvm_threads_states_threads | state="runnable/blocked/waiting/timed-waiting/new/terminated" | AutoGauge | The number of thread in different states |
+
+### 4.2.13. JVM GC
+| Metric                        | Tags                                                  | Type      | Description                                                                 |
+| ----------------------------- | ----------------------------------------------------- | --------- | --------------------------------------------------------------------------- |
+| jvm_gc_pause                  | action="end of major GC/end of minor GC",cause="xxxx" | Timer     | The number and time consumed of Young GC/Full GC caused by different reason |
+|                               |
+| jvm_gc_concurrent_phase_time  | action="{{action}}",cause="{{cause}}"                 | Timer     | The number and time consumed of Young GC/Full GC caused by different reason |
+|                               |
+| jvm_gc_max_data_size_bytes    |                                                       | AutoGauge | The historical maximum value of old memory                                  |
+| jvm_gc_live_data_size_bytes   |                                                       | AutoGauge | The usage of old memory                                                     |
+| jvm_gc_memory_promoted_bytes  |                                                       | Counter   | The accumulative value of positive memory growth of old memory              |
+| jvm_gc_memory_allocated_bytes |                                                       | Counter   | The accumulative value of positive memory growth of allocated memory        |
+
+### 4.2.14. JVM Memory
+| Metric                          | Tags                            | Type      | Description                 |
+| ------------------------------- | ------------------------------- | --------- | --------------------------- |
+| jvm_buffer_memory_used_bytes    | id="direct/mapped"              | AutoGauge | The used size of buffer     |
+| jvm_buffer_total_capacity_bytes | id="direct/mapped"              | AutoGauge | The max size of buffer      |
+| jvm_buffer_count_buffers        | id="direct/mapped"              | AutoGauge | The number of buffer        |
+| jvm_memory_committed_bytes      | {area="heap/nonheap",id="xxx",} | AutoGauge | The committed memory of JVM |
+| jvm_memory_max_bytes            | {area="heap/nonheap",id="xxx",} | AutoGauge | The max memory of JVM       |
+| jvm_memory_used_bytes           | {area="heap/nonheap",id="xxx",} | AutoGauge | The used memory of JVM      |
+
+### 4.2.15. JVM Class
+| Metric                       | Tags | Type      | Description                  |
+| ---------------------------- | ---- | --------- | ---------------------------- |
+| jvm_classes_unloaded_classes |      | AutoGauge | The number of unloaded class |
+| jvm_classes_loaded_classes   |      | AutoGauge | The number of loaded class   |
+
+### 4.2.16. JVM Compilation
+| Metric                  | Tags                                          | Type      | Description                      |
+| ----------------------- | --------------------------------------------- | --------- | -------------------------------- |
+| jvm_compilation_time_ms | {compiler="HotSpot 64-Bit Tiered Compilers",} | AutoGauge | The time consumed in compilation |
+
+## 4.3. Normal level Metrics
+
+### 4.3.1. Cluster
+| Metric | Tags                                                               | Type      | Description                                                        |
+| ------ | ------------------------------------------------------------------ | --------- | ------------------------------------------------------------------ |
+| region | name="{{storageGroupName}}",type="SchemaRegion/DataRegion"         | AutoGauge | The number of DataRegion/SchemaRegion of database in specific node |
+| slot   | name="{{storageGroupName}}",type="schemaSlotNumber/dataSlotNumber" | AutoGauge | The number of DataSlot/SchemaSlot of database in specific node     |
+
+## 4.4. All Metric
+Currently there are no All level metrics; they will be added in the future.
+
+# 5. How to get these metrics?
 
-  Each metric could have 0 or several sub classes (Tag), for the same example, the ```logback_events_total``` metric has a sub class named ```level```, which means ```the total count of log events at the specific level```
-
-### 1.3.2. Data Format
-
-IoTDB provides metrics data both in JMX and Prometheus format. For JMX, you can get these metrics via ```org.apache.iotdb.metrics```.  
-
-Next, we will choose Prometheus format data as samples to describe each kind of metric.
-
-### 1.3.3. IoTDB Metrics
-
-#### 1.3.3.1. API
-
-| Metric                | Tag                      | level     | Description                              | Sample                                       |
-| --------------------- | ------------------------ |-----------| ---------------------------------------- | -------------------------------------------- |
-| entry_seconds_count   | name="{{interface}}"     | important | The total request count of the interface | entry_seconds_count{name="openSession",} 1.0 |
-| entry_seconds_sum     | name="{{interface}}"     | important | The total cost seconds of the interface  | entry_seconds_sum{name="openSession",} 0.024 |
-| entry_seconds_max     | name="{{interface}}"     | important | The max latency of the interface         | entry_seconds_max{name="openSession",} 0.024 |
-| quantity_total        | name="pointsIn"          | important | The total points inserted into IoTDB     | quantity_total{name="pointsIn",} 1.0         |
-| thrift_connections    | name="{{thriftService}}" | important | current number of thrift connections     | thrift_connections{name="RPC",} 1.0          |
-| thrift_active_threads | name="{{thriftThread}}"  | important | current number if thrift worker threads  | thrift_active_threads{name="RPC",} 1.0       |
-
-#### 1.3.3.2. Task
-| Metric                      | Tag                                                                           | level     | Description                                              | Sample                                                                                  |
-| --------------------------- | ----------------------------------------------------------------------------- | --------- | -------------------------------------------------------- | --------------------------------------------------------------------------------------- |
-| queue                       | name="compaction_inner/compaction_cross/flush",<br />status="running/waiting" | important | The count of current tasks in running and waiting status | queue{name="flush",status="waiting",} 0.0<br/>queue{name="flush",status="running",} 0.0 |
-| cost_task_seconds_count     | name="inner_compaction/cross_compaction/flush"                                | important | The total count of tasks occurs till now                 | cost_task_seconds_count{name="flush",} 1.0                                              |
-| cost_task_seconds_max       | name="inner_compaction/cross_compaction/flush"                                | important | The seconds of the longest task takes till now           | cost_task_seconds_max{name="flush",} 0.363                                              |
-| cost_task_seconds_sum       | name="inner_compaction/cross_compaction/flush"                                | important | The total cost seconds of all tasks till now             | cost_task_seconds_sum{name="flush",} 0.363                                              |
-| data_written_total          | name="compaction", <br />type="aligned/not-aligned/total"                     | important | The size of data written in compaction                   | data_written_total{name="compaction",type="total",} 10240                               |
-| data_read_total             | name="compaction"                                                             | important | The size of data read in compaction                      | data_read_total{name="compaction",} 10240                                               |
-| compaction_task_count_total | name = "inner_compaction/cross_compaction", type="sequence/unsequence/cross"  | important | The number of compaction task                            | compaction_task_count_total{name="inner_compaction",type="sequence",} 1                 |
-
-#### 1.3.3.3. Memory Usage
+The relevant configuration of the metric module is in `conf/iotdb-{datanode/confignode}.properties`, and all configuration items support hot loading through the `load configuration` command.
 
-| Metric | Tag                                     | level     | Description                                                           | Sample                            |
-| ------ | --------------------------------------- | --------- | --------------------------------------------------------------------- | --------------------------------- |
-| mem    | name="chunkMetaData/storageGroup/mtree" | important | Current memory size of chunkMetaData/storageGroup/mtree data in bytes | mem{name="chunkMetaData",} 2050.0 |
+## 5.1. JMX
+For metrics exposed externally using JMX, you can view them through JConsole. After entering the JConsole monitoring page, you will first see an overview of various running conditions of IoTDB. Here you can see heap memory information, thread information, class information, and the server's CPU usage.
 
-#### 1.3.3.4. Cache
+### 5.1.1. Obtain metric data
+After connecting to JMX, you can find the "MBean" named "org.apache.iotdb.metrics" through the "MBeans" tab, and you can view the specific values of all monitoring metrics in the sidebar.
 
-| Metric      | Tag                                                               | level     | Description                                                                               | Sample                                              |
-| ----------- | ----------------------------------------------------------------- | --------- | ----------------------------------------------------------------------------------------- | --------------------------------------------------- |
-| cache_hit   | name="chunk/timeSeriesMeta/bloomFilter/SchemaCache"               | important | Cache hit ratio of chunk/timeSeriesMeta/SchemaCache  and prevention ratio of bloom filter | cache_hit{name="chunk",} 80                         |
-| cache_total | name="StorageGroup/SchemaPartition/DataPartition", type="hit/all" | important | The cache hit/all counts of StorageGroup/SchemaPartition/DataPartition                    | cache_total{name="DataPartition",type="all",} 801.0 |
+<img style="width:100%; max-width:800px; max-height:600px; margin-left:auto; margin-right:auto; display:block;" alt="metric-jmx" src="https://user-images.githubusercontent.com/46039728/204018765-6fda9391-ebcf-4c80-98c5-26f34bd74df0.png">
 
+### 5.1.2. Get other relevant data
+After connecting to JMX, you can find the "MBean" named "org.apache.iotdb.service" through the "MBeans" tab, as shown in the image below, to understand the basic status of the service.
 
-#### 1.3.3.5. Business Data
+<img style="width:100%; max-width:800px; max-height:600px; margin-left:auto; margin-right:auto; display:block;" src="https://user-images.githubusercontent.com/46039728/149951720-707f1ee8-32ee-4fde-9252-048caebd232e.png"> <br>
 
-| Metric   | Tag                                   | level     | Description                                                   | Sample                           |
-| -------- | ------------------------------------- | --------- | ------------------------------------------------------------- | -------------------------------- |
-| quantity | name="timeSeries/storageGroup/device" | important | The current count of timeSeries/storageGroup/devices in IoTDB | quantity{name="timeSeries",} 1.0 |
+In order to improve query performance, IoTDB caches ChunkMetaData and TsFileMetaData. Users can use MXBean and expand the sidebar `org.apache.iotdb.db.service` to view the cache hit ratio:
 
-#### 1.3.3.6. Cluster
+<img style="width:100%; max-width:800px; max-height:600px; margin-left:auto; margin-right:auto; display:block;" src="https://user-images.githubusercontent.com/19167280/112426760-73e3da80-8d73-11eb-9a8f-9232d1f2033b.png">
 
-##### The status of cluster
-| Metric                    | Tag                                                                | level     | Description                                                                                  | Sample                                                                       |
-| ------------------------- | ------------------------------------------------------------------ | --------- | -------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------- |
-| cluster_node_leader_count | name="{{ip}}:{{port}}"                                             | important | The count of  ```dataGroupLeader``` on each node, which reflects the distribution of leaders | cluster_node_leader_count{name="127.0.0.1",} 2.0                             |
-| cluster_uncommitted_log   | name="{{ip_datagroupHeader}}"                                      | important | The count of ```uncommitted_log``` on each node in data groups it belongs to                 | cluster_uncommitted_log{name="127.0.0.1_Data-127.0.0.1-40010-raftId-0",} 0.0 |
-| cluster_node_status       | name="{{ip}}:{{port}}",type="ConfigNode/DataNode"                  | important | The current node status, 0=Unkonwn 1=online                                                  | cluster_node_status{name="0.0.0.0:22277",type="ConfigNode",} 1.0             |
-| cluster_elect_total       | name="{{ip}}",status="fail/win"                                    | important | The count and result (won or failed) of elections the node participated in.                  | cluster_elect_total{name="127.0.0.1",status="win",} 1.0                      |
-| config_node               | name="total",status="Registered/Online/Unknown"                    | core      | The number of registered/online/offline confignodes                                          | config_node{name="total",status="Online",} 3.0                               |
-| data_node                 | name="total",status="Registered/Online/Unknown"                    | core      | The number of registered/online/offline datanodes                                            | data_node{name="total",status="Registered",} 3.0                             |
-| partition_table           | name="number"                                                      | core      | The number of partition table                                                                | partition_table{name="number",} 2.0                                          |
-| region                    | name="total/{{ip}}:{{port}}",type="SchemaRegion/DataRegion"        | important | The number of schemaRegion/dataRegion of cluster or specific node                            | region{name="127.0.0.1:6671",type="DataRegion",} 10.0                        |
-| region                    | name="{{storageGroupName}}",type="SchemaRegion/DataRegion"         | normal    | The number of DataRegion/SchemaRegion in database                                       | region{name="root.schema.sg1",type="DataRegion",} 14.0                       |
-| slot                      | name="{{storageGroupName}}",type="schemaSlotNumber/dataSlotNumber" | normal    | The number of dataSlot/schemaSlot in database                                           | slot{name="root.schema.sg1",type="schemaSlotNumber",} 2.0                    |
-
-##### 1.3.3.6.2. MultiLeader
-| Metric       | Tag                                                                                          | level     | 说明                                                                          | 示例                                                                                                             |
-| ------------ | -------------------------------------------------------------------------------------------- | --------- | ----------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------- |
-| mutli_leader | name="multiLeaderServerImpl", region="{{region}}", type="searchIndex/safeIndex"              | core      | The searchIndex and safeIndex of region in multiLeader                        | multi_leader{name="multiLeaderServerImpl",region="DataRegion[7]",type="searchIndex",} 1945.0                     |
-| mutli_leader | name="logDispatcher-{{IP}}:{{Port}}", region="{{region}}", type="currentSyncIndex"           | important | The currentSyncIndex of LogDispatcherThread of related region                 | multi_leader{name="logDispatcher-127.0.0.1:40014",region="DataRegion[7]",type="currentSyncIndex",} 1945.0        |
-| mutli_leader | name="logDispatcher-{{IP}}:{{Port}}", region="{{region}}", type="cachedRequestInMemoryQueue" | important | The total size of queues that buffers requests in LogDispatcher               | multi_leader{name="logDispatcher-127.0.0.1:40014",region="DataRegion[9]",type="cachedRequestInMemoryQueue",} 0.0 |
-| stage        | name="multi_leader", region="{{region}}", type="getStateMachineLock"                         | important | The time consumed to get lock of statemachine in multiLeader                  | stage{name="multi_leader",region="DataRegion[6]",type="getStateMachineLock",quantile="0.5",} 0.0                 |
-| stage        | name="multi_leader", region="{{region}}", type="checkingBeforeWrite"                         | important | The time consumed to check before write in multiLeader                        | stage{name="multi_leader",region="DataRegion[5]",type="checkingBeforeWrite",quantile="0.5",} 0.0                 |
-| stage        | name="multi_leader", region="{{region}}", type="writeStateMachine"                           | important | The time consumed to write consensus request into statemachine in multiLeader | stage{name="multi_leader",region="DataRegion[6]",type="writeStateMachine",quantile="0.5",} 1.0                   |
-| stage        | name="multi_leader", region="{{region}}", type="offerRequestToQueue"                         | important | The time consumed to try to offer request into queue in multiLeader           | stage{name="multi_leader",region="DataRegion[6]",type="offerRequestToQueue",quantile="0.5",} 1.0                 |
-| stage        | name="multi_leader", region="{{region}}", type="consensusWrite"                              | important | The total time consumed to write a consensus request in multiLeader           | stage{name="multi_leader",region="DataRegion[6]",type="consensusWrite",quantile="0.5",} 2.0625                   |
-| stage        | name="multi_leader", region="{{region}}", type="constructBatch"                              | important | The time consumed to construct batch in LogDispatcher per request             | stage{name="multi_leader",region="DataRegion[7]",type="constructBatch",quantile="0.5",} 0.0                      |
-| stage        | name="multi_leader", region="{{region}}", type="syncLogTimePerRequest"                       | important | The time consumed to sync one request in multiLeader                          | stage{name="multi_leader",region="DataRegion[7]",type="syncLogTimePerRequest",quantile="0.5",} 0.0               |
-
-### 1.3.4. IoTDB PreDefined Metrics Set
-
-#### 1.3.4.1. JVM
-
-##### 1.3.4.1.1. Threads
-
-| Metric                     | Tag                                                           | level     | Description                          | Sample                                             |
-| -------------------------- | ------------------------------------------------------------- | --------- | ------------------------------------ | -------------------------------------------------- |
-| jvm_threads_live_threads   | None                                                          | Important | The current count of threads         | jvm_threads_live_threads 25.0                      |
-| jvm_threads_daemon_threads | None                                                          | Important | The current count of  daemon threads | jvm_threads_daemon_threads 12.0                    |
-| jvm_threads_peak_threads   | None                                                          | Important | The max count of threads till now    | jvm_threads_peak_threads 28.0                      |
-| jvm_threads_states_threads | state="runnable/blocked/waiting/timed-waiting/new/terminated" | Important | The count of threads in each status  | jvm_threads_states_threads{state="runnable",} 10.0 |
-
-##### 1.3.4.1.2. GC
-
-| Metric                              | Tag                                                    | level     | Description                                                                                             | Sample                                                                                  |
-| ----------------------------------- | ------------------------------------------------------ | --------- | ------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------- |
-| jvm_gc_pause_seconds_count          | action="end of major GC/end of minor GC",cause="xxxx"  | Important | The total count of YGC/FGC events and its cause                                                         | jvm_gc_pause_seconds_count{action="end of major GC",cause="Metadata GC Threshold",} 1.0 |
-| jvm_gc_pause_seconds_sum            | action="end of major GC/end of minor GC",cause="xxxx"  | Important | The total cost seconds of YGC/FGC and its cause                                                         | jvm_gc_pause_seconds_sum{action="end of major GC",cause="Metadata GC Threshold",} 0.03  |
-| jvm_gc_pause_seconds_max            | action="end of major GC",cause="Metadata GC Threshold" | Important | The max  cost seconds of YGC/FGC till now and its cause                                                 | jvm_gc_pause_seconds_max{action="end of major GC",cause="Metadata GC Threshold",} 0.0   |
-| jvm_gc_memory_promoted_bytes_total  | None                                                   | Important | Count of positive increases in the size of the old generation memory pool before GC to after GC         | jvm_gc_memory_promoted_bytes_total 8425512.0                                            |
-| jvm_gc_max_data_size_bytes          | None                                                   | Important | Max size of long-lived heap memory pool                                                                 | jvm_gc_max_data_size_bytes 2.863661056E9                                                |
-| jvm_gc_live_data_size_bytes         | None                                                   | Important | Size of long-lived heap memory pool after reclamation                                                   | jvm_gc_live_data_size_bytes 8450088.0                                                   |
-| jvm_gc_memory_allocated_bytes_total | None                                                   | Important | Incremented for an increase in the size of the (young) heap memory pool after one GC to before the next | jvm_gc_memory_allocated_bytes_total 4.2979144E7                                         |
-
-##### 1.3.4.1.3. Memory
-
-| Metric                          | Tag                             | level     | Description                                                                           | Sample                                                                                                                                                        |
-| ------------------------------- | ------------------------------- | --------- | ------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| jvm_buffer_memory_used_bytes    | id="direct/mapped"              | Important | An estimate of the memory that the Java virtual machine is using for this buffer pool | jvm_buffer_memory_used_bytes{id="direct",} 3.46728099E8                                                                                                       |
-| jvm_buffer_total_capacity_bytes | id="direct/mapped"              | Important | An estimate of the total capacity of the buffers in this pool                         | jvm_buffer_total_capacity_bytes{id="mapped",} 0.0                                                                                                             |
-| jvm_buffer_count_buffers        | id="direct/mapped"              | Important | An estimate of the number of buffers in the pool                                      | jvm_buffer_count_buffers{id="direct",} 183.0                                                                                                                  |
-| jvm_memory_committed_bytes      | {area="heap/nonheap",id="xxx",} | Important | The amount of memory in bytes that is committed for the Java virtual machine to use   | jvm_memory_committed_bytes{area="heap",id="Par Survivor Space",} 2.44252672E8<br/>jvm_memory_committed_bytes{area="nonheap",id="Metaspace",} 3.9051264E7<br/> |
-| jvm_memory_max_bytes            | {area="heap/nonheap",id="xxx",} | Important | The maximum amount of memory in bytes that can be used for memory management          | jvm_memory_max_bytes{area="heap",id="Par Survivor Space",} 2.44252672E8<br/>jvm_memory_max_bytes{area="nonheap",id="Compressed Class Space",} 1.073741824E9   |
-| jvm_memory_used_bytes           | {area="heap/nonheap",id="xxx",} | Important | The amount of used memory                                                             | jvm_memory_used_bytes{area="heap",id="Par Eden Space",} 1.000128376E9<br/>jvm_memory_used_bytes{area="nonheap",id="Code Cache",} 2.9783808E7<br/>             |
-
-##### 1.3.4.1.4. Classes
-
-| Metric                       | Tag                                           | level     | Description                                                                               | Sample                                                                        |
-| ---------------------------- | --------------------------------------------- | --------- | ----------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------- |
-| jvm_classes_unloaded_classes | None                                          | Important | The total number of classes unloaded since the Java virtual machine has started execution | jvm_classes_unloaded_classes 680.0                                            |
-| jvm_classes_loaded_classes   | None                                          | Important | The number of classes that are currently loaded in the Java virtual machine               | jvm_classes_loaded_classes 5975.0                                             |
-| jvm_compilation_time_ms      | {compiler="HotSpot 64-Bit Tiered Compilers",} | Important | The approximate accumulated elapsed time spent in compilation                             | jvm_compilation_time_ms{compiler="HotSpot 64-Bit Tiered Compilers",} 107092.0 |
-
-#### 1.3.4.2. File
-
-| Metric     | Tag                  | level     | Description                                     | Sample                      |
-| ---------- | -------------------- | --------- | ----------------------------------------------- | --------------------------- |
-| file_size  | name="wal/seq/unseq" | important | The current file size of wal/seq/unseq in bytes | file_size{name="wal",} 67.0 |
-| file_count | name="wal/seq/unseq" | important | The current count of wal/seq/unseq files        | file_count{name="seq",} 1.0 |
-
-#### 1.3.4.3. Logback
-
-| Metric               | Tag                                    | level     | Description                                                   | 示例                                    |
-| -------------------- | -------------------------------------- | --------- | ------------------------------------------------------------- | --------------------------------------- |
-| logback_events_total | {level="trace/debug/info/warn/error",} | Important | The count of  trace/debug/info/warn/error log events till now | logback_events_total{level="warn",} 0.0 |
-
-#### 1.3.4.4. Process
-| Metric                | Tag            | level     | Description                                                                   | 示例                                            |
-| --------------------- | -------------- |-----------| ----------------------------------------------------------------------------- | ----------------------------------------------- |
-| process_cpu_load      | name="cpu"     | core      | current process CPU Usage (%)                                                 | process_cpu_load{name="process",} 5.0           |
-| process_cpu_time      | name="cpu"     | core      | total Process CPU Time Occupied (ns)                                          | process_cpu_time{name="process",} 3.265625E9    |
-| process_max_mem       | name="memory"  | core      | The maximum available memory for the JVM                                      | process_max_mem{name="process",} 3.545759744E9  |
-| process_used_mem      | name="memory"  | important | The current available memory for the JVM                                      | process_used_mem{name="process",} 4.6065456E7   |
-| process_total_mem     | name="memory"  | core      | The current requested memory for the JVM                                      | process_total_mem{name="process",} 2.39599616E8 |
-| process_free_mem      | name="memory"  | core      | The free available memory for the JVM                                         | process_free_mem{name="process",} 1.94035584E8  |
-| process_mem_ratio     | name="memory"  | important | Memory footprint ratio of process                                             | process_mem_ratio{name="process",} 0.0          |
-| process_threads_count | name="process" | important | The current number of threads                                                 | process_threads_count{name="process",} 11.0     |
-| process_status        | name="process" | important | The process survivor status, 1.0 means survivorship, and 0.0 means terminated | process_status{name="process",} 1.0             |
-
-#### 1.3.4.5. System
-| Metric                         | Tag           | level     | Description                                                 | 示例                                                           |
-| ------------------------------ | ------------- | --------- | ----------------------------------------------------------- | -------------------------------------------------------------- |
-| sys_cpu_load                   | name="cpu"    | core      | current system CPU Usage(%)                                 | sys_cpu_load{name="system",} 15.0                              |
-| sys_cpu_cores                  | name="cpu"    | core      | available CPU cores                                         | sys_cpu_cores{name="system",} 16.0                             |
-| sys_total_physical_memory_size | name="memory" | core      | Maximum physical memory of system                           | sys_total_physical_memory_size{name="system",} 1.5950999552E10 |
-| sys_free_physical_memory_size  | name="memory" | core      | The current available memory of system                      | sys_free_physical_memory_size{name="system",} 4.532396032E9    |
-| sys_total_swap_space_size      | name="memory" | core      | The maximum swap area of system                             | sys_total_swap_space_size{name="system",} 2.1051273216E10      |
-| sys_free_swap_space_size       | name="memory" | core      | The available swap area of system                           | sys_free_swap_space_size{name="system",} 2.931576832E9         |
-| sys_committed_vm_size          | name="memory" | important | the amount of virtual memory available to running processes | sys_committed_vm_size{name="system",} 5.04344576E8             |
-| sys_disk_total_space           | name="disk"   | core      | The total disk space                                        | sys_disk_total_space{name="system",} 5.10770798592E11          |
-| sys_disk_free_space            | name="disk"   | core      | The available  disk space                                   | sys_disk_free_space{name="system",} 3.63467845632E11           |
-
-### 1.3.5. Add custom metrics
-- If you want to add your own metrics data in IoTDB, please see the [IoTDB Metric Framework] (https://github.com/apache/iotdb/tree/master/metrics) document.
-- Metric embedded point definition rules
-  - `Metric`: The name of the monitoring item. For example, `entry_seconds_count` is the cumulative number of accesses to the interface, and `file_size` is the total number of files.
-  - `Tags`: Key-Value pair, used to identify monitored items, optional
-    - `name = xxx`: The name of the monitored item. For example, for the monitoring item`entry_seconds_count`, the meaning of name is the name of the monitored interface.
-    - `status = xxx`: The status of the monitored item is subdivided. For example, the monitoring item of the monitoring task can use this parameter to separate the running task and the stopped task.
-    - `user = xxx`: The monitored item is related to a specific user, such as the total number of writes by the root user.
-    - Customize for the situation...
-- Monitoring indicator level meaning:
-  - The default startup level for online operation is `Important` level, the default startup level for offline debugging is `Normal` level, and the audit strictness is `Core > Important > Normal > All`
-  - `Core`: The core indicator of the system, used by the **operation and maintenance personnel**, which is related to the performance, stability, and security** of the system, such as the status of the instance, the load of the system, etc.
-  - `Important`: An important indicator of the module, which is used by **operation and maintenance and testers**, and is directly related to **the running status of each module**, such as the number of merged files, execution status, etc.
-  - `Normal`: General indicators of the module, used by **developers** to facilitate **locating the module** when problems occur, such as specific key operation situations in the merger.
-  - `All`: All indicators of the module, used by **module developers**, often used when the problem is reproduced, so as to solve the problem quickly.
-
-## 1.4. How to get these metrics?
+## 5.2. Prometheus
 
-The relevant configuration of the metric module is in `conf/iotdb-{datanode/confignode}.properties`, and all configuration items support hot loading through the `load configuration` command.
+### 5.2.1. The mapping from metric type to Prometheus format
+> For a metric whose Metric Name is `name` and whose Tags are `K1=V1, ..., Kn=Vn`, the mapping is as follows, where `value` is the specific metric value.
 
-### 1.4.1. Config File
+| Metric Type      | Mapping                                                                                                                                                                                                                                                                                                                                                                                                                                                                            |
+| ---------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| Counter          | name_total{k1="V1", ..., Kn="Vn"} value                                                                                                                                                                                                                                                                                                                                                                                                                                            |
+| AutoGauge、Gauge | name{k1="V1", ..., Kn="Vn"} value                                                                                                                                                                                                                                                                                                                                                                                                                                                  |
+| Histogram        | name_max{k1="V1", ..., Kn="Vn"} value <br> name_sum{k1="V1", ..., Kn="Vn"} value <br> name_count{k1="V1", ..., Kn="Vn"} value <br> name{k1="V1", ..., Kn="Vn", quantile="0.0"} value <br> name{k1="V1", ..., Kn="Vn", quantile="0.25"} value <br> name{k1="V1", ..., Kn="Vn", quantile="0.5"} value <br> name{k1="V1", ..., Kn="Vn", quantile="0.75"} value <br> name{k1="V1", ..., Kn="Vn", quantile="1.0"} value                                                                 |
+| Rate             | name_total{k1="V1", ..., Kn="Vn"} value <br> name_total{k1="V1", ..., Kn="Vn", rate="m1"} value <br> name_total{k1="V1", ..., Kn="Vn", rate="m5"} value  <br> name_total{k1="V1", ..., Kn="Vn", rate="m15"} value <br> name_total{k1="V1", ..., Kn="Vn", rate="mean"} value                                                                                                                                                                                                        |
+| Timer            | name_seconds_max{k1="V1", ..., Kn="Vn"} value <br> name_seconds_sum{k1="V1", ..., Kn="Vn"} value <br> name_seconds_count{k1="V1", ..., Kn="Vn"} value <br> name_seconds{k1="V1", ..., Kn="Vn", quantile="0.0"} value <br> name_seconds{k1="V1", ..., Kn="Vn", quantile="0.25"} value <br> name_seconds{k1="V1", ..., Kn="Vn", quantile="0.5"} value <br> name_seconds{k1="V1", ..., Kn="Vn", quantile="0.75"} value <br> name_seconds{k1="V1", ..., Kn="Vn", quantile="1.0"} value |
 
-Take DataNode as example:
+### 5.2.2. Config File
+1) Taking DataNode as an example, modify the iotdb-datanode.properties configuration file as follows:
 
 ```properties
-# Whether enable metric module
-# Datatype: boolean
-dn_enable_metric=true
-
-# The reporters of metric module to report metrics
-# If there are more than one reporter, please separate them by commas ",".
-# Options: [JMX, PROMETHEUS, IOTDB]
-# Datatype: String
-dn_metric_reporter_list=JMX,PROMETHEUS
-
-# The level of metric module
-# Options: [Core, Important, Normal, All]
-# Datatype: String
+dn_metric_reporter_list=PROMETHEUS
 dn_metric_level=CORE
-
-# The port of prometheus reporter of metric module
-# Datatype: int
 dn_metric_prometheus_reporter_port=9091
 ```
 
 Then you can get metrics data as follows
 
-1. Modify parameters above in config file
-2. Start/Restart your IoTDB
-3. Open your browser or use the ```curl``` command to request ```http://servier_ip:9091/metrics```,then you will get metrics data like follows:
+2) Start IoTDB DataNodes
+3) Open a browser or use ```curl``` to visit ```http://server_ip:9091/metrics```, and you will get metric data like the following:
+
 
 ```
 ...
@@ -292,37 +328,12 @@ Then you can get metrics data as follows
 file_count{name="wal",} 0.0
 file_count{name="unseq",} 0.0
 file_count{name="seq",} 2.0
-# HELP file_size
-# TYPE file_size gauge
-file_size{name="wal",} 0.0
-file_size{name="unseq",} 0.0
-file_size{name="seq",} 560.0
-# HELP queue
-# TYPE queue gauge
-queue{name="flush",status="waiting",} 0.0
-queue{name="flush",status="running",} 0.0
-# HELP quantity
-# TYPE quantity gauge
-quantity{name="timeSeries",} 1.0
-quantity{name="storageGroup",} 1.0
-quantity{name="device",} 1.0
-# HELP logback_events_total Number of error level events that made it to the logs
-# TYPE logback_events_total counter
-logback_events_total{level="warn",} 0.0
-logback_events_total{level="debug",} 2760.0
-logback_events_total{level="error",} 0.0
-logback_events_total{level="trace",} 0.0
-logback_events_total{level="info",} 71.0
-# HELP mem
-# TYPE mem gauge
-mem{name="storageGroup",} 0.0
-mem{name="mtree",} 1328.0
 ...
 ```
 
-### 1.4.2. Integrating with Prometheus and Grafana
+### 5.2.3. Prometheus + Grafana
 
-As above descriptions,IoTDB provides metrics data in standard Prometheus format,so we can integrate with Prometheus and Grafana directly. 
+As shown above, IoTDB exposes monitoring metrics data in the standard Prometheus format to the outside world. Prometheus can be used to collect and store monitoring indicators, and Grafana can be used to visualize monitoring indicators.
 
 The following picture describes the relationships among IoTDB, Prometheus and Grafana
 
@@ -361,19 +372,19 @@ The following documents may help you have a good journey with Prometheus and Gra
 
 [Grafana query metrics from Prometheus](https://prometheus.io/docs/visualization/grafana/#grafana-support-for-prometheus)
 
-### 1.4.3. Apache IoTDB Dashboard
+### 5.2.4. Apache IoTDB Dashboard
 We provide the Apache IoTDB Dashboard, and the rendering shown in Grafana is as follows:
 
 ![Apache IoTDB Dashboard](https://github.com/apache/iotdb-bin-resources/blob/main/docs/UserGuide/System%20Tools/Metrics/dashboard.png)
 
-#### 1.4.3.1. How to get Apache IoTDB Dashboard
+#### 5.2.4.1. How to get Apache IoTDB Dashboard
 
 1. You can obtain the json files of Dashboards corresponding to different iotdb versions in the grafana-metrics-example folder.
 2. You can visit [Grafana Dashboard official website](https://grafana.com/grafana/dashboards/), search for `Apache IoTDB Dashboard` and use it.
 
 When creating Grafana, you can select the json file you just downloaded to `Import` and select the corresponding target data source for Apache IoTDB Dashboard.
 
-#### 1.4.3.2. Apache IoTDB StandaAlone Dashboard Instructions
+#### 5.2.4.2. Apache IoTDB StandAlone Dashboard Instructions
 > Except for the metrics specified specially, the following metrics are guaranteed to be available in the monitoring framework at the Important levels.
 
 1. `Overview`:
@@ -396,7 +407,7 @@ When creating Grafana, you can select the json file you just downloaded to `Impo
    6. `Off-heap Memory`: The off-heap memory of IoTDB
    7. `The number of Java Thread`: The number of threads in different states of IoTDB
 
-#### 1.4.3.3. Apache IoTDB ConfigNode Dashboard Instructions
+#### 5.2.4.3. Apache IoTDB ConfigNode Dashboard Instructions
 > Except for the metrics specified specially, the following metrics are guaranteed to be available in the monitoring framework at the Important levels.
 
 1. `Overview`:
@@ -427,7 +438,7 @@ When creating Grafana, you can select the json file you just downloaded to `Impo
    7. `CPU Load`:The load of cpu
    8. `Memory`:The size of system memory and used system memory
 
-#### 1.4.3.4. Apache IoTDB DataNode Dashboard Instructions
+#### 5.2.4.4. Apache IoTDB DataNode Dashboard Instructions
 > Except for the metrics specified specially, the following metrics are guaranteed to be available in the monitoring framework at the Important levels.
 
 1. `Overview`:
@@ -469,41 +480,3 @@ When creating Grafana, you can select the json file you just downloaded to `Impo
    4. `Off-heap Memory`: The off-heap memory of IoTDB
    5. `The number of Java Thread`: The number of threads in different states of IoTDB
 
-# 2. System Status Monitoring
-
-After starting JConsole tool and connecting to IoTDB server, a basic look at IoTDB system status(CPU Occupation, in-memory information, etc.) is provided. See [official documentation](https://docs.oracle.com/javase/7/docs/technotes/guides/management/jconsole.html) for more information.
-
-# 3. JMX MBean Monitoring
-By using JConsole tool and connecting with JMX you are provided with some system statistics and parameters.
-
-This section describes how to use the JConsole ```Mbean```tab of jconsole to monitor some system configurations of IoTDB, the statistics of writing, and so on. After connecting to JMX, you can find the "MBean" of "org.apache.iotdb.service", as shown in the figure below.
-
-<img style="width:100%; max-width:800px; max-height:600px; margin-left:auto; margin-right:auto; display:block;" src="https://user-images.githubusercontent.com/46039728/149951720-707f1ee8-32ee-4fde-9252-048caebd232e.png"> <br>
-
-# 4. Performance Monitor
-
-## 4.1. Introduction
-
-The performance monitoring module is used to monitor the time-consuming of each operation of IOTDB, so that users can better understand the overall performance of the database. The module counts the lower quartile, median, upper quartile, and maximum value for each operation. Current operations include `EXECUTE_BATCH`, `EXECUTE_ONE_SQL_IN_BATCH`, and `EXECUTE_QUERY`.
-
-## 4.2. Configuration parameter
-
-- location
-  - datanode:conf/iotdb-datanode.properties
-  - confignode:conf/iotdb-confignode.properties
-
-<center>
-
-**Table -parameter and description**
-
-| Parameter                 | Default Value | Description                               |
-| :------------------------ | :------------ | :---------------------------------------- |
-| enable\_performance\_stat | false         | Is stat performance of sub-module enable. |
-</center>
-
-# 5. Cache Hit Ratio Statistics
-To improve query performance, IOTDB caches ChunkMetaData and TsFileMetaData. Users can view the cache hit ratio through debug level log and MXBean, and adjust the memory occupied by the cache according to the cache hit ratio and system memory. The method of using MXBean to view cache hit ratio is as follows:
-1. Connect to jconsole with port 31999 and select 'MBean' in the menu item above.
-2. Expand the sidebar and select 'org.apache.iotdb.db.service'. You will get the results shown in the following figure:
-
-<img style="width:100%; max-width:800px; max-height:600px; margin-left:auto; margin-right:auto; display:block;" src="https://user-images.githubusercontent.com/19167280/112426760-73e3da80-8d73-11eb-9a8f-9232d1f2033b.png">
diff --git a/docs/zh/UserGuide/Monitor-Alert/Metric-Tool.md b/docs/zh/UserGuide/Monitor-Alert/Metric-Tool.md
index 83b5f229db..2daec75224 100644
--- a/docs/zh/UserGuide/Monitor-Alert/Metric-Tool.md
+++ b/docs/zh/UserGuide/Monitor-Alert/Metric-Tool.md
@@ -19,13 +19,9 @@
 
 -->
 
-当前用户可以使用多种手段对正在运行的IoTDB进程进行系统监控,包括使用 Java 的 Jconsole 工具对正在运行的 IoTDB 进程进行系统状态监控,使用 IoTDB 为用户开发的接口查看数据统计量,使用监控框架进行 IoTDB 的运行状态监控
+在 IoTDB 的运行过程中,我们希望对 IoTDB 的状态进行观测,以便于排查系统问题或者及时发现系统潜在的风险,能够**反映系统运行状态的一系列指标**就是系统监控指标。
 
-# 1. 监控框架
-
-在IoTDB运行过程中,我们希望对IoTDB的状态进行观测,以便于排查系统问题或者及时发现系统潜在的风险。能**反映系统运行状态的一系列指标**就是系统监控指标。
-
-## 1.1. 什么场景下会使用到监控框架?
+# 1. 什么场景下会使用到监控?
 
 那么什么时候会用到监控框架呢?下面列举一些常见的场景。
 
@@ -48,239 +44,277 @@
 
    此时我们可能需要通过错误日志的数量、集群节点的状态等指标来判断系统是否在正常运行。
 
-## 1.2. 什么人需要使用监控框架?
+# 2. 什么人需要使用监控?
 
 所有关注系统状态的人员都可以使用,包括但不限于研发、测试、运维、DBA等等
 
-## 1.3. IoTDB都有哪些监控指标?
-
-目前,IoTDB对外提供一些主要模块的监控指标,并且随着新功能的开发以及系统优化或者重构,监控指标也会同步添加和更新。
-
-### 1.3.1. 名词解释
-
-在进一步了解这些指标之前,我们先来看几个名词解释:
-
-- Metric Name
-
-  指标名称,比如logback_events_total表示日志事件发生的总次数。
-
-- Tag
-
-  每个指标下面可以有0到多个分类,比如logback_events_total下有一个```level```的分类,用来表示特定级别下的日志数量。
-
-### 1.3.2. 数据格式
-
-IoTDB对外提供JMX和Prometheus格式的监控指标,对于JMX,可以通过```org.apache.iotdb.metrics```获取系统监控指标指标。
+# 3. 什么是监控指标?
 
-接下来我们以Prometheus格式为例对目前已有监控项进行说明。
+## 3.1. 监控指标名词解释
 
-### 1.3.3. IoTDB 默认指标
+在 IoTDB 的监控模块,每个监控指标被 `Metric Name` 和 `Tags` 唯一标识。
 
-#### 1.3.3.1. Interface
-
-| Metric                | Tag                      | level     | 说明                | 示例                                         |
-| --------------------- | ------------------------ |-----------| ------------------- | -------------------------------------------- |
-| entry_seconds_count   | name="{{interface}}"     | important | 接口累计访问次数    | entry_seconds_count{name="openSession",} 1.0 |
-| entry_seconds_sum     | name="{{interface}}"     | important | 接口累计耗时(s)     | entry_seconds_sum{name="openSession",} 0.024 |
-| entry_seconds_max     | name="{{interface}}"     | important | 接口最大耗时(s)     | entry_seconds_max{name="openSession",} 0.024 |
-| quantity_total        | name="pointsIn"          | important | 系统累计写入点数    | quantity_total{name="pointsIn",} 1.0         |
-| thrift_connections    | name="{{thriftService}}" | important | thrift当前连接数    | thrift_connections{name="RPC",} 1.0          |
-| thrift_active_threads | name="{{thriftThread}}"  | important | thrift worker线程数 | thrift_active_threads{name="RPC",} 1.0       |
-
-#### 1.3.3.2. Task
-
-| Metric                      | Tag                                                                          | level     | 说明                            | 示例                                                                                               |
-| --------------------------- | ---------------------------------------------------------------------------- | --------- | ------------------------------- | -------------------------------------------------------------------------------------------------- |
-| queue                       | name="compaction_inner/compaction_cross/flush",<br/>status="running/waiting" | important | 当前时间任务数                  | queue{name="flush",status="waiting",} 0.0<br/>queue{name="compaction/flush",status="running",} 0.0 |
-| cost_task_seconds_count     | name="inner_compaction/cross_compaction/flush"                               | important | 任务累计发生次数                | cost_task_seconds_count{name="flush",} 1.0                                                         |
-| cost_task_seconds_max       | name="inner_compaction/cross_compaction/flush"                               | important | 到目前为止任务耗时(s)最大的一次 | cost_task_seconds_max{name="flush",} 0.363                                                         |
-| cost_task_seconds_sum       | name="inner_compaction/cross_compaction/flush"                               | important | 任务累计耗时(s)                 | cost_task_seconds_sum{name="flush",} 0.363                                                         |
-| data_written_total          | name="compaction", <br/>type="aligned/not-aligned/total"                     | important | 合并文件时写入量                | data_written_total{name="compaction",type="total",} 10240                                          |
-| data_read_total             | name="compaction"                                                            | important | 合并文件时的读取量              | data_read_total{name="compaction",} 10240                                                          |
-| compaction_task_count_total | name = "inner_compaction/cross_compaction", type="sequence/unsequence/cross" | important | 合并任务个数                    | compaction_task_count_total{name="inner_compaction",type="sequence",} 1                            |
-
-#### 1.3.3.3. 内存占用
-
-| Metric | Tag                                                          | level     | 说明                       | 示例                              |
-| ------ | ------------------------------------------------------------ | --------- | -------------------------- | --------------------------------- |
-| mem    | name="chunkMetaData/storageGroup/mtree/MultiLeaderConsensus" | important | 对应部分占用的内存(byte) | mem{name="chunkMetaData",} 2050.0 |
-
-#### 1.3.3.4. 缓存
-
-| Metric      | Tag                                                               | level     | 说明                                                         | 示例                                                |
-| ----------- | ----------------------------------------------------------------- | --------- | ------------------------------------------------------------ | --------------------------------------------------- |
-| cache_hit   | name="chunk/timeSeriesMeta/bloomFilter/SchemaCache"               | important | chunk/timeSeriesMeta/SchemaCache缓存命中率,bloomFilter拦截率 | cache_hit{name="chunk",} 80                         |
-| cache_total | name="StorageGroup/SchemaPartition/DataPartition", type="hit/all" | important | StorageGroup/SchemaPartition/DataPartition 的命中/总次数     | cache_total{name="DataPartition",type="all",} 801.0 |
+- `Metric Name`:**指标类型名称**,比如logback_events表示日志事件。
+- `Tags`:**指标分类**,形式为Key-Value对,每个指标下面可以有0到多个分类,常见的Key-Value对:
+  - `name = xxx`:被监控项的名称,比如对`entry_seconds_count`这个监控项,name 的含义是被监控的接口名称。
+  - `status = xxx`:被监控项的状态细分,比如监控 Task 的监控项可以通过该参数,将运行的 Task 和停止的 Task 分开。
+  - `user = xxx`:被监控项和某个特定用户相关,比如统计root用户的写入总次数。
+  - 根据具体情况自定义:比如logback_events_total下有一个```level```的分类,用来表示特定级别下的日志数量
+- `Metric Level`:**指标管理级别**,默认启动级别为`Core`级别,建议启动级别为`Important`级别,审核严格程度`Core > Important > Normal > All`
+    - `Core`:系统的核心指标,供**系统内核和运维人员**使用,关乎系统的**性能、稳定性、安全性**,比如实例的状况,系统的负载等。
+    - `Important`:模块的重要指标,供**运维和测试人员**使用,直接关乎**每个模块的运行状态**,比如合并文件个数、执行情况等。
+    - `Normal`:模块的一般指标,供**开发人员**使用,方便在出现问题时**定位模块**,比如合并中的特定关键操作情况。
+    - `All`:模块的全部指标,供**模块开发人员**使用,往往在复现问题的时候使用,从而快速解决问题。
 
-#### 1.3.3.5. 业务数据
+## 3.2. 监控指标对外获取数据格式
+- IoTDB 对外提供 JMX、 Prometheus 和 IoTDB 格式的监控指标:
+  - 对于 JMX ,可以通过```org.apache.iotdb.metrics```获取系统监控指标。
+  - 对于 Prometheus ,可以通过对外暴露的端口获取监控指标的值
+  - 对于 IoTDB 方式对外暴露:可以通过执行 IoTDB 的查询来获取监控指标
+
+# 4. 监控指标有哪些?
+
+目前,IoTDB 对外提供一些主要模块的监控指标,并且随着新功能的开发以及系统优化或者重构,监控指标也会同步添加和更新。如果想自己在 IoTDB 中添加更多系统监控指标埋点,可以参考[IoTDB Metrics Framework](https://github.com/apache/iotdb/tree/master/metrics)使用说明。
+
+## 4.1. Core 级别监控指标
+
+Core 级别的监控指标在系统运行中默认开启,每一个 Core 级别的监控指标的添加都需要经过谨慎的评估,目前 Core 级别的监控指标如下所述:
+
+### 4.1.1. 集群运行状态
+
+| Metric      | Tags                                            | Type      | Description                            |
+| ----------- | ----------------------------------------------- | --------- | -------------------------------------- |
+| config_node | name="total",status="Registered/Online/Unknown" | AutoGauge | 已注册/在线/离线 confignode 的节点数量 |
+| data_node   | name="total",status="Registered/Online/Unknown" | AutoGauge | 已注册/在线/离线 datanode 的节点数量   |
+
+### 4.1.2. IoTDB 进程运行状态
+| Metric            | Tags          | Type      | Description                         |
+| ----------------- | ------------- | --------- | ----------------------------------- |
+| process_cpu_load  | name="cpu"    | AutoGauge | IoTDB 进程的 CPU 占用率,单位为%    |
+| process_cpu_time  | name="cpu"    | AutoGauge | IoTDB 进程占用的 CPU 时间,单位为ns |
+| process_max_mem   | name="memory" | AutoGauge | IoTDB 进程最大可用内存              |
+| process_total_mem | name="memory" | AutoGauge | IoTDB 进程当前已申请内存            |
+| process_free_mem  | name="memory" | AutoGauge | IoTDB 进程当前剩余可用内存          |
+
+### 4.1.3. 系统运行状态
+| Metric                         | Tags          | Type      | Description                              |
+| ------------------------------ | ------------- | --------- | ---------------------------------------- |
+| sys_cpu_load                   | name="cpu"    | AutoGauge | 系统的 CPU 占用率,单位为%               |
+| sys_cpu_cores                  | name="cpu"    | Gauge     | 系统的可用处理器数                       |
+| sys_total_physical_memory_size | name="memory" | Gauge     | 系统的最大物理内存                       |
+| sys_free_physical_memory_size  | name="memory" | AutoGauge | 系统的剩余可用内存                       |
+| sys_total_swap_space_size      | name="memory" | AutoGauge | 系统的交换区最大空间                     |
+| sys_free_swap_space_size       | name="memory" | AutoGauge | 系统的交换区剩余可用空间                 |
+| sys_committed_vm_size          | name="memory" | AutoGauge | 系统保证可用于正在运行的进程的虚拟内存量 |
+| sys_disk_total_space           | name="disk"   | AutoGauge | 系统磁盘总大小                           |
+| sys_disk_free_space            | name="disk"   | AutoGauge | 系统磁盘可用大小                         |
+
+## 4.2. Important 级别监控指标
+
+目前 Important 级别的监控指标如下所述:
+
+### 4.2.1. 集群运行状态
+| Metric                    | Tags                                              | Type  | Description                    |
+| ------------------------- | ------------------------------------------------- | ----- | ------------------------------ |
+| cluster_node_leader_count | name="{{ip}}:{{port}}"                            | Gauge | 节点上共识组Leader的数量       |
+| cluster_node_status       | name="{{ip}}:{{port}}",type="ConfigNode/DataNode" | Gauge | 节点的状态,0=Unknown 1=Online |
+
+### 4.2.2. 节点统计
+| Metric   | Tags                                       | Type      | Description                        |
+| -------- | ------------------------------------------ | --------- | ---------------------------------- |
+| quantity | name="database"                            | AutoGauge | 系统数据库数量                     |
+| quantity | name="timeSeries"                          | AutoGauge | 系统时间序列数量                   |
+| quantity | name="pointsIn"                            | Counter   | 系统累计写入点数                   |
+| region   | name="total",type="SchemaRegion"           | AutoGauge | 分区表中 SchemaRegion 总数量         |
+| region   | name="total",type="DataRegion"             | AutoGauge | 分区表中 DataRegion 总数量           |
+| region   | name="{{ip}}:{{port}}",type="SchemaRegion" | Gauge     | 分区表中对应节点上 SchemaRegion 总数量 |
+| region   | name="{{ip}}:{{port}}",type="DataRegion"   | Gauge     | 分区表中对应节点上 DataRegion 总数量 |
+
+### 4.2.3. 弱一致性共识协议统计
+| Metric       | Tags                                                                                         | Type      | Description                      |
+| ------------ | -------------------------------------------------------------------------------------------- | --------- | -------------------------------- |
+| mutli_leader | name="logDispatcher-{{IP}}:{{Port}}", region="{{region}}", type="currentSyncIndex"           | AutoGauge | 副本组同步线程的当前同步进度     |
+| mutli_leader | name="logDispatcher-{{IP}}:{{Port}}", region="{{region}}", type="cachedRequestInMemoryQueue" | AutoGauge | 副本组同步线程缓存队列请求总大小 |
+| mutli_leader | name="multiLeaderServerImpl", region="{{region}}", type="searchIndex"                        | AutoGauge | 副本组主流程写入进度             |
+| mutli_leader | name="multiLeaderServerImpl", region="{{region}}", type="safeIndex"                          | AutoGauge | 副本组同步进度                   |
+| stage        | name="multi_leader", region="{{region}}", type="getStateMachineLock"                         | Histogram | 主流程获取状态机锁耗时           |
+| stage        | name="multi_leader", region="{{region}}", type="checkingBeforeWrite"                         | Histogram | 主流程写入状态机检查耗时         |
+| stage        | name="multi_leader", region="{{region}}", type="writeStateMachine"                           | Histogram | 主流程写入状态机耗时             |
+| stage        | name="multi_leader", region="{{region}}", type="offerRequestToQueue"                         | Histogram | 主流程尝试添加队列耗时           |
+| stage        | name="multi_leader", region="{{region}}", type="consensusWrite"                              | Histogram | 主流程全写入耗时                 |
+| stage        | name="multi_leader", region="{{region}}", type="constructBatch"                              | Histogram | 同步线程构造 Batch 耗时          |
+| stage        | name="multi_leader", region="{{region}}", type="syncLogTimePerRequest"                       | Histogram | 异步回调流程同步日志耗时         |
+
+### 4.2.4. 缓存统计
+| Metric    | Tags                               | Type      | Description                                             |
+| --------- | ---------------------------------- | --------- | ------------------------------------------------------- |
+| cache_hit | name="chunk"                       | AutoGauge | ChunkCache的命中率,单位为%                             |
+| cache_hit | name="schema"                      | AutoGauge | SchemaCache的命中率,单位为%                            |
+| cache_hit | name="timeSeriesMeta"              | AutoGauge | TimeseriesMetadataCache的命中率,单位为%                |
+| cache_hit | name="bloomFilter"                 | AutoGauge | TimeseriesMetadataCache中的bloomFilter的拦截率,单位为% |
+| cache     | name="StorageGroup", type="hit"    | Counter   | StorageGroup Cache 的命中次数                           |
+| cache     | name="StorageGroup", type="all"    | Counter   | StorageGroup Cache 的访问次数                           |
+| cache     | name="SchemaPartition", type="hit" | Counter   | SchemaPartition Cache 的命中次数                        |
+| cache     | name="SchemaPartition", type="all" | Counter   | SchemaPartition Cache 的访问次数                        |
+| cache     | name="DataPartition", type="hit"   | Counter   | DataPartition Cache 的命中次数                          |
+| cache     | name="DataPartition", type="all"   | Counter   | DataPartition Cache 的访问次数                          |
+
+### 4.2.5. 接口层统计
+| Metric                | Tags                               | Type      | Description                         |
+| --------------------- | ---------------------------------- | --------- | ----------------------------------- |
+| operation             | name = "{{name}}"                  | Histogram | 客户端执行的操作的耗时情况          |
+| entry                 | name="{{interface}}"               | Timer     | Client 建立的 Thrift 的耗时情况     |
+| thrift_connections    | name="ConfigNodeRPC"               | AutoGauge | ConfigNode 的内部 Thrift 连接数     |
+| thrift_connections    | name="Internal"                    | AutoGauge | DataNode 的内部 Thrift 连接数       |
+| thrift_connections    | name="MPPDataExchange"             | AutoGauge | MPP 框架的内部 Thrift 连接数        |
+| thrift_connections    | name="RPC"                         | AutoGauge | Client 建立的 Thrift 连接数         |
+| thrift_active_threads | name="ConfigNodeRPC-Service"       | AutoGauge | ConfigNode 的内部活跃 Thrift 连接数 |
+| thrift_active_threads | name="DataNodeInternalRPC-Service" | AutoGauge | DataNode 的内部活跃 Thrift 连接数   |
+| thrift_active_threads | name="MPPDataExchangeRPC-Service"  | AutoGauge | MPP 框架的内部活跃 Thrift 连接数    |
+| thrift_active_threads | name="ClientRPC-Service"           | AutoGauge | Client 建立的活跃 Thrift 连接数     |
+
+### 4.2.6. 内存统计
+| Metric | Tags                          | Type      | Description                                       |
+| ------ | ----------------------------- | --------- | ------------------------------------------------- |
+| mem    | name="database_{{name}}"      | AutoGauge | DataNode内对应DataRegion的内存占用,单位为byte    |
+| mem    | name="chunkMetaData_{{name}}" | AutoGauge | 写入TsFile时的ChunkMetaData的内存占用,单位为byte |
+| mem    | name="MultiLeaderConsensus"   | AutoGauge | 弱一致性共识协议的内存占用,单位为byte            |
+
+### 4.2.7. 任务统计
+| Metric    | Tags                                              | Type      | Description        |
+| --------- | ------------------------------------------------- | --------- | ------------------ |
+| queue     | name="compaction_inner", status="running/waiting" | Gauge     | 空间内合并任务数   |
+| queue     | name="compaction_cross", status="running/waiting" | Gauge     | 跨空间合并任务数   |
+| cost_task | name="inner_compaction/cross_compaction/flush"    | Gauge     | 任务耗时情况       |
+| queue     | name="flush",status="running/waiting"             | AutoGauge | 刷盘任务数         |
+| queue     | name="Sub_RawQuery",status="running/waiting"      | AutoGauge | Sub_RawQuery任务数 |
+
+
+### 4.2.8. 合并统计
+| Metric                | Tags                                                | Type    | Description        |
+| --------------------- | --------------------------------------------------- | ------- | ------------------ |
+| data_written          | name="compaction", type="aligned/not-aligned/total" | Counter | 合并时写入量       |
+| data_read             | name="compaction"                                   | Counter | 合并时的读取量     |
+| compaction_task_count | name = "inner_compaction", type="sequence"          | Counter | 顺序空间内合并次数 |
+| compaction_task_count | name = "inner_compaction", type="unsequence"        | Counter | 乱序空间内合并次数 |
+| compaction_task_count | name = "cross_compaction", type="cross"             | Counter | 跨空间合并次数     |
+
+### 4.2.9. 文件统计信息
+| Metric     | Tags         | Type      | Description                  |
+| ---------- | ------------ | --------- | ---------------------------- |
+| file_size  | name="wal"   | AutoGauge | 写前日志总大小,单位为byte   |
+| file_size  | name="seq"   | AutoGauge | 顺序TsFile总大小,单位为byte |
+| file_size  | name="unseq" | AutoGauge | 乱序TsFile总大小,单位为byte |
+| file_count | name="wal"   | AutoGauge | 写前日志文件个数             |
+| file_count | name="seq"   | AutoGauge | 顺序TsFile文件个数           |
+| file_count | name="unseq" | AutoGauge | 乱序TsFile文件个数           |
+
+### 4.2.10. IoTDB 进程统计
+| Metric                | Tags           | Type      | Description                          |
+| --------------------- | -------------- | --------- | ------------------------------------ |
+| process_used_mem      | name="memory"  | AutoGauge | IoTDB 进程当前使用内存               |
+| process_mem_ratio     | name="memory"  | AutoGauge | IoTDB 进程的内存占用比例             |
+| process_threads_count | name="process" | AutoGauge | IoTDB 进程当前线程数                 |
+| process_status        | name="process" | AutoGauge | IoTDB 进程存活状态,1为存活,0为终止 |
+
+### 4.2.11. IoTDB 日志统计
+| Metric         | Tags                                | Type    | Description        |
+| -------------- | ----------------------------------- | ------- | ------------------ |
+| logback_events | level="trace/debug/info/warn/error" | Counter | 不同类型的日志个数 |
+
+
+### 4.2.12. JVM 线程统计
+
+| Metric                     | Tags                                                          | Type      | Description              |
+| -------------------------- | ------------------------------------------------------------- | --------- | ------------------------ |
+| jvm_threads_live_threads   |                                                               | AutoGauge | 当前线程数               |
+| jvm_threads_daemon_threads |                                                               | AutoGauge | 当前 Daemon 线程数       |
+| jvm_threads_peak_threads   |                                                               | AutoGauge | 峰值线程数               |
+| jvm_threads_states_threads | state="runnable/blocked/waiting/timed-waiting/new/terminated" | AutoGauge | 当前处于各种状态的线程数 |
+
+### 4.2.13. JVM GC 统计
+| Metric                        | Tags                                                  | Type      | Description                            |
+| ----------------------------- | ----------------------------------------------------- | --------- | -------------------------------------- |
+| jvm_gc_pause                  | action="end of major GC/end of minor GC",cause="xxxx" | Timer     | 不同原因的Young GC/Full GC的次数与耗时 |
+|                               |
+| jvm_gc_concurrent_phase_time  | action="{{action}}",cause="{{cause}}"                 | Timer     | 不同原因的Young GC/Full GC的次数与耗时 |
+|                               |
+| jvm_gc_max_data_size_bytes    |                                                       | AutoGauge | 老年代内存的历史最大值                 |
+| jvm_gc_live_data_size_bytes   |                                                       | AutoGauge | 老年代内存的使用值                     |
+| jvm_gc_memory_promoted_bytes  |                                                       | Counter   | 老年代内存正向增长累计值               |
+| jvm_gc_memory_allocated_bytes |                                                       | Counter   | GC分配内存正向增长累计值               |
+
+### 4.2.14. JVM 内存统计
+| Metric                          | Tags                            | Type      | Description          |
+| ------------------------------- | ------------------------------- | --------- | -------------------- |
+| jvm_buffer_memory_used_bytes    | id="direct/mapped"              | AutoGauge | 已经使用的缓冲区大小 |
+| jvm_buffer_total_capacity_bytes | id="direct/mapped"              | AutoGauge | 最大缓冲区大小       |
+| jvm_buffer_count_buffers        | id="direct/mapped"              | AutoGauge | 当前缓冲区数量       |
+| jvm_memory_committed_bytes      | {area="heap/nonheap",id="xxx",} | AutoGauge | 当前申请的内存大小   |
+| jvm_memory_max_bytes            | {area="heap/nonheap",id="xxx",} | AutoGauge | 最大内存             |
+| jvm_memory_used_bytes           | {area="heap/nonheap",id="xxx",} | AutoGauge | 已使用内存大小       |
+
+### 4.2.15. JVM 类加载统计
+| Metric                       | Tags | Type      | Description         |
+| ---------------------------- | ---- | --------- | ------------------- |
+| jvm_classes_unloaded_classes |      | AutoGauge | 累计卸载的class数量 |
+| jvm_classes_loaded_classes   |      | AutoGauge | 累计加载的class数量 |
+
+### 4.2.16. JVM 编译时间统计
+| Metric                  | Tags                                          | Type      | Description        |
+| ----------------------- | --------------------------------------------- | --------- | ------------------ |
+| jvm_compilation_time_ms | {compiler="HotSpot 64-Bit Tiered Compilers",} | AutoGauge | 耗费在编译上的时间 |
+
+
+## 4.3. All 级别监控指标
+目前还没有All级别的监控指标,后续会持续添加。
+
+# 5. 怎样获取这些系统监控?
+
+- 监控模块的相关配置均在`conf/iotdb-{datanode/confignode}.properties`中,所有配置项支持通过`load configuration`命令热加载。
+
+## 5.1. 使用 JMX 方式
+对于使用 JMX 对外暴露的指标,可以通过 Jconsole 来进行查看。在进入 Jconsole 监控页面后,首先会看到 IoTDB 的各类运行情况的概览。在这里,您可以看到堆内存信息、线程信息、类信息以及服务器的 CPU 使用情况。
+
+### 5.1.1. 获取监控指标数据
+连接到 JMX 后,您可以通过 "MBeans" 标签找到名为 "org.apache.iotdb.metrics" 的 "MBean",可以在侧边栏中查看所有监控指标的具体值。
+
+<img style="width:100%; max-width:800px; max-height:600px; margin-left:auto; margin-right:auto; display:block;" alt="metric-jmx" src="https://user-images.githubusercontent.com/46039728/204018765-6fda9391-ebcf-4c80-98c5-26f34bd74df0.png">
+
+### 5.1.2. 获取其他相关数据
+连接到 JMX 后,您可以通过 "MBeans" 标签找到名为 "org.apache.iotdb.service" 的 "MBean",如下图所示,可以了解服务的基本状态。
 
-| Metric   | Tag                                   | level     | 说明                                         | 示例                             |
-| -------- | ------------------------------------- | --------- | -------------------------------------------- | -------------------------------- |
-| quantity | name="timeSeries/storageGroup/device" | important | 当前时间timeSeries/storageGroup/device的数量 | quantity{name="timeSeries",} 1.0 |
+<img style="width:100%; max-width:800px; max-height:600px; margin-left:auto; margin-right:auto; display:block;" src="https://user-images.githubusercontent.com/46039728/149951720-707f1ee8-32ee-4fde-9252-048caebd232e.png"> <br>
 
-#### 1.3.3.6. 集群
+为了提高查询性能,IoTDB 对 ChunkMetaData 和 TsFileMetaData 进行了缓存。用户可以使用 MXBean,展开侧边栏`org.apache.iotdb.db.service`查看缓存命中率:
 
-##### 1.3.3.6.1. 集群状态
+<img style="width:100%; max-width:800px; max-height:600px; margin-left:auto; margin-right:auto; display:block;" src="https://user-images.githubusercontent.com/19167280/112426760-73e3da80-8d73-11eb-9a8f-9232d1f2033b.png">
 
-| Metric                    | Tag                                                                | level     | 说明                                                          | 示例                                                                         |
-| ------------------------- | ------------------------------------------------------------------ | --------- | ------------------------------------------------------------- | ---------------------------------------------------------------------------- |
-| cluster_node_leader_count | name="{{ip}}:{{port}}"                                             | important | 节点上```dataGroupLeader```的数量,用来观察leader是否分布均匀 | cluster_node_leader_count{name="127.0.0.1",} 2.0                             |
-| cluster_uncommitted_log   | name="{{ip_datagroupHeader}}"                                      | important | 节点```uncommitted_log```的数量                               | cluster_uncommitted_log{name="127.0.0.1_Data-127.0.0.1-40010-raftId-0",} 0.0 |
-| cluster_node_status       | name="{{ip}}:{{port}}",type="ConfigNode/DataNode"                  | important | 节点状态,0=Unkonwn 1=online                                  | cluster_node_status{name="0.0.0.0:22277",type="ConfigNode",} 1.0             |
-| cluster_elect_total       | name="{{ip}}",status="fail/win"                                    | important | 节点参与选举的次数及结果                                      | cluster_elect_total{name="127.0.0.1",status="win",} 1.0                      |
-| config_node               | name="total",status="Registered/Online/Unknown"                    | core      | 已注册/在线/离线 confignode 的节点数量                        | config_node{name="total",status="Online",} 2.0                               |
-| data_node                 | name="total",status="Registered/Online/Unknown"                    | core      | 已注册/在线/离线 datanode 的节点数量                          | data_node{name="total",status="Registered",} 3.0                             |
-| partition_table           | name="number"                                                      | core      | partition table表的个数                                       | partition_table{name="number",} 2.0                                          |
-| region                    | name="total/{{ip}}:{{port}}",type="SchemaRegion/DataRegion"        | important | 全部或某个节点的schemaRegion/dataRegion个数                   | region{name="127.0.0.1:6671",type="DataRegion",} 10.0                        |
-| region                    | name="{{storageGroupName}}",type="SchemaRegion/DataRegion"         | normal    | database 的 DataRegion/Schema个数                                 | region{name="root.schema.sg1",type="DataRegion",} 14.0                       |
-| slot                      | name="{{storageGroupName}}",type="schemaSlotNumber/dataSlotNumber" | normal    | database 的 schemaSlot/dataSlot个数                               | slot{name="root.schema.sg1",type="schemaSlotNumber",} 2.0                    |
-
-##### 1.3.3.6.2. 弱一致性
-| Metric       | Tag                                                                                          | level    | 说明                                                 | 示例                                                                                                             |
-| ------------ | -------------------------------------------------------------------------------------------- | -------- | ---------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------- |
-| mutli_leader | name="multiLeaderServerImpl", region="{{region}}", type="searchIndex/safeIndex"              | core     | 弱一致性对应region的写入index和同步index             | multi_leader{name="multiLeaderServerImpl",region="DataRegion[7]",type="searchIndex",} 1945.0                     |
-| mutli_leader | name="logDispatcher-{{IP}}:{{Port}}", region="{{region}}", type="currentSyncIndex"           | important | 弱一致性对应region的同步线程当前的同步index          | multi_leader{name="logDispatcher-127.0.0.1:40014",region="DataRegion[7]",type="currentSyncIndex",} 1945.0        |
-| mutli_leader | name="logDispatcher-{{IP}}:{{Port}}", region="{{region}}", type="cachedRequestInMemoryQueue" | important | 弱一致性对应region的同步线程缓存的队列总大小         | multi_leader{name="logDispatcher-127.0.0.1:40014",region="DataRegion[9]",type="cachedRequestInMemoryQueue",} 0.0 |
-| stage        | name="multi_leader", region="{{region}}", type="getStateMachineLock"                         | important | 弱一致性对应region获取状态机锁的耗时                 | stage{name="multi_leader",region="DataRegion[6]",type="getStateMachineLock",quantile="0.5",} 0.0                 |
-| stage        | name="multi_leader", region="{{region}}", type="checkingBeforeWrite"                         | important | 弱一致性对应region状态机完成写前检查的耗时           | stage{name="multi_leader",region="DataRegion[5]",type="checkingBeforeWrite",quantile="0.5",} 0.0                 |
-| stage        | name="multi_leader", region="{{region}}", type="writeStateMachine"                           | important | 弱一致性对应region状态机写入请求的耗时               | stage{name="multi_leader",region="DataRegion[6]",type="writeStateMachine",quantile="0.5",} 1.0                   |
-| stage        | name="multi_leader", region="{{region}}", type="offerRequestToQueue"                         | important | 弱一致性对应region状态机尝试将请求放入同步队列的耗时 | stage{name="multi_leader",region="DataRegion[6]",type="offerRequestToQueue",quantile="0.5",} 1.0                 |
-| stage        | name="multi_leader", region="{{region}}", type="consensusWrite"                              | important | 弱一致性对应region状态机处理共识层请求的耗时         | stage{name="multi_leader",region="DataRegion[6]",type="consensusWrite",quantile="0.5",} 2.0625                   |
-| stage        | name="multi_leader", region="{{region}}", type="constructBatch"                              | important | 弱一致性对应同步线程完成一个请求构造的耗时           | stage{name="multi_leader",region="DataRegion[7]",type="constructBatch",quantile="0.5",} 0.0                      |
-| stage        | name="multi_leader", region="{{region}}", type="syncLogTimePerRequest"                       | important | 弱一致性对应同步线程完成一个请求同步的耗时           | stage{name="multi_leader",region="DataRegion[7]",type="syncLogTimePerRequest",quantile="0.5",} 0.0               |
-
-
-### 1.3.4. IoTDB 预定义指标集
-
-#### 1.3.4.1. JVM
-
-##### 1.3.4.1.1. 线程
-
-| Metric                     | Tag                                                           | level     | 说明                     | 示例                                               |
-| -------------------------- | ------------------------------------------------------------- | --------- | ------------------------ | -------------------------------------------------- |
-| jvm_threads_live_threads   | 无                                                            | important | 当前线程数               | jvm_threads_live_threads 25.0                      |
-| jvm_threads_daemon_threads | 无                                                            | important | 当前daemon线程数         | jvm_threads_daemon_threads 12.0                    |
-| jvm_threads_peak_threads   | 无                                                            | important | 峰值线程数               | jvm_threads_peak_threads 28.0                      |
-| jvm_threads_states_threads | state="runnable/blocked/waiting/timed-waiting/new/terminated" | important | 当前处于各种状态的线程数 | jvm_threads_states_threads{state="runnable",} 10.0 |
-
-##### 1.3.4.1.2. 垃圾回收
-
-| Metric                              | Tag                                                    | level     | 说明                                         | 示例                                                                                    |
-| ----------------------------------- | ------------------------------------------------------ | --------- | -------------------------------------------- | --------------------------------------------------------------------------------------- |
-| jvm_gc_pause_seconds_count          | action="end of major GC/end of minor GC",cause="xxxx"  | important | YGC/FGC发生次数及其原因                      | jvm_gc_pause_seconds_count{action="end of major GC",cause="Metadata GC Threshold",} 1.0 |
-| jvm_gc_pause_seconds_sum            | action="end of major GC/end of minor GC",cause="xxxx"  | important | YGC/FGC累计耗时及其原因                      | jvm_gc_pause_seconds_sum{action="end of major GC",cause="Metadata GC Threshold",} 0.03  |
-| jvm_gc_pause_seconds_max            | action="end of major GC",cause="Metadata GC Threshold" | important | YGC/FGC最大耗时及其原因                      | jvm_gc_pause_seconds_max{action="end of major GC",cause="Metadata GC Threshold",} 0.0   |
-| jvm_gc_memory_promoted_bytes_total  | 无                                                     | important | 从GC之前到GC之后老年代内存池大小正增长的累计 | jvm_gc_memory_promoted_bytes_total 8425512.0                                            |
-| jvm_gc_max_data_size_bytes          | 无                                                     | important | 老年代内存的历史最大值                       | jvm_gc_max_data_size_bytes 2.863661056E9                                                |
-| jvm_gc_live_data_size_bytes         | 无                                                     | important | GC后老年代内存的大小                         | jvm_gc_live_data_size_bytes 8450088.0                                                   |
-| jvm_gc_memory_allocated_bytes_total | 无                                                     | important | 在一个GC之后到下一个GC之前年轻代增加的内存   | jvm_gc_memory_allocated_bytes_total 4.2979144E7                                         |
-
-##### 1.3.4.1.3. 内存
-
-| Metric                          | Tag                             | level     | 说明                    | 示例                                                                                                                                                          |
-| ------------------------------- | ------------------------------- | --------- | ----------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| jvm_buffer_memory_used_bytes    | id="direct/mapped"              | important | 已经使用的缓冲区大小    | jvm_buffer_memory_used_bytes{id="direct",} 3.46728099E8                                                                                                       |
-| jvm_buffer_total_capacity_bytes | id="direct/mapped"              | important | 最大缓冲区大小          | jvm_buffer_total_capacity_bytes{id="mapped",} 0.0                                                                                                             |
-| jvm_buffer_count_buffers        | id="direct/mapped"              | important | 当前缓冲区数量          | jvm_buffer_count_buffers{id="direct",} 183.0                                                                                                                  |
-| jvm_memory_committed_bytes      | {area="heap/nonheap",id="xxx",} | important | 当前向JVM申请的内存大小 | jvm_memory_committed_bytes{area="heap",id="Par Survivor Space",} 2.44252672E8<br/>jvm_memory_committed_bytes{area="nonheap",id="Metaspace",} 3.9051264E7<br/> |
-| jvm_memory_max_bytes            | {area="heap/nonheap",id="xxx",} | important | JVM最大内存             | jvm_memory_max_bytes{area="heap",id="Par Survivor Space",} 2.44252672E8<br/>jvm_memory_max_bytes{area="nonheap",id="Compressed Class Space",} 1.073741824E9   |
-| jvm_memory_used_bytes           | {area="heap/nonheap",id="xxx",} | important | JVM已使用内存大小       | jvm_memory_used_bytes{area="heap",id="Par Eden Space",} 1.000128376E9<br/>jvm_memory_used_bytes{area="nonheap",id="Code Cache",} 2.9783808E7<br/>             |
-
-##### 1.3.4.1.4. Classes
-
-| Metric                       | Tag                                           | level     | 说明                   | 示例                                                                          |
-| ---------------------------- | --------------------------------------------- | --------- | ---------------------- | ----------------------------------------------------------------------------- |
-| jvm_classes_unloaded_classes | 无                                            | important | jvm累计卸载的class数量 | jvm_classes_unloaded_classes 680.0                                            |
-| jvm_classes_loaded_classes   | 无                                            | important | jvm累计加载的class数量 | jvm_classes_loaded_classes 5975.0                                             |
-| jvm_compilation_time_ms      | {compiler="HotSpot 64-Bit Tiered Compilers",} | important | jvm耗费在编译上的时间  | jvm_compilation_time_ms{compiler="HotSpot 64-Bit Tiered Compilers",} 107092.0 |
-
-#### 1.3.4.2. 文件(File)
-
-| Metric     | Tag                  | level     | 说明                                | 示例                        |
-| ---------- | -------------------- | --------- | ----------------------------------- | --------------------------- |
-| file_size  | name="wal/seq/unseq" | important | 当前时间wal/seq/unseq文件大小(byte) | file_size{name="wal",} 67.0 |
-| file_count | name="wal/seq/unseq" | important | 当前时间wal/seq/unseq文件个数       | file_count{name="seq",} 1.0 |
-
-#### 1.3.4.3. 日志(logback)
-
-| Metric               | Tag                                    | level     | 说明                                    | 示例                                    |
-| -------------------- | -------------------------------------- | --------- | --------------------------------------- | --------------------------------------- |
-| logback_events_total | {level="trace/debug/info/warn/error",} | important | trace/debug/info/warn/error日志累计数量 | logback_events_total{level="warn",} 0.0 |
-
-#### 1.3.4.4. 进程(Process)
-| Metric                | Tag            | level     | 说明                               | 示例                                            |
-| --------------------- | -------------- |-----------| ---------------------------------- | ----------------------------------------------- |
-| process_cpu_load      | name="cpu"     | core      | process当前CPU占用率(%)          | process_cpu_load{name="process",} 5.0           |
-| process_cpu_time      | name="cpu"     | core      | process累计占用CPU时间(ns)        | process_cpu_time{name="process",} 3.265625E9    |
-| process_max_mem       | name="memory"  | core      | JVM最大可用内存                    | process_max_mem{name="process",} 3.545759744E9  |
-| process_used_mem      | name="memory"  | important | JVM当前使用内存                    | process_used_mem{name="process",} 4.6065456E7   |
-| process_total_mem     | name="memory"  | core      | JVM当前已申请内存                  | process_total_mem{name="process",} 2.39599616E8 |
-| process_free_mem      | name="memory"  | core      | JVM当前剩余可用内存                | process_free_mem{name="process",} 1.94035584E8  |
-| process_mem_ratio     | name="memory"  | important | 进程的内存占用比例                 | process_mem_ratio{name="process",} 0.0          |
-| process_threads_count | name="process" | important | 当前线程数                         | process_threads_count{name="process",} 11.0     |
-| process_status        | name="process" | important | 进程存活状态,1.0为存活,0.0为终止 | process_status{name="process",} 1.0             |
-
-#### 1.3.4.5. 系统(System)
-| Metric                         | Tag           | level     | 说明                                       | 示例                                                           |
-| ------------------------------ | ------------- | --------- | ------------------------------------------ | -------------------------------------------------------------- |
-| sys_cpu_load                   | name="cpu"    | core      | system当前CPU占用率(%)                   | sys_cpu_load{name="system",} 15.0                              |
-| sys_cpu_cores                  | name="cpu"    | core      | jvm可用处理器数                            | sys_cpu_cores{name="system",} 16.0                             |
-| sys_total_physical_memory_size | name="memory" | core      | system最大物理内存                         | sys_total_physical_memory_size{name="system",} 1.5950999552E10 |
-| sys_free_physical_memory_size  | name="memory" | core      | system当前剩余可用内存                     | sys_free_physical_memory_size{name="system",} 4.532396032E9    |
-| sys_total_swap_space_size      | name="memory" | core      | system交换区最大空间                       | sys_total_swap_space_size{name="system",} 2.1051273216E10      |
-| sys_free_swap_space_size       | name="memory" | core      | system交换区剩余可用空间                   | sys_free_swap_space_size{name="system",} 2.931576832E9         |
-| sys_committed_vm_size          | name="memory" | important | system保证可用于正在运行的进程的虚拟内存量 | sys_committed_vm_size{name="system",} 5.04344576E8             |
-| sys_disk_total_space           | name="disk"   | core      | 磁盘总大小                                 | sys_disk_total_space{name="system",} 5.10770798592E11          |
-| sys_disk_free_space            | name="disk"   | core      | 磁盘可用大小                               | sys_disk_free_space{name="system",} 3.63467845632E11           |
-
-### 1.3.5. 自定义添加埋点
-
-- 如果想自己在IoTDB中添加更多系统监控指标埋点,可以参考[IoTDB Metrics Framework](https://github.com/apache/iotdb/tree/master/metrics)使用说明
-- Metric 埋点定义规则
-    - `Metric`:监控项的名称,比如`entry_seconds_count`为接口累计访问次数,file_size 为文件总数。
-    - `Tags`:Key-Value对,用来明确被监控项,可选项
-        - `name = xxx`:被监控项的名称,比如对`entry_seconds_count`这个监控项,name 的含义是被监控的接口名称。
-        - `status = xxx`:被监控项的状态细分,比如监控 Task 的监控项可以通过该参数,将运行的 Task 和停止的 Task 分开。
-        - `user = xxx`:被监控项和某个特定用户相关,比如统计root用户的写入总次数。
-        - 根据具体情况自定义......
-- 监控指标级别含义:
-    - 线上运行默认启动级别为`Important`级,线下调试默认启动级别为`Normal`级,审核严格程度`Core > Important > Normal > All`
-    - `Core`:系统的核心指标,供**运维人员**使用,关乎系统的**性能、稳定性、安全性**,比如实例的状况,系统的负载等。
-    - `Important`:模块的重要指标,供**运维和测试人员**使用,直接关乎**每个模块的运行状态**,比如合并文件个数、执行情况等。
-    - `Normal`:模块的一般指标,供**开发人员**使用,方便在出现问题时**定位模块**,比如合并中的特定关键操作情况。
-    - `All`:模块的全部指标,供**模块开发人员**使用,往往在复现问题的时候使用,从而快速解决问题。
+## 5.2. 使用 Prometheus 方式
 
-## 1.4. 怎样获取这些系统监控指标?
+### 5.2.1. 监控指标的 Prometheus 映射关系
+> 对于 Metric Name 为 name, Tags 为 K1=V1, ..., Kn=Vn 的监控指标有如下映射,其中 value 为具体值
 
-监控模块的相关配置均在`conf/iotdb-{datanode/confignode}.properties`中,所有配置项支持通过`load configuration`命令热加载。
+| 监控指标类型     | 映射关系                                                                                                                                                                                                                                                                                                                                                                                                                                                                           |
+| ---------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| Counter          | name_total{k1="V1", ..., Kn="Vn"} value                                                                                                                                                                                                                                                                                                                                                                                                                                            |
+| AutoGauge、Gauge | name{k1="V1", ..., Kn="Vn"} value                                                                                                                                                                                                                                                                                                                                                                                                                                                  |
+| Histogram        | name_max{k1="V1", ..., Kn="Vn"} value <br> name_sum{k1="V1", ..., Kn="Vn"} value <br> name_count{k1="V1", ..., Kn="Vn"} value <br> name{k1="V1", ..., Kn="Vn", quantile="0.0"} value <br> name{k1="V1", ..., Kn="Vn", quantile="0.25"} value <br> name{k1="V1", ..., Kn="Vn", quantile="0.5"} value <br> name{k1="V1", ..., Kn="Vn", quantile="0.75"} value <br> name{k1="V1", ..., Kn="Vn", quantile="1.0"} value                                                                 |
+| Rate             | name_total{k1="V1", ..., Kn="Vn"} value <br> name_total{k1="V1", ..., Kn="Vn", rate="m1"} value <br> name_total{k1="V1", ..., Kn="Vn", rate="m5"} value  <br> name_total{k1="V1", ..., Kn="Vn", rate="m15"} value <br> name_total{k1="V1", ..., Kn="Vn", rate="mean"} value                                                                                                                                                                                                        |
+| Timer            | name_seconds_max{k1="V1", ..., Kn="Vn"} value <br> name_seconds_sum{k1="V1", ..., Kn="Vn"} value <br> name_seconds_count{k1="V1", ..., Kn="Vn"} value <br> name_seconds{k1="V1", ..., Kn="Vn", quantile="0.0"} value <br> name_seconds{k1="V1", ..., Kn="Vn", quantile="0.25"} value <br> name_seconds{k1="V1", ..., Kn="Vn", quantile="0.5"} value <br> name_seconds{k1="V1", ..., Kn="Vn", quantile="0.75"} value <br> name_seconds{k1="V1", ..., Kn="Vn", quantile="1.0"} value |
 
-### 1.4.1. 配置文件
-以DataNode为例
+### 5.2.2. 修改配置文件
+1) 以 DataNode 为例,修改 iotdb-datanode.properties 配置文件如下:
 
 ```properties
-# Whether enable metric module
-# Datatype: boolean
-dn_enable_metric=true
-
-# The reporters of metric module to report metrics
-# If there are more than one reporter, please separate them by commas ",".
-# Options: [JMX, PROMETHEUS, IOTDB]
-# Datatype: String
-dn_metric_reporter_list=JMX,PROMETHEUS
-
-# The level of metric module
-# Options: [Core, Important, Normal, All]
-# Datatype: String
+dn_metric_reporter_list=PROMETHEUS
 dn_metric_level=CORE
-
-# The port of prometheus reporter of metric module
-# Datatype: int
 dn_metric_prometheus_reporter_port=9091
 ```
 
-1. 在配置文件中修改如上配置
-2. 启动IoTDB
-3. 打开浏览器或者用```curl``` 访问 ```http://servier_ip:9091/metrics```, 就能看到metric数据了:
+2) 启动 IoTDB DataNode
+
+3) 打开浏览器或者用```curl``` 访问 ```http://server_ip:9091/metrics```, 就能得到如下 metric 数据:
 
 ```
 ...
@@ -289,37 +323,12 @@ dn_metric_prometheus_reporter_port=9091
 file_count{name="wal",} 0.0
 file_count{name="unseq",} 0.0
 file_count{name="seq",} 2.0
-# HELP file_size
-# TYPE file_size gauge
-file_size{name="wal",} 0.0
-file_size{name="unseq",} 0.0
-file_size{name="seq",} 560.0
-# HELP queue
-# TYPE queue gauge
-queue{name="flush",status="waiting",} 0.0
-queue{name="flush",status="running",} 0.0
-# HELP quantity
-# TYPE quantity gauge
-quantity{name="timeSeries",} 1.0
-quantity{name="storageGroup",} 1.0
-quantity{name="device",} 1.0
-# HELP logback_events_total Number of error level events that made it to the logs
-# TYPE logback_events_total counter
-logback_events_total{level="warn",} 0.0
-logback_events_total{level="debug",} 2760.0
-logback_events_total{level="error",} 0.0
-logback_events_total{level="trace",} 0.0
-logback_events_total{level="info",} 71.0
-# HELP mem
-# TYPE mem gauge
-mem{name="storageGroup",} 0.0
-mem{name="mtree",} 1328.0
 ...
 ```
 
-### 1.4.2. 对接Prometheus和Grafana
+### 5.2.3. Prometheus + Grafana
 
-如上面所述,IoTDB对外暴露出标准Prometheus格式的监控指标数据,可以直接和Prometheus以及Grafana集成。
+如上所示,IoTDB 对外暴露出标准的 Prometheus 格式的监控指标数据,可以使用 Prometheus 采集并存储监控指标,使用 Grafana 可视化监控指标。
 
 IoTDB、Prometheus、Grafana三者的关系如下图所示:
 
@@ -358,147 +367,126 @@ static_configs:
 
 [Grafana从Prometheus查询数据并绘图的文档](https://prometheus.io/docs/visualization/grafana/#grafana-support-for-prometheus)
 
-### 1.4.3. Apache IoTDB Dashboard
+### 5.2.4. Apache IoTDB Dashboard
 我们提供了Apache IoTDB Dashboard,在Grafana中显示的效果图如下所示:
 
 ![Apache IoTDB Dashboard](https://github.com/apache/iotdb-bin-resources/blob/main/docs/UserGuide/System%20Tools/Metrics/dashboard.png)
 
-#### 1.4.3.1. 获取方式
-1. 您可以在grafana-metrics-example文件夹下获取到对应不同iotdb版本的Dashboard的json文件。
+#### 5.2.4.1. 如何获取 Apache IoTDB Dashboard?
+
+1. 您可以在 grafana-metrics-example 文件夹下获取到对应不同 IoTDB 版本的 Dashboard 的 json 文件。
 2. 您可以访问[Grafana Dashboard官网](https://grafana.com/grafana/dashboards/)搜索`Apache IoTDB Dashboard`并使用
 
 在 Grafana 中创建 Dashboard 时,您可以选择 Import 刚刚下载的 json 文件,并为 Apache IoTDB Dashboard 选择对应的目标数据源。
 
-#### 1.4.3.2. Apache IoTDB StandAlone Dashboard 说明
+#### 5.2.4.2. Apache IoTDB StandAlone Dashboard 说明
 > 除特殊说明的监控项以外,以下监控项均保证在Important级别的监控框架中可用。
 
-1. `Overview`:系统概述
-   1. `The number of entity`:实体数量,目前包含时间序列的数量
-   2. `write point per minute`:每分钟系统累计写入点数
-   3. `database used memory`:每个 database 使用的内存大小
-2. `Interface`:接口
-   1. `The QPS of Interface`:系统接口每秒钟访问次数
-   2. `The time consumed of Interface`:系统接口的平均耗时
-   3. `Cache hit rate`:缓存命中率
-3. `Engine`:引擎
-   1. `Task number(pending and active)`:系统中不同状态的任务个数
-   2. `The time consumed of tasking(pending and active)`:系统中不同状态的任务的耗时
-4. `System`:系统
-   1. `The size of file`:IoTDB系统相关的文件大小,包括wal下的文件总大小、seq下的tsfile文件总大小、unseq下的tsfile文件总大小
-   2. `The number of file`:IoTDB系统相关的文件个数,包括wal下的文件个数、seq下的tsfile文件个数、unseq下的tsfile文件个数
-   3. `The number of GC(per minute)`:IoTDB每分钟的GC数量,包括Young GC和Full GC
-   4. `The time consumed of GC(per minute)`:IoTDB的每分钟平均GC耗时,包括Young GC和Full GC
-   5. `Heap Memory`:IoTDB的堆内存
-   6. `Off-heap Memory`:IoTDB的堆外内存
-   7. `The number of Java Thread`:IoTDB的不同状态线程数
-
-#### 1.4.3.3. Apache IoTDB ConfigNode Dashboard 说明
+- `Overview`:系统概述
+  - `The number of entity`:实体数量,目前包含时间序列的数量
+  - `write point per minute`:每分钟系统累计写入点数
+  - `database used memory`:每个 database 使用的内存大小
+- `Interface`:接口
+  - `The QPS of Interface`:系统接口每秒钟访问次数
+  - `The time consumed of Interface`:系统接口的平均耗时
+  - `Cache hit rate`:缓存命中率
+- `Engine`:引擎
+  - `Task number(pending and active)`:系统中不同状态的任务个数
+  - `The time consumed of tasking(pending and active)`:系统中不同状态的任务的耗时
+- `System`:系统
+  - `The size of file`:IoTDB系统相关的文件大小,包括wal下的文件总大小、seq下的tsfile文件总大小、unseq下的tsfile文件总大小
+  - `The number of file`:IoTDB系统相关的文件个数,包括wal下的文件个数、seq下的tsfile文件个数、unseq下的tsfile文件个数
+  - `The number of GC(per minute)`:IoTDB每分钟的GC数量,包括Young GC和Full GC
+  - `The time consumed of GC(per minute)`:IoTDB的每分钟平均GC耗时,包括Young GC和Full GC
+  - `Heap Memory`:IoTDB的堆内存
+  - `Off-heap Memory`:IoTDB的堆外内存
+  - `The number of Java Thread`:IoTDB的不同状态线程数
+
+#### 5.2.4.3. Apache IoTDB ConfigNode Dashboard 说明
 > 除特殊说明的监控项以外,以下监控项均保证在Important级别的监控框架中可用。
 
-1. `Overview`:系统概述
-   1. `Online ConfigNode`:正常运行ConfigNode个数
-   2. `Registered ConfigNode`:注册ConfigNode个数
-   3. `Unknown ConfigNode`:状态未知ConfigNode个数
-   4. `Online DataNode`:正常运行DataNode个数
-   5. `Registered DataNode`:注册DataNode个数
-   3. `Unknown DataNode`:状态未知DataNode个数
-   4. `TotalRegion`:Region总数量
-   5. `DataRegion`:DataRegion总数量
-   6. `SchemaRegion`:SchemaRegion总数量
-2. `Node Info`:节点信息
-   1. `The status of cluster node`:集群节点状态
-   2. `Leadership distribution`:Leader分布情况
-3. `Region`:Region分布情况
-   1. `Total Region in Node`:不同Node的Region总数量
-   2. `Region in Node`:不同Node的Region数量,包括SchemaRegion、DataRegion
-   3. `Region in Database`(Normal级别):不同数据库的Region数量,包括SchemaRegion、DataRegion
-   4. `Slot in Database`(Normal级别):不同数据库的Slot数量,包括DataSlot数量和SchemaSlot数量
-4. `System`:系统
-   1. `The number of GC(per minute)`:IoTDB每分钟的GC数量,包括Young GC和Full GC
-   2. `The time consumed of GC(per minute)`:IoTDB的每分钟平均GC耗时,包括Young GC和Full GC
-   3. `Heap Memory`:IoTDB的堆内存
-   4. `Off-heap Memory`:IoTDB的堆外内存
-   5. `The number of Java Thread`:IoTDB的不同状态线程数
-   6. `The time consumed of Interface`:系统接口的平均耗时
-   7. `CPU Load`:当前处理器的总负载
-   8. `Memory`:系统内存大小和已经使用的大小
-
-#### 1.4.3.4. Apache IoTDB DataNode Dashboard 说明
+- `Overview`:系统概述
+  - `Online ConfigNode`:正常运行ConfigNode个数
+  - `Registered ConfigNode`:注册ConfigNode个数
+  - `Unknown ConfigNode`:状态未知ConfigNode个数
+  - `Online DataNode`:正常运行DataNode个数
+  - `Registered DataNode`:注册DataNode个数
+  - `Unknown DataNode`:状态未知DataNode个数
+  - `TotalRegion`:Region总数量
+  - `DataRegion`:DataRegion总数量
+  - `SchemaRegion`:SchemaRegion总数量
+- `Node Info`:节点信息
+  - `The status of cluster node`:集群节点状态
+  - `Leadership distribution`:Leader分布情况
+- `Region`:Region分布情况
+  - `Total Region in Node`:不同Node的Region总数量
+  - `Region in Node`:不同Node的Region数量,包括SchemaRegion、DataRegion
+  - `Region in Database`(Normal级别):不同数据库的Region数量,包括SchemaRegion、DataRegion
+  - `Slot in Database`(Normal级别):不同数据库的Slot数量,包括DataSlot数量和SchemaSlot数量
+- `System`:系统
+  - `The number of GC(per minute)`:IoTDB每分钟的GC数量,包括Young GC和Full GC
+  - `The time consumed of GC(per minute)`:IoTDB的每分钟平均GC耗时,包括Young GC和Full GC
+  - `Heap Memory`:IoTDB的堆内存
+  - `Off-heap Memory`:IoTDB的堆外内存
+  - `The number of Java Thread`:IoTDB的不同状态线程数
+  - `The time consumed of Interface`:系统接口的平均耗时
+  - `CPU Load`:当前处理器的总负载
+  - `Memory`:系统内存大小和已经使用的大小
+
+#### 5.2.4.4. Apache IoTDB DataNode Dashboard 说明
 > 除特殊说明的监控项以外,以下监控项均保证在Important级别的监控框架中可用。
 
-1. `Overview`:系统概述
-   1. `The number of entity`:实体数量,目前包含时间序列的数量
-   2. `write point per minute`:每分钟系统累计写入点数
-   3. `database used memory`:每个 database 使用的内存大小
-   4. `Memory`:系统内存大小和已经使用的大小
-2. `Interface`:接口
-   1. `The QPS of Interface`:系统接口每秒钟访问次数
-   2. `The time consumed of Interface`:系统接口的平均耗时
-   3. `Cache hit Rate`:缓存命中率
-3. `Engine`:引擎
-   1. `Task number(pending and active)`:系统中不同状态的任务个数
-   2. `The time consumed of tasking(pending and active)`:系统中不同状态的任务的耗时
-4. `MultiLeader`:弱一致性共识协议
-   1. `MultiLeader Used Memory`:弱一致性共识层使用的内存大小
-   2. `MultiLeader Sync Index`:不同的Region的写入Index和同步Index
-   3. `MultiLeader Overview`:不同节点的同步总差距、总缓存的请求个数
-   4. `The time consumed of different stages(50%)`:不同阶段耗时的中位数
-   5. `The time consumed of different stages(75%)`:不同阶段耗时的上四分位数
-   6. `The time consumed of different stages(100%)`:不同阶段耗时的最大值
-   7. `MultiLeader Search Index Rate`:不同region的写入Index的增长速度
-   8. `MultiLeader Safe Index Rate`:不同region的同步Index的增长速度
-   9. `MultiLeader LogDispatcher Request Size`:不同的LogDispatcherThread缓存的请求个数
-   10. `Sync Lag`:每个region的同步index差距
-   11. `Min Peer Sync Lag`:每个region的写入index和同步最快的LogDispatcherThread的同步index之间的差距
-   12. `Sync speed diff of Peers`:每个region中同步最快的LogDispatcherThread与同步最慢的LogDispatcherThread之间的同步index差距
-5. `CPU`:处理器
-   1. `CPU Load`:当前处理器的总负载
-   2. `Process CPU Load`:IoTDB进程占用处理器的负载
-6. `File System`:文件系统
-   1. `The size of file`:IoTDB系统相关的文件大小,包括wal下的文件总大小、seq下的tsfile文件总大小、unseq下的tsfile文件总大小
-   2. `The number of file`:IoTDB系统相关的文件个数,包括wal下的文件个数、seq下的tsfile文件个数、unseq下的tsfile文件个数
-   3. `Disk Space`:当前data目录所挂载的磁盘总大小和剩余大小
-7. `JVM`:系统
-   1. `The number of GC(per minute)`:IoTDB每分钟的GC数量,包括Young GC和Full GC
-   2. `The time consumed of GC(per minute)`:IoTDB的每分钟平均GC耗时,包括Young GC和Full GC
-   3. `Heap Memory`:IoTDB的堆内存
-   4. `Off-heap Memory`:IoTDB的堆外内存
-   5. `The number of Java Thread`:IoTDB的不同状态线程数
-
-# 2. 系统状态监控
-进入 Jconsole 监控页面后,首先看到的是 IoTDB 各类运行情况的概览。在这里,您可以看到堆内存信息、线程信息、类信息以及服务器的 CPU 使用情况。
-
-# 3. JMX MBean 监控
-通过使用 JConsole 工具并与 JMX 连接,您可以查看一些系统统计信息和参数。
-本节描述如何使用 JConsole 的 "Mbean" 选项卡来监视 IoTDB 的一些系统配置、写入数据统计等等。 连接到 JMX 后,您可以通过 "MBeans" 标签找到名为 "org.apache.iotdb.service" 的 "MBean",如下图所示。
-
-<img style="width:100%; max-width:800px; max-height:600px; margin-left:auto; margin-right:auto; display:block;" src="https://user-images.githubusercontent.com/46039728/149951720-707f1ee8-32ee-4fde-9252-048caebd232e.png"> <br>
-
-# 4. 性能监控
-
-## 4.1. 介绍
-
-性能监控模块用来监控 IOTDB 每一个操作的耗时,以便用户更好的了解数据库的整体性能。该模块会统计每一种操作的下四分位数、中位数、上四分位数和最大值。目前操作包括`EXECUTE_BATCH`、`EXECUTE_ONE_SQL_IN_BATCH`和`EXECUTE_QUERY`。
-
-## 4.2. 配置参数
-
-- 配置文件位置
-  - datanode:conf/iotdb-datanode.properties
-  - confignode:conf/iotdb-confignode.properties
-
-<center>
-
-**表-配置参数以及描述项**
-
-| 参数                      | 默认值 | 描述                 |
-| :------------------------ | :----- | :------------------- |
-| enable\_performance\_stat | false  | 是否开启性能监控模块 |
-</center>
-
-# 5. Cache 命中率统计
-
-为了提高查询性能,IOTDB 对 ChunkMetaData 和 TsFileMetaData 进行了缓存。用户可以通过 debug 级别的日志以及 MXBean 两种方式来查看缓存的命中率,并根据缓存命中率以及系统内存来调节缓存所使用的内存大小。使用 MXBean 查看缓存命中率的方法为:
-1. 通过端口 31999 连接 jconsole,并在上方菜单项中选择‘MBean’. 
-2. 展开侧边框并选择 'org.apache.iotdb.db.service'. 将会得到如下图所示结果:
-
-<img style="width:100%; max-width:800px; max-height:600px; margin-left:auto; margin-right:auto; display:block;" src="https://user-images.githubusercontent.com/19167280/112426760-73e3da80-8d73-11eb-9a8f-9232d1f2033b.png">
\ No newline at end of file
+- `Overview`:系统概述
+  - `The number of entity`:实体数量,目前包含时间序列的数量
+  - `write point per minute`:每分钟系统累计写入点数
+  - `database used memory`:每个 database 使用的内存大小
+  - `Memory`:系统内存大小和已经使用的大小
+- `Interface`:接口
+  - `The QPS of Interface`:系统接口每秒钟访问次数
+  - `The time consumed of Interface`:系统接口的平均耗时
+  - `Cache hit Rate`:缓存命中率
+- `Engine`:引擎
+  - `Task number(pending and active)`:系统中不同状态的任务个数
+  - `The time consumed of tasking(pending and active)`:系统中不同状态的任务的耗时
+- `MultiLeader`:弱一致性共识协议
+  - `MultiLeader Used Memory`:弱一致性共识层使用的内存大小
+  - `MultiLeader Sync Index`:不同的Region的写入Index和同步Index
+  - `MultiLeader Overview`:不同节点的同步总差距、总缓存的请求个数
+  - `The time consumed of different stages(50%)`:不同阶段耗时的中位数
+  - `The time consumed of different stages(75%)`:不同阶段耗时的上四分位数
+  - `The time consumed of different stages(100%)`:不同阶段耗时的最大值
+  - `MultiLeader Search Index Rate`:不同Region的写入Index的增长速度
+  - `MultiLeader Safe Index Rate`:不同Region的同步Index的增长速度
+  - `MultiLeader LogDispatcher Request Size`:不同的LogDispatcherThread缓存的请求个数
+  - `Sync Lag`:每个Region的同步Index差距
+  - `Min Peer Sync Lag`:每个Region的写入Index和同步最快的LogDispatcherThread的同步Index之间的差距
+  - `Sync speed diff of Peers`:每个Region中同步最快的LogDispatcherThread与同步最慢的LogDispatcherThread之间的同步Index差距
+- `CPU`:处理器
+  - `CPU Load`:当前处理器的总负载
+  - `Process CPU Load`:IoTDB进程占用处理器的负载
+- `File System`:文件系统
+  - `The size of file`:IoTDB系统相关的文件大小,包括wal下的文件总大小、seq下的tsfile文件总大小、unseq下的tsfile文件总大小
+  - `The number of file`:IoTDB系统相关的文件个数,包括wal下的文件个数、seq下的tsfile文件个数、unseq下的tsfile文件个数
+  - `Disk Space`:当前data目录所挂载的磁盘总大小和剩余大小
+- `JVM`:Java虚拟机
+  - `The number of GC(per minute)`:IoTDB每分钟的GC数量,包括Young GC和Full GC
+  - `The time consumed of GC(per minute)`:IoTDB的每分钟平均GC耗时,包括Young GC和Full GC
+  - `Heap Memory`:IoTDB的堆内存
+  - `Off-heap Memory`:IoTDB的堆外内存
+  - `The number of Java Thread`:IoTDB的不同状态线程数
+
+## 5.3. 使用 IoTDB 方式
+
+### 5.3.1. 监控指标的 IoTDB 映射关系
+> 对于 Metric Name 为 name, Tags 为 K1=V1, ..., Kn=Vn 的监控指标有如下映射,以默认写到 root.__system.metric 为例
+
+| 监控指标类型     | 映射关系                                                                                                                                                                                                                                                                                                                                                                                                                                                                                          [...]
+| ---------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- [...]
+| Counter          | root.__system.metric.name.`K1=V1`...`Kn=Vn`.value                                                                                                                                                                                                                                                                                                                                                                                                                                       [...]
+| AutoGauge、Gauge | root.__system.metric.name.`K1=V1`...`Kn=Vn`.value                                                                                                                                                                                                                                                                                                                                                                                                                                        [...]
+| Histogram        | root.__system.metric.name.`K1=V1`...`Kn=Vn`.count <br> root.__system.metric.name.`K1=V1`...`Kn=Vn`.max <br> root.__system.metric.name.`K1=V1`...`Kn=Vn`.sum <br> root.__system.metric.name.`K1=V1`...`Kn=Vn`.p0 <br> root.__system.metric.name.`K1=V1`...`Kn=Vn`.p25 <br> root.__system.metric.name.`K1=V1`...`Kn=Vn`.p50 <br> root.__system.metric.name.`K1=V1`...`Kn=Vn`.p75 <br> root.__system.metric.name.`K1=V1`...`Kn=Vn`.p100                                                    [...]
+| Rate             | root.__system.metric.name.`K1=V1`...`Kn=Vn`.count <br> root.__system.metric.name.`K1=V1`...`Kn=Vn`.mean <br> root.__system.metric.name.`K1=V1`...`Kn=Vn`.m1 <br> root.__system.metric.name.`K1=V1`...`Kn=Vn`.m5 <br> root.__system.metric.name.`K1=V1`...`Kn=Vn`.m15                                                                                                                                                                                                                    [...]
+| Timer            | root.__system.metric.name.`K1=V1`...`Kn=Vn`.count <br> root.__system.metric.name.`K1=V1`...`Kn=Vn`.max <br> root.__system.metric.name.`K1=V1`...`Kn=Vn`.mean <br> root.__system.metric.name.`K1=V1`...`Kn=Vn`.sum <br> root.__system.metric.name.`K1=V1`...`Kn=Vn`.p0 <br> root.__system.metric.name.`K1=V1`...`Kn=Vn`.p25 <br> root.__system.metric.name.`K1=V1`...`Kn=Vn`.p50 <br> root.__system.metric.name.`K1=V1`...`Kn=Vn`.p75 <br> root.__system.metric.name.`K1=V1`...`Kn=Vn`.p [...]
+
+### 5.3.2. 获取监控指标
+根据如上的映射关系,可以构造相应的 IoTDB 查询语句来获取监控指标。
\ No newline at end of file
diff --git a/metrics/interface/src/main/java/org/apache/iotdb/metrics/config/MetricConfig.java b/metrics/interface/src/main/java/org/apache/iotdb/metrics/config/MetricConfig.java
index 046c11684f..a2558f8b9b 100644
--- a/metrics/interface/src/main/java/org/apache/iotdb/metrics/config/MetricConfig.java
+++ b/metrics/interface/src/main/java/org/apache/iotdb/metrics/config/MetricConfig.java
@@ -30,10 +30,6 @@ import java.util.List;
 import java.util.Objects;
 
 public class MetricConfig {
-
-  /** Is the statistic of operation performance enabled */
-  private Boolean enablePerformanceStat = false;
-
   /** The type of the implementation of metric framework */
   private MetricFrameType metricFrameType = MetricFrameType.MICROMETER;
 
@@ -60,14 +56,6 @@ public class MetricConfig {
   /** The port of iotdb instance that is monitored */
   private Integer rpcPort = 6667;
 
-  public Boolean getEnablePerformanceStat() {
-    return enablePerformanceStat;
-  }
-
-  public void setEnablePerformanceStat(Boolean enablePerformanceStat) {
-    this.enablePerformanceStat = enablePerformanceStat;
-  }
-
   public MetricFrameType getMetricFrameType() {
     return metricFrameType;
   }
@@ -145,7 +133,6 @@ public class MetricConfig {
 
   /** Copy properties from another metric config */
   public void copy(MetricConfig newMetricConfig) {
-    enablePerformanceStat = newMetricConfig.getEnablePerformanceStat();
     metricFrameType = newMetricConfig.getMetricFrameType();
     metricReporterList = newMetricConfig.getMetricReporterList();
     metricLevel = newMetricConfig.getMetricLevel();
@@ -170,8 +157,7 @@ public class MetricConfig {
       return false;
     }
     MetricConfig anotherMetricConfig = (MetricConfig) obj;
-    return enablePerformanceStat.equals(anotherMetricConfig.getEnablePerformanceStat())
-        && metricFrameType.equals(anotherMetricConfig.getMetricFrameType())
+    return metricFrameType.equals(anotherMetricConfig.getMetricFrameType())
         && metricReporterList.equals(anotherMetricConfig.getMetricReporterList())
         && metricLevel.equals(anotherMetricConfig.getMetricLevel())
         && asyncCollectPeriodInSecond.equals(anotherMetricConfig.getAsyncCollectPeriodInSecond())
diff --git a/metrics/interface/src/main/java/org/apache/iotdb/metrics/config/MetricConfigDescriptor.java b/metrics/interface/src/main/java/org/apache/iotdb/metrics/config/MetricConfigDescriptor.java
index d8d3424d64..ef28f8c779 100644
--- a/metrics/interface/src/main/java/org/apache/iotdb/metrics/config/MetricConfigDescriptor.java
+++ b/metrics/interface/src/main/java/org/apache/iotdb/metrics/config/MetricConfigDescriptor.java
@@ -51,10 +51,7 @@ public class MetricConfigDescriptor {
     MetricConfig newMetricConfig = generateFromProperties(properties);
     ReloadLevel reloadLevel = ReloadLevel.NOTHING;
     if (!metricConfig.equals(newMetricConfig)) {
-      if (!metricConfig
-              .getEnablePerformanceStat()
-              .equals(newMetricConfig.getEnablePerformanceStat())
-          || !metricConfig.getMetricFrameType().equals(newMetricConfig.getMetricFrameType())
+      if (!metricConfig.getMetricFrameType().equals(newMetricConfig.getMetricFrameType())
           || !metricConfig.getMetricLevel().equals(newMetricConfig.getMetricLevel())
           || !metricConfig
               .getAsyncCollectPeriodInSecond()
@@ -79,13 +76,6 @@ public class MetricConfigDescriptor {
   private MetricConfig generateFromProperties(Properties properties) {
     MetricConfig loadConfig = new MetricConfig();
 
-    loadConfig.setEnablePerformanceStat(
-        Boolean.parseBoolean(
-            getProperty(
-                "enable_performance_stat",
-                String.valueOf(loadConfig.getEnablePerformanceStat()),
-                properties)));
-
     String reporterList =
         getProperty(
             "metric_reporter_list",
diff --git a/metrics/interface/src/main/java/org/apache/iotdb/metrics/metricsets/jvm/JvmClassLoaderMetrics.java b/metrics/interface/src/main/java/org/apache/iotdb/metrics/metricsets/jvm/JvmClassLoaderMetrics.java
index 02cf75cde7..32e5a85710 100644
--- a/metrics/interface/src/main/java/org/apache/iotdb/metrics/metricsets/jvm/JvmClassLoaderMetrics.java
+++ b/metrics/interface/src/main/java/org/apache/iotdb/metrics/metricsets/jvm/JvmClassLoaderMetrics.java
@@ -33,12 +33,12 @@ public class JvmClassLoaderMetrics implements IMetricSet {
   public void bindTo(AbstractMetricService metricService) {
     ClassLoadingMXBean classLoadingBean = ManagementFactory.getClassLoadingMXBean();
     metricService.createAutoGauge(
-        "jvm.classes.loaded.classes",
+        "jvm_classes_loaded_classes",
         MetricLevel.IMPORTANT,
         classLoadingBean,
         ClassLoadingMXBean::getLoadedClassCount);
     metricService.createAutoGauge(
-        "jvm.classes.unloaded.classes",
+        "jvm_classes_unloaded_classes",
         MetricLevel.IMPORTANT,
         classLoadingBean,
         ClassLoadingMXBean::getUnloadedClassCount);
@@ -46,7 +46,7 @@ public class JvmClassLoaderMetrics implements IMetricSet {
 
   @Override
   public void unbindFrom(AbstractMetricService metricService) {
-    metricService.remove(MetricType.AUTO_GAUGE, "jvm.classes.loaded.classes");
-    metricService.remove(MetricType.AUTO_GAUGE, "jvm.classes.unloaded.classes");
+    metricService.remove(MetricType.AUTO_GAUGE, "jvm_classes_loaded_classes");
+    metricService.remove(MetricType.AUTO_GAUGE, "jvm_classes_unloaded_classes");
   }
 }
diff --git a/metrics/interface/src/main/java/org/apache/iotdb/metrics/metricsets/jvm/JvmCompileMetrics.java b/metrics/interface/src/main/java/org/apache/iotdb/metrics/metricsets/jvm/JvmCompileMetrics.java
index 20363d3ef6..69e11ba28d 100644
--- a/metrics/interface/src/main/java/org/apache/iotdb/metrics/metricsets/jvm/JvmCompileMetrics.java
+++ b/metrics/interface/src/main/java/org/apache/iotdb/metrics/metricsets/jvm/JvmCompileMetrics.java
@@ -34,7 +34,7 @@ public class JvmCompileMetrics implements IMetricSet {
     CompilationMXBean compilationBean = ManagementFactory.getCompilationMXBean();
     if (compilationBean != null && compilationBean.isCompilationTimeMonitoringSupported()) {
       metricService.createAutoGauge(
-          "jvm.compilation.time.ms",
+          "jvm_compilation_time_ms",
           MetricLevel.IMPORTANT,
           compilationBean,
           CompilationMXBean::getTotalCompilationTime,
@@ -48,7 +48,7 @@ public class JvmCompileMetrics implements IMetricSet {
     CompilationMXBean compilationBean = ManagementFactory.getCompilationMXBean();
     if (compilationBean != null && compilationBean.isCompilationTimeMonitoringSupported()) {
       metricService.remove(
-          MetricType.AUTO_GAUGE, "jvm.compilation.time.ms", "compiler", compilationBean.getName());
+          MetricType.AUTO_GAUGE, "jvm_compilation_time_ms", "compiler", compilationBean.getName());
     }
   }
 }
diff --git a/metrics/interface/src/main/java/org/apache/iotdb/metrics/metricsets/jvm/JvmGcMetrics.java b/metrics/interface/src/main/java/org/apache/iotdb/metrics/metricsets/jvm/JvmGcMetrics.java
index 3b31c1b473..65693b89f7 100644
--- a/metrics/interface/src/main/java/org/apache/iotdb/metrics/metricsets/jvm/JvmGcMetrics.java
+++ b/metrics/interface/src/main/java/org/apache/iotdb/metrics/metricsets/jvm/JvmGcMetrics.java
@@ -86,20 +86,20 @@ public class JvmGcMetrics implements IMetricSet, AutoCloseable {
 
     AtomicLong maxDataSize = new AtomicLong((long) maxLongLivedPoolBytes);
     metricService.createAutoGauge(
-        "jvm.gc.max.data.size.bytes", MetricLevel.IMPORTANT, maxDataSize, AtomicLong::get);
+        "jvm_gc_max_data_size_bytes", MetricLevel.IMPORTANT, maxDataSize, AtomicLong::get);
 
     AtomicLong liveDataSize = new AtomicLong();
     metricService.createAutoGauge(
-        "jvm.gc.live.data.size.bytes", MetricLevel.IMPORTANT, liveDataSize, AtomicLong::get);
+        "jvm_gc_live_data_size_bytes", MetricLevel.IMPORTANT, liveDataSize, AtomicLong::get);
 
     Counter allocatedBytes =
-        metricService.getOrCreateCounter("jvm.gc.memory.allocated.bytes", MetricLevel.IMPORTANT);
+        metricService.getOrCreateCounter("jvm_gc_memory_allocated_bytes", MetricLevel.IMPORTANT);
 
     Counter promotedBytes =
         (oldGenPoolName == null)
             ? null
             : metricService.getOrCreateCounter(
-                "jvm.gc.memory.promoted.bytes", MetricLevel.IMPORTANT);
+                "jvm_gc_memory_promoted_bytes", MetricLevel.IMPORTANT);
 
     // start watching for GC notifications
     final AtomicLong heapPoolSizeAfterGc = new AtomicLong();
@@ -120,9 +120,9 @@ public class JvmGcMetrics implements IMetricSet, AutoCloseable {
             long duration = gcInfo.getDuration();
             String timerName;
             if (isConcurrentPhase(gcCause, notificationInfo.getGcName())) {
-              timerName = "jvm.gc.concurrent.phase.time";
+              timerName = "jvm_gc_concurrent_phase_time";
             } else {
-              timerName = "jvm.gc.pause";
+              timerName = "jvm_gc_pause";
             }
             Timer timer =
                 metricService.getOrCreateTimer(
@@ -204,12 +204,12 @@ public class JvmGcMetrics implements IMetricSet, AutoCloseable {
       return;
     }
 
-    metricService.remove(MetricType.AUTO_GAUGE, "jvm.gc.max.data.size.bytes");
-    metricService.remove(MetricType.AUTO_GAUGE, "jvm.gc.live.data.size.bytes");
-    metricService.remove(MetricType.COUNTER, "jvm.gc.memory.allocated.bytes");
+    metricService.remove(MetricType.AUTO_GAUGE, "jvm_gc_max_data_size_bytes");
+    metricService.remove(MetricType.AUTO_GAUGE, "jvm_gc_live_data_size_bytes");
+    metricService.remove(MetricType.COUNTER, "jvm_gc_memory_allocated_bytes");
 
     if (oldGenPoolName != null) {
-      metricService.remove(MetricType.COUNTER, "jvm.gc.memory.promoted.bytes");
+      metricService.remove(MetricType.COUNTER, "jvm_gc_memory_promoted_bytes");
     }
 
     // start watching for GC notifications
@@ -227,9 +227,9 @@ public class JvmGcMetrics implements IMetricSet, AutoCloseable {
             String gcAction = notificationInfo.getGcAction();
             String timerName;
             if (isConcurrentPhase(gcCause, notificationInfo.getGcName())) {
-              timerName = "jvm.gc.concurrent.phase.time";
+              timerName = "jvm_gc_concurrent_phase_time";
             } else {
-              timerName = "jvm.gc.pause";
+              timerName = "jvm_gc_pause";
             }
             metricService.remove(MetricType.TIMER, timerName, "action", gcAction, "cause", gcCause);
           };
diff --git a/metrics/interface/src/main/java/org/apache/iotdb/metrics/metricsets/jvm/JvmMemoryMetrics.java b/metrics/interface/src/main/java/org/apache/iotdb/metrics/metricsets/jvm/JvmMemoryMetrics.java
index e126662c79..4fdfab2d4c 100644
--- a/metrics/interface/src/main/java/org/apache/iotdb/metrics/metricsets/jvm/JvmMemoryMetrics.java
+++ b/metrics/interface/src/main/java/org/apache/iotdb/metrics/metricsets/jvm/JvmMemoryMetrics.java
@@ -37,7 +37,7 @@ public class JvmMemoryMetrics implements IMetricSet {
     for (BufferPoolMXBean bufferPoolBean :
         ManagementFactory.getPlatformMXBeans(BufferPoolMXBean.class)) {
       metricService.createAutoGauge(
-          "jvm.buffer.count.buffers",
+          "jvm_buffer_count_buffers",
           MetricLevel.IMPORTANT,
           bufferPoolBean,
           BufferPoolMXBean::getCount,
@@ -45,7 +45,7 @@ public class JvmMemoryMetrics implements IMetricSet {
           bufferPoolBean.getName());
 
       metricService.createAutoGauge(
-          "jvm.buffer.memory.used.bytes",
+          "jvm_buffer_memory_used_bytes",
           MetricLevel.IMPORTANT,
           bufferPoolBean,
           BufferPoolMXBean::getMemoryUsed,
@@ -53,7 +53,7 @@ public class JvmMemoryMetrics implements IMetricSet {
           bufferPoolBean.getName());
 
       metricService.createAutoGauge(
-          "jvm.buffer.total.capacity.bytes",
+          "jvm_buffer_total_capacity_bytes",
           MetricLevel.IMPORTANT,
           bufferPoolBean,
           BufferPoolMXBean::getTotalCapacity,
@@ -66,7 +66,7 @@ public class JvmMemoryMetrics implements IMetricSet {
       String area = MemoryType.HEAP.equals(memoryPoolBean.getType()) ? "heap" : "nonheap";
 
       metricService.createAutoGauge(
-          "jvm.memory.used.bytes",
+          "jvm_memory_used_bytes",
           MetricLevel.IMPORTANT,
           memoryPoolBean,
           (mem) -> (long) JvmUtils.getUsageValue(mem, MemoryUsage::getUsed),
@@ -76,7 +76,7 @@ public class JvmMemoryMetrics implements IMetricSet {
           area);
 
       metricService.createAutoGauge(
-          "jvm.memory.committed.bytes",
+          "jvm_memory_committed_bytes",
           MetricLevel.IMPORTANT,
           memoryPoolBean,
           (mem) -> (long) JvmUtils.getUsageValue(mem, MemoryUsage::getCommitted),
@@ -86,7 +86,7 @@ public class JvmMemoryMetrics implements IMetricSet {
           area);
 
       metricService.createAutoGauge(
-          "jvm.memory.max.bytes",
+          "jvm_memory_max_bytes",
           MetricLevel.IMPORTANT,
           memoryPoolBean,
           (mem) -> (long) JvmUtils.getUsageValue(mem, MemoryUsage::getMax),
@@ -102,13 +102,13 @@ public class JvmMemoryMetrics implements IMetricSet {
     for (BufferPoolMXBean bufferPoolBean :
         ManagementFactory.getPlatformMXBeans(BufferPoolMXBean.class)) {
       metricService.remove(
-          MetricType.AUTO_GAUGE, "jvm.buffer.count.buffers", "id", bufferPoolBean.getName());
+          MetricType.AUTO_GAUGE, "jvm_buffer_count_buffers", "id", bufferPoolBean.getName());
 
       metricService.remove(
-          MetricType.AUTO_GAUGE, "jvm.buffer.memory.used.bytes", "id", bufferPoolBean.getName());
+          MetricType.AUTO_GAUGE, "jvm_buffer_memory_used_bytes", "id", bufferPoolBean.getName());
 
       metricService.remove(
-          MetricType.AUTO_GAUGE, "jvm.buffer.total.capacity.bytes", "id", bufferPoolBean.getName());
+          MetricType.AUTO_GAUGE, "jvm_buffer_total_capacity_bytes", "id", bufferPoolBean.getName());
     }
 
     for (MemoryPoolMXBean memoryPoolBean :
@@ -117,7 +117,7 @@ public class JvmMemoryMetrics implements IMetricSet {
 
       metricService.remove(
           MetricType.AUTO_GAUGE,
-          "jvm.memory.used.bytes",
+          "jvm_memory_used_bytes",
           "id",
           memoryPoolBean.getName(),
           "area",
@@ -125,7 +125,7 @@ public class JvmMemoryMetrics implements IMetricSet {
 
       metricService.remove(
           MetricType.AUTO_GAUGE,
-          "jvm.memory.committed.bytes",
+          "jvm_memory_committed_bytes",
           "id",
           memoryPoolBean.getName(),
           "area",
@@ -133,7 +133,7 @@ public class JvmMemoryMetrics implements IMetricSet {
 
       metricService.remove(
           MetricType.AUTO_GAUGE,
-          "jvm.memory.max.bytes",
+          "jvm_memory_max_bytes",
           "id",
           memoryPoolBean.getName(),
           "area",
diff --git a/metrics/interface/src/main/java/org/apache/iotdb/metrics/metricsets/jvm/JvmThreadMetrics.java b/metrics/interface/src/main/java/org/apache/iotdb/metrics/metricsets/jvm/JvmThreadMetrics.java
index 38a673f6e7..017e314fdd 100644
--- a/metrics/interface/src/main/java/org/apache/iotdb/metrics/metricsets/jvm/JvmThreadMetrics.java
+++ b/metrics/interface/src/main/java/org/apache/iotdb/metrics/metricsets/jvm/JvmThreadMetrics.java
@@ -35,19 +35,19 @@ public class JvmThreadMetrics implements IMetricSet {
     ThreadMXBean threadBean = ManagementFactory.getThreadMXBean();
 
     metricService.createAutoGauge(
-        "jvm.threads.peak.threads",
+        "jvm_threads_peak_threads",
         MetricLevel.IMPORTANT,
         threadBean,
         ThreadMXBean::getPeakThreadCount);
 
     metricService.createAutoGauge(
-        "jvm.threads.daemon.threads",
+        "jvm_threads_daemon_threads",
         MetricLevel.IMPORTANT,
         threadBean,
         ThreadMXBean::getDaemonThreadCount);
 
     metricService.createAutoGauge(
-        "jvm.threads.live.threads",
+        "jvm_threads_live_threads",
         MetricLevel.IMPORTANT,
         threadBean,
         ThreadMXBean::getThreadCount);
@@ -56,7 +56,7 @@ public class JvmThreadMetrics implements IMetricSet {
       threadBean.getAllThreadIds();
       for (Thread.State state : Thread.State.values()) {
         metricService.createAutoGauge(
-            "jvm.threads.states.threads",
+            "jvm_threads_states_threads",
             MetricLevel.IMPORTANT,
             threadBean,
             (bean) -> getThreadStateCount(bean, state),
@@ -73,15 +73,15 @@ public class JvmThreadMetrics implements IMetricSet {
   public void unbindFrom(AbstractMetricService metricService) {
     ThreadMXBean threadBean = ManagementFactory.getThreadMXBean();
 
-    metricService.remove(MetricType.AUTO_GAUGE, "jvm.threads.peak.threads");
-    metricService.remove(MetricType.AUTO_GAUGE, "jvm.threads.daemon.threads");
-    metricService.remove(MetricType.AUTO_GAUGE, "jvm.threads.live.threads");
+    metricService.remove(MetricType.AUTO_GAUGE, "jvm_threads_peak_threads");
+    metricService.remove(MetricType.AUTO_GAUGE, "jvm_threads_daemon_threads");
+    metricService.remove(MetricType.AUTO_GAUGE, "jvm_threads_live_threads");
 
     try {
       threadBean.getAllThreadIds();
       for (Thread.State state : Thread.State.values()) {
         metricService.remove(
-            MetricType.AUTO_GAUGE, "jvm.threads.states.threads", "state", getStateTagValue(state));
+            MetricType.AUTO_GAUGE, "jvm_threads_states_threads", "state", getStateTagValue(state));
       }
     } catch (Error error) {
       // An error will be thrown for unsupported operations
diff --git a/metrics/interface/src/main/java/org/apache/iotdb/metrics/metricsets/logback/LogbackMetrics.java b/metrics/interface/src/main/java/org/apache/iotdb/metrics/metricsets/logback/LogbackMetrics.java
index 6790c31b6c..2e2f8013a9 100644
--- a/metrics/interface/src/main/java/org/apache/iotdb/metrics/metricsets/logback/LogbackMetrics.java
+++ b/metrics/interface/src/main/java/org/apache/iotdb/metrics/metricsets/logback/LogbackMetrics.java
@@ -91,11 +91,11 @@ public class LogbackMetrics implements IMetricSet {
         for (MetricsTurboFilter addMetricsTurboFilter : metricsTurboFilters.values()) {
           loggerContext.getTurboFilterList().remove(addMetricsTurboFilter);
         }
-        metricService.remove(MetricType.COUNTER, "logback.events", "level", "error");
-        metricService.remove(MetricType.COUNTER, "logback.events", "level", "warn");
-        metricService.remove(MetricType.COUNTER, "logback.events", "level", "info");
-        metricService.remove(MetricType.COUNTER, "logback.events", "level", "debug");
-        metricService.remove(MetricType.COUNTER, "logback.events", "level", "trace");
+        metricService.remove(MetricType.COUNTER, "logback_events", "level", "error");
+        metricService.remove(MetricType.COUNTER, "logback_events", "level", "warn");
+        metricService.remove(MetricType.COUNTER, "logback_events", "level", "info");
+        metricService.remove(MetricType.COUNTER, "logback_events", "level", "debug");
+        metricService.remove(MetricType.COUNTER, "logback_events", "level", "trace");
       }
     } catch (Exception e) {
       logger.error("Failed to remove LogBackMetrics.");
diff --git a/metrics/interface/src/main/java/org/apache/iotdb/metrics/metricsets/logback/MetricsTurboFilter.java b/metrics/interface/src/main/java/org/apache/iotdb/metrics/metricsets/logback/MetricsTurboFilter.java
index a569226533..4e8506d5fc 100644
--- a/metrics/interface/src/main/java/org/apache/iotdb/metrics/metricsets/logback/MetricsTurboFilter.java
+++ b/metrics/interface/src/main/java/org/apache/iotdb/metrics/metricsets/logback/MetricsTurboFilter.java
@@ -38,19 +38,19 @@ public class MetricsTurboFilter extends TurboFilter {
 
   MetricsTurboFilter(AbstractMetricService metricService) {
     errorCounter =
-        metricService.getOrCreateCounter("logback.events", MetricLevel.IMPORTANT, "level", "error");
+        metricService.getOrCreateCounter("logback_events", MetricLevel.IMPORTANT, "level", "error");
 
     warnCounter =
-        metricService.getOrCreateCounter("logback.events", MetricLevel.IMPORTANT, "level", "warn");
+        metricService.getOrCreateCounter("logback_events", MetricLevel.IMPORTANT, "level", "warn");
 
     infoCounter =
-        metricService.getOrCreateCounter("logback.events", MetricLevel.IMPORTANT, "level", "info");
+        metricService.getOrCreateCounter("logback_events", MetricLevel.IMPORTANT, "level", "info");
 
     debugCounter =
-        metricService.getOrCreateCounter("logback.events", MetricLevel.IMPORTANT, "level", "debug");
+        metricService.getOrCreateCounter("logback_events", MetricLevel.IMPORTANT, "level", "debug");
 
     traceCounter =
-        metricService.getOrCreateCounter("logback.events", MetricLevel.IMPORTANT, "level", "trace");
+        metricService.getOrCreateCounter("logback_events", MetricLevel.IMPORTANT, "level", "trace");
   }
 
   @Override
diff --git a/metrics/interface/src/test/java/org/apache/iotdb/metrics/config/MetricConfigTest.java b/metrics/interface/src/test/java/org/apache/iotdb/metrics/config/MetricConfigTest.java
index 12adf23018..8987d6997a 100644
--- a/metrics/interface/src/test/java/org/apache/iotdb/metrics/config/MetricConfigTest.java
+++ b/metrics/interface/src/test/java/org/apache/iotdb/metrics/config/MetricConfigTest.java
@@ -28,7 +28,6 @@ import org.junit.Test;
 import java.util.Properties;
 
 import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertTrue;
 
 public class MetricConfigTest {
   @Test
@@ -53,7 +52,6 @@ public class MetricConfigTest {
 
     MetricConfig metricConfig = MetricConfigDescriptor.getInstance().getMetricConfig();
 
-    assertTrue(metricConfig.getEnablePerformanceStat());
     assertEquals(3, metricConfig.getMetricReporterList().size());
     assertEquals(MetricFrameType.DROPWIZARD, metricConfig.getMetricFrameType());
     assertEquals(MetricLevel.ALL, metricConfig.getMetricLevel());
@@ -94,7 +92,6 @@ public class MetricConfigTest {
 
     MetricConfig metricConfig = MetricConfigDescriptor.getInstance().getMetricConfig();
 
-    assertTrue(metricConfig.getEnablePerformanceStat());
     assertEquals(3, metricConfig.getMetricReporterList().size());
     assertEquals(MetricFrameType.DROPWIZARD, metricConfig.getMetricFrameType());
     assertEquals(MetricLevel.ALL, metricConfig.getMetricLevel());
diff --git a/node-commons/src/main/java/org/apache/iotdb/commons/service/metric/enums/Metric.java b/node-commons/src/main/java/org/apache/iotdb/commons/service/metric/enums/Metric.java
index b1f8a316e6..cb6ea3f926 100644
--- a/node-commons/src/main/java/org/apache/iotdb/commons/service/metric/enums/Metric.java
+++ b/node-commons/src/main/java/org/apache/iotdb/commons/service/metric/enums/Metric.java
@@ -21,6 +21,7 @@ package org.apache.iotdb.commons.service.metric.enums;
 
 public enum Metric {
   ENTRY,
+  OPERATION,
   COST_TASK,
   QUEUE,
   FILE_SIZE,
diff --git a/server/src/assembly/resources/conf/iotdb-datanode.properties b/server/src/assembly/resources/conf/iotdb-datanode.properties
index d6d4ccd398..7eea86f87f 100644
--- a/server/src/assembly/resources/conf/iotdb-datanode.properties
+++ b/server/src/assembly/resources/conf/iotdb-datanode.properties
@@ -214,10 +214,6 @@ dn_target_config_node_list=127.0.0.1:22277
 ### Metric Configuration
 ####################
 
-# Whether statistic operation performance
-# Datatype: boolean
-# dn_enable_performance_stat=false
-
 # The reporters of metric module to report metrics
 # If there are more than one reporter, please separate them by commas ",".
 # Options: [JMX, PROMETHEUS, IOTDB]
diff --git a/server/src/main/java/org/apache/iotdb/db/engine/storagegroup/DataRegionMetrics.java b/server/src/main/java/org/apache/iotdb/db/engine/storagegroup/DataRegionMetrics.java
index 12065aaf67..194ea37822 100644
--- a/server/src/main/java/org/apache/iotdb/db/engine/storagegroup/DataRegionMetrics.java
+++ b/server/src/main/java/org/apache/iotdb/db/engine/storagegroup/DataRegionMetrics.java
@@ -45,7 +45,7 @@ public class DataRegionMetrics implements IMetricSet {
         dataRegion,
         DataRegion::getMemCost,
         Tag.NAME.toString(),
-        "storageGroup_" + storageGroupName);
+        "database_" + storageGroupName);
   }
 
   @Override
@@ -54,7 +54,7 @@ public class DataRegionMetrics implements IMetricSet {
         MetricType.AUTO_GAUGE,
         Metric.MEM.toString(),
         Tag.NAME.toString(),
-        "storageGroup_" + storageGroupName);
+        "database_" + storageGroupName);
   }
 
   @Override
diff --git a/server/src/main/java/org/apache/iotdb/db/service/metrics/SystemMetrics.java b/server/src/main/java/org/apache/iotdb/db/service/metrics/SystemMetrics.java
index 2763cdd058..7bb2b77a8a 100644
--- a/server/src/main/java/org/apache/iotdb/db/service/metrics/SystemMetrics.java
+++ b/server/src/main/java/org/apache/iotdb/db/service/metrics/SystemMetrics.java
@@ -106,7 +106,7 @@ public class SystemMetrics implements IMetricSet {
 
     metricService
         .getOrCreateGauge(
-            Metric.SYS_CPU_CORES.toString(), MetricLevel.IMPORTANT, Tag.NAME.toString(), "system")
+            Metric.SYS_CPU_CORES.toString(), MetricLevel.CORE, Tag.NAME.toString(), "system")
         .set(osMXBean.getAvailableProcessors());
   }
 
diff --git a/server/src/main/java/org/apache/iotdb/db/service/thrift/impl/ClientRPCServiceImpl.java b/server/src/main/java/org/apache/iotdb/db/service/thrift/impl/ClientRPCServiceImpl.java
index 055bcda62b..c27e40ff79 100644
--- a/server/src/main/java/org/apache/iotdb/db/service/thrift/impl/ClientRPCServiceImpl.java
+++ b/server/src/main/java/org/apache/iotdb/db/service/thrift/impl/ClientRPCServiceImpl.java
@@ -24,6 +24,7 @@ import org.apache.iotdb.commons.conf.IoTDBConstant;
 import org.apache.iotdb.commons.exception.IllegalPathException;
 import org.apache.iotdb.commons.exception.IoTDBException;
 import org.apache.iotdb.commons.service.metric.MetricService;
+import org.apache.iotdb.commons.service.metric.enums.Metric;
 import org.apache.iotdb.commons.service.metric.enums.Operation;
 import org.apache.iotdb.commons.utils.PathUtils;
 import org.apache.iotdb.db.auth.AuthorityChecker;
@@ -65,7 +66,6 @@ import org.apache.iotdb.db.service.basic.BasicOpenSessionResp;
 import org.apache.iotdb.db.sync.SyncService;
 import org.apache.iotdb.db.utils.QueryDataSetUtils;
 import org.apache.iotdb.db.utils.SetThreadName;
-import org.apache.iotdb.metrics.config.MetricConfigDescriptor;
 import org.apache.iotdb.metrics.utils.MetricLevel;
 import org.apache.iotdb.rpc.RpcUtils;
 import org.apache.iotdb.rpc.TSStatusCode;
@@ -1785,17 +1785,13 @@ public class ClientRPCServiceImpl implements IClientRPCServiceWithHandler {
 
   /** Add stat of operation into metrics */
   private void addOperationLatency(Operation operation, long startTime) {
-    if (MetricConfigDescriptor.getInstance().getMetricConfig().getEnablePerformanceStat()) {
-      MetricService.getInstance()
-          .histogram(
-              System.currentTimeMillis() - startTime,
-              "operation_histogram",
-              MetricLevel.IMPORTANT,
-              "name",
-              operation.getName());
-      MetricService.getInstance()
-          .count(1, "operation_count", MetricLevel.IMPORTANT, "name", operation.getName());
-    }
+    MetricService.getInstance()
+        .histogram(
+            System.currentTimeMillis() - startTime,
+            Metric.OPERATION.toString(),
+            MetricLevel.IMPORTANT,
+            "name",
+            operation.getName());
   }
 
   @Override
diff --git a/server/src/main/java/org/apache/iotdb/db/service/thrift/impl/TSServiceImpl.java b/server/src/main/java/org/apache/iotdb/db/service/thrift/impl/TSServiceImpl.java
index b31bfc40cf..901c96eefd 100644
--- a/server/src/main/java/org/apache/iotdb/db/service/thrift/impl/TSServiceImpl.java
+++ b/server/src/main/java/org/apache/iotdb/db/service/thrift/impl/TSServiceImpl.java
@@ -30,6 +30,7 @@ import org.apache.iotdb.commons.exception.MetadataException;
 import org.apache.iotdb.commons.path.MeasurementPath;
 import org.apache.iotdb.commons.path.PartialPath;
 import org.apache.iotdb.commons.service.metric.MetricService;
+import org.apache.iotdb.commons.service.metric.enums.Metric;
 import org.apache.iotdb.commons.service.metric.enums.Operation;
 import org.apache.iotdb.commons.utils.PathUtils;
 import org.apache.iotdb.db.auth.AuthorityChecker;
@@ -71,7 +72,6 @@ import org.apache.iotdb.db.sync.SyncService;
 import org.apache.iotdb.db.tools.watermark.GroupedLSBWatermarkEncoder;
 import org.apache.iotdb.db.tools.watermark.WatermarkEncoder;
 import org.apache.iotdb.db.utils.QueryDataSetUtils;
-import org.apache.iotdb.metrics.config.MetricConfigDescriptor;
 import org.apache.iotdb.metrics.utils.MetricLevel;
 import org.apache.iotdb.rpc.RedirectException;
 import org.apache.iotdb.rpc.RpcUtils;
@@ -1412,16 +1412,12 @@ public class TSServiceImpl implements IClientRPCServiceWithHandler {
 
   /** Add stat of operation into metrics */
   private void addOperationLatency(Operation operation, long startTime) {
-    if (MetricConfigDescriptor.getInstance().getMetricConfig().getEnablePerformanceStat()) {
-      MetricService.getInstance()
-          .histogram(
-              System.currentTimeMillis() - startTime,
-              "operation_histogram",
-              MetricLevel.IMPORTANT,
-              "name",
-              operation.getName());
-      MetricService.getInstance()
-          .count(1, "operation_count", MetricLevel.IMPORTANT, "name", operation.getName());
-    }
+    MetricService.getInstance()
+        .histogram(
+            System.currentTimeMillis() - startTime,
+            Metric.OPERATION.toString(),
+            MetricLevel.IMPORTANT,
+            "name",
+            operation.getName());
   }
 }
diff --git a/server/src/test/resources/datanode1conf/iotdb-datanode.properties b/server/src/test/resources/datanode1conf/iotdb-datanode.properties
index 2f5bb9181f..5bb899e5ba 100644
--- a/server/src/test/resources/datanode1conf/iotdb-datanode.properties
+++ b/server/src/test/resources/datanode1conf/iotdb-datanode.properties
@@ -35,7 +35,6 @@ dn_tracing_dir=target/datanode1/data/tracing
 dn_consensus_dir=target/datanode1/consensus
 dn_sync_dir=target/datanode1/sync
 
-dn_enable_performance_stat=false
 dn_metric_reporter_list=PROMETHEUS
 dn_metric_frame_type=MICROMETER
 dn_metric_level=IMPORTANT
diff --git a/server/src/test/resources/datanode2conf/iotdb-datanode.properties b/server/src/test/resources/datanode2conf/iotdb-datanode.properties
index 9733a213e3..0daf3bd5dd 100644
--- a/server/src/test/resources/datanode2conf/iotdb-datanode.properties
+++ b/server/src/test/resources/datanode2conf/iotdb-datanode.properties
@@ -35,7 +35,6 @@ dn_tracing_dir=target/datanode2/data/tracing
 dn_consensus_dir=target/datanode2/consensus
 dn_sync_dir=target/datanode2/sync
 
-dn_enable_performance_stat=false
 dn_metric_reporter_list=PROMETHEUS
 dn_metric_frame_type=MICROMETER
 dn_metric_level=IMPORTANT
diff --git a/server/src/test/resources/datanode3conf/iotdb-datanode.properties b/server/src/test/resources/datanode3conf/iotdb-datanode.properties
index 15028d8ad9..9837d3e26a 100644
--- a/server/src/test/resources/datanode3conf/iotdb-datanode.properties
+++ b/server/src/test/resources/datanode3conf/iotdb-datanode.properties
@@ -35,7 +35,6 @@ dn_tracing_dir=target/datanode3/data/tracing
 dn_consensus_dir=target/datanode3/consensus
 dn_sync_dir=target/datanode3/sync
 
-dn_enable_performance_stat=false
 dn_metric_reporter_list=PROMETHEUS
 dn_metric_frame_type=MICROMETER
 dn_metric_level=IMPORTANT