You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@iotdb.apache.org by qi...@apache.org on 2022/11/17 07:06:18 UTC
[iotdb] branch master updated: [IOTDB-4923] Enable metric module in default config (#7973)
This is an automated email from the ASF dual-hosted git repository.
qiaojialin pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/iotdb.git
The following commit(s) were added to refs/heads/master by this push:
new b84845de2d [IOTDB-4923] Enable metric module in default config (#7973)
b84845de2d is described below
commit b84845de2d46eb6fb30d0661bb414594bae456af
Author: ZhangHongYin <46...@users.noreply.github.com>
AuthorDate: Thu Nov 17 15:06:10 2022 +0800
[IOTDB-4923] Enable metric module in default config (#7973)
---
.../thrift/ConfigNodeRPCServiceHandlerMetrics.java | 2 +-
.../thrift/ConfigNodeRPCServiceMetrics.java | 2 +-
.../multileader/MultiLeaderServerImpl.java | 10 ++---
.../multileader/MultiLeaderServerMetrics.java | 4 +-
.../multileader/client/DispatchLogHandler.java | 2 +-
.../multileader/logdispatcher/LogDispatcher.java | 2 +-
.../MultiLeaderMemoryManagerMetrics.java | 2 +-
docs/UserGuide/Monitor-Alert/Metric-Tool.md | 42 +++++++++----------
docs/zh/UserGuide/Monitor-Alert/Metric-Tool.md | 48 +++++++++++-----------
.../resources/conf/iotdb-confignode-metric.yml | 6 +--
.../resources/conf/iotdb-datanode-metric.yml | 6 +--
.../apache/iotdb/metrics/config/MetricConfig.java | 9 ++--
.../micrometer/MicrometerMetricManager.java | 2 +
.../exchange/MPPDataExchangeServiceMetrics.java | 2 +-
...MppDataExchangeServiceThriftHandlerMetrics.java | 2 +-
.../service/DataNodeInternalRPCServiceMetrics.java | 2 +-
.../apache/iotdb/db/service/RPCServiceMetrics.java | 2 +-
.../iotdb/db/service/metrics/ProcessMetrics.java | 10 +++--
.../InternalServiceThriftHandlerMetrics.java | 2 +-
.../handler/RPCServiceThriftHandlerMetrics.java | 2 +-
20 files changed, 79 insertions(+), 80 deletions(-)
diff --git a/confignode/src/main/java/org/apache/iotdb/confignode/service/thrift/ConfigNodeRPCServiceHandlerMetrics.java b/confignode/src/main/java/org/apache/iotdb/confignode/service/thrift/ConfigNodeRPCServiceHandlerMetrics.java
index 327e854687..90b9bd2df0 100644
--- a/confignode/src/main/java/org/apache/iotdb/confignode/service/thrift/ConfigNodeRPCServiceHandlerMetrics.java
+++ b/confignode/src/main/java/org/apache/iotdb/confignode/service/thrift/ConfigNodeRPCServiceHandlerMetrics.java
@@ -38,7 +38,7 @@ public class ConfigNodeRPCServiceHandlerMetrics implements IMetricSet {
public void bindTo(AbstractMetricService metricService) {
metricService.getOrCreateAutoGauge(
Metric.THRIFT_CONNECTIONS.toString(),
- MetricLevel.CORE,
+ MetricLevel.IMPORTANT,
thriftConnectionNumber,
AtomicLong::get,
Tag.NAME.toString(),
diff --git a/confignode/src/main/java/org/apache/iotdb/confignode/service/thrift/ConfigNodeRPCServiceMetrics.java b/confignode/src/main/java/org/apache/iotdb/confignode/service/thrift/ConfigNodeRPCServiceMetrics.java
index f266b49844..ce57c811cf 100644
--- a/confignode/src/main/java/org/apache/iotdb/confignode/service/thrift/ConfigNodeRPCServiceMetrics.java
+++ b/confignode/src/main/java/org/apache/iotdb/confignode/service/thrift/ConfigNodeRPCServiceMetrics.java
@@ -39,7 +39,7 @@ public class ConfigNodeRPCServiceMetrics implements IMetricSet {
public void bindTo(AbstractMetricService metricService) {
metricService.getOrCreateAutoGauge(
Metric.THRIFT_ACTIVE_THREADS.toString(),
- MetricLevel.CORE,
+ MetricLevel.IMPORTANT,
thriftServiceThread,
AbstractThriftServiceThread::getActiveThreadCount,
Tag.NAME.toString(),
diff --git a/consensus/src/main/java/org/apache/iotdb/consensus/multileader/MultiLeaderServerImpl.java b/consensus/src/main/java/org/apache/iotdb/consensus/multileader/MultiLeaderServerImpl.java
index e965973459..7eaa61db2c 100644
--- a/consensus/src/main/java/org/apache/iotdb/consensus/multileader/MultiLeaderServerImpl.java
+++ b/consensus/src/main/java/org/apache/iotdb/consensus/multileader/MultiLeaderServerImpl.java
@@ -166,7 +166,7 @@ public class MultiLeaderServerImpl {
MetricService.getInstance()
.getOrCreateHistogram(
Metric.STAGE.toString(),
- MetricLevel.CORE,
+ MetricLevel.IMPORTANT,
Tag.NAME.toString(),
Metric.MULTI_LEADER.toString(),
Tag.TYPE.toString(),
@@ -200,7 +200,7 @@ public class MultiLeaderServerImpl {
MetricService.getInstance()
.getOrCreateHistogram(
Metric.STAGE.toString(),
- MetricLevel.CORE,
+ MetricLevel.IMPORTANT,
Tag.NAME.toString(),
Metric.MULTI_LEADER.toString(),
Tag.TYPE.toString(),
@@ -222,7 +222,7 @@ public class MultiLeaderServerImpl {
MetricService.getInstance()
.getOrCreateHistogram(
Metric.STAGE.toString(),
- MetricLevel.CORE,
+ MetricLevel.IMPORTANT,
Tag.NAME.toString(),
Metric.MULTI_LEADER.toString(),
Tag.TYPE.toString(),
@@ -245,7 +245,7 @@ public class MultiLeaderServerImpl {
MetricService.getInstance()
.getOrCreateHistogram(
Metric.STAGE.toString(),
- MetricLevel.CORE,
+ MetricLevel.IMPORTANT,
Tag.NAME.toString(),
Metric.MULTI_LEADER.toString(),
Tag.TYPE.toString(),
@@ -264,7 +264,7 @@ public class MultiLeaderServerImpl {
MetricService.getInstance()
.getOrCreateHistogram(
Metric.STAGE.toString(),
- MetricLevel.CORE,
+ MetricLevel.IMPORTANT,
Tag.NAME.toString(),
Metric.MULTI_LEADER.toString(),
Tag.TYPE.toString(),
diff --git a/consensus/src/main/java/org/apache/iotdb/consensus/multileader/MultiLeaderServerMetrics.java b/consensus/src/main/java/org/apache/iotdb/consensus/multileader/MultiLeaderServerMetrics.java
index 0f0a3a34a4..2d92d0aa29 100644
--- a/consensus/src/main/java/org/apache/iotdb/consensus/multileader/MultiLeaderServerMetrics.java
+++ b/consensus/src/main/java/org/apache/iotdb/consensus/multileader/MultiLeaderServerMetrics.java
@@ -39,7 +39,7 @@ public class MultiLeaderServerMetrics implements IMetricSet {
MetricService.getInstance()
.getOrCreateAutoGauge(
Metric.MULTI_LEADER.toString(),
- MetricLevel.CORE,
+ MetricLevel.IMPORTANT,
impl,
MultiLeaderServerImpl::getIndex,
Tag.NAME.toString(),
@@ -51,7 +51,7 @@ public class MultiLeaderServerMetrics implements IMetricSet {
MetricService.getInstance()
.getOrCreateAutoGauge(
Metric.MULTI_LEADER.toString(),
- MetricLevel.CORE,
+ MetricLevel.IMPORTANT,
impl,
MultiLeaderServerImpl::getCurrentSafelyDeletedSearchIndex,
Tag.NAME.toString(),
diff --git a/consensus/src/main/java/org/apache/iotdb/consensus/multileader/client/DispatchLogHandler.java b/consensus/src/main/java/org/apache/iotdb/consensus/multileader/client/DispatchLogHandler.java
index 0c5ef42e1b..81ee0c2ea4 100644
--- a/consensus/src/main/java/org/apache/iotdb/consensus/multileader/client/DispatchLogHandler.java
+++ b/consensus/src/main/java/org/apache/iotdb/consensus/multileader/client/DispatchLogHandler.java
@@ -67,7 +67,7 @@ public class DispatchLogHandler implements AsyncMethodCallback<TSyncLogRes> {
MetricService.getInstance()
.getOrCreateHistogram(
Metric.STAGE.toString(),
- MetricLevel.CORE,
+ MetricLevel.IMPORTANT,
Tag.NAME.toString(),
Metric.MULTI_LEADER.toString(),
Tag.TYPE.toString(),
diff --git a/consensus/src/main/java/org/apache/iotdb/consensus/multileader/logdispatcher/LogDispatcher.java b/consensus/src/main/java/org/apache/iotdb/consensus/multileader/logdispatcher/LogDispatcher.java
index 606731e6a7..f38c0db801 100644
--- a/consensus/src/main/java/org/apache/iotdb/consensus/multileader/logdispatcher/LogDispatcher.java
+++ b/consensus/src/main/java/org/apache/iotdb/consensus/multileader/logdispatcher/LogDispatcher.java
@@ -308,7 +308,7 @@ public class LogDispatcher {
MetricService.getInstance()
.getOrCreateHistogram(
Metric.STAGE.toString(),
- MetricLevel.CORE,
+ MetricLevel.IMPORTANT,
Tag.NAME.toString(),
Metric.MULTI_LEADER.toString(),
Tag.TYPE.toString(),
diff --git a/consensus/src/main/java/org/apache/iotdb/consensus/multileader/logdispatcher/MultiLeaderMemoryManagerMetrics.java b/consensus/src/main/java/org/apache/iotdb/consensus/multileader/logdispatcher/MultiLeaderMemoryManagerMetrics.java
index 326d3dc1ad..c2b43f448d 100644
--- a/consensus/src/main/java/org/apache/iotdb/consensus/multileader/logdispatcher/MultiLeaderMemoryManagerMetrics.java
+++ b/consensus/src/main/java/org/apache/iotdb/consensus/multileader/logdispatcher/MultiLeaderMemoryManagerMetrics.java
@@ -37,7 +37,7 @@ public class MultiLeaderMemoryManagerMetrics implements IMetricSet {
public void bindTo(AbstractMetricService metricService) {
metricService.getOrCreateAutoGauge(
Metric.MEM.toString(),
- MetricLevel.CORE,
+ MetricLevel.IMPORTANT,
multiLeaderMemoryManager,
MultiLeaderMemoryManager::getMemorySizeInByte,
Tag.NAME.toString(),
diff --git a/docs/UserGuide/Monitor-Alert/Metric-Tool.md b/docs/UserGuide/Monitor-Alert/Metric-Tool.md
index 12345ebfc9..0f9c1dddd4 100644
--- a/docs/UserGuide/Monitor-Alert/Metric-Tool.md
+++ b/docs/UserGuide/Monitor-Alert/Metric-Tool.md
@@ -82,13 +82,13 @@ Next, we will choose Prometheus format data as samples to describe each kind of
#### 1.3.3.1. API
| Metric | Tag | level | Description | Sample |
-| --------------------- | ------------------------ | --------- | ---------------------------------------- | -------------------------------------------- |
+| --------------------- | ------------------------ |-----------| ---------------------------------------- | -------------------------------------------- |
| entry_seconds_count | name="{{interface}}" | important | The total request count of the interface | entry_seconds_count{name="openSession",} 1.0 |
| entry_seconds_sum | name="{{interface}}" | important | The total cost seconds of the interface | entry_seconds_sum{name="openSession",} 0.024 |
| entry_seconds_max | name="{{interface}}" | important | The max latency of the interface | entry_seconds_max{name="openSession",} 0.024 |
| quantity_total | name="pointsIn" | important | The total points inserted into IoTDB | quantity_total{name="pointsIn",} 1.0 |
-| thrift_connections | name="{{thriftService}}" | core | current number of thrift connections | thrift_connections{name="RPC",} 1.0 |
-| thrift_active_threads | name="{{thriftThread}}" | core | current number if thrift worker threads | thrift_active_threads{name="RPC",} 1.0 |
+| thrift_connections | name="{{thriftService}}" | important | current number of thrift connections | thrift_connections{name="RPC",} 1.0 |
+| thrift_active_threads | name="{{thriftThread}}" | important | current number of thrift worker threads | thrift_active_threads{name="RPC",} 1.0 |
#### 1.3.3.2. Task
| Metric | Tag | level | Description | Sample |
@@ -143,13 +143,13 @@ Next, we will choose Prometheus format data as samples to describe each kind of
| multi_leader | name="multiLeaderServerImpl", region="{{region}}", type="searchIndex/safeIndex" | core | The searchIndex and safeIndex of region in multiLeader | multi_leader{name="multiLeaderServerImpl",region="DataRegion[7]",type="searchIndex",} 1945.0 |
| multi_leader | name="logDispatcher-{{IP}}:{{Port}}", region="{{region}}", type="currentSyncIndex" | important | The currentSyncIndex of LogDispatcherThread of related region | multi_leader{name="logDispatcher-127.0.0.1:40014",region="DataRegion[7]",type="currentSyncIndex",} 1945.0 |
| multi_leader | name="logDispatcher-{{IP}}:{{Port}}", region="{{region}}", type="cachedRequestInMemoryQueue" | important | The total size of queues that buffer requests in LogDispatcher | multi_leader{name="logDispatcher-127.0.0.1:40014",region="DataRegion[9]",type="cachedRequestInMemoryQueue",} 0.0 |
-| stage | name="multi_leader", region="{{region}}", type="getStateMachineLock" | core | The time consumed to get lock of statemachine in multiLeader | stage{name="multi_leader",region="DataRegion[6]",type="getStateMachineLock",quantile="0.5",} 0.0 |
-| stage | name="multi_leader", region="{{region}}", type="checkingBeforeWrite" | core | The time consumed to check before write in multiLeader | stage{name="multi_leader",region="DataRegion[5]",type="checkingBeforeWrite",quantile="0.5",} 0.0 |
-| stage | name="multi_leader", region="{{region}}", type="writeStateMachine" | core | The time consumed to write consensus request into statemachine in multiLeader | stage{name="multi_leader",region="DataRegion[6]",type="writeStateMachine",quantile="0.5",} 1.0 |
-| stage | name="multi_leader", region="{{region}}", type="offerRequestToQueue" | core | The time consumed to try to offer request into queue in multiLeader | stage{name="multi_leader",region="DataRegion[6]",type="offerRequestToQueue",quantile="0.5",} 1.0 |
-| stage | name="multi_leader", region="{{region}}", type="consensusWrite" | core | The total time consumed to write a consensus request in multiLeader | stage{name="multi_leader",region="DataRegion[6]",type="consensusWrite",quantile="0.5",} 2.0625 |
-| stage | name="multi_leader", region="{{region}}", type="constructBatch" | core | The time consumed to construct batch in LogDispatcher per request | stage{name="multi_leader",region="DataRegion[7]",type="constructBatch",quantile="0.5",} 0.0 |
-| stage | name="multi_leader", region="{{region}}", type="syncLogTimePerRequest" | core | The time consumed to sync one request in multiLeader | stage{name="multi_leader",region="DataRegion[7]",type="syncLogTimePerRequest",quantile="0.5",} 0.0 |
+| stage | name="multi_leader", region="{{region}}", type="getStateMachineLock" | important | The time consumed to get lock of statemachine in multiLeader | stage{name="multi_leader",region="DataRegion[6]",type="getStateMachineLock",quantile="0.5",} 0.0 |
+| stage | name="multi_leader", region="{{region}}", type="checkingBeforeWrite" | important | The time consumed to check before write in multiLeader | stage{name="multi_leader",region="DataRegion[5]",type="checkingBeforeWrite",quantile="0.5",} 0.0 |
+| stage | name="multi_leader", region="{{region}}", type="writeStateMachine" | important | The time consumed to write consensus request into statemachine in multiLeader | stage{name="multi_leader",region="DataRegion[6]",type="writeStateMachine",quantile="0.5",} 1.0 |
+| stage | name="multi_leader", region="{{region}}", type="offerRequestToQueue" | important | The time consumed to try to offer request into queue in multiLeader | stage{name="multi_leader",region="DataRegion[6]",type="offerRequestToQueue",quantile="0.5",} 1.0 |
+| stage | name="multi_leader", region="{{region}}", type="consensusWrite" | important | The total time consumed to write a consensus request in multiLeader | stage{name="multi_leader",region="DataRegion[6]",type="consensusWrite",quantile="0.5",} 2.0625 |
+| stage | name="multi_leader", region="{{region}}", type="constructBatch" | important | The time consumed to construct batch in LogDispatcher per request | stage{name="multi_leader",region="DataRegion[7]",type="constructBatch",quantile="0.5",} 0.0 |
+| stage | name="multi_leader", region="{{region}}", type="syncLogTimePerRequest" | important | The time consumed to sync one request in multiLeader | stage{name="multi_leader",region="DataRegion[7]",type="syncLogTimePerRequest",quantile="0.5",} 0.0 |
### 1.3.4. IoTDB PreDefined Metrics Set
@@ -209,17 +209,17 @@ Next, we will choose Prometheus format data as samples to describe each kind of
| logback_events_total | {level="trace/debug/info/warn/error",} | Important | The count of trace/debug/info/warn/error log events till now | logback_events_total{level="warn",} 0.0 |
#### 1.3.4.4. Process
-| Metric | Tag | level | Description | 示例 |
-| --------------------- | -------------- | ----- | ----------------------------------------------------------------------------- | ----------------------------------------------- |
-| process_cpu_load | name="cpu" | core | current process CPU Usage (%) | process_cpu_load{name="process",} 5.0 |
-| process_cpu_time | name="cpu" | core | total Process CPU Time Occupied (ns) | process_cpu_time{name="process",} 3.265625E9 |
-| process_max_mem | name="memory" | core | The maximum available memory for the JVM | process_max_mem{name="process",} 3.545759744E9 |
-| process_used_mem | name="memory" | core | The current available memory for the JVM | process_used_mem{name="process",} 4.6065456E7 |
-| process_total_mem | name="memory" | core | The current requested memory for the JVM | process_total_mem{name="process",} 2.39599616E8 |
-| process_free_mem | name="memory" | core | The free available memory for the JVM | process_free_mem{name="process",} 1.94035584E8 |
-| process_mem_ratio | name="memory" | core | Memory footprint ratio of process | process_mem_ratio{name="process",} 0.0 |
-| process_threads_count | name="process" | core | The current number of threads | process_threads_count{name="process",} 11.0 |
-| process_status | name="process" | core | The process survivor status, 1.0 means survivorship, and 0.0 means terminated | process_status{name="process",} 1.0 |
+| Metric | Tag | level | Description | Sample |
+| --------------------- | -------------- |-----------| ----------------------------------------------------------------------------- | ----------------------------------------------- |
+| process_cpu_load | name="cpu" | core | current process CPU Usage (%) | process_cpu_load{name="process",} 5.0 |
+| process_cpu_time | name="cpu" | core | total Process CPU Time Occupied (ns) | process_cpu_time{name="process",} 3.265625E9 |
+| process_max_mem | name="memory" | core | The maximum available memory for the JVM | process_max_mem{name="process",} 3.545759744E9 |
+| process_used_mem | name="memory" | important | The current used memory for the JVM | process_used_mem{name="process",} 4.6065456E7 |
+| process_total_mem | name="memory" | core | The current requested memory for the JVM | process_total_mem{name="process",} 2.39599616E8 |
+| process_free_mem | name="memory" | core | The free available memory for the JVM | process_free_mem{name="process",} 1.94035584E8 |
+| process_mem_ratio | name="memory" | important | Memory footprint ratio of process | process_mem_ratio{name="process",} 0.0 |
+| process_threads_count | name="process" | important | The current number of threads | process_threads_count{name="process",} 11.0 |
+| process_status | name="process" | important | The process survivor status, 1.0 means survivorship, and 0.0 means terminated | process_status{name="process",} 1.0 |
#### 1.3.4.5. System
| Metric | Tag | level | Description | 示例 |
diff --git a/docs/zh/UserGuide/Monitor-Alert/Metric-Tool.md b/docs/zh/UserGuide/Monitor-Alert/Metric-Tool.md
index 459912c228..5902062276 100644
--- a/docs/zh/UserGuide/Monitor-Alert/Metric-Tool.md
+++ b/docs/zh/UserGuide/Monitor-Alert/Metric-Tool.md
@@ -79,13 +79,13 @@ IoTDB对外提供JMX和Prometheus格式的监控指标,对于JMX,可以通
#### 1.3.3.1. Interface
| Metric | Tag | level | 说明 | 示例 |
-| --------------------- | ------------------------ | --------- | ------------------- | -------------------------------------------- |
+| --------------------- | ------------------------ |-----------| ------------------- | -------------------------------------------- |
| entry_seconds_count | name="{{interface}}" | important | 接口累计访问次数 | entry_seconds_count{name="openSession",} 1.0 |
| entry_seconds_sum | name="{{interface}}" | important | 接口累计耗时(s) | entry_seconds_sum{name="openSession",} 0.024 |
| entry_seconds_max | name="{{interface}}" | important | 接口最大耗时(s) | entry_seconds_max{name="openSession",} 0.024 |
| quantity_total | name="pointsIn" | important | 系统累计写入点数 | quantity_total{name="pointsIn",} 1.0 |
-| thrift_connections | name="{{thriftService}}" | core | thrift当前连接数 | thrift_connections{name="RPC",} 1.0 |
-| thrift_active_threads | name="{{thriftThread}}" | core | thrift worker线程数 | thrift_active_threads{name="RPC",} 1.0 |
+| thrift_connections | name="{{thriftService}}" | important | thrift当前连接数 | thrift_connections{name="RPC",} 1.0 |
+| thrift_active_threads | name="{{thriftThread}}" | important | thrift worker线程数 | thrift_active_threads{name="RPC",} 1.0 |
#### 1.3.3.2. Task
@@ -136,18 +136,18 @@ IoTDB对外提供JMX和Prometheus格式的监控指标,对于JMX,可以通
| slot | name="{{storageGroupName}}",type="schemaSlotNumber/dataSlotNumber" | normal | database 的 schemaSlot/dataSlot个数 | slot{name="root.schema.sg1",type="schemaSlotNumber",} 2.0 |
##### 1.3.3.6.2. 弱一致性
-| Metric | Tag | level | 说明 | 示例 |
-| ------------ | -------------------------------------------------------------------------------------------- | --------- | ---------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------- |
-| mutli_leader | name="multiLeaderServerImpl", region="{{region}}", type="searchIndex/safeIndex" | core | 弱一致性对应region的写入index和同步index | multi_leader{name="multiLeaderServerImpl",region="DataRegion[7]",type="searchIndex",} 1945.0 |
+| Metric | Tag | level | 说明 | 示例 |
+| ------------ | -------------------------------------------------------------------------------------------- | -------- | ---------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------- |
+| multi_leader | name="multiLeaderServerImpl", region="{{region}}", type="searchIndex/safeIndex" | core | 弱一致性对应region的写入index和同步index | multi_leader{name="multiLeaderServerImpl",region="DataRegion[7]",type="searchIndex",} 1945.0 |
| multi_leader | name="logDispatcher-{{IP}}:{{Port}}", region="{{region}}", type="currentSyncIndex" | important | 弱一致性对应region的同步线程当前的同步index | multi_leader{name="logDispatcher-127.0.0.1:40014",region="DataRegion[7]",type="currentSyncIndex",} 1945.0 |
| multi_leader | name="logDispatcher-{{IP}}:{{Port}}", region="{{region}}", type="cachedRequestInMemoryQueue" | important | 弱一致性对应region的同步线程缓存的队列总大小 | multi_leader{name="logDispatcher-127.0.0.1:40014",region="DataRegion[9]",type="cachedRequestInMemoryQueue",} 0.0 |
-| stage | name="multi_leader", region="{{region}}", type="getStateMachineLock" | core | 弱一致性对应region获取状态机锁的耗时 | stage{name="multi_leader",region="DataRegion[6]",type="getStateMachineLock",quantile="0.5",} 0.0 |
-| stage | name="multi_leader", region="{{region}}", type="checkingBeforeWrite" | core | 弱一致性对应region状态机完成写前检查的耗时 | stage{name="multi_leader",region="DataRegion[5]",type="checkingBeforeWrite",quantile="0.5",} 0.0 |
-| stage | name="multi_leader", region="{{region}}", type="writeStateMachine" | core | 弱一致性对应region状态机写入请求的耗时 | stage{name="multi_leader",region="DataRegion[6]",type="writeStateMachine",quantile="0.5",} 1.0 |
-| stage | name="multi_leader", region="{{region}}", type="offerRequestToQueue" | core | 弱一致性对应region状态机尝试将请求放入同步队列的耗时 | stage{name="multi_leader",region="DataRegion[6]",type="offerRequestToQueue",quantile="0.5",} 1.0 |
-| stage | name="multi_leader", region="{{region}}", type="consensusWrite" | core | 弱一致性对应region状态机处理共识层请求的耗时 | stage{name="multi_leader",region="DataRegion[6]",type="consensusWrite",quantile="0.5",} 2.0625 |
-| stage | name="multi_leader", region="{{region}}", type="constructBatch" | core | 弱一致性对应同步线程完成一个请求构造的耗时 | stage{name="multi_leader",region="DataRegion[7]",type="constructBatch",quantile="0.5",} 0.0 |
-| stage | name="multi_leader", region="{{region}}", type="syncLogTimePerRequest" | core | 弱一致性对应同步线程完成一个请求同步的耗时 | stage{name="multi_leader",region="DataRegion[7]",type="syncLogTimePerRequest",quantile="0.5",} 0.0 |
+| stage | name="multi_leader", region="{{region}}", type="getStateMachineLock" | important | 弱一致性对应region获取状态机锁的耗时 | stage{name="multi_leader",region="DataRegion[6]",type="getStateMachineLock",quantile="0.5",} 0.0 |
+| stage | name="multi_leader", region="{{region}}", type="checkingBeforeWrite" | important | 弱一致性对应region状态机完成写前检查的耗时 | stage{name="multi_leader",region="DataRegion[5]",type="checkingBeforeWrite",quantile="0.5",} 0.0 |
+| stage | name="multi_leader", region="{{region}}", type="writeStateMachine" | important | 弱一致性对应region状态机写入请求的耗时 | stage{name="multi_leader",region="DataRegion[6]",type="writeStateMachine",quantile="0.5",} 1.0 |
+| stage | name="multi_leader", region="{{region}}", type="offerRequestToQueue" | important | 弱一致性对应region状态机尝试将请求放入同步队列的耗时 | stage{name="multi_leader",region="DataRegion[6]",type="offerRequestToQueue",quantile="0.5",} 1.0 |
+| stage | name="multi_leader", region="{{region}}", type="consensusWrite" | important | 弱一致性对应region状态机处理共识层请求的耗时 | stage{name="multi_leader",region="DataRegion[6]",type="consensusWrite",quantile="0.5",} 2.0625 |
+| stage | name="multi_leader", region="{{region}}", type="constructBatch" | important | 弱一致性对应同步线程完成一个请求构造的耗时 | stage{name="multi_leader",region="DataRegion[7]",type="constructBatch",quantile="0.5",} 0.0 |
+| stage | name="multi_leader", region="{{region}}", type="syncLogTimePerRequest" | important | 弱一致性对应同步线程完成一个请求同步的耗时 | stage{name="multi_leader",region="DataRegion[7]",type="syncLogTimePerRequest",quantile="0.5",} 0.0 |
### 1.3.4. IoTDB 预定义指标集
@@ -208,17 +208,17 @@ IoTDB对外提供JMX和Prometheus格式的监控指标,对于JMX,可以通
| logback_events_total | {level="trace/debug/info/warn/error",} | important | trace/debug/info/warn/error日志累计数量 | logback_events_total{level="warn",} 0.0 |
#### 1.3.4.4. 进程(Process)
-| Metric | Tag | level | 说明 | 示例 |
-| --------------------- | -------------- | ----- | ---------------------------------- | ----------------------------------------------- |
-| process_cpu_load | name="cpu" | core | process当前CPU占用率(%) | process_cpu_load{name="process",} 5.0 |
-| process_cpu_time | name="cpu" | core | process累计占用CPU时间(ns) | process_cpu_time{name="process",} 3.265625E9 |
-| process_max_mem | name="memory" | core | JVM最大可用内存 | process_max_mem{name="process",} 3.545759744E9 |
-| process_used_mem | name="memory" | core | JVM当前使用内存 | process_used_mem{name="process",} 4.6065456E7 |
-| process_total_mem | name="memory" | core | JVM当前已申请内存 | process_total_mem{name="process",} 2.39599616E8 |
-| process_free_mem | name="memory" | core | JVM当前剩余可用内存 | process_free_mem{name="process",} 1.94035584E8 |
-| process_mem_ratio | name="memory" | core | 进程的内存占用比例 | process_mem_ratio{name="process",} 0.0 |
-| process_threads_count | name="process" | core | 当前线程数 | process_threads_count{name="process",} 11.0 |
-| process_status | name="process" | core | 进程存活状态,1.0为存活,0.0为终止 | process_status{name="process",} 1.0 |
+| Metric | Tag | level | 说明 | 示例 |
+| --------------------- | -------------- |-----------| ---------------------------------- | ----------------------------------------------- |
+| process_cpu_load | name="cpu" | core | process当前CPU占用率(%) | process_cpu_load{name="process",} 5.0 |
+| process_cpu_time | name="cpu" | core | process累计占用CPU时间(ns) | process_cpu_time{name="process",} 3.265625E9 |
+| process_max_mem | name="memory" | core | JVM最大可用内存 | process_max_mem{name="process",} 3.545759744E9 |
+| process_used_mem | name="memory" | important | JVM当前使用内存 | process_used_mem{name="process",} 4.6065456E7 |
+| process_total_mem | name="memory" | core | JVM当前已申请内存 | process_total_mem{name="process",} 2.39599616E8 |
+| process_free_mem | name="memory" | core | JVM当前剩余可用内存 | process_free_mem{name="process",} 1.94035584E8 |
+| process_mem_ratio | name="memory" | important | 进程的内存占用比例 | process_mem_ratio{name="process",} 0.0 |
+| process_threads_count | name="process" | important | 当前线程数 | process_threads_count{name="process",} 11.0 |
+| process_status | name="process" | important | 进程存活状态,1.0为存活,0.0为终止 | process_status{name="process",} 1.0 |
#### 1.3.4.5. 系统(System)
| Metric | Tag | level | 说明 | 示例 |
diff --git a/metrics/interface/src/main/assembly/resources/conf/iotdb-confignode-metric.yml b/metrics/interface/src/main/assembly/resources/conf/iotdb-confignode-metric.yml
index f85dfece66..7262a478da 100644
--- a/metrics/interface/src/main/assembly/resources/conf/iotdb-confignode-metric.yml
+++ b/metrics/interface/src/main/assembly/resources/conf/iotdb-confignode-metric.yml
@@ -18,21 +18,19 @@
#
# whether enable the module
-enableMetric: false
+enableMetric: true
# Is stat performance of operation latency
enablePerformanceStat: false
# Multiple reporter, options: [JMX, PROMETHEUS, IOTDB], IOTDB is off by default
metricReporterList:
- - JMX
- - PROMETHEUS
# Type of monitor frame, options: [MICROMETER, DROPWIZARD]
monitorType: MICROMETER
# Level of metric level, options: [CORE, IMPORTANT, NORMAL, ALL]
-metricLevel: IMPORTANT
+metricLevel: CORE
# The period of the collection of some metrics in asynchronous way, such as tsfile size.
asyncCollectPeriodInSecond: 5
diff --git a/metrics/interface/src/main/assembly/resources/conf/iotdb-datanode-metric.yml b/metrics/interface/src/main/assembly/resources/conf/iotdb-datanode-metric.yml
index f85dfece66..7262a478da 100644
--- a/metrics/interface/src/main/assembly/resources/conf/iotdb-datanode-metric.yml
+++ b/metrics/interface/src/main/assembly/resources/conf/iotdb-datanode-metric.yml
@@ -18,21 +18,19 @@
#
# whether enable the module
-enableMetric: false
+enableMetric: true
# Is stat performance of operation latency
enablePerformanceStat: false
# Multiple reporter, options: [JMX, PROMETHEUS, IOTDB], IOTDB is off by default
metricReporterList:
- - JMX
- - PROMETHEUS
# Type of monitor frame, options: [MICROMETER, DROPWIZARD]
monitorType: MICROMETER
# Level of metric level, options: [CORE, IMPORTANT, NORMAL, ALL]
-metricLevel: IMPORTANT
+metricLevel: CORE
# The period of the collection of some metrics in asynchronous way, such as tsfile size.
asyncCollectPeriodInSecond: 5
diff --git a/metrics/interface/src/main/java/org/apache/iotdb/metrics/config/MetricConfig.java b/metrics/interface/src/main/java/org/apache/iotdb/metrics/config/MetricConfig.java
index 74505cabf4..311b3238ed 100644
--- a/metrics/interface/src/main/java/org/apache/iotdb/metrics/config/MetricConfig.java
+++ b/metrics/interface/src/main/java/org/apache/iotdb/metrics/config/MetricConfig.java
@@ -23,13 +23,13 @@ import org.apache.iotdb.metrics.utils.MetricLevel;
import org.apache.iotdb.metrics.utils.MonitorType;
import org.apache.iotdb.metrics.utils.ReporterType;
-import java.util.Arrays;
+import java.util.Collections;
import java.util.List;
import java.util.Objects;
public class MetricConfig {
/** Is metric service enabled */
- private Boolean enableMetric = false;
+ private Boolean enableMetric = true;
/** Is stat performance of operations enabled */
private Boolean enablePerformanceStat = false;
@@ -38,11 +38,10 @@ public class MetricConfig {
private MonitorType monitorType = MonitorType.MICROMETER;
/** The list of reporters provide data for external system */
- private List<ReporterType> metricReporterList =
- Arrays.asList(ReporterType.JMX, ReporterType.PROMETHEUS);
+ private List<ReporterType> metricReporterList = Collections.emptyList();
/** The level of metric service */
- private MetricLevel metricLevel = MetricLevel.IMPORTANT;
+ private MetricLevel metricLevel = MetricLevel.CORE;
private Integer asyncCollectPeriodInSecond = 5;
diff --git a/metrics/micrometer-metrics/src/main/java/org/apache/iotdb/metrics/micrometer/MicrometerMetricManager.java b/metrics/micrometer-metrics/src/main/java/org/apache/iotdb/metrics/micrometer/MicrometerMetricManager.java
index a1c6eddcb0..b56e2e5596 100644
--- a/metrics/micrometer-metrics/src/main/java/org/apache/iotdb/metrics/micrometer/MicrometerMetricManager.java
+++ b/metrics/micrometer-metrics/src/main/java/org/apache/iotdb/metrics/micrometer/MicrometerMetricManager.java
@@ -37,6 +37,7 @@ import org.apache.iotdb.metrics.utils.MetricType;
import io.micrometer.core.instrument.Meter;
import io.micrometer.core.instrument.Metrics;
import io.micrometer.core.instrument.Tags;
+import io.micrometer.core.instrument.simple.SimpleMeterRegistry;
import java.util.concurrent.atomic.AtomicLong;
import java.util.function.ToLongFunction;
@@ -49,6 +50,7 @@ public class MicrometerMetricManager extends AbstractMetricManager {
public MicrometerMetricManager() {
meterRegistry = Metrics.globalRegistry;
+ Metrics.globalRegistry.add(new SimpleMeterRegistry());
}
@Override
diff --git a/server/src/main/java/org/apache/iotdb/db/mpp/execution/exchange/MPPDataExchangeServiceMetrics.java b/server/src/main/java/org/apache/iotdb/db/mpp/execution/exchange/MPPDataExchangeServiceMetrics.java
index b97fe91149..6e9f5141ec 100644
--- a/server/src/main/java/org/apache/iotdb/db/mpp/execution/exchange/MPPDataExchangeServiceMetrics.java
+++ b/server/src/main/java/org/apache/iotdb/db/mpp/execution/exchange/MPPDataExchangeServiceMetrics.java
@@ -39,7 +39,7 @@ public class MPPDataExchangeServiceMetrics implements IMetricSet {
public void bindTo(AbstractMetricService metricService) {
metricService.getOrCreateAutoGauge(
Metric.THRIFT_ACTIVE_THREADS.toString(),
- MetricLevel.CORE,
+ MetricLevel.IMPORTANT,
thriftServiceThread,
AbstractThriftServiceThread::getActiveThreadCount,
Tag.NAME.toString(),
diff --git a/server/src/main/java/org/apache/iotdb/db/mpp/execution/exchange/MppDataExchangeServiceThriftHandlerMetrics.java b/server/src/main/java/org/apache/iotdb/db/mpp/execution/exchange/MppDataExchangeServiceThriftHandlerMetrics.java
index 85483dd8fd..fe63d041ea 100644
--- a/server/src/main/java/org/apache/iotdb/db/mpp/execution/exchange/MppDataExchangeServiceThriftHandlerMetrics.java
+++ b/server/src/main/java/org/apache/iotdb/db/mpp/execution/exchange/MppDataExchangeServiceThriftHandlerMetrics.java
@@ -42,7 +42,7 @@ public class MppDataExchangeServiceThriftHandlerMetrics implements IMetricSet {
MetricService.getInstance()
.getOrCreateAutoGauge(
Metric.THRIFT_CONNECTIONS.toString(),
- MetricLevel.CORE,
+ MetricLevel.IMPORTANT,
thriftConnectionNumber,
AtomicLong::get,
Tag.NAME.toString(),
diff --git a/server/src/main/java/org/apache/iotdb/db/service/DataNodeInternalRPCServiceMetrics.java b/server/src/main/java/org/apache/iotdb/db/service/DataNodeInternalRPCServiceMetrics.java
index 9cbef97bd1..89715708b7 100644
--- a/server/src/main/java/org/apache/iotdb/db/service/DataNodeInternalRPCServiceMetrics.java
+++ b/server/src/main/java/org/apache/iotdb/db/service/DataNodeInternalRPCServiceMetrics.java
@@ -41,7 +41,7 @@ public class DataNodeInternalRPCServiceMetrics implements IMetricSet {
public void bindTo(AbstractMetricService metricService) {
metricService.getOrCreateAutoGauge(
Metric.THRIFT_ACTIVE_THREADS.toString(),
- MetricLevel.CORE,
+ MetricLevel.IMPORTANT,
thriftServiceThread,
AbstractThriftServiceThread::getActiveThreadCount,
Tag.NAME.toString(),
diff --git a/server/src/main/java/org/apache/iotdb/db/service/RPCServiceMetrics.java b/server/src/main/java/org/apache/iotdb/db/service/RPCServiceMetrics.java
index b6f76636f7..59dbb96fcb 100644
--- a/server/src/main/java/org/apache/iotdb/db/service/RPCServiceMetrics.java
+++ b/server/src/main/java/org/apache/iotdb/db/service/RPCServiceMetrics.java
@@ -40,7 +40,7 @@ public class RPCServiceMetrics implements IMetricSet {
public void bindTo(AbstractMetricService metricService) {
metricService.getOrCreateAutoGauge(
Metric.THRIFT_ACTIVE_THREADS.toString(),
- MetricLevel.CORE,
+ MetricLevel.IMPORTANT,
thriftServiceThread,
AbstractThriftServiceThread::getActiveThreadCount,
Tag.NAME.toString(),
diff --git a/server/src/main/java/org/apache/iotdb/db/service/metrics/ProcessMetrics.java b/server/src/main/java/org/apache/iotdb/db/service/metrics/ProcessMetrics.java
index 2c95c59eae..7c1cea0a46 100644
--- a/server/src/main/java/org/apache/iotdb/db/service/metrics/ProcessMetrics.java
+++ b/server/src/main/java/org/apache/iotdb/db/service/metrics/ProcessMetrics.java
@@ -105,16 +105,17 @@ public class ProcessMetrics implements IMetricSet {
a -> runtime.freeMemory(),
Tag.NAME.toString(),
"process");
+ // TODO: maybe the following metrics can be removed
metricService.getOrCreateAutoGauge(
Metric.PROCESS_USED_MEM.toString(),
- MetricLevel.CORE,
+ MetricLevel.IMPORTANT,
this,
a -> getProcessUsedMemory(),
Tag.NAME.toString(),
"process");
metricService.getOrCreateAutoGauge(
Metric.PROCESS_MEM_RATIO.toString(),
- MetricLevel.CORE,
+ MetricLevel.IMPORTANT,
this,
a -> Math.round(getProcessMemoryRatio()),
Tag.NAME.toString(),
@@ -135,9 +136,10 @@ public class ProcessMetrics implements IMetricSet {
}
private void collectThreadInfo(AbstractMetricService metricService) {
+ // TODO: this may duplicate the thread info in the JVM-related metrics
metricService.getOrCreateAutoGauge(
Metric.PROCESS_THREADS_COUNT.toString(),
- MetricLevel.CORE,
+ MetricLevel.IMPORTANT,
this,
a -> getThreadsCount(),
Tag.NAME.toString(),
@@ -152,7 +154,7 @@ public class ProcessMetrics implements IMetricSet {
private void collectProcessStatusInfo(AbstractMetricService metricService) {
metricService.getOrCreateAutoGauge(
Metric.PROCESS_STATUS.toString(),
- MetricLevel.CORE,
+ MetricLevel.IMPORTANT,
this,
a -> (getProcessStatus()),
Tag.NAME.toString(),
diff --git a/server/src/main/java/org/apache/iotdb/db/service/thrift/handler/InternalServiceThriftHandlerMetrics.java b/server/src/main/java/org/apache/iotdb/db/service/thrift/handler/InternalServiceThriftHandlerMetrics.java
index 94dcb0f78a..dcd00603fc 100644
--- a/server/src/main/java/org/apache/iotdb/db/service/thrift/handler/InternalServiceThriftHandlerMetrics.java
+++ b/server/src/main/java/org/apache/iotdb/db/service/thrift/handler/InternalServiceThriftHandlerMetrics.java
@@ -40,7 +40,7 @@ public class InternalServiceThriftHandlerMetrics implements IMetricSet {
public void bindTo(AbstractMetricService metricService) {
metricService.getOrCreateAutoGauge(
Metric.THRIFT_CONNECTIONS.toString(),
- MetricLevel.CORE,
+ MetricLevel.IMPORTANT,
thriftConnectionNumber,
AtomicLong::get,
Tag.NAME.toString(),
diff --git a/server/src/main/java/org/apache/iotdb/db/service/thrift/handler/RPCServiceThriftHandlerMetrics.java b/server/src/main/java/org/apache/iotdb/db/service/thrift/handler/RPCServiceThriftHandlerMetrics.java
index 74fc4f4312..588337c88d 100644
--- a/server/src/main/java/org/apache/iotdb/db/service/thrift/handler/RPCServiceThriftHandlerMetrics.java
+++ b/server/src/main/java/org/apache/iotdb/db/service/thrift/handler/RPCServiceThriftHandlerMetrics.java
@@ -37,7 +37,7 @@ public class RPCServiceThriftHandlerMetrics implements IMetricSet {
public void bindTo(AbstractMetricService metricService) {
metricService.getOrCreateAutoGauge(
Metric.THRIFT_CONNECTIONS.toString(),
- MetricLevel.CORE,
+ MetricLevel.IMPORTANT,
thriftConnectionNumber,
AtomicLong::get,
Tag.NAME.toString(),