You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by ya...@apache.org on 2023/01/10 10:47:05 UTC

[doris] branch master updated: [Enhancement](metric) add current edit log metric (#15657)

This is an automated email from the ASF dual-hosted git repository.

yangzhg pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new a67cea2d27 [Enhancement](metric) add current edit log metric (#15657)
a67cea2d27 is described below

commit a67cea2d27935b08f95f7e6113fc84c76a32551d
Author: yongjinhou <10...@users.noreply.github.com>
AuthorDate: Tue Jan 10 18:46:57 2023 +0800

    [Enhancement](metric) add current edit log metric (#15657)
---
 .../admin-manual/maint-monitor/monitor-metrics/metrics.md |  4 +++-
 .../src/main/java/org/apache/doris/journal/Journal.java   |  3 +++
 .../java/org/apache/doris/journal/bdbje/BDBJEJournal.java |  6 ++++++
 .../java/org/apache/doris/journal/local/LocalJournal.java |  5 +++++
 .../src/main/java/org/apache/doris/master/Checkpoint.java |  2 ++
 .../java/org/apache/doris/metric/LongCounterMetric.java   |  9 +++++++++
 .../src/main/java/org/apache/doris/metric/MetricRepo.java | 15 +++++++++++++--
 .../src/main/java/org/apache/doris/persist/EditLog.java   | 10 +++++++++-
 8 files changed, 50 insertions(+), 4 deletions(-)

diff --git a/docs/zh-CN/docs/admin-manual/maint-monitor/monitor-metrics/metrics.md b/docs/zh-CN/docs/admin-manual/maint-monitor/monitor-metrics/metrics.md
index a55cff98f7..a8753314c6 100644
--- a/docs/zh-CN/docs/admin-manual/maint-monitor/monitor-metrics/metrics.md
+++ b/docs/zh-CN/docs/admin-manual/maint-monitor/monitor-metrics/metrics.md
@@ -79,9 +79,11 @@ curl http://be_host:webserver_port/metrics?type=json
 |`doris_fe_counter_hit_sql_block_rule`|| Num| 被 SQL BLOCK RULE 拦截的查询数量 |  | |
 |`doris_fe_edit_log_clean`| {type="failed"} | Num| 清理历史元数据日志失败的次数 | 不应失败,如失败,需人工介入 | P0|
 || {type="success"} | Num| 清理历史元数据日志成功的次数 | |
-|`doris_fe_edit_log`| {type="bytes"} |字节 | 元数据日志写入量的累计值 | 通过计算斜率可以获得写入速率,来观察是否元数据写入有延迟 | P0 |
+|`doris_fe_edit_log`| {type="accumulated_bytes"} |字节 | 元数据日志写入量的累计值 | 通过计算斜率可以获得写入速率,来观察是否元数据写入有延迟 | P0 |
+|| {type="current_bytes"} |字节 | 元数据日志当前值 | 用于监控editlog 大小。如果大小超限,需人工介入 | P0 |
 || {type="read"} |Num| 元数据日志读取次数的计数 | 通过斜率观察元数据读取频率是否正常 |P0 |
 || {type="write"} |Num | 元数据日志写入次数的计数 |通过斜率观察元数据写入频率是否正常  |P0 |
+|| {type="current"} |Num | 元数据日志当前数量 |用于监控editlog 数量。如果数量超限,需人工介入  |P0 |
 |`doris_fe_editlog_write_latency_ms`| | 毫秒| 元数据日志写入延迟的百分位统计。如 {quantile="0.75"} 表示 75 分位的写入延迟 | |
 |`doris_fe_image_clean`|{type="failed"} | Num | 清理历史元数据镜像文件失败的次数 | 不应失败,如失败,需人工介入 | P0|
 ||{type="success"} | Num | 清理历史元数据镜像文件成功的次数 | |
diff --git a/fe/fe-core/src/main/java/org/apache/doris/journal/Journal.java b/fe/fe-core/src/main/java/org/apache/doris/journal/Journal.java
index bff5d72c7b..973b224f02 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/journal/Journal.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/journal/Journal.java
@@ -49,6 +49,9 @@ public interface Journal {
     // Write a journal and sync to disk
     public void write(short op, Writable writable) throws IOException;
 
+    // Get the number of journals currently stored (a count, not a journal id)
+    public long getJournalNum();
+
     // Delete journals whose max id is less than deleteToJournalId
     public void deleteJournals(long deleteJournalToId);
 
diff --git a/fe/fe-core/src/main/java/org/apache/doris/journal/bdbje/BDBJEJournal.java b/fe/fe-core/src/main/java/org/apache/doris/journal/bdbje/BDBJEJournal.java
index 74cd8052bb..82ad6a8cda 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/journal/bdbje/BDBJEJournal.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/journal/bdbje/BDBJEJournal.java
@@ -135,6 +135,7 @@ public class BDBJEJournal implements Journal { // CHECKSTYLE IGNORE THIS LINE: B
         DatabaseEntry theData = new DatabaseEntry(buffer.getData());
         if (MetricRepo.isInit) {
             MetricRepo.COUNTER_EDIT_LOG_SIZE_BYTES.increase((long) theData.getSize());
+            MetricRepo.COUNTER_CURRENT_EDIT_LOG_SIZE_BYTES.increase((long) theData.getSize());
         }
         LOG.debug("opCode = {}, journal size = {}", op, theData.getSize());
         // Write the key value pair to bdb.
@@ -367,6 +368,11 @@ public class BDBJEJournal implements Journal { // CHECKSTYLE IGNORE THIS LINE: B
                 helperNode.first + ":" + helperNode.second, Env.getServingEnv().isElectable());
     }
 
+    @Override
+    public long getJournalNum() {
+        return currentJournalDB.count();
+    }
+
     @Override
     public void deleteJournals(long deleteToJournalId) {
         List<Long> dbNames = getDatabaseNames();
diff --git a/fe/fe-core/src/main/java/org/apache/doris/journal/local/LocalJournal.java b/fe/fe-core/src/main/java/org/apache/doris/journal/local/LocalJournal.java
index e54237a7bc..0b6dab4fb5 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/journal/local/LocalJournal.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/journal/local/LocalJournal.java
@@ -108,6 +108,11 @@ public class LocalJournal implements Journal {
         return 0;
     }
 
+    @Override
+    public long getJournalNum() {
+        return 0;
+    }
+
     @Override
     public void close() {
         if (outputStream == null) {
diff --git a/fe/fe-core/src/main/java/org/apache/doris/master/Checkpoint.java b/fe/fe-core/src/main/java/org/apache/doris/master/Checkpoint.java
index cab31d340f..fe877ce9aa 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/master/Checkpoint.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/master/Checkpoint.java
@@ -265,6 +265,8 @@ public class Checkpoint extends MasterDaemon {
                 editLog.deleteJournals(deleteVersion + 1);
                 if (MetricRepo.isInit) {
                     MetricRepo.COUNTER_EDIT_LOG_CLEAN_SUCCESS.increase(1L);
+                    MetricRepo.COUNTER_CURRENT_EDIT_LOG_SIZE_BYTES.reset();
+                    MetricRepo.COUNTER_EDIT_LOG_CURRENT.update(editLog.getEditLogNum());
                 }
                 LOG.info("journals <= {} are deleted. image version {}, other nodes min version {}",
                         deleteVersion, checkPointVersion, minOtherNodesJournalId);
diff --git a/fe/fe-core/src/main/java/org/apache/doris/metric/LongCounterMetric.java b/fe/fe-core/src/main/java/org/apache/doris/metric/LongCounterMetric.java
index 1dfa542878..70122ede84 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/metric/LongCounterMetric.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/metric/LongCounterMetric.java
@@ -38,4 +38,13 @@ public class LongCounterMetric extends CounterMetric<Long> {
     public Long getValue() {
         return value.longValue();
     }
+
+    public void reset() {
+        value.reset();
+    }
+
+    public void update(Long delta) {
+        value.reset();
+        value.add(delta);
+    }
 }
diff --git a/fe/fe-core/src/main/java/org/apache/doris/metric/MetricRepo.java b/fe/fe-core/src/main/java/org/apache/doris/metric/MetricRepo.java
index 85c4479fbc..f246d519b8 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/metric/MetricRepo.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/metric/MetricRepo.java
@@ -86,7 +86,9 @@ public final class MetricRepo {
 
     public static LongCounterMetric COUNTER_EDIT_LOG_WRITE;
     public static LongCounterMetric COUNTER_EDIT_LOG_READ;
+    public static LongCounterMetric COUNTER_EDIT_LOG_CURRENT;
     public static LongCounterMetric COUNTER_EDIT_LOG_SIZE_BYTES;
+    public static LongCounterMetric COUNTER_CURRENT_EDIT_LOG_SIZE_BYTES;
     public static LongCounterMetric COUNTER_EDIT_LOG_CLEAN_SUCCESS;
     public static LongCounterMetric COUNTER_EDIT_LOG_CLEAN_FAILED;
     public static Histogram HISTO_EDIT_LOG_WRITE_LATENCY;
@@ -345,9 +347,18 @@ public final class MetricRepo {
                 "counter of edit log read from bdbje");
         COUNTER_EDIT_LOG_READ.addLabel(new MetricLabel("type", "read"));
         DORIS_METRIC_REGISTER.addMetrics(COUNTER_EDIT_LOG_READ);
-        COUNTER_EDIT_LOG_SIZE_BYTES = new LongCounterMetric("edit_log", MetricUnit.BYTES, "size of edit log");
-        COUNTER_EDIT_LOG_SIZE_BYTES.addLabel(new MetricLabel("type", "bytes"));
+        COUNTER_EDIT_LOG_CURRENT = new LongCounterMetric("edit_log", MetricUnit.OPERATIONS,
+                "counter of current edit log in bdbje");
+        COUNTER_EDIT_LOG_CURRENT.addLabel(new MetricLabel("type", "current"));
+        DORIS_METRIC_REGISTER.addMetrics(COUNTER_EDIT_LOG_CURRENT);
+        COUNTER_EDIT_LOG_SIZE_BYTES = new LongCounterMetric("edit_log", MetricUnit.BYTES,
+                "size of accumulated edit log");
+        COUNTER_EDIT_LOG_SIZE_BYTES.addLabel(new MetricLabel("type", "accumulated_bytes"));
         DORIS_METRIC_REGISTER.addMetrics(COUNTER_EDIT_LOG_SIZE_BYTES);
+        COUNTER_CURRENT_EDIT_LOG_SIZE_BYTES = new LongCounterMetric("edit_log", MetricUnit.BYTES,
+                "size of current edit log");
+        COUNTER_CURRENT_EDIT_LOG_SIZE_BYTES.addLabel(new MetricLabel("type", "current_bytes"));
+        DORIS_METRIC_REGISTER.addMetrics(COUNTER_CURRENT_EDIT_LOG_SIZE_BYTES);
         HISTO_EDIT_LOG_WRITE_LATENCY = METRIC_REGISTER.histogram(
             MetricRegistry.name("editlog", "write", "latency", "ms"));
 
diff --git a/fe/fe-core/src/main/java/org/apache/doris/persist/EditLog.java b/fe/fe-core/src/main/java/org/apache/doris/persist/EditLog.java
index 9254583294..6ac6ab0700 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/persist/EditLog.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/persist/EditLog.java
@@ -1044,6 +1044,7 @@ public class EditLog {
         totalTimeTransactions += (end - start);
         if (MetricRepo.isInit) {
             MetricRepo.HISTO_EDIT_LOG_WRITE_LATENCY.update((end - start));
+            MetricRepo.COUNTER_EDIT_LOG_CURRENT.increase(1L);
         }
 
         if (LOG.isDebugEnabled()) {
@@ -1066,10 +1067,17 @@ public class EditLog {
     /**
      * Return the size of the current EditLog
      */
-    synchronized long getEditLogSize() throws IOException {
+    public synchronized long getEditLogSize() throws IOException {
         return editStream.length();
     }
 
+    /**
+     * Return the number of the current EditLog
+     */
+    public synchronized long getEditLogNum() throws IOException {
+        return journal.getJournalNum();
+    }
+
     public synchronized long getTxId() {
         return txId;
     }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org