You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by ya...@apache.org on 2021/04/21 01:23:43 UTC

[incubator-doris] branch master updated: [Metrics] Add metrics to monitor BE's agent task queue size (#5648)

This is an automated email from the ASF dual-hosted git repository.

yangzhg pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-doris.git


The following commit(s) were added to refs/heads/master by this push:
     new d15fe05  [Metrics] Add metrics to monitor BE's agent task queue size (#5648)
d15fe05 is described below

commit d15fe05f3cb9c9424e78dacfd7ff7e39a1e99445
Author: Yingchun Lai <40...@qq.com>
AuthorDate: Wed Apr 21 09:23:33 2021 +0800

    [Metrics] Add metrics to monitor BE's agent task queue size (#5648)
    
    * [Metrics] Add metrics to monitor BE's agent task queue size
    
    Sometimes a user's DDL or background task may run for a long time,
    and it's not easy to find out which procedure has a problem.
    This patch adds a metric to monitor the BE's agent task queue size,
    which is helpful for troubleshooting.
    
    The raw metrics on the BE look like:
    doris_be_agent_task_queue_size{type="REPORT_OLAP_TABLE"} 0
    doris_be_agent_task_queue_size{type="REPORT_DISK_STATE"} 0
    doris_be_agent_task_queue_size{type="REPORT_TASK"} 0
    doris_be_agent_task_queue_size{type="CHECK_CONSISTENCY"} 0
    doris_be_agent_task_queue_size{type="DELETE"} 0
    doris_be_agent_task_queue_size{type="CLEAR_TRANSACTION_TASK"} 0
    doris_be_agent_task_queue_size{type="PUBLISH_VERSION"} 0
    doris_be_agent_task_queue_size{type="UPLOAD"} 0
    doris_be_agent_task_queue_size{type="DROP_TABLE"} 0
    doris_be_agent_task_queue_size{type="CREATE_TABLE"} 39
    doris_be_agent_task_queue_size{type="RELEASE_SNAPSHOT"} 0
    doris_be_agent_task_queue_size{type="STORAGE_MEDIUM_MIGRATE"} 245
    doris_be_agent_task_queue_size{type="CLONE"} 0
    doris_be_agent_task_queue_size{type="MOVE"} 0
    doris_be_agent_task_queue_size{type="ALTER_TABLE"} 0
    doris_be_agent_task_queue_size{type="DOWNLOAD"} 0
    doris_be_agent_task_queue_size{type="PUSH"} 0
    doris_be_agent_task_queue_size{type="UPDATE_TABLET_META_INFO"} 0
    doris_be_agent_task_queue_size{type="MAKE_SNAPSHOT"} 0
    
    * fix typo
---
 be/src/agent/task_worker_pool.cpp | 18 ++++++++++++++++--
 be/src/agent/task_worker_pool.h   |  3 +++
 be/src/util/doris_metrics.h       | 21 ++++++++++++---------
 3 files changed, 31 insertions(+), 11 deletions(-)

diff --git a/be/src/agent/task_worker_pool.cpp b/be/src/agent/task_worker_pool.cpp
index abe918d..85cff4e 100644
--- a/be/src/agent/task_worker_pool.cpp
+++ b/be/src/agent/task_worker_pool.cpp
@@ -68,6 +68,8 @@ using std::vector;
 
 namespace doris {
 
+DEFINE_GAUGE_METRIC_PROTOTYPE_2ARG(agent_task_queue_size, MetricUnit::NOUNIT);
+
 const uint32_t TASK_FINISH_MAX_RETRY = 3;
 const uint32_t PUBLISH_VERSION_MAX_RETRY = 3;
 const uint32_t REPORT_TASK_WORKER_COUNT = 1;
@@ -81,8 +83,7 @@ FrontendServiceClientCache TaskWorkerPool::_master_service_client_cache;
 
 TaskWorkerPool::TaskWorkerPool(const TaskWorkerType task_worker_type, ExecEnv* env,
                                const TMasterInfo& master_info)
-        : _name(strings::Substitute("TaskWorkerPool.$0", TYPE_STRING(task_worker_type))),
-          _master_info(master_info),
+        : _master_info(master_info),
           _agent_utils(new AgentUtils()),
           _master_client(new MasterServerClient(_master_info, &_master_service_client_cache)),
           _env(env),
@@ -93,11 +94,24 @@ TaskWorkerPool::TaskWorkerPool(const TaskWorkerType task_worker_type, ExecEnv* e
     _backend.__set_host(BackendOptions::get_localhost());
     _backend.__set_be_port(config::be_port);
     _backend.__set_http_port(config::webserver_port);
+
+    string task_worker_type_name = TYPE_STRING(task_worker_type);
+    _name = strings::Substitute("TaskWorkerPool.$0", task_worker_type_name);
+
+    _metric_entity = DorisMetrics::instance()->metric_registry()->register_entity(
+            task_worker_type_name, {{"type", task_worker_type_name}});
+    REGISTER_ENTITY_HOOK_METRIC(_metric_entity, this, agent_task_queue_size, [this]() {
+        lock_guard<Mutex> lock(_worker_thread_lock);
+        return _tasks.size();
+    });
 }
 
 TaskWorkerPool::~TaskWorkerPool() {
     _stop_background_threads_latch.count_down();
     stop();
+
+    DEREGISTER_ENTITY_HOOK_METRIC(_metric_entity, agent_task_queue_size);
+    DorisMetrics::instance()->metric_registry()->deregister_entity(_metric_entity);
 }
 
 void TaskWorkerPool::start() {
diff --git a/be/src/agent/task_worker_pool.h b/be/src/agent/task_worker_pool.h
index cfbd6df..04fbe78 100644
--- a/be/src/agent/task_worker_pool.h
+++ b/be/src/agent/task_worker_pool.h
@@ -221,6 +221,9 @@ private:
     std::unique_ptr<ThreadPool> _thread_pool;
     std::deque<TAgentTaskRequest> _tasks;
 
+    std::shared_ptr<MetricEntity> _metric_entity;
+    UIntGauge* agent_task_queue_size;
+
     uint32_t _worker_count;
     TaskWorkerType _task_worker_type;
 
diff --git a/be/src/util/doris_metrics.h b/be/src/util/doris_metrics.h
index 5b76db1..cab2873 100644
--- a/be/src/util/doris_metrics.h
+++ b/be/src/util/doris_metrics.h
@@ -28,16 +28,19 @@
 
 namespace doris {
 
+#define REGISTER_ENTITY_HOOK_METRIC(entity, owner, metric, func)                                \
+    owner->metric = (UIntGauge*)(entity->register_metric<UIntGauge>(&METRIC_##metric));         \
+    entity->register_hook(#metric, [&]() { owner->metric->set_value(func()); });
+
 #define REGISTER_HOOK_METRIC(metric, func)                                                      \
-    DorisMetrics::instance()->metric =                                                          \
-            (UIntGauge*)(DorisMetrics::instance()->server_entity()->register_metric<UIntGauge>( \
-                    &METRIC_##metric));                                                         \
-    DorisMetrics::instance()->server_entity()->register_hook(                                   \
-            #metric, [&]() { DorisMetrics::instance()->metric->set_value(func()); });
-
-#define DEREGISTER_HOOK_METRIC(name)                                              \
-    DorisMetrics::instance()->server_entity()->deregister_metric(&METRIC_##name); \
-    DorisMetrics::instance()->server_entity()->deregister_hook(#name);
+    REGISTER_ENTITY_HOOK_METRIC(DorisMetrics::instance()->server_entity(), DorisMetrics::instance(), metric, func)
+
+#define DEREGISTER_ENTITY_HOOK_METRIC(entity, name)                                             \
+    entity->deregister_metric(&METRIC_##name);                                                  \
+    entity->deregister_hook(#name);
+
+#define DEREGISTER_HOOK_METRIC(name)                                                            \
+    DEREGISTER_ENTITY_HOOK_METRIC(DorisMetrics::instance()->server_entity(), name)
 
 class DorisMetrics {
 public:

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org