You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by ya...@apache.org on 2021/04/21 01:15:13 UTC

[incubator-doris] branch master updated: [Metrics] Add some large memtrackers' metric (#5614)

This is an automated email from the ASF dual-hosted git repository.

yangzhg pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-doris.git


The following commit(s) were added to refs/heads/master by this push:
     new be733cf  [Metrics] Add some large memtrackers' metric (#5614)
be733cf is described below

commit be733cfa9c53e73f0b122ff7dec88a3919afb2b2
Author: Yingchun Lai <40...@qq.com>
AuthorDate: Wed Apr 21 09:15:04 2021 +0800

    [Metrics] Add some large memtrackers' metric (#5614)
    
    MemTracker reports memory consumption so that we can find out which
    module consumes more memory, but it only exposes the current value.
    This patch adds metrics for some large memory consumers, so that we can
    see which module consumes more memory over time. This is useful for
    troubleshooting OOM problems and tuning configs.
---
 be/src/olap/schema_change.cpp        | 14 +++++++++++++-
 be/src/olap/schema_change.h          | 10 ++++++----
 be/src/olap/storage_engine.cpp       | 10 +++++-----
 be/src/olap/tablet_manager.cpp       | 11 +++++++++++
 be/src/olap/tablet_manager.h         |  4 +---
 be/src/runtime/exec_env_init.cpp     |  9 +++++++++
 be/src/runtime/load_channel_mgr.cpp  |  6 ++++++
 be/src/util/doris_metrics.h          |  6 +++++-
 be/src/util/metrics.h                |  3 +++
 be/test/exec/hash_table_test.cpp     |  8 ++++----
 be/test/exprs/topn_function_test.cpp |  4 +++-
 be/test/olap/tablet_mgr_test.cpp     | 36 +++++++++++++++++-------------------
 12 files changed, 83 insertions(+), 38 deletions(-)

diff --git a/be/src/olap/schema_change.cpp b/be/src/olap/schema_change.cpp
index a46652e..43d3a2a 100644
--- a/be/src/olap/schema_change.cpp
+++ b/be/src/olap/schema_change.cpp
@@ -49,6 +49,9 @@ using std::vector;
 
 namespace doris {
 
+DEFINE_GAUGE_METRIC_PROTOTYPE_5ARG(schema_change_mem_consumption, MetricUnit::BYTES, "",
+                                   mem_consumption, Labels({{"type", "schema_change"}}));
+
 class RowBlockSorter {
 public:
     explicit RowBlockSorter(RowBlockAllocator* allocator);
@@ -1346,6 +1349,16 @@ bool SchemaChangeWithSorting::_external_sorting(vector<RowsetSharedPtr>& src_row
     return true;
 }
 
+SchemaChangeHandler::SchemaChangeHandler() : _mem_tracker(MemTracker::CreateTracker(-1, "SchemaChange")) {
+    REGISTER_HOOK_METRIC(schema_change_mem_consumption, [this]() {
+      return _mem_tracker->consumption();
+    });
+}
+
+SchemaChangeHandler::~SchemaChangeHandler() {
+    DEREGISTER_HOOK_METRIC(schema_change_mem_consumption);
+}
+
 OLAPStatus SchemaChangeHandler::process_alter_tablet_v2(const TAlterTabletReqV2& request) {
     LOG(INFO) << "begin to do request alter tablet: base_tablet_id=" << request.base_tablet_id
               << ", base_schema_hash=" << request.base_schema_hash
@@ -2173,5 +2186,4 @@ OLAPStatus SchemaChangeHandler::_validate_alter_result(TabletSharedPtr new_table
     }
 }
 
-SchemaChangeHandler SchemaChangeHandler::_s_instance;
 } // namespace doris
diff --git a/be/src/olap/schema_change.h b/be/src/olap/schema_change.h
index 452c341..ce5f107 100644
--- a/be/src/olap/schema_change.h
+++ b/be/src/olap/schema_change.h
@@ -182,7 +182,10 @@ private:
 
 class SchemaChangeHandler {
 public:
-    static SchemaChangeHandler* instance() { return &_s_instance; }
+    static SchemaChangeHandler* instance() {
+        static SchemaChangeHandler instance;
+        return &instance;
+    }
 
     OLAPStatus schema_version_convert(TabletSharedPtr base_tablet, TabletSharedPtr new_tablet,
                                       RowsetSharedPtr* base_rowset, RowsetSharedPtr* new_rowset);
@@ -242,13 +245,12 @@ private:
                                            const TabletColumn& column_schema,
                                            const std::string& value);
 private:
-    SchemaChangeHandler() : _mem_tracker(MemTracker::CreateTracker(-1, "SchemaChange")) {}
-    virtual ~SchemaChangeHandler() {}
+    SchemaChangeHandler();
+    virtual ~SchemaChangeHandler();
     SchemaChangeHandler(const SchemaChangeHandler&) = delete;
     SchemaChangeHandler& operator=(const SchemaChangeHandler&) = delete;
 
     std::shared_ptr<MemTracker> _mem_tracker;
-    static SchemaChangeHandler _s_instance;
 };
 
 using RowBlockDeleter = std::function<void(RowBlock*)>;
diff --git a/be/src/olap/storage_engine.cpp b/be/src/olap/storage_engine.cpp
index 0cf3801..7327350 100644
--- a/be/src/olap/storage_engine.cpp
+++ b/be/src/olap/storage_engine.cpp
@@ -84,14 +84,14 @@ using strings::Substitute;
 namespace doris {
 
 DEFINE_GAUGE_METRIC_PROTOTYPE_2ARG(unused_rowsets_count, MetricUnit::ROWSETS);
-DEFINE_GAUGE_METRIC_PROTOTYPE_2ARG(compaction_mem_current_consumption, MetricUnit::BYTES);
+DEFINE_GAUGE_METRIC_PROTOTYPE_5ARG(compaction_mem_consumption, MetricUnit::BYTES, "",
+                                   mem_consumption, Labels({{"type", "compaction"}}));
 
 StorageEngine* StorageEngine::_s_instance = nullptr;
 
 static Status _validate_options(const EngineOptions& options) {
     if (options.store_paths.empty()) {
         return Status::InternalError("store paths is empty");
-        ;
     }
     return Status::OK();
 }
@@ -129,7 +129,7 @@ StorageEngine::StorageEngine(const EngineOptions& options)
         MutexLock lock(&_gc_mutex);
         return _unused_rowsets.size();
     });
-    REGISTER_HOOK_METRIC(compaction_mem_current_consumption, [this]() {
+    REGISTER_HOOK_METRIC(compaction_mem_consumption, [this]() {
         return _compaction_mem_tracker->consumption();
         // We can get each compaction's detail usage
         // LOG(INFO) << _compaction_mem_tracker=>LogUsage(2);
@@ -138,10 +138,10 @@ StorageEngine::StorageEngine(const EngineOptions& options)
 
 StorageEngine::~StorageEngine() {
     DEREGISTER_HOOK_METRIC(unused_rowsets_count);
-    DEREGISTER_HOOK_METRIC(compaction_mem_current_consumption);
+    DEREGISTER_HOOK_METRIC(compaction_mem_consumption);
     _clear();
 
-    if(_compaction_thread_pool){
+    if (_compaction_thread_pool) {
         _compaction_thread_pool->shutdown();
     }
 }
diff --git a/be/src/olap/tablet_manager.cpp b/be/src/olap/tablet_manager.cpp
index 8ad683f..b39b375 100644
--- a/be/src/olap/tablet_manager.cpp
+++ b/be/src/olap/tablet_manager.cpp
@@ -62,6 +62,9 @@ using strings::Substitute;
 
 namespace doris {
 
+DEFINE_GAUGE_METRIC_PROTOTYPE_5ARG(tablet_meta_mem_consumption, MetricUnit::BYTES, "",
+                                   mem_consumption, Labels({{"type", "tablet_meta"}}));
+
 static bool _cmp_tablet_by_create_time(const TabletSharedPtr& a, const TabletSharedPtr& b) {
     return a->creation_time() < b->creation_time();
 }
@@ -77,6 +80,14 @@ TabletManager::TabletManager(int32_t tablet_map_lock_shard_size)
     for (auto& tablets_shard : _tablets_shards) {
         tablets_shard.lock = std::unique_ptr<RWMutex>(new RWMutex());
     }
+    REGISTER_HOOK_METRIC(tablet_meta_mem_consumption, [this]() {
+      return _mem_tracker->consumption();
+    });
+}
+
+TabletManager::~TabletManager() {
+    _mem_tracker->Release(_mem_tracker->consumption());
+    DEREGISTER_HOOK_METRIC(tablet_meta_mem_consumption);
 }
 
 OLAPStatus TabletManager::_add_tablet_unlocked(TTabletId tablet_id, SchemaHash schema_hash,
diff --git a/be/src/olap/tablet_manager.h b/be/src/olap/tablet_manager.h
index 6774298..3427b93 100644
--- a/be/src/olap/tablet_manager.h
+++ b/be/src/olap/tablet_manager.h
@@ -47,9 +47,7 @@ class DataDir;
 class TabletManager {
 public:
     TabletManager(int32_t tablet_map_lock_shard_size);
-    ~TabletManager() {
-        _mem_tracker->Release(_mem_tracker->consumption());
-    }
+    ~TabletManager();
 
     bool check_tablet_id_exist(TTabletId tablet_id);
 
diff --git a/be/src/runtime/exec_env_init.cpp b/be/src/runtime/exec_env_init.cpp
index c9b9ad1..39b9a2a 100644
--- a/be/src/runtime/exec_env_init.cpp
+++ b/be/src/runtime/exec_env_init.cpp
@@ -66,6 +66,8 @@ namespace doris {
 
 DEFINE_GAUGE_METRIC_PROTOTYPE_2ARG(scanner_thread_pool_queue_size, MetricUnit::NOUNIT);
 DEFINE_GAUGE_METRIC_PROTOTYPE_2ARG(etl_thread_pool_queue_size, MetricUnit::NOUNIT);
+DEFINE_GAUGE_METRIC_PROTOTYPE_5ARG(query_mem_consumption, MetricUnit::BYTES, "",
+                                   mem_consumption, Labels({{"type", "query"}}));
 
 Status ExecEnv::init(ExecEnv* env, const std::vector<StorePath>& store_paths) {
     return env->_init(store_paths);
@@ -199,6 +201,10 @@ Status ExecEnv::_init_mem_tracker() {
     int32_t index_page_cache_percentage = config::index_page_cache_percentage;
     StoragePageCache::create_global_cache(storage_cache_limit, index_page_cache_percentage);
 
+    REGISTER_HOOK_METRIC(query_mem_consumption, [this]() {
+      return _mem_tracker->consumption();
+    });
+
     // TODO(zc): The current memory usage configuration is a bit confusing,
     // we need to sort out the use of memory
     return Status::OK();
@@ -260,6 +266,9 @@ void ExecEnv::_destroy() {
     SAFE_DELETE(_routine_load_task_executor);
     SAFE_DELETE(_external_scan_context_mgr);
     SAFE_DELETE(_heartbeat_flags);
+
+    DEREGISTER_HOOK_METRIC(query_mem_consumption);
+
     _is_init = false;
 }
 
diff --git a/be/src/runtime/load_channel_mgr.cpp b/be/src/runtime/load_channel_mgr.cpp
index b3b79cc..15e2c08 100644
--- a/be/src/runtime/load_channel_mgr.cpp
+++ b/be/src/runtime/load_channel_mgr.cpp
@@ -28,6 +28,8 @@
 namespace doris {
 
 DEFINE_GAUGE_METRIC_PROTOTYPE_2ARG(load_channel_count, MetricUnit::NOUNIT);
+DEFINE_GAUGE_METRIC_PROTOTYPE_5ARG(load_mem_consumption, MetricUnit::BYTES, "",
+                                   mem_consumption, Labels({{"type", "load"}}));
 
 // Calculate the total memory limit of all load tasks on this BE
 static int64_t calc_process_max_load_memory(int64_t process_mem_limit) {
@@ -73,6 +75,7 @@ LoadChannelMgr::LoadChannelMgr() : _stop_background_threads_latch(1) {
 
 LoadChannelMgr::~LoadChannelMgr() {
     DEREGISTER_HOOK_METRIC(load_channel_count);
+    DEREGISTER_HOOK_METRIC(load_mem_consumption);
     _stop_background_threads_latch.count_down();
     if (_load_channels_clean_thread) {
         _load_channels_clean_thread->join();
@@ -83,6 +86,9 @@ LoadChannelMgr::~LoadChannelMgr() {
 Status LoadChannelMgr::init(int64_t process_mem_limit) {
     int64_t load_mem_limit = calc_process_max_load_memory(process_mem_limit);
     _mem_tracker = MemTracker::CreateTracker(load_mem_limit, "Load");
+    REGISTER_HOOK_METRIC(load_mem_consumption, [this]() {
+        return _mem_tracker->consumption();
+    });
     RETURN_IF_ERROR(_start_bg_worker());
     return Status::OK();
 }
diff --git a/be/src/util/doris_metrics.h b/be/src/util/doris_metrics.h
index 7e548c8..5b76db1 100644
--- a/be/src/util/doris_metrics.h
+++ b/be/src/util/doris_metrics.h
@@ -173,7 +173,11 @@ public:
     UIntGauge* brpc_endpoint_stub_count;
     UIntGauge* tablet_writer_count;
 
-    UIntGauge* compaction_mem_current_consumption;
+    UIntGauge* compaction_mem_consumption;
+    UIntGauge* load_mem_consumption;
+    UIntGauge* query_mem_consumption;
+    UIntGauge* schema_change_mem_consumption;
+    UIntGauge* tablet_meta_mem_consumption;
 
     // Cache metrics
     UIntGauge* query_cache_memory_total_byte;
diff --git a/be/src/util/metrics.h b/be/src/util/metrics.h
index a9a81d2..bffc46c 100644
--- a/be/src/util/metrics.h
+++ b/be/src/util/metrics.h
@@ -283,6 +283,9 @@ public:
 #define DEFINE_GAUGE_METRIC_PROTOTYPE_3ARG(name, unit, desc) \
     DEFINE_METRIC_PROTOTYPE(name, MetricType::GAUGE, unit, desc, "", Labels(), false)
 
+#define DEFINE_GAUGE_METRIC_PROTOTYPE_5ARG(name, unit, desc, group, labels) \
+    DEFINE_METRIC_PROTOTYPE(name, MetricType::GAUGE, unit, desc, #group, labels, false)
+
 #define INT_COUNTER_METRIC_REGISTER(entity, metric) \
     metric = (IntCounter*)(entity->register_metric<IntCounter>(&METRIC_##metric))
 
diff --git a/be/test/exec/hash_table_test.cpp b/be/test/exec/hash_table_test.cpp
index 3a9a0e1..b2c1f72 100644
--- a/be/test/exec/hash_table_test.cpp
+++ b/be/test/exec/hash_table_test.cpp
@@ -40,6 +40,7 @@
 #include "util/cpu_info.h"
 #include "util/runtime_profile.h"
 #include "util/time.h"
+#include "test_util/test_util.h"
 
 namespace doris {
 
@@ -309,7 +310,7 @@ TEST_F(HashTableTest, ScanTest) {
 // This test continues adding to the hash table to trigger the resize code paths
 TEST_F(HashTableTest, GrowTableTest) {
     int build_row_val = 0;
-    int num_to_add = 4;
+    int num_to_add = LOOP_LESS_OR_MORE(2, 4);
     int expected_size = 0;
 
     std::shared_ptr<MemTracker> mem_tracker =
@@ -321,8 +322,7 @@ TEST_F(HashTableTest, GrowTableTest) {
                          mem_tracker, num_buckets);
     EXPECT_FALSE(mem_tracker->limit_exceeded());
 
-    // This inserts about 5M entries
-    for (int i = 0; i < 20; ++i) {
+    for (int i = 0; i < LOOP_LESS_OR_MORE(1, 20); ++i) {
         for (int j = 0; j < num_to_add; ++build_row_val, ++j) {
             hash_table.insert(create_tuple_row(build_row_val));
         }
@@ -333,7 +333,7 @@ TEST_F(HashTableTest, GrowTableTest) {
     }
     LOG(INFO) << "consume:" << mem_tracker->consumption() << ",expected_size:" << expected_size;
 
-    EXPECT_TRUE(mem_tracker->limit_exceeded());
+    EXPECT_EQ(LOOP_LESS_OR_MORE(0, 1), mem_tracker->limit_exceeded());
 
     // Validate that we can find the entries
     for (int i = 0; i < expected_size * 5; i += 100000) {
diff --git a/be/test/exprs/topn_function_test.cpp b/be/test/exprs/topn_function_test.cpp
index ac2c9e9..2f0e201 100644
--- a/be/test/exprs/topn_function_test.cpp
+++ b/be/test/exprs/topn_function_test.cpp
@@ -136,7 +136,8 @@ void test_topn_accuracy(FunctionContext* ctx, int key_space, int space_expand_ra
     topn_dst->sort_retain(TOPN_NUM, &topn_sort_vec);
 
     uint32_t error = 0;
-    for (uint32_t i = 0; i < TOPN_NUM; ++i) {
+    int min_size = std::min(accuracy_sort_vec.size(), topn_sort_vec.size());
+    for (uint32_t i = 0; i < min_size; ++i) {
         Counter& accuracy_counter = accuracy_sort_vec[i];
         Counter& topn_counter = topn_sort_vec[i];
         if (accuracy_counter.get_count() != topn_counter.get_count()) {
@@ -146,6 +147,7 @@ void test_topn_accuracy(FunctionContext* ctx, int key_space, int space_expand_ra
             LOG(INFO) << "topn counter : (" << topn_counter.get_item() << ", " << topn_counter.get_count() << ")";
         }
     }
+    error += std::abs((int32_t)(accuracy_sort_vec.size() - topn_sort_vec.size()));
     LOG(INFO) << "Total errors : " << error;
     TopNFunctions::topn_finalize(ctx, dst);
 }
diff --git a/be/test/olap/tablet_mgr_test.cpp b/be/test/olap/tablet_mgr_test.cpp
index 682a35a..9a5eb86 100644
--- a/be/test/olap/tablet_mgr_test.cpp
+++ b/be/test/olap/tablet_mgr_test.cpp
@@ -48,7 +48,6 @@ static StorageEngine* k_engine = nullptr;
 class TabletMgrTest : public testing::Test {
 public:
     virtual void SetUp() {
-        string test_engine_data_path = "./be/test/olap/test_data/converter_test_data/data";
         _engine_data_path = "./be/test/olap/test_data/converter_test_data/tmp";
         std::filesystem::remove_all(_engine_data_path);
         FileUtils::create_dir(_engine_data_path);
@@ -66,16 +65,7 @@ public:
 
         _data_dir = new DataDir(_engine_data_path, 1000000000);
         _data_dir->init();
-        string tmp_data_path = _engine_data_path + "/data";
-        if (std::filesystem::exists(tmp_data_path)) {
-            std::filesystem::remove_all(tmp_data_path);
-        }
-        copy_dir(test_engine_data_path, tmp_data_path);
-        _tablet_id = 15007;
-        _schema_hash = 368169781;
-        _tablet_data_path = tmp_data_path + "/" + std::to_string(0) + "/" +
-                            std::to_string(_tablet_id) + "/" + std::to_string(_schema_hash);
-        _tablet_mgr.reset(new TabletManager(1));
+        _tablet_mgr = k_engine->tablet_manager();
     }
 
     virtual void TearDown() {
@@ -86,13 +76,9 @@ public:
     }
 
 private:
-    DataDir* _data_dir;
-    std::string _json_rowset_meta;
+    DataDir* _data_dir = nullptr;
     std::string _engine_data_path;
-    int64_t _tablet_id;
-    int32_t _schema_hash;
-    string _tablet_data_path;
-    std::unique_ptr<TabletManager> _tablet_mgr;
+    TabletManager* _tablet_mgr = nullptr;
 };
 
 TEST_F(TabletMgrTest, CreateTablet) {
@@ -138,6 +124,12 @@ TEST_F(TabletMgrTest, CreateTablet) {
     create_tablet_req.__set_tablet_schema(tablet_schema);
     create_st = _tablet_mgr->create_tablet(create_tablet_req, data_dirs);
     ASSERT_TRUE(create_st == OLAP_ERR_CE_TABLET_ID_EXIST);
+
+    OLAPStatus drop_st = _tablet_mgr->drop_tablet(111, 3333, false);
+    ASSERT_TRUE(drop_st == OLAP_SUCCESS);
+    tablet.reset();
+    OLAPStatus trash_st = _tablet_mgr->start_trash_sweep();
+    ASSERT_TRUE(trash_st == OLAP_SUCCESS);
 }
 
 TEST_F(TabletMgrTest, CreateTabletWithSequence) {
@@ -169,7 +161,6 @@ TEST_F(TabletMgrTest, CreateTabletWithSequence) {
     tablet_schema.__set_storage_type(TStorageType::COLUMN);
     tablet_schema.__set_columns(cols);
     tablet_schema.__set_sequence_col_idx(1);
-
     TCreateTabletReq create_tablet_req;
     create_tablet_req.__set_tablet_schema(tablet_schema);
     create_tablet_req.__set_tablet_id(111);
@@ -179,15 +170,22 @@ TEST_F(TabletMgrTest, CreateTabletWithSequence) {
     data_dirs.push_back(_data_dir);
     OLAPStatus create_st = _tablet_mgr->create_tablet(create_tablet_req, data_dirs);
     ASSERT_TRUE(create_st == OLAP_SUCCESS);
+
     TabletSharedPtr tablet = _tablet_mgr->get_tablet(111, 3333);
     ASSERT_TRUE(tablet != nullptr);
     // check dir exist
     bool dir_exist = FileUtils::check_exist(tablet->tablet_path());
-    ASSERT_TRUE(dir_exist);
+    ASSERT_TRUE(dir_exist) << tablet->tablet_path();
     // check meta has this tablet
     TabletMetaSharedPtr new_tablet_meta(new TabletMeta());
     OLAPStatus check_meta_st = TabletMetaManager::get_meta(_data_dir, 111, 3333, new_tablet_meta);
     ASSERT_TRUE(check_meta_st == OLAP_SUCCESS);
+
+    OLAPStatus drop_st = _tablet_mgr->drop_tablet(111, 3333, false);
+    ASSERT_TRUE(drop_st == OLAP_SUCCESS);
+    tablet.reset();
+    OLAPStatus trash_st = _tablet_mgr->start_trash_sweep();
+    ASSERT_TRUE(trash_st == OLAP_SUCCESS);
 }
 
 TEST_F(TabletMgrTest, DropTablet) {

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org