You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by li...@apache.org on 2020/02/11 12:38:55 UTC

[incubator-doris] branch master updated: Improve the triggering strategy of BE report (#2881)

This is an automated email from the ASF dual-hosted git repository.

lingbin pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 5440e19  Improve the triggering strategy of BE report (#2881)
5440e19 is described below

commit 5440e19d01e1e1d1454b713424f86f1dcacbfbae
Author: LingBin <li...@gmail.com>
AuthorDate: Tue Feb 11 20:38:44 2020 +0800

    Improve the triggering strategy of BE report (#2881)
    
    Currently, the report from BE to FE is completed in the background
    threads of `AgentServer` (`report_tablet_thread` and
    `report_disk_stat_thread`).  These two threads sleep in a standby
    state after each report. If a report is needed right away, they are
    notified and wake up immediately to report.
    
    For example, when background thread (`disk_monitor_thread`) in
    `StorageEngine` finds some tablets were deleted, it will notify
    `AgentServer` to trigger a report immediately.
    
    In the current implementation, in order to report ASAP, a member variable
    (`_is_drop_tables`) and two other flags are used to record whether
    reporting is needed, and then `StorageEngine::disk_monitor_thread` checks
    the value of this variable every time it runs, to determine whether a
    report needs to be triggered. This is actually superfluous, and it
    may result in untimely notifications, as shown below:
    
    ```
    (thread_1)        (thread_2)
    disk-monitor     disk-stat-reporter
        |                  |
        |               reporting
        |                  |
      notify_1             |
        |                  |
        |                wait_for_notify(will wait until timeout or next notification)
        |                  |
        V                  V
    ```
    
    If `StorageEngine::disk_monitor_thread` triggers a notification while
    `report_tablet_thread` has not yet started waiting, the notification
    is lost: it will not be received by `report_tablet_thread`,
    resulting in the BE not reporting to the FE until the wait times out
    or the next round of `disk_monitor_thread` detection.
    
    This change restructures the triggering implementation, and solves the above problem.
    
    This change also makes private some methods that do not need to be public.
---
 be/src/agent/heartbeat_server.cpp |   6 +-
 be/src/olap/olap_server.cpp       |  18 ++---
 be/src/olap/storage_engine.cpp    |  62 +++++++---------
 be/src/olap/storage_engine.h      | 147 ++++++++++++++++----------------------
 be/src/olap/tablet_manager.cpp    |   2 +-
 be/src/olap/tablet_manager.h      |   3 +-
 6 files changed, 101 insertions(+), 137 deletions(-)

diff --git a/be/src/agent/heartbeat_server.cpp b/be/src/agent/heartbeat_server.cpp
index 82ab98d..081583a 100644
--- a/be/src/agent/heartbeat_server.cpp
+++ b/be/src/agent/heartbeat_server.cpp
@@ -72,9 +72,7 @@ void HeartbeatServer::heartbeat(
     }
 }
 
-Status HeartbeatServer::_heartbeat(
-        const TMasterInfo& master_info) {
-    
+Status HeartbeatServer::_heartbeat(const TMasterInfo& master_info) {
     std::lock_guard<std::mutex> lk(_hb_mtx);
 
     if (master_info.__isset.backend_ip) {
@@ -155,7 +153,7 @@ Status HeartbeatServer::_heartbeat(
 
     if (need_report) {
         LOG(INFO) << "Master FE is changed or restarted. report tablet and disk info immediately";
-        _olap_engine->report_notify(true);
+        _olap_engine->trigger_report();
     }
 
     return Status::OK();
diff --git a/be/src/olap/olap_server.cpp b/be/src/olap/olap_server.cpp
index ffc6b97..0699778 100644
--- a/be/src/olap/olap_server.cpp
+++ b/be/src/olap/olap_server.cpp
@@ -140,7 +140,7 @@ OLAPStatus StorageEngine::_start_bg_worker() {
         }
     }
 
-    VLOG(10) << "init finished.";
+    VLOG(10) << "all bg worker started.";
     return OLAP_SUCCESS;
 }
 
@@ -156,7 +156,7 @@ void* StorageEngine::_fd_cache_clean_callback(void* arg) {
     }
     while (true) {
         sleep(interval);
-        start_clean_fd_cache();
+        _start_clean_fd_cache();
     }
 
     return nullptr;
@@ -181,7 +181,7 @@ void* StorageEngine::_base_compaction_thread_callback(void* arg, DataDir* data_d
         // add tid to cgroup
         CgroupsMgr::apply_system_cgroup();
         if (!data_dir->reach_capacity_limit(0)) {
-            perform_base_compaction(data_dir);
+            _perform_base_compaction(data_dir);
         }
 
         usleep(interval * 1000000);
@@ -202,9 +202,9 @@ void* StorageEngine::_garbage_sweeper_thread_callback(void* arg) {
                          max_interval, min_interval);
         min_interval = 1;
         max_interval = max_interval >= min_interval ? max_interval : min_interval;
-        LOG(INFO) << "force reset garbage sweep interval."
-                  << "max_interval" << max_interval
-                  << ", min_interval" << min_interval;
+        LOG(INFO) << "force reset garbage sweep interval. "
+                  << "max_interval=" << max_interval
+                  << ", min_interval=" << min_interval;
     }
 
     const double pi = 4 * std::atan(1);
@@ -225,7 +225,7 @@ void* StorageEngine::_garbage_sweeper_thread_callback(void* arg) {
         sleep(curr_interval);
 
         // 开始清理,并得到清理后的磁盘使用率
-        OLAPStatus res = start_trash_sweep(&usage);
+        OLAPStatus res = _start_trash_sweep(&usage);
         if (res != OLAP_SUCCESS) {
             OLAP_LOG_WARNING("one or more errors occur when sweep trash."
                     "see previous message for detail. [err code=%d]", res);
@@ -250,7 +250,7 @@ void* StorageEngine::_disk_stat_monitor_thread_callback(void* arg) {
     }
 
     while (true) {
-        start_disk_stat_monitor();
+        _start_disk_stat_monitor();
         sleep(interval);
     }
 
@@ -275,7 +275,7 @@ void* StorageEngine::_cumulative_compaction_thread_callback(void* arg, DataDir*
         // add tid to cgroup
         CgroupsMgr::apply_system_cgroup();
         if (!data_dir->reach_capacity_limit(0)) {
-            perform_cumulative_compaction(data_dir);
+            _perform_cumulative_compaction(data_dir);
         }
         usleep(interval * 1000000);
     }
diff --git a/be/src/olap/storage_engine.cpp b/be/src/olap/storage_engine.cpp
index 4e2fdf6..1599055 100644
--- a/be/src/olap/storage_engine.cpp
+++ b/be/src/olap/storage_engine.cpp
@@ -90,7 +90,7 @@ Status StorageEngine::open(const EngineOptions& options, StorageEngine** engine_
     RETURN_IF_ERROR(_validate_options(options));
     LOG(INFO) << "starting backend using uid:" << options.backend_uid.to_string();
     std::unique_ptr<StorageEngine> engine(new StorageEngine(options));
-    auto st = engine->open();
+    auto st = engine->_open();
     if (st != OLAP_SUCCESS) {
         LOG(WARNING) << "engine open failed, res=" << st;
         return Status::InternalError("open engine failed");
@@ -110,10 +110,7 @@ StorageEngine::StorageEngine(const EngineOptions& options)
         _available_storage_medium_type_count(0),
         _effective_cluster_id(-1),
         _is_all_cluster_id_exist(true),
-        _is_drop_tables(false),
         _index_stream_lru_cache(NULL),
-        _is_report_disk_state_already(false),
-        _is_report_tablet_already(false),
         _tablet_manager(new TabletManager()),
         _txn_manager(new TxnManager()),
         _rowset_id_generator(new UniqueRowsetIdGenerator(options.backend_uid)),
@@ -138,6 +135,7 @@ void StorageEngine::load_data_dirs(const std::vector<DataDir*>& data_dirs) {
             if (res != OLAP_SUCCESS) {
                 LOG(WARNING) << "io error when init load tables. res=" << res
                     << ", data dir=" << data_dir->path();
+                    // TODO(lingbin): why not exit progress, to force OP to change the conf
             }
         });
     }
@@ -146,7 +144,7 @@ void StorageEngine::load_data_dirs(const std::vector<DataDir*>& data_dirs) {
     }
 }
 
-OLAPStatus StorageEngine::open() {
+OLAPStatus StorageEngine::_open() {
     // init store_map
     for (auto& path : _options.store_paths) {
         DataDir* store = new DataDir(path.path, path.capacity_bytes, path.storage_medium,
@@ -198,7 +196,7 @@ OLAPStatus StorageEngine::_judge_and_update_effective_cluster_id(int32_t cluster
     OLAPStatus res = OLAP_SUCCESS;
 
     if (cluster_id == -1 && _effective_cluster_id == -1) {
-        // maybe this is a new cluster, cluster id will get from heartbeate
+        // maybe this is a new cluster, cluster id will get from heartbeat message
         return res;
     } else if (cluster_id != -1 && _effective_cluster_id == -1) {
         _effective_cluster_id = cluster_id;
@@ -274,8 +272,8 @@ OLAPStatus StorageEngine::get_all_data_dir_info(vector<DataDirInfo>* data_dir_in
     }
 
     // 2. get total tablets' size of each data dir
-    int tablet_counter = 0;
-    _tablet_manager->update_root_path_info(&path_map, &tablet_counter);
+    size_t tablet_count = 0;
+    _tablet_manager->update_root_path_info(&path_map, &tablet_count);
 
     // add path info to data_dir_infos
     for (auto& entry : path_map) {
@@ -284,32 +282,22 @@ OLAPStatus StorageEngine::get_all_data_dir_info(vector<DataDirInfo>* data_dir_in
 
     timer.stop();
     LOG(INFO) << "get root path info cost: " << timer.elapsed_time() / 1000000
-            << " ms. tablet counter: " << tablet_counter;
+            << " ms. tablet counter: " << tablet_count;
 
     return res;
 }
 
-void StorageEngine::start_disk_stat_monitor() {
+void StorageEngine::_start_disk_stat_monitor() {
     for (auto& it : _store_map) {
         it.second->health_check();
     }
+
     _update_storage_medium_type_count();
-    _delete_tablets_on_unused_root_path();
-
-    // if drop tables
-    // notify disk_state_worker_thread and tablet_worker_thread until they received
-    if (_is_drop_tables) {
-        report_notify(true);
-
-        bool is_report_disk_state_expected = true;
-        bool is_report_tablet_expected = true;
-        bool is_report_disk_state_exchanged =
-                _is_report_disk_state_already.compare_exchange_strong(is_report_disk_state_expected, false);
-        bool is_report_tablet_exchanged =
-                _is_report_tablet_already.compare_exchange_strong(is_report_tablet_expected, false);
-        if (is_report_disk_state_exchanged && is_report_tablet_exchanged) {
-            _is_drop_tables = false;
-        }
+    bool some_tablets_were_dropped = _delete_tablets_on_unused_root_path();
+    // If some tablets were dropped, we should notify disk_state_worker_thread and
+    // tablet_worker_thread (see TaskWorkerPool) to make them report to FE ASAP.
+    if (some_tablets_were_dropped) {
+        trigger_report();
     }
 }
 
@@ -405,18 +393,18 @@ static bool too_many_disks_are_failed(uint32_t unused_num, uint32_t total_num) {
             || (unused_num * 100 / total_num > config::max_percentage_of_error_disk));
 }
 
-void StorageEngine::_delete_tablets_on_unused_root_path() {
+bool StorageEngine::_delete_tablets_on_unused_root_path() {
     vector<TabletInfo> tablet_info_vec;
     uint32_t unused_root_path_num = 0;
     uint32_t total_root_path_num = 0;
 
     std::lock_guard<std::mutex> l(_store_lock);
     if (_store_map.size() == 0) {
-        return;
+        return false;
     }
 
     for (auto& it : _store_map) {
-        total_root_path_num++;
+        ++total_root_path_num;
         if (it.second->is_used()) {
             continue;
         }
@@ -432,11 +420,9 @@ void StorageEngine::_delete_tablets_on_unused_root_path() {
         exit(0);
     }
 
-    if (!tablet_info_vec.empty()) {
-        _is_drop_tables = true;
-    }
-
     _tablet_manager->drop_tablets_on_error_root_path(tablet_info_vec);
+    // If tablet_info_vec is not empty, means we have dropped some tablets.
+    return !tablet_info_vec.empty();
 }
 
 OLAPStatus StorageEngine::clear() {
@@ -488,13 +474,13 @@ void StorageEngine::clear_transaction_task(const TTransactionId transaction_id,
     LOG(INFO) << "finish to clear transaction task. transaction_id=" << transaction_id;
 }
 
-void StorageEngine::start_clean_fd_cache() {
+void StorageEngine::_start_clean_fd_cache() {
     VLOG(10) << "start clean file descritpor cache";
     FileHandler::get_fd_cache()->prune();
     VLOG(10) << "end clean file descritpor cache";
 }
 
-void StorageEngine::perform_cumulative_compaction(DataDir* data_dir) {
+void StorageEngine::_perform_cumulative_compaction(DataDir* data_dir) {
     TabletSharedPtr best_tablet = _tablet_manager->find_best_tablet_to_compaction(
             CompactionType::CUMULATIVE_COMPACTION, data_dir);
     if (best_tablet == nullptr) {
@@ -517,7 +503,7 @@ void StorageEngine::perform_cumulative_compaction(DataDir* data_dir) {
     best_tablet->set_last_cumu_compaction_failure_time(0);
 }
 
-void StorageEngine::perform_base_compaction(DataDir* data_dir) {
+void StorageEngine::_perform_base_compaction(DataDir* data_dir) {
     TabletSharedPtr best_tablet = _tablet_manager->find_best_tablet_to_compaction(
             CompactionType::BASE_COMPACTION, data_dir);
     if (best_tablet == nullptr) {
@@ -543,7 +529,7 @@ void StorageEngine::get_cache_status(rapidjson::Document* document) const {
     return _index_stream_lru_cache->get_cache_status(document);
 }
 
-OLAPStatus StorageEngine::start_trash_sweep(double* usage) {
+OLAPStatus StorageEngine::_start_trash_sweep(double* usage) {
     OLAPStatus res = OLAP_SUCCESS;
     LOG(INFO) << "start trash and snapshot sweep.";
 
@@ -785,7 +771,6 @@ OLAPStatus StorageEngine::recover_tablet_until_specfic_version(
 OLAPStatus StorageEngine::obtain_shard_path(
         TStorageMedium::type storage_medium, std::string* shard_path, DataDir** store) {
     LOG(INFO) << "begin to process obtain root path. storage_medium=" << storage_medium;
-    OLAPStatus res = OLAP_SUCCESS;
 
     if (shard_path == NULL) {
         LOG(WARNING) << "invalid output parameter which is null pointer.";
@@ -798,6 +783,7 @@ OLAPStatus StorageEngine::obtain_shard_path(
         return OLAP_ERR_NO_AVAILABLE_ROOT_PATH;
     }
 
+    OLAPStatus res = OLAP_SUCCESS;
     uint64_t shard = 0;
     res = stores[0]->get_shard(&shard);
     if (res != OLAP_SUCCESS) {
diff --git a/be/src/olap/storage_engine.h b/be/src/olap/storage_engine.h
index f14c51f..0e17182 100644
--- a/be/src/olap/storage_engine.h
+++ b/be/src/olap/storage_engine.h
@@ -76,17 +76,9 @@ public:
     void clear_transaction_task(const TTransactionId transaction_id,
                                 const std::vector<TPartitionId>& partition_ids);
 
-    // Instance should be inited from `static open()`
-    // MUST NOT be called in other circumstances.
-    OLAPStatus open();
-
     // Clear status(tables, ...)
     OLAPStatus clear();
 
-    void start_clean_fd_cache();
-    void perform_cumulative_compaction(DataDir* data_dir);
-    void perform_base_compaction(DataDir* data_dir);
-
     // 获取cache的使用情况信息
     void get_cache_status(rapidjson::Document* document) const;
 
@@ -98,12 +90,8 @@ public:
         return _index_stream_lru_cache;
     }
 
-    // 清理trash和snapshot文件,返回清理后的磁盘使用量
-    OLAPStatus start_trash_sweep(double *usage);
+    template<bool include_unused = false> std::vector<DataDir*> get_stores();
 
-    template<bool include_unused = false>
-    std::vector<DataDir*> get_stores();
-    Status set_cluster_id(int32_t cluster_id);
 
     // @brief 设置root_path是否可用
     void set_store_used_flag(const std::string& root_path, bool is_used);
@@ -111,41 +99,32 @@ public:
     // @brief 获取所有root_path信息
     OLAPStatus get_all_data_dir_info(std::vector<DataDirInfo>* data_dir_infos, bool need_update);
 
-    // 磁盘状态监测。监测unused_flag路劲新的对应root_path unused标识位,
-    // 当检测到有unused标识时,从内存中删除对应表信息,磁盘数据不动。
-    // 当磁盘状态为不可用,但未检测到unused标识时,需要从root_path上
-    // 重新加载数据。
-    void start_disk_stat_monitor();
-
     // get root path for creating tablet. The returned vector of root path should be random,
     // for avoiding that all the tablet would be deployed one disk.
-    std::vector<DataDir*> get_stores_for_create_tablet(
-        TStorageMedium::type storage_medium);
+    std::vector<DataDir*> get_stores_for_create_tablet(TStorageMedium::type storage_medium);
     DataDir* get_store(const std::string& path);
 
     uint32_t available_storage_medium_type_count() {
         return _available_storage_medium_type_count;
     }
 
+    Status set_cluster_id(int32_t cluster_id);
     int32_t effective_cluster_id() const {
         return _effective_cluster_id;
     }
 
     void start_delete_unused_rowset();
-
     void add_unused_rowset(RowsetSharedPtr rowset);
 
-    OLAPStatus recover_tablet_until_specfic_version(
-        const TRecoverTabletReq& recover_tablet_req);
+    OLAPStatus recover_tablet_until_specfic_version(const TRecoverTabletReq& recover_tablet_req);
 
     // Obtain shard path for new tablet.
     //
     // @param [out] shard_path choose an available root_path to clone new tablet
     // @return error code
-    OLAPStatus obtain_shard_path(
-            TStorageMedium::type storage_medium,
-            std::string* shared_path,
-            DataDir** store);
+    OLAPStatus obtain_shard_path(TStorageMedium::type storage_medium,
+                                 std::string* shared_path,
+                                 DataDir** store);
 
     // Load new tablet to make it effective.
     //
@@ -154,18 +133,28 @@ public:
     // @return OLAP_SUCCESS if load tablet success
     OLAPStatus load_header(const std::string& shard_path, const TCloneReq& request);
 
-    // call this if you want to trigger a disk and tablet report
-    void report_notify(bool is_all) {
-        is_all ? _report_cv.notify_all() : _report_cv.notify_one();
+    // To trigger a disk-stat and tablet report
+    void trigger_report() {
+        std::lock_guard<std::mutex> l(_report_mtx);
+        _need_report_tablet = true;
+        _need_report_disk_stat = true;
+        _report_cv.notify_all();
     }
 
-    // call this to wait a report notification until timeout
-    void wait_for_report_notify(int64_t timeout_sec, bool is_tablet_report) {
-        std::unique_lock<std::mutex> lk(_report_mtx);
-        auto cv_status = _report_cv.wait_for(lk, std::chrono::seconds(timeout_sec));
-        if (cv_status == std::cv_status::no_timeout) {
-            is_tablet_report ? _is_report_tablet_already = true :
-                    _is_report_disk_state_already = true;
+    // call this to wait for a report notification until timeout
+    void wait_for_report_notify(int64_t timeout_sec, bool from_report_tablet_thread) {
+        auto wait_timeout_sec = std::chrono::seconds(timeout_sec);
+        std::unique_lock<std::mutex> l(_report_mtx);
+        // When wait_for() returns, regardless of the return-result(possibly a timeout
+        // error), the report_tablet_thread and report_disk_stat_thread(see TaskWorkerPool)
+        // immediately begin the next round of reporting, so there is no need to check
+        // the return-value of wait_for().
+        if (from_report_tablet_thread) {
+            _report_cv.wait_for(l, wait_timeout_sec, [this] { return _need_report_tablet; });
+            _need_report_tablet = false;
+        } else {
+            _report_cv.wait_for(l, wait_timeout_sec, [this] { return _need_report_disk_stat; });
+            _need_report_disk_stat = false;
         }
     }
 
@@ -209,21 +198,21 @@ public:
     }
 
 private:
-    OLAPStatus _start_bg_worker();
-    OLAPStatus _check_file_descriptor_number();
-
-    OLAPStatus _check_all_root_path_cluster_id();
-
-    OLAPStatus _config_root_path_unused_flag_file(
-            const std::string& root_path,
-            std::string* unused_flag_file);
+    // Instance should be inited from `static open()`
+    // MUST NOT be called in other circumstances.
+    OLAPStatus _open();
 
-    void _delete_tablets_on_unused_root_path();
+    OLAPStatus _start_bg_worker();
 
     void _update_storage_medium_type_count();
 
+    // Some check methods
+    OLAPStatus _check_file_descriptor_number();
+    OLAPStatus _check_all_root_path_cluster_id();
     OLAPStatus _judge_and_update_effective_cluster_id(int32_t cluster_id);
 
+    bool _delete_tablets_on_unused_root_path();
+
     void _clean_unused_txns();
 
     void _clean_unused_rowset_metas();
@@ -231,12 +220,14 @@ private:
     OLAPStatus _do_sweep(
             const std::string& scan_root, const time_t& local_tm_now, const int32_t expire);
 
-    // Thread functions
+    // All these xxx_callback() functions are for Background threads
     // unused rowset monitor thread
     void* _unused_rowset_monitor_thread_callback(void* arg);
 
     // base compaction thread process function
     void* _base_compaction_thread_callback(void* arg, DataDir* data_dir);
+    // cumulative process function
+    void* _cumulative_compaction_thread_callback(void* arg, DataDir* data_dir);
 
     // garbage sweep thread process function. clear snapshot and trash folder
     void* _garbage_sweeper_thread_callback(void* arg);
@@ -244,9 +235,6 @@ private:
     // delete tablet with io error process function
     void* _disk_stat_monitor_thread_callback(void* arg);
 
-    // cumulative process function
-    void* _cumulative_compaction_thread_callback(void* arg, DataDir* data_dir);
-
     // clean file descriptors cache
     void* _fd_cache_clean_callback(void* arg);
 
@@ -260,8 +248,18 @@ private:
     // parse the default rowset type config to RowsetTypePB
     void _parse_default_rowset_type();
 
-private:
+    void _start_clean_fd_cache();
+    void _perform_cumulative_compaction(DataDir* data_dir);
+    void _perform_base_compaction(DataDir* data_dir);
+    // 清理trash和snapshot文件,返回清理后的磁盘使用量
+    OLAPStatus _start_trash_sweep(double *usage);
+    // 磁盘状态监测。监测unused_flag路劲新的对应root_path unused标识位,
+    // 当检测到有unused标识时,从内存中删除对应表信息,磁盘数据不动。
+    // 当磁盘状态为不可用,但未检测到unused标识时,需要从root_path上
+    // 重新加载数据。
+    void _start_disk_stat_monitor();
 
+private:
     struct CompactionCandidate {
         CompactionCandidate(uint32_t nicumulative_compaction_, int64_t tablet_id_, uint32_t index_) :
                 nice(nicumulative_compaction_), tablet_id(tablet_id_), disk_index(index_) {}
@@ -270,6 +268,7 @@ private:
         uint32_t disk_index = -1;
     };
 
+    // In descending order
     struct CompactionCandidateComparator {
         bool operator()(const CompactionCandidate& a, const CompactionCandidate& b) {
             return a.nice > b.nice;
@@ -278,11 +277,8 @@ private:
 
     struct CompactionDiskStat {
         CompactionDiskStat(std::string path, uint32_t index, bool used) :
-                storage_path(path),
-                disk_index(index),
-                task_running(0),
-                task_remaining(0),
-                is_used(used){}
+                storage_path(path), disk_index(index), task_running(0),
+                task_remaining(0), is_used(used){}
         const std::string storage_path;
         const uint32_t disk_index;
         uint32_t task_running;
@@ -290,8 +286,6 @@ private:
         bool is_used;
     };
 
-    typedef std::map<std::string, uint32_t> file_system_task_count_t;
-
     EngineOptions _options;
     std::mutex _store_lock;
     std::map<std::string, DataDir*> _store_map;
@@ -299,49 +293,36 @@ private:
 
     int32_t _effective_cluster_id;
     bool _is_all_cluster_id_exist;
-    bool _is_drop_tables;
 
     Cache* _file_descriptor_lru_cache;
     Cache* _index_stream_lru_cache;
 
-    Mutex _fs_task_mutex;
-    file_system_task_count_t _fs_base_compaction_task_num_map;
-    std::vector<CompactionCandidate> _cumulative_compaction_candidate;
-
     static StorageEngine* _s_instance;
 
-    std::unordered_map<std::string, RowsetSharedPtr> _unused_rowsets;
     Mutex _gc_mutex;
+    std::unordered_map<std::string, RowsetSharedPtr> _unused_rowsets;
 
     std::thread _unused_rowset_monitor_thread;
-
     // thread to monitor snapshot expiry
     std::thread _garbage_sweeper_thread;
-
     // thread to monitor disk stat
     std::thread _disk_stat_monitor_thread;
-
-    // thread to run base compaction
+    // threads to run base compaction
     std::vector<std::thread> _base_compaction_threads;
-
-    // thread to check cumulative
+    // threads to check cumulative
     std::vector<std::thread> _cumulative_compaction_threads;
-
     std::thread _fd_cache_clean_thread;
-
     std::vector<std::thread> _path_gc_threads;
-
-    // thread to scan disk paths
+    // threads to scan disk paths
     std::vector<std::thread> _path_scan_threads;
-
-    // thread to run tablet checkpoint
+    // threads to run tablet checkpoint
     std::vector<std::thread> _tablet_checkpoint_threads;
 
-    // for tablet and disk report
+    // For tablet and disk-stat report
     std::mutex _report_mtx;
     std::condition_variable _report_cv;
-    std::atomic_bool _is_report_disk_state_already;
-    std::atomic_bool _is_report_tablet_already;
+    bool _need_report_tablet = false;
+    bool _need_report_disk_stat = false;
 
     Mutex _engine_task_mutex;
 
@@ -352,11 +333,9 @@ private:
 
     std::unique_ptr<MemTableFlushExecutor> _memtable_flush_executor;
 
-    // default rowset type for load
-    // used to decide the type of new loaded data
+    // Used to control the migration from segment_v1 to segment_v2, can be deleted in futrue.
+    // Type of new loaded data
     RowsetTypePB _default_rowset_type;
-    // default rowset type for compaction.
-    // used to control the the process of converting old data
     RowsetTypePB _compaction_rowset_type;
 
     HeartbeatFlags* _heartbeat_flags;
diff --git a/be/src/olap/tablet_manager.cpp b/be/src/olap/tablet_manager.cpp
index 0c3c267..024904c 100644
--- a/be/src/olap/tablet_manager.cpp
+++ b/be/src/olap/tablet_manager.cpp
@@ -1092,7 +1092,7 @@ bool TabletManager::try_schema_change_lock(TTabletId tablet_id) {
 }
 
 void TabletManager::update_root_path_info(std::map<std::string, DataDirInfo>* path_map,
-    int* tablet_counter) {
+                                          size_t* tablet_counter) {
     ReadLock rlock(&_tablet_map_lock);
     for (auto& entry : _tablet_map) {
         const TableInstances& instance = entry.second;
diff --git a/be/src/olap/tablet_manager.h b/be/src/olap/tablet_manager.h
index 7beb651..93b7d8a 100644
--- a/be/src/olap/tablet_manager.h
+++ b/be/src/olap/tablet_manager.h
@@ -116,7 +116,8 @@ public:
     // Prevent schema change executed concurrently.
     bool try_schema_change_lock(TTabletId tablet_id);
 
-    void update_root_path_info(std::map<std::string, DataDirInfo>* path_map, int* tablet_counter);
+    void update_root_path_info(std::map<std::string, DataDirInfo>* path_map,
+                               size_t* tablet_counter);
 
     void get_partition_related_tablets(int64_t partition_id, std::set<TabletInfo>* tablet_infos);
 


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org