You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by yi...@apache.org on 2022/07/11 00:35:10 UTC

[doris] branch master updated: [memtracker]fix fix_memtracker_performance_ (#10629)

This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 4cb80c5733 [memtracker]fix fix_memtracker_performance_ (#10629)
4cb80c5733 is described below

commit 4cb80c5733a1a70a016cee8ab902f8d38f6acc7f
Author: Kidd <10...@users.noreply.github.com>
AuthorDate: Mon Jul 11 08:35:05 2022 +0800

    [memtracker]fix fix_memtracker_performance_ (#10629)
---
 be/src/common/config.h                  |   2 +-
 be/src/runtime/exec_env.h               |   3 -
 be/src/runtime/load_channel.h           |   3 +-
 be/src/runtime/mem_tracker.cpp          |   2 +-
 be/src/runtime/tcmalloc_hook.h          |  30 +++++--
 be/src/runtime/thread_context.cpp       |  15 ++--
 be/src/runtime/thread_context.h         | 136 ++++++++++++++------------------
 be/src/runtime/thread_mem_tracker_mgr.h |   9 +--
 8 files changed, 94 insertions(+), 106 deletions(-)

diff --git a/be/src/common/config.h b/be/src/common/config.h
index d212617867..a9ff1624f9 100644
--- a/be/src/common/config.h
+++ b/be/src/common/config.h
@@ -658,7 +658,7 @@ CONF_mInt16(mem_tracker_level, "0");
 // smaller than this value will continue to accumulate. specified as number of bytes.
 // Decreasing this value will increase the frequency of consume/release.
 // Increasing this value will cause MemTracker statistics to be inaccurate.
-CONF_mInt32(mem_tracker_consume_min_size_bytes, "2097152");
+CONF_mInt32(mem_tracker_consume_min_size_bytes, "4194304");
 
 // When MemTracker is a negative value, it is considered that a memory leak has occurred,
 // but the actual MemTracker records inaccurately will also cause a negative value,
diff --git a/be/src/runtime/exec_env.h b/be/src/runtime/exec_env.h
index d37e7e2ae1..7a02dae866 100644
--- a/be/src/runtime/exec_env.h
+++ b/be/src/runtime/exec_env.h
@@ -73,8 +73,6 @@ class ClientCache;
 
 class HeartbeatFlags;
 
-static bool exec_env_existed = false;
-
 // Execution environment for queries/plan fragments.
 // Contains all required global structures, and handles to
 // singleton services. Clients must call StartServices exactly
@@ -90,7 +88,6 @@ public:
     /// we return the most recently created instance.
     static ExecEnv* GetInstance() {
         static ExecEnv s_exec_env;
-        exec_env_existed = true;
         return &s_exec_env;
     }
 
diff --git a/be/src/runtime/load_channel.h b/be/src/runtime/load_channel.h
index 20ef476dd9..91005a6b89 100644
--- a/be/src/runtime/load_channel.h
+++ b/be/src/runtime/load_channel.h
@@ -40,7 +40,8 @@ class Cache;
 class LoadChannel {
 public:
     LoadChannel(const UniqueId& load_id, std::shared_ptr<MemTracker>& mem_tracker,
-                int64_t timeout_s, bool is_high_priority, const std::string& sender_ip, bool is_ve);
+                int64_t timeout_s, bool is_high_priority, const std::string& sender_ip,
+                bool is_vec);
     ~LoadChannel();
 
     // open a new load channel if not exist
diff --git a/be/src/runtime/mem_tracker.cpp b/be/src/runtime/mem_tracker.cpp
index d53e975d64..71e89f8b1e 100644
--- a/be/src/runtime/mem_tracker.cpp
+++ b/be/src/runtime/mem_tracker.cpp
@@ -185,7 +185,7 @@ void MemTracker::init_virtual() {
 MemTracker::~MemTracker() {
     consume(_untracked_mem.exchange(0)); // before memory_leak_check
     // TCMalloc hook will be triggered during destructor memtracker, may cause crash.
-    if (_label == "Process") STOP_THREAD_LOCAL_MEM_TRACKER(false);
+    if (_label == "Process") doris::thread_local_ctx._init = false;
     if (!_virtual && config::memory_leak_detection) MemTracker::memory_leak_check(this);
     if (!_virtual && parent()) {
         // Do not call release on the parent tracker to avoid repeated releases.
diff --git a/be/src/runtime/tcmalloc_hook.h b/be/src/runtime/tcmalloc_hook.h
index dd0e1c788f..8b3a0290ed 100644
--- a/be/src/runtime/tcmalloc_hook.h
+++ b/be/src/runtime/tcmalloc_hook.h
@@ -37,18 +37,32 @@
 //  destructor to control the behavior of consume can lead to unexpected behavior,
 //  like this: if (LIKELY(doris::start_thread_mem_tracker)) {
 void new_hook(const void* ptr, size_t size) {
-    if (doris::thread_local_ctx._init) {
-        doris::tls_ctx()->consume_mem(tc_nallocx(size, 0));
-    } else if (doris::exec_env_existed && doris::ExecEnv::GetInstance()->initialized()) {
-        doris::MemTracker::get_process_tracker()->consume(tc_nallocx(size, 0));
+    if (doris::btls_key != doris::EMPTY_BTLS_KEY && doris::bthread_tls != nullptr) {
+        // Currently in bthread, consume thread context mem tracker in bthread tls.
+        if (doris::btls_key != doris::bthread_tls_key) {
+            // pthread switch occurs, updating bthread_tls and bthread_tls_key cached in pthread tls.
+            doris::bthread_tls =
+                    static_cast<doris::ThreadContext*>(bthread_getspecific(doris::btls_key));
+            doris::bthread_tls_key = doris::btls_key;
+        }
+        doris::bthread_tls->_thread_mem_tracker_mgr->cache_consume(tc_nallocx(size, 0));
+    } else if (doris::thread_local_ctx._init) {
+        doris::thread_local_ctx._tls->_thread_mem_tracker_mgr->cache_consume(tc_nallocx(size, 0));
     }
 }
 
 void delete_hook(const void* ptr) {
-    if (doris::thread_local_ctx._init) {
-        doris::tls_ctx()->release_mem(tc_malloc_size(const_cast<void*>(ptr)));
-    } else if (doris::exec_env_existed && doris::ExecEnv::GetInstance()->initialized()) {
-        doris::MemTracker::get_process_tracker()->release(tc_malloc_size(const_cast<void*>(ptr)));
+    if (doris::btls_key != doris::EMPTY_BTLS_KEY && doris::bthread_tls != nullptr) {
+        if (doris::btls_key != doris::bthread_tls_key) {
+            doris::bthread_tls =
+                    static_cast<doris::ThreadContext*>(bthread_getspecific(doris::btls_key));
+            doris::bthread_tls_key = doris::btls_key;
+        }
+        doris::bthread_tls->_thread_mem_tracker_mgr->cache_consume(
+                -tc_malloc_size(const_cast<void*>(ptr)));
+    } else if (doris::thread_local_ctx._init) {
+        doris::thread_local_ctx._tls->_thread_mem_tracker_mgr->cache_consume(
+                -tc_malloc_size(const_cast<void*>(ptr)));
     }
 }
 
diff --git a/be/src/runtime/thread_context.cpp b/be/src/runtime/thread_context.cpp
index a439b4ca8f..63d84cf21d 100644
--- a/be/src/runtime/thread_context.cpp
+++ b/be/src/runtime/thread_context.cpp
@@ -22,15 +22,10 @@
 
 namespace doris {
 
-DEFINE_STATIC_THREAD_LOCAL(ThreadContext, ThreadContextPtr, thread_local_ctx);
+DEFINE_STATIC_THREAD_LOCAL(ThreadContext, ThreadContextPtr, _tls);
 
 ThreadContextPtr::ThreadContextPtr() {
-    INIT_STATIC_THREAD_LOCAL(ThreadContext, thread_local_ctx);
-    _init = true;
-}
-
-ThreadContext* ThreadContextPtr::get() {
-    return thread_local_ctx;
+    INIT_STATIC_THREAD_LOCAL(ThreadContext, _tls);
 }
 
 AttachTaskThread::AttachTaskThread(const ThreadContext::TaskType& type, const std::string& task_id,
@@ -170,8 +165,10 @@ SwitchBthread::SwitchBthread() {
         DCHECK(tls->type() == ThreadContext::TaskType::UNKNOWN);
         tls->_thread_mem_tracker_mgr->clear_untracked_mems();
     }
-    tls->_thread_mem_tracker_mgr->init();
+    tls->init();
     tls->set_type(ThreadContext::TaskType::BRPC);
+    bthread_tls_key = btls_key;
+    bthread_tls = tls;
 #endif
 }
 
@@ -181,6 +178,8 @@ SwitchBthread::~SwitchBthread() {
     tls->_thread_mem_tracker_mgr->clear_untracked_mems();
     tls->_thread_mem_tracker_mgr->init();
     tls->set_type(ThreadContext::TaskType::UNKNOWN);
+    bthread_tls = nullptr;
+    bthread_tls_key = EMPTY_BTLS_KEY;
 #ifndef NDEBUG
     DorisMetrics::instance()->switch_bthread_count->increment(1);
 #endif // NDEBUG
diff --git a/be/src/runtime/thread_context.h b/be/src/runtime/thread_context.h
index bf347e0a28..b2bddb2d21 100644
--- a/be/src/runtime/thread_context.h
+++ b/be/src/runtime/thread_context.h
@@ -32,11 +32,6 @@
 // Attach to task when thread starts
 #define SCOPED_ATTACH_TASK_THREAD(type, ...) \
     auto VARNAME_LINENUM(attach_task_thread) = AttachTaskThread(type, ##__VA_ARGS__)
-// Be careful to stop the thread mem tracker, because the actual order of malloc and free memory
-// may be different from the order of execution of instructions, which will cause the position of
-// the memory track to be unexpected.
-#define STOP_THREAD_LOCAL_MEM_TRACKER(scope) \
-    auto VARNAME_LINENUM(stop_tracker) = doris::StopThreadMemTracker(scope)
 // Switch thread mem tracker during task execution.
 // After the non-query thread switches the mem tracker, if the thread will not switch the mem
 // tracker again in the short term, can consider manually clear_untracked_mems.
@@ -87,8 +82,50 @@
 namespace doris {
 
 class TUniqueId;
+class ThreadContext;
 
 extern bthread_key_t btls_key;
+static const bthread_key_t EMPTY_BTLS_KEY = {0, 0};
+
+// Using gcc11 compiles thread_local variable on lower versions of GLIBC will report an error,
+// see https://github.com/apache/doris/pull/7911
+//
+// If we want to avoid this error,
+// 1. For non-trivial variables in thread_local, such as std::string, you need to store them as pointers to
+//    ensure that thread_local is trivial, these non-trivial pointers will uniformly call destructors elsewhere.
+// 2. The default destructor of the thread_local variable cannot be overridden.
+//
+// This is difficult to implement. Because the destructor is not overwritten, it means that the outside cannot
+// be notified when the thread terminates, and the non-trivial pointers in thread_local cannot be released in time.
+// The func provided by pthread and std::thread doesn't help either.
+//
+// So, kudu Class-scoped static thread local implementation was introduced. Solve the above problem by
+// Thread-scoped thread local + Class-scoped thread local.
+//
+// This may look very trick, but it's the best way I can find.
+//
+// refer to:
+//  https://gcc.gnu.org/onlinedocs/gcc-3.3.1/gcc/Thread-Local.html
+//  https://stackoverflow.com/questions/12049684/
+//  https://sourceware.org/glibc/wiki/Destructor%20support%20for%20thread_local%20variables
+//  https://www.jianshu.com/p/756240e837dd
+//  https://man7.org/linux/man-pages/man3/pthread_tryjoin_np.3.html
+class ThreadContextPtr {
+public:
+    ThreadContextPtr();
+    // Cannot add destructor `~ThreadContextPtr`, otherwise it will no longer be of type POD, the reason is as above.
+
+    // TCMalloc hook is triggered during ThreadContext construction, which may lead to deadlock.
+    bool _init = false;
+
+    DECLARE_STATIC_THREAD_LOCAL(ThreadContext, _tls);
+};
+
+inline thread_local ThreadContextPtr thread_local_ctx;
+// To avoid performance problems caused by frequently calling `bthread_getspecific` to obtain bthread TLS
+// in tcmalloc hook, cache the key and value of bthread TLS in pthread TLS.
+inline thread_local ThreadContext* bthread_tls;
+inline thread_local bthread_key_t bthread_tls_key;
 
 // The thread context saves some info about a working thread.
 // 2 required info:
@@ -113,10 +150,25 @@ public:
                                                      "STORAGE"};
 
 public:
-    ThreadContext() : _type(TaskType::UNKNOWN) {
+    ThreadContext() {
         _thread_mem_tracker_mgr.reset(new ThreadMemTrackerMgr());
+        init();
+        thread_local_ctx._init = true;
+    }
+
+    ~ThreadContext() {
+        // Restore to the memory state before _init=true to ensure accurate overall memory statistics.
+        // Thereby ensuring that the memory alloc size is not tracked during the initialization of the
+        // ThreadContext before `_init = true in ThreadContextPtr()`,
+        // Equal to the size of the memory release that is not tracked during the destruction of the
+        // ThreadContext after `_init = false in ~ThreadContextPtr()`,
+        init();
+        thread_local_ctx._init = false;
+    }
+
+    void init() {
+        _type = TaskType::UNKNOWN;
         _thread_mem_tracker_mgr->init();
-        start_thread_mem_tracker = true;
         _thread_id = get_thread_id();
     }
 
@@ -154,22 +206,6 @@ public:
         return ss.str();
     }
 
-    void consume_mem(int64_t size) {
-        if (start_thread_mem_tracker) {
-            _thread_mem_tracker_mgr->cache_consume(size);
-        } else {
-            MemTracker::get_process_tracker()->consume(size);
-        }
-    }
-
-    void release_mem(int64_t size) {
-        if (start_thread_mem_tracker) {
-            _thread_mem_tracker_mgr->cache_consume(-size);
-        } else {
-            MemTracker::get_process_tracker()->release(size);
-        }
-    }
-
     // After _thread_mem_tracker_mgr is initialized, the current thread TCMalloc Hook starts to
     // consume/release mem_tracker.
     // Note that the use of shared_ptr will cause a crash. The guess is that there is an
@@ -185,50 +221,12 @@ private:
     TUniqueId _fragment_instance_id;
 };
 
-// Using gcc11 compiles thread_local variable on lower versions of GLIBC will report an error,
-// see https://github.com/apache/doris/pull/7911
-//
-// If we want to avoid this error,
-// 1. For non-trivial variables in thread_local, such as std::string, you need to store them as pointers to
-//    ensure that thread_local is trivial, these non-trivial pointers will uniformly call destructors elsewhere.
-// 2. The default destructor of the thread_local variable cannot be overridden.
-//
-// This is difficult to implement. Because the destructor is not overwritten, it means that the outside cannot
-// be notified when the thread terminates, and the non-trivial pointers in thread_local cannot be released in time.
-// The func provided by pthread and std::thread doesn't help either.
-//
-// So, kudu Class-scoped static thread local implementation was introduced. Solve the above problem by
-// Thread-scoped thread local + Class-scoped thread local.
-//
-// This may look very trick, but it's the best way I can find.
-//
-// refer to:
-//  https://gcc.gnu.org/onlinedocs/gcc-3.3.1/gcc/Thread-Local.html
-//  https://stackoverflow.com/questions/12049684/
-//  https://sourceware.org/glibc/wiki/Destructor%20support%20for%20thread_local%20variables
-//  https://www.jianshu.com/p/756240e837dd
-//  https://man7.org/linux/man-pages/man3/pthread_tryjoin_np.3.html
-class ThreadContextPtr {
-public:
-    ThreadContextPtr();
-
-    ThreadContext* get();
-
-    // TCMalloc hook is triggered during ThreadContext construction, which may lead to deadlock.
-    bool _init = false;
-
-private:
-    DECLARE_STATIC_THREAD_LOCAL(ThreadContext, thread_local_ctx);
-};
-
-inline thread_local ThreadContextPtr thread_local_ctx;
-
 static ThreadContext* tls_ctx() {
     ThreadContext* tls = static_cast<ThreadContext*>(bthread_getspecific(btls_key));
     if (tls != nullptr) {
         return tls;
     } else {
-        return thread_local_ctx.get();
+        return thread_local_ctx._tls;
     }
 }
 
@@ -266,20 +264,6 @@ public:
     ~AttachTaskThread();
 };
 
-class StopThreadMemTracker {
-public:
-    explicit StopThreadMemTracker(const bool scope = true) : _scope(scope) {
-        start_thread_mem_tracker = false;
-    }
-
-    ~StopThreadMemTracker() {
-        if (_scope == true) start_thread_mem_tracker = true;
-    }
-
-private:
-    bool _scope = true;
-};
-
 template <bool Existed>
 class SwitchThreadMemTracker {
 public:
diff --git a/be/src/runtime/thread_mem_tracker_mgr.h b/be/src/runtime/thread_mem_tracker_mgr.h
index ffc6fdc01f..ada4651ed3 100644
--- a/be/src/runtime/thread_mem_tracker_mgr.h
+++ b/be/src/runtime/thread_mem_tracker_mgr.h
@@ -48,13 +48,6 @@ struct ConsumeErrCallBackInfo {
     }
 };
 
-// If there is a memory new/delete operation in the consume method, it may enter infinite recursion.
-// Note: After the tracker is stopped, the memory alloc in the consume method should be released in time,
-// otherwise the MemTracker statistics will be inaccurate.
-// In some cases, we want to turn off thread automatic memory statistics, manually call consume.
-// In addition, when ~RootTracker, TCMalloc delete hook release RootTracker will crash.
-inline thread_local bool start_thread_mem_tracker = false;
-
 // TCMalloc new/delete Hook is counted in the memory_tracker of the current thread.
 //
 // In the original design, the MemTracker consume method is called before the memory is allocated.
@@ -72,7 +65,6 @@ public:
         _mem_trackers.clear();
         _untracked_mems.clear();
         _mem_tracker_labels.clear();
-        start_thread_mem_tracker = false;
     }
 
     // After thread initialization, calling `init` again must call `clear_untracked_mems` first
@@ -177,6 +169,7 @@ private:
     phmap::flat_hash_map<int64_t, std::string> _mem_tracker_labels;
     // If true, call memtracker try_consume, otherwise call consume.
     bool _check_limit;
+    // If there is a memory new/delete operation in the consume method, it may enter infinite recursion.
     bool _stop_consume = false;
 
     int64_t _tracker_id;


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org