You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by mo...@apache.org on 2023/06/28 14:49:05 UTC

[doris] branch branch-1.2-lts updated: [branch-1.2](memory) pick some memory fix (#21294)

This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch branch-1.2-lts
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-1.2-lts by this push:
     new 63d0396ea9 [branch-1.2](memory) pick some memory fix (#21294)
63d0396ea9 is described below

commit 63d0396ea9df42b33a9c82ed414df155f9116e9f
Author: Xinyi Zou <zo...@gmail.com>
AuthorDate: Wed Jun 28 22:48:56 2023 +0800

    [branch-1.2](memory) pick some memory fix (#21294)
    
    pick
    1.  #21237
    2. #20782
    3. #21282
    4. disable chunk allocator in allocator
---
 be/src/common/config.h                        |  2 +-
 be/src/common/daemon.cpp                      |  4 +--
 be/src/http/action/metrics_action.cpp         |  5 +++-
 be/src/olap/delta_writer.cpp                  |  4 ++-
 be/src/olap/olap_server.cpp                   |  3 +-
 be/src/olap/rowset/segment_v2/page_io.cpp     | 19 +++++++------
 be/src/runtime/memory/mem_tracker_limiter.cpp |  2 +-
 be/src/util/mem_info.cpp                      | 17 ++++++++----
 be/src/util/mem_info.h                        | 40 +++++++++++++++++++++++++++
 be/src/util/system_metrics.cpp                | 30 ++++++++++++++++++++
 be/src/vec/common/allocator.h                 |  2 +-
 11 files changed, 107 insertions(+), 21 deletions(-)

diff --git a/be/src/common/config.h b/be/src/common/config.h
index 535b3be63b..6787fba585 100644
--- a/be/src/common/config.h
+++ b/be/src/common/config.h
@@ -474,7 +474,7 @@ CONF_Int32(min_chunk_reserved_bytes, "1024");
 // of gperftools tcmalloc central lock.
 // Jemalloc or google tcmalloc have core cache, Chunk Allocator may no longer be needed after replacing
 // gperftools tcmalloc.
-CONF_mBool(disable_chunk_allocator_in_vec, "false");
+CONF_mBool(disable_chunk_allocator_in_vec, "true");
 
 // The probing algorithm of partitioned hash table.
 // Enable quadratic probing hash table
diff --git a/be/src/common/daemon.cpp b/be/src/common/daemon.cpp
index 176ab1dc64..80dab2b422 100644
--- a/be/src/common/daemon.cpp
+++ b/be/src/common/daemon.cpp
@@ -256,7 +256,7 @@ void Daemon::memory_gc_thread() {
             // No longer full gc and minor gc during sleep.
             memory_full_gc_sleep_time_ms = config::memory_gc_sleep_time_s * 1000;
             memory_minor_gc_sleep_time_ms = config::memory_gc_sleep_time_s * 1000;
-            doris::MemTrackerLimiter::print_log_process_usage("process full gc", false);
+            doris::MemTrackerLimiter::print_log_process_usage("Start Full GC", false);
             if (doris::MemInfo::process_full_gc()) {
                 // If there is not enough memory to be gc, the process memory usage will not be printed in the next continuous gc.
                 doris::MemTrackerLimiter::enable_print_log_process_usage();
@@ -268,7 +268,7 @@ void Daemon::memory_gc_thread() {
                             doris::MemInfo::soft_mem_limit())) {
             // No minor gc during sleep, but full gc is possible.
             memory_minor_gc_sleep_time_ms = config::memory_gc_sleep_time_s * 1000;
-            doris::MemTrackerLimiter::print_log_process_usage("process minor gc", false);
+            doris::MemTrackerLimiter::print_log_process_usage("Start Minor GC", false);
             if (doris::MemInfo::process_minor_gc()) {
                 doris::MemTrackerLimiter::enable_print_log_process_usage();
             }
diff --git a/be/src/http/action/metrics_action.cpp b/be/src/http/action/metrics_action.cpp
index f2bd8b43cb..832f2570d0 100644
--- a/be/src/http/action/metrics_action.cpp
+++ b/be/src/http/action/metrics_action.cpp
@@ -29,6 +29,7 @@
 #include "http/http_request.h"
 #include "http/http_response.h"
 #include "runtime/exec_env.h"
+#include "util/mem_info.h"
 #include "util/metrics.h"
 
 namespace doris {
@@ -37,7 +38,9 @@ void MetricsAction::handle(HttpRequest* req) {
     const std::string& type = req->param("type");
     const std::string& with_tablet = req->param("with_tablet");
     std::string str;
-    if (type == "core") {
+    if (MemInfo::is_exceed_soft_mem_limit(GB_EXCHANGE_BYTE)) {
+        str = "";
+    } else if (type == "core") {
         str = _metric_registry->to_core_string();
     } else if (type == "json") {
         str = _metric_registry->to_json(with_tablet == "true");
diff --git a/be/src/olap/delta_writer.cpp b/be/src/olap/delta_writer.cpp
index 480e09d925..fb26c010fb 100644
--- a/be/src/olap/delta_writer.cpp
+++ b/be/src/olap/delta_writer.cpp
@@ -34,6 +34,7 @@
 #include "runtime/tuple_row.h"
 #include "service/backend_options.h"
 #include "util/brpc_client_cache.h"
+#include "util/mem_info.h"
 #include "util/ref_count_closure.h"
 
 namespace doris {
@@ -116,7 +117,8 @@ Status DeltaWriter::init() {
     }
 
     // check tablet version number
-    if (_tablet->version_count() > config::max_tablet_version_num) {
+    if (_tablet->version_count() > config::max_tablet_version_num &&
+        !MemInfo::is_exceed_soft_mem_limit(GB_EXCHANGE_BYTE)) {
         //trigger quick compaction
         if (config::enable_quick_compaction) {
             StorageEngine::instance()->submit_quick_compaction_task(_tablet);
diff --git a/be/src/olap/olap_server.cpp b/be/src/olap/olap_server.cpp
index 386cb30736..28fd8e7629 100644
--- a/be/src/olap/olap_server.cpp
+++ b/be/src/olap/olap_server.cpp
@@ -413,7 +413,8 @@ void StorageEngine::_compaction_tasks_producer_callback() {
 
     int64_t interval = config::generate_compaction_tasks_min_interval_ms;
     do {
-        if (!config::disable_auto_compaction) {
+        if (!config::disable_auto_compaction &&
+            !MemInfo::is_exceed_soft_mem_limit(GB_EXCHANGE_BYTE)) {
             _adjust_compaction_thread_num();
 
             bool check_score = false;
diff --git a/be/src/olap/rowset/segment_v2/page_io.cpp b/be/src/olap/rowset/segment_v2/page_io.cpp
index cdcb7a6fa8..c8321e0f3d 100644
--- a/be/src/olap/rowset/segment_v2/page_io.cpp
+++ b/be/src/olap/rowset/segment_v2/page_io.cpp
@@ -131,10 +131,13 @@ Status PageIO::read_and_decompress_page(const PageReadOptions& opts, PageHandle*
         return Status::Corruption("Bad page: too small size ({})", page_size);
     }
 
-    if (doris::MemTrackerLimiter::sys_mem_exceed_limit_check(page_size)) {
-        return Status::MemoryLimitExceeded(
-                fmt::format("Bad page: sys memory check failed, try alloc: {}", page_size));
-    }
+    // The check below is disabled: it cancels queries too eagerly once memory exceeds the limit.
+    // The expected behavior is to wait for a period of time after the limit is exceeded,
+    // like mem_check in the Allocator in the Apache Doris master branch.
+    // if (doris::MemTrackerLimiter::sys_mem_exceed_limit_check(page_size)) {
+    //     return Status::MemoryLimitExceeded(
+    //             fmt::format("Bad page: sys memory check failed, try alloc: {}", page_size));
+    // }
 
     // hold compressed page at first, reset to decompressed page later
     std::unique_ptr<char[]> page(new char[page_size]);
@@ -172,10 +175,10 @@ Status PageIO::read_and_decompress_page(const PageReadOptions& opts, PageHandle*
         }
         SCOPED_RAW_TIMER(&opts.stats->decompress_ns);
         auto uncompressed_size = footer->uncompressed_size() + footer_size + 4;
-        if (doris::MemTrackerLimiter::sys_mem_exceed_limit_check(uncompressed_size)) {
-            return Status::MemoryLimitExceeded(fmt::format(
-                    "Bad page: sys memory check failed, try alloc: {}", uncompressed_size));
-        }
+        // if (doris::MemTrackerLimiter::sys_mem_exceed_limit_check(uncompressed_size)) {
+        //     return Status::MemoryLimitExceeded(fmt::format(
+        //             "Bad page: sys memory check failed, try alloc: {}", uncompressed_size));
+        // }
         std::unique_ptr<char[]> decompressed_page(new char[uncompressed_size]);
 
         // decompress page body
diff --git a/be/src/runtime/memory/mem_tracker_limiter.cpp b/be/src/runtime/memory/mem_tracker_limiter.cpp
index e7f15bf212..99e501da8d 100644
--- a/be/src/runtime/memory/mem_tracker_limiter.cpp
+++ b/be/src/runtime/memory/mem_tracker_limiter.cpp
@@ -134,7 +134,7 @@ void MemTrackerLimiter::make_process_snapshots(std::vector<MemTracker::Snapshot>
         process_mem_sum += it.second->current_value();
     }
 
-    snapshot.type = "tc/jemalloc_cache";
+    snapshot.type = "tc/jemalloc_free_memory";
     snapshot.label = "";
     snapshot.limit = -1;
     snapshot.cur_consumption = MemInfo::allocator_cache_mem();
diff --git a/be/src/util/mem_info.cpp b/be/src/util/mem_info.cpp
index bb0f6bb172..13c0c677e6 100644
--- a/be/src/util/mem_info.cpp
+++ b/be/src/util/mem_info.cpp
@@ -68,14 +68,18 @@ void MemInfo::refresh_allocator_mem() {
 #if defined(ADDRESS_SANITIZER) || defined(LEAK_SANITIZER) || defined(THREAD_SANITIZER)
     LOG(INFO) << "Memory tracking is not available with address sanitizer builds.";
 #elif defined(USE_JEMALLOC)
+    // 'epoch' is a special mallctl -- it updates the statistics. Without it, all
+    // the following calls will return stale values. It increments and returns
+    // the current epoch number, which might be useful to log as a sanity check.
     uint64_t epoch = 0;
     size_t sz = sizeof(epoch);
     je_mallctl("epoch", &epoch, &sz, &epoch, sz);
 
     // https://jemalloc.net/jemalloc.3.html
-    _s_allocator_cache_mem =
-            get_je_metrics(fmt::format("stats.arenas.{}.tcache_bytes", MALLCTL_ARENAS_ALL)) +
-            get_je_metrics("stats.metadata");
+    // https://www.bookstack.cn/read/aliyun-rds-core/4a0cdf677f62feb3.md
+    _s_allocator_cache_mem = get_je_all_arena_metrics("tcache_bytes") +
+                             get_je_metrics("stats.metadata") +
+                             get_je_all_arena_metrics("pdirty") * get_page_size();
     _s_allocator_cache_mem_str =
             PrettyPrinter::print(static_cast<uint64_t>(_s_allocator_cache_mem), TUnit::BYTES);
     _s_virtual_memory_used = get_je_metrics("stats.mapped");
@@ -105,6 +109,7 @@ void MemInfo::process_cache_gc(int64_t& freed_mem) {
                 StoragePageCache::instance()->get_page_cache_mem_consumption(segment_v2::DATA_PAGE);
         StoragePageCache::instance()->prune(segment_v2::DATA_PAGE);
     }
+    je_purge_all_arena_dirty_pages();
 }
 
 // step1: free all cache
@@ -118,7 +123,8 @@ bool MemInfo::process_minor_gc() {
     std::string mem_available_str = MemInfo::sys_mem_available_str();
 
     Defer defer {[&]() {
-        LOG(INFO) << fmt::format("Process Minor GC Free Memory {} Bytes. cost(us): {}", freed_mem,
+        je_purge_all_arena_dirty_pages();
+        LOG(INFO) << fmt::format("End Minor GC, Free Memory {} Bytes. cost(us): {}", freed_mem,
                                  watch.elapsed_time() / 1000);
     }};
 
@@ -152,7 +158,8 @@ bool MemInfo::process_full_gc() {
     std::string mem_available_str = MemInfo::sys_mem_available_str();
 
     Defer defer {[&]() {
-        LOG(INFO) << fmt::format("Process Full GC Free Memory {} Bytes. cost(us): {}", freed_mem,
+        je_purge_all_arena_dirty_pages();
+        LOG(INFO) << fmt::format("End Full GC Free, Memory {} Bytes. cost(us): {}", freed_mem,
                                  watch.elapsed_time() / 1000);
     }};
 
diff --git a/be/src/util/mem_info.h b/be/src/util/mem_info.h
index 2cb17eb6b8..2779591427 100644
--- a/be/src/util/mem_info.h
+++ b/be/src/util/mem_info.h
@@ -24,6 +24,12 @@
 
 #include <string>
 
+#if !defined(__APPLE__) || !defined(_POSIX_C_SOURCE)
+#include <unistd.h>
+#else
+#include <mach/vm_page_size.h>
+#endif
+
 #include "common/logging.h"
 #ifdef USE_JEMALLOC
 #include "jemalloc/jemalloc.h"
@@ -45,6 +51,14 @@ public:
 
     static inline bool initialized() { return _s_initialized; }
 
+    static int get_page_size() { // memory page size from the platform API selected below
+#if !defined(__APPLE__) || !defined(_POSIX_C_SOURCE)
+        return getpagesize(); // declared in <unistd.h>, included under the same condition
+#else
+        return vm_page_size; // declared in <mach/vm_page_size.h>, included under the same condition
+#endif
+    }
+
     // Get total physical memory in bytes (if has cgroups memory limits, return the limits).
     static inline int64_t physical_mem() {
         DCHECK(_s_initialized);
@@ -82,6 +96,22 @@ public:
 #endif
         return 0;
     }
+
+    static inline int64_t get_je_all_arena_metrics(const std::string& name) {
+#ifdef USE_JEMALLOC // reads "stats.arenas.<MALLCTL_ARENAS_ALL>.<name>" through get_je_metrics()
+        return get_je_metrics(fmt::format("stats.arenas.{}.{}", MALLCTL_ARENAS_ALL, name));
+#endif // USE_JEMALLOC
+        return 0; // fallback when built without USE_JEMALLOC
+    }
+
+    static inline void je_purge_all_arena_dirty_pages() {
+#ifdef USE_JEMALLOC // the function body is empty when built without USE_JEMALLOC
+        // Purge all unused dirty pages for arena <i>, or for all arenas if <i> equals MALLCTL_ARENAS_ALL.
+        je_mallctl(fmt::format("arena.{}.purge", MALLCTL_ARENAS_ALL).c_str(), nullptr, nullptr,
+                   nullptr, 0); // result ignored; same je_-prefixed call as refresh_allocator_mem()
+#endif
+    }
+
     static inline size_t allocator_virtual_mem() { return _s_virtual_memory_used; }
     static inline size_t allocator_cache_mem() { return _s_allocator_cache_mem; }
     static inline std::string allocator_cache_mem_str() { return _s_allocator_cache_mem_str; }
@@ -93,6 +123,13 @@ public:
     // obtained by the process malloc, not the physical memory actually used by the process in the OS.
     static void refresh_allocator_mem();
 
+    /** jemalloc pdirty is number of pages within unused extents that are potentially
+      * dirty, and for which madvise() or similar has not been called.
+      *
+      * So they will be subtracted from RSS to make accounting more
+      * accurate, since those pages are not really RSS but a memory
+      * that can be used at anytime via jemalloc.
+      */
     static inline void refresh_proc_mem_no_allocator_cache() {
         _s_proc_mem_no_allocator_cache =
                 PerfCounters::get_vm_rss() - static_cast<int64_t>(_s_allocator_cache_mem);
@@ -115,6 +152,9 @@ public:
         DCHECK(_s_initialized);
         return _s_soft_mem_limit_str;
     }
+    static bool is_exceed_soft_mem_limit(int64_t bytes = 0) { // `bytes` is added to current usage
+        return proc_mem_no_allocator_cache() + bytes > soft_mem_limit(); // strict: equal is false
+    }
 
     static std::string debug_string();
 
diff --git a/be/src/util/system_metrics.cpp b/be/src/util/system_metrics.cpp
index 2f259d343f..3e3c7d0957 100644
--- a/be/src/util/system_metrics.cpp
+++ b/be/src/util/system_metrics.cpp
@@ -108,6 +108,12 @@ DEFINE_MEMORY_GAUGE_METRIC(jemalloc_metadata_bytes, MetricUnit::BYTES);
 DEFINE_MEMORY_GAUGE_METRIC(jemalloc_resident_bytes, MetricUnit::BYTES);
 DEFINE_MEMORY_GAUGE_METRIC(jemalloc_mapped_bytes, MetricUnit::BYTES);
 DEFINE_MEMORY_GAUGE_METRIC(jemalloc_retained_bytes, MetricUnit::BYTES);
+DEFINE_MEMORY_GAUGE_METRIC(jemalloc_tcache_bytes, MetricUnit::BYTES);
+DEFINE_MEMORY_GAUGE_METRIC(jemalloc_pactive_num, MetricUnit::NOUNIT);
+DEFINE_MEMORY_GAUGE_METRIC(jemalloc_pdirty_num, MetricUnit::NOUNIT);
+DEFINE_MEMORY_GAUGE_METRIC(jemalloc_pmuzzy_num, MetricUnit::NOUNIT);
+DEFINE_MEMORY_GAUGE_METRIC(jemalloc_dirty_purged_num, MetricUnit::NOUNIT);
+DEFINE_MEMORY_GAUGE_METRIC(jemalloc_muzzy_purged_num, MetricUnit::NOUNIT);
 #endif
 
 struct MemoryMetrics {
@@ -133,6 +139,12 @@ struct MemoryMetrics {
         INT_GAUGE_METRIC_REGISTER(entity, memory_jemalloc_resident_bytes);
         INT_GAUGE_METRIC_REGISTER(entity, memory_jemalloc_mapped_bytes);
         INT_GAUGE_METRIC_REGISTER(entity, memory_jemalloc_retained_bytes);
+        INT_GAUGE_METRIC_REGISTER(entity, memory_jemalloc_tcache_bytes);
+        INT_GAUGE_METRIC_REGISTER(entity, memory_jemalloc_pactive_num);
+        INT_GAUGE_METRIC_REGISTER(entity, memory_jemalloc_pdirty_num);
+        INT_GAUGE_METRIC_REGISTER(entity, memory_jemalloc_pmuzzy_num);
+        INT_GAUGE_METRIC_REGISTER(entity, memory_jemalloc_dirty_purged_num);
+        INT_GAUGE_METRIC_REGISTER(entity, memory_jemalloc_muzzy_purged_num);
 #endif
     }
 
@@ -158,6 +170,12 @@ struct MemoryMetrics {
     IntGauge* memory_jemalloc_resident_bytes;
     IntGauge* memory_jemalloc_mapped_bytes;
     IntGauge* memory_jemalloc_retained_bytes;
+    IntGauge* memory_jemalloc_tcache_bytes;
+    IntGauge* memory_jemalloc_pactive_num;
+    IntGauge* memory_jemalloc_pdirty_num;
+    IntGauge* memory_jemalloc_pmuzzy_num;
+    IntGauge* memory_jemalloc_dirty_purged_num;
+    IntGauge* memory_jemalloc_muzzy_purged_num;
 #endif
 };
 
@@ -448,6 +466,18 @@ void SystemMetrics::update_allocator_metrics() {
             MemInfo::get_je_metrics("stats.mapped"));
     _memory_metrics->memory_jemalloc_retained_bytes->set_value(
             MemInfo::get_je_metrics("stats.retained"));
+    _memory_metrics->memory_jemalloc_tcache_bytes->set_value(
+            MemInfo::get_je_all_arena_metrics("tcache_bytes"));
+    _memory_metrics->memory_jemalloc_pactive_num->set_value(
+            MemInfo::get_je_all_arena_metrics("pactive"));
+    _memory_metrics->memory_jemalloc_pdirty_num->set_value(
+            MemInfo::get_je_all_arena_metrics("pdirty"));
+    _memory_metrics->memory_jemalloc_pmuzzy_num->set_value(
+            MemInfo::get_je_all_arena_metrics("pmuzzy"));
+    _memory_metrics->memory_jemalloc_dirty_purged_num->set_value(
+            MemInfo::get_je_all_arena_metrics("dirty_purged"));
+    _memory_metrics->memory_jemalloc_muzzy_purged_num->set_value(
+            MemInfo::get_je_all_arena_metrics("muzzy_purged"));
 #else
     _memory_metrics->memory_tcmalloc_allocated_bytes->set_value(
             MemInfo::get_tc_metrics("generic.total_physical_bytes"));
diff --git a/be/src/vec/common/allocator.h b/be/src/vec/common/allocator.h
index 22b69c1854..81fa79c4c3 100644
--- a/be/src/vec/common/allocator.h
+++ b/be/src/vec/common/allocator.h
@@ -220,7 +220,7 @@ public:
         if (old_size == new_size) {
             /// nothing to do.
             /// BTW, it's not possible to change alignment while doing realloc.
-        } else if (old_size < CHUNK_THRESHOLD && new_size < CHUNK_THRESHOLD &&
+        } else if (old_size < MMAP_THRESHOLD && new_size < MMAP_THRESHOLD &&
                    alignment <= MALLOC_MIN_ALIGNMENT) {
             /// Resize malloc'd memory region with no special alignment requirement.
             void* new_buf = ::realloc(buf, new_size);


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org