You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by mo...@apache.org on 2023/06/28 14:49:05 UTC
[doris] branch branch-1.2-lts updated: [branch-1.2](memory) pick some memory fix (#21294)
This is an automated email from the ASF dual-hosted git repository.
morningman pushed a commit to branch branch-1.2-lts
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-1.2-lts by this push:
new 63d0396ea9 [branch-1.2](memory) pick some memory fix (#21294)
63d0396ea9 is described below
commit 63d0396ea9df42b33a9c82ed414df155f9116e9f
Author: Xinyi Zou <zo...@gmail.com>
AuthorDate: Wed Jun 28 22:48:56 2023 +0800
[branch-1.2](memory) pick some memory fix (#21294)
Cherry-pick the following fixes:
1. #21237
2. #20782
3. #21282
4. Disable the chunk allocator in the vectorized Allocator by default (disable_chunk_allocator_in_vec = true)
---
be/src/common/config.h | 2 +-
be/src/common/daemon.cpp | 4 +--
be/src/http/action/metrics_action.cpp | 5 +++-
be/src/olap/delta_writer.cpp | 4 ++-
be/src/olap/olap_server.cpp | 3 +-
be/src/olap/rowset/segment_v2/page_io.cpp | 19 +++++++------
be/src/runtime/memory/mem_tracker_limiter.cpp | 2 +-
be/src/util/mem_info.cpp | 17 ++++++++----
be/src/util/mem_info.h | 40 +++++++++++++++++++++++++++
be/src/util/system_metrics.cpp | 30 ++++++++++++++++++++
be/src/vec/common/allocator.h | 2 +-
11 files changed, 107 insertions(+), 21 deletions(-)
diff --git a/be/src/common/config.h b/be/src/common/config.h
index 535b3be63b..6787fba585 100644
--- a/be/src/common/config.h
+++ b/be/src/common/config.h
@@ -474,7 +474,7 @@ CONF_Int32(min_chunk_reserved_bytes, "1024");
// of gperftools tcmalloc central lock.
// Jemalloc or google tcmalloc have core cache, Chunk Allocator may no longer be needed after replacing
// gperftools tcmalloc.
-CONF_mBool(disable_chunk_allocator_in_vec, "false");
+CONF_mBool(disable_chunk_allocator_in_vec, "true");
// The probing algorithm of partitioned hash table.
// Enable quadratic probing hash table
diff --git a/be/src/common/daemon.cpp b/be/src/common/daemon.cpp
index 176ab1dc64..80dab2b422 100644
--- a/be/src/common/daemon.cpp
+++ b/be/src/common/daemon.cpp
@@ -256,7 +256,7 @@ void Daemon::memory_gc_thread() {
// No longer full gc and minor gc during sleep.
memory_full_gc_sleep_time_ms = config::memory_gc_sleep_time_s * 1000;
memory_minor_gc_sleep_time_ms = config::memory_gc_sleep_time_s * 1000;
- doris::MemTrackerLimiter::print_log_process_usage("process full gc", false);
+ doris::MemTrackerLimiter::print_log_process_usage("Start Full GC", false);
if (doris::MemInfo::process_full_gc()) {
// If there is not enough memory to be gc, the process memory usage will not be printed in the next continuous gc.
doris::MemTrackerLimiter::enable_print_log_process_usage();
@@ -268,7 +268,7 @@ void Daemon::memory_gc_thread() {
doris::MemInfo::soft_mem_limit())) {
// No minor gc during sleep, but full gc is possible.
memory_minor_gc_sleep_time_ms = config::memory_gc_sleep_time_s * 1000;
- doris::MemTrackerLimiter::print_log_process_usage("process minor gc", false);
+ doris::MemTrackerLimiter::print_log_process_usage("Start Minor GC", false);
if (doris::MemInfo::process_minor_gc()) {
doris::MemTrackerLimiter::enable_print_log_process_usage();
}
diff --git a/be/src/http/action/metrics_action.cpp b/be/src/http/action/metrics_action.cpp
index f2bd8b43cb..832f2570d0 100644
--- a/be/src/http/action/metrics_action.cpp
+++ b/be/src/http/action/metrics_action.cpp
@@ -29,6 +29,7 @@
#include "http/http_request.h"
#include "http/http_response.h"
#include "runtime/exec_env.h"
+#include "util/mem_info.h"
#include "util/metrics.h"
namespace doris {
@@ -37,7 +38,9 @@ void MetricsAction::handle(HttpRequest* req) {
const std::string& type = req->param("type");
const std::string& with_tablet = req->param("with_tablet");
std::string str;
- if (type == "core") {
+ if (MemInfo::is_exceed_soft_mem_limit(GB_EXCHANGE_BYTE)) {
+ str = "";
+ } else if (type == "core") {
str = _metric_registry->to_core_string();
} else if (type == "json") {
str = _metric_registry->to_json(with_tablet == "true");
diff --git a/be/src/olap/delta_writer.cpp b/be/src/olap/delta_writer.cpp
index 480e09d925..fb26c010fb 100644
--- a/be/src/olap/delta_writer.cpp
+++ b/be/src/olap/delta_writer.cpp
@@ -34,6 +34,7 @@
#include "runtime/tuple_row.h"
#include "service/backend_options.h"
#include "util/brpc_client_cache.h"
+#include "util/mem_info.h"
#include "util/ref_count_closure.h"
namespace doris {
@@ -116,7 +117,8 @@ Status DeltaWriter::init() {
}
// check tablet version number
- if (_tablet->version_count() > config::max_tablet_version_num) {
+ if (_tablet->version_count() > config::max_tablet_version_num &&
+ !MemInfo::is_exceed_soft_mem_limit(GB_EXCHANGE_BYTE)) {
//trigger quick compaction
if (config::enable_quick_compaction) {
StorageEngine::instance()->submit_quick_compaction_task(_tablet);
diff --git a/be/src/olap/olap_server.cpp b/be/src/olap/olap_server.cpp
index 386cb30736..28fd8e7629 100644
--- a/be/src/olap/olap_server.cpp
+++ b/be/src/olap/olap_server.cpp
@@ -413,7 +413,8 @@ void StorageEngine::_compaction_tasks_producer_callback() {
int64_t interval = config::generate_compaction_tasks_min_interval_ms;
do {
- if (!config::disable_auto_compaction) {
+ if (!config::disable_auto_compaction &&
+ !MemInfo::is_exceed_soft_mem_limit(GB_EXCHANGE_BYTE)) {
_adjust_compaction_thread_num();
bool check_score = false;
diff --git a/be/src/olap/rowset/segment_v2/page_io.cpp b/be/src/olap/rowset/segment_v2/page_io.cpp
index cdcb7a6fa8..c8321e0f3d 100644
--- a/be/src/olap/rowset/segment_v2/page_io.cpp
+++ b/be/src/olap/rowset/segment_v2/page_io.cpp
@@ -131,10 +131,13 @@ Status PageIO::read_and_decompress_page(const PageReadOptions& opts, PageHandle*
return Status::Corruption("Bad page: too small size ({})", page_size);
}
- if (doris::MemTrackerLimiter::sys_mem_exceed_limit_check(page_size)) {
- return Status::MemoryLimitExceeded(
- fmt::format("Bad page: sys memory check failed, try alloc: {}", page_size));
- }
+ // This check is disabled because it cancels queries too eagerly.
+ // The expected behavior is to wait for a period of time after memory exceeds the limit,
+ // like mem_check in the Allocator on the Apache Doris master branch.
+ // if (doris::MemTrackerLimiter::sys_mem_exceed_limit_check(page_size)) {
+ // return Status::MemoryLimitExceeded(
+ // fmt::format("Bad page: sys memory check failed, try alloc: {}", page_size));
+ // }
// hold compressed page at first, reset to decompressed page later
std::unique_ptr<char[]> page(new char[page_size]);
@@ -172,10 +175,10 @@ Status PageIO::read_and_decompress_page(const PageReadOptions& opts, PageHandle*
}
SCOPED_RAW_TIMER(&opts.stats->decompress_ns);
auto uncompressed_size = footer->uncompressed_size() + footer_size + 4;
- if (doris::MemTrackerLimiter::sys_mem_exceed_limit_check(uncompressed_size)) {
- return Status::MemoryLimitExceeded(fmt::format(
- "Bad page: sys memory check failed, try alloc: {}", uncompressed_size));
- }
+ // if (doris::MemTrackerLimiter::sys_mem_exceed_limit_check(uncompressed_size)) {
+ // return Status::MemoryLimitExceeded(fmt::format(
+ // "Bad page: sys memory check failed, try alloc: {}", uncompressed_size));
+ // }
std::unique_ptr<char[]> decompressed_page(new char[uncompressed_size]);
// decompress page body
diff --git a/be/src/runtime/memory/mem_tracker_limiter.cpp b/be/src/runtime/memory/mem_tracker_limiter.cpp
index e7f15bf212..99e501da8d 100644
--- a/be/src/runtime/memory/mem_tracker_limiter.cpp
+++ b/be/src/runtime/memory/mem_tracker_limiter.cpp
@@ -134,7 +134,7 @@ void MemTrackerLimiter::make_process_snapshots(std::vector<MemTracker::Snapshot>
process_mem_sum += it.second->current_value();
}
- snapshot.type = "tc/jemalloc_cache";
+ snapshot.type = "tc/jemalloc_free_memory";
snapshot.label = "";
snapshot.limit = -1;
snapshot.cur_consumption = MemInfo::allocator_cache_mem();
diff --git a/be/src/util/mem_info.cpp b/be/src/util/mem_info.cpp
index bb0f6bb172..13c0c677e6 100644
--- a/be/src/util/mem_info.cpp
+++ b/be/src/util/mem_info.cpp
@@ -68,14 +68,18 @@ void MemInfo::refresh_allocator_mem() {
#if defined(ADDRESS_SANITIZER) || defined(LEAK_SANITIZER) || defined(THREAD_SANITIZER)
LOG(INFO) << "Memory tracking is not available with address sanitizer builds.";
#elif defined(USE_JEMALLOC)
+ // 'epoch' is a special mallctl -- it updates the statistics. Without it, all
+ // the following calls will return stale values. It increments and returns
+ // the current epoch number, which might be useful to log as a sanity check.
uint64_t epoch = 0;
size_t sz = sizeof(epoch);
je_mallctl("epoch", &epoch, &sz, &epoch, sz);
// https://jemalloc.net/jemalloc.3.html
- _s_allocator_cache_mem =
- get_je_metrics(fmt::format("stats.arenas.{}.tcache_bytes", MALLCTL_ARENAS_ALL)) +
- get_je_metrics("stats.metadata");
+ // https://www.bookstack.cn/read/aliyun-rds-core/4a0cdf677f62feb3.md
+ _s_allocator_cache_mem = get_je_all_arena_metrics("tcache_bytes") +
+ get_je_metrics("stats.metadata") +
+ get_je_all_arena_metrics("pdirty") * get_page_size();
_s_allocator_cache_mem_str =
PrettyPrinter::print(static_cast<uint64_t>(_s_allocator_cache_mem), TUnit::BYTES);
_s_virtual_memory_used = get_je_metrics("stats.mapped");
@@ -105,6 +109,7 @@ void MemInfo::process_cache_gc(int64_t& freed_mem) {
StoragePageCache::instance()->get_page_cache_mem_consumption(segment_v2::DATA_PAGE);
StoragePageCache::instance()->prune(segment_v2::DATA_PAGE);
}
+ je_purge_all_arena_dirty_pages();
}
// step1: free all cache
@@ -118,7 +123,8 @@ bool MemInfo::process_minor_gc() {
std::string mem_available_str = MemInfo::sys_mem_available_str();
Defer defer {[&]() {
- LOG(INFO) << fmt::format("Process Minor GC Free Memory {} Bytes. cost(us): {}", freed_mem,
+ je_purge_all_arena_dirty_pages();
+ LOG(INFO) << fmt::format("End Minor GC, Free Memory {} Bytes. cost(us): {}", freed_mem,
watch.elapsed_time() / 1000);
}};
@@ -152,7 +158,8 @@ bool MemInfo::process_full_gc() {
std::string mem_available_str = MemInfo::sys_mem_available_str();
Defer defer {[&]() {
- LOG(INFO) << fmt::format("Process Full GC Free Memory {} Bytes. cost(us): {}", freed_mem,
+ je_purge_all_arena_dirty_pages();
+ LOG(INFO) << fmt::format("End Full GC Free, Memory {} Bytes. cost(us): {}", freed_mem,
watch.elapsed_time() / 1000);
}};
diff --git a/be/src/util/mem_info.h b/be/src/util/mem_info.h
index 2cb17eb6b8..2779591427 100644
--- a/be/src/util/mem_info.h
+++ b/be/src/util/mem_info.h
@@ -24,6 +24,12 @@
#include <string>
+#if !defined(__APPLE__) || !defined(_POSIX_C_SOURCE)
+#include <unistd.h>
+#else
+#include <mach/vm_page_size.h>
+#endif
+
#include "common/logging.h"
#ifdef USE_JEMALLOC
#include "jemalloc/jemalloc.h"
@@ -45,6 +51,14 @@ public:
static inline bool initialized() { return _s_initialized; }
+ static int get_page_size() {
+#if !defined(__APPLE__) || !defined(_POSIX_C_SOURCE)
+ return getpagesize();
+#else
+ return vm_page_size;
+#endif
+ }
+
// Get total physical memory in bytes (if has cgroups memory limits, return the limits).
static inline int64_t physical_mem() {
DCHECK(_s_initialized);
@@ -82,6 +96,22 @@ public:
#endif
return 0;
}
+
+ static inline int64_t get_je_all_arena_metrics(const std::string& name) {
+#ifdef USE_JEMALLOC
+ return get_je_metrics(fmt::format("stats.arenas.{}.{}", MALLCTL_ARENAS_ALL, name));
+#endif
+ return 0;
+ }
+
+ static inline void je_purge_all_arena_dirty_pages() {
+#ifdef USE_JEMALLOC
+ // Purge all unused dirty pages for arena <i>, or for all arenas if <i> equals MALLCTL_ARENAS_ALL.
+ jemallctl(fmt::format("arena.{}.purge", MALLCTL_ARENAS_ALL).c_str(), nullptr, nullptr,
+ nullptr, 0);
+#endif
+ }
+
static inline size_t allocator_virtual_mem() { return _s_virtual_memory_used; }
static inline size_t allocator_cache_mem() { return _s_allocator_cache_mem; }
static inline std::string allocator_cache_mem_str() { return _s_allocator_cache_mem_str; }
@@ -93,6 +123,13 @@ public:
// obtained by the process malloc, not the physical memory actually used by the process in the OS.
static void refresh_allocator_mem();
+ /** jemalloc pdirty is the number of pages within unused extents that are potentially
+ * dirty, and for which madvise() or similar has not been called.
+ *
+ * These pages are subtracted from RSS to make the accounting more
+ * accurate, since they are not really RSS but memory
+ * that can be used at any time via jemalloc.
+ */
static inline void refresh_proc_mem_no_allocator_cache() {
_s_proc_mem_no_allocator_cache =
PerfCounters::get_vm_rss() - static_cast<int64_t>(_s_allocator_cache_mem);
@@ -115,6 +152,9 @@ public:
DCHECK(_s_initialized);
return _s_soft_mem_limit_str;
}
+ static bool is_exceed_soft_mem_limit(int64_t bytes = 0) {
+ return proc_mem_no_allocator_cache() + bytes > soft_mem_limit();
+ }
static std::string debug_string();
diff --git a/be/src/util/system_metrics.cpp b/be/src/util/system_metrics.cpp
index 2f259d343f..3e3c7d0957 100644
--- a/be/src/util/system_metrics.cpp
+++ b/be/src/util/system_metrics.cpp
@@ -108,6 +108,12 @@ DEFINE_MEMORY_GAUGE_METRIC(jemalloc_metadata_bytes, MetricUnit::BYTES);
DEFINE_MEMORY_GAUGE_METRIC(jemalloc_resident_bytes, MetricUnit::BYTES);
DEFINE_MEMORY_GAUGE_METRIC(jemalloc_mapped_bytes, MetricUnit::BYTES);
DEFINE_MEMORY_GAUGE_METRIC(jemalloc_retained_bytes, MetricUnit::BYTES);
+DEFINE_MEMORY_GAUGE_METRIC(jemalloc_tcache_bytes, MetricUnit::BYTES);
+DEFINE_MEMORY_GAUGE_METRIC(jemalloc_pactive_num, MetricUnit::NOUNIT);
+DEFINE_MEMORY_GAUGE_METRIC(jemalloc_pdirty_num, MetricUnit::NOUNIT);
+DEFINE_MEMORY_GAUGE_METRIC(jemalloc_pmuzzy_num, MetricUnit::NOUNIT);
+DEFINE_MEMORY_GAUGE_METRIC(jemalloc_dirty_purged_num, MetricUnit::NOUNIT);
+DEFINE_MEMORY_GAUGE_METRIC(jemalloc_muzzy_purged_num, MetricUnit::NOUNIT);
#endif
struct MemoryMetrics {
@@ -133,6 +139,12 @@ struct MemoryMetrics {
INT_GAUGE_METRIC_REGISTER(entity, memory_jemalloc_resident_bytes);
INT_GAUGE_METRIC_REGISTER(entity, memory_jemalloc_mapped_bytes);
INT_GAUGE_METRIC_REGISTER(entity, memory_jemalloc_retained_bytes);
+ INT_GAUGE_METRIC_REGISTER(entity, memory_jemalloc_tcache_bytes);
+ INT_GAUGE_METRIC_REGISTER(entity, memory_jemalloc_pactive_num);
+ INT_GAUGE_METRIC_REGISTER(entity, memory_jemalloc_pdirty_num);
+ INT_GAUGE_METRIC_REGISTER(entity, memory_jemalloc_pmuzzy_num);
+ INT_GAUGE_METRIC_REGISTER(entity, memory_jemalloc_dirty_purged_num);
+ INT_GAUGE_METRIC_REGISTER(entity, memory_jemalloc_muzzy_purged_num);
#endif
}
@@ -158,6 +170,12 @@ struct MemoryMetrics {
IntGauge* memory_jemalloc_resident_bytes;
IntGauge* memory_jemalloc_mapped_bytes;
IntGauge* memory_jemalloc_retained_bytes;
+ IntGauge* memory_jemalloc_tcache_bytes;
+ IntGauge* memory_jemalloc_pactive_num;
+ IntGauge* memory_jemalloc_pdirty_num;
+ IntGauge* memory_jemalloc_pmuzzy_num;
+ IntGauge* memory_jemalloc_dirty_purged_num;
+ IntGauge* memory_jemalloc_muzzy_purged_num;
#endif
};
@@ -448,6 +466,18 @@ void SystemMetrics::update_allocator_metrics() {
MemInfo::get_je_metrics("stats.mapped"));
_memory_metrics->memory_jemalloc_retained_bytes->set_value(
MemInfo::get_je_metrics("stats.retained"));
+ _memory_metrics->memory_jemalloc_tcache_bytes->set_value(
+ MemInfo::get_je_all_arena_metrics("tcache_bytes"));
+ _memory_metrics->memory_jemalloc_pactive_num->set_value(
+ MemInfo::get_je_all_arena_metrics("pactive"));
+ _memory_metrics->memory_jemalloc_pdirty_num->set_value(
+ MemInfo::get_je_all_arena_metrics("pdirty"));
+ _memory_metrics->memory_jemalloc_pmuzzy_num->set_value(
+ MemInfo::get_je_all_arena_metrics("pmuzzy"));
+ _memory_metrics->memory_jemalloc_dirty_purged_num->set_value(
+ MemInfo::get_je_all_arena_metrics("dirty_purged"));
+ _memory_metrics->memory_jemalloc_muzzy_purged_num->set_value(
+ MemInfo::get_je_all_arena_metrics("muzzy_purged"));
#else
_memory_metrics->memory_tcmalloc_allocated_bytes->set_value(
MemInfo::get_tc_metrics("generic.total_physical_bytes"));
diff --git a/be/src/vec/common/allocator.h b/be/src/vec/common/allocator.h
index 22b69c1854..81fa79c4c3 100644
--- a/be/src/vec/common/allocator.h
+++ b/be/src/vec/common/allocator.h
@@ -220,7 +220,7 @@ public:
if (old_size == new_size) {
/// nothing to do.
/// BTW, it's not possible to change alignment while doing realloc.
- } else if (old_size < CHUNK_THRESHOLD && new_size < CHUNK_THRESHOLD &&
+ } else if (old_size < MMAP_THRESHOLD && new_size < MMAP_THRESHOLD &&
alignment <= MALLOC_MIN_ALIGNMENT) {
/// Resize malloc'd memory region with no special alignment requirement.
void* new_buf = ::realloc(buf, new_size);
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org