You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by da...@apache.org on 2022/10/12 04:11:06 UTC

[doris] branch master updated: [enhancement](storage) set the segment cache capacity according to the open file limit of the process (#13269)

This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 239e5b9943 [enhancement](storage) set the segment cache capacity according to the open file limit of the process (#13269)
239e5b9943 is described below

commit 239e5b9943d1bf96c43142821655c9e553a2c347
Author: Xin Liao <li...@126.com>
AuthorDate: Wed Oct 12 12:10:58 2022 +0800

    [enhancement](storage) set the segment cache capacity according to the open file limit of the process (#13269)
---
 be/src/common/config.h           |  7 -------
 be/src/runtime/exec_env_init.cpp | 16 +++++++++++++++-
 2 files changed, 15 insertions(+), 8 deletions(-)

diff --git a/be/src/common/config.h b/be/src/common/config.h
index e5bab17f9d..79b5c2ffb3 100644
--- a/be/src/common/config.h
+++ b/be/src/common/config.h
@@ -227,8 +227,6 @@ CONF_mInt32(snapshot_expire_time_sec, "172800");
 CONF_mInt32(trash_file_expire_time_sec, "259200");
 // check row nums for BE/CE and schema change. true is open, false is closed.
 CONF_mBool(row_nums_check, "true");
-//file descriptors cache, by default, cache 32768 descriptors
-CONF_Int32(file_descriptor_cache_capacity, "32768");
 // minimum file descriptor number
 // modify them upon necessity
 CONF_Int32(min_file_descriptor_number, "60000");
@@ -721,11 +719,6 @@ CONF_mInt32(max_segment_num_per_rowset, "200");
 // The connection timeout when connecting to external table such as odbc table.
 CONF_mInt32(external_table_connect_timeout_sec, "30");
 
-// The capacity of lru cache in segment loader.
-// Althought it is called "segment cache", but it caches segments in rowset granularity.
-// So the value of this config should corresponding to the number of rowsets on this BE.
-CONF_mInt32(segment_cache_capacity, "1000000");
-
 // Global bitmap cache capacity for aggregation cache, size in bytes
 CONF_Int64(delete_bitmap_agg_cache_capacity, "104857600");
 
diff --git a/be/src/runtime/exec_env_init.cpp b/be/src/runtime/exec_env_init.cpp
index 828830ebeb..a55f547778 100644
--- a/be/src/runtime/exec_env_init.cpp
+++ b/be/src/runtime/exec_env_init.cpp
@@ -287,7 +287,21 @@ Status ExecEnv::_init_mem_tracker() {
               << PrettyPrinter::print(storage_cache_limit, TUnit::BYTES)
               << ", origin config value: " << config::storage_page_cache_limit;
 
-    SegmentLoader::create_global_instance(config::segment_cache_capacity);
+    uint64_t fd_number = config::min_file_descriptor_number;
+    struct rlimit l;
+    int ret = getrlimit(RLIMIT_NOFILE, &l);
+    if (ret != 0) {
+        LOG(WARNING) << "call getrlimit() failed. errno=" << strerror(errno)
+                     << ", use default configuration instead.";
+    } else {
+        fd_number = static_cast<uint64_t>(l.rlim_cur);
+    }
+    // SegmentLoader caches segments at rowset granularity, so the number of
+    // open files will be greater than segment_cache_capacity.
+    uint64_t segment_cache_capacity = fd_number / 3 * 2;
+    LOG(INFO) << "segment_cache_capacity = fd_number / 3 * 2, fd_number: " << fd_number
+              << " segment_cache_capacity: " << segment_cache_capacity;
+    SegmentLoader::create_global_instance(segment_cache_capacity);
 
     // 4. init other managers
     RETURN_IF_ERROR(_disk_io_mgr->init(global_memory_limit_bytes));


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org