You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by ga...@apache.org on 2022/05/28 02:17:15 UTC
[incubator-doris] branch master updated: [feature][config] introduce a new BE config storage_page_cache_shard_size (#9821)

This is an automated email from the ASF dual-hosted git repository.

gaodayue pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 4d1e926b6c [feature][config] introduce a new BE config storage_page_cache_shard_size (#9821)
4d1e926b6c is described below

commit 4d1e926b6c8b637c4707bbcbe37042d237c081db
Author: Dayue Gao <ga...@meituan.com>
AuthorDate: Sat May 28 10:17:09 2022 +0800

    [feature][config] introduce a new BE config storage_page_cache_shard_size (#9821)
    
    Co-authored-by: gaodayue <ga...@bytedance.com>
---
 be/src/common/config.h                      |  3 ++
 be/src/olap/lru_cache.cpp                   | 47 +++++++++++++++++------------
 be/src/olap/lru_cache.h                     | 21 +++++++------
 be/src/olap/page_cache.cpp                  | 20 +++++++-----
 be/src/olap/page_cache.h                    |  7 +++--
 be/src/olap/segment_loader.cpp              |  2 +-
 be/src/runtime/exec_env_init.cpp            |  5 +--
 be/test/olap/page_cache_test.cpp            |  8 +++--
 docs/en/admin-manual/config/be-config.md    |  6 ++++
 docs/zh-CN/admin-manual/config/be-config.md |  6 ++++
 10 files changed, 80 insertions(+), 45 deletions(-)

diff --git a/be/src/common/config.h b/be/src/common/config.h
index a73efcf24d..9bdd8ba3e4 100644
--- a/be/src/common/config.h
+++ b/be/src/common/config.h
@@ -226,6 +226,9 @@ CONF_Int64(index_stream_cache_capacity, "10737418240");
 
 // Cache for storage page size
 CONF_String(storage_page_cache_limit, "20%");
+// Shard size (i.e. the number of shards) of the page cache; the value must be a power of two.
+// It's recommended to set it to a value close to the number of BE cores to reduce lock contention.
+CONF_Int32(storage_page_cache_shard_size, "16");
 // Percentage for index page cache
 // all storage page cache will be divided into data_page_cache and index_page_cache
 CONF_Int32(index_page_cache_percentage, "10");
diff --git a/be/src/olap/lru_cache.cpp b/be/src/olap/lru_cache.cpp
index 90b2e4ddf5..19d8b5d4e9 100644
--- a/be/src/olap/lru_cache.cpp
+++ b/be/src/olap/lru_cache.cpp
@@ -11,6 +11,7 @@
 #include <sstream>
 #include <string>
 
+#include "gutil/bits.h"
 #include "olap/olap_common.h"
 #include "olap/olap_define.h"
 #include "olap/olap_index.h"
@@ -430,19 +431,25 @@ inline uint32_t ShardedLRUCache::_hash_slice(const CacheKey& s) {
     return s.hash(s.data(), s.size(), 0);
 }
 
-uint32_t ShardedLRUCache::_shard(uint32_t hash) {
-    return hash >> (32 - kNumShardBits);
-}
-
-ShardedLRUCache::ShardedLRUCache(const std::string& name, size_t total_capacity, LRUCacheType type)
+ShardedLRUCache::ShardedLRUCache(const std::string& name, size_t total_capacity, LRUCacheType type,
+                                 uint32_t num_shards)
         : _name(name),
+          _num_shard_bits(Bits::FindLSBSetNonZero(num_shards)),
+          _num_shards(num_shards),
+          _shards(nullptr),
           _last_id(1),
           _mem_tracker(MemTracker::create_tracker(-1, name, nullptr, MemTrackerLevel::OVERVIEW)) {
-    const size_t per_shard = (total_capacity + (kNumShards - 1)) / kNumShards;
-    for (int s = 0; s < kNumShards; s++) {
-        _shards[s] = new LRUCache(type);
-        _shards[s]->set_capacity(per_shard);
+    CHECK(num_shards > 0) << "num_shards cannot be 0";
+    CHECK_EQ((num_shards & (num_shards - 1)), 0)
+            << "num_shards should be power of two, but got " << num_shards;
+
+    const size_t per_shard = (total_capacity + (_num_shards - 1)) / _num_shards;
+    LRUCache** shards = new (std::nothrow) LRUCache*[_num_shards];
+    for (int s = 0; s < _num_shards; s++) {
+        shards[s] = new LRUCache(type);
+        shards[s]->set_capacity(per_shard);
     }
+    _shards = shards;
 
     _entity = DorisMetrics::instance()->metric_registry()->register_entity(
             std::string("lru_cache:") + name, {{"name", name}});
@@ -456,8 +463,11 @@ ShardedLRUCache::ShardedLRUCache(const std::string& name, size_t total_capacity,
 }
 
 ShardedLRUCache::~ShardedLRUCache() {
-    for (int s = 0; s < kNumShards; s++) {
-        delete _shards[s];
+    if (_shards) {
+        for (int s = 0; s < _num_shards; s++) {
+            delete _shards[s];
+        }
+        delete[] _shards;
     }
     _entity->deregister_hook(_name);
     DorisMetrics::instance()->metric_registry()->deregister_entity(_entity);
@@ -501,7 +511,7 @@ uint64_t ShardedLRUCache::new_id() {
 
 int64_t ShardedLRUCache::prune() {
     int64_t num_prune = 0;
-    for (int s = 0; s < kNumShards; s++) {
+    for (int s = 0; s < _num_shards; s++) {
         num_prune += _shards[s]->prune();
     }
     return num_prune;
@@ -509,7 +519,7 @@ int64_t ShardedLRUCache::prune() {
 
 int64_t ShardedLRUCache::prune_if(CacheValuePredicate pred) {
     int64_t num_prune = 0;
-    for (int s = 0; s < kNumShards; s++) {
+    for (int s = 0; s < _num_shards; s++) {
         num_prune += _shards[s]->prune_if(pred);
     }
     return num_prune;
@@ -520,7 +530,7 @@ void ShardedLRUCache::update_cache_metrics() const {
     size_t total_usage = 0;
     size_t total_lookup_count = 0;
     size_t total_hit_count = 0;
-    for (int i = 0; i < kNumShards; i++) {
+    for (int i = 0; i < _num_shards; i++) {
         total_capacity += _shards[i]->get_capacity();
         total_usage += _shards[i]->get_usage();
         total_lookup_count += _shards[i]->get_lookup_count();
@@ -536,12 +546,9 @@ void ShardedLRUCache::update_cache_metrics() const {
                                                  : ((double)total_hit_count / total_lookup_count));
 }
 
-Cache* new_lru_cache(const std::string& name, size_t capacity) {
-    return new ShardedLRUCache(name, capacity, LRUCacheType::SIZE);
-}
-
-Cache* new_typed_lru_cache(const std::string& name, size_t capacity, LRUCacheType type) {
-    return new ShardedLRUCache(name, capacity, type);
+Cache* new_lru_cache(const std::string& name, size_t capacity, LRUCacheType type,
+                     uint32_t num_shards) {
+    return new ShardedLRUCache(name, capacity, type, num_shards);
 }
 
 } // namespace doris
diff --git a/be/src/olap/lru_cache.h b/be/src/olap/lru_cache.h
index 6fb87744f4..4bac173fad 100644
--- a/be/src/olap/lru_cache.h
+++ b/be/src/olap/lru_cache.h
@@ -53,11 +53,10 @@ enum LRUCacheType {
     NUMBER // The capacity of cache is based on the number of cache entry.
 };
 
-// Create a new cache with a specified name and a fixed SIZE capacity.
+// Create a new cache with a specified name and capacity.
 // This implementation of Cache uses a least-recently-used eviction policy.
-extern Cache* new_lru_cache(const std::string& name, size_t capacity);
-
-extern Cache* new_typed_lru_cache(const std::string& name, size_t capacity, LRUCacheType type);
+extern Cache* new_lru_cache(const std::string& name, size_t capacity,
+                            LRUCacheType type = LRUCacheType::SIZE, uint32_t num_shards = 16);
 
 class CacheKey {
 public:
@@ -356,12 +355,10 @@ private:
     uint64_t _hit_count = 0;    // 命中cache的总次数
 };
 
-static const int kNumShardBits = 4;
-static const int kNumShards = 1 << kNumShardBits;
-
 class ShardedLRUCache : public Cache {
 public:
-    explicit ShardedLRUCache(const std::string& name, size_t total_capacity, LRUCacheType type);
+    explicit ShardedLRUCache(const std::string& name, size_t total_capacity, LRUCacheType type,
+                             uint32_t num_shards);
     // TODO(fdy): 析构时清除所有cache元素
     virtual ~ShardedLRUCache();
     virtual Handle* insert(const CacheKey& key, void* value, size_t charge,
@@ -381,10 +378,14 @@ private:
 
 private:
     static uint32_t _hash_slice(const CacheKey& s);
-    static uint32_t _shard(uint32_t hash);
+    uint32_t _shard(uint32_t hash) {
+        return _num_shard_bits > 0 ? (hash >> (32 - _num_shard_bits)) : 0;
+    }
 
     std::string _name;
-    LRUCache* _shards[kNumShards];
+    const int _num_shard_bits;
+    const uint32_t _num_shards;
+    LRUCache** _shards;
     std::atomic<uint64_t> _last_id;
 
     std::shared_ptr<MemTracker> _mem_tracker;
diff --git a/be/src/olap/page_cache.cpp b/be/src/olap/page_cache.cpp
index c1b0aac1d8..2ec540b384 100644
--- a/be/src/olap/page_cache.cpp
+++ b/be/src/olap/page_cache.cpp
@@ -23,26 +23,32 @@ namespace doris {
 
 StoragePageCache* StoragePageCache::_s_instance = nullptr;
 
-void StoragePageCache::create_global_cache(size_t capacity, int32_t index_cache_percentage) {
+void StoragePageCache::create_global_cache(size_t capacity, int32_t index_cache_percentage,
+                                           uint32_t num_shards) {
     DCHECK(_s_instance == nullptr);
-    static StoragePageCache instance(capacity, index_cache_percentage);
+    static StoragePageCache instance(capacity, index_cache_percentage, num_shards);
     _s_instance = &instance;
 }
 
-StoragePageCache::StoragePageCache(size_t capacity, int32_t index_cache_percentage)
+StoragePageCache::StoragePageCache(size_t capacity, int32_t index_cache_percentage,
+                                   uint32_t num_shards)
         : _index_cache_percentage(index_cache_percentage),
           _mem_tracker(MemTracker::create_tracker(capacity, "StoragePageCache", nullptr,
                                                   MemTrackerLevel::OVERVIEW)) {
     SCOPED_SWITCH_THREAD_LOCAL_MEM_TRACKER(_mem_tracker);
     if (index_cache_percentage == 0) {
-        _data_page_cache = std::unique_ptr<Cache>(new_lru_cache("DataPageCache", capacity));
+        _data_page_cache = std::unique_ptr<Cache>(
+                new_lru_cache("DataPageCache", capacity, LRUCacheType::SIZE, num_shards));
     } else if (index_cache_percentage == 100) {
-        _index_page_cache = std::unique_ptr<Cache>(new_lru_cache("IndexPageCache", capacity));
+        _index_page_cache = std::unique_ptr<Cache>(
+                new_lru_cache("IndexPageCache", capacity, LRUCacheType::SIZE, num_shards));
     } else if (index_cache_percentage > 0 && index_cache_percentage < 100) {
         _data_page_cache = std::unique_ptr<Cache>(
-                new_lru_cache("DataPageCache", capacity * (100 - index_cache_percentage) / 100));
+                new_lru_cache("DataPageCache", capacity * (100 - index_cache_percentage) / 100,
+                              LRUCacheType::SIZE, num_shards));
         _index_page_cache = std::unique_ptr<Cache>(
-                new_lru_cache("IndexPageCache", capacity * index_cache_percentage / 100));
+                new_lru_cache("IndexPageCache", capacity * index_cache_percentage / 100,
+                              LRUCacheType::SIZE, num_shards));
     } else {
         CHECK(false) << "invalid index page cache percentage";
     }
diff --git a/be/src/olap/page_cache.h b/be/src/olap/page_cache.h
index be5a3ffd71..f03f50bd5a 100644
--- a/be/src/olap/page_cache.h
+++ b/be/src/olap/page_cache.h
@@ -54,14 +54,17 @@ public:
         }
     };
 
+    static constexpr uint32_t kDefaultNumShards = 16;
+
     // Create global instance of this class
-    static void create_global_cache(size_t capacity, int32_t index_cache_percentage);
+    static void create_global_cache(size_t capacity, int32_t index_cache_percentage,
+                                    uint32_t num_shards = kDefaultNumShards);
 
     // Return global instance.
     // Client should call create_global_cache before.
     static StoragePageCache* instance() { return _s_instance; }
 
-    StoragePageCache(size_t capacity, int32_t index_cache_percentage);
+    StoragePageCache(size_t capacity, int32_t index_cache_percentage, uint32_t num_shards);
 
     // Lookup the given page in the cache.
     //
diff --git a/be/src/olap/segment_loader.cpp b/be/src/olap/segment_loader.cpp
index 5b62c92e24..a105336f6a 100644
--- a/be/src/olap/segment_loader.cpp
+++ b/be/src/olap/segment_loader.cpp
@@ -32,7 +32,7 @@ void SegmentLoader::create_global_instance(size_t capacity) {
 
 SegmentLoader::SegmentLoader(size_t capacity) {
     _cache = std::unique_ptr<Cache>(
-            new_typed_lru_cache("SegmentLoader:SegmentCache", capacity, LRUCacheType::NUMBER));
+            new_lru_cache("SegmentLoader:SegmentCache", capacity, LRUCacheType::NUMBER));
 }
 
 bool SegmentLoader::_lookup(const SegmentLoader::CacheKey& key, SegmentCacheHandle* handle) {
diff --git a/be/src/runtime/exec_env_init.cpp b/be/src/runtime/exec_env_init.cpp
index ff5b847810..a1cdf32b63 100644
--- a/be/src/runtime/exec_env_init.cpp
+++ b/be/src/runtime/exec_env_init.cpp
@@ -250,8 +250,9 @@ Status ExecEnv::_init_mem_tracker() {
         // Reason same as buffer_pool_limit
         storage_cache_limit = storage_cache_limit / 2;
     }
-    int32_t index_page_cache_percentage = config::index_page_cache_percentage;
-    StoragePageCache::create_global_cache(storage_cache_limit, index_page_cache_percentage);
+    int32_t index_percentage = config::index_page_cache_percentage;
+    uint32_t num_shards = config::storage_page_cache_shard_size;
+    StoragePageCache::create_global_cache(storage_cache_limit, index_percentage, num_shards);
     LOG(INFO) << "Storage page cache memory limit: "
               << PrettyPrinter::print(storage_cache_limit, TUnit::BYTES)
               << ", origin config value: " << config::storage_page_cache_limit;
diff --git a/be/test/olap/page_cache_test.cpp b/be/test/olap/page_cache_test.cpp
index 1eca896010..73c11c39b8 100644
--- a/be/test/olap/page_cache_test.cpp
+++ b/be/test/olap/page_cache_test.cpp
@@ -21,6 +21,8 @@
 
 namespace doris {
 
+static int kNumShards = StoragePageCache::kDefaultNumShards;
+
 class StoragePageCacheTest : public testing::Test {
 public:
     StoragePageCacheTest() {}
@@ -29,7 +31,7 @@ public:
 
 // All cache space is allocated to data pages
 TEST(StoragePageCacheTest, data_page_only) {
-    StoragePageCache cache(kNumShards * 2048, 0);
+    StoragePageCache cache(kNumShards * 2048, 0, kNumShards);
 
     StoragePageCache::CacheKey key("abc", 0);
     StoragePageCache::CacheKey memory_key("mem", 0);
@@ -89,7 +91,7 @@ TEST(StoragePageCacheTest, data_page_only) {
 
 // All cache space is allocated to index pages
 TEST(StoragePageCacheTest, index_page_only) {
-    StoragePageCache cache(kNumShards * 2048, 100);
+    StoragePageCache cache(kNumShards * 2048, 100, kNumShards);
 
     StoragePageCache::CacheKey key("abc", 0);
     StoragePageCache::CacheKey memory_key("mem", 0);
@@ -149,7 +151,7 @@ TEST(StoragePageCacheTest, index_page_only) {
 
 // Cache space is allocated by index_page_cache_ratio
 TEST(StoragePageCacheTest, mixed_pages) {
-    StoragePageCache cache(kNumShards * 2048, 10);
+    StoragePageCache cache(kNumShards * 2048, 10, kNumShards);
 
     StoragePageCache::CacheKey data_key("data", 0);
     StoragePageCache::CacheKey index_key("index", 0);
diff --git a/docs/en/admin-manual/config/be-config.md b/docs/en/admin-manual/config/be-config.md
index 4bddb44111..ddb0e31f83 100644
--- a/docs/en/admin-manual/config/be-config.md
+++ b/docs/en/admin-manual/config/be-config.md
@@ -1136,6 +1136,12 @@ Default: 20%
 
 Cache for storage page size
 
+### `storage_page_cache_shard_size`
+
+Default: 16
+
+Shard size (i.e. the number of shards) of StoragePageCache; the value must be a power of two. It's recommended to set it to a value close to the number of BE cores to reduce lock contention.
+
 ### `index_page_cache_percentage`
 * Type: int32
 * Description: Index page cache as a percentage of total storage page cache, value range is [0, 100]
diff --git a/docs/zh-CN/admin-manual/config/be-config.md b/docs/zh-CN/admin-manual/config/be-config.md
index 873ef1d2b8..2d6599a284 100644
--- a/docs/zh-CN/admin-manual/config/be-config.md
+++ b/docs/zh-CN/admin-manual/config/be-config.md
@@ -1143,6 +1143,12 @@ storage_flood_stage_usage_percent和storage_flood_stage_left_capacity_bytes两
 
 缓存存储页大小
 
+### `storage_page_cache_shard_size`
+
+默认值: 16
+
+StoragePageCache的分片大小，值为 2^n (n=0,1,2,...)。建议设置为接近BE CPU核数的值，可减少StoragePageCache的锁竞争。
+
 ### `index_page_cache_percentage`
 * 类型：int32
 * 描述：索引页缓存占总页面缓存的百分比，取值为[0, 100]。


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org