You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pegasus.apache.org by ji...@apache.org on 2021/07/08 10:37:47 UTC

[incubator-pegasus] branch master updated: feat: add more rocksdb perf-counter support (#774)

This is an automated email from the ASF dual-hosted git repository.

jiashuo pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-pegasus.git


The following commit(s) were added to refs/heads/master by this push:
     new 32719d6  feat: add more rocksdb  perf-counter support (#774)
32719d6 is described below

commit 32719d686aa96f421d6a1adc07f0756125789664
Author: Jiashuo <js...@live.com>
AuthorDate: Thu Jul 8 18:37:38 2021 +0800

    feat: add more rocksdb  perf-counter support (#774)
---
 .gitignore                              |  4 ++
 src/server/pegasus_server_impl.cpp      | 94 ++++++++++++++++++++++++++-------
 src/server/pegasus_server_impl.h        | 32 ++++++-----
 src/server/pegasus_server_impl_init.cpp | 68 +++++++++++++++++++++++-
 4 files changed, 166 insertions(+), 32 deletions(-)

diff --git a/.gitignore b/.gitignore
index 6b08d1d..daebca2 100644
--- a/.gitignore
+++ b/.gitignore
@@ -97,3 +97,7 @@ cmake-build-debug
 packages
 
 src/test/function_test/pegasus-bulk-load-function-test-files/
+config-shell.ini.*
+*.tar.gz
+pegasus-server*
+*.log
diff --git a/src/server/pegasus_server_impl.cpp b/src/server/pegasus_server_impl.cpp
index f759ad8..637a6dc 100644
--- a/src/server/pegasus_server_impl.cpp
+++ b/src/server/pegasus_server_impl.cpp
@@ -46,6 +46,7 @@ namespace pegasus {
 namespace server {
 
 DEFINE_TASK_CODE(LPC_PEGASUS_SERVER_DELAY, TASK_PRIORITY_COMMON, ::dsn::THREAD_POOL_DEFAULT)
+DSN_DECLARE_int32(read_amp_bytes_per_bit);
 
 DSN_DEFINE_int32("pegasus.server",
                  hotkey_analyse_time_interval_s,
@@ -150,7 +151,7 @@ void pegasus_server_impl::gc_checkpoints(bool force_reserve_one)
                 dwarn("get last write time of file %s failed", current_file.c_str());
                 break;
             }
-            uint64_t last_write_time = (uint64_t)tm;
+            auto last_write_time = (uint64_t)tm;
             if (last_write_time + reserve_time >= current_time) {
                 // not expired
                 break;
@@ -1531,7 +1532,7 @@ void pegasus_server_impl::on_clear_scanner(const int64_t &args) { _context_cache
         update_usage_scenario(envs);
     }
 
-    dinfo_replica("start the update rocksdb statistics timer task");
+    dinfo_replica("start the update replica-level rocksdb statistics timer task");
     _update_replica_rdb_stat =
         ::dsn::tasking::enqueue_timer(LPC_REPLICATION_LONG_COMMON,
                                       &_tracker,
@@ -1543,8 +1544,8 @@ void pegasus_server_impl::on_clear_scanner(const int64_t &args) { _context_cache
     static std::once_flag flag;
     std::call_once(flag, [&]() {
         // The timer task will always running even though there is no replicas
-        dassert(kServerStatUpdateTimeSec.count() != 0,
-                "kServerStatUpdateTimeSec shouldn't be zero");
+        dassert_f(kServerStatUpdateTimeSec.count() != 0,
+                  "kServerStatUpdateTimeSec shouldn't be zero");
         _update_server_rdb_stat = ::dsn::tasking::enqueue_timer(
             LPC_REPLICATION_LONG_COMMON,
             nullptr, // TODO: the tracker is nullptr, we will fix it later
@@ -1595,6 +1596,10 @@ void pegasus_server_impl::cancel_background_work(bool wait)
         _update_replica_rdb_stat->cancel(true);
         _update_replica_rdb_stat = nullptr;
     }
+    if (_update_server_rdb_stat != nullptr) {
+        _update_server_rdb_stat->cancel(true);
+        _update_server_rdb_stat = nullptr;
+    }
     _tracker.cancel_outstanding_tasks();
 
     _context_cache.clear();
@@ -2227,15 +2232,16 @@ void pegasus_server_impl::update_replica_rocksdb_statistics()
         dinfo_replica("_pfc_rdb_sst_size: {} bytes", val);
     }
 
-    // Update _pfc_rdb_block_cache_hit_count and _pfc_rdb_block_cache_total_count
-    uint64_t block_cache_hit = _statistics->getTickerCount(rocksdb::BLOCK_CACHE_HIT);
-    _pfc_rdb_block_cache_hit_count->set(block_cache_hit);
-    dinfo_replica("_pfc_rdb_block_cache_hit_count: {}", block_cache_hit);
-
-    uint64_t block_cache_miss = _statistics->getTickerCount(rocksdb::BLOCK_CACHE_MISS);
-    uint64_t block_cache_total = block_cache_hit + block_cache_miss;
-    _pfc_rdb_block_cache_total_count->set(block_cache_total);
-    dinfo_replica("_pfc_rdb_block_cache_total_count: {}", block_cache_total);
+    // Update _pfc_rdb_write_amplification
+    std::map<std::string, std::string> props;
+    if (_db->GetMapProperty(_data_cf, "rocksdb.cfstats", &props)) {
+        auto write_amplification_iter = props.find("compaction.Sum.WriteAmp");
+        auto write_amplification = write_amplification_iter == props.end()
+                                       ? 1
+                                       : std::stod(write_amplification_iter->second);
+        _pfc_rdb_write_amplification->set(write_amplification);
+        dinfo_replica("_pfc_rdb_write_amplification: {}", write_amplification);
+    }
 
     // Update _pfc_rdb_index_and_filter_blocks_mem_usage
     if (_db->GetProperty(_data_cf, rocksdb::DB::Properties::kEstimateTableReadersMem, &str_val) &&
@@ -2260,32 +2266,82 @@ void pegasus_server_impl::update_replica_rocksdb_statistics()
         dinfo_replica("_pfc_rdb_estimate_num_keys: {}", val);
     }
 
+    // The following stats are related to `read`, so only the primary needs to update them.
+    // The `backup-request` case is ignored.
+    if (!is_primary()) {
+        return;
+    }
+
+    // Update _pfc_rdb_read_amplification
+    if (FLAGS_read_amp_bytes_per_bit > 0) {
+        auto estimate_useful_bytes =
+            _statistics->getTickerCount(rocksdb::READ_AMP_ESTIMATE_USEFUL_BYTES);
+        if (estimate_useful_bytes) {
+            auto read_amplification =
+                _statistics->getTickerCount(rocksdb::READ_AMP_TOTAL_READ_BYTES) /
+                estimate_useful_bytes;
+            _pfc_rdb_read_amplification->set(read_amplification);
+            dinfo_replica("_pfc_rdb_read_amplification: {}", read_amplification);
+        }
+    }
+
     // Update _pfc_rdb_bf_seek_negatives
-    uint64_t bf_seek_negatives = _statistics->getTickerCount(rocksdb::BLOOM_FILTER_PREFIX_USEFUL);
+    auto bf_seek_negatives = _statistics->getTickerCount(rocksdb::BLOOM_FILTER_PREFIX_USEFUL);
     _pfc_rdb_bf_seek_negatives->set(bf_seek_negatives);
     dinfo_replica("_pfc_rdb_bf_seek_negatives: {}", bf_seek_negatives);
 
     // Update _pfc_rdb_bf_seek_total
-    uint64_t bf_seek_total = _statistics->getTickerCount(rocksdb::BLOOM_FILTER_PREFIX_CHECKED);
+    auto bf_seek_total = _statistics->getTickerCount(rocksdb::BLOOM_FILTER_PREFIX_CHECKED);
     _pfc_rdb_bf_seek_total->set(bf_seek_total);
     dinfo_replica("_pfc_rdb_bf_seek_total: {}", bf_seek_total);
 
     // Update _pfc_rdb_bf_point_positive_true
-    uint64_t bf_point_positive_true =
+    auto bf_point_positive_true =
         _statistics->getTickerCount(rocksdb::BLOOM_FILTER_FULL_TRUE_POSITIVE);
     _pfc_rdb_bf_point_positive_true->set(bf_point_positive_true);
     dinfo_replica("_pfc_rdb_bf_point_positive_true: {}", bf_point_positive_true);
 
     // Update _pfc_rdb_bf_point_positive_total
-    uint64_t bf_point_positive_total =
-        _statistics->getTickerCount(rocksdb::BLOOM_FILTER_FULL_POSITIVE);
+    auto bf_point_positive_total = _statistics->getTickerCount(rocksdb::BLOOM_FILTER_FULL_POSITIVE);
     _pfc_rdb_bf_point_positive_total->set(bf_point_positive_total);
     dinfo_replica("_pfc_rdb_bf_point_positive_total: {}", bf_point_positive_total);
 
     // Update _pfc_rdb_bf_point_negatives
-    uint64_t bf_point_negatives = _statistics->getTickerCount(rocksdb::BLOOM_FILTER_USEFUL);
+    auto bf_point_negatives = _statistics->getTickerCount(rocksdb::BLOOM_FILTER_USEFUL);
     _pfc_rdb_bf_point_negatives->set(bf_point_negatives);
     dinfo_replica("_pfc_rdb_bf_point_negatives: {}", bf_point_negatives);
+
+    // Update _pfc_rdb_block_cache_hit_count and _pfc_rdb_block_cache_total_count
+    auto block_cache_hit = _statistics->getTickerCount(rocksdb::BLOCK_CACHE_HIT);
+    _pfc_rdb_block_cache_hit_count->set(block_cache_hit);
+    dinfo_replica("_pfc_rdb_block_cache_hit_count: {}", block_cache_hit);
+
+    auto block_cache_miss = _statistics->getTickerCount(rocksdb::BLOCK_CACHE_MISS);
+    auto block_cache_total = block_cache_hit + block_cache_miss;
+    _pfc_rdb_block_cache_total_count->set(block_cache_total);
+    dinfo_replica("_pfc_rdb_block_cache_total_count: {}", block_cache_total);
+
+    // update block memtable/l0/l1/l2andup hit rate under block cache up level
+    auto memtable_hit_count = _statistics->getTickerCount(rocksdb::MEMTABLE_HIT);
+    _pfc_rdb_memtable_hit_count->set(memtable_hit_count);
+    dinfo_replica("_pfc_rdb_memtable_hit_count: {}", memtable_hit_count);
+
+    auto memtable_miss_count = _statistics->getTickerCount(rocksdb::MEMTABLE_MISS);
+    auto memtable_total = memtable_hit_count + memtable_miss_count;
+    _pfc_rdb_memtable_total_count->set(memtable_total);
+    dinfo_replica("_pfc_rdb_memtable_total_count: {}", memtable_total);
+
+    auto l0_hit_count = _statistics->getTickerCount(rocksdb::GET_HIT_L0);
+    _pfc_rdb_l0_hit_count->set(l0_hit_count);
+    dinfo_replica("_pfc_rdb_l0_hit_count: {}", l0_hit_count);
+
+    auto l1_hit_count = _statistics->getTickerCount(rocksdb::GET_HIT_L1);
+    _pfc_rdb_l1_hit_count->set(l1_hit_count);
+    dinfo_replica("_pfc_rdb_l1_hit_count: {}", l1_hit_count);
+
+    auto l2andup_hit_count = _statistics->getTickerCount(rocksdb::GET_HIT_L2_AND_UP);
+    _pfc_rdb_l2andup_hit_count->set(l2andup_hit_count);
+    dinfo_replica("_pfc_rdb_l2andup_hit_count: {}", l2andup_hit_count);
 }
 
 void pegasus_server_impl::update_server_rocksdb_statistics()
diff --git a/src/server/pegasus_server_impl.h b/src/server/pegasus_server_impl.h
index da8047a..48613f3 100644
--- a/src/server/pegasus_server_impl.h
+++ b/src/server/pegasus_server_impl.h
@@ -442,18 +442,26 @@ private:
     static ::dsn::perf_counter_wrapper _pfc_rdb_write_limiter_rate_bytes;
     static ::dsn::perf_counter_wrapper _pfc_rdb_block_cache_mem_usage;
     // replica level
-    ::dsn::perf_counter_wrapper _pfc_rdb_sst_count;
-    ::dsn::perf_counter_wrapper _pfc_rdb_sst_size;
-    ::dsn::perf_counter_wrapper _pfc_rdb_block_cache_hit_count;
-    ::dsn::perf_counter_wrapper _pfc_rdb_block_cache_total_count;
-    ::dsn::perf_counter_wrapper _pfc_rdb_index_and_filter_blocks_mem_usage;
-    ::dsn::perf_counter_wrapper _pfc_rdb_memtable_mem_usage;
-    ::dsn::perf_counter_wrapper _pfc_rdb_estimate_num_keys;
-    ::dsn::perf_counter_wrapper _pfc_rdb_bf_seek_negatives;
-    ::dsn::perf_counter_wrapper _pfc_rdb_bf_seek_total;
-    ::dsn::perf_counter_wrapper _pfc_rdb_bf_point_positive_true;
-    ::dsn::perf_counter_wrapper _pfc_rdb_bf_point_positive_total;
-    ::dsn::perf_counter_wrapper _pfc_rdb_bf_point_negatives;
+    dsn::perf_counter_wrapper _pfc_rdb_sst_count;
+    dsn::perf_counter_wrapper _pfc_rdb_sst_size;
+    dsn::perf_counter_wrapper _pfc_rdb_index_and_filter_blocks_mem_usage;
+    dsn::perf_counter_wrapper _pfc_rdb_memtable_mem_usage;
+    dsn::perf_counter_wrapper _pfc_rdb_estimate_num_keys;
+
+    dsn::perf_counter_wrapper _pfc_rdb_bf_seek_negatives;
+    dsn::perf_counter_wrapper _pfc_rdb_bf_seek_total;
+    dsn::perf_counter_wrapper _pfc_rdb_bf_point_positive_true;
+    dsn::perf_counter_wrapper _pfc_rdb_bf_point_positive_total;
+    dsn::perf_counter_wrapper _pfc_rdb_bf_point_negatives;
+    dsn::perf_counter_wrapper _pfc_rdb_block_cache_hit_count;
+    dsn::perf_counter_wrapper _pfc_rdb_block_cache_total_count;
+    dsn::perf_counter_wrapper _pfc_rdb_write_amplification;
+    dsn::perf_counter_wrapper _pfc_rdb_read_amplification;
+    dsn::perf_counter_wrapper _pfc_rdb_memtable_hit_count;
+    dsn::perf_counter_wrapper _pfc_rdb_memtable_total_count;
+    dsn::perf_counter_wrapper _pfc_rdb_l0_hit_count;
+    dsn::perf_counter_wrapper _pfc_rdb_l1_hit_count;
+    dsn::perf_counter_wrapper _pfc_rdb_l2andup_hit_count;
 };
 
 } // namespace server
diff --git a/src/server/pegasus_server_impl_init.cpp b/src/server/pegasus_server_impl_init.cpp
index 414e2c4..6b38e63 100644
--- a/src/server/pegasus_server_impl_init.cpp
+++ b/src/server/pegasus_server_impl_init.cpp
@@ -44,6 +44,42 @@ DSN_DEFINE_bool("pegasus.server",
                 false,
                 "whether to enable write rate auto tune when open rocksdb write limit");
 
+// If used, for every data block we load into memory, we will create a bitmap
+// of size ((block_size / `read_amp_bytes_per_bit`) / 8) bytes. This bitmap
+// will be used to figure out the percentage we actually read of the blocks.
+//
+// When this feature is used Tickers::READ_AMP_ESTIMATE_USEFUL_BYTES and
+// Tickers::READ_AMP_TOTAL_READ_BYTES can be used to calculate the
+// read amplification using this formula
+// (READ_AMP_TOTAL_READ_BYTES / READ_AMP_ESTIMATE_USEFUL_BYTES)
+//
+// value  =>  memory usage (percentage of loaded blocks memory)
+// 1      =>  12.50 %
+// 2      =>  06.25 %
+// 4      =>  03.12 %
+// 8      =>  01.56 %
+// 16     =>  00.78 %
+//
+// Note: This number must be a power of 2, if not it will be sanitized
+// to be the next lowest power of 2, for example a value of 7 will be
+// treated as 4, a value of 19 will be treated as 16.
+//
+// Default: 0 (disabled)
+// see https://github.com/XiaoMi/pegasus-rocksdb/blob/v6.6.4-compatible/include/rocksdb/table.h#L247
+DSN_DEFINE_int32("pegasus.server",
+                 read_amp_bytes_per_bit,
+                 0,
+                 "config for using to calculate the "
+                 "read amplification, must be a power "
+                 "of 2, zero means disable count read "
+                 "amplification");
+
+DSN_DEFINE_validator(read_amp_bytes_per_bit, [](const int64_t read_amp_bytes_per_bit) -> bool {
+    return read_amp_bytes_per_bit == 0 ||
+           (read_amp_bytes_per_bit > 0 &&
+            (read_amp_bytes_per_bit & (read_amp_bytes_per_bit - 1)) == 0);
+});
+
 static const std::unordered_map<std::string, rocksdb::BlockBasedTableOptions::IndexType>
     INDEX_TYPE_STRING_MAP = {
         {"binary_search", rocksdb::BlockBasedTableOptions::IndexType::kBinarySearch},
@@ -260,6 +296,8 @@ pegasus_server_impl::pegasus_server_impl(dsn::replication::replica *r)
             "parse rocksdb_compression_type failed.");
 
     rocksdb::BlockBasedTableOptions tbl_opts;
+    tbl_opts.read_amp_bytes_per_bit = FLAGS_read_amp_bytes_per_bit;
+
     if (dsn_config_get_value_bool("pegasus.server",
                                   "rocksdb_disable_table_block_cache",
                                   false,
@@ -501,7 +539,7 @@ pegasus_server_impl::pegasus_server_impl(dsn::replication::replica *r)
     _checkpoint_reserve_time_seconds = _checkpoint_reserve_time_seconds_in_config;
 
     _update_rdb_stat_interval = std::chrono::seconds(dsn_config_get_value_uint64(
-        "pegasus.server", "update_rdb_stat_interval", 600, "update_rdb_stat_interval, in seconds"));
+        "pegasus.server", "update_rdb_stat_interval", 60, "update_rdb_stat_interval, in seconds"));
 
     // TODO: move the qps/latency counters and it's statistics to replication_app_base layer
     std::string str_gpid = _gpid.to_string();
@@ -575,6 +613,34 @@ pegasus_server_impl::pegasus_server_impl(dsn::replication::replica *r)
         COUNTER_TYPE_NUMBER,
         "statistic the total count of rocksdb block cache");
 
+    snprintf(name, 255, "rdb.write_amplification@%s", str_gpid.c_str());
+    _pfc_rdb_write_amplification.init_app_counter(
+        "app.pegasus", name, COUNTER_TYPE_NUMBER, "statistics the write amplification of rocksdb");
+
+    snprintf(name, 255, "rdb.read_amplification@%s", str_gpid.c_str());
+    _pfc_rdb_read_amplification.init_app_counter(
+        "app.pegasus", name, COUNTER_TYPE_NUMBER, "statistics the read amplification of rocksdb");
+
+    snprintf(name, 255, "rdb.read_memtable_hit_count@%s", str_gpid.c_str());
+    _pfc_rdb_memtable_hit_count.init_app_counter(
+        "app.pegasus", name, COUNTER_TYPE_NUMBER, "statistics the read memtable hit count");
+
+    snprintf(name, 255, "rdb.read_memtable_total_count@%s", str_gpid.c_str());
+    _pfc_rdb_memtable_total_count.init_app_counter(
+        "app.pegasus", name, COUNTER_TYPE_NUMBER, "statistics the read memtable total count");
+
+    snprintf(name, 255, "rdb.read_l0_hit_count@%s", str_gpid.c_str());
+    _pfc_rdb_l0_hit_count.init_app_counter(
+        "app.pegasus", name, COUNTER_TYPE_NUMBER, "statistics the read l0 hit count");
+
+    snprintf(name, 255, "rdb.read_l1_hit_count@%s", str_gpid.c_str());
+    _pfc_rdb_l1_hit_count.init_app_counter(
+        "app.pegasus", name, COUNTER_TYPE_NUMBER, "statistics the read l1 hit count");
+
+    snprintf(name, 255, "rdb.read_l2andup_hit_count@%s", str_gpid.c_str());
+    _pfc_rdb_l2andup_hit_count.init_app_counter(
+        "app.pegasus", name, COUNTER_TYPE_NUMBER, "statistics the read l2andup hit count");
+
     // These counters are singletons on this server shared by all replicas, so we initialize
     // them only once.
     static std::once_flag flag;

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pegasus.apache.org
For additional commands, e-mail: commits-help@pegasus.apache.org