You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pegasus.apache.org by ji...@apache.org on 2021/07/08 10:37:47 UTC
[incubator-pegasus] branch master updated: feat: add more rocksdb
perf-counter support (#774)
This is an automated email from the ASF dual-hosted git repository.
jiashuo pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-pegasus.git
The following commit(s) were added to refs/heads/master by this push:
new 32719d6 feat: add more rocksdb perf-counter support (#774)
32719d6 is described below
commit 32719d686aa96f421d6a1adc07f0756125789664
Author: Jiashuo <js...@live.com>
AuthorDate: Thu Jul 8 18:37:38 2021 +0800
feat: add more rocksdb perf-counter support (#774)
---
.gitignore | 4 ++
src/server/pegasus_server_impl.cpp | 94 ++++++++++++++++++++++++++-------
src/server/pegasus_server_impl.h | 32 ++++++-----
src/server/pegasus_server_impl_init.cpp | 68 +++++++++++++++++++++++-
4 files changed, 166 insertions(+), 32 deletions(-)
diff --git a/.gitignore b/.gitignore
index 6b08d1d..daebca2 100644
--- a/.gitignore
+++ b/.gitignore
@@ -97,3 +97,7 @@ cmake-build-debug
packages
src/test/function_test/pegasus-bulk-load-function-test-files/
+config-shell.ini.*
+*.tar.gz
+pegasus-server*
+*.log
diff --git a/src/server/pegasus_server_impl.cpp b/src/server/pegasus_server_impl.cpp
index f759ad8..637a6dc 100644
--- a/src/server/pegasus_server_impl.cpp
+++ b/src/server/pegasus_server_impl.cpp
@@ -46,6 +46,7 @@ namespace pegasus {
namespace server {
DEFINE_TASK_CODE(LPC_PEGASUS_SERVER_DELAY, TASK_PRIORITY_COMMON, ::dsn::THREAD_POOL_DEFAULT)
+DSN_DECLARE_int32(read_amp_bytes_per_bit);
DSN_DEFINE_int32("pegasus.server",
hotkey_analyse_time_interval_s,
@@ -150,7 +151,7 @@ void pegasus_server_impl::gc_checkpoints(bool force_reserve_one)
dwarn("get last write time of file %s failed", current_file.c_str());
break;
}
- uint64_t last_write_time = (uint64_t)tm;
+ auto last_write_time = (uint64_t)tm;
if (last_write_time + reserve_time >= current_time) {
// not expired
break;
@@ -1531,7 +1532,7 @@ void pegasus_server_impl::on_clear_scanner(const int64_t &args) { _context_cache
update_usage_scenario(envs);
}
- dinfo_replica("start the update rocksdb statistics timer task");
+ dinfo_replica("start the update replica-level rocksdb statistics timer task");
_update_replica_rdb_stat =
::dsn::tasking::enqueue_timer(LPC_REPLICATION_LONG_COMMON,
&_tracker,
@@ -1543,8 +1544,8 @@ void pegasus_server_impl::on_clear_scanner(const int64_t &args) { _context_cache
static std::once_flag flag;
std::call_once(flag, [&]() {
// The timer task will always be running even if there are no replicas
- dassert(kServerStatUpdateTimeSec.count() != 0,
- "kServerStatUpdateTimeSec shouldn't be zero");
+ dassert_f(kServerStatUpdateTimeSec.count() != 0,
+ "kServerStatUpdateTimeSec shouldn't be zero");
_update_server_rdb_stat = ::dsn::tasking::enqueue_timer(
LPC_REPLICATION_LONG_COMMON,
nullptr, // TODO: the tracker is nullptr, we will fix it later
@@ -1595,6 +1596,10 @@ void pegasus_server_impl::cancel_background_work(bool wait)
_update_replica_rdb_stat->cancel(true);
_update_replica_rdb_stat = nullptr;
}
+ if (_update_server_rdb_stat != nullptr) {
+ _update_server_rdb_stat->cancel(true);
+ _update_server_rdb_stat = nullptr;
+ }
_tracker.cancel_outstanding_tasks();
_context_cache.clear();
@@ -2227,15 +2232,16 @@ void pegasus_server_impl::update_replica_rocksdb_statistics()
dinfo_replica("_pfc_rdb_sst_size: {} bytes", val);
}
- // Update _pfc_rdb_block_cache_hit_count and _pfc_rdb_block_cache_total_count
- uint64_t block_cache_hit = _statistics->getTickerCount(rocksdb::BLOCK_CACHE_HIT);
- _pfc_rdb_block_cache_hit_count->set(block_cache_hit);
- dinfo_replica("_pfc_rdb_block_cache_hit_count: {}", block_cache_hit);
-
- uint64_t block_cache_miss = _statistics->getTickerCount(rocksdb::BLOCK_CACHE_MISS);
- uint64_t block_cache_total = block_cache_hit + block_cache_miss;
- _pfc_rdb_block_cache_total_count->set(block_cache_total);
- dinfo_replica("_pfc_rdb_block_cache_total_count: {}", block_cache_total);
+ // Update _pfc_rdb_write_amplification
+ std::map<std::string, std::string> props;
+ if (_db->GetMapProperty(_data_cf, "rocksdb.cfstats", &props)) {
+ auto write_amplification_iter = props.find("compaction.Sum.WriteAmp");
+ auto write_amplification = write_amplification_iter == props.end()
+ ? 1
+ : std::stod(write_amplification_iter->second);
+ _pfc_rdb_write_amplification->set(write_amplification);
+ dinfo_replica("_pfc_rdb_write_amplification: {}", write_amplification);
+ }
// Update _pfc_rdb_index_and_filter_blocks_mem_usage
if (_db->GetProperty(_data_cf, rocksdb::DB::Properties::kEstimateTableReadersMem, &str_val) &&
@@ -2260,32 +2266,82 @@ void pegasus_server_impl::update_replica_rocksdb_statistics()
dinfo_replica("_pfc_rdb_estimate_num_keys: {}", val);
}
+ // The following stats are related to `read`, so only the primary needs to update them;
+ // the `backup-request` case is ignored.
+ if (!is_primary()) {
+ return;
+ }
+
+ // Update _pfc_rdb_read_amplification
+ if (FLAGS_read_amp_bytes_per_bit > 0) {
+ auto estimate_useful_bytes =
+ _statistics->getTickerCount(rocksdb::READ_AMP_ESTIMATE_USEFUL_BYTES);
+ if (estimate_useful_bytes) {
+ auto read_amplification =
+ _statistics->getTickerCount(rocksdb::READ_AMP_TOTAL_READ_BYTES) /
+ estimate_useful_bytes;
+ _pfc_rdb_read_amplification->set(read_amplification);
+ dinfo_replica("_pfc_rdb_read_amplification: {}", read_amplification);
+ }
+ }
+
// Update _pfc_rdb_bf_seek_negatives
- uint64_t bf_seek_negatives = _statistics->getTickerCount(rocksdb::BLOOM_FILTER_PREFIX_USEFUL);
+ auto bf_seek_negatives = _statistics->getTickerCount(rocksdb::BLOOM_FILTER_PREFIX_USEFUL);
_pfc_rdb_bf_seek_negatives->set(bf_seek_negatives);
dinfo_replica("_pfc_rdb_bf_seek_negatives: {}", bf_seek_negatives);
// Update _pfc_rdb_bf_seek_total
- uint64_t bf_seek_total = _statistics->getTickerCount(rocksdb::BLOOM_FILTER_PREFIX_CHECKED);
+ auto bf_seek_total = _statistics->getTickerCount(rocksdb::BLOOM_FILTER_PREFIX_CHECKED);
_pfc_rdb_bf_seek_total->set(bf_seek_total);
dinfo_replica("_pfc_rdb_bf_seek_total: {}", bf_seek_total);
// Update _pfc_rdb_bf_point_positive_true
- uint64_t bf_point_positive_true =
+ auto bf_point_positive_true =
_statistics->getTickerCount(rocksdb::BLOOM_FILTER_FULL_TRUE_POSITIVE);
_pfc_rdb_bf_point_positive_true->set(bf_point_positive_true);
dinfo_replica("_pfc_rdb_bf_point_positive_true: {}", bf_point_positive_true);
// Update _pfc_rdb_bf_point_positive_total
- uint64_t bf_point_positive_total =
- _statistics->getTickerCount(rocksdb::BLOOM_FILTER_FULL_POSITIVE);
+ auto bf_point_positive_total = _statistics->getTickerCount(rocksdb::BLOOM_FILTER_FULL_POSITIVE);
_pfc_rdb_bf_point_positive_total->set(bf_point_positive_total);
dinfo_replica("_pfc_rdb_bf_point_positive_total: {}", bf_point_positive_total);
// Update _pfc_rdb_bf_point_negatives
- uint64_t bf_point_negatives = _statistics->getTickerCount(rocksdb::BLOOM_FILTER_USEFUL);
+ auto bf_point_negatives = _statistics->getTickerCount(rocksdb::BLOOM_FILTER_USEFUL);
_pfc_rdb_bf_point_negatives->set(bf_point_negatives);
dinfo_replica("_pfc_rdb_bf_point_negatives: {}", bf_point_negatives);
+
+ // Update _pfc_rdb_block_cache_hit_count and _pfc_rdb_block_cache_total_count
+ auto block_cache_hit = _statistics->getTickerCount(rocksdb::BLOCK_CACHE_HIT);
+ _pfc_rdb_block_cache_hit_count->set(block_cache_hit);
+ dinfo_replica("_pfc_rdb_block_cache_hit_count: {}", block_cache_hit);
+
+ auto block_cache_miss = _statistics->getTickerCount(rocksdb::BLOCK_CACHE_MISS);
+ auto block_cache_total = block_cache_hit + block_cache_miss;
+ _pfc_rdb_block_cache_total_count->set(block_cache_total);
+ dinfo_replica("_pfc_rdb_block_cache_total_count: {}", block_cache_total);
+
+ // update block memtable/l0/l1/l2andup hit rate under block cache up level
+ auto memtable_hit_count = _statistics->getTickerCount(rocksdb::MEMTABLE_HIT);
+ _pfc_rdb_memtable_hit_count->set(memtable_hit_count);
+ dinfo_replica("_pfc_rdb_memtable_hit_count: {}", memtable_hit_count);
+
+ auto memtable_miss_count = _statistics->getTickerCount(rocksdb::MEMTABLE_MISS);
+ auto memtable_total = memtable_hit_count + memtable_miss_count;
+ _pfc_rdb_memtable_total_count->set(memtable_total);
+ dinfo_replica("_pfc_rdb_memtable_total_count: {}", memtable_total);
+
+ auto l0_hit_count = _statistics->getTickerCount(rocksdb::GET_HIT_L0);
+ _pfc_rdb_l0_hit_count->set(l0_hit_count);
+ dinfo_replica("_pfc_rdb_l0_hit_count: {}", l0_hit_count);
+
+ auto l1_hit_count = _statistics->getTickerCount(rocksdb::GET_HIT_L1);
+ _pfc_rdb_l1_hit_count->set(l1_hit_count);
+ dinfo_replica("_pfc_rdb_l1_hit_count: {}", l1_hit_count);
+
+ auto l2andup_hit_count = _statistics->getTickerCount(rocksdb::GET_HIT_L2_AND_UP);
+ _pfc_rdb_l2andup_hit_count->set(l2andup_hit_count);
+ dinfo_replica("_pfc_rdb_l2andup_hit_count: {}", l2andup_hit_count);
}
void pegasus_server_impl::update_server_rocksdb_statistics()
diff --git a/src/server/pegasus_server_impl.h b/src/server/pegasus_server_impl.h
index da8047a..48613f3 100644
--- a/src/server/pegasus_server_impl.h
+++ b/src/server/pegasus_server_impl.h
@@ -442,18 +442,26 @@ private:
static ::dsn::perf_counter_wrapper _pfc_rdb_write_limiter_rate_bytes;
static ::dsn::perf_counter_wrapper _pfc_rdb_block_cache_mem_usage;
// replica level
- ::dsn::perf_counter_wrapper _pfc_rdb_sst_count;
- ::dsn::perf_counter_wrapper _pfc_rdb_sst_size;
- ::dsn::perf_counter_wrapper _pfc_rdb_block_cache_hit_count;
- ::dsn::perf_counter_wrapper _pfc_rdb_block_cache_total_count;
- ::dsn::perf_counter_wrapper _pfc_rdb_index_and_filter_blocks_mem_usage;
- ::dsn::perf_counter_wrapper _pfc_rdb_memtable_mem_usage;
- ::dsn::perf_counter_wrapper _pfc_rdb_estimate_num_keys;
- ::dsn::perf_counter_wrapper _pfc_rdb_bf_seek_negatives;
- ::dsn::perf_counter_wrapper _pfc_rdb_bf_seek_total;
- ::dsn::perf_counter_wrapper _pfc_rdb_bf_point_positive_true;
- ::dsn::perf_counter_wrapper _pfc_rdb_bf_point_positive_total;
- ::dsn::perf_counter_wrapper _pfc_rdb_bf_point_negatives;
+ dsn::perf_counter_wrapper _pfc_rdb_sst_count;
+ dsn::perf_counter_wrapper _pfc_rdb_sst_size;
+ dsn::perf_counter_wrapper _pfc_rdb_index_and_filter_blocks_mem_usage;
+ dsn::perf_counter_wrapper _pfc_rdb_memtable_mem_usage;
+ dsn::perf_counter_wrapper _pfc_rdb_estimate_num_keys;
+
+ dsn::perf_counter_wrapper _pfc_rdb_bf_seek_negatives;
+ dsn::perf_counter_wrapper _pfc_rdb_bf_seek_total;
+ dsn::perf_counter_wrapper _pfc_rdb_bf_point_positive_true;
+ dsn::perf_counter_wrapper _pfc_rdb_bf_point_positive_total;
+ dsn::perf_counter_wrapper _pfc_rdb_bf_point_negatives;
+ dsn::perf_counter_wrapper _pfc_rdb_block_cache_hit_count;
+ dsn::perf_counter_wrapper _pfc_rdb_block_cache_total_count;
+ dsn::perf_counter_wrapper _pfc_rdb_write_amplification;
+ dsn::perf_counter_wrapper _pfc_rdb_read_amplification;
+ dsn::perf_counter_wrapper _pfc_rdb_memtable_hit_count;
+ dsn::perf_counter_wrapper _pfc_rdb_memtable_total_count;
+ dsn::perf_counter_wrapper _pfc_rdb_l0_hit_count;
+ dsn::perf_counter_wrapper _pfc_rdb_l1_hit_count;
+ dsn::perf_counter_wrapper _pfc_rdb_l2andup_hit_count;
};
} // namespace server
diff --git a/src/server/pegasus_server_impl_init.cpp b/src/server/pegasus_server_impl_init.cpp
index 414e2c4..6b38e63 100644
--- a/src/server/pegasus_server_impl_init.cpp
+++ b/src/server/pegasus_server_impl_init.cpp
@@ -44,6 +44,42 @@ DSN_DEFINE_bool("pegasus.server",
false,
"whether to enable write rate auto tune when open rocksdb write limit");
+// If used, for every data block we load into memory, we will create a bitmap
+// of size ((block_size / `read_amp_bytes_per_bit`) / 8) bytes. This bitmap
+// will be used to figure out the percentage we actually read of the blocks.
+//
+// When this feature is used Tickers::READ_AMP_ESTIMATE_USEFUL_BYTES and
+// Tickers::READ_AMP_TOTAL_READ_BYTES can be used to calculate the
+// read amplification using this formula
+// (READ_AMP_TOTAL_READ_BYTES / READ_AMP_ESTIMATE_USEFUL_BYTES)
+//
+// value => memory usage (percentage of loaded blocks memory)
+// 1 => 12.50 %
+// 2 => 06.25 %
+// 4 => 03.12 %
+// 8 => 01.56 %
+// 16 => 00.78 %
+//
+// Note: This number must be a power of 2, if not it will be sanitized
+// to be the next lowest power of 2, for example a value of 7 will be
+// treated as 4, a value of 19 will be treated as 16.
+//
+// Default: 0 (disabled)
+// see https://github.com/XiaoMi/pegasus-rocksdb/blob/v6.6.4-compatible/include/rocksdb/table.h#L247
+DSN_DEFINE_int32("pegasus.server",
+ read_amp_bytes_per_bit,
+ 0,
+ "config for using to calculate the "
+ "read amplification, must be a power "
+ "of 2, zero means disable count read "
+ "amplification");
+
+DSN_DEFINE_validator(read_amp_bytes_per_bit, [](const int64_t read_amp_bytes_per_bit) -> bool {
+ return read_amp_bytes_per_bit == 0 ||
+ (read_amp_bytes_per_bit > 0 &&
+ (read_amp_bytes_per_bit & (read_amp_bytes_per_bit - 1)) == 0);
+});
+
static const std::unordered_map<std::string, rocksdb::BlockBasedTableOptions::IndexType>
INDEX_TYPE_STRING_MAP = {
{"binary_search", rocksdb::BlockBasedTableOptions::IndexType::kBinarySearch},
@@ -260,6 +296,8 @@ pegasus_server_impl::pegasus_server_impl(dsn::replication::replica *r)
"parse rocksdb_compression_type failed.");
rocksdb::BlockBasedTableOptions tbl_opts;
+ tbl_opts.read_amp_bytes_per_bit = FLAGS_read_amp_bytes_per_bit;
+
if (dsn_config_get_value_bool("pegasus.server",
"rocksdb_disable_table_block_cache",
false,
@@ -501,7 +539,7 @@ pegasus_server_impl::pegasus_server_impl(dsn::replication::replica *r)
_checkpoint_reserve_time_seconds = _checkpoint_reserve_time_seconds_in_config;
_update_rdb_stat_interval = std::chrono::seconds(dsn_config_get_value_uint64(
- "pegasus.server", "update_rdb_stat_interval", 600, "update_rdb_stat_interval, in seconds"));
+ "pegasus.server", "update_rdb_stat_interval", 60, "update_rdb_stat_interval, in seconds"));
// TODO: move the qps/latency counters and its statistics to replication_app_base layer
std::string str_gpid = _gpid.to_string();
@@ -575,6 +613,34 @@ pegasus_server_impl::pegasus_server_impl(dsn::replication::replica *r)
COUNTER_TYPE_NUMBER,
"statistic the total count of rocksdb block cache");
+ snprintf(name, 255, "rdb.write_amplification@%s", str_gpid.c_str());
+ _pfc_rdb_write_amplification.init_app_counter(
+ "app.pegasus", name, COUNTER_TYPE_NUMBER, "statistics the write amplification of rocksdb");
+
+ snprintf(name, 255, "rdb.read_amplification@%s", str_gpid.c_str());
+ _pfc_rdb_read_amplification.init_app_counter(
+ "app.pegasus", name, COUNTER_TYPE_NUMBER, "statistics the read amplification of rocksdb");
+
+ snprintf(name, 255, "rdb.read_memtable_hit_count@%s", str_gpid.c_str());
+ _pfc_rdb_memtable_hit_count.init_app_counter(
+ "app.pegasus", name, COUNTER_TYPE_NUMBER, "statistics the read memtable hit count");
+
+ snprintf(name, 255, "rdb.read_memtable_total_count@%s", str_gpid.c_str());
+ _pfc_rdb_memtable_total_count.init_app_counter(
+ "app.pegasus", name, COUNTER_TYPE_NUMBER, "statistics the read memtable total count");
+
+ snprintf(name, 255, "rdb.read_l0_hit_count@%s", str_gpid.c_str());
+ _pfc_rdb_l0_hit_count.init_app_counter(
+ "app.pegasus", name, COUNTER_TYPE_NUMBER, "statistics the read l0 hit count");
+
+ snprintf(name, 255, "rdb.read_l1_hit_count@%s", str_gpid.c_str());
+ _pfc_rdb_l1_hit_count.init_app_counter(
+ "app.pegasus", name, COUNTER_TYPE_NUMBER, "statistics the read l1 hit count");
+
+ snprintf(name, 255, "rdb.read_l2andup_hit_count@%s", str_gpid.c_str());
+ _pfc_rdb_l2andup_hit_count.init_app_counter(
+ "app.pegasus", name, COUNTER_TYPE_NUMBER, "statistics the read l2andup hit count");
+
// These counters are singletons on this server shared by all replicas, so we initialize
// them only once.
static std::once_flag flag;
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pegasus.apache.org
For additional commands, e-mail: commits-help@pegasus.apache.org