You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pegasus.apache.org by wa...@apache.org on 2023/06/21 04:24:17 UTC
[incubator-pegasus] 04/38: feat(new_metrics): migrate replica-level metrics for pegasus_server_impl (part 2) (#1386)
This is an automated email from the ASF dual-hosted git repository.
wangdan pushed a commit to branch migrate-metrics-dev
in repository https://gitbox.apache.org/repos/asf/incubator-pegasus.git
commit 6902d4b68796bb9555dae257df1c9c5ad5ffe240
Author: Dan Wang <wa...@apache.org>
AuthorDate: Fri Mar 10 17:05:59 2023 +0800
feat(new_metrics): migrate replica-level metrics for pegasus_server_impl (part 2) (#1386)
This PR migrates the replica-level metrics of pegasus_server_impl to the new metrics
framework. It is the 2nd part of this work, for #1333.
This PR focuses on migrating all rocksdb-related metrics for each replica, including the
total number and size of sst files, the estimated number of keys, memory usage and hit
rate, write/read amplification, and the negatives/positives of bloom filters.
---
src/server/pegasus_server_impl.cpp | 150 ++++++-----------
src/server/pegasus_server_impl.h | 48 +++---
src/server/pegasus_server_impl_init.cpp | 275 ++++++++++++++++++--------------
src/utils/metrics.h | 12 +-
4 files changed, 238 insertions(+), 247 deletions(-)
diff --git a/src/server/pegasus_server_impl.cpp b/src/server/pegasus_server_impl.cpp
index ab6c4dc1c..7d0dad595 100644
--- a/src/server/pegasus_server_impl.cpp
+++ b/src/server/pegasus_server_impl.cpp
@@ -360,6 +360,15 @@ void pegasus_server_impl::log_expired_data(const char *op,
} \
} while (0)
+#define CHECK_READ_THROTTLING() \
+ do { \
+ if (dsn_unlikely(!_read_size_throttling_controller->available())) { \
+ rpc.error() = dsn::ERR_BUSY; \
+ METRIC_VAR_INCREMENT(throttling_rejected_read_requests); \
+ return; \
+ } \
+ } while (0)
+
void pegasus_server_impl::on_get(get_rpc rpc)
{
CHECK_TRUE(_is_open);
@@ -376,11 +385,7 @@ void pegasus_server_impl::on_get(get_rpc rpc)
return;
}
- if (!_read_size_throttling_controller->available()) {
- rpc.error() = dsn::ERR_BUSY;
- _counter_recent_read_throttling_reject_count->increment();
- return;
- }
+ CHECK_READ_THROTTLING();
METRIC_VAR_AUTO_LATENCY(get_latency_ns);
@@ -453,11 +458,7 @@ void pegasus_server_impl::on_multi_get(multi_get_rpc rpc)
resp.partition_index = _gpid.get_partition_index();
resp.server = _primary_address;
- if (!_read_size_throttling_controller->available()) {
- rpc.error() = dsn::ERR_BUSY;
- _counter_recent_read_throttling_reject_count->increment();
- return;
- }
+ CHECK_READ_THROTTLING();
METRIC_VAR_AUTO_LATENCY(multi_get_latency_ns);
@@ -866,11 +867,7 @@ void pegasus_server_impl::on_batch_get(batch_get_rpc rpc)
response.partition_index = _gpid.get_partition_index();
response.server = _primary_address;
- if (!_read_size_throttling_controller->available()) {
- rpc.error() = dsn::ERR_BUSY;
- _counter_recent_read_throttling_reject_count->increment();
- return;
- }
+ CHECK_READ_THROTTLING();
METRIC_VAR_AUTO_LATENCY(batch_get_latency_ns);
@@ -982,11 +979,7 @@ void pegasus_server_impl::on_sortkey_count(sortkey_count_rpc rpc)
resp.partition_index = _gpid.get_partition_index();
resp.server = _primary_address;
- if (!_read_size_throttling_controller->available()) {
- rpc.error() = dsn::ERR_BUSY;
- _counter_recent_read_throttling_reject_count->increment();
- return;
- }
+ CHECK_READ_THROTTLING();
METRIC_VAR_AUTO_LATENCY(scan_latency_ns);
@@ -1059,11 +1052,7 @@ void pegasus_server_impl::on_ttl(ttl_rpc rpc)
resp.partition_index = _gpid.get_partition_index();
resp.server = _primary_address;
- if (!_read_size_throttling_controller->available()) {
- rpc.error() = dsn::ERR_BUSY;
- _counter_recent_read_throttling_reject_count->increment();
- return;
- }
+ CHECK_READ_THROTTLING();
rocksdb::Slice skey(key.data(), key.length());
std::string value;
@@ -1123,11 +1112,7 @@ void pegasus_server_impl::on_get_scanner(get_scanner_rpc rpc)
resp.partition_index = _gpid.get_partition_index();
resp.server = _primary_address;
- if (!_read_size_throttling_controller->available()) {
- rpc.error() = dsn::ERR_BUSY;
- _counter_recent_read_throttling_reject_count->increment();
- return;
- }
+ CHECK_READ_THROTTLING();
METRIC_VAR_AUTO_LATENCY(scan_latency_ns);
@@ -1374,11 +1359,7 @@ void pegasus_server_impl::on_scan(scan_rpc rpc)
resp.partition_index = _gpid.get_partition_index();
resp.server = _primary_address;
- if (!_read_size_throttling_controller->available()) {
- rpc.error() = dsn::ERR_BUSY;
- _counter_recent_read_throttling_reject_count->increment();
- return;
- }
+ CHECK_READ_THROTTLING();
METRIC_VAR_AUTO_LATENCY(scan_latency_ns);
@@ -1877,13 +1858,13 @@ void pegasus_server_impl::cancel_background_work(bool wait)
LOG_ERROR_PREFIX("rmdir {} failed when stop app", data_dir());
return ::dsn::ERR_FILE_OPERATION_FAILED;
}
- _pfc_rdb_sst_count->set(0);
- _pfc_rdb_sst_size->set(0);
- _pfc_rdb_block_cache_hit_count->set(0);
- _pfc_rdb_block_cache_total_count->set(0);
+ METRIC_VAR_SET(rdb_total_sst_files, 0);
+ METRIC_VAR_SET(rdb_total_sst_size_mb, 0);
+ METRIC_VAR_SET(rdb_index_and_filter_blocks_mem_usage_bytes, 0);
+ METRIC_VAR_SET(rdb_memtable_mem_usage_bytes, 0);
+ METRIC_VAR_SET(rdb_block_cache_hit_count, 0);
+ METRIC_VAR_SET(rdb_block_cache_total_count, 0);
_pfc_rdb_block_cache_mem_usage->set(0);
- _pfc_rdb_index_and_filter_blocks_mem_usage->set(0);
- _pfc_rdb_memtable_mem_usage->set(0);
}
LOG_INFO_PREFIX("close app succeed, clear_state = {}", clear_state ? "true" : "false");
@@ -2447,12 +2428,16 @@ range_iteration_state pegasus_server_impl::append_key_value_for_multi_get(
return range_iteration_state::kNormal;
}
+#define GET_TICKER_COUNT_AND_SET_METRIC(ticker_name, metric_name) \
+ do { \
+ METRIC_VAR_SET(metric_name, _statistics->getTickerCount(rocksdb::ticker_name)); \
+ } while (0)
+
void pegasus_server_impl::update_replica_rocksdb_statistics()
{
std::string str_val;
uint64_t val = 0;
- // Update _pfc_rdb_sst_count
for (int i = 0; i < _data_cf_opts.num_levels; ++i) {
int cur_level_count = 0;
if (_db->GetProperty(rocksdb::DB::Properties::kNumFilesAtLevelPrefix + std::to_string(i),
@@ -2461,49 +2446,38 @@ void pegasus_server_impl::update_replica_rocksdb_statistics()
val += cur_level_count;
}
}
- _pfc_rdb_sst_count->set(val);
- LOG_DEBUG_PREFIX("_pfc_rdb_sst_count: {}", val);
+ METRIC_VAR_SET(rdb_total_sst_files, val);
- // Update _pfc_rdb_sst_size
if (_db->GetProperty(_data_cf, rocksdb::DB::Properties::kTotalSstFilesSize, &str_val) &&
dsn::buf2uint64(str_val, val)) {
static uint64_t bytes_per_mb = 1U << 20U;
- _pfc_rdb_sst_size->set(val / bytes_per_mb);
- LOG_DEBUG_PREFIX("_pfc_rdb_sst_size: {} bytes", val);
+ METRIC_VAR_SET(rdb_total_sst_size_mb, val / bytes_per_mb);
}
- // Update _pfc_rdb_write_amplification
std::map<std::string, std::string> props;
if (_db->GetMapProperty(_data_cf, "rocksdb.cfstats", &props)) {
auto write_amplification_iter = props.find("compaction.Sum.WriteAmp");
auto write_amplification = write_amplification_iter == props.end()
? 1
: std::stod(write_amplification_iter->second);
- _pfc_rdb_write_amplification->set(write_amplification);
- LOG_DEBUG_PREFIX("_pfc_rdb_write_amplification: {}", write_amplification);
+ METRIC_VAR_SET(rdb_write_amplification, write_amplification);
}
- // Update _pfc_rdb_index_and_filter_blocks_mem_usage
if (_db->GetProperty(_data_cf, rocksdb::DB::Properties::kEstimateTableReadersMem, &str_val) &&
dsn::buf2uint64(str_val, val)) {
- _pfc_rdb_index_and_filter_blocks_mem_usage->set(val);
- LOG_DEBUG_PREFIX("_pfc_rdb_index_and_filter_blocks_mem_usage: {} bytes", val);
+ METRIC_VAR_SET(rdb_index_and_filter_blocks_mem_usage_bytes, val);
}
- // Update _pfc_rdb_memtable_mem_usage
if (_db->GetProperty(_data_cf, rocksdb::DB::Properties::kCurSizeAllMemTables, &str_val) &&
dsn::buf2uint64(str_val, val)) {
- _pfc_rdb_memtable_mem_usage->set(val);
- LOG_DEBUG_PREFIX("_pfc_rdb_memtable_mem_usage: {} bytes", val);
+ METRIC_VAR_SET(rdb_memtable_mem_usage_bytes, val);
}
- // Update _pfc_rdb_estimate_num_keys
// NOTE: for the same n kv pairs, kEstimateNumKeys will be counted n times, you need compaction
// to remove duplicate
if (_db->GetProperty(_data_cf, rocksdb::DB::Properties::kEstimateNumKeys, &str_val) &&
dsn::buf2uint64(str_val, val)) {
- _pfc_rdb_estimate_num_keys->set(val);
- LOG_DEBUG_PREFIX("_pfc_rdb_estimate_num_keys: {}", val);
+ METRIC_VAR_SET(rdb_estimated_keys, val);
}
// the follow stats is related to `read`, so only primary need update it,ignore
@@ -2512,7 +2486,6 @@ void pegasus_server_impl::update_replica_rocksdb_statistics()
return;
}
- // Update _pfc_rdb_read_amplification
if (FLAGS_read_amp_bytes_per_bit > 0) {
auto estimate_useful_bytes =
_statistics->getTickerCount(rocksdb::READ_AMP_ESTIMATE_USEFUL_BYTES);
@@ -2520,68 +2493,41 @@ void pegasus_server_impl::update_replica_rocksdb_statistics()
auto read_amplification =
_statistics->getTickerCount(rocksdb::READ_AMP_TOTAL_READ_BYTES) /
estimate_useful_bytes;
- _pfc_rdb_read_amplification->set(read_amplification);
- LOG_DEBUG_PREFIX("_pfc_rdb_read_amplification: {}", read_amplification);
+ METRIC_VAR_SET(rdb_read_amplification, read_amplification);
}
}
- // Update _pfc_rdb_bf_seek_negatives
- auto bf_seek_negatives = _statistics->getTickerCount(rocksdb::BLOOM_FILTER_PREFIX_USEFUL);
- _pfc_rdb_bf_seek_negatives->set(bf_seek_negatives);
- LOG_DEBUG_PREFIX("_pfc_rdb_bf_seek_negatives: {}", bf_seek_negatives);
+ GET_TICKER_COUNT_AND_SET_METRIC(BLOOM_FILTER_PREFIX_USEFUL, rdb_bloom_filter_seek_negatives);
- // Update _pfc_rdb_bf_seek_total
- auto bf_seek_total = _statistics->getTickerCount(rocksdb::BLOOM_FILTER_PREFIX_CHECKED);
- _pfc_rdb_bf_seek_total->set(bf_seek_total);
- LOG_DEBUG_PREFIX("_pfc_rdb_bf_seek_total: {}", bf_seek_total);
+ GET_TICKER_COUNT_AND_SET_METRIC(BLOOM_FILTER_PREFIX_CHECKED, rdb_bloom_filter_seek_total);
- // Update _pfc_rdb_bf_point_positive_true
- auto bf_point_positive_true =
- _statistics->getTickerCount(rocksdb::BLOOM_FILTER_FULL_TRUE_POSITIVE);
- _pfc_rdb_bf_point_positive_true->set(bf_point_positive_true);
- LOG_DEBUG_PREFIX("_pfc_rdb_bf_point_positive_true: {}", bf_point_positive_true);
+ GET_TICKER_COUNT_AND_SET_METRIC(BLOOM_FILTER_USEFUL, rdb_bloom_filter_point_lookup_negatives);
- // Update _pfc_rdb_bf_point_positive_total
- auto bf_point_positive_total = _statistics->getTickerCount(rocksdb::BLOOM_FILTER_FULL_POSITIVE);
- _pfc_rdb_bf_point_positive_total->set(bf_point_positive_total);
- LOG_DEBUG_PREFIX("_pfc_rdb_bf_point_positive_total: {}", bf_point_positive_total);
+ GET_TICKER_COUNT_AND_SET_METRIC(BLOOM_FILTER_FULL_POSITIVE,
+ rdb_bloom_filter_point_lookup_positives);
- // Update _pfc_rdb_bf_point_negatives
- auto bf_point_negatives = _statistics->getTickerCount(rocksdb::BLOOM_FILTER_USEFUL);
- _pfc_rdb_bf_point_negatives->set(bf_point_negatives);
- LOG_DEBUG_PREFIX("_pfc_rdb_bf_point_negatives: {}", bf_point_negatives);
+ GET_TICKER_COUNT_AND_SET_METRIC(BLOOM_FILTER_FULL_TRUE_POSITIVE,
+ rdb_bloom_filter_point_lookup_true_positives);
- // Update _pfc_rdb_block_cache_hit_count and _pfc_rdb_block_cache_total_count
auto block_cache_hit = _statistics->getTickerCount(rocksdb::BLOCK_CACHE_HIT);
- _pfc_rdb_block_cache_hit_count->set(block_cache_hit);
- LOG_DEBUG_PREFIX("_pfc_rdb_block_cache_hit_count: {}", block_cache_hit);
+ METRIC_VAR_SET(rdb_block_cache_hit_count, block_cache_hit);
auto block_cache_miss = _statistics->getTickerCount(rocksdb::BLOCK_CACHE_MISS);
auto block_cache_total = block_cache_hit + block_cache_miss;
- _pfc_rdb_block_cache_total_count->set(block_cache_total);
- LOG_DEBUG_PREFIX("_pfc_rdb_block_cache_total_count: {}", block_cache_total);
+ METRIC_VAR_SET(rdb_block_cache_total_count, block_cache_total);
- // update block memtable/l0/l1/l2andup hit rate under block cache up level
auto memtable_hit_count = _statistics->getTickerCount(rocksdb::MEMTABLE_HIT);
- _pfc_rdb_memtable_hit_count->set(memtable_hit_count);
- LOG_DEBUG_PREFIX("_pfc_rdb_memtable_hit_count: {}", memtable_hit_count);
+ METRIC_VAR_SET(rdb_memtable_hit_count, memtable_hit_count);
auto memtable_miss_count = _statistics->getTickerCount(rocksdb::MEMTABLE_MISS);
auto memtable_total = memtable_hit_count + memtable_miss_count;
- _pfc_rdb_memtable_total_count->set(memtable_total);
- LOG_DEBUG_PREFIX("_pfc_rdb_memtable_total_count: {}", memtable_total);
+ METRIC_VAR_SET(rdb_memtable_total_count, memtable_total);
- auto l0_hit_count = _statistics->getTickerCount(rocksdb::GET_HIT_L0);
- _pfc_rdb_l0_hit_count->set(l0_hit_count);
- LOG_DEBUG_PREFIX("_pfc_rdb_l0_hit_count: {}", l0_hit_count);
+ GET_TICKER_COUNT_AND_SET_METRIC(GET_HIT_L0, rdb_l0_hit_count);
- auto l1_hit_count = _statistics->getTickerCount(rocksdb::GET_HIT_L1);
- _pfc_rdb_l1_hit_count->set(l1_hit_count);
- LOG_DEBUG_PREFIX("_pfc_rdb_l1_hit_count: {}", l1_hit_count);
+ GET_TICKER_COUNT_AND_SET_METRIC(GET_HIT_L1, rdb_l1_hit_count);
- auto l2andup_hit_count = _statistics->getTickerCount(rocksdb::GET_HIT_L2_AND_UP);
- _pfc_rdb_l2andup_hit_count->set(l2andup_hit_count);
- LOG_DEBUG_PREFIX("_pfc_rdb_l2andup_hit_count: {}", l2andup_hit_count);
+ GET_TICKER_COUNT_AND_SET_METRIC(GET_HIT_L2_AND_UP, rdb_l2_and_up_hit_count);
}
void pegasus_server_impl::update_server_rocksdb_statistics()
diff --git a/src/server/pegasus_server_impl.h b/src/server/pegasus_server_impl.h
index 7203d0ce2..c107089af 100644
--- a/src/server/pegasus_server_impl.h
+++ b/src/server/pegasus_server_impl.h
@@ -539,34 +539,36 @@ private:
METRIC_VAR_DECLARE_counter(read_expired_values);
METRIC_VAR_DECLARE_counter(read_filtered_values);
METRIC_VAR_DECLARE_counter(abnormal_read_requests);
+ METRIC_VAR_DECLARE_counter(throttling_rejected_read_requests);
// rocksdb internal statistics
// server level
static ::dsn::perf_counter_wrapper _pfc_rdb_write_limiter_rate_bytes;
static ::dsn::perf_counter_wrapper _pfc_rdb_block_cache_mem_usage;
- // replica level
- dsn::perf_counter_wrapper _pfc_rdb_sst_count;
- dsn::perf_counter_wrapper _pfc_rdb_sst_size;
- dsn::perf_counter_wrapper _pfc_rdb_index_and_filter_blocks_mem_usage;
- dsn::perf_counter_wrapper _pfc_rdb_memtable_mem_usage;
- dsn::perf_counter_wrapper _pfc_rdb_estimate_num_keys;
-
- dsn::perf_counter_wrapper _pfc_rdb_bf_seek_negatives;
- dsn::perf_counter_wrapper _pfc_rdb_bf_seek_total;
- dsn::perf_counter_wrapper _pfc_rdb_bf_point_positive_true;
- dsn::perf_counter_wrapper _pfc_rdb_bf_point_positive_total;
- dsn::perf_counter_wrapper _pfc_rdb_bf_point_negatives;
- dsn::perf_counter_wrapper _pfc_rdb_block_cache_hit_count;
- dsn::perf_counter_wrapper _pfc_rdb_block_cache_total_count;
- dsn::perf_counter_wrapper _pfc_rdb_write_amplification;
- dsn::perf_counter_wrapper _pfc_rdb_read_amplification;
- dsn::perf_counter_wrapper _pfc_rdb_memtable_hit_count;
- dsn::perf_counter_wrapper _pfc_rdb_memtable_total_count;
- dsn::perf_counter_wrapper _pfc_rdb_l0_hit_count;
- dsn::perf_counter_wrapper _pfc_rdb_l1_hit_count;
- dsn::perf_counter_wrapper _pfc_rdb_l2andup_hit_count;
-
- dsn::perf_counter_wrapper _counter_recent_read_throttling_reject_count;
+
+ // Replica-level metrics for rocksdb.
+ METRIC_VAR_DECLARE_gauge_int64(rdb_total_sst_files);
+ METRIC_VAR_DECLARE_gauge_int64(rdb_total_sst_size_mb);
+ METRIC_VAR_DECLARE_gauge_int64(rdb_estimated_keys);
+
+ METRIC_VAR_DECLARE_gauge_int64(rdb_index_and_filter_blocks_mem_usage_bytes);
+ METRIC_VAR_DECLARE_gauge_int64(rdb_memtable_mem_usage_bytes);
+ METRIC_VAR_DECLARE_gauge_int64(rdb_block_cache_hit_count);
+ METRIC_VAR_DECLARE_gauge_int64(rdb_block_cache_total_count);
+ METRIC_VAR_DECLARE_gauge_int64(rdb_memtable_hit_count);
+ METRIC_VAR_DECLARE_gauge_int64(rdb_memtable_total_count);
+ METRIC_VAR_DECLARE_gauge_int64(rdb_l0_hit_count);
+ METRIC_VAR_DECLARE_gauge_int64(rdb_l1_hit_count);
+ METRIC_VAR_DECLARE_gauge_int64(rdb_l2_and_up_hit_count);
+
+ METRIC_VAR_DECLARE_gauge_int64(rdb_write_amplification);
+ METRIC_VAR_DECLARE_gauge_int64(rdb_read_amplification);
+
+ METRIC_VAR_DECLARE_gauge_int64(rdb_bloom_filter_seek_negatives);
+ METRIC_VAR_DECLARE_gauge_int64(rdb_bloom_filter_seek_total);
+ METRIC_VAR_DECLARE_gauge_int64(rdb_bloom_filter_point_lookup_negatives);
+ METRIC_VAR_DECLARE_gauge_int64(rdb_bloom_filter_point_lookup_positives);
+ METRIC_VAR_DECLARE_gauge_int64(rdb_bloom_filter_point_lookup_true_positives);
};
} // namespace server
diff --git a/src/server/pegasus_server_impl_init.cpp b/src/server/pegasus_server_impl_init.cpp
index 27aebdfab..0840c094b 100644
--- a/src/server/pegasus_server_impl_init.cpp
+++ b/src/server/pegasus_server_impl_init.cpp
@@ -65,57 +65,179 @@ class replica;
METRIC_DEFINE_counter(replica,
get_requests,
dsn::metric_unit::kRequests,
- "The number of GET requests for each replica");
+ "The number of GET requests");
METRIC_DEFINE_counter(replica,
multi_get_requests,
dsn::metric_unit::kRequests,
- "The number of MULTI_GET requests for each replica");
+ "The number of MULTI_GET requests");
METRIC_DEFINE_counter(replica,
batch_get_requests,
dsn::metric_unit::kRequests,
- "The number of BATCH_GET requests for each replica");
+ "The number of BATCH_GET requests");
METRIC_DEFINE_counter(replica,
scan_requests,
dsn::metric_unit::kRequests,
- "The number of SCAN requests for each replica");
+ "The number of SCAN requests");
METRIC_DEFINE_percentile_int64(replica,
get_latency_ns,
dsn::metric_unit::kNanoSeconds,
- "The latency of GET requests for each replica");
+ "The latency of GET requests");
METRIC_DEFINE_percentile_int64(replica,
multi_get_latency_ns,
dsn::metric_unit::kNanoSeconds,
- "The latency of MULTI_GET requests for each replica");
+ "The latency of MULTI_GET requests");
METRIC_DEFINE_percentile_int64(replica,
batch_get_latency_ns,
dsn::metric_unit::kNanoSeconds,
- "The latency of BATCH_GET requests for each replica");
+ "The latency of BATCH_GET requests");
METRIC_DEFINE_percentile_int64(replica,
scan_latency_ns,
dsn::metric_unit::kNanoSeconds,
- "The latency of SCAN requests for each replica");
+ "The latency of SCAN requests");
METRIC_DEFINE_counter(replica,
read_expired_values,
dsn::metric_unit::kValues,
- "The number of expired values read for each replica");
+ "The number of expired values read");
METRIC_DEFINE_counter(replica,
read_filtered_values,
dsn::metric_unit::kValues,
- "The number of filtered values read for each replica");
+ "The number of filtered values read");
METRIC_DEFINE_counter(replica,
abnormal_read_requests,
dsn::metric_unit::kRequests,
- "The number of abnormal read requests for each replica");
+ "The number of abnormal read requests");
+
+METRIC_DEFINE_counter(replica,
+ throttling_rejected_read_requests,
+ dsn::metric_unit::kRequests,
+ "The number of rejected read requests by throttling");
+
+METRIC_DEFINE_gauge_int64(replica,
+ rdb_total_sst_files,
+ dsn::metric_unit::kFiles,
+ "The total number of rocksdb sst files");
+
+METRIC_DEFINE_gauge_int64(replica,
+ rdb_total_sst_size_mb,
+ dsn::metric_unit::kMegaBytes,
+ "The total size of rocksdb sst files in MB");
+
+METRIC_DEFINE_gauge_int64(replica,
+ rdb_estimated_keys,
+ dsn::metric_unit::kKeys,
+ "The estimated number of rocksdb keys");
+
+METRIC_DEFINE_gauge_int64(replica,
+ rdb_index_and_filter_blocks_mem_usage_bytes,
+ dsn::metric_unit::kBytes,
+ "The memory usage of rocksdb index and filter blocks in bytes");
+
+METRIC_DEFINE_gauge_int64(replica,
+ rdb_memtable_mem_usage_bytes,
+ dsn::metric_unit::kBytes,
+ "The memory usage of rocksdb memtables in bytes");
+
+METRIC_DEFINE_gauge_int64(replica,
+ rdb_block_cache_hit_count,
+ dsn::metric_unit::kPointLookups,
+ "The hit number of lookups on rocksdb block cache");
+
+METRIC_DEFINE_gauge_int64(replica,
+ rdb_block_cache_total_count,
+ dsn::metric_unit::kPointLookups,
+ "The total number of lookups on rocksdb block cache");
+
+METRIC_DEFINE_gauge_int64(replica,
+ rdb_memtable_hit_count,
+ dsn::metric_unit::kPointLookups,
+ "The hit number of lookups on rocksdb memtable");
+
+METRIC_DEFINE_gauge_int64(replica,
+ rdb_memtable_total_count,
+ dsn::metric_unit::kPointLookups,
+ "The total number of lookups on rocksdb memtable");
+
+METRIC_DEFINE_gauge_int64(replica,
+ rdb_l0_hit_count,
+ dsn::metric_unit::kPointLookups,
+ "The number of lookups served by rocksdb L0");
+
+METRIC_DEFINE_gauge_int64(replica,
+ rdb_l1_hit_count,
+ dsn::metric_unit::kPointLookups,
+ "The number of lookups served by rocksdb L1");
+
+METRIC_DEFINE_gauge_int64(replica,
+ rdb_l2_and_up_hit_count,
+ dsn::metric_unit::kPointLookups,
+ "The number of lookups served by rocksdb L2 and up");
+
+METRIC_DEFINE_gauge_int64(replica,
+ rdb_write_amplification,
+ dsn::metric_unit::kAmplification,
+ "The write amplification of rocksdb");
+
+METRIC_DEFINE_gauge_int64(replica,
+ rdb_read_amplification,
+ dsn::metric_unit::kAmplification,
+ "The read amplification of rocksdb");
+
+// Following metrics are rocksdb statistics that are related to bloom filters.
+//
+// To measure prefix bloom filters, these metrics are updated after each ::Seek and ::SeekForPrev if
+// prefix is enabled and check_filter is set:
+// * rdb_bloom_filter_seek_negatives: seek_negatives
+// * rdb_bloom_filter_seek_total: seek_negatives + seek_positives
+//
+// To measure full bloom filters, these metrics are updated after each point lookup. If
+// whole_key_filtering is set, this is the result of checking the bloom of the whole key, otherwise
+// this is the result of checking the bloom of the prefix:
+// * rdb_bloom_filter_point_lookup_negatives: [true] negatives
+// * rdb_bloom_filter_point_lookup_positives: positives
+// * rdb_bloom_filter_point_lookup_true_positives: true positives
+//
+// For details, please see: https://github.com/facebook/rocksdb/wiki/RocksDB-Bloom-Filter#statistic
+
+// Prefix bloom filter negatives: seeks for which the filter check avoided iterator creation.
+METRIC_DEFINE_gauge_int64(replica,
+ rdb_bloom_filter_seek_negatives,
+ dsn::metric_unit::kSeeks,
+ "The number of times the check for prefix bloom filter was useful in "
+ "avoiding iterator creation (and thus likely IOPs), used by rocksdb");
+
+METRIC_DEFINE_gauge_int64(replica,
+ rdb_bloom_filter_seek_total,
+ dsn::metric_unit::kSeeks,
+ "The number of times prefix bloom filter was checked before creating "
+ "iterator on a file, used by rocksdb");
+
+METRIC_DEFINE_gauge_int64(replica,
+ rdb_bloom_filter_point_lookup_negatives,
+ dsn::metric_unit::kPointLookups,
+ "The number of times full bloom filter has avoided file reads (i.e., "
+ "negatives), used by rocksdb");
+
+METRIC_DEFINE_gauge_int64(replica,
+ rdb_bloom_filter_point_lookup_positives,
+ dsn::metric_unit::kPointLookups,
+ "The number of times full bloom filter has not avoided the reads, used "
+ "by rocksdb");
+
+METRIC_DEFINE_gauge_int64(replica,
+ rdb_bloom_filter_point_lookup_true_positives,
+ dsn::metric_unit::kPointLookups,
+ "The number of times full bloom filter has not avoided the reads and "
+ "data actually exist, used by rocksdb");
namespace pegasus {
namespace server {
@@ -469,7 +591,27 @@ pegasus_server_impl::pegasus_server_impl(dsn::replication::replica *r)
METRIC_VAR_INIT_replica(scan_latency_ns),
METRIC_VAR_INIT_replica(read_expired_values),
METRIC_VAR_INIT_replica(read_filtered_values),
- METRIC_VAR_INIT_replica(abnormal_read_requests)
+ METRIC_VAR_INIT_replica(abnormal_read_requests),
+ METRIC_VAR_INIT_replica(throttling_rejected_read_requests),
+ METRIC_VAR_INIT_replica(rdb_total_sst_files),
+ METRIC_VAR_INIT_replica(rdb_total_sst_size_mb),
+ METRIC_VAR_INIT_replica(rdb_estimated_keys),
+ METRIC_VAR_INIT_replica(rdb_index_and_filter_blocks_mem_usage_bytes),
+ METRIC_VAR_INIT_replica(rdb_memtable_mem_usage_bytes),
+ METRIC_VAR_INIT_replica(rdb_block_cache_hit_count),
+ METRIC_VAR_INIT_replica(rdb_block_cache_total_count),
+ METRIC_VAR_INIT_replica(rdb_memtable_hit_count),
+ METRIC_VAR_INIT_replica(rdb_memtable_total_count),
+ METRIC_VAR_INIT_replica(rdb_l0_hit_count),
+ METRIC_VAR_INIT_replica(rdb_l1_hit_count),
+ METRIC_VAR_INIT_replica(rdb_l2_and_up_hit_count),
+ METRIC_VAR_INIT_replica(rdb_write_amplification),
+ METRIC_VAR_INIT_replica(rdb_read_amplification),
+ METRIC_VAR_INIT_replica(rdb_bloom_filter_seek_negatives),
+ METRIC_VAR_INIT_replica(rdb_bloom_filter_seek_total),
+ METRIC_VAR_INIT_replica(rdb_bloom_filter_point_lookup_negatives),
+ METRIC_VAR_INIT_replica(rdb_bloom_filter_point_lookup_positives),
+ METRIC_VAR_INIT_replica(rdb_bloom_filter_point_lookup_true_positives)
{
_primary_address = dsn::rpc_address(dsn_primary_address()).to_string();
_gpid = get_gpid();
@@ -675,54 +817,6 @@ pegasus_server_impl::pegasus_server_impl(dsn::replication::replica *r)
std::string str_gpid = _gpid.to_string();
char name[256];
- // register the perf counters
- snprintf(name, 255, "disk.storage.sst.count@%s", str_gpid.c_str());
- _pfc_rdb_sst_count.init_app_counter(
- "app.pegasus", name, COUNTER_TYPE_NUMBER, "statistic the count of sstable files");
-
- snprintf(name, 255, "disk.storage.sst(MB)@%s", str_gpid.c_str());
- _pfc_rdb_sst_size.init_app_counter(
- "app.pegasus", name, COUNTER_TYPE_NUMBER, "statistic the size of sstable files");
-
- snprintf(name, 255, "rdb.block_cache.hit_count@%s", str_gpid.c_str());
- _pfc_rdb_block_cache_hit_count.init_app_counter(
- "app.pegasus", name, COUNTER_TYPE_NUMBER, "statistic the hit count of rocksdb block cache");
-
- snprintf(name, 255, "rdb.block_cache.total_count@%s", str_gpid.c_str());
- _pfc_rdb_block_cache_total_count.init_app_counter(
- "app.pegasus",
- name,
- COUNTER_TYPE_NUMBER,
- "statistic the total count of rocksdb block cache");
-
- snprintf(name, 255, "rdb.write_amplification@%s", str_gpid.c_str());
- _pfc_rdb_write_amplification.init_app_counter(
- "app.pegasus", name, COUNTER_TYPE_NUMBER, "statistics the write amplification of rocksdb");
-
- snprintf(name, 255, "rdb.read_amplification@%s", str_gpid.c_str());
- _pfc_rdb_read_amplification.init_app_counter(
- "app.pegasus", name, COUNTER_TYPE_NUMBER, "statistics the read amplification of rocksdb");
-
- snprintf(name, 255, "rdb.read_memtable_hit_count@%s", str_gpid.c_str());
- _pfc_rdb_memtable_hit_count.init_app_counter(
- "app.pegasus", name, COUNTER_TYPE_NUMBER, "statistics the read memtable hit count");
-
- snprintf(name, 255, "rdb.read_memtable_total_count@%s", str_gpid.c_str());
- _pfc_rdb_memtable_total_count.init_app_counter(
- "app.pegasus", name, COUNTER_TYPE_NUMBER, "statistics the read memtable total count");
-
- snprintf(name, 255, "rdb.read_l0_hit_count@%s", str_gpid.c_str());
- _pfc_rdb_l0_hit_count.init_app_counter(
- "app.pegasus", name, COUNTER_TYPE_NUMBER, "statistics the read l0 hit count");
-
- snprintf(name, 255, "rdb.read_l1_hit_count@%s", str_gpid.c_str());
- _pfc_rdb_l1_hit_count.init_app_counter(
- "app.pegasus", name, COUNTER_TYPE_NUMBER, "statistics the read l1 hit count");
-
- snprintf(name, 255, "rdb.read_l2andup_hit_count@%s", str_gpid.c_str());
- _pfc_rdb_l2andup_hit_count.init_app_counter(
- "app.pegasus", name, COUNTER_TYPE_NUMBER, "statistics the read l2andup hit count");
-
// These counters are singletons on this server shared by all replicas, so we initialize
// them only once.
static std::once_flag flag;
@@ -741,66 +835,7 @@ pegasus_server_impl::pegasus_server_impl(dsn::replication::replica *r)
COUNTER_TYPE_NUMBER,
"statistic the through bytes of rocksdb write rate limiter");
});
-
- snprintf(name, 255, "rdb.index_and_filter_blocks.memory_usage@%s", str_gpid.c_str());
- _pfc_rdb_index_and_filter_blocks_mem_usage.init_app_counter(
- "app.pegasus",
- name,
- COUNTER_TYPE_NUMBER,
- "statistic the memory usage of rocksdb index and filter blocks");
-
- snprintf(name, 255, "rdb.memtable.memory_usage@%s", str_gpid.c_str());
- _pfc_rdb_memtable_mem_usage.init_app_counter(
- "app.pegasus", name, COUNTER_TYPE_NUMBER, "statistic the memory usage of rocksdb memtable");
-
- snprintf(name, 255, "rdb.estimate_num_keys@%s", str_gpid.c_str());
- _pfc_rdb_estimate_num_keys.init_app_counter(
- "app.pegasus",
- name,
- COUNTER_TYPE_NUMBER,
- "statistics the estimated number of keys inside the rocksdb");
-
- snprintf(name, 255, "rdb.bf_seek_negatives@%s", str_gpid.c_str());
- _pfc_rdb_bf_seek_negatives.init_app_counter("app.pegasus",
- name,
- COUNTER_TYPE_NUMBER,
- "statistics the number of times bloom filter was "
- "checked before creating iterator on a file and "
- "useful in avoiding iterator creation (and thus "
- "likely IOPs)");
-
- snprintf(name, 255, "rdb.bf_seek_total@%s", str_gpid.c_str());
- _pfc_rdb_bf_seek_total.init_app_counter("app.pegasus",
- name,
- COUNTER_TYPE_NUMBER,
- "statistics the number of times bloom filter was "
- "checked before creating iterator on a file");
-
- snprintf(name, 255, "rdb.bf_point_positive_true@%s", str_gpid.c_str());
- _pfc_rdb_bf_point_positive_true.init_app_counter(
- "app.pegasus",
- name,
- COUNTER_TYPE_NUMBER,
- "statistics the number of times bloom filter has avoided file reads, i.e., negatives");
-
- snprintf(name, 255, "rdb.bf_point_positive_total@%s", str_gpid.c_str());
- _pfc_rdb_bf_point_positive_total.init_app_counter(
- "app.pegasus",
- name,
- COUNTER_TYPE_NUMBER,
- "statistics the number of times bloom FullFilter has not avoided the reads");
-
- snprintf(name, 255, "rdb.bf_point_negatives@%s", str_gpid.c_str());
- _pfc_rdb_bf_point_negatives.init_app_counter("app.pegasus",
- name,
- COUNTER_TYPE_NUMBER,
- "statistics the number of times bloom FullFilter "
- "has not avoided the reads and data actually "
- "exist");
-
- auto counter_str = fmt::format("recent.read.throttling.reject.count@{}", str_gpid.c_str());
- _counter_recent_read_throttling_reject_count.init_app_counter(
- "eon.replica", counter_str.c_str(), COUNTER_TYPE_VOLATILE_NUMBER, counter_str.c_str());
}
+
} // namespace server
} // namespace pegasus
diff --git a/src/utils/metrics.h b/src/utils/metrics.h
index da1d056d8..facc83f26 100644
--- a/src/utils/metrics.h
+++ b/src/utils/metrics.h
@@ -161,8 +161,9 @@ class error_code;
METRIC_VAR_DECLARE(name, dsn::percentile_ptr<int64_t>)
// Initialize a metric variable in user class.
-#define METRIC_VAR_INIT(name, entity) _##name(METRIC_##name.instantiate(entity##_metric_entity()))
-#define METRIC_VAR_INIT_replica(name) METRIC_VAR_INIT(name, replica)
+#define METRIC_VAR_INIT(name, entity, ...) \
+ _##name(METRIC_##name.instantiate(entity##_metric_entity(), ##__VA_ARGS__))
+#define METRIC_VAR_INIT_replica(name, ...) METRIC_VAR_INIT(name, replica, ##__VA_ARGS__)
// Perform increment-related operations on metrics including gauge and counter.
#define METRIC_VAR_INCREMENT_BY(name, x) \
@@ -608,8 +609,15 @@ enum class metric_unit : size_t
kMicroSeconds,
kMilliSeconds,
kSeconds,
+ kBytes,
+ kMegaBytes,
kRequests,
+ kSeeks,
+ kPointLookups,
kValues,
+ kKeys,
+ kFiles,
+ kAmplification,
kInvalidUnit,
};
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pegasus.apache.org
For additional commands, e-mail: commits-help@pegasus.apache.org