You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@pegasus.apache.org by GitBox <gi...@apache.org> on 2023/01/09 03:42:19 UTC

[GitHub] [incubator-pegasus] acelyc111 commented on a diff in pull request #1304: feat(new_metrics): retire the old metrics and entities that are not in use

acelyc111 commented on code in PR #1304:
URL: https://github.com/apache/incubator-pegasus/pull/1304#discussion_r1064266714


##########
src/utils/metrics.cpp:
##########
@@ -177,6 +203,109 @@ void metric_entity::take_snapshot(metric_json_writer &writer, const metric_filte
     writer.EndObject();
 }
 
+bool metric_entity::is_stale() const
+{
+    // Since this entity itself is still being accessed, its reference count should be 1
+    // at least.
+    CHECK_GE(get_count(), 1);
+
+    // Once this entity did not have any metric, and had only one reference kept in the
+    // registry, this entity would be considered useless.
+    return _metrics.empty() && get_count() == 1;
+}
+
+metric_entity::collected_old_metrics_info metric_entity::collect_old_metrics() const

Review Comment:
   what dose 'old' mean?



##########
src/utils/metrics.h:
##########
@@ -153,6 +153,36 @@ class metric_entity : public ref_counter
 public:
     using attr_map = std::unordered_map<std::string, std::string>;
     using metric_map = std::unordered_map<const metric_prototype *, metric_ptr>;
+    using old_metric_list = std::unordered_set<const metric_prototype *>;
+
+    struct collected_old_metrics_info
+    {
+        old_metric_list old_metrics;

Review Comment:
   Could you add some comments about `old_metrics`?



##########
src/utils/metrics.h:
##########
@@ -354,10 +392,96 @@ class metrics_http_service : public http_server_base
     DISALLOW_COPY_AND_ASSIGN(metrics_http_service);
 };
 
+// `metric_timer` is a timer class that encapsulates the details how each percentile is
+// computed periodically.
+//
+// To be instantiated, it requires `interval_ms` at which a percentile is computed and `exec`
+// which is used to compute percentile.
+//
+// In case that all percentiles are computed at the same time and lead to very high load,
+// first computation for percentile will be delayed at a random interval.
+class metric_timer
+{
+public:
+    enum class state : int
+    {
+        kRunning,
+        kClosing,
+        kClosed,
+    };
+
+    using on_exec_fn = std::function<void()>;
+    using on_close_fn = std::function<void()>;
+
+    metric_timer(uint64_t interval_ms, on_exec_fn on_exec, on_close_fn on_close);
+    ~metric_timer() = default;
+
+    void close();
+    void wait();
+
+    // Get the initial delay that is randomly generated by `generate_initial_delay_ms()`.
+    uint64_t get_initial_delay_ms() const { return _initial_delay_ms; }
+
+private:
+    // Generate an initial delay randomly in case that all percentiles are computed at the
+    // same time.
+    static uint64_t generate_initial_delay_ms(uint64_t interval_ms);
+
+    void on_close();
+
+    void on_timer(const boost::system::error_code &ec);
+
+    const uint64_t _initial_delay_ms;
+    const uint64_t _interval_ms;
+    const on_exec_fn _on_exec;
+    const on_close_fn _on_close;
+    std::atomic<state> _state;
+    utils::notify_event _completed;
+    std::unique_ptr<boost::asio::deadline_timer> _timer;
+
+    DISALLOW_COPY_AND_ASSIGN(metric_timer);
+};
+
 class metric_registry : public utils::singleton<metric_registry>
 {
 public:
     using entity_map = std::unordered_map<std::string, metric_entity_ptr>;
+    using old_entity_map = std::unordered_map<std::string, metric_entity::old_metric_list>;
+
+    struct collected_old_entities_info
+    {
+        old_entity_map old_entities;

Review Comment:
   Could you add some comments about it?



##########
src/utils/metrics.cpp:
##########
@@ -383,15 +547,134 @@ metric_entity_ptr metric_registry::find_or_create_entity(const metric_entity_pro
     return entity;
 }
 
+metric_registry::collected_old_entities_info metric_registry::collect_old_metrics() const
+{
+    collected_old_entities_info entities_info;
+
+    utils::auto_read_lock l(_lock);
+
+    for (const auto &entity : _entities) {
+        const auto &metrics_info = entity.second->collect_old_metrics();
+
+        entities_info.num_all_metrics += metrics_info.num_all_metrics;
+        entities_info.num_scheduled_metrics += metrics_info.num_scheduled_metrics;
+        if (!metrics_info.old_metrics.empty()) {
+            // Those entities which have metrics that should be retired will be collected.
+            entities_info.old_entities.emplace(entity.first, std::move(metrics_info.old_metrics));
+            continue;
+        }
+
+        if (!entity.second->is_stale()) {
+            continue;
+        }
+
+        // Since this entity itself should be retired, it will be collected without any
+        // metric. Actually it has already not had any metric itself.
+        (void)entities_info.old_entities[entity.first];
+    }
+
+    entities_info.num_all_entities = _entities.size();
+    return entities_info;
+}
+
+metric_registry::retired_entities_stat
+metric_registry::retire_old_metrics(const old_entity_map &old_entities)
+{
+    if (old_entities.empty()) {

Review Comment:
   If some other thread is writing `old_entities`, will it cause data race?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: dev-unsubscribe@pegasus.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe@pegasus.apache.org
For additional commands, e-mail: dev-help@pegasus.apache.org