You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@pegasus.apache.org by "empiredan (via GitHub)" <gi...@apache.org> on 2023/02/07 03:36:18 UTC

[GitHub] [incubator-pegasus] empiredan commented on a diff in pull request #1304: feat(new_metrics): retire stale metric entities that are not used by any other object

empiredan commented on code in PR #1304:
URL: https://github.com/apache/incubator-pegasus/pull/1304#discussion_r1098140697


##########
src/utils/metrics.cpp:
##########
@@ -383,6 +434,111 @@ metric_entity_ptr metric_registry::find_or_create_entity(const metric_entity_pro
     return entity;
 }
 
+metric_registry::collected_stale_entities_info metric_registry::collect_stale_entities() const
+{
+    collected_stale_entities_info collected_info;
+
+    auto now = dsn_now_ms();
+
+    utils::auto_read_lock l(_lock);
+
+    for (const auto &entity : _entities) {
+        if (!entity.second->is_stale()) {
+            if (entity.second->_retire_time_ms > 0) {
+                // This entity had been scheduled to be retired. However, it was reemployed
+                // after that. It has been in use since then, therefore its scheduled time
+                // for retirement should be reset to 0.
+                collected_info.stale_entities.insert(entity.first);
+            }
+            continue;
+        }
+
+        if (entity.second->_retire_time_ms > now) {
+            // This entity has been scheduled to be retired, however it is still within
+            // the retention interval. Thus do not collect it.
+            ++collected_info.num_scheduled_entities;
+            continue;
+        }
+
+        collected_info.stale_entities.insert(entity.first);
+    }
+
+    collected_info.num_all_entities = _entities.size();
+    return collected_info;
+}
+
+metric_registry::retired_entities_stat
+metric_registry::retire_stale_entities(const stale_entity_list &stale_entities)
+{
+    if (stale_entities.empty()) {
+        // Do not lock for empty list.
+        return retired_entities_stat();
+    }
+
+    retired_entities_stat retired_stat;
+
+    auto now = dsn_now_ms();
+
+    utils::auto_write_lock l(_lock);
+
+    for (const auto &stale_entity : stale_entities) {
+        auto iter = _entities.find(stale_entity);
+        if (dsn_unlikely(iter == _entities.end())) {
+            // The entity has been removed from the registry for some unusual reason.
+            continue;
+        }
+
+        if (!iter->second->is_stale()) {
+            if (iter->second->_retire_time_ms > 0) {
+                // For those entities which are reemployed, their scheduled time for retirement
+                // should be reset to 0 though previously they could have been scheduled to be
+                // retired.
+                iter->second->_retire_time_ms = 0;
+                ++retired_stat.num_reemployed_entities;
+            }
+            continue;
+        }
+
+        if (dsn_unlikely(iter->second->_retire_time_ms > now)) {
+            // Since in collect_stale_entities() we've filtered the metrics which have been
+            // outside the retention interval, this is unlikely to happen. However, we still
+            // check here.
+            continue;
+        }
+
+        if (iter->second->_retire_time_ms == 0) {
+            // The entity should be marked with a scheduled time for retirement, since it has
+            // already been considered stale.
+            iter->second->_retire_time_ms = now + FLAGS_entity_retirement_delay_ms;
+            ++retired_stat.num_recently_scheduled_entities;
+            continue;
+        }
+
+        // Once the entity is outside the retention interval, retire it from the registry.
+        _entities.erase(iter);
+        ++retired_stat.num_retired_entities;
+    }
+
+    return retired_stat;
+}
+
+void metric_registry::process_stale_entities()
+{
+    LOG_INFO("begin to process stale metric entities");
+
+    const auto &collected_info = collect_stale_entities();
+    const auto &retired_stat = retire_stale_entities(collected_info.stale_entities);
+
+    LOG_INFO("stat for metric entities: total={}, collected={}, retired={}, scheduled={}, "
+             "recently_scheduled={}, reemployed={}",
+             collected_info.num_all_entities,
+             collected_info.stale_entities.size(),

Review Comment:
   OK, in comparison to `stale_entities`, `collected_entities` is more accurate.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: dev-unsubscribe@pegasus.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe@pegasus.apache.org
For additional commands, e-mail: dev-help@pegasus.apache.org