You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pinot.apache.org by ja...@apache.org on 2024/02/26 17:44:08 UTC
(pinot) branch master updated: Remove all table metrics when a table is deleted (#12403)
This is an automated email from the ASF dual-hosted git repository.
jackie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git
The following commit(s) were added to refs/heads/master by this push:
new 8977c85e4f Remove all table metrics when a table is deleted (#12403)
8977c85e4f is described below
commit 8977c85e4f8e3a8529f875ec32f0042c52938faf
Author: Gonzalo Ortiz Jaureguizar <go...@users.noreply.github.com>
AuthorDate: Mon Feb 26 18:44:01 2024 +0100
Remove all table metrics when a table is deleted (#12403)
---
.../pinot/common/metrics/AbstractMetrics.java | 15 ++++++
.../controller/helix/SegmentStatusChecker.java | 54 ++++++++--------------
.../controller/helix/SegmentStatusCheckerTest.java | 24 +++++-----
.../manager/realtime/IngestionDelayTracker.java | 2 +
.../ControllerPeriodicTasksIntegrationTest.java | 3 +-
.../helix/SegmentMessageHandlerFactory.java | 20 ++++++++
6 files changed, 69 insertions(+), 49 deletions(-)
diff --git a/pinot-common/src/main/java/org/apache/pinot/common/metrics/AbstractMetrics.java b/pinot-common/src/main/java/org/apache/pinot/common/metrics/AbstractMetrics.java
index 456d6ecb14..ee13493e15 100644
--- a/pinot-common/src/main/java/org/apache/pinot/common/metrics/AbstractMetrics.java
+++ b/pinot-common/src/main/java/org/apache/pinot/common/metrics/AbstractMetrics.java
@@ -133,6 +133,11 @@ public abstract class AbstractMetrics<QP extends AbstractMetrics.QueryPhase, M e
}
}
+ public void removePhaseTiming(String tableName, QP phase) {
+ String fullTimerName = _metricPrefix + getTableName(tableName) + "." + phase.getQueryPhaseName();
+ removeTimer(fullTimerName);
+ }
+
public void addPhaseTiming(String tableName, QP phase, long duration, TimeUnit timeUnit) {
String fullTimerName = _metricPrefix + getTableName(tableName) + "." + phase.getQueryPhaseName();
addValueToTimer(fullTimerName, duration, timeUnit);
@@ -208,6 +213,16 @@ public abstract class AbstractMetrics<QP extends AbstractMetrics.QueryPhase, M e
}
}
+ public void removeTimer(final String fullTimerName) {
+ PinotMetricUtils
+ .removeMetric(_metricsRegistry, PinotMetricUtils.makePinotMetricName(_clazz, fullTimerName));
+ }
+
+ public void removeTableTimer(final String tableName, final T timer) {
+ final String fullTimerName = _metricPrefix + getTableName(tableName) + "." + timer.getTimerName();
+ removeTimer(fullTimerName);
+ }
+
/**
* Logs a value to a meter.
*
diff --git a/pinot-controller/src/main/java/org/apache/pinot/controller/helix/SegmentStatusChecker.java b/pinot-controller/src/main/java/org/apache/pinot/controller/helix/SegmentStatusChecker.java
index 5b543e4319..dd598b2b21 100644
--- a/pinot-controller/src/main/java/org/apache/pinot/controller/helix/SegmentStatusChecker.java
+++ b/pinot-controller/src/main/java/org/apache/pinot/controller/helix/SegmentStatusChecker.java
@@ -38,7 +38,9 @@ import org.apache.pinot.common.lineage.SegmentLineageAccessHelper;
import org.apache.pinot.common.lineage.SegmentLineageUtils;
import org.apache.pinot.common.metadata.segment.SegmentZKMetadata;
import org.apache.pinot.common.metrics.ControllerGauge;
+import org.apache.pinot.common.metrics.ControllerMeter;
import org.apache.pinot.common.metrics.ControllerMetrics;
+import org.apache.pinot.common.metrics.ControllerTimer;
import org.apache.pinot.controller.ControllerConf;
import org.apache.pinot.controller.LeadControllerManager;
import org.apache.pinot.controller.helix.core.PinotHelixResourceManager;
@@ -97,8 +99,6 @@ public class SegmentStatusChecker extends ControllerPeriodicTask<SegmentStatusCh
@Override
protected void setUpTask() {
- LOGGER.info("Initializing table metrics for all the tables.");
- setStatusToDefault();
}
@Override
@@ -123,7 +123,7 @@ public class SegmentStatusChecker extends ControllerPeriodicTask<SegmentStatusCh
} catch (Exception e) {
LOGGER.error("Caught exception while updating segment status for table {}", tableNameWithType, e);
// Remove the metric for this table
- resetTableMetrics(tableNameWithType);
+ removeMetricsForTable(tableNameWithType);
}
context._processedTables.add(tableNameWithType);
}
@@ -187,7 +187,7 @@ public class SegmentStatusChecker extends ControllerPeriodicTask<SegmentStatusCh
if (idealState == null) {
LOGGER.warn("Table {} has null ideal state. Skipping segment status checks", tableNameWithType);
- resetTableMetrics(tableNameWithType);
+ removeMetricsForTable(tableNameWithType);
return;
}
@@ -195,7 +195,7 @@ public class SegmentStatusChecker extends ControllerPeriodicTask<SegmentStatusCh
if (context._logDisabledTables) {
LOGGER.warn("Table {} is disabled. Skipping segment status checks", tableNameWithType);
}
- resetTableMetrics(tableNameWithType);
+ removeMetricsForTable(tableNameWithType);
context._disabledTables.add(tableNameWithType);
return;
}
@@ -354,43 +354,27 @@ public class SegmentStatusChecker extends ControllerPeriodicTask<SegmentStatusCh
private void removeMetricsForTable(String tableNameWithType) {
LOGGER.info("Removing metrics from {} given it is not a table known by Helix", tableNameWithType);
- _controllerMetrics.removeTableGauge(tableNameWithType, ControllerGauge.NUMBER_OF_REPLICAS);
- _controllerMetrics.removeTableGauge(tableNameWithType, ControllerGauge.PERCENT_OF_REPLICAS);
- _controllerMetrics.removeTableGauge(tableNameWithType, ControllerGauge.PERCENT_SEGMENTS_AVAILABLE);
- _controllerMetrics.removeTableGauge(tableNameWithType, ControllerGauge.IDEALSTATE_ZNODE_SIZE);
- _controllerMetrics.removeTableGauge(tableNameWithType, ControllerGauge.IDEALSTATE_ZNODE_BYTE_SIZE);
- _controllerMetrics.removeTableGauge(tableNameWithType, ControllerGauge.SEGMENT_COUNT);
- _controllerMetrics.removeTableGauge(tableNameWithType, ControllerGauge.SEGMENT_COUNT_INCLUDING_REPLACED);
-
- _controllerMetrics.removeTableGauge(tableNameWithType, ControllerGauge.SEGMENTS_IN_ERROR_STATE);
- _controllerMetrics.removeTableGauge(tableNameWithType, ControllerGauge.SEGMENTS_WITH_LESS_REPLICAS);
- _controllerMetrics.removeTableGauge(tableNameWithType, ControllerGauge.PERCENT_SEGMENTS_AVAILABLE);
- _controllerMetrics.removeTableGauge(tableNameWithType, ControllerGauge.TABLE_DISABLED);
- _controllerMetrics.removeTableGauge(tableNameWithType, ControllerGauge.TABLE_CONSUMPTION_PAUSED);
- _controllerMetrics.removeTableGauge(tableNameWithType, ControllerGauge.TABLE_REBALANCE_IN_PROGRESS);
- }
-
- private void setStatusToDefault() {
- List<String> allTableNames = _pinotHelixResourceManager.getAllTables();
+ for (ControllerGauge metric : ControllerGauge.values()) {
+ if (!metric.isGlobal()) {
+ _controllerMetrics.removeTableGauge(tableNameWithType, metric);
+ }
+ }
- for (String tableName : allTableNames) {
- resetTableMetrics(tableName);
+ for (ControllerMeter metric : ControllerMeter.values()) {
+ if (!metric.isGlobal()) {
+ _controllerMetrics.removeTableMeter(tableNameWithType, metric);
+ }
}
- }
- private void resetTableMetrics(String tableName) {
- _controllerMetrics.setValueOfTableGauge(tableName, ControllerGauge.NUMBER_OF_REPLICAS, Long.MIN_VALUE);
- _controllerMetrics.setValueOfTableGauge(tableName, ControllerGauge.PERCENT_OF_REPLICAS, Long.MIN_VALUE);
- _controllerMetrics.setValueOfTableGauge(tableName, ControllerGauge.SEGMENTS_IN_ERROR_STATE, Long.MIN_VALUE);
- _controllerMetrics.setValueOfTableGauge(tableName, ControllerGauge.SEGMENTS_WITH_LESS_REPLICAS,
- Long.MIN_VALUE);
- _controllerMetrics.setValueOfTableGauge(tableName, ControllerGauge.PERCENT_SEGMENTS_AVAILABLE, Long.MIN_VALUE);
+ for (ControllerTimer metric : ControllerTimer.values()) {
+ if (!metric.isGlobal()) {
+ _controllerMetrics.removeTableTimer(tableNameWithType, metric);
+ }
+ }
}
@Override
public void cleanUpTask() {
- LOGGER.info("Resetting table metrics for all the tables.");
- setStatusToDefault();
}
@VisibleForTesting
diff --git a/pinot-controller/src/test/java/org/apache/pinot/controller/helix/SegmentStatusCheckerTest.java b/pinot-controller/src/test/java/org/apache/pinot/controller/helix/SegmentStatusCheckerTest.java
index 99991b3d4c..c7974b9d0b 100644
--- a/pinot-controller/src/test/java/org/apache/pinot/controller/helix/SegmentStatusCheckerTest.java
+++ b/pinot-controller/src/test/java/org/apache/pinot/controller/helix/SegmentStatusCheckerTest.java
@@ -452,14 +452,14 @@ public class SegmentStatusCheckerTest {
_segmentStatusChecker.setTableSizeReader(_tableSizeReader);
_segmentStatusChecker.start();
_segmentStatusChecker.run();
- Assert.assertEquals(MetricValueUtils.getTableGaugeValue(_controllerMetrics, tableName,
- ControllerGauge.SEGMENTS_IN_ERROR_STATE), Long.MIN_VALUE);
- Assert.assertEquals(MetricValueUtils.getTableGaugeValue(_controllerMetrics, tableName,
- ControllerGauge.SEGMENTS_WITH_LESS_REPLICAS), Long.MIN_VALUE);
- Assert.assertEquals(MetricValueUtils.getTableGaugeValue(_controllerMetrics, tableName,
- ControllerGauge.NUMBER_OF_REPLICAS), Long.MIN_VALUE);
- Assert.assertEquals(MetricValueUtils.getTableGaugeValue(_controllerMetrics, tableName,
- ControllerGauge.PERCENT_OF_REPLICAS), Long.MIN_VALUE);
+ Assert.assertFalse(MetricValueUtils.tableGaugeExists(_controllerMetrics, tableName,
+ ControllerGauge.SEGMENTS_IN_ERROR_STATE));
+ Assert.assertFalse(MetricValueUtils.tableGaugeExists(_controllerMetrics, tableName,
+ ControllerGauge.SEGMENTS_WITH_LESS_REPLICAS));
+ Assert.assertFalse(MetricValueUtils.tableGaugeExists(_controllerMetrics, tableName,
+ ControllerGauge.NUMBER_OF_REPLICAS));
+ Assert.assertFalse(MetricValueUtils.tableGaugeExists(_controllerMetrics, tableName,
+ ControllerGauge.PERCENT_OF_REPLICAS));
Assert.assertFalse(MetricValueUtils.tableGaugeExists(_controllerMetrics, tableName,
ControllerGauge.TABLE_COMPRESSED_SIZE));
}
@@ -820,10 +820,10 @@ public class SegmentStatusCheckerTest {
_segmentStatusChecker.start();
_segmentStatusChecker.run();
- Assert.assertEquals(MetricValueUtils.getTableGaugeValue(_controllerMetrics, tableName,
- ControllerGauge.SEGMENTS_IN_ERROR_STATE), Long.MIN_VALUE);
- Assert.assertEquals(MetricValueUtils.getTableGaugeValue(_controllerMetrics, tableName,
- ControllerGauge.SEGMENTS_IN_ERROR_STATE), Long.MIN_VALUE);
+ Assert.assertFalse(MetricValueUtils.tableGaugeExists(_controllerMetrics, tableName,
+ ControllerGauge.SEGMENTS_IN_ERROR_STATE));
+ Assert.assertFalse(MetricValueUtils.tableGaugeExists(_controllerMetrics, tableName,
+ ControllerGauge.SEGMENTS_IN_ERROR_STATE));
Assert.assertEquals(MetricValueUtils.getTableGaugeValue(_controllerMetrics, tableName,
ControllerGauge.NUMBER_OF_REPLICAS), nReplicasExpectedValue);
Assert.assertEquals(MetricValueUtils.getTableGaugeValue(_controllerMetrics, tableName,
diff --git a/pinot-core/src/main/java/org/apache/pinot/core/data/manager/realtime/IngestionDelayTracker.java b/pinot-core/src/main/java/org/apache/pinot/core/data/manager/realtime/IngestionDelayTracker.java
index 423f1f21cb..8114579d29 100644
--- a/pinot-core/src/main/java/org/apache/pinot/core/data/manager/realtime/IngestionDelayTracker.java
+++ b/pinot-core/src/main/java/org/apache/pinot/core/data/manager/realtime/IngestionDelayTracker.java
@@ -185,6 +185,8 @@ public class IngestionDelayTracker {
// If we are removing a partition we should stop reading its ideal state.
_partitionsMarkedForVerification.remove(partitionGroupId);
_serverMetrics.removePartitionGauge(_metricName, partitionGroupId, ServerGauge.REALTIME_INGESTION_DELAY_MS);
+ _serverMetrics.removePartitionGauge(_metricName, partitionGroupId,
+ ServerGauge.END_TO_END_REALTIME_INGESTION_DELAY_MS);
}
/*
diff --git a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/ControllerPeriodicTasksIntegrationTest.java b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/ControllerPeriodicTasksIntegrationTest.java
index 39e4855f63..6b558b93e7 100644
--- a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/ControllerPeriodicTasksIntegrationTest.java
+++ b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/ControllerPeriodicTasksIntegrationTest.java
@@ -223,8 +223,7 @@ public class ControllerPeriodicTasksIntegrationTest extends BaseClusterIntegrati
return false;
}
if (!checkSegmentStatusCheckerMetrics(controllerMetrics,
- TableNameBuilder.OFFLINE.tableNameWithType(disabledTable), null, Long.MIN_VALUE, Long.MIN_VALUE,
- Long.MIN_VALUE, Long.MIN_VALUE)) {
+ TableNameBuilder.OFFLINE.tableNameWithType(disabledTable), null, 0, 0, 0, 0)) {
return false;
}
String tableNameWithType = TableNameBuilder.OFFLINE.tableNameWithType(getTableName());
diff --git a/pinot-server/src/main/java/org/apache/pinot/server/starter/helix/SegmentMessageHandlerFactory.java b/pinot-server/src/main/java/org/apache/pinot/server/starter/helix/SegmentMessageHandlerFactory.java
index bf3f2f2f22..7fe52f472b 100644
--- a/pinot-server/src/main/java/org/apache/pinot/server/starter/helix/SegmentMessageHandlerFactory.java
+++ b/pinot-server/src/main/java/org/apache/pinot/server/starter/helix/SegmentMessageHandlerFactory.java
@@ -18,6 +18,7 @@
*/
package org.apache.pinot.server.starter.helix;
+import java.util.Arrays;
import java.util.List;
import java.util.Set;
import org.apache.commons.collections.CollectionUtils;
@@ -32,8 +33,11 @@ import org.apache.pinot.common.messages.ForceCommitMessage;
import org.apache.pinot.common.messages.SegmentRefreshMessage;
import org.apache.pinot.common.messages.SegmentReloadMessage;
import org.apache.pinot.common.messages.TableDeletionMessage;
+import org.apache.pinot.common.metrics.ServerGauge;
import org.apache.pinot.common.metrics.ServerMeter;
import org.apache.pinot.common.metrics.ServerMetrics;
+import org.apache.pinot.common.metrics.ServerQueryPhase;
+import org.apache.pinot.common.metrics.ServerTimer;
import org.apache.pinot.core.data.manager.InstanceDataManager;
import org.apache.pinot.core.util.SegmentRefreshSemaphore;
import org.slf4j.Logger;
@@ -177,6 +181,22 @@ public class SegmentMessageHandlerFactory implements MessageHandlerFactory {
_metrics.addMeteredTableValue(_tableNameWithType, ServerMeter.DELETE_TABLE_FAILURES, 1);
Utils.rethrowException(e);
}
+ try {
+ Arrays.stream(ServerMeter.values())
+ .filter(m -> !m.isGlobal())
+ .forEach(m -> _metrics.removeTableMeter(_tableNameWithType, m));
+ Arrays.stream(ServerGauge.values())
+ .filter(g -> !g.isGlobal())
+ .forEach(g -> _metrics.removeTableGauge(_tableNameWithType, g));
+ Arrays.stream(ServerTimer.values())
+ .filter(t -> !t.isGlobal())
+ .forEach(t -> _metrics.removeTableTimer(_tableNameWithType, t));
+ Arrays.stream(ServerQueryPhase.values())
+ .forEach(p -> _metrics.removePhaseTiming(_tableNameWithType, p));
+ } catch (Exception e) {
+ LOGGER.warn("Error while removing metrics of removed table {}. "
+ + "Some metrics may survive until the next restart.", _tableNameWithType);
+ }
return helixTaskResult;
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org