You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pinot.apache.org by ro...@apache.org on 2023/12/07 15:51:57 UTC
(pinot) branch master updated: Metrics for Table Disabled and Consumption Paused (#12000)
This is an automated email from the ASF dual-hosted git repository.
rongr pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git
The following commit(s) were added to refs/heads/master by this push:
new e62db612c9 Metrics for Table Disabled and Consumption Paused (#12000)
e62db612c9 is described below
commit e62db612c91b4b540bcdfeecb412442eac35ced2
Author: Prashant Pandey <84...@users.noreply.github.com>
AuthorDate: Thu Dec 7 21:21:50 2023 +0530
Metrics for Table Disabled and Consumption Paused (#12000)
---
.../configs/controller.yml | 12 ++++++++
.../pinot/common/metrics/ControllerGauge.java | 6 +++-
.../controller/helix/SegmentStatusChecker.java | 35 ++++++++++++++++++++--
.../realtime/PinotLLCRealtimeSegmentManager.java | 6 ++--
4 files changed, 52 insertions(+), 7 deletions(-)
diff --git a/docker/images/pinot/etc/jmx_prometheus_javaagent/configs/controller.yml b/docker/images/pinot/etc/jmx_prometheus_javaagent/configs/controller.yml
index c4071887ed..e86243dfc1 100644
--- a/docker/images/pinot/etc/jmx_prometheus_javaagent/configs/controller.yml
+++ b/docker/images/pinot/etc/jmx_prometheus_javaagent/configs/controller.yml
@@ -204,6 +204,18 @@ rules:
cache: true
labels:
version: "$2"
+- pattern: "\"org.apache.pinot.common.metrics\"<type=\"ControllerMetrics\", name=\"pinot.controller.tableConsumptionPaused.([^\\.]*?)_(OFFLINE|REALTIME)\"><>(\\w+)"
+ name: "pinot_controller_tableConsumptionPaused_$3"
+ cache: true
+ labels:
+ tableName: "$1"
+ tableType: "$2"
+- pattern: "\"org.apache.pinot.common.metrics\"<type=\"ControllerMetrics\", name=\"pinot.controller.tableDisabled.([^\\.]*?)_(OFFLINE|REALTIME)\"><>(\\w+)"
+ name: "pinot_controller_tableDisabled_$3"
+ cache: true
+ labels:
+ tableName: "$1"
+ tableType: "$2"
## Metrics that fit the catch-all patterns above should not be added to this file.
## In case a metric does not fit the catch-all patterns, add them before this comment
diff --git a/pinot-common/src/main/java/org/apache/pinot/common/metrics/ControllerGauge.java b/pinot-common/src/main/java/org/apache/pinot/common/metrics/ControllerGauge.java
index 4006ca45b0..3444ffae5f 100644
--- a/pinot-common/src/main/java/org/apache/pinot/common/metrics/ControllerGauge.java
+++ b/pinot-common/src/main/java/org/apache/pinot/common/metrics/ControllerGauge.java
@@ -149,7 +149,11 @@ public enum ControllerGauge implements AbstractMetrics.Gauge {
// Number of tables that we want to fix but failed to update table config
FAILED_TO_UPDATE_TABLE_CONFIG_COUNT("failedToUpdateTableConfigCount", true),
- LLC_SEGMENTS_DEEP_STORE_UPLOAD_RETRY_QUEUE_SIZE("LLCSegmentDeepStoreUploadRetryQueueSize", false);
+ LLC_SEGMENTS_DEEP_STORE_UPLOAD_RETRY_QUEUE_SIZE("LLCSegmentDeepStoreUploadRetryQueueSize", false),
+
+ TABLE_CONSUMPTION_PAUSED("tableConsumptionPaused", false),
+
+ TABLE_DISABLED("tableDisabled", false);
private final String _gaugeName;
private final String _unit;
diff --git a/pinot-controller/src/main/java/org/apache/pinot/controller/helix/SegmentStatusChecker.java b/pinot-controller/src/main/java/org/apache/pinot/controller/helix/SegmentStatusChecker.java
index f64e6c3e75..617564757e 100644
--- a/pinot-controller/src/main/java/org/apache/pinot/controller/helix/SegmentStatusChecker.java
+++ b/pinot-controller/src/main/java/org/apache/pinot/controller/helix/SegmentStatusChecker.java
@@ -44,6 +44,7 @@ import org.apache.pinot.controller.LeadControllerManager;
import org.apache.pinot.controller.helix.core.PinotHelixResourceManager;
import org.apache.pinot.controller.helix.core.periodictask.ControllerPeriodicTask;
import org.apache.pinot.controller.helix.core.realtime.MissingConsumingSegmentFinder;
+import org.apache.pinot.controller.helix.core.realtime.PinotLLCRealtimeSegmentManager;
import org.apache.pinot.controller.util.TableSizeReader;
import org.apache.pinot.spi.config.table.TableConfig;
import org.apache.pinot.spi.config.table.TableType;
@@ -133,7 +134,24 @@ public class SegmentStatusChecker extends ControllerPeriodicTask<SegmentStatusCh
protected void postprocess(Context context) {
_controllerMetrics.setValueOfGlobalGauge(ControllerGauge.REALTIME_TABLE_COUNT, context._realTimeTableCount);
_controllerMetrics.setValueOfGlobalGauge(ControllerGauge.OFFLINE_TABLE_COUNT, context._offlineTableCount);
- _controllerMetrics.setValueOfGlobalGauge(ControllerGauge.DISABLED_TABLE_COUNT, context._disabledTableCount);
+ _controllerMetrics.setValueOfGlobalGauge(ControllerGauge.DISABLED_TABLE_COUNT, context._disabledTables.size());
+
+ // Emit a 0 for tables that are not paused/disabled. This makes alert expressions simpler, as we don't have to
+ // deal with missing metrics.
+ context._processedTables.forEach(tableNameWithType -> {
+ if (context._pausedTables.contains(tableNameWithType)) {
+ _controllerMetrics.setValueOfTableGauge(tableNameWithType, ControllerGauge.TABLE_CONSUMPTION_PAUSED, 1);
+ } else {
+ _controllerMetrics.setValueOfTableGauge(tableNameWithType, ControllerGauge.TABLE_CONSUMPTION_PAUSED, 0);
+ }
+ });
+ context._processedTables.forEach(tableNameWithType -> {
+ if (context._disabledTables.contains(tableNameWithType)) {
+ _controllerMetrics.setValueOfTableGauge(tableNameWithType, ControllerGauge.TABLE_DISABLED, 1);
+ } else {
+ _controllerMetrics.setValueOfTableGauge(tableNameWithType, ControllerGauge.TABLE_DISABLED, 0);
+ }
+ });
// Remove metrics for tables that are no longer in the cluster
_cachedTableNamesWithType.removeAll(context._processedTables);
@@ -186,10 +204,18 @@ public class SegmentStatusChecker extends ControllerPeriodicTask<SegmentStatusCh
LOGGER.warn("Table {} is disabled. Skipping segment status checks", tableNameWithType);
}
resetTableMetrics(tableNameWithType);
- context._disabledTableCount++;
+ context._disabledTables.add(tableNameWithType);
return;
}
+ // Check if table consumption is paused
+ boolean isTablePaused =
+ Boolean.parseBoolean(idealState.getRecord().getSimpleField(PinotLLCRealtimeSegmentManager.IS_TABLE_PAUSED));
+
+ if (isTablePaused) {
+ context._pausedTables.add(tableNameWithType);
+ }
+
if (idealState.getPartitionSet().isEmpty()) {
int nReplicasFromIdealState = 1;
try {
@@ -335,6 +361,8 @@ public class SegmentStatusChecker extends ControllerPeriodicTask<SegmentStatusCh
_controllerMetrics.removeTableGauge(tableNameWithType, ControllerGauge.SEGMENTS_IN_ERROR_STATE);
_controllerMetrics.removeTableGauge(tableNameWithType, ControllerGauge.PERCENT_SEGMENTS_AVAILABLE);
+ _controllerMetrics.removeTableGauge(tableNameWithType, ControllerGauge.TABLE_DISABLED);
+ _controllerMetrics.removeTableGauge(tableNameWithType, ControllerGauge.TABLE_CONSUMPTION_PAUSED);
}
private void setStatusToDefault() {
@@ -367,7 +395,8 @@ public class SegmentStatusChecker extends ControllerPeriodicTask<SegmentStatusCh
private boolean _logDisabledTables;
private int _realTimeTableCount;
private int _offlineTableCount;
- private int _disabledTableCount;
private Set<String> _processedTables = new HashSet<>();
+ private Set<String> _disabledTables = new HashSet<>();
+ private Set<String> _pausedTables = new HashSet<>();
}
}
diff --git a/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/realtime/PinotLLCRealtimeSegmentManager.java b/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/realtime/PinotLLCRealtimeSegmentManager.java
index 74eb758b26..298b16d605 100644
--- a/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/realtime/PinotLLCRealtimeSegmentManager.java
+++ b/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/realtime/PinotLLCRealtimeSegmentManager.java
@@ -129,14 +129,14 @@ import org.slf4j.LoggerFactory;
* TODO: migrate code in this class to other places for better readability
*/
public class PinotLLCRealtimeSegmentManager {
+
+ // simple field in Ideal State representing pause status for the table
+ public static final String IS_TABLE_PAUSED = "isTablePaused";
private static final Logger LOGGER = LoggerFactory.getLogger(PinotLLCRealtimeSegmentManager.class);
private static final int STARTING_SEQUENCE_NUMBER = 0; // Initial sequence number for new table segments
private static final String METADATA_EVENT_NOTIFIER_PREFIX = "metadata.event.notifier";
- // simple field in Ideal State representing pause status for the table
- private static final String IS_TABLE_PAUSED = "isTablePaused";
-
// Max time to wait for all LLC segments to complete committing their metadata while stopping the controller.
private static final long MAX_LLC_SEGMENT_METADATA_COMMIT_TIME_MILLIS = 30_000L;
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org