You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pinot.apache.org by ro...@apache.org on 2023/12/07 15:51:57 UTC

(pinot) branch master updated: Metrics for Table Disabled and Consumption Paused (#12000)

This is an automated email from the ASF dual-hosted git repository.

rongr pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
     new e62db612c9 Metrics for Table Disabled and Consumption Paused (#12000)
e62db612c9 is described below

commit e62db612c91b4b540bcdfeecb412442eac35ced2
Author: Prashant Pandey <84...@users.noreply.github.com>
AuthorDate: Thu Dec 7 21:21:50 2023 +0530

    Metrics for Table Disabled and Consumption Paused (#12000)
---
 .../configs/controller.yml                         | 12 ++++++++
 .../pinot/common/metrics/ControllerGauge.java      |  6 +++-
 .../controller/helix/SegmentStatusChecker.java     | 35 ++++++++++++++++++++--
 .../realtime/PinotLLCRealtimeSegmentManager.java   |  6 ++--
 4 files changed, 52 insertions(+), 7 deletions(-)

diff --git a/docker/images/pinot/etc/jmx_prometheus_javaagent/configs/controller.yml b/docker/images/pinot/etc/jmx_prometheus_javaagent/configs/controller.yml
index c4071887ed..e86243dfc1 100644
--- a/docker/images/pinot/etc/jmx_prometheus_javaagent/configs/controller.yml
+++ b/docker/images/pinot/etc/jmx_prometheus_javaagent/configs/controller.yml
@@ -204,6 +204,18 @@ rules:
   cache: true
   labels:
     version: "$2"
+- pattern: "\"org.apache.pinot.common.metrics\"<type=\"ControllerMetrics\", name=\"pinot.controller.tableConsumptionPaused.([^\\.]*?)_(OFFLINE|REALTIME)\"><>(\\w+)"
+  name: "pinot_controller_tableConsumptionPaused_$3"
+  cache: true
+  labels:
+    tableName: "$1"
+    tableType: "$2"
+- pattern: "\"org.apache.pinot.common.metrics\"<type=\"ControllerMetrics\", name=\"pinot.controller.tableDisabled.([^\\.]*?)_(OFFLINE|REALTIME)\"><>(\\w+)"
+  name: "pinot_controller_tableDisabled_$3"
+  cache: true
+  labels:
+    tableName: "$1"
+    tableType: "$2"
 
   ## Metrics that fit the catch-all patterns above should not be added to this file.
   ## In case a metric does not fit the catch-all patterns, add them before this comment
diff --git a/pinot-common/src/main/java/org/apache/pinot/common/metrics/ControllerGauge.java b/pinot-common/src/main/java/org/apache/pinot/common/metrics/ControllerGauge.java
index 4006ca45b0..3444ffae5f 100644
--- a/pinot-common/src/main/java/org/apache/pinot/common/metrics/ControllerGauge.java
+++ b/pinot-common/src/main/java/org/apache/pinot/common/metrics/ControllerGauge.java
@@ -149,7 +149,11 @@ public enum ControllerGauge implements AbstractMetrics.Gauge {
   // Number of tables that we want to fix but failed to update table config
   FAILED_TO_UPDATE_TABLE_CONFIG_COUNT("failedToUpdateTableConfigCount", true),
 
-  LLC_SEGMENTS_DEEP_STORE_UPLOAD_RETRY_QUEUE_SIZE("LLCSegmentDeepStoreUploadRetryQueueSize", false);
+  LLC_SEGMENTS_DEEP_STORE_UPLOAD_RETRY_QUEUE_SIZE("LLCSegmentDeepStoreUploadRetryQueueSize", false),
+
+  TABLE_CONSUMPTION_PAUSED("tableConsumptionPaused", false),
+
+  TABLE_DISABLED("tableDisabled", false);
 
   private final String _gaugeName;
   private final String _unit;
diff --git a/pinot-controller/src/main/java/org/apache/pinot/controller/helix/SegmentStatusChecker.java b/pinot-controller/src/main/java/org/apache/pinot/controller/helix/SegmentStatusChecker.java
index f64e6c3e75..617564757e 100644
--- a/pinot-controller/src/main/java/org/apache/pinot/controller/helix/SegmentStatusChecker.java
+++ b/pinot-controller/src/main/java/org/apache/pinot/controller/helix/SegmentStatusChecker.java
@@ -44,6 +44,7 @@ import org.apache.pinot.controller.LeadControllerManager;
 import org.apache.pinot.controller.helix.core.PinotHelixResourceManager;
 import org.apache.pinot.controller.helix.core.periodictask.ControllerPeriodicTask;
 import org.apache.pinot.controller.helix.core.realtime.MissingConsumingSegmentFinder;
+import org.apache.pinot.controller.helix.core.realtime.PinotLLCRealtimeSegmentManager;
 import org.apache.pinot.controller.util.TableSizeReader;
 import org.apache.pinot.spi.config.table.TableConfig;
 import org.apache.pinot.spi.config.table.TableType;
@@ -133,7 +134,24 @@ public class SegmentStatusChecker extends ControllerPeriodicTask<SegmentStatusCh
   protected void postprocess(Context context) {
     _controllerMetrics.setValueOfGlobalGauge(ControllerGauge.REALTIME_TABLE_COUNT, context._realTimeTableCount);
     _controllerMetrics.setValueOfGlobalGauge(ControllerGauge.OFFLINE_TABLE_COUNT, context._offlineTableCount);
-    _controllerMetrics.setValueOfGlobalGauge(ControllerGauge.DISABLED_TABLE_COUNT, context._disabledTableCount);
+    _controllerMetrics.setValueOfGlobalGauge(ControllerGauge.DISABLED_TABLE_COUNT, context._disabledTables.size());
+
+    // Emit a 0 for tables that are not paused/disabled. This makes alert expressions simpler, as we don't have to
+    // deal with missing metrics.
+    context._processedTables.forEach(tableNameWithType -> {
+      if (context._pausedTables.contains(tableNameWithType)) {
+        _controllerMetrics.setValueOfTableGauge(tableNameWithType, ControllerGauge.TABLE_CONSUMPTION_PAUSED, 1);
+      } else {
+        _controllerMetrics.setValueOfTableGauge(tableNameWithType, ControllerGauge.TABLE_CONSUMPTION_PAUSED, 0);
+      }
+    });
+    context._processedTables.forEach(tableNameWithType -> {
+      if (context._disabledTables.contains(tableNameWithType)) {
+        _controllerMetrics.setValueOfTableGauge(tableNameWithType, ControllerGauge.TABLE_DISABLED, 1);
+      } else {
+        _controllerMetrics.setValueOfTableGauge(tableNameWithType, ControllerGauge.TABLE_DISABLED, 0);
+      }
+    });
 
     // Remove metrics for tables that are no longer in the cluster
     _cachedTableNamesWithType.removeAll(context._processedTables);
@@ -186,10 +204,18 @@ public class SegmentStatusChecker extends ControllerPeriodicTask<SegmentStatusCh
         LOGGER.warn("Table {} is disabled. Skipping segment status checks", tableNameWithType);
       }
       resetTableMetrics(tableNameWithType);
-      context._disabledTableCount++;
+      context._disabledTables.add(tableNameWithType);
       return;
     }
 
+    // Check whether table consumption is paused
+    boolean isTablePaused =
+        Boolean.parseBoolean(idealState.getRecord().getSimpleField(PinotLLCRealtimeSegmentManager.IS_TABLE_PAUSED));
+
+    if (isTablePaused) {
+      context._pausedTables.add(tableNameWithType);
+    }
+
     if (idealState.getPartitionSet().isEmpty()) {
       int nReplicasFromIdealState = 1;
       try {
@@ -335,6 +361,8 @@ public class SegmentStatusChecker extends ControllerPeriodicTask<SegmentStatusCh
 
     _controllerMetrics.removeTableGauge(tableNameWithType, ControllerGauge.SEGMENTS_IN_ERROR_STATE);
     _controllerMetrics.removeTableGauge(tableNameWithType, ControllerGauge.PERCENT_SEGMENTS_AVAILABLE);
+    _controllerMetrics.removeTableGauge(tableNameWithType, ControllerGauge.TABLE_DISABLED);
+    _controllerMetrics.removeTableGauge(tableNameWithType, ControllerGauge.TABLE_CONSUMPTION_PAUSED);
   }
 
   private void setStatusToDefault() {
@@ -367,7 +395,8 @@ public class SegmentStatusChecker extends ControllerPeriodicTask<SegmentStatusCh
     private boolean _logDisabledTables;
     private int _realTimeTableCount;
     private int _offlineTableCount;
-    private int _disabledTableCount;
     private Set<String> _processedTables = new HashSet<>();
+    private Set<String> _disabledTables = new HashSet<>();
+    private Set<String> _pausedTables = new HashSet<>();
   }
 }
diff --git a/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/realtime/PinotLLCRealtimeSegmentManager.java b/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/realtime/PinotLLCRealtimeSegmentManager.java
index 74eb758b26..298b16d605 100644
--- a/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/realtime/PinotLLCRealtimeSegmentManager.java
+++ b/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/realtime/PinotLLCRealtimeSegmentManager.java
@@ -129,14 +129,14 @@ import org.slf4j.LoggerFactory;
  * TODO: migrate code in this class to other places for better readability
  */
 public class PinotLLCRealtimeSegmentManager {
+
+  // simple field in Ideal State representing pause status for the table
+  public static final String IS_TABLE_PAUSED = "isTablePaused";
   private static final Logger LOGGER = LoggerFactory.getLogger(PinotLLCRealtimeSegmentManager.class);
 
   private static final int STARTING_SEQUENCE_NUMBER = 0; // Initial sequence number for new table segments
   private static final String METADATA_EVENT_NOTIFIER_PREFIX = "metadata.event.notifier";
 
-  // simple field in Ideal State representing pause status for the table
-  private static final String IS_TABLE_PAUSED = "isTablePaused";
-
   // Max time to wait for all LLC segments to complete committing their metadata while stopping the controller.
   private static final long MAX_LLC_SEGMENT_METADATA_COMMIT_TIME_MILLIS = 30_000L;
 


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org