You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pinot.apache.org by sn...@apache.org on 2022/12/23 22:49:52 UTC

[pinot] branch master updated: emit minion task generation time and error metrics (#10026)

This is an automated email from the ASF dual-hosted git repository.

snlee pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
     new 880a5c779f emit minion task generation time and error metrics (#10026)
880a5c779f is described below

commit 880a5c779fc441124ea14013a52d966885a58074
Author: Haitao Zhang <ha...@startree.ai>
AuthorDate: Fri Dec 23 14:49:44 2022 -0800

    emit minion task generation time and error metrics (#10026)
    
    * emit minion task generation time and error metrics
    
    * address comments
---
 .../jmx_prometheus_javaagent/configs/controller.yml | 14 ++++++++++++++
 .../etc/jmx_prometheus_javaagent/configs/pinot.yml  | 14 ++++++++++++++
 .../pinot/common/metrics/ControllerGauge.java       |  2 ++
 .../helix/core/minion/PinotTaskManager.java         | 21 ++++++++++++++++++---
 4 files changed, 48 insertions(+), 3 deletions(-)

diff --git a/docker/images/pinot/etc/jmx_prometheus_javaagent/configs/controller.yml b/docker/images/pinot/etc/jmx_prometheus_javaagent/configs/controller.yml
index edf96a9f2a..d258161b42 100644
--- a/docker/images/pinot/etc/jmx_prometheus_javaagent/configs/controller.yml
+++ b/docker/images/pinot/etc/jmx_prometheus_javaagent/configs/controller.yml
@@ -119,6 +119,20 @@ rules:
     table: "$1"
     tableType: "$2"
     taskType: "$3"
+- pattern: "\"org.apache.pinot.common.metrics\"<type=\"ControllerMetrics\", name=\"pinot.controller.timeMsSinceLastSuccessfulMinionTaskGeneration.(\\w+)_(\\w+)\\.(\\w+)\"><>(\\w+)"
+  name: "pinot_controller_timeMsSinceLastSuccessfulMinionTaskGeneration_$4"
+  cache: true
+  labels:
+    table: "$1"
+    tableType: "$2"
+    taskType: "$3"
+- pattern: "\"org.apache.pinot.common.metrics\"<type=\"ControllerMetrics\", name=\"pinot.controller.lastMinionTaskGenerationEncountersError.(\\w+)_(\\w+)\\.(\\w+)\"><>(\\w+)"
+  name: "pinot_controller_lastMinionTaskGenerationEncountersError_$4"
+  cache: true
+  labels:
+    table: "$1"
+    tableType: "$2"
+    taskType: "$3"
 - pattern: "\"org.apache.pinot.common.metrics\"<type=\"ControllerMetrics\", name=\"pinot.controller.pinotLeadControllerResourceEnabled\"><>(\\w+)"
   name: "pinot_controller_pinotLeadControllerResourceEnabled_$1"
   cache: true
diff --git a/docker/images/pinot/etc/jmx_prometheus_javaagent/configs/pinot.yml b/docker/images/pinot/etc/jmx_prometheus_javaagent/configs/pinot.yml
index 6a63001bfc..27a78730db 100644
--- a/docker/images/pinot/etc/jmx_prometheus_javaagent/configs/pinot.yml
+++ b/docker/images/pinot/etc/jmx_prometheus_javaagent/configs/pinot.yml
@@ -108,6 +108,20 @@ rules:
     table: "$1"
     tableType: "$2"
     taskType: "$3"
+- pattern: "\"org.apache.pinot.common.metrics\"<type=\"ControllerMetrics\", name=\"pinot.controller.timeMsSinceLastSuccessfulMinionTaskGeneration.(\\w+)_(\\w+)\\.(\\w+)\"><>(\\w+)"
+  name: "pinot_controller_timeMsSinceLastSuccessfulMinionTaskGeneration_$4"
+  cache: true
+  labels:
+    table: "$1"
+    tableType: "$2"
+    taskType: "$3"
+- pattern: "\"org.apache.pinot.common.metrics\"<type=\"ControllerMetrics\", name=\"pinot.controller.lastMinionTaskGenerationEncountersError.(\\w+)_(\\w+)\\.(\\w+)\"><>(\\w+)"
+  name: "pinot_controller_lastMinionTaskGenerationEncountersError_$4"
+  cache: true
+  labels:
+    table: "$1"
+    tableType: "$2"
+    taskType: "$3"
 - pattern: "\"org.apache.pinot.common.metrics\"<type=\"ControllerMetrics\", name=\"pinot.controller.pinotLeadControllerResourceEnabled\"><>(\\w+)"
   name: "pinot_controller_pinotLeadControllerResourceEnabled_$1"
   cache: true
diff --git a/pinot-common/src/main/java/org/apache/pinot/common/metrics/ControllerGauge.java b/pinot-common/src/main/java/org/apache/pinot/common/metrics/ControllerGauge.java
index 5df7959d5c..b44bd8e999 100644
--- a/pinot-common/src/main/java/org/apache/pinot/common/metrics/ControllerGauge.java
+++ b/pinot-common/src/main/java/org/apache/pinot/common/metrics/ControllerGauge.java
@@ -53,6 +53,8 @@ public enum ControllerGauge implements AbstractMetrics.Gauge {
   DISABLED_TABLE_COUNT("TableCount", true),
   PERIODIC_TASK_NUM_TABLES_PROCESSED("PeriodicTaskNumTablesProcessed", true),
   TIME_MS_SINCE_LAST_MINION_TASK_METADATA_UPDATE("TimeMsSinceLastMinionTaskMetadataUpdate", false),
+  TIME_MS_SINCE_LAST_SUCCESSFUL_MINION_TASK_GENERATION("TimeMsSinceLastSuccessfulMinionTaskGeneration", false),
+  LAST_MINION_TASK_GENERATION_ENCOUNTERS_ERROR("LastMinionTaskGenerationEncountersError", false),
   NUM_MINION_TASKS_IN_PROGRESS("NumMinionTasksInProgress", true),
   NUM_MINION_SUBTASKS_WAITING("NumMinionSubtasksWaiting", true),
   NUM_MINION_SUBTASKS_RUNNING("NumMinionSubtasksRunning", true),
diff --git a/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/minion/PinotTaskManager.java b/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/minion/PinotTaskManager.java
index e8bb3cffd5..04ba148adb 100644
--- a/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/minion/PinotTaskManager.java
+++ b/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/minion/PinotTaskManager.java
@@ -541,20 +541,35 @@ public class PinotTaskManager extends ControllerPeriodicTask<Void> {
         generateTasks() return a list of TaskGeneratorMostRecentRunInfo for each table
        */
       pinotTaskConfigs = taskGenerator.generateTasks(enabledTableConfigs);
+      long successRunTimestamp = System.currentTimeMillis();
       for (TableConfig tableConfig : enabledTableConfigs) {
         _taskManagerStatusCache.saveTaskGeneratorInfo(tableConfig.getTableName(), taskGenerator.getTaskType(),
-            taskGeneratorMostRecentRunInfo -> taskGeneratorMostRecentRunInfo.addSuccessRunTs(
-                System.currentTimeMillis()));
+            taskGeneratorMostRecentRunInfo -> taskGeneratorMostRecentRunInfo.addSuccessRunTs(successRunTimestamp));
+        // before the first task schedule, the following two gauge metrics will be empty
+        // TODO: find a better way to report task generation information
+        _controllerMetrics.addOrUpdateGauge(
+            ControllerGauge.TIME_MS_SINCE_LAST_SUCCESSFUL_MINION_TASK_GENERATION.getGaugeName() + "."
+                + tableConfig.getTableName() + "." + taskGenerator.getTaskType(),
+            () -> System.currentTimeMillis() - successRunTimestamp);
+        _controllerMetrics.addOrUpdateGauge(
+            ControllerGauge.LAST_MINION_TASK_GENERATION_ENCOUNTERS_ERROR.getGaugeName() + "."
+                + tableConfig.getTableName() + "." + taskGenerator.getTaskType(), () -> 0L);
       }
     } catch (Exception e) {
       StringWriter errors = new StringWriter();
       try (PrintWriter pw = new PrintWriter(errors)) {
         e.printStackTrace(pw);
       }
+      long successRunTimestamp = System.currentTimeMillis();
       for (TableConfig tableConfig : enabledTableConfigs) {
         _taskManagerStatusCache.saveTaskGeneratorInfo(tableConfig.getTableName(), taskGenerator.getTaskType(),
             taskGeneratorMostRecentRunInfo -> taskGeneratorMostRecentRunInfo.addErrorRunMessage(
-                System.currentTimeMillis(), errors.toString()));
+                successRunTimestamp, errors.toString()));
+        // before the first task schedule, the following gauge metric will be empty
+        // TODO: find a better way to report task generation information
+        _controllerMetrics.addOrUpdateGauge(
+            ControllerGauge.LAST_MINION_TASK_GENERATION_ENCOUNTERS_ERROR.getGaugeName() + "."
+                + tableConfig.getTableName() + "." + taskGenerator.getTaskType(), () -> 1L);
       }
       throw e;
     }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org