You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@storm.apache.org by ag...@apache.org on 2020/01/08 20:19:12 UTC
[storm] branch master updated: STORM-3555 add meter to track errors killing workers
This is an automated email from the ASF dual-hosted git repository.
agresch pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/storm.git
The following commit(s) were added to refs/heads/master by this push:
new cd5f7f8 STORM-3555 add meter to track errors killing workers
new 17a5cbd Merge pull request #3184 from agresch/agresch_storm_3555
cd5f7f8 is described below
commit cd5f7f80fc146d8935d03421559d49a0aa8fb69a
Author: Aaron Gresch <ag...@yahoo-inc.com>
AuthorDate: Tue Dec 17 14:18:08 2019 -0600
STORM-3555 add meter to track errors killing workers
---
.../main/java/org/apache/storm/daemon/supervisor/Supervisor.java | 7 +++++++
1 file changed, 7 insertions(+)
diff --git a/storm-server/src/main/java/org/apache/storm/daemon/supervisor/Supervisor.java b/storm-server/src/main/java/org/apache/storm/daemon/supervisor/Supervisor.java
index 259a13e..5a4d25f 100644
--- a/storm-server/src/main/java/org/apache/storm/daemon/supervisor/Supervisor.java
+++ b/storm-server/src/main/java/org/apache/storm/daemon/supervisor/Supervisor.java
@@ -18,6 +18,7 @@
package org.apache.storm.daemon.supervisor;
+import com.codahale.metrics.Meter;
import java.io.File;
import java.io.IOException;
import java.net.BindException;
@@ -31,6 +32,7 @@ import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.atomic.AtomicReference;
import java.util.function.BiConsumer;
+
import org.apache.commons.io.FileUtils;
import org.apache.storm.Config;
import org.apache.storm.DaemonConfig;
@@ -105,6 +107,7 @@ public class Supervisor implements DaemonCommon, AutoCloseable {
private final ExecutorService heartbeatExecutor;
private final AsyncLocalizer asyncLocalizer;
private final StormMetricsRegistry metricsRegistry;
+ private Meter killErrorMeter;
private final ContainerMemoryTracker containerMemoryTracker;
private final SlotMetrics slotMetrics;
private volatile boolean active;
@@ -339,6 +342,7 @@ public class Supervisor implements DaemonCommon, AutoCloseable {
//This will only get updated once
metricsRegistry.registerMeter("supervisor:num-launched").mark();
metricsRegistry.registerMeter("supervisor:num-shell-exceptions", ShellUtils.numShellExceptions);
+ killErrorMeter = metricsRegistry.registerMeter("supervisor:num-kill-worker-errors");
metricsRegistry.startMetricsReporters(conf);
Utils.addShutdownHookWithForceKillIn1Sec(() -> {
metricsRegistry.stopMetricsReporters();
@@ -528,6 +532,9 @@ public class Supervisor implements DaemonCommon, AutoCloseable {
long start = Time.currentTimeMillis();
while (!k.areAllProcessesDead()) {
if ((Time.currentTimeMillis() - start) > 10_000) {
+ if (killErrorMeter != null) {
+ killErrorMeter.mark();
+ }
throw new RuntimeException("Giving up on killing " + k
+ " after " + (Time.currentTimeMillis() - start) + " ms");
}