You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@ratis.apache.org by GitBox <gi...@apache.org> on 2020/11/17 06:05:40 UTC

[GitHub] [incubator-ratis] bshashikant commented on a change in pull request #275: RATIS-1149. Leader should step down and initiate a stateMachine action in case of jvm pauses.

bshashikant commented on a change in pull request #275:
URL: https://github.com/apache/incubator-ratis/pull/275#discussion_r524903696



##########
File path: ratis-server/src/main/java/org/apache/ratis/server/JvmPauseMonitor.java
##########
@@ -0,0 +1,181 @@
+package org.apache.ratis.server;
+
+import org.apache.ratis.conf.RaftProperties;
+import org.apache.ratis.server.impl.RaftServerImpl;
+import org.apache.ratis.server.impl.RaftServerProxy;
+import org.apache.ratis.thirdparty.com.google.common.base.Joiner;
+import org.apache.ratis.thirdparty.com.google.common.base.Stopwatch;
+import org.apache.ratis.thirdparty.com.google.common.collect.Lists;
+import org.apache.ratis.thirdparty.com.google.common.collect.Maps;
+import org.apache.ratis.thirdparty.com.google.common.collect.Sets;
+import org.apache.ratis.util.Daemon;
+import org.apache.ratis.util.JavaUtils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.lang.management.GarbageCollectorMXBean;
+import java.lang.management.ManagementFactory;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.concurrent.TimeUnit;
+
+
+public class JvmPauseMonitor {
+  public static final Logger LOG =
+    LoggerFactory.getLogger(JvmPauseMonitor.class);
+  /**
+   * The target sleep time
+   */
+  private static final long SLEEP_INTERVAL_MS = 500;
+  private long totalGcExtraSleepTime = 0;
+
+  private Thread monitorThread;
+  private volatile boolean shouldRun = true;
+  private final RaftServerProxy proxy;
+
+  public boolean isStarted() {
+    return monitorThread != null;
+  }
+
+  public long getTotalGcExtraSleepTime() {
+    return totalGcExtraSleepTime;
+  }
+
+  private String formatMessage(long extraSleepTime,
+                               Map<String, GcTimes> gcTimesAfterSleep,
+                               Map<String, GcTimes> gcTimesBeforeSleep) {
+
+    Set<String> gcBeanNames = Sets.intersection(gcTimesAfterSleep.keySet(),
+      gcTimesBeforeSleep.keySet());
+    List<String> gcDiffs = Lists.newArrayList();
+    for (String name : gcBeanNames) {
+      GcTimes diff = gcTimesAfterSleep.get(name)
+                       .subtract(gcTimesBeforeSleep.get(name));
+      if (diff.gcCount != 0) {
+        gcDiffs.add("GC pool '" + name + "' had collection(s): " + diff
+                                                                     .toString());
+      }
+    }
+
+    String ret = "Detected pause in JVM or host machine (eg GC): "
+                   + "pause of approximately " + extraSleepTime + "ms\n";
+    if (gcDiffs.isEmpty()) {
+      ret += "No GCs detected";
+    } else {
+      ret += Joiner.on("\n").join(gcDiffs);
+    }
+    return ret;
+  }
+
+  public JvmPauseMonitor(RaftServerProxy proxy) {
+    this.proxy = proxy;
+  }
+
+  private Map<String, GcTimes> getGcTimes() {
+    Map<String, GcTimes> map = Maps.newHashMap();
+    List<GarbageCollectorMXBean> gcBeans =
+      ManagementFactory.getGarbageCollectorMXBeans();
+    for (GarbageCollectorMXBean gcBean : gcBeans) {
+      map.put(gcBean.getName(), new GcTimes(gcBean));
+    }
+    return map;
+  }
+
+  private static class GcTimes {
+    private GcTimes(GarbageCollectorMXBean gcBean) {
+      gcCount = gcBean.getCollectionCount();
+      gcTimeMillis = gcBean.getCollectionTime();
+    }
+
+    private GcTimes(long count, long time) {
+      this.gcCount = count;
+      this.gcTimeMillis = time;
+    }
+
+    private GcTimes subtract(GcTimes other) {
+      return new GcTimes(this.gcCount - other.gcCount,
+        this.gcTimeMillis - other.gcTimeMillis);
+    }
+
+    @Override
+    public String toString() {
+      return "count=" + gcCount + " time=" + gcTimeMillis + "ms";
+    }
+
+    private long gcCount;
+    private long gcTimeMillis;
+  }
+
+  class Monitor implements Runnable {
+    private final String name = JvmPauseMonitor.this + "-" + JavaUtils
+                                                               .getClassSimpleName(getClass());
+
+    public void run() {
+      while (shouldRun) {
+        Stopwatch sw = Stopwatch.createUnstarted();
+        Map<String, GcTimes> gcTimesBeforeSleep = getGcTimes();
+        LOG.info("Starting Ratis JVM pause monitor");
+        while (shouldRun) {
+          sw.reset().start();
+          try {
+            Thread.sleep(SLEEP_INTERVAL_MS);
+          } catch (InterruptedException ie) {
+            return;

Review comment:
       I think return is better , as the monitor will be stopped by RaftServerProxy on close.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org