You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ratis.apache.org by sz...@apache.org on 2023/03/01 01:22:30 UTC

[ratis] 14/16: RATIS-1788. Improve the JvmPauseMonitor log messages. (#826)

This is an automated email from the ASF dual-hosted git repository.

szetszwo pushed a commit to branch branch-2_tmp
in repository https://gitbox.apache.org/repos/asf/ratis.git

commit 74830239efd359216406459cc50047d22e2c39f8
Author: Tsz-Wo Nicholas Sze <sz...@apache.org>
AuthorDate: Thu Feb 23 00:32:46 2023 -0800

    RATIS-1788. Improve the JvmPauseMonitor log messages. (#826)
    
    (cherry picked from commit 655db36c68a4c46a59150b548e76f2e92c33bf84)
---
 .../java/org/apache/ratis/util/JvmPauseMonitor.java    | 18 ++++++++----------
 .../main/java/org/apache/ratis/util/TimeDuration.java  | 18 +++++++++++++++++-
 .../org/apache/ratis/server/impl/RaftServerProxy.java  |  4 ++++
 .../java/org/apache/ratis/util/TestTimeDuration.java   | 18 ++++++++++--------
 4 files changed, 39 insertions(+), 19 deletions(-)

diff --git a/ratis-common/src/main/java/org/apache/ratis/util/JvmPauseMonitor.java b/ratis-common/src/main/java/org/apache/ratis/util/JvmPauseMonitor.java
index 1fcfc4d6a..532bd2255 100644
--- a/ratis-common/src/main/java/org/apache/ratis/util/JvmPauseMonitor.java
+++ b/ratis-common/src/main/java/org/apache/ratis/util/JvmPauseMonitor.java
@@ -65,28 +65,26 @@ public class JvmPauseMonitor {
   }
 
   static String toString(Map<String, GcInfo> beforeSleep, TimeDuration extraSleepTime, Map<String, GcInfo> afterSleep) {
-    final StringBuilder b = new StringBuilder("Detected pause in JVM or host machine (eg GC): pause of approximately ")
-        .append(extraSleepTime)
-        .append('.');
-
-    boolean detected = false;
+    final StringBuilder b = new StringBuilder();
+    long ms = 0;
     for(Map.Entry<String, GcInfo> before: beforeSleep.entrySet()) {
       final String name = before.getKey();
       final GcInfo after = afterSleep.get(name);
       if (after != null) {
         final GcInfo diff = after.subtract(before.getValue());
         if (diff.count != 0) {
+          ms += diff.timeMs;
           b.append(System.lineSeparator()).append("GC pool '").append(name)
               .append("' had collection(s): ").append(diff);
-          detected = true;
         }
       }
     }
 
-    if (!detected) {
-      b.append(" No GCs detected.");
-    }
-    return b.toString();
+    final String gc = b.length() == 0? " without any GCs."
+        : " with " + TimeDuration.valueOf(ms, TimeUnit.MILLISECONDS).toString(TimeUnit.SECONDS, 3)
+        + " GC time." + b;
+    return "Detected pause in JVM or host machine approximately "
+        + extraSleepTime.toString(TimeUnit.SECONDS, 3) + gc;
   }
 
   private static final TimeDuration SLEEP_TIME = TimeDuration.valueOf(500, TimeUnit.MILLISECONDS);
diff --git a/ratis-common/src/main/java/org/apache/ratis/util/TimeDuration.java b/ratis-common/src/main/java/org/apache/ratis/util/TimeDuration.java
index 58a61cc29..98fb3694e 100644
--- a/ratis-common/src/main/java/org/apache/ratis/util/TimeDuration.java
+++ b/ratis-common/src/main/java/org/apache/ratis/util/TimeDuration.java
@@ -32,7 +32,7 @@ import java.util.function.LongUnaryOperator;
 
 /**
  * Time duration is represented by a long together with a {@link TimeUnit}.
- *
+ * <p>
  * This is a value-based class.
  */
 public final class TimeDuration implements Comparable<TimeDuration> {
@@ -375,4 +375,20 @@ public final class TimeDuration implements Comparable<TimeDuration> {
   public String toString() {
     return duration + Abbreviation.valueOf(unit).getDefault();
   }
+
+  /** @return a representation of this object in the given target unit and decimal places. */
+  public String toString(TimeUnit targetUnit, int decimalPlaces) {
+    Objects.requireNonNull(targetUnit, "targetUnit == null");
+    if (targetUnit.compareTo(unit) <= 0) {
+      return to(targetUnit).toString();
+    }
+    final double divisor = unit.convert(1, targetUnit);
+    if (duration % divisor == 0) {
+      return to(targetUnit).toString();
+    }
+    final String decimal = StringUtils.format("%." + decimalPlaces + "f", duration/divisor);
+    final String s = decimal + Abbreviation.valueOf(targetUnit).getDefault();
+    LOG.debug("{}.to({}) = {}", this, targetUnit, s);
+    return s;
+  }
 }
diff --git a/ratis-server/src/main/java/org/apache/ratis/server/impl/RaftServerProxy.java b/ratis-server/src/main/java/org/apache/ratis/server/impl/RaftServerProxy.java
index 54cf15c65..f6f7223da 100644
--- a/ratis-server/src/main/java/org/apache/ratis/server/impl/RaftServerProxy.java
+++ b/ratis-server/src/main/java/org/apache/ratis/server/impl/RaftServerProxy.java
@@ -226,8 +226,12 @@ class RaftServerProxy implements RaftServer {
   private void handleJvmPause(TimeDuration extraSleep, TimeDuration closeThreshold, TimeDuration stepDownThreshold)
       throws IOException {
     if (extraSleep.compareTo(closeThreshold) > 0) {
+      LOG.error("{}: JVM pause detected {} longer than the close-threshold {}, shutting down ...",
+          getId(), extraSleep.toString(TimeUnit.SECONDS, 3), closeThreshold.toString(TimeUnit.SECONDS, 3));
       close();
     } else if (extraSleep.compareTo(stepDownThreshold) > 0) {
+      LOG.warn("{}: JVM pause detected {} longer than the step-down-threshold {}",
+          getId(), extraSleep.toString(TimeUnit.SECONDS, 3), stepDownThreshold.toString(TimeUnit.SECONDS, 3));
       getImpls().forEach(RaftServerImpl::stepDownOnJvmPause);
     }
   }
diff --git a/ratis-test/src/test/java/org/apache/ratis/util/TestTimeDuration.java b/ratis-test/src/test/java/org/apache/ratis/util/TestTimeDuration.java
index edb0ad8a2..584c9e811 100644
--- a/ratis-test/src/test/java/org/apache/ratis/util/TestTimeDuration.java
+++ b/ratis-test/src/test/java/org/apache/ratis/util/TestTimeDuration.java
@@ -27,6 +27,7 @@ import java.util.concurrent.TimeUnit;
 import java.util.stream.Collectors;
 
 import static org.apache.ratis.util.TimeDuration.Abbreviation;
+import static org.apache.ratis.util.TimeDuration.LOG;
 import static org.apache.ratis.util.TimeDuration.parse;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertNotNull;
@@ -110,22 +111,23 @@ public class TestTimeDuration {
   @Test(timeout = 1000)
   public void testTo() {
     final TimeDuration oneSecond = TimeDuration.valueOf(1, TimeUnit.SECONDS);
-    assertTo(1000, oneSecond, TimeUnit.MILLISECONDS);
-    final TimeDuration nanos = assertTo(1_000_000_000, oneSecond, TimeUnit.NANOSECONDS);
-    assertTo(1000, nanos, TimeUnit.MILLISECONDS);
+    assertTo(1000, "1000ms", oneSecond, TimeUnit.MILLISECONDS);
+    final TimeDuration nanos = assertTo(1_000_000_000, "1000000000ns", oneSecond, TimeUnit.NANOSECONDS);
+    assertTo(1000, "1000ms", nanos, TimeUnit.MILLISECONDS);
 
-    assertTo(0, oneSecond, TimeUnit.MINUTES);
-    assertTo(0, nanos, TimeUnit.MINUTES);
+    assertTo(0, "0.0167min", oneSecond, TimeUnit.MINUTES);
+    assertTo(0, "0.0167min", nanos, TimeUnit.MINUTES);
 
     final TimeDuration millis = TimeDuration.valueOf(1_999, TimeUnit.MILLISECONDS);
-    assertTo(1, millis, TimeUnit.SECONDS);
-    assertTo(0, millis, TimeUnit.MINUTES);
+    assertTo(1, "1.9990s", millis, TimeUnit.SECONDS);
+    assertTo(0, "0.0333min", millis, TimeUnit.MINUTES);
   }
 
-  static TimeDuration assertTo(long expected, TimeDuration timeDuration, TimeUnit toUnit) {
+  static TimeDuration assertTo(long expected, String expectedString, TimeDuration timeDuration, TimeUnit toUnit) {
     final TimeDuration computed = timeDuration.to(toUnit);
     assertEquals(expected, computed.getDuration());
     assertEquals(toUnit, computed.getUnit());
+    assertEquals(expectedString, timeDuration.toString(toUnit, 4));
     return computed;
   }