You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by st...@apache.org on 2012/04/12 19:20:10 UTC

svn commit: r1325388 - in /hbase/branches/0.94/src: main/java/org/apache/hadoop/hbase/master/ServerManager.java test/java/org/apache/hadoop/hbase/master/TestClockSkewDetection.java

Author: stack
Date: Thu Apr 12 17:20:10 2012
New Revision: 1325388

URL: http://svn.apache.org/viewvc?rev=1325388&view=rev
Log:
HBASE-5770 Add a clock skew warning threshold

Modified:
    hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java
    hbase/branches/0.94/src/test/java/org/apache/hadoop/hbase/master/TestClockSkewDetection.java

Modified: hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java?rev=1325388&r1=1325387&r2=1325388&view=diff
==============================================================================
--- hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java (original)
+++ hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java Thu Apr 12 17:20:10 2012
@@ -96,6 +96,7 @@ public class ServerManager {
   private final DeadServer deadservers;
 
   private final long maxSkew;
+  private final long warningSkew;
 
   /**
    * Set of region servers which are dead but not expired immediately. If one
@@ -122,6 +123,7 @@ public class ServerManager {
     this.services = services;
     Configuration c = master.getConfiguration();
     maxSkew = c.getLong("hbase.master.maxclockskew", 30000);
+    warningSkew = c.getLong("hbase.master.warningclockskew", 10000);
     this.deadservers = new DeadServer();
     this.connection = connect ? HConnectionManager.getConnection(c) : null;
   }
@@ -194,14 +196,14 @@ public class ServerManager {
   }
 
   /**
-   * Checks if the clock skew between the server and the master. If the clock
-   * skew is too much it will throw an Exception.
+   * Checks the clock skew between the server and the master. If the clock skew exceeds the
+   * configured max, it will throw an exception; if it exceeds the configured warning threshold,
+   * it will log a warning but start normally.
    * @param serverName Incoming servers's name
    * @param serverCurrentTime
-   * @throws ClockOutOfSyncException
+   * @throws ClockOutOfSyncException if the skew exceeds the configured max value
    */
-  private void checkClockSkew(final ServerName serverName,
-      final long serverCurrentTime)
+  private void checkClockSkew(final ServerName serverName, final long serverCurrentTime)
   throws ClockOutOfSyncException {
     long skew = System.currentTimeMillis() - serverCurrentTime;
     if (skew > maxSkew) {
@@ -210,6 +212,11 @@ public class ServerManager {
         "Time difference of " + skew + "ms > max allowed of " + maxSkew + "ms";
       LOG.warn(message);
       throw new ClockOutOfSyncException(message);
+    } else if (skew > warningSkew){
+      String message = "Reported time for server " + serverName + " is out of sync with master " +
+        "by " + skew + "ms. (Warning threshold is " + warningSkew + "ms; " + 
+        "error threshold is " + maxSkew + "ms)";
+      LOG.warn(message);
     }
   }
 

Modified: hbase/branches/0.94/src/test/java/org/apache/hadoop/hbase/master/TestClockSkewDetection.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.94/src/test/java/org/apache/hadoop/hbase/master/TestClockSkewDetection.java?rev=1325388&r1=1325387&r2=1325388&view=diff
==============================================================================
--- hbase/branches/0.94/src/test/java/org/apache/hadoop/hbase/master/TestClockSkewDetection.java (original)
+++ hbase/branches/0.94/src/test/java/org/apache/hadoop/hbase/master/TestClockSkewDetection.java Thu Apr 12 17:20:10 2012
@@ -19,6 +19,8 @@
  */
 package org.apache.hadoop.hbase.master;
 
+import static org.junit.Assert.fail;
+
 import java.net.InetAddress;
 
 import junit.framework.Assert;
@@ -82,18 +84,25 @@ public class TestClockSkewDetection {
     InetAddress ia1 = InetAddress.getLocalHost();
     sm.regionServerStartup(ia1, 1234, -1, System.currentTimeMillis());
 
-    long maxSkew = 30000;
+    final Configuration c = HBaseConfiguration.create();
+    long maxSkew = c.getLong("hbase.master.maxclockskew", 30000);
+    long warningSkew = c.getLong("hbase.master.warningclockskew", 1000);
 
     try {
       LOG.debug("regionServerStartup 2");
       InetAddress ia2 = InetAddress.getLocalHost();
       sm.regionServerStartup(ia2, 1235, -1, System.currentTimeMillis() - maxSkew * 2);
-      Assert.assertTrue("HMaster should have thrown an ClockOutOfSyncException "
-        + "but didn't.", false);
+      fail("HMaster should have thrown an ClockOutOfSyncException but didn't.");
     } catch(ClockOutOfSyncException e) {
       //we want an exception
       LOG.info("Recieved expected exception: "+e);
     }
+    
+    // make sure values above warning threshold but below max threshold don't kill
+    LOG.debug("regionServerStartup 3");
+    InetAddress ia3 = InetAddress.getLocalHost();
+    sm.regionServerStartup(ia3, 1236, -1, System.currentTimeMillis() - warningSkew * 2);
+    
   }
 
   @org.junit.Rule