You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by li...@apache.org on 2013/06/12 20:35:47 UTC

svn commit: r1492337 - in /hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase: HConstants.java regionserver/HRegionServer.java

Author: liyin
Date: Wed Jun 12 18:35:47 2013
New Revision: 1492337

URL: http://svn.apache.org/r1492337
Log:
[0.89-fb] [master] Allow regionServerReport to run with shorter RPC timeouts.

Author: aaiyer

Summary:
Our RS Expiry/Heartbeat mechanism relies on waiting 20 sec
before declaring an RS to be dead. The rationale behind this
is to give the RS 20 consecutive attempts before considering it
dead.

However, if the RPC timeout is set to 60 sec (the default), it is possible
that one regionserverReport takes up to 60 sec (to fail), and that
single failed call can cause the RS to expire.

We should shorten the RPC timeout for the regionserver report to ensure
we give the RS multiple chances before shooting it down.

2) Also including a bug fix around the instrumentation to log regionserver
reports. If the call to master.regionserverReport is unsuccessful, we should
preserve the old behavior and let the exception out. Specifically, the rest
of the code that follows the rsReport should only be executed if the report
was successful.

Test Plan: unit tests

Reviewers: rshroff, manukranthk

Reviewed By: manukranthk

CC: hbase-eng@

Differential Revision: https://phabricator.fb.com/D840170

Modified:
    hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/HConstants.java
    hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java

Modified: hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/HConstants.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/HConstants.java?rev=1492337&r1=1492336&r2=1492337&view=diff
==============================================================================
--- hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/HConstants.java (original)
+++ hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/HConstants.java Wed Jun 12 18:35:47 2013
@@ -530,11 +530,13 @@ public final class HConstants {
    * timeout for each RPC
    */
   public static String HBASE_RPC_TIMEOUT_KEY = "hbase.rpc.timeout";
+  public static String HBASE_RS_REPORT_TIMEOUT_KEY = "hbase.regionserverReport.timeout";
 
   /**
    * Default value of {@link #HBASE_RPC_TIMEOUT_KEY}
    */
   public static int DEFAULT_HBASE_RPC_TIMEOUT = 60000;
+  public static int DEFAULT_RS_REPORT_TIMEOUT = 3000;
 
   /**
    * pause between rpc or connect retries

Modified: hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java?rev=1492337&r1=1492336&r2=1492337&view=diff
==============================================================================
--- hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java (original)
+++ hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java Wed Jun 12 18:35:47 2013
@@ -320,7 +320,7 @@ public class HRegionServer implements HR
   // A sleeper that sleeps for msgInterval.
   private final Sleeper sleeper;
 
-  private final int rpcTimeout;
+  private final int rpcTimeoutToMaster;
 
   // Address passed in to constructor.  This is not always the address we run
   // with.  For example, if passed port is 0, then we are to pick a port.  The
@@ -430,9 +430,9 @@ public class HRegionServer implements HR
     this.numRegionsToReport =
       conf.getInt("hbase.regionserver.numregionstoreport", 10);
 
-    this.rpcTimeout = conf.getInt(
-        HConstants.HBASE_RPC_TIMEOUT_KEY,
-        HConstants.DEFAULT_HBASE_RPC_TIMEOUT);
+    this.rpcTimeoutToMaster = conf.getInt(
+        HConstants.HBASE_RS_REPORT_TIMEOUT_KEY,
+        HConstants.DEFAULT_RS_REPORT_TIMEOUT);
 
     responseSizeLimit = conf.getLong("hbase.regionserver.results.size.max",
         (long)Integer.MAX_VALUE); // set the max to 2G
@@ -750,6 +750,7 @@ public class HRegionServer implements HR
               LOG.debug(HRegionServer.printFailedRegionserverReport(this.serverInfo,
                   outboundMessages.toArray(EMPTY_HMSG_ARRAY),
                   getMostLoadedRegions(), msgs, (Throwable)e));
+              throw e;
             }
             LOG.debug("Attempted regionserver report with the master");
             lastMsg = System.currentTimeMillis();
@@ -1961,7 +1962,7 @@ public class HRegionServer implements HR
         // should retry indefinitely.
         master = (HMasterRegionInterface)HBaseRPC.getProxy(
           HMasterRegionInterface.class, HBaseRPCProtocolVersion.versionID,
-          masterAddress.getInetSocketAddress(), this.conf, this.rpcTimeout,
+          masterAddress.getInetSocketAddress(), this.conf, this.rpcTimeoutToMaster,
           HBaseRPCOptions.DEFAULT);
       } catch (IOException e) {
         LOG.warn("Unable to connect to master. Retrying. Error was:", e);