You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by li...@apache.org on 2013/06/12 20:35:47 UTC
svn commit: r1492337 - in
/hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase:
HConstants.java regionserver/HRegionServer.java
Author: liyin
Date: Wed Jun 12 18:35:47 2013
New Revision: 1492337
URL: http://svn.apache.org/r1492337
Log:
[0.89-fb] [master] Allow regionServerReport to run with shorter RPC timeouts.
Author: aaiyer
Summary:
1) Our RS Expiry/Heartbeat mechanism relies on waiting 20 sec
before declaring a RS to be dead. The rationale behind this
is to give the RS 20 consecutive attempts before considering it
dead.
However, with the RPC timeout at its default of 60 sec, it is possible
that one regionserverReport takes up to 60 sec to fail, and that
single failed call can cause the RS to expire.
We should shorten the RPC timeout for the regionserver report to ensure
we give the RS multiple chances before shooting it down.
2) Also including a bug fix around the instrumentation that logs regionserver
reports. If the call to master.regionserverReport is unsuccessful, we should
preserve the old behavior and let the exception propagate. Specifically, the rest
of the code that follows the rsReport should only be executed if the report
was successful.
Test Plan: unit tests
Reviewers: rshroff, manukranthk
Reviewed By: manukranthk
CC: hbase-eng@
Differential Revision: https://phabricator.fb.com/D840170
Modified:
hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/HConstants.java
hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
Modified: hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/HConstants.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/HConstants.java?rev=1492337&r1=1492336&r2=1492337&view=diff
==============================================================================
--- hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/HConstants.java (original)
+++ hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/HConstants.java Wed Jun 12 18:35:47 2013
@@ -530,11 +530,13 @@ public final class HConstants {
* timeout for each RPC
*/
public static String HBASE_RPC_TIMEOUT_KEY = "hbase.rpc.timeout";
+ public static String HBASE_RS_REPORT_TIMEOUT_KEY = "hbase.regionserverReport.timeout";
/**
* Default value of {@link #HBASE_RPC_TIMEOUT_KEY}
*/
public static int DEFAULT_HBASE_RPC_TIMEOUT = 60000;
+ public static int DEFAULT_RS_REPORT_TIMEOUT = 3000;
/**
* pause between rpc or connect retries
Modified: hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java?rev=1492337&r1=1492336&r2=1492337&view=diff
==============================================================================
--- hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java (original)
+++ hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java Wed Jun 12 18:35:47 2013
@@ -320,7 +320,7 @@ public class HRegionServer implements HR
// A sleeper that sleeps for msgInterval.
private final Sleeper sleeper;
- private final int rpcTimeout;
+ private final int rpcTimeoutToMaster;
// Address passed in to constructor. This is not always the address we run
// with. For example, if passed port is 0, then we are to pick a port. The
@@ -430,9 +430,9 @@ public class HRegionServer implements HR
this.numRegionsToReport =
conf.getInt("hbase.regionserver.numregionstoreport", 10);
- this.rpcTimeout = conf.getInt(
- HConstants.HBASE_RPC_TIMEOUT_KEY,
- HConstants.DEFAULT_HBASE_RPC_TIMEOUT);
+ this.rpcTimeoutToMaster = conf.getInt(
+ HConstants.HBASE_RS_REPORT_TIMEOUT_KEY,
+ HConstants.DEFAULT_RS_REPORT_TIMEOUT);
responseSizeLimit = conf.getLong("hbase.regionserver.results.size.max",
(long)Integer.MAX_VALUE); // set the max to 2G
@@ -750,6 +750,7 @@ public class HRegionServer implements HR
LOG.debug(HRegionServer.printFailedRegionserverReport(this.serverInfo,
outboundMessages.toArray(EMPTY_HMSG_ARRAY),
getMostLoadedRegions(), msgs, (Throwable)e));
+ throw e;
}
LOG.debug("Attempted regionserver report with the master");
lastMsg = System.currentTimeMillis();
@@ -1961,7 +1962,7 @@ public class HRegionServer implements HR
// should retry indefinitely.
master = (HMasterRegionInterface)HBaseRPC.getProxy(
HMasterRegionInterface.class, HBaseRPCProtocolVersion.versionID,
- masterAddress.getInetSocketAddress(), this.conf, this.rpcTimeout,
+ masterAddress.getInetSocketAddress(), this.conf, this.rpcTimeoutToMaster,
HBaseRPCOptions.DEFAULT);
} catch (IOException e) {
LOG.warn("Unable to connect to master. Retrying. Error was:", e);