You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by je...@apache.org on 2014/04/05 03:05:25 UTC
svn commit: r1584948 - in /hbase/branches/0.98/hbase-server/src:
main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
test/java/org/apache/hadoop/hbase/master/TestAssignmentManagerOnCluster.java
Author: jeffreyz
Date: Sat Apr 5 01:05:25 2014
New Revision: 1584948
URL: http://svn.apache.org/r1584948
Log:
HBASE-10895: unassigning a region fails because the hosting region server is in the FailedServerList
Modified:
hbase/branches/0.98/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
hbase/branches/0.98/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManagerOnCluster.java
Modified: hbase/branches/0.98/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.98/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java?rev=1584948&r1=1584947&r2=1584948&view=diff
==============================================================================
--- hbase/branches/0.98/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java (original)
+++ hbase/branches/0.98/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java Sat Apr 5 01:05:25 2014
@@ -1695,6 +1695,7 @@ public class AssignmentManager extends Z
if (t instanceof RemoteException) {
t = ((RemoteException)t).unwrapRemoteException();
}
+ boolean logRetries = true;
if (t instanceof NotServingRegionException
|| t instanceof RegionServerStoppedException
|| t instanceof ServerNotRunningYetException
@@ -1708,34 +1709,48 @@ public class AssignmentManager extends Z
regionOffline(region);
}
return;
- } else if (state != null
- && t instanceof RegionAlreadyInTransitionException) {
- // RS is already processing this region, only need to update the timestamp
- LOG.debug("update " + state + " the timestamp.");
- state.updateTimestampToNow();
- if (maxWaitTime < 0) {
- maxWaitTime = EnvironmentEdgeManager.currentTimeMillis()
- + this.server.getConfiguration().getLong(ALREADY_IN_TRANSITION_WAITTIME,
- DEFAULT_ALREADY_IN_TRANSITION_WAITTIME);
- }
- try {
+ } else if ((t instanceof FailedServerException) || (state != null &&
+ t instanceof RegionAlreadyInTransitionException)) {
+ long sleepTime = 0;
+ Configuration conf = this.server.getConfiguration();
+ if(t instanceof FailedServerException) {
+ sleepTime = 1 + conf.getInt(RpcClient.FAILED_SERVER_EXPIRY_KEY,
+ RpcClient.FAILED_SERVER_EXPIRY_DEFAULT);
+ } else {
+ // RS is already processing this region, only need to update the timestamp
+ LOG.debug("update " + state + " the timestamp.");
+ state.updateTimestampToNow();
+ if (maxWaitTime < 0) {
+ maxWaitTime =
+ EnvironmentEdgeManager.currentTimeMillis()
+ + conf.getLong(ALREADY_IN_TRANSITION_WAITTIME,
+ DEFAULT_ALREADY_IN_TRANSITION_WAITTIME);
+ }
long now = EnvironmentEdgeManager.currentTimeMillis();
if (now < maxWaitTime) {
LOG.debug("Region is already in transition; "
+ "waiting up to " + (maxWaitTime - now) + "ms", t);
- Thread.sleep(100);
+ sleepTime = 100;
i--; // reset the try count
+ logRetries = false;
+ }
+ }
+ try {
+ if (sleepTime > 0) {
+ Thread.sleep(sleepTime);
}
} catch (InterruptedException ie) {
LOG.warn("Failed to unassign "
+ region.getRegionNameAsString() + " since interrupted", ie);
Thread.currentThread().interrupt();
- if (!tomActivated) {
+ if (!tomActivated && state != null) {
regionStates.updateRegionState(region, State.FAILED_CLOSE);
}
return;
}
- } else {
+ }
+
+ if (logRetries) {
LOG.info("Server " + server + " returned " + t + " for "
+ region.getRegionNameAsString() + ", try=" + i
+ " of " + this.maximumAttempts, t);
Modified: hbase/branches/0.98/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManagerOnCluster.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.98/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManagerOnCluster.java?rev=1584948&r1=1584947&r2=1584948&view=diff
==============================================================================
--- hbase/branches/0.98/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManagerOnCluster.java (original)
+++ hbase/branches/0.98/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManagerOnCluster.java Sat Apr 5 01:05:25 2014
@@ -141,7 +141,7 @@ public class TestAssignmentManagerOnClus
@Test (timeout=120000)
public void testAssignRegionOnRestartedServer() throws Exception {
String table = "testAssignRegionOnRestartedServer";
- TEST_UTIL.getMiniHBaseCluster().getConf().setInt("hbase.assignment.maximum.attempts", 40);
+ TEST_UTIL.getMiniHBaseCluster().getConf().setInt("hbase.assignment.maximum.attempts", 20);
TEST_UTIL.getMiniHBaseCluster().stopMaster(0);
TEST_UTIL.getMiniHBaseCluster().startMaster(); //restart the master so that conf take into affect