You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by je...@apache.org on 2014/04/05 02:58:55 UTC
svn commit: r1584947 - in /hbase/trunk/hbase-server/src:
main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
test/java/org/apache/hadoop/hbase/master/TestAssignmentManagerOnCluster.java
Author: jeffreyz
Date: Sat Apr 5 00:58:55 2014
New Revision: 1584947
URL: http://svn.apache.org/r1584947
Log:
HBASE-10895: unassigning a region fails because the hosting region server is in FailedServerList
Modified:
hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManagerOnCluster.java
Modified: hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
URL: http://svn.apache.org/viewvc/hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java?rev=1584947&r1=1584946&r2=1584947&view=diff
==============================================================================
--- hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java (original)
+++ hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java Sat Apr 5 00:58:55 2014
@@ -1698,10 +1698,10 @@ public class AssignmentManager extends Z
if (t instanceof RemoteException) {
t = ((RemoteException)t).unwrapRemoteException();
}
+ boolean logRetries = true;
if (t instanceof NotServingRegionException
|| t instanceof RegionServerStoppedException
- || t instanceof ServerNotRunningYetException
- || t instanceof FailedServerException) {
+ || t instanceof ServerNotRunningYetException) {
LOG.debug("Offline " + region.getRegionNameAsString()
+ ", it's not any more on " + server, t);
if (transitionInZK) {
@@ -1711,34 +1711,48 @@ public class AssignmentManager extends Z
regionOffline(region);
}
return;
- } else if (state != null
- && t instanceof RegionAlreadyInTransitionException) {
- // RS is already processing this region, only need to update the timestamp
- LOG.debug("update " + state + " the timestamp.");
- state.updateTimestampToNow();
- if (maxWaitTime < 0) {
- maxWaitTime = EnvironmentEdgeManager.currentTimeMillis()
- + this.server.getConfiguration().getLong(ALREADY_IN_TRANSITION_WAITTIME,
- DEFAULT_ALREADY_IN_TRANSITION_WAITTIME);
- }
- try {
+ } else if ((t instanceof FailedServerException) || (state != null &&
+ t instanceof RegionAlreadyInTransitionException)) {
+ long sleepTime = 0;
+ Configuration conf = this.server.getConfiguration();
+ if(t instanceof FailedServerException) {
+ sleepTime = 1 + conf.getInt(RpcClient.FAILED_SERVER_EXPIRY_KEY,
+ RpcClient.FAILED_SERVER_EXPIRY_DEFAULT);
+ } else {
+ // RS is already processing this region, only need to update the timestamp
+ LOG.debug("update " + state + " the timestamp.");
+ state.updateTimestampToNow();
+ if (maxWaitTime < 0) {
+ maxWaitTime =
+ EnvironmentEdgeManager.currentTimeMillis()
+ + conf.getLong(ALREADY_IN_TRANSITION_WAITTIME,
+ DEFAULT_ALREADY_IN_TRANSITION_WAITTIME);
+ }
long now = EnvironmentEdgeManager.currentTimeMillis();
if (now < maxWaitTime) {
LOG.debug("Region is already in transition; "
+ "waiting up to " + (maxWaitTime - now) + "ms", t);
- Thread.sleep(100);
+ sleepTime = 100;
i--; // reset the try count
+ logRetries = false;
+ }
+ }
+ try {
+ if (sleepTime > 0) {
+ Thread.sleep(sleepTime);
}
} catch (InterruptedException ie) {
LOG.warn("Failed to unassign "
+ region.getRegionNameAsString() + " since interrupted", ie);
Thread.currentThread().interrupt();
- if (!tomActivated) {
+ if (!tomActivated && state != null) {
regionStates.updateRegionState(region, State.FAILED_CLOSE);
}
return;
}
- } else {
+ }
+
+ if (logRetries) {
LOG.info("Server " + server + " returned " + t + " for "
+ region.getRegionNameAsString() + ", try=" + i
+ " of " + this.maximumAttempts, t);
Modified: hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManagerOnCluster.java
URL: http://svn.apache.org/viewvc/hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManagerOnCluster.java?rev=1584947&r1=1584946&r2=1584947&view=diff
==============================================================================
--- hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManagerOnCluster.java (original)
+++ hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManagerOnCluster.java Sat Apr 5 00:58:55 2014
@@ -141,7 +141,7 @@ public class TestAssignmentManagerOnClus
@Test (timeout=120000)
public void testAssignRegionOnRestartedServer() throws Exception {
String table = "testAssignRegionOnRestartedServer";
- TEST_UTIL.getMiniHBaseCluster().getConf().setInt("hbase.assignment.maximum.attempts", 40);
+ TEST_UTIL.getMiniHBaseCluster().getConf().setInt("hbase.assignment.maximum.attempts", 20);
TEST_UTIL.getMiniHBaseCluster().stopMaster(0);
TEST_UTIL.getMiniHBaseCluster().startMaster(); //restart the master so that conf take into affect
@@ -754,11 +754,11 @@ public class TestAssignmentManagerOnClus
// You can't assign a dead region before SSH
am.assign(hri, true, true);
RegionState state = regionStates.getRegionState(hri);
- assertTrue(state.isOffline());
+ assertTrue(state.isFailedClose());
// You can't unassign a dead region before SSH either
am.unassign(hri, true);
- assertTrue(state.isOffline());
+ assertTrue(state.isFailedClose());
// Enable SSH so that log can be split
master.enableSSH(true);