You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by je...@apache.org on 2014/04/05 03:05:25 UTC

svn commit: r1584948 - in /hbase/branches/0.98/hbase-server/src: main/java/org/apache/hadoop/hbase/master/AssignmentManager.java test/java/org/apache/hadoop/hbase/master/TestAssignmentManagerOnCluster.java

Author: jeffreyz
Date: Sat Apr  5 01:05:25 2014
New Revision: 1584948

URL: http://svn.apache.org/r1584948
Log:
HBASE-10895: unassigning a region fails because the hosting region server is in FailedServerList

Modified:
    hbase/branches/0.98/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
    hbase/branches/0.98/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManagerOnCluster.java

Modified: hbase/branches/0.98/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.98/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java?rev=1584948&r1=1584947&r2=1584948&view=diff
==============================================================================
--- hbase/branches/0.98/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java (original)
+++ hbase/branches/0.98/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java Sat Apr  5 01:05:25 2014
@@ -1695,6 +1695,7 @@ public class AssignmentManager extends Z
         if (t instanceof RemoteException) {
           t = ((RemoteException)t).unwrapRemoteException();
         }
+        boolean logRetries = true;
         if (t instanceof NotServingRegionException
             || t instanceof RegionServerStoppedException
             || t instanceof ServerNotRunningYetException
@@ -1708,34 +1709,48 @@ public class AssignmentManager extends Z
             regionOffline(region);
           }
           return;
-        } else if (state != null
-            && t instanceof RegionAlreadyInTransitionException) {
-          // RS is already processing this region, only need to update the timestamp
-          LOG.debug("update " + state + " the timestamp.");
-          state.updateTimestampToNow();
-          if (maxWaitTime < 0) {
-            maxWaitTime = EnvironmentEdgeManager.currentTimeMillis()
-              + this.server.getConfiguration().getLong(ALREADY_IN_TRANSITION_WAITTIME,
-                DEFAULT_ALREADY_IN_TRANSITION_WAITTIME);
-          }
-          try {
+        } else if ((t instanceof FailedServerException) || (state != null && 
+            t instanceof RegionAlreadyInTransitionException)) {
+          long sleepTime = 0;
+          Configuration conf = this.server.getConfiguration();
+          if(t instanceof FailedServerException) {
+            sleepTime = 1 + conf.getInt(RpcClient.FAILED_SERVER_EXPIRY_KEY, 
+                  RpcClient.FAILED_SERVER_EXPIRY_DEFAULT);
+          } else {
+            // RS is already processing this region, only need to update the timestamp
+            LOG.debug("update " + state + " the timestamp.");
+            state.updateTimestampToNow();
+            if (maxWaitTime < 0) {
+              maxWaitTime =
+                  EnvironmentEdgeManager.currentTimeMillis()
+                      + conf.getLong(ALREADY_IN_TRANSITION_WAITTIME,
+                        DEFAULT_ALREADY_IN_TRANSITION_WAITTIME);
+            }
             long now = EnvironmentEdgeManager.currentTimeMillis();
             if (now < maxWaitTime) {
               LOG.debug("Region is already in transition; "
                 + "waiting up to " + (maxWaitTime - now) + "ms", t);
-              Thread.sleep(100);
+              sleepTime = 100;
               i--; // reset the try count
+              logRetries = false;
+            }
+          }
+          try {
+            if (sleepTime > 0) {
+              Thread.sleep(sleepTime);
             }
           } catch (InterruptedException ie) {
             LOG.warn("Failed to unassign "
               + region.getRegionNameAsString() + " since interrupted", ie);
             Thread.currentThread().interrupt();
-            if (!tomActivated) {
+            if (!tomActivated && state != null) {
               regionStates.updateRegionState(region, State.FAILED_CLOSE);
             }
             return;
           }
-        } else {
+        }
+
+        if (logRetries) {
           LOG.info("Server " + server + " returned " + t + " for "
             + region.getRegionNameAsString() + ", try=" + i
             + " of " + this.maximumAttempts, t);

Modified: hbase/branches/0.98/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManagerOnCluster.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.98/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManagerOnCluster.java?rev=1584948&r1=1584947&r2=1584948&view=diff
==============================================================================
--- hbase/branches/0.98/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManagerOnCluster.java (original)
+++ hbase/branches/0.98/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManagerOnCluster.java Sat Apr  5 01:05:25 2014
@@ -141,7 +141,7 @@ public class TestAssignmentManagerOnClus
   @Test (timeout=120000)
   public void testAssignRegionOnRestartedServer() throws Exception {
     String table = "testAssignRegionOnRestartedServer";
-    TEST_UTIL.getMiniHBaseCluster().getConf().setInt("hbase.assignment.maximum.attempts", 40);
+    TEST_UTIL.getMiniHBaseCluster().getConf().setInt("hbase.assignment.maximum.attempts", 20);
     TEST_UTIL.getMiniHBaseCluster().stopMaster(0);
     TEST_UTIL.getMiniHBaseCluster().startMaster(); //restart the master so that conf take into affect