You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by je...@apache.org on 2014/03/27 00:22:16 UTC

svn commit: r1582116 - /hbase/branches/0.98/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java

Author: jeffreyz
Date: Wed Mar 26 23:22:15 2014
New Revision: 1582116

URL: http://svn.apache.org/r1582116
Log:
HBASE-10833: Region assignment may fail during cluster start up

Modified:
    hbase/branches/0.98/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java

Modified: hbase/branches/0.98/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.98/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java?rev=1582116&r1=1582115&r2=1582116&view=diff
==============================================================================
--- hbase/branches/0.98/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java (original)
+++ hbase/branches/0.98/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java Wed Mar 26 23:22:15 2014
@@ -61,6 +61,8 @@ import org.apache.hadoop.hbase.exception
 import org.apache.hadoop.hbase.executor.EventHandler;
 import org.apache.hadoop.hbase.executor.EventType;
 import org.apache.hadoop.hbase.executor.ExecutorService;
+import org.apache.hadoop.hbase.ipc.RpcClient;
+import org.apache.hadoop.hbase.ipc.RpcClient.FailedServerException;
 import org.apache.hadoop.hbase.ipc.RpcClient.FailedServerException;
 import org.apache.hadoop.hbase.ipc.ServerNotRunningYetException;
 import org.apache.hadoop.hbase.master.RegionState.State;
@@ -1844,12 +1846,14 @@ public class AssignmentManager extends Z
       final boolean setOfflineInZK, final boolean forceNewPlan) {
     long startTime = EnvironmentEdgeManager.currentTimeMillis();
     try {
+      Configuration conf = server.getConfiguration();
       RegionState currentState = state;
       int versionOfOfflineNode = -1;
       RegionPlan plan = null;
       long maxWaitTime = -1;
       HRegionInfo region = state.getRegion();
       RegionOpeningState regionOpenState;
+      Throwable previousException = null;
       for (int i = 1; i <= maximumAttempts; i++) {
         if (server.isStopped() || server.isAborted()) {
           LOG.info("Skip assigning " + region.getRegionNameAsString()
@@ -1948,6 +1952,7 @@ public class AssignmentManager extends Z
           if (t instanceof RemoteException) {
             t = ((RemoteException) t).unwrapRemoteException();
           }
+          previousException = t;
 
           // Should we wait a little before retrying? If the server is starting it's yes.
           // If the region is already in transition, it's yes as well: we want to be sure that
@@ -2048,6 +2053,22 @@ public class AssignmentManager extends Z
             currentState = regionStates.updateRegionState(region, State.OFFLINE);
             versionOfOfflineNode = -1;
             plan = newPlan;
+          } else if(plan.getDestination().equals(newPlan.getDestination()) &&
+              previousException instanceof FailedServerException) {
+            try {
+              LOG.info("Trying to re-assign " + region.getRegionNameAsString() + 
+                " to the same failed server.");
+              Thread.sleep(1 + conf.getInt(RpcClient.FAILED_SERVER_EXPIRY_KEY, 
+                RpcClient.FAILED_SERVER_EXPIRY_DEFAULT));
+            } catch (InterruptedException ie) {
+              LOG.warn("Failed to assign "
+                  + region.getRegionNameAsString() + " since interrupted", ie);
+              Thread.currentThread().interrupt();
+              if (!tomActivated) {
+                regionStates.updateRegionState(region, State.FAILED_OPEN);
+              }
+              return;
+            }
           }
         }
       }