You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by jx...@apache.org on 2013/03/21 17:29:43 UTC

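svn commit: r1459384 - in /hbase/trunk: hbase-client/src/main/java/org/apache/hadoop/hbase/master/ hbase-protocol/src/main/java/org/apache/hadoop/hbase/protobuf/generated/ hbase-protocol/src/main/protobuf/ hbase-server/src/main/java/org/apache/hadoop/hbase/master/ hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/ hbase-server/src/test/java/org/apache/hadoop/hbase/master/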

Author: jxiang
Date: Thu Mar 21 16:29:42 2013
New Revision: 1459384

URL: http://svn.apache.org/r1459384
Log:
HBASE-8137 Add failed to open/close region state

Modified:
    hbase/trunk/hbase-client/src/main/java/org/apache/hadoop/hbase/master/RegionState.java
    hbase/trunk/hbase-protocol/src/main/java/org/apache/hadoop/hbase/protobuf/generated/ClusterStatusProtos.java
    hbase/trunk/hbase-protocol/src/main/protobuf/ClusterStatus.proto
    hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
    hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
    hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStates.java
    hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/DeleteTableHandler.java
    hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/DisableTableHandler.java
    hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/master/Mocking.java
    hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManager.java
    hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManagerOnCluster.java

Modified: hbase/trunk/hbase-client/src/main/java/org/apache/hadoop/hbase/master/RegionState.java
URL: http://svn.apache.org/viewvc/hbase/trunk/hbase-client/src/main/java/org/apache/hadoop/hbase/master/RegionState.java?rev=1459384&r1=1459383&r2=1459384&view=diff
==============================================================================
--- hbase/trunk/hbase-client/src/main/java/org/apache/hadoop/hbase/master/RegionState.java (original)
+++ hbase/trunk/hbase-client/src/main/java/org/apache/hadoop/hbase/master/RegionState.java Thu Mar 21 16:29:42 2013
@@ -44,7 +44,9 @@ public class RegionState implements org.
     CLOSING,        // server has begun to close but not yet done
     CLOSED,         // server closed region and updated meta
     SPLITTING,      // server started split of a region
-    SPLIT           // server completed split of a region
+    SPLIT,          // server completed split of a region
+    FAILED_OPEN,    // failed to open, and won't retry any more
+    FAILED_CLOSE    // failed to close, and won't retry any more
   }
 
   // Many threads can update the state at the stamp at the same time
@@ -126,6 +128,14 @@ public class RegionState implements org.
     return state == State.SPLIT;
   }
 
+  public boolean isFailedOpen() {
+    return state == State.FAILED_OPEN;
+  }
+
+  public boolean isFailedClose() {
+    return state == State.FAILED_CLOSE;
+  }
+
   public boolean isPendingOpenOrOpeningOnServer(final ServerName sn) {
     return isOnServer(sn) && (isPendingOpen() || isOpening());
   }
@@ -195,6 +205,12 @@ public class RegionState implements org.
     case SPLIT:
       rs = ClusterStatusProtos.RegionState.State.SPLIT;
       break;
+    case FAILED_OPEN:
+      rs = ClusterStatusProtos.RegionState.State.FAILED_OPEN;
+      break;
+    case FAILED_CLOSE:
+      rs = ClusterStatusProtos.RegionState.State.FAILED_CLOSE;
+      break;
     default:
       throw new IllegalStateException("");
     }
@@ -239,6 +255,12 @@ public class RegionState implements org.
     case SPLIT:
       state = State.SPLIT;
       break;
+    case FAILED_OPEN:
+      state = State.FAILED_OPEN;
+      break;
+    case FAILED_CLOSE:
+      state = State.FAILED_CLOSE;
+      break;
     default:
       throw new IllegalStateException("");
     }

Modified: hbase/trunk/hbase-protocol/src/main/java/org/apache/hadoop/hbase/protobuf/generated/ClusterStatusProtos.java
URL: http://svn.apache.org/viewvc/hbase/trunk/hbase-protocol/src/main/java/org/apache/hadoop/hbase/protobuf/generated/ClusterStatusProtos.java?rev=1459384&r1=1459383&r2=1459384&view=diff
==============================================================================
--- hbase/trunk/hbase-protocol/src/main/java/org/apache/hadoop/hbase/protobuf/generated/ClusterStatusProtos.java (original)
+++ hbase/trunk/hbase-protocol/src/main/java/org/apache/hadoop/hbase/protobuf/generated/ClusterStatusProtos.java Thu Mar 21 16:29:42 2013
@@ -63,6 +63,8 @@ public final class ClusterStatusProtos {
       CLOSED(6, 6),
       SPLITTING(7, 7),
       SPLIT(8, 8),
+      FAILED_OPEN(9, 9),
+      FAILED_CLOSE(10, 10),
       ;
       
       public static final int OFFLINE_VALUE = 0;
@@ -74,6 +76,8 @@ public final class ClusterStatusProtos {
       public static final int CLOSED_VALUE = 6;
       public static final int SPLITTING_VALUE = 7;
       public static final int SPLIT_VALUE = 8;
+      public static final int FAILED_OPEN_VALUE = 9;
+      public static final int FAILED_CLOSE_VALUE = 10;
       
       
       public final int getNumber() { return value; }
@@ -89,6 +93,8 @@ public final class ClusterStatusProtos {
           case 6: return CLOSED;
           case 7: return SPLITTING;
           case 8: return SPLIT;
+          case 9: return FAILED_OPEN;
+          case 10: return FAILED_CLOSE;
           default: return null;
         }
       }
@@ -119,7 +125,7 @@ public final class ClusterStatusProtos {
       }
       
       private static final State[] VALUES = {
-        OFFLINE, PENDING_OPEN, OPENING, OPEN, PENDING_CLOSE, CLOSING, CLOSED, SPLITTING, SPLIT, 
+        OFFLINE, PENDING_OPEN, OPENING, OPEN, PENDING_CLOSE, CLOSING, CLOSED, SPLITTING, SPLIT, FAILED_OPEN, FAILED_CLOSE, 
       };
       
       public static State valueOf(
@@ -4350,28 +4356,28 @@ public final class ClusterStatusProtos {
   static {
     java.lang.String[] descriptorData = {
       "\n\023ClusterStatus.proto\032\013hbase.proto\032\017Clus" +
-      "terId.proto\032\010FS.proto\"\346\001\n\013RegionState\022\037\n" +
+      "terId.proto\032\010FS.proto\"\211\002\n\013RegionState\022\037\n" +
       "\nregionInfo\030\001 \002(\0132\013.RegionInfo\022!\n\005state\030" +
       "\002 \002(\0162\022.RegionState.State\022\r\n\005stamp\030\003 \001(\004" +
-      "\"\203\001\n\005State\022\013\n\007OFFLINE\020\000\022\020\n\014PENDING_OPEN\020" +
+      "\"\246\001\n\005State\022\013\n\007OFFLINE\020\000\022\020\n\014PENDING_OPEN\020" +
       "\001\022\013\n\007OPENING\020\002\022\010\n\004OPEN\020\003\022\021\n\rPENDING_CLOS" +
       "E\020\004\022\013\n\007CLOSING\020\005\022\n\n\006CLOSED\020\006\022\r\n\tSPLITTIN" +
-      "G\020\007\022\t\n\005SPLIT\020\010\"W\n\022RegionInTransition\022\036\n\004" +
-      "spec\030\001 \002(\0132\020.RegionSpecifier\022!\n\013regionSt" +
-      "ate\030\002 \002(\0132\014.RegionState\"N\n\016LiveServerInf",
-      "o\022\033\n\006server\030\001 \002(\0132\013.ServerName\022\037\n\nserver" +
-      "Load\030\002 \002(\0132\013.ServerLoad\"\327\002\n\rClusterStatu" +
-      "s\022.\n\014hbaseVersion\030\001 \001(\0132\030.HBaseVersionFi" +
-      "leContent\022$\n\013liveServers\030\002 \003(\0132\017.LiveSer" +
-      "verInfo\022 \n\013deadServers\030\003 \003(\0132\013.ServerNam" +
-      "e\0220\n\023regionsInTransition\030\004 \003(\0132\023.RegionI" +
-      "nTransition\022\035\n\tclusterId\030\005 \001(\0132\n.Cluster" +
-      "Id\022(\n\022masterCoprocessors\030\006 \003(\0132\014.Coproce" +
-      "ssor\022\033\n\006master\030\007 \001(\0132\013.ServerName\022\"\n\rbac" +
-      "kupMasters\030\010 \003(\0132\013.ServerName\022\022\n\nbalance",
-      "rOn\030\t \001(\010BF\n*org.apache.hadoop.hbase.pro" +
-      "tobuf.generatedB\023ClusterStatusProtosH\001\240\001" +
-      "\001"
+      "G\020\007\022\t\n\005SPLIT\020\010\022\017\n\013FAILED_OPEN\020\t\022\020\n\014FAILE" +
+      "D_CLOSE\020\n\"W\n\022RegionInTransition\022\036\n\004spec\030" +
+      "\001 \002(\0132\020.RegionSpecifier\022!\n\013regionState\030\002",
+      " \002(\0132\014.RegionState\"N\n\016LiveServerInfo\022\033\n\006" +
+      "server\030\001 \002(\0132\013.ServerName\022\037\n\nserverLoad\030" +
+      "\002 \002(\0132\013.ServerLoad\"\327\002\n\rClusterStatus\022.\n\014" +
+      "hbaseVersion\030\001 \001(\0132\030.HBaseVersionFileCon" +
+      "tent\022$\n\013liveServers\030\002 \003(\0132\017.LiveServerIn" +
+      "fo\022 \n\013deadServers\030\003 \003(\0132\013.ServerName\0220\n\023" +
+      "regionsInTransition\030\004 \003(\0132\023.RegionInTran" +
+      "sition\022\035\n\tclusterId\030\005 \001(\0132\n.ClusterId\022(\n" +
+      "\022masterCoprocessors\030\006 \003(\0132\014.Coprocessor\022" +
+      "\033\n\006master\030\007 \001(\0132\013.ServerName\022\"\n\rbackupMa",
+      "sters\030\010 \003(\0132\013.ServerName\022\022\n\nbalancerOn\030\t" +
+      " \001(\010BF\n*org.apache.hadoop.hbase.protobuf" +
+      ".generatedB\023ClusterStatusProtosH\001\240\001\001"
     };
     com.google.protobuf.Descriptors.FileDescriptor.InternalDescriptorAssigner assigner =
       new com.google.protobuf.Descriptors.FileDescriptor.InternalDescriptorAssigner() {

Modified: hbase/trunk/hbase-protocol/src/main/protobuf/ClusterStatus.proto
URL: http://svn.apache.org/viewvc/hbase/trunk/hbase-protocol/src/main/protobuf/ClusterStatus.proto?rev=1459384&r1=1459383&r2=1459384&view=diff
==============================================================================
--- hbase/trunk/hbase-protocol/src/main/protobuf/ClusterStatus.proto (original)
+++ hbase/trunk/hbase-protocol/src/main/protobuf/ClusterStatus.proto Thu Mar 21 16:29:42 2013
@@ -41,6 +41,8 @@ message RegionState {
     CLOSED = 6;        // server closed region and updated meta
     SPLITTING = 7;     // server started split of a region
     SPLIT = 8;         // server completed split of a region
+    FAILED_OPEN = 9;   // failed to open, and won't retry any more
+    FAILED_CLOSE = 10; // failed to close, and won't retry any more
   }
 }
 

Modified: hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
URL: http://svn.apache.org/viewvc/hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java?rev=1459384&r1=1459383&r2=1459384&view=diff
==============================================================================
--- hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java (original)
+++ hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java Thu Mar 21 16:29:42 2013
@@ -1529,20 +1529,19 @@ public class AssignmentManager extends Z
   private void unassign(final HRegionInfo region,
       final RegionState state, final int versionOfClosingNode,
       final ServerName dest, final boolean transitionInZK) {
-    // Send CLOSE RPC
     ServerName server = state.getServerName();
-    // ClosedRegionhandler can remove the server from this.regions
-    if (!serverManager.isServerOnline(server)) {
-      if (transitionInZK) {
-        // delete the node. if no node exists need not bother.
-        deleteClosingOrClosedNode(region);
-      }
-      regionOffline(region);
-      return;
-    }
-
     for (int i = 1; i <= this.maximumAttempts; i++) {
+      // ClosedRegionhandler can remove the server from this.regions
+      if (!serverManager.isServerOnline(server)) {
+        if (transitionInZK) {
+          // delete the node. if no node exists need not bother.
+          deleteClosingOrClosedNode(region);
+        }
+        regionOffline(region);
+        return;
+      }
       try {
+        // Send CLOSE RPC
         if (serverManager.sendRegionClose(server, region,
           versionOfClosingNode, dest, transitionInZK)) {
           LOG.debug("Sent CLOSE to " + server + " for region " +
@@ -1557,7 +1556,8 @@ public class AssignmentManager extends Z
         if (t instanceof RemoteException) {
           t = ((RemoteException)t).unwrapRemoteException();
         }
-        if (t instanceof NotServingRegionException) {
+        if (t instanceof NotServingRegionException
+            || t instanceof RegionServerStoppedException) {
           if (transitionInZK) {
             deleteClosingOrClosedNode(region);
           }
@@ -1574,6 +1574,10 @@ public class AssignmentManager extends Z
         // Presume retry or server will expire.
       }
     }
+    // Run out of attempts
+    if (!tomActivated) {
+      regionStates.updateRegionState(region, RegionState.State.FAILED_CLOSE);
+    }
   }
 
   /**
@@ -1597,13 +1601,15 @@ public class AssignmentManager extends Z
         }
       case CLOSING:
       case PENDING_CLOSE:
+      case FAILED_CLOSE:
         unassign(region, state, -1, null, false);
+        state = regionStates.getRegionState(region);
+        if (state.isOffline()) break;
+      case FAILED_OPEN:
       case CLOSED:
-        if (!state.isOffline()) {
-          LOG.debug("Forcing OFFLINE; was=" + state);
-          state = regionStates.updateRegionState(
-            region, RegionState.State.OFFLINE);
-        }
+        LOG.debug("Forcing OFFLINE; was=" + state);
+        state = regionStates.updateRegionState(
+          region, RegionState.State.OFFLINE);
       case OFFLINE:
         break;
       default:
@@ -1637,6 +1643,8 @@ public class AssignmentManager extends Z
         LOG.warn("Unable to determine a plan to assign " + region);
         if (tomActivated){
           this.timeoutMonitor.setAllRegionServersOffline(true);
+        } else {
+          regionStates.updateRegionState(region, RegionState.State.FAILED_OPEN);
         }
         return;
       }
@@ -1662,6 +1670,10 @@ public class AssignmentManager extends Z
         }
       }
       if (setOfflineInZK && versionOfOfflineNode == -1) {
+        LOG.warn("Unable to set offline in ZooKeeper to assign " + region);
+        if (!tomActivated) {
+          regionStates.updateRegionState(region, RegionState.State.FAILED_OPEN);
+        }
         return;
       }
       if (this.server.isStopped()) {
@@ -1740,6 +1752,9 @@ public class AssignmentManager extends Z
             LOG.warn("Failed to assign "
                 + region.getRegionNameAsString() + " since interrupted", ie);
             Thread.currentThread().interrupt();
+            if (!tomActivated) {
+              regionStates.updateRegionState(region, RegionState.State.FAILED_OPEN);
+            }
             return;
           }
         } else if (retry) {
@@ -1772,6 +1787,8 @@ public class AssignmentManager extends Z
         if (newPlan == null) {
           if (tomActivated) {
             this.timeoutMonitor.setAllRegionServersOffline(true);
+          } else {
+            regionStates.updateRegionState(region, RegionState.State.FAILED_OPEN);
           }
           LOG.warn("Unable to find a viable location to assign region " +
               region.getRegionNameAsString());
@@ -1788,6 +1805,10 @@ public class AssignmentManager extends Z
         }
       }
     }
+    // Run out of attempts
+    if (!tomActivated) {
+      regionStates.updateRegionState(region, RegionState.State.FAILED_OPEN);
+    }
   }
 
   private void processAlreadyOpenedRegion(HRegionInfo region, ServerName sn) {
@@ -1914,6 +1935,10 @@ public class AssignmentManager extends Z
     }
 
     if (newPlan) {
+      if (randomPlan.getDestination() == null) {
+        LOG.warn("Can't find a destination for region" + encodedName);
+        return null;
+      }
       LOG.debug("No previous transition plan was found (or we are ignoring " +
         "an existing plan) for " + region.getRegionNameAsString() +
         " so generated a random one; " + randomPlan + "; " +
@@ -2052,10 +2077,18 @@ public class AssignmentManager extends Z
           return;
         }
         state = regionStates.updateRegionState(region, RegionState.State.PENDING_CLOSE);
-      } else if (force && (state.isPendingClose() || state.isClosing())) {
+      } else if (state.isFailedOpen()) {
+        // The region is not open yet
+        regionOffline(region);
+        return;
+      } else if (force && (state.isPendingClose()
+          || state.isClosing() || state.isFailedClose())) {
         LOG.debug("Attempting to unassign region " + region.getRegionNameAsString() +
           " which is already " + state.getState()  +
           " but forcing to send a CLOSE RPC again ");
+        if (state.isFailedClose()) {
+          state = regionStates.updateRegionState(region, RegionState.State.PENDING_CLOSE);
+        }
         state.updateTimestampToNow();
       } else {
         LOG.debug("Attempting to unassign region " +
@@ -2134,15 +2167,20 @@ public class AssignmentManager extends Z
    * @param regionInfo region to wait on assignment for
    * @throws InterruptedException
    */
-  public void waitForAssignment(HRegionInfo regionInfo)
+  public boolean waitForAssignment(HRegionInfo regionInfo)
       throws InterruptedException {
-    while(!this.server.isStopped() &&
-        !regionStates.isRegionAssigned(regionInfo)) {
+    while (!regionStates.isRegionAssigned(regionInfo)) {
+      if (regionStates.isRegionFailedToOpen(regionInfo)
+          || this.server.isStopped()) {
+        return false;
+      }
+
       // We should receive a notification, but it's
       //  better to have a timeout to recheck the condition here:
       //  it lowers the impact of a race condition if any
       regionStates.waitForUpdate(100);
     }
+    return true;
   }
 
   /**
@@ -2690,6 +2728,8 @@ public class AssignmentManager extends Z
 
       case SPLIT:
       case SPLITTING:
+      case FAILED_OPEN:
+      case FAILED_CLOSE:
         break;
 
       default:

Modified: hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
URL: http://svn.apache.org/viewvc/hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java?rev=1459384&r1=1459383&r2=1459384&view=diff
==============================================================================
--- hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java (original)
+++ hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java Thu Mar 21 16:29:42 2013
@@ -92,7 +92,6 @@ import org.apache.hadoop.hbase.master.ha
 import org.apache.hadoop.hbase.master.handler.DisableTableHandler;
 import org.apache.hadoop.hbase.master.handler.EnableTableHandler;
 import org.apache.hadoop.hbase.master.handler.ModifyTableHandler;
-import org.apache.hadoop.hbase.master.handler.ServerShutdownHandler;
 import org.apache.hadoop.hbase.master.handler.TableAddFamilyHandler;
 import org.apache.hadoop.hbase.master.handler.TableDeleteFamilyHandler;
 import org.apache.hadoop.hbase.master.handler.TableModifyFamilyHandler;

Modified: hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStates.java
URL: http://svn.apache.org/viewvc/hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStates.java?rev=1459384&r1=1459383&r2=1459384&view=diff
==============================================================================
--- hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStates.java (original)
+++ hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStates.java Thu Mar 21 16:29:42 2013
@@ -133,6 +133,24 @@ public class RegionStates {
   }
 
   /**
+   * @return True if specified region failed to open.
+   */
+  public synchronized boolean isRegionFailedToOpen(final HRegionInfo hri) {
+    RegionState regionState = getRegionTransitionState(hri);
+    State state = regionState != null ? regionState.getState() : null;
+    return state == State.FAILED_OPEN;
+  }
+
+  /**
+   * @return True if specified region failed to close.
+   */
+  public synchronized boolean isRegionFailedToClose(final HRegionInfo hri) {
+    RegionState regionState = getRegionTransitionState(hri);
+    State state = regionState != null ? regionState.getState() : null;
+    return state == State.FAILED_CLOSE;
+  }
+
+  /**
    * Wait for the state map to be updated by assignment manager.
    */
   public synchronized void waitForUpdate(

Modified: hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/DeleteTableHandler.java
URL: http://svn.apache.org/viewvc/hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/DeleteTableHandler.java?rev=1459384&r1=1459383&r2=1459384&view=diff
==============================================================================
--- hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/DeleteTableHandler.java (original)
+++ hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/DeleteTableHandler.java Thu Mar 21 16:29:42 2013
@@ -36,6 +36,7 @@ import org.apache.hadoop.hbase.master.HM
 import org.apache.hadoop.hbase.master.MasterCoprocessorHost;
 import org.apache.hadoop.hbase.master.MasterFileSystem;
 import org.apache.hadoop.hbase.master.MasterServices;
+import org.apache.hadoop.hbase.master.RegionStates;
 import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.hbase.util.Threads;
 import org.apache.zookeeper.KeeperException;
@@ -66,17 +67,21 @@ public class DeleteTableHandler extends 
 
     // 1. Wait because of region in transition
     AssignmentManager am = this.masterServices.getAssignmentManager();
+    RegionStates states = am.getRegionStates();
     long waitTime = server.getConfiguration().
       getLong("hbase.master.wait.on.region", 5 * 60 * 1000);
     for (HRegionInfo region : regions) {
       long done = System.currentTimeMillis() + waitTime;
       while (System.currentTimeMillis() < done) {
-        if (!am.getRegionStates().isRegionInTransition(region)) break;
+        if (states.isRegionFailedToOpen(region)) {
+          am.regionOffline(region);
+        }
+        if (!states.isRegionInTransition(region)) break;
         Threads.sleep(waitingTimeForEvents);
         LOG.debug("Waiting on region to clear regions in transition; "
           + am.getRegionStates().getRegionTransitionState(region));
       }
-      if (am.getRegionStates().isRegionInTransition(region)) {
+      if (states.isRegionInTransition(region)) {
         throw new IOException("Waited hbase.master.wait.on.region (" +
           waitTime + "ms) for region to leave region " +
           region.getRegionNameAsString() + " in transitions");

Modified: hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/DisableTableHandler.java
URL: http://svn.apache.org/viewvc/hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/DisableTableHandler.java?rev=1459384&r1=1459383&r2=1459384&view=diff
==============================================================================
--- hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/DisableTableHandler.java (original)
+++ hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/DisableTableHandler.java Thu Mar 21 16:29:42 2013
@@ -202,11 +202,12 @@ public class DisableTableHandler extends
     protected void populatePool(ExecutorService pool) {
       RegionStates regionStates = assignmentManager.getRegionStates();
       for (HRegionInfo region: regions) {
-        if (regionStates.isRegionInTransition(region)) continue;
+        if (regionStates.isRegionInTransition(region)
+            && !regionStates.isRegionFailedToClose(region)) continue;
         final HRegionInfo hri = region;
         pool.execute(Trace.wrap(new Runnable() {
           public void run() {
-            assignmentManager.unassign(hri);
+            assignmentManager.unassign(hri, true);
           }
         }));
       }

Modified: hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/master/Mocking.java
URL: http://svn.apache.org/viewvc/hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/master/Mocking.java?rev=1459384&r1=1459383&r2=1459384&view=diff
==============================================================================
--- hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/master/Mocking.java (original)
+++ hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/master/Mocking.java Thu Mar 21 16:29:42 2013
@@ -24,6 +24,7 @@ import org.apache.hadoop.hbase.HRegionIn
 import org.apache.hadoop.hbase.RegionTransition;
 import org.apache.hadoop.hbase.ServerName;
 import org.apache.hadoop.hbase.executor.EventType;
+import org.apache.hadoop.hbase.master.RegionState.State;
 import org.apache.hadoop.hbase.zookeeper.ZKAssign;
 import org.apache.hadoop.hbase.zookeeper.ZKUtil;
 import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
@@ -34,6 +35,22 @@ import org.apache.zookeeper.KeeperExcept
  */
 public class Mocking {
 
+  static void waitForRegionFailedToCloseAndSetToPendingClose(
+      AssignmentManager am, HRegionInfo hri) throws InterruptedException {
+    // Since region server is fake, sendRegionClose will fail, and closing
+    // region will fail. For testing purpose, moving it back to pending close
+    boolean wait = true;
+    while (wait) {
+      RegionState state = am.getRegionStates().getRegionState(hri);
+      if (state != null && state.isFailedClose()){
+        am.getRegionStates().updateRegionState(hri, State.PENDING_CLOSE);
+        wait = false;
+      } else {
+        Thread.sleep(1);
+      }
+    }
+  }
+
   static void waitForRegionPendingOpenInRIT(AssignmentManager am, String encodedName)
     throws InterruptedException {
     // We used to do a check like this:
@@ -53,7 +70,6 @@ public class Mocking {
         Thread.sleep(1);
       }
     }
-
   }
 
   /**

Modified: hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManager.java
URL: http://svn.apache.org/viewvc/hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManager.java?rev=1459384&r1=1459383&r2=1459384&view=diff
==============================================================================
--- hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManager.java (original)
+++ hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManager.java Thu Mar 21 16:29:42 2013
@@ -201,6 +201,8 @@ public class TestAssignmentManager {
       // let's assume it is going to open on server b:
       am.addPlan(REGIONINFO.getEncodedName(), new RegionPlan(REGIONINFO, null, SERVERNAME_B));
 
+      Mocking.waitForRegionFailedToCloseAndSetToPendingClose(am, REGIONINFO);
+
       // Now fake the region closing successfully over on the regionserver; the
       // regionserver will have set the region in CLOSED state. This will
       // trigger callback into AM. The below zk close call is from the RS close
@@ -249,6 +251,8 @@ public class TestAssignmentManager {
       // let's assume it is going to open on server b:
       am.addPlan(REGIONINFO.getEncodedName(), new RegionPlan(REGIONINFO, null, SERVERNAME_B));
 
+      Mocking.waitForRegionFailedToCloseAndSetToPendingClose(am, REGIONINFO);
+
       // Now fake the region closing successfully over on the regionserver; the
       // regionserver will have set the region in CLOSED state. This will
       // trigger callback into AM. The below zk close call is from the RS close
@@ -298,6 +302,8 @@ public class TestAssignmentManager {
       // let's assume it is going to open on server b:
       am.addPlan(REGIONINFO.getEncodedName(), new RegionPlan(REGIONINFO, null, SERVERNAME_B));
 
+      Mocking.waitForRegionFailedToCloseAndSetToPendingClose(am, REGIONINFO);
+
       // Now fake the region closing successfully over on the regionserver; the
       // regionserver will have set the region in CLOSED state. This will
       // trigger callback into AM. The below zk close call is from the RS close
@@ -341,7 +347,6 @@ public class TestAssignmentManager {
     am.balance(new RegionPlan(hri, from, to));
   }
 
-
   /**
    * Tests AssignmentManager balance function.  Runs a balance moving a region
    * from one server to another mocking regionserver responding over zk.
@@ -375,6 +380,11 @@ public class TestAssignmentManager {
       RegionPlan plan = new RegionPlan(REGIONINFO, SERVERNAME_A, SERVERNAME_B);
       am.balance(plan);
 
+      // Must be failed to close since the server is fake
+      assertTrue(am.getRegionStates().isRegionFailedToClose(REGIONINFO));
+      // Move it back to pending_close
+      am.getRegionStates().updateRegionState(REGIONINFO, State.PENDING_CLOSE);
+
       // Now fake the region closing successfully over on the regionserver; the
       // regionserver will have set the region in CLOSED state.  This will
       // trigger callback into AM. The below zk close call is from the RS close

Modified: hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManagerOnCluster.java
URL: http://svn.apache.org/viewvc/hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManagerOnCluster.java?rev=1459384&r1=1459383&r2=1459384&view=diff
==============================================================================
--- hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManagerOnCluster.java (original)
+++ hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManagerOnCluster.java Thu Mar 21 16:29:42 2013
@@ -17,6 +17,8 @@
  */
 package org.apache.hadoop.hbase.master;
 
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.assertTrue;
 import static org.junit.Assert.fail;
 
@@ -34,6 +36,12 @@ import org.apache.hadoop.hbase.ServerNam
 import org.apache.hadoop.hbase.catalog.MetaEditor;
 import org.apache.hadoop.hbase.client.HBaseAdmin;
 import org.apache.hadoop.hbase.client.HTable;
+import org.apache.hadoop.hbase.coprocessor.BaseRegionObserver;
+import org.apache.hadoop.hbase.coprocessor.CoprocessorHost;
+import org.apache.hadoop.hbase.coprocessor.ObserverContext;
+import org.apache.hadoop.hbase.coprocessor.RegionCoprocessorEnvironment;
+import org.apache.hadoop.hbase.coprocessor.RegionObserver;
+import org.apache.hadoop.hbase.master.balancer.StochasticLoadBalancer;
 import org.apache.hadoop.hbase.regionserver.HRegionServer;
 import org.apache.hadoop.hbase.util.Bytes;
 import org.junit.AfterClass;
@@ -53,6 +61,12 @@ public class TestAssignmentManagerOnClus
 
   @BeforeClass
   public static void setUpBeforeClass() throws Exception {
+    // Using the test load balancer to control region plans
+    conf.setClass(HConstants.HBASE_MASTER_LOADBALANCER_CLASS,
+      TestLoadBalancer.class, LoadBalancer.class);
+    conf.setClass(CoprocessorHost.REGION_COPROCESSOR_CONF_KEY,
+      TestRegionObserver.class, RegionObserver.class);
+
     TEST_UTIL.startMiniCluster(3);
     admin = TEST_UTIL.getHBaseAdmin();
   }
@@ -191,4 +205,110 @@ public class TestAssignmentManagerOnClus
     }
   }
 
+  /**
+   * This tests region close failed
+   */
+  @Test
+  public void testCloseFailed() throws Exception {
+    String table = "testCloseFailed";
+    try {
+      HTableDescriptor desc = new HTableDescriptor(table);
+      desc.addFamily(new HColumnDescriptor(FAMILY));
+      admin.createTable(desc);
+
+      HTable meta = new HTable(conf, HConstants.META_TABLE_NAME);
+      HRegionInfo hri = new HRegionInfo(
+        desc.getName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
+      MetaEditor.addRegionToMeta(meta, hri);
+
+      HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
+      master.assignRegion(hri);
+      AssignmentManager am = master.getAssignmentManager();
+      assertTrue(am.waitForAssignment(hri));
+
+      TestRegionObserver.enabled = true;
+      am.unassign(hri);
+      RegionState state = am.getRegionStates().getRegionState(hri);
+      assertEquals(RegionState.State.FAILED_CLOSE, state.getState());
+
+      TestRegionObserver.enabled = false;
+      am.unassign(hri, true);
+      state = am.getRegionStates().getRegionState(hri);
+      assertTrue(RegionState.State.FAILED_CLOSE != state.getState());
+
+      am.assign(hri, true, true);
+      assertTrue(am.waitForAssignment(hri));
+
+      ServerName serverName = master.getAssignmentManager().
+        getRegionStates().getRegionServerOfRegion(hri);
+      TEST_UTIL.assertRegionOnServer(hri, serverName, 200);
+    } finally {
+      TestRegionObserver.enabled = false;
+      TEST_UTIL.deleteTable(Bytes.toBytes(table));
+    }
+  }
+
+  /**
+   * This tests region open failed
+   */
+  @Test
+  public void testOpenFailed() throws Exception {
+    String table = "testOpenFailed";
+    try {
+      HTableDescriptor desc = new HTableDescriptor(table);
+      desc.addFamily(new HColumnDescriptor(FAMILY));
+      admin.createTable(desc);
+
+      HTable meta = new HTable(conf, HConstants.META_TABLE_NAME);
+      HRegionInfo hri = new HRegionInfo(
+        desc.getName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
+      MetaEditor.addRegionToMeta(meta, hri);
+
+      TestLoadBalancer.controledRegion = hri.getEncodedName();
+
+      HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
+      master.assignRegion(hri);
+      AssignmentManager am = master.getAssignmentManager();
+      assertFalse(am.waitForAssignment(hri));
+
+      RegionState state = am.getRegionStates().getRegionState(hri);
+      assertEquals(RegionState.State.FAILED_OPEN, state.getState());
+
+      TestLoadBalancer.controledRegion = null;
+      master.assignRegion(hri);
+      assertTrue(am.waitForAssignment(hri));
+
+      ServerName serverName = master.getAssignmentManager().
+        getRegionStates().getRegionServerOfRegion(hri);
+      TEST_UTIL.assertRegionOnServer(hri, serverName, 200);
+    } finally {
+      TestLoadBalancer.controledRegion = null;
+      TEST_UTIL.deleteTable(Bytes.toBytes(table));
+    }
+  }
+
+  static class TestLoadBalancer extends StochasticLoadBalancer {
+    // For this region, if specified, always assign to nowhere
+    static volatile String controledRegion = null;
+
+    @Override
+    public ServerName randomAssignment(HRegionInfo regionInfo,
+        List<ServerName> servers) {
+      if (regionInfo.getEncodedName().equals(controledRegion)) {
+        return null;
+      }
+      return super.randomAssignment(regionInfo, servers);
+    }
+  }
+
+  public static class TestRegionObserver extends BaseRegionObserver {
+    // If enabled, fail all preClose calls
+    static volatile boolean enabled = false;
+
+    @Override
+    public void preClose(ObserverContext<RegionCoprocessorEnvironment> c,
+        boolean abortRequested) throws IOException {
+      if (enabled) throw new IOException("fail preClose from coprocessor");
+    }
+  }
 }