You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by jx...@apache.org on 2013/03/21 17:29:43 UTC
svn commit: r1459384 - in /hbase/trunk:
hbase-client/src/main/java/org/apache/hadoop/hbase/master/
hbase-protocol/src/main/java/org/apache/hadoop/hbase/protobuf/generated/
hbase-protocol/src/main/protobuf/
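hbase-server/src/main/java/org/apache/hadoop/hbase/master/
hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/
hbase-server/src/test/java/org/apache/hadoop/hbase/master/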
Author: jxiang
Date: Thu Mar 21 16:29:42 2013
New Revision: 1459384
URL: http://svn.apache.org/r1459384
Log:
HBASE-8137 Add failed to open/close region state
Modified:
hbase/trunk/hbase-client/src/main/java/org/apache/hadoop/hbase/master/RegionState.java
hbase/trunk/hbase-protocol/src/main/java/org/apache/hadoop/hbase/protobuf/generated/ClusterStatusProtos.java
hbase/trunk/hbase-protocol/src/main/protobuf/ClusterStatus.proto
hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStates.java
hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/DeleteTableHandler.java
hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/DisableTableHandler.java
hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/master/Mocking.java
hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManager.java
hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManagerOnCluster.java
Modified: hbase/trunk/hbase-client/src/main/java/org/apache/hadoop/hbase/master/RegionState.java
URL: http://svn.apache.org/viewvc/hbase/trunk/hbase-client/src/main/java/org/apache/hadoop/hbase/master/RegionState.java?rev=1459384&r1=1459383&r2=1459384&view=diff
==============================================================================
--- hbase/trunk/hbase-client/src/main/java/org/apache/hadoop/hbase/master/RegionState.java (original)
+++ hbase/trunk/hbase-client/src/main/java/org/apache/hadoop/hbase/master/RegionState.java Thu Mar 21 16:29:42 2013
@@ -44,7 +44,9 @@ public class RegionState implements org.
CLOSING, // server has begun to close but not yet done
CLOSED, // server closed region and updated meta
SPLITTING, // server started split of a region
- SPLIT // server completed split of a region
+ SPLIT, // server completed split of a region
+ FAILED_OPEN, // failed to open, and won't retry any more
+ FAILED_CLOSE // failed to close, and won't retry any more
}
// Many threads can update the state at the stamp at the same time
@@ -126,6 +128,14 @@ public class RegionState implements org.
return state == State.SPLIT;
}
+ public boolean isFailedOpen() {
+ return state == State.FAILED_OPEN;
+ }
+
+ public boolean isFailedClose() {
+ return state == State.FAILED_CLOSE;
+ }
+
public boolean isPendingOpenOrOpeningOnServer(final ServerName sn) {
return isOnServer(sn) && (isPendingOpen() || isOpening());
}
@@ -195,6 +205,12 @@ public class RegionState implements org.
case SPLIT:
rs = ClusterStatusProtos.RegionState.State.SPLIT;
break;
+ case FAILED_OPEN:
+ rs = ClusterStatusProtos.RegionState.State.FAILED_OPEN;
+ break;
+ case FAILED_CLOSE:
+ rs = ClusterStatusProtos.RegionState.State.FAILED_CLOSE;
+ break;
default:
throw new IllegalStateException("");
}
@@ -239,6 +255,12 @@ public class RegionState implements org.
case SPLIT:
state = State.SPLIT;
break;
+ case FAILED_OPEN:
+ state = State.FAILED_OPEN;
+ break;
+ case FAILED_CLOSE:
+ state = State.FAILED_CLOSE;
+ break;
default:
throw new IllegalStateException("");
}
Modified: hbase/trunk/hbase-protocol/src/main/java/org/apache/hadoop/hbase/protobuf/generated/ClusterStatusProtos.java
URL: http://svn.apache.org/viewvc/hbase/trunk/hbase-protocol/src/main/java/org/apache/hadoop/hbase/protobuf/generated/ClusterStatusProtos.java?rev=1459384&r1=1459383&r2=1459384&view=diff
==============================================================================
--- hbase/trunk/hbase-protocol/src/main/java/org/apache/hadoop/hbase/protobuf/generated/ClusterStatusProtos.java (original)
+++ hbase/trunk/hbase-protocol/src/main/java/org/apache/hadoop/hbase/protobuf/generated/ClusterStatusProtos.java Thu Mar 21 16:29:42 2013
@@ -63,6 +63,8 @@ public final class ClusterStatusProtos {
CLOSED(6, 6),
SPLITTING(7, 7),
SPLIT(8, 8),
+ FAILED_OPEN(9, 9),
+ FAILED_CLOSE(10, 10),
;
public static final int OFFLINE_VALUE = 0;
@@ -74,6 +76,8 @@ public final class ClusterStatusProtos {
public static final int CLOSED_VALUE = 6;
public static final int SPLITTING_VALUE = 7;
public static final int SPLIT_VALUE = 8;
+ public static final int FAILED_OPEN_VALUE = 9;
+ public static final int FAILED_CLOSE_VALUE = 10;
public final int getNumber() { return value; }
@@ -89,6 +93,8 @@ public final class ClusterStatusProtos {
case 6: return CLOSED;
case 7: return SPLITTING;
case 8: return SPLIT;
+ case 9: return FAILED_OPEN;
+ case 10: return FAILED_CLOSE;
default: return null;
}
}
@@ -119,7 +125,7 @@ public final class ClusterStatusProtos {
}
private static final State[] VALUES = {
- OFFLINE, PENDING_OPEN, OPENING, OPEN, PENDING_CLOSE, CLOSING, CLOSED, SPLITTING, SPLIT,
+ OFFLINE, PENDING_OPEN, OPENING, OPEN, PENDING_CLOSE, CLOSING, CLOSED, SPLITTING, SPLIT, FAILED_OPEN, FAILED_CLOSE,
};
public static State valueOf(
@@ -4350,28 +4356,28 @@ public final class ClusterStatusProtos {
static {
java.lang.String[] descriptorData = {
"\n\023ClusterStatus.proto\032\013hbase.proto\032\017Clus" +
- "terId.proto\032\010FS.proto\"\346\001\n\013RegionState\022\037\n" +
+ "terId.proto\032\010FS.proto\"\211\002\n\013RegionState\022\037\n" +
"\nregionInfo\030\001 \002(\0132\013.RegionInfo\022!\n\005state\030" +
"\002 \002(\0162\022.RegionState.State\022\r\n\005stamp\030\003 \001(\004" +
- "\"\203\001\n\005State\022\013\n\007OFFLINE\020\000\022\020\n\014PENDING_OPEN\020" +
+ "\"\246\001\n\005State\022\013\n\007OFFLINE\020\000\022\020\n\014PENDING_OPEN\020" +
"\001\022\013\n\007OPENING\020\002\022\010\n\004OPEN\020\003\022\021\n\rPENDING_CLOS" +
"E\020\004\022\013\n\007CLOSING\020\005\022\n\n\006CLOSED\020\006\022\r\n\tSPLITTIN" +
- "G\020\007\022\t\n\005SPLIT\020\010\"W\n\022RegionInTransition\022\036\n\004" +
- "spec\030\001 \002(\0132\020.RegionSpecifier\022!\n\013regionSt" +
- "ate\030\002 \002(\0132\014.RegionState\"N\n\016LiveServerInf",
- "o\022\033\n\006server\030\001 \002(\0132\013.ServerName\022\037\n\nserver" +
- "Load\030\002 \002(\0132\013.ServerLoad\"\327\002\n\rClusterStatu" +
- "s\022.\n\014hbaseVersion\030\001 \001(\0132\030.HBaseVersionFi" +
- "leContent\022$\n\013liveServers\030\002 \003(\0132\017.LiveSer" +
- "verInfo\022 \n\013deadServers\030\003 \003(\0132\013.ServerNam" +
- "e\0220\n\023regionsInTransition\030\004 \003(\0132\023.RegionI" +
- "nTransition\022\035\n\tclusterId\030\005 \001(\0132\n.Cluster" +
- "Id\022(\n\022masterCoprocessors\030\006 \003(\0132\014.Coproce" +
- "ssor\022\033\n\006master\030\007 \001(\0132\013.ServerName\022\"\n\rbac" +
- "kupMasters\030\010 \003(\0132\013.ServerName\022\022\n\nbalance",
- "rOn\030\t \001(\010BF\n*org.apache.hadoop.hbase.pro" +
- "tobuf.generatedB\023ClusterStatusProtosH\001\240\001" +
- "\001"
+ "G\020\007\022\t\n\005SPLIT\020\010\022\017\n\013FAILED_OPEN\020\t\022\020\n\014FAILE" +
+ "D_CLOSE\020\n\"W\n\022RegionInTransition\022\036\n\004spec\030" +
+ "\001 \002(\0132\020.RegionSpecifier\022!\n\013regionState\030\002",
+ " \002(\0132\014.RegionState\"N\n\016LiveServerInfo\022\033\n\006" +
+ "server\030\001 \002(\0132\013.ServerName\022\037\n\nserverLoad\030" +
+ "\002 \002(\0132\013.ServerLoad\"\327\002\n\rClusterStatus\022.\n\014" +
+ "hbaseVersion\030\001 \001(\0132\030.HBaseVersionFileCon" +
+ "tent\022$\n\013liveServers\030\002 \003(\0132\017.LiveServerIn" +
+ "fo\022 \n\013deadServers\030\003 \003(\0132\013.ServerName\0220\n\023" +
+ "regionsInTransition\030\004 \003(\0132\023.RegionInTran" +
+ "sition\022\035\n\tclusterId\030\005 \001(\0132\n.ClusterId\022(\n" +
+ "\022masterCoprocessors\030\006 \003(\0132\014.Coprocessor\022" +
+ "\033\n\006master\030\007 \001(\0132\013.ServerName\022\"\n\rbackupMa",
+ "sters\030\010 \003(\0132\013.ServerName\022\022\n\nbalancerOn\030\t" +
+ " \001(\010BF\n*org.apache.hadoop.hbase.protobuf" +
+ ".generatedB\023ClusterStatusProtosH\001\240\001\001"
};
com.google.protobuf.Descriptors.FileDescriptor.InternalDescriptorAssigner assigner =
new com.google.protobuf.Descriptors.FileDescriptor.InternalDescriptorAssigner() {
Modified: hbase/trunk/hbase-protocol/src/main/protobuf/ClusterStatus.proto
URL: http://svn.apache.org/viewvc/hbase/trunk/hbase-protocol/src/main/protobuf/ClusterStatus.proto?rev=1459384&r1=1459383&r2=1459384&view=diff
==============================================================================
--- hbase/trunk/hbase-protocol/src/main/protobuf/ClusterStatus.proto (original)
+++ hbase/trunk/hbase-protocol/src/main/protobuf/ClusterStatus.proto Thu Mar 21 16:29:42 2013
@@ -41,6 +41,8 @@ message RegionState {
CLOSED = 6; // server closed region and updated meta
SPLITTING = 7; // server started split of a region
SPLIT = 8; // server completed split of a region
+ FAILED_OPEN = 9; // failed to open, and won't retry any more
+ FAILED_CLOSE = 10; // failed to close, and won't retry any more
}
}
Modified: hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
URL: http://svn.apache.org/viewvc/hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java?rev=1459384&r1=1459383&r2=1459384&view=diff
==============================================================================
--- hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java (original)
+++ hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java Thu Mar 21 16:29:42 2013
@@ -1529,20 +1529,19 @@ public class AssignmentManager extends Z
private void unassign(final HRegionInfo region,
final RegionState state, final int versionOfClosingNode,
final ServerName dest, final boolean transitionInZK) {
- // Send CLOSE RPC
ServerName server = state.getServerName();
- // ClosedRegionhandler can remove the server from this.regions
- if (!serverManager.isServerOnline(server)) {
- if (transitionInZK) {
- // delete the node. if no node exists need not bother.
- deleteClosingOrClosedNode(region);
- }
- regionOffline(region);
- return;
- }
-
for (int i = 1; i <= this.maximumAttempts; i++) {
+ // ClosedRegionHandler can remove the server from this.regions
+ if (!serverManager.isServerOnline(server)) {
+ if (transitionInZK) {
+ // delete the node. if no node exists need not bother.
+ deleteClosingOrClosedNode(region);
+ }
+ regionOffline(region);
+ return;
+ }
try {
+ // Send CLOSE RPC
if (serverManager.sendRegionClose(server, region,
versionOfClosingNode, dest, transitionInZK)) {
LOG.debug("Sent CLOSE to " + server + " for region " +
@@ -1557,7 +1556,8 @@ public class AssignmentManager extends Z
if (t instanceof RemoteException) {
t = ((RemoteException)t).unwrapRemoteException();
}
- if (t instanceof NotServingRegionException) {
+ if (t instanceof NotServingRegionException
+ || t instanceof RegionServerStoppedException) {
if (transitionInZK) {
deleteClosingOrClosedNode(region);
}
@@ -1574,6 +1574,10 @@ public class AssignmentManager extends Z
// Presume retry or server will expire.
}
}
+ // Run out of attempts
+ if (!tomActivated) {
+ regionStates.updateRegionState(region, RegionState.State.FAILED_CLOSE);
+ }
}
/**
@@ -1597,13 +1601,15 @@ public class AssignmentManager extends Z
}
case CLOSING:
case PENDING_CLOSE:
+ case FAILED_CLOSE:
unassign(region, state, -1, null, false);
+ state = regionStates.getRegionState(region);
+ if (state.isOffline()) break;
+ case FAILED_OPEN:
case CLOSED:
- if (!state.isOffline()) {
- LOG.debug("Forcing OFFLINE; was=" + state);
- state = regionStates.updateRegionState(
- region, RegionState.State.OFFLINE);
- }
+ LOG.debug("Forcing OFFLINE; was=" + state);
+ state = regionStates.updateRegionState(
+ region, RegionState.State.OFFLINE);
case OFFLINE:
break;
default:
@@ -1637,6 +1643,8 @@ public class AssignmentManager extends Z
LOG.warn("Unable to determine a plan to assign " + region);
if (tomActivated){
this.timeoutMonitor.setAllRegionServersOffline(true);
+ } else {
+ regionStates.updateRegionState(region, RegionState.State.FAILED_OPEN);
}
return;
}
@@ -1662,6 +1670,10 @@ public class AssignmentManager extends Z
}
}
if (setOfflineInZK && versionOfOfflineNode == -1) {
+ LOG.warn("Unable to set offline in ZooKeeper to assign " + region);
+ if (!tomActivated) {
+ regionStates.updateRegionState(region, RegionState.State.FAILED_OPEN);
+ }
return;
}
if (this.server.isStopped()) {
@@ -1740,6 +1752,9 @@ public class AssignmentManager extends Z
LOG.warn("Failed to assign "
+ region.getRegionNameAsString() + " since interrupted", ie);
Thread.currentThread().interrupt();
+ if (!tomActivated) {
+ regionStates.updateRegionState(region, RegionState.State.FAILED_OPEN);
+ }
return;
}
} else if (retry) {
@@ -1772,6 +1787,8 @@ public class AssignmentManager extends Z
if (newPlan == null) {
if (tomActivated) {
this.timeoutMonitor.setAllRegionServersOffline(true);
+ } else {
+ regionStates.updateRegionState(region, RegionState.State.FAILED_OPEN);
}
LOG.warn("Unable to find a viable location to assign region " +
region.getRegionNameAsString());
@@ -1788,6 +1805,10 @@ public class AssignmentManager extends Z
}
}
}
+ // Run out of attempts
+ if (!tomActivated) {
+ regionStates.updateRegionState(region, RegionState.State.FAILED_OPEN);
+ }
}
private void processAlreadyOpenedRegion(HRegionInfo region, ServerName sn) {
@@ -1914,6 +1935,10 @@ public class AssignmentManager extends Z
}
if (newPlan) {
+ if (randomPlan.getDestination() == null) {
+ LOG.warn("Can't find a destination for region" + encodedName);
+ return null;
+ }
LOG.debug("No previous transition plan was found (or we are ignoring " +
"an existing plan) for " + region.getRegionNameAsString() +
" so generated a random one; " + randomPlan + "; " +
@@ -2052,10 +2077,18 @@ public class AssignmentManager extends Z
return;
}
state = regionStates.updateRegionState(region, RegionState.State.PENDING_CLOSE);
- } else if (force && (state.isPendingClose() || state.isClosing())) {
+ } else if (state.isFailedOpen()) {
+ // The region is not open yet
+ regionOffline(region);
+ return;
+ } else if (force && (state.isPendingClose()
+ || state.isClosing() || state.isFailedClose())) {
LOG.debug("Attempting to unassign region " + region.getRegionNameAsString() +
" which is already " + state.getState() +
" but forcing to send a CLOSE RPC again ");
+ if (state.isFailedClose()) {
+ state = regionStates.updateRegionState(region, RegionState.State.PENDING_CLOSE);
+ }
state.updateTimestampToNow();
} else {
LOG.debug("Attempting to unassign region " +
@@ -2134,15 +2167,20 @@ public class AssignmentManager extends Z
* @param regionInfo region to wait on assignment for
* @throws InterruptedException
*/
- public void waitForAssignment(HRegionInfo regionInfo)
+ public boolean waitForAssignment(HRegionInfo regionInfo)
throws InterruptedException {
- while(!this.server.isStopped() &&
- !regionStates.isRegionAssigned(regionInfo)) {
+ while (!regionStates.isRegionAssigned(regionInfo)) {
+ if (regionStates.isRegionFailedToOpen(regionInfo)
+ || this.server.isStopped()) {
+ return false;
+ }
+
// We should receive a notification, but it's
// better to have a timeout to recheck the condition here:
// it lowers the impact of a race condition if any
regionStates.waitForUpdate(100);
}
+ return true;
}
/**
@@ -2690,6 +2728,8 @@ public class AssignmentManager extends Z
case SPLIT:
case SPLITTING:
+ case FAILED_OPEN:
+ case FAILED_CLOSE:
break;
default:
Modified: hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
URL: http://svn.apache.org/viewvc/hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java?rev=1459384&r1=1459383&r2=1459384&view=diff
==============================================================================
--- hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java (original)
+++ hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java Thu Mar 21 16:29:42 2013
@@ -92,7 +92,6 @@ import org.apache.hadoop.hbase.master.ha
import org.apache.hadoop.hbase.master.handler.DisableTableHandler;
import org.apache.hadoop.hbase.master.handler.EnableTableHandler;
import org.apache.hadoop.hbase.master.handler.ModifyTableHandler;
-import org.apache.hadoop.hbase.master.handler.ServerShutdownHandler;
import org.apache.hadoop.hbase.master.handler.TableAddFamilyHandler;
import org.apache.hadoop.hbase.master.handler.TableDeleteFamilyHandler;
import org.apache.hadoop.hbase.master.handler.TableModifyFamilyHandler;
Modified: hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStates.java
URL: http://svn.apache.org/viewvc/hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStates.java?rev=1459384&r1=1459383&r2=1459384&view=diff
==============================================================================
--- hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStates.java (original)
+++ hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStates.java Thu Mar 21 16:29:42 2013
@@ -133,6 +133,24 @@ public class RegionStates {
}
/**
+ * @return True if specified region failed to open.
+ */
+ public synchronized boolean isRegionFailedToOpen(final HRegionInfo hri) {
+ RegionState regionState = getRegionTransitionState(hri);
+ State state = regionState != null ? regionState.getState() : null;
+ return state == State.FAILED_OPEN;
+ }
+
+ /**
+ * @return True if specified region failed to close.
+ */
+ public synchronized boolean isRegionFailedToClose(final HRegionInfo hri) {
+ RegionState regionState = getRegionTransitionState(hri);
+ State state = regionState != null ? regionState.getState() : null;
+ return state == State.FAILED_CLOSE;
+ }
+
+ /**
* Wait for the state map to be updated by assignment manager.
*/
public synchronized void waitForUpdate(
Modified: hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/DeleteTableHandler.java
URL: http://svn.apache.org/viewvc/hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/DeleteTableHandler.java?rev=1459384&r1=1459383&r2=1459384&view=diff
==============================================================================
--- hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/DeleteTableHandler.java (original)
+++ hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/DeleteTableHandler.java Thu Mar 21 16:29:42 2013
@@ -36,6 +36,7 @@ import org.apache.hadoop.hbase.master.HM
import org.apache.hadoop.hbase.master.MasterCoprocessorHost;
import org.apache.hadoop.hbase.master.MasterFileSystem;
import org.apache.hadoop.hbase.master.MasterServices;
+import org.apache.hadoop.hbase.master.RegionStates;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.Threads;
import org.apache.zookeeper.KeeperException;
@@ -66,17 +67,21 @@ public class DeleteTableHandler extends
// 1. Wait because of region in transition
AssignmentManager am = this.masterServices.getAssignmentManager();
+ RegionStates states = am.getRegionStates();
long waitTime = server.getConfiguration().
getLong("hbase.master.wait.on.region", 5 * 60 * 1000);
for (HRegionInfo region : regions) {
long done = System.currentTimeMillis() + waitTime;
while (System.currentTimeMillis() < done) {
- if (!am.getRegionStates().isRegionInTransition(region)) break;
+ if (states.isRegionFailedToOpen(region)) {
+ am.regionOffline(region);
+ }
+ if (!states.isRegionInTransition(region)) break;
Threads.sleep(waitingTimeForEvents);
LOG.debug("Waiting on region to clear regions in transition; "
+ am.getRegionStates().getRegionTransitionState(region));
}
- if (am.getRegionStates().isRegionInTransition(region)) {
+ if (states.isRegionInTransition(region)) {
throw new IOException("Waited hbase.master.wait.on.region (" +
waitTime + "ms) for region to leave region " +
region.getRegionNameAsString() + " in transitions");
Modified: hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/DisableTableHandler.java
URL: http://svn.apache.org/viewvc/hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/DisableTableHandler.java?rev=1459384&r1=1459383&r2=1459384&view=diff
==============================================================================
--- hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/DisableTableHandler.java (original)
+++ hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/DisableTableHandler.java Thu Mar 21 16:29:42 2013
@@ -202,11 +202,12 @@ public class DisableTableHandler extends
protected void populatePool(ExecutorService pool) {
RegionStates regionStates = assignmentManager.getRegionStates();
for (HRegionInfo region: regions) {
- if (regionStates.isRegionInTransition(region)) continue;
+ if (regionStates.isRegionInTransition(region)
+ && !regionStates.isRegionFailedToClose(region)) continue;
final HRegionInfo hri = region;
pool.execute(Trace.wrap(new Runnable() {
public void run() {
- assignmentManager.unassign(hri);
+ assignmentManager.unassign(hri, true);
}
}));
}
Modified: hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/master/Mocking.java
URL: http://svn.apache.org/viewvc/hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/master/Mocking.java?rev=1459384&r1=1459383&r2=1459384&view=diff
==============================================================================
--- hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/master/Mocking.java (original)
+++ hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/master/Mocking.java Thu Mar 21 16:29:42 2013
@@ -24,6 +24,7 @@ import org.apache.hadoop.hbase.HRegionIn
import org.apache.hadoop.hbase.RegionTransition;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.executor.EventType;
+import org.apache.hadoop.hbase.master.RegionState.State;
import org.apache.hadoop.hbase.zookeeper.ZKAssign;
import org.apache.hadoop.hbase.zookeeper.ZKUtil;
import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
@@ -34,6 +35,22 @@ import org.apache.zookeeper.KeeperExcept
*/
public class Mocking {
+ static void waitForRegionFailedToCloseAndSetToPendingClose(
+ AssignmentManager am, HRegionInfo hri) throws InterruptedException {
+ // Since region server is fake, sendRegionClose will fail, and closing
+ // region will fail. For testing purpose, moving it back to pending close
+ boolean wait = true;
+ while (wait) {
+ RegionState state = am.getRegionStates().getRegionState(hri);
+ if (state != null && state.isFailedClose()){
+ am.getRegionStates().updateRegionState(hri, State.PENDING_CLOSE);
+ wait = false;
+ } else {
+ Thread.sleep(1);
+ }
+ }
+ }
+
static void waitForRegionPendingOpenInRIT(AssignmentManager am, String encodedName)
throws InterruptedException {
// We used to do a check like this:
@@ -53,7 +70,6 @@ public class Mocking {
Thread.sleep(1);
}
}
-
}
/**
Modified: hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManager.java
URL: http://svn.apache.org/viewvc/hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManager.java?rev=1459384&r1=1459383&r2=1459384&view=diff
==============================================================================
--- hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManager.java (original)
+++ hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManager.java Thu Mar 21 16:29:42 2013
@@ -201,6 +201,8 @@ public class TestAssignmentManager {
// let's assume it is going to open on server b:
am.addPlan(REGIONINFO.getEncodedName(), new RegionPlan(REGIONINFO, null, SERVERNAME_B));
+ Mocking.waitForRegionFailedToCloseAndSetToPendingClose(am, REGIONINFO);
+
// Now fake the region closing successfully over on the regionserver; the
// regionserver will have set the region in CLOSED state. This will
// trigger callback into AM. The below zk close call is from the RS close
@@ -249,6 +251,8 @@ public class TestAssignmentManager {
// let's assume it is going to open on server b:
am.addPlan(REGIONINFO.getEncodedName(), new RegionPlan(REGIONINFO, null, SERVERNAME_B));
+ Mocking.waitForRegionFailedToCloseAndSetToPendingClose(am, REGIONINFO);
+
// Now fake the region closing successfully over on the regionserver; the
// regionserver will have set the region in CLOSED state. This will
// trigger callback into AM. The below zk close call is from the RS close
@@ -298,6 +302,8 @@ public class TestAssignmentManager {
// let's assume it is going to open on server b:
am.addPlan(REGIONINFO.getEncodedName(), new RegionPlan(REGIONINFO, null, SERVERNAME_B));
+ Mocking.waitForRegionFailedToCloseAndSetToPendingClose(am, REGIONINFO);
+
// Now fake the region closing successfully over on the regionserver; the
// regionserver will have set the region in CLOSED state. This will
// trigger callback into AM. The below zk close call is from the RS close
@@ -341,7 +347,6 @@ public class TestAssignmentManager {
am.balance(new RegionPlan(hri, from, to));
}
-
/**
* Tests AssignmentManager balance function. Runs a balance moving a region
* from one server to another mocking regionserver responding over zk.
@@ -375,6 +380,11 @@ public class TestAssignmentManager {
RegionPlan plan = new RegionPlan(REGIONINFO, SERVERNAME_A, SERVERNAME_B);
am.balance(plan);
+ // Must be failed to close since the server is fake
+ assertTrue(am.getRegionStates().isRegionFailedToClose(REGIONINFO));
+ // Move it back to pending_close
+ am.getRegionStates().updateRegionState(REGIONINFO, State.PENDING_CLOSE);
+
// Now fake the region closing successfully over on the regionserver; the
// regionserver will have set the region in CLOSED state. This will
// trigger callback into AM. The below zk close call is from the RS close
Modified: hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManagerOnCluster.java
URL: http://svn.apache.org/viewvc/hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManagerOnCluster.java?rev=1459384&r1=1459383&r2=1459384&view=diff
==============================================================================
--- hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManagerOnCluster.java (original)
+++ hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManagerOnCluster.java Thu Mar 21 16:29:42 2013
@@ -17,6 +17,8 @@
*/
package org.apache.hadoop.hbase.master;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
@@ -34,6 +36,12 @@ import org.apache.hadoop.hbase.ServerNam
import org.apache.hadoop.hbase.catalog.MetaEditor;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HTable;
+import org.apache.hadoop.hbase.coprocessor.BaseRegionObserver;
+import org.apache.hadoop.hbase.coprocessor.CoprocessorHost;
+import org.apache.hadoop.hbase.coprocessor.ObserverContext;
+import org.apache.hadoop.hbase.coprocessor.RegionCoprocessorEnvironment;
+import org.apache.hadoop.hbase.coprocessor.RegionObserver;
+import org.apache.hadoop.hbase.master.balancer.StochasticLoadBalancer;
import org.apache.hadoop.hbase.regionserver.HRegionServer;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.AfterClass;
@@ -53,6 +61,12 @@ public class TestAssignmentManagerOnClus
@BeforeClass
public static void setUpBeforeClass() throws Exception {
+ // Using the test load balancer to control region plans
+ conf.setClass(HConstants.HBASE_MASTER_LOADBALANCER_CLASS,
+ TestLoadBalancer.class, LoadBalancer.class);
+ conf.setClass(CoprocessorHost.REGION_COPROCESSOR_CONF_KEY,
+ TestRegionObserver.class, RegionObserver.class);
+
TEST_UTIL.startMiniCluster(3);
admin = TEST_UTIL.getHBaseAdmin();
}
@@ -191,4 +205,110 @@ public class TestAssignmentManagerOnClus
}
}
+ /**
+ * This tests region close failed
+ */
+ @Test
+ public void testCloseFailed() throws Exception {
+ String table = "testCloseFailed";
+ try {
+ HTableDescriptor desc = new HTableDescriptor(table);
+ desc.addFamily(new HColumnDescriptor(FAMILY));
+ admin.createTable(desc);
+
+ HTable meta = new HTable(conf, HConstants.META_TABLE_NAME);
+ HRegionInfo hri = new HRegionInfo(
+ desc.getName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
+ MetaEditor.addRegionToMeta(meta, hri);
+
+ HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
+ master.assignRegion(hri);
+ AssignmentManager am = master.getAssignmentManager();
+ assertTrue(am.waitForAssignment(hri));
+
+ TestRegionObserver.enabled = true;
+ am.unassign(hri);
+ RegionState state = am.getRegionStates().getRegionState(hri);
+ assertEquals(RegionState.State.FAILED_CLOSE, state.getState());
+
+ TestRegionObserver.enabled = false;
+ am.unassign(hri, true);
+ state = am.getRegionStates().getRegionState(hri);
+ assertTrue(RegionState.State.FAILED_CLOSE != state.getState());
+
+ am.assign(hri, true, true);
+ assertTrue(am.waitForAssignment(hri));
+
+ ServerName serverName = master.getAssignmentManager().
+ getRegionStates().getRegionServerOfRegion(hri);
+ TEST_UTIL.assertRegionOnServer(hri, serverName, 200);
+ } finally {
+ TestRegionObserver.enabled = false;
+ TEST_UTIL.deleteTable(Bytes.toBytes(table));
+ }
+ }
+
+ /**
+ * This tests region open failed
+ */
+ @Test
+ public void testOpenFailed() throws Exception {
+ String table = "testOpenFailed";
+ try {
+ HTableDescriptor desc = new HTableDescriptor(table);
+ desc.addFamily(new HColumnDescriptor(FAMILY));
+ admin.createTable(desc);
+
+ HTable meta = new HTable(conf, HConstants.META_TABLE_NAME);
+ HRegionInfo hri = new HRegionInfo(
+ desc.getName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
+ MetaEditor.addRegionToMeta(meta, hri);
+
+ TestLoadBalancer.controledRegion = hri.getEncodedName();
+
+ HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
+ master.assignRegion(hri);
+ AssignmentManager am = master.getAssignmentManager();
+ assertFalse(am.waitForAssignment(hri));
+
+ RegionState state = am.getRegionStates().getRegionState(hri);
+ assertEquals(RegionState.State.FAILED_OPEN, state.getState());
+
+ TestLoadBalancer.controledRegion = null;
+ master.assignRegion(hri);
+ assertTrue(am.waitForAssignment(hri));
+
+ ServerName serverName = master.getAssignmentManager().
+ getRegionStates().getRegionServerOfRegion(hri);
+ TEST_UTIL.assertRegionOnServer(hri, serverName, 200);
+ } finally {
+ TestLoadBalancer.controledRegion = null;
+ TEST_UTIL.deleteTable(Bytes.toBytes(table));
+ }
+ }
+
+ static class TestLoadBalancer extends StochasticLoadBalancer {
+ // For this region, if specified, always assign to nowhere
+ static volatile String controledRegion = null;
+
+ @Override
+ public ServerName randomAssignment(HRegionInfo regionInfo,
+ List<ServerName> servers) {
+ if (regionInfo.getEncodedName().equals(controledRegion)) {
+ return null;
+ }
+ return super.randomAssignment(regionInfo, servers);
+ }
+ }
+
+ public static class TestRegionObserver extends BaseRegionObserver {
+ // If enabled, fail all preClose calls
+ static volatile boolean enabled = false;
+
+ @Override
+ public void preClose(ObserverContext<RegionCoprocessorEnvironment> c,
+ boolean abortRequested) throws IOException {
+ if (enabled) throw new IOException("fail preClose from coprocessor");
+ }
+ }
}