You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by jx...@apache.org on 2012/10/01 19:45:05 UTC
svn commit: r1392467 -
/hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
Author: jxiang
Date: Mon Oct 1 17:45:04 2012
New Revision: 1392467
URL: http://svn.apache.org/viewvc?rev=1392467&view=rev
Log:
HBASE-6881 All regionservers are marked offline even though there is still one up
Modified:
hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
Modified: hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
URL: http://svn.apache.org/viewvc/hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java?rev=1392467&r1=1392466&r2=1392467&view=diff
==============================================================================
--- hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java (original)
+++ hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java Mon Oct 1 17:45:04 2012
@@ -1399,13 +1399,15 @@ public class AssignmentManager extends Z
final boolean setOfflineInZK, final boolean forceNewPlan,
boolean hijack) {
boolean regionAlreadyInTransitionException = false;
+ boolean serverNotRunningYet = false;
+ RegionState currentState = state;
+ long maxRegionServerStartupWaitTime = -1;
for (int i = 0; i < this.maximumAssignmentAttempts; i++) {
int versionOfOfflineNode = -1;
if (setOfflineInZK) {
// get the version of the znode after setting it to OFFLINE.
// versionOfOfflineNode will be -1 if the znode was not set to OFFLINE
- versionOfOfflineNode = setOfflineInZooKeeper(state, hijack,
- regionAlreadyInTransitionException);
+ versionOfOfflineNode = setOfflineInZooKeeper(currentState, hijack);
if (versionOfOfflineNode != -1) {
if (isDisabledorDisablingRegionInRIT(region)) {
return;
@@ -1430,7 +1432,8 @@ public class AssignmentManager extends Z
LOG.debug("Server stopped; skipping assign of " + state);
return;
}
- RegionPlan plan = getRegionPlan(state, !regionAlreadyInTransitionException && forceNewPlan);
+ RegionPlan plan = getRegionPlan(state,
+ !regionAlreadyInTransitionException && !serverNotRunningYet && forceNewPlan);
if (plan == null) {
LOG.debug("Unable to determine a plan to assign " + state);
this.timeoutMonitor.setAllRegionServersOffline(true);
@@ -1440,7 +1443,7 @@ public class AssignmentManager extends Z
LOG.info("Assigning region " + state.getRegion().getRegionNameAsString() +
" to " + plan.getDestination().toString());
// Transition RegionState to PENDING_OPEN
- regionStates.updateRegionState(state.getRegion(),
+ currentState = regionStates.updateRegionState(state.getRegion(),
RegionState.State.PENDING_OPEN, System.currentTimeMillis(),
plan.getDestination());
// Send OPEN RPC. This can fail if the server on other end is is not up.
@@ -1457,34 +1460,64 @@ public class AssignmentManager extends Z
} catch (Throwable t) {
if (t instanceof RemoteException) {
t = ((RemoteException) t).unwrapRemoteException();
- if (t instanceof RegionAlreadyInTransitionException) {
- regionAlreadyInTransitionException = true;
- if (LOG.isDebugEnabled()) {
- LOG.debug("Failed assignment in: " + plan.getDestination() + " due to "
- + t.getMessage());
+ }
+ regionAlreadyInTransitionException = false;
+ serverNotRunningYet = false;
+ if (t instanceof RegionAlreadyInTransitionException) {
+ regionAlreadyInTransitionException = true;
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Failed assignment in: " + plan.getDestination() + " due to "
+ + t.getMessage());
+ }
+ } else if (t instanceof ServerNotRunningYetException) {
+ if (maxRegionServerStartupWaitTime < 0) {
+ maxRegionServerStartupWaitTime = System.currentTimeMillis() +
+ this.server.getConfiguration().
+ getLong("hbase.regionserver.rpc.startup.waittime", 60000);
+ }
+ try {
+ long now = System.currentTimeMillis();
+ if (now < maxRegionServerStartupWaitTime) {
+ LOG.debug("Server is not yet up; waiting up to " +
+ (maxRegionServerStartupWaitTime - now) + "ms", t);
+ serverNotRunningYet = true;
+ Thread.sleep(100);
+ i--; // reset the try count
+ } else {
+ LOG.debug("Server is not up for a while; try a new one", t);
}
+ } catch (InterruptedException ie) {
+ LOG.warn("Failed to assign "
+ + state.getRegion().getRegionNameAsString() + " since interrupted", ie);
+ Thread.currentThread().interrupt();
+ return;
}
}
LOG.warn("Failed assignment of "
- + state.getRegion().getRegionNameAsString()
- + " to "
- + plan.getDestination()
- + ", trying to assign "
- + (regionAlreadyInTransitionException ? "to the same region server"
- + " because of RegionAlreadyInTransitionException;" : "elsewhere instead; ")
- + "retry=" + i, t);
+ + state.getRegion().getRegionNameAsString()
+ + " to "
+ + plan.getDestination()
+ + ", trying to assign "
+ + (regionAlreadyInTransitionException || serverNotRunningYet
+ ? "to the same region server because of "
+ + "RegionAlreadyInTransitionException/ServerNotRunningYetException;"
+ : "elsewhere instead; ")
+ + "retry=" + i, t);
// Clean out plan we failed execute and one that doesn't look like it'll
// succeed anyways; we need a new plan!
// Transition back to OFFLINE
- regionStates.updateRegionState(
+ currentState = regionStates.updateRegionState(
state.getRegion(), RegionState.State.OFFLINE);
// If region opened on destination of present plan, reassigning to new
// RS may cause double assignments. In case of RegionAlreadyInTransitionException
// reassigning to same RS.
RegionPlan newPlan = plan;
- if (!regionAlreadyInTransitionException) {
+ if (!regionAlreadyInTransitionException && !serverNotRunningYet) {
// Force a new plan and reassign. Will return null if no servers.
- newPlan = getRegionPlan(state, plan.getDestination(), true);
+ // The new plan could be the same as the existing plan since we don't
+ // exclude the server of the original plan, which should not be
+ // excluded since it could be the only server up now.
+ newPlan = getRegionPlan(state, true);
}
if (newPlan == null) {
this.timeoutMonitor.setAllRegionServersOffline(true);
@@ -1537,24 +1570,16 @@ public class AssignmentManager extends Z
* @param state
* @param hijack
* - true if needs to be hijacked and reassigned, false otherwise.
- * @param regionAlreadyInTransitionException
- * - true if we need to retry assignment because of RegionAlreadyInTransitionException.
* @return the version of the offline node if setting of the OFFLINE node was
* successful, -1 otherwise.
*/
- int setOfflineInZooKeeper(final RegionState state, boolean hijack,
- boolean regionAlreadyInTransitionException) {
+ int setOfflineInZooKeeper(final RegionState state, boolean hijack) {
// In case of reassignment the current state in memory need not be
// OFFLINE.
if (!hijack && !state.isClosed() && !state.isOffline()) {
- if (!regionAlreadyInTransitionException ) {
- String msg = "Unexpected state : " + state + " .. Cannot transit it to OFFLINE.";
- this.server.abort(msg, new IllegalStateException(msg));
- return -1;
- } else {
- LOG.debug("Unexpected state : " + state
- + " but retrying to assign because RegionAlreadyInTransitionException.");
- }
+ String msg = "Unexpected state : " + state + " .. Cannot transit it to OFFLINE.";
+ this.server.abort(msg, new IllegalStateException(msg));
+ return -1;
}
boolean allowZNodeCreation = false;
// Under reassignment if the current state is PENDING_OPEN