You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by st...@apache.org on 2011/03/22 20:36:21 UTC
svn commit: r1084316 - in /hbase/branches/0.90: CHANGES.txt
src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
Author: stack
Date: Tue Mar 22 19:36:21 2011
New Revision: 1084316
URL: http://svn.apache.org/viewvc?rev=1084316&view=rev
Log:
HBASE-3687 Bulk assign on startup should handle a ServerNotRunningException
Modified:
hbase/branches/0.90/CHANGES.txt
hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
Modified: hbase/branches/0.90/CHANGES.txt
URL: http://svn.apache.org/viewvc/hbase/branches/0.90/CHANGES.txt?rev=1084316&r1=1084315&r2=1084316&view=diff
==============================================================================
--- hbase/branches/0.90/CHANGES.txt (original)
+++ hbase/branches/0.90/CHANGES.txt Tue Mar 22 19:36:21 2011
@@ -47,6 +47,7 @@ Release 0.90.2 - Unreleased
HBASE-3621 The timeout handler in AssignmentManager does an RPC while
holding lock on RIT; a big no-no (Ted Yu via Stack)
HBASE-3575 Update rename table script
+ HBASE-3687 Bulk assign on startup should handle a ServerNotRunningException
IMPROVEMENTS
HBASE-3542 MultiGet methods in Thrift
Modified: hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java?rev=1084316&r1=1084315&r2=1084316&view=diff
==============================================================================
--- hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java (original)
+++ hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java Tue Mar 22 19:36:21 2011
@@ -773,9 +773,27 @@ public class AssignmentManager extends Z
// Move on to open regions.
try {
// Send OPEN RPC. This can fail if the server on the other end is not up.
- this.serverManager.sendRegionOpen(destination, regions);
+ // If we fail, fail the startup by aborting the server. There is one
+ // exception we will tolerate: ServerNotRunningException. This is thrown
+ // between report of regionserver being up and its RPC server actually being ready to serve requests.
+ long maxWaitTime = System.currentTimeMillis() +
+ this.master.getConfiguration().getLong("hbase.regionserver.rpc.startup.waittime", 60000);
+ while (!this.master.isStopped()) {
+ try {
+ this.serverManager.sendRegionOpen(destination, regions);
+ } catch (org.apache.hadoop.hbase.ipc.ServerNotRunningException e) {
+ // This is the one exception to retry. For all else we should just fail
+ // the startup.
+ long now = System.currentTimeMillis();
+ if (now > maxWaitTime) throw e;
+ LOG.debug("Server is not yet up; waiting up to " +
+ (maxWaitTime - now) + "ms", e);
+ Thread.sleep(1000);
+ }
+ }
} catch (Throwable t) {
- this.master.abort("Failed assignment of regions to " + destination, t);
+ this.master.abort("Failed assignment of regions to " + destination +
+ "; bulk assign FAILED", t);
return;
}
LOG.debug("Bulk assigning done for " + destination.getServerName());