You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by te...@apache.org on 2011/12/31 16:44:53 UTC

svn commit: r1226112 - in /hbase/branches/0.92: CHANGES.txt src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java

Author: tedyu
Date: Sat Dec 31 15:44:53 2011
New Revision: 1226112

URL: http://svn.apache.org/viewvc?rev=1226112&view=rev
Log:
HBASE-4397  -ROOT-, .META. tables stay offline for too long in recovery phase after all RSs
               are shutdown at the same time (Ming Ma)

Modified:
    hbase/branches/0.92/CHANGES.txt
    hbase/branches/0.92/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java

Modified: hbase/branches/0.92/CHANGES.txt
URL: http://svn.apache.org/viewvc/hbase/branches/0.92/CHANGES.txt?rev=1226112&r1=1226111&r2=1226112&view=diff
==============================================================================
--- hbase/branches/0.92/CHANGES.txt (original)
+++ hbase/branches/0.92/CHANGES.txt Sat Dec 31 15:44:53 2011
@@ -532,6 +532,8 @@ Release 0.92.0 - Unreleased
    HBASE-5113  TestDrainingServer expects round robin region assignment but misses a
                config parameter
    HBASE-5105  TestImportTsv failed with hadoop 0.22 (Ming Ma)
+   HBASE-4397  -ROOT-, .META. tables stay offline for too long in recovery phase after all RSs
+               are shutdown at the same time (Ming Ma)
 
   IMPROVEMENTS
    HBASE-3290  Max Compaction Size (Nicolas Spiegelberg via Stack)  

Modified: hbase/branches/0.92/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.92/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java?rev=1226112&r1=1226111&r2=1226112&view=diff
==============================================================================
--- hbase/branches/0.92/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java (original)
+++ hbase/branches/0.92/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java Sat Dec 31 15:44:53 2011
@@ -193,7 +193,7 @@ public class AssignmentManager extends Z
     Configuration conf = master.getConfiguration();
     this.timeoutMonitor = new TimeoutMonitor(
       conf.getInt("hbase.master.assignment.timeoutmonitor.period", 10000),
-      master,
+      master, serverManager,
       conf.getInt("hbase.master.assignment.timeoutmonitor.timeout", 1800000));
     Threads.setDaemonThreadRunning(timeoutMonitor.getThread(),
       master.getServerName() + ".timeoutMonitor");
@@ -1500,6 +1500,7 @@ public class AssignmentManager extends Z
         state.update(RegionState.State.OFFLINE);
         // Force a new plan and reassign.  Will return null if no servers.
         if (getRegionPlan(state, plan.getDestination(), true) == null) {
+          this.timeoutMonitor.setAllRegionServersOffline(true);
           LOG.warn("Unable to find a viable location to assign region " +
             state.getRegion().getRegionNameAsString());
           return;
@@ -2496,6 +2497,8 @@ public class AssignmentManager extends Z
   public class TimeoutMonitor extends Chore {
     private final int timeout;
     private boolean bulkAssign = false;
+    private boolean allRegionServersOffline = false;
+    private ServerManager serverManager;
 
     /**
      * Creates a periodic monitor to check for time outs on region transition
@@ -2507,9 +2510,11 @@ public class AssignmentManager extends Z
      * @param timeout
      */
     public TimeoutMonitor(final int period, final Stoppable stopper,
+        ServerManager serverManager,
         final int timeout) {
       super("AssignmentTimeoutMonitor", period, stopper);
       this.timeout = timeout;
+      this.serverManager = serverManager;
     }
 
     /**
@@ -2523,10 +2528,18 @@ public class AssignmentManager extends Z
       return result;
     }
 
+    private synchronized void setAllRegionServersOffline(
+      boolean allRegionServersOffline) {
+      this.allRegionServersOffline = allRegionServersOffline;
+    }
+
     @Override
     protected void chore() {
       // If bulkAssign in progress, suspend checks
       if (this.bulkAssign) return;
+      boolean allRSsOffline = this.serverManager.getOnlineServersList().
+        isEmpty();
+
       synchronized (regionsInTransition) {
         // Iterate all regions in transition checking for time outs
         long now = System.currentTimeMillis();
@@ -2534,9 +2547,14 @@ public class AssignmentManager extends Z
           if (regionState.getStamp() + timeout <= now) {
            //decide on action upon timeout
             actOnTimeOut(regionState);
+          } else if (this.allRegionServersOffline && !allRSsOffline) {
+            // if some RSs just came back online, we can start the
+            // assignment right away
+            actOnTimeOut(regionState);
           }
         }
       }
+      setAllRegionServersOffline(allRSsOffline);
     }
 
     private void actOnTimeOut(RegionState regionState) {