You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by ns...@apache.org on 2011/10/11 19:43:09 UTC

svn commit: r1181939 - in /hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/master: RegionManager.java RegionServerOperationQueue.java ServerManager.java

Author: nspiegelberg
Date: Tue Oct 11 17:43:08 2011
New Revision: 1181939

URL: http://svn.apache.org/viewvc?rev=1181939&view=rev
Log:
Fix LoadBalancer while processing RS crash (V2)

Summary:
RS Split Log processing (which is a really long operation) and
updating region status in META are processed by the same queue.  This
causes load balancing problems when the crashed server restarts because
load balancing is only queued and not processed.  Disable load balancing
during server log recovery to avoid this issue.  NOTE: On 7/4/11, one of our
production cells had 80% of its regions unassigned because of this bug.

This patch was checked in previously, then ran into unit test failures and
was reverted.  This version addresses those failures.  Namely, it removes
"synchronized" from isEmpty().  This was not necessary because both queues
are immutable and thread-safe classes.  The synchronized
RegionServerOperationQueue.process() is called heavily, and was causing
30-second stalls that prevented META from being quickly assigned on startup.

Test Plan: - mvn test
Reviewed By: kannan
Reviewers: kannan, kranganathan, mbautin
CC: hbase@lists, nspiegelberg, kannan
Differential Revision: 298625
Task ID: 622748

Modified:
    hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/master/RegionManager.java
    hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/master/RegionServerOperationQueue.java
    hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java

Modified: hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/master/RegionManager.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/master/RegionManager.java?rev=1181939&r1=1181938&r2=1181939&view=diff
==============================================================================
--- hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/master/RegionManager.java (original)
+++ hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/master/RegionManager.java Tue Oct 11 17:43:08 2011
@@ -310,9 +310,10 @@ public class RegionManager {
         holdRegionForBestRegionServer,
         quickStartRegionServerSet);
 
-    if (regionsToAssign.size() == 0) {
+    if (regionsToAssign.isEmpty()) {
       // There are no regions waiting to be assigned.
-      if (!assignmentByLocality) {
+      if (!assignmentByLocality
+          && master.getRegionServerOperationQueue().isEmpty()) {
         // load balance as before
         this.loadBalancer.loadBalancing(info, mostLoadedRegions, returnMsgs);
       }
@@ -697,7 +698,7 @@ public class RegionManager {
       regionsClosed++;
     }
     LOG.info("Skipped assigning " + skipped + " region(s) to " +
-      info.getServerName() + "because already in transition");
+      info.getServerName() + " because already in transition");
   }
 
   /*

Modified: hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/master/RegionServerOperationQueue.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/master/RegionServerOperationQueue.java?rev=1181939&r1=1181938&r2=1181939&view=diff
==============================================================================
--- hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/master/RegionServerOperationQueue.java (original)
+++ hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/master/RegionServerOperationQueue.java Tue Oct 11 17:43:08 2011
@@ -192,6 +192,13 @@ public class RegionServerOperationQueue 
   }
 
   /**
+   * @return true if the RSO queue has no entries
+   */
+  public boolean isEmpty() {
+    return this.toDoQueue.isEmpty() && this.delayedToDoQueue.isEmpty();
+  }
+
+  /**
    * Clean up the queues.
    */
   public synchronized void shutdown() {

Modified: hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java?rev=1181939&r1=1181938&r2=1181939&view=diff
==============================================================================
--- hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java (original)
+++ hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java Tue Oct 11 17:43:08 2011
@@ -131,7 +131,7 @@ public class ServerManager {
       }
       LOG.info(numServers + " region servers, " + numDeadServers +
         " dead, average load " + averageLoad +
-        (deadServersList != null? deadServers: ""));
+        (deadServersList != null? deadServersList: ""));
     }
   }