You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by te...@apache.org on 2011/08/26 00:37:10 UTC

svn commit: r1161785 - in /hbase/branches/0.90: ./ src/main/java/org/apache/hadoop/hbase/master/ src/main/java/org/apache/hadoop/hbase/regionserver/handler/ src/test/java/org/apache/hadoop/hbase/master/

Author: tedyu
Date: Thu Aug 25 22:37:10 2011
New Revision: 1161785

URL: http://svn.apache.org/viewvc?rev=1161785&view=rev
Log:
HBASE-4124  ZK restarted while a region is being assigned, new active HM re-assigns
               it but the RS warns 'already online on this server'. (Gao Jinchao)

Modified:
    hbase/branches/0.90/CHANGES.txt
    hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
    hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/regionserver/handler/OpenRegionHandler.java
    hbase/branches/0.90/src/test/java/org/apache/hadoop/hbase/master/TestZKBasedOpenCloseRegion.java

Modified: hbase/branches/0.90/CHANGES.txt
URL: http://svn.apache.org/viewvc/hbase/branches/0.90/CHANGES.txt?rev=1161785&r1=1161784&r2=1161785&view=diff
==============================================================================
--- hbase/branches/0.90/CHANGES.txt (original)
+++ hbase/branches/0.90/CHANGES.txt Thu Aug 25 22:37:10 2011
@@ -24,6 +24,8 @@ Release 0.90.5 - Unreleased
                on branch (Jieshan Bean)
    HBASE-4253  Intermittent test failure because of missing config parameter in new
                HTable(tablename) (Ramkrishna)
+   HBASE-4124  ZK restarted while a region is being assigned, new active HM re-assigns
+               it but the RS warns 'already online on this server'. (Gao Jinchao)
 
   IMPROVEMENT
    HBASE-4205  Enhance HTable javadoc (Eric Charles)

Modified: hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java?rev=1161785&r1=1161784&r2=1161785&view=diff
==============================================================================
--- hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java (original)
+++ hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java Thu Aug 25 22:37:10 2011
@@ -326,7 +326,11 @@ public class AssignmentManager extends Z
     synchronized (regionsInTransition) {
       switch (data.getEventType()) {
       case RS_ZK_REGION_CLOSING:
-        if (isOnDeadServer(regionInfo, deadServers)) {
+        // If the region's ZK node was last updated by a live server,
+        // skip this region here and just add it to RIT.
+        if (isOnDeadServer(regionInfo, deadServers) && 
+          (null == data.getServerName() ||
+            !serverManager.isServerOnline(data.getServerName()))){
           // If was on dead server, its closed now.  Force to OFFLINE and this
           // will get it reassigned if appropriate
           forceOffline(regionInfo, data);
@@ -372,7 +376,9 @@ public class AssignmentManager extends Z
             "; letting RIT timeout so will be assigned elsewhere");
           break;
         }
-        if (isOnDeadServer(regionInfo, deadServers)) {
+        if (isOnDeadServer(regionInfo, deadServers) && 
+            (null == data.getServerName() ||
+              !serverManager.isServerOnline(data.getServerName()))) {
           // If was on a dead server, then its not open any more; needs handling.
           forceOffline(regionInfo, data);
         } else {
@@ -1640,6 +1646,17 @@ public class AssignmentManager extends Z
           boolean assign =
             ServerShutdownHandler.processDeadRegion(regionInfo, result, this,
               this.catalogTracker);
+          RegionTransitionData data = ZKAssign.getData(watcher, regionInfo.getEncodedName()); 
+          
+          // If the ZK node of this region has been updated by a live server,
+          // we consider the region to be already being handled, so we skip
+          // it here and let processRegionsInTransition deal with it.
+          if (data != null && data.getServerName() != null &&
+            serverManager.isServerOnline(data.getServerName())){
+              LOG.info("The region " + regionInfo.getEncodedName() +
+                "is being handled on " + data.getServerName());
+            continue;
+          }
           if (assign) {
             ZKAssign.createOrForceNodeOffline(watcher, regionInfo,
               master.getServerName()); 

Modified: hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/regionserver/handler/OpenRegionHandler.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/regionserver/handler/OpenRegionHandler.java?rev=1161785&r1=1161784&r2=1161785&view=diff
==============================================================================
--- hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/regionserver/handler/OpenRegionHandler.java (original)
+++ hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/regionserver/handler/OpenRegionHandler.java Thu Aug 25 22:37:10 2011
@@ -83,6 +83,10 @@ public class OpenRegionHandler extends E
       if (region != null) {
         LOG.warn("Attempted open of " + name +
           " but already online on this server");
+          
+        //This region should be assigned to another region server by RIT,  
+        //so we need to close it.
+        cleanupFailedOpen(region);
         return;
       }
 

Modified: hbase/branches/0.90/src/test/java/org/apache/hadoop/hbase/master/TestZKBasedOpenCloseRegion.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.90/src/test/java/org/apache/hadoop/hbase/master/TestZKBasedOpenCloseRegion.java?rev=1161785&r1=1161784&r2=1161785&view=diff
==============================================================================
--- hbase/branches/0.90/src/test/java/org/apache/hadoop/hbase/master/TestZKBasedOpenCloseRegion.java (original)
+++ hbase/branches/0.90/src/test/java/org/apache/hadoop/hbase/master/TestZKBasedOpenCloseRegion.java Thu Aug 25 22:37:10 2011
@@ -18,11 +18,12 @@
  * limitations under the License.
  */
 package org.apache.hadoop.hbase.master;
-
+import static org.junit.Assert.assertEquals;
 
 import java.io.IOException;
 import java.util.Collection;
 import java.util.concurrent.atomic.AtomicBoolean;
+import java.util.List;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
@@ -41,9 +42,11 @@ import org.apache.hadoop.hbase.executor.
 import org.apache.hadoop.hbase.executor.EventHandler.EventType;
 import org.apache.hadoop.hbase.master.handler.TotesHRegionInfo;
 import org.apache.hadoop.hbase.regionserver.HRegionServer;
+import org.apache.hadoop.hbase.master.LoadBalancer.RegionPlan;
 import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.hbase.util.Threads;
 import org.apache.hadoop.hbase.util.Writables;
+import org.apache.hadoop.hbase.util.JVMClusterUtil.MasterThread;
 import org.junit.AfterClass;
 import org.junit.Assert;
 import org.junit.Before;
@@ -64,6 +67,10 @@ public class TestZKBasedOpenCloseRegion 
 
   @BeforeClass public static void beforeAllTests() throws Exception {
     Configuration c = TEST_UTIL.getConfiguration();
+    // Lower the assignment timeout monitor period and timeout for this test
+    c.setInt("hbase.master.assignment.timeoutmonitor.period", 2000);
+    c.setInt("hbase.master.assignment.timeoutmonitor.timeout", 4000);
+    
     c.setBoolean("dfs.support.append", true);
     c.setInt("hbase.regionserver.info.port", 0);
     TEST_UTIL.startMiniCluster(2);
@@ -129,7 +136,30 @@ public class TestZKBasedOpenCloseRegion 
     while (!reopenEventProcessed.get()) {
       Threads.sleep(100);
     }
-
+    
+    // Test that a region is reopened on the same region server.
+    reopenEventProcessed.set(false);    
+ 
+    List<MasterThread> masterThreads = cluster.getMasterThreads();
+    assertEquals(1, masterThreads.size());
+    
+    HMaster master = masterThreads.get(0).getMaster();
+    assertTrue(master.isActiveMaster());
+    
+    hri = getNonMetaRegion(regionServer.getOnlineRegions());
+    openListener =
+      new ReopenEventListener(hri.getRegionNameAsString(),
+          reopenEventProcessed, EventType.RS_ZK_REGION_OPENED);
+    cluster.getMaster().executorService.
+      registerListener(EventType.RS_ZK_REGION_OPENED, openListener);
+      
+    master.assignmentManager.regionPlans.put(hri.getEncodedName(),
+          new RegionPlan(hri, null, regionServer.getServerInfo()));
+    master.assignRegion(hri);
+    
+    while (!reopenEventProcessed.get()) {
+      Threads.sleep(100);
+    }
     LOG.info("Done with testReOpenRegion");
   }