You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by te...@apache.org on 2011/08/26 00:37:10 UTC
svn commit: r1161785 - in /hbase/branches/0.90: ./
src/main/java/org/apache/hadoop/hbase/master/
src/main/java/org/apache/hadoop/hbase/regionserver/handler/
src/test/java/org/apache/hadoop/hbase/master/
Author: tedyu
Date: Thu Aug 25 22:37:10 2011
New Revision: 1161785
URL: http://svn.apache.org/viewvc?rev=1161785&view=rev
Log:
HBASE-4124 ZK restarted while a region is being assigned, new active HM re-assigns
it but the RS warns 'already online on this server'. (Gao Jinchao)
Modified:
hbase/branches/0.90/CHANGES.txt
hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/regionserver/handler/OpenRegionHandler.java
hbase/branches/0.90/src/test/java/org/apache/hadoop/hbase/master/TestZKBasedOpenCloseRegion.java
Modified: hbase/branches/0.90/CHANGES.txt
URL: http://svn.apache.org/viewvc/hbase/branches/0.90/CHANGES.txt?rev=1161785&r1=1161784&r2=1161785&view=diff
==============================================================================
--- hbase/branches/0.90/CHANGES.txt (original)
+++ hbase/branches/0.90/CHANGES.txt Thu Aug 25 22:37:10 2011
@@ -24,6 +24,8 @@ Release 0.90.5 - Unreleased
on branch (Jieshan Bean)
HBASE-4253 Intermittent test failure because of missing config parameter in new
HTable(tablename) (Ramkrishna)
+ HBASE-4124 ZK restarted while a region is being assigned, new active HM re-assigns
+ it but the RS warns 'already online on this server'. (Gao Jinchao)
IMPROVEMENT
HBASE-4205 Enhance HTable javadoc (Eric Charles)
Modified: hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java?rev=1161785&r1=1161784&r2=1161785&view=diff
==============================================================================
--- hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java (original)
+++ hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java Thu Aug 25 22:37:10 2011
@@ -326,7 +326,11 @@ public class AssignmentManager extends Z
synchronized (regionsInTransition) {
switch (data.getEventType()) {
case RS_ZK_REGION_CLOSING:
- if (isOnDeadServer(regionInfo, deadServers)) {
+ //If the zk node of the region was updated by a live server,
+ //we should not force the region offline; just add it into RIT.
+ if (isOnDeadServer(regionInfo, deadServers) &&
+ (null == data.getServerName() ||
+ !serverManager.isServerOnline(data.getServerName()))){
// If was on dead server, its closed now. Force to OFFLINE and this
// will get it reassigned if appropriate
forceOffline(regionInfo, data);
@@ -372,7 +376,9 @@ public class AssignmentManager extends Z
"; letting RIT timeout so will be assigned elsewhere");
break;
}
- if (isOnDeadServer(regionInfo, deadServers)) {
+ if (isOnDeadServer(regionInfo, deadServers) &&
+ (null == data.getServerName() ||
+ !serverManager.isServerOnline(data.getServerName()))) {
// If was on a dead server, then its not open any more; needs handling.
forceOffline(regionInfo, data);
} else {
@@ -1640,6 +1646,17 @@ public class AssignmentManager extends Z
boolean assign =
ServerShutdownHandler.processDeadRegion(regionInfo, result, this,
this.catalogTracker);
+ RegionTransitionData data = ZKAssign.getData(watcher, regionInfo.getEncodedName());
+
+ //If zk node of this region has been updated by a live server,
+ //we consider that this region is being handled.
+ //So we should skip it and process it in processRegionsInTransition.
+ if (data != null && data.getServerName() != null &&
+ serverManager.isServerOnline(data.getServerName())){
+ LOG.info("The region " + regionInfo.getEncodedName() +
+ "is being handled on " + data.getServerName());
+ continue;
+ }
if (assign) {
ZKAssign.createOrForceNodeOffline(watcher, regionInfo,
master.getServerName());
Modified: hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/regionserver/handler/OpenRegionHandler.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/regionserver/handler/OpenRegionHandler.java?rev=1161785&r1=1161784&r2=1161785&view=diff
==============================================================================
--- hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/regionserver/handler/OpenRegionHandler.java (original)
+++ hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/regionserver/handler/OpenRegionHandler.java Thu Aug 25 22:37:10 2011
@@ -83,6 +83,10 @@ public class OpenRegionHandler extends E
if (region != null) {
LOG.warn("Attempted open of " + name +
" but already online on this server");
+
+ //This region should be assigned to another region server by RIT,
+ //so we need to close it.
+ cleanupFailedOpen(region);
return;
}
Modified: hbase/branches/0.90/src/test/java/org/apache/hadoop/hbase/master/TestZKBasedOpenCloseRegion.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.90/src/test/java/org/apache/hadoop/hbase/master/TestZKBasedOpenCloseRegion.java?rev=1161785&r1=1161784&r2=1161785&view=diff
==============================================================================
--- hbase/branches/0.90/src/test/java/org/apache/hadoop/hbase/master/TestZKBasedOpenCloseRegion.java (original)
+++ hbase/branches/0.90/src/test/java/org/apache/hadoop/hbase/master/TestZKBasedOpenCloseRegion.java Thu Aug 25 22:37:10 2011
@@ -18,11 +18,12 @@
* limitations under the License.
*/
package org.apache.hadoop.hbase.master;
-
+import static org.junit.Assert.assertEquals;
import java.io.IOException;
import java.util.Collection;
import java.util.concurrent.atomic.AtomicBoolean;
+import java.util.List;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
@@ -41,9 +42,11 @@ import org.apache.hadoop.hbase.executor.
import org.apache.hadoop.hbase.executor.EventHandler.EventType;
import org.apache.hadoop.hbase.master.handler.TotesHRegionInfo;
import org.apache.hadoop.hbase.regionserver.HRegionServer;
+import org.apache.hadoop.hbase.master.LoadBalancer.RegionPlan;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.Threads;
import org.apache.hadoop.hbase.util.Writables;
+import org.apache.hadoop.hbase.util.JVMClusterUtil.MasterThread;
import org.junit.AfterClass;
import org.junit.Assert;
import org.junit.Before;
@@ -64,6 +67,10 @@ public class TestZKBasedOpenCloseRegion
@BeforeClass public static void beforeAllTests() throws Exception {
Configuration c = TEST_UTIL.getConfiguration();
+ // Need to drop the assignment timeout monitor period and timeout much lower
+ c.setInt("hbase.master.assignment.timeoutmonitor.period", 2000);
+ c.setInt("hbase.master.assignment.timeoutmonitor.timeout", 4000);
+
c.setBoolean("dfs.support.append", true);
c.setInt("hbase.regionserver.info.port", 0);
TEST_UTIL.startMiniCluster(2);
@@ -129,7 +136,30 @@ public class TestZKBasedOpenCloseRegion
while (!reopenEventProcessed.get()) {
Threads.sleep(100);
}
-
+
+ //Test that a region can be reopened on the same region server.
+ reopenEventProcessed.set(false);
+
+ List<MasterThread> masterThreads = cluster.getMasterThreads();
+ assertEquals(1, masterThreads.size());
+
+ HMaster master = masterThreads.get(0).getMaster();
+ assertTrue(master.isActiveMaster());
+
+ hri = getNonMetaRegion(regionServer.getOnlineRegions());
+ openListener =
+ new ReopenEventListener(hri.getRegionNameAsString(),
+ reopenEventProcessed, EventType.RS_ZK_REGION_OPENED);
+ cluster.getMaster().executorService.
+ registerListener(EventType.RS_ZK_REGION_OPENED, openListener);
+
+ master.assignmentManager.regionPlans.put(hri.getEncodedName(),
+ new RegionPlan(hri, null, regionServer.getServerInfo()));
+ master.assignRegion(hri);
+
+ while (!reopenEventProcessed.get()) {
+ Threads.sleep(100);
+ }
LOG.info("Done with testReOpenRegion");
}