You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by jx...@apache.org on 2013/05/21 19:19:02 UTC
svn commit: r1484875 - in /hbase/trunk:
hbase-client/src/main/java/org/apache/hadoop/hbase/zookeeper/
hbase-server/src/main/java/org/apache/hadoop/hbase/master/
hbase-server/src/test/java/org/apache/hadoop/hbase/master/
Author: jxiang
Date: Tue May 21 17:19:02 2013
New Revision: 1484875
URL: http://svn.apache.org/r1484875
Log:
HBASE-8545 Meta stuck in transition when it is assigned to a just restarted dead region server
Modified:
hbase/trunk/hbase-client/src/main/java/org/apache/hadoop/hbase/zookeeper/ZKAssign.java
hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManager.java
hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManagerOnCluster.java
Modified: hbase/trunk/hbase-client/src/main/java/org/apache/hadoop/hbase/zookeeper/ZKAssign.java
URL: http://svn.apache.org/viewvc/hbase/trunk/hbase-client/src/main/java/org/apache/hadoop/hbase/zookeeper/ZKAssign.java?rev=1484875&r1=1484874&r2=1484875&view=diff
==============================================================================
--- hbase/trunk/hbase-client/src/main/java/org/apache/hadoop/hbase/zookeeper/ZKAssign.java (original)
+++ hbase/trunk/hbase-client/src/main/java/org/apache/hadoop/hbase/zookeeper/ZKAssign.java Tue May 21 17:19:02 2013
@@ -798,7 +798,6 @@ public class ZKAssign {
// Node no longer exists. Return -1. It means unsuccessful transition.
return -1;
}
- RegionTransition rt = getRegionTransition(existingBytes);
// Verify it is the expected version
if (expectedVersion != -1 && stat.getVersion() != expectedVersion) {
@@ -808,7 +807,9 @@ public class ZKAssign {
"the node existed but was version " + stat.getVersion() +
" not the expected version " + expectedVersion));
return -1;
- } else if (beginState.equals(EventType.M_ZK_REGION_OFFLINE)
+ }
+
+ if (beginState.equals(EventType.M_ZK_REGION_OFFLINE)
&& endState.equals(EventType.RS_ZK_REGION_OPENING)
&& expectedVersion == -1 && stat.getVersion() != 0) {
// the below check ensures that double assignment doesnot happen.
@@ -822,6 +823,18 @@ public class ZKAssign {
return -1;
}
+ RegionTransition rt = getRegionTransition(existingBytes);
+
+ // Verify the server transition happens on is not changed
+ if (!rt.getServerName().equals(serverName)) {
+ LOG.warn(zkw.prefix("Attempt to transition the " +
+ "unassigned node for " + encoded +
+ " from " + beginState + " to " + endState + " failed, " +
+ "the server that tried to transition was " + serverName +
+ " not the expected " + rt.getServerName()));
+ return -1;
+ }
+
// Verify it is in expected state
EventType et = rt.getEventType();
if (!et.equals(beginState)) {
Modified: hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
URL: http://svn.apache.org/viewvc/hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java?rev=1484875&r1=1484874&r2=1484875&view=diff
==============================================================================
--- hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java (original)
+++ hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java Tue May 21 17:19:02 2013
@@ -534,7 +534,7 @@ public class AssignmentManager extends Z
EventType et = rt.getEventType();
// Get ServerName. Could not be null.
final ServerName sn = rt.getServerName();
- String encodedRegionName = regionInfo.getEncodedName();
+ final String encodedRegionName = regionInfo.getEncodedName();
LOG.info("Processing region " + regionInfo.getRegionNameAsString() + " in state " + et);
@@ -592,6 +592,8 @@ public class AssignmentManager extends Z
public void process() throws IOException {
ReentrantLock lock = locker.acquireLock(regionInfo.getEncodedName());
try {
+ RegionPlan plan = new RegionPlan(regionInfo, null, sn);
+ addPlan(encodedRegionName, plan);
assign(rs, false, false);
} finally {
lock.unlock();
Modified: hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManager.java
URL: http://svn.apache.org/viewvc/hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManager.java?rev=1484875&r1=1484874&r2=1484875&view=diff
==============================================================================
--- hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManager.java (original)
+++ hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManager.java Tue May 21 17:19:02 2013
@@ -399,7 +399,7 @@ public class TestAssignmentManager {
assertNotSame(-1, versionid);
// This uglyness below is what the openregionhandler on RS side does.
versionid = ZKAssign.transitionNode(server.getZooKeeper(), REGIONINFO,
- SERVERNAME_A, EventType.M_ZK_REGION_OFFLINE,
+ SERVERNAME_B, EventType.M_ZK_REGION_OFFLINE,
EventType.RS_ZK_REGION_OPENING, versionid);
assertNotSame(-1, versionid);
// Move znode from OPENING to OPENED as RS does on successful open.
Modified: hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManagerOnCluster.java
URL: http://svn.apache.org/viewvc/hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManagerOnCluster.java?rev=1484875&r1=1484874&r2=1484875&view=diff
==============================================================================
--- hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManagerOnCluster.java (original)
+++ hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManagerOnCluster.java Tue May 21 17:19:02 2013
@@ -24,6 +24,7 @@ import static org.junit.Assert.fail;
import java.io.IOException;
import java.util.List;
+import java.util.Set;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
@@ -34,6 +35,7 @@ import org.apache.hadoop.hbase.HConstant
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.MediumTests;
+import org.apache.hadoop.hbase.ServerLoad;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.catalog.MetaEditor;
import org.apache.hadoop.hbase.client.HBaseAdmin;
@@ -43,6 +45,7 @@ import org.apache.hadoop.hbase.coprocess
import org.apache.hadoop.hbase.coprocessor.ObserverContext;
import org.apache.hadoop.hbase.coprocessor.RegionCoprocessorEnvironment;
import org.apache.hadoop.hbase.coprocessor.RegionObserver;
+import org.apache.hadoop.hbase.executor.EventType;
import org.apache.hadoop.hbase.master.balancer.StochasticLoadBalancer;
import org.apache.hadoop.hbase.regionserver.HRegionServer;
import org.apache.hadoop.hbase.util.Bytes;
@@ -111,6 +114,64 @@ public class TestAssignmentManagerOnClus
}
/**
+ * This tests region assignment on a simulated restarted server
+ */
+ @Test
+ public void testAssignRegionOnRestartedServer() throws Exception {
+ String table = "testAssignRegionOnRestartedServer";
+ ServerName deadServer = null;
+ HMaster master = null;
+ try {
+ HTableDescriptor desc = new HTableDescriptor(table);
+ desc.addFamily(new HColumnDescriptor(FAMILY));
+ admin.createTable(desc);
+
+ HTable meta = new HTable(conf, HConstants.META_TABLE_NAME);
+ HRegionInfo hri = new HRegionInfo(
+ desc.getName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
+ MetaEditor.addRegionToMeta(meta, hri);
+
+ master = TEST_UTIL.getHBaseCluster().getMaster();
+ Set<ServerName> onlineServers = master.serverManager.getOnlineServers().keySet();
+ assertFalse("There should be some servers online", onlineServers.isEmpty());
+
+ // Use the first server as the destination server
+ ServerName destServer = onlineServers.iterator().next();
+
+ // Created faked dead server
+ deadServer = new ServerName(destServer.getHostname(),
+ destServer.getPort(), destServer.getStartcode() - 100L);
+ master.serverManager.recordNewServer(deadServer, ServerLoad.EMPTY_SERVERLOAD);
+
+ AssignmentManager am = master.getAssignmentManager();
+ RegionPlan plan = new RegionPlan(hri, null, deadServer);
+ am.addPlan(hri.getEncodedName(), plan);
+ master.assignRegion(hri);
+
+ int version = ZKAssign.transitionNode(master.getZooKeeper(), hri,
+ destServer, EventType.M_ZK_REGION_OFFLINE,
+ EventType.RS_ZK_REGION_OPENING, 0);
+ assertEquals("TansitionNode should fail", -1, version);
+
+ // Give region 2 seconds to assign, which may not be enough.
+ // However, if HBASE-8545 is broken, this test will be flaky.
+ // Otherwise, this test should never be flaky.
+ Thread.sleep(2000);
+
+ assertTrue("Region should still be in transition",
+ am.getRegionStates().isRegionInTransition(hri));
+ assertEquals("Assign node should still be in version 0", 0,
+ ZKAssign.getVersion(master.getZooKeeper(), hri));
+ } finally {
+ if (deadServer != null) {
+ master.serverManager.expireServer(deadServer);
+ }
+
+ TEST_UTIL.deleteTable(Bytes.toBytes(table));
+ }
+ }
+
+ /**
* This tests offlining a region
*/
@Test