You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by jx...@apache.org on 2014/10/08 05:40:40 UTC
git commit: HBASE-12196 SSH should retry in case failed to assign
regions
Repository: hbase
Updated Branches:
refs/heads/master cab081932 -> f2fc311b1
HBASE-12196 SSH should retry in case failed to assign regions
Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/f2fc311b
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/f2fc311b
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/f2fc311b
Branch: refs/heads/master
Commit: f2fc311b19f19031e24547a7e4750a17115ac383
Parents: cab0819
Author: Jimmy Xiang <jx...@cloudera.com>
Authored: Tue Oct 7 15:07:36 2014 -0700
Committer: Jimmy Xiang <jx...@cloudera.com>
Committed: Tue Oct 7 20:23:32 2014 -0700
----------------------------------------------------------------------
.../master/handler/ServerShutdownHandler.java | 6 ++
.../master/TestAssignmentManagerOnCluster.java | 73 ++++++++++++++++++++
2 files changed, 79 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hbase/blob/f2fc311b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java
index 1691c9d..c443968 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java
@@ -273,6 +273,12 @@ public class ServerShutdownHandler extends EventHandler {
} catch (InterruptedException ie) {
LOG.error("Caught " + ie + " during round-robin assignment");
throw (InterruptedIOException)new InterruptedIOException().initCause(ie);
+ } catch (IOException ioe) {
+ LOG.info("Caught " + ioe + " during region assignment, will retry");
+ // Only split HLogs if shouldSplitHlog is set and distributed log replay (DLR) is on
+ serverManager.processDeadServer(serverName,
+ this.shouldSplitHlog && distributedLogReplay);
+ return;
}
if (this.shouldSplitHlog && distributedLogReplay) {
http://git-wip-us.apache.org/repos/asf/hbase/blob/f2fc311b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManagerOnCluster.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManagerOnCluster.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManagerOnCluster.java
index 4d24268..27c7073 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManagerOnCluster.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManagerOnCluster.java
@@ -28,8 +28,10 @@ import static org.junit.Assert.fail;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
+import java.util.Map;
import java.util.Set;
import java.util.concurrent.atomic.AtomicBoolean;
+import java.util.concurrent.atomic.AtomicInteger;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
@@ -79,6 +81,7 @@ import org.junit.experimental.categories.Category;
/**
* This tests AssignmentManager with a testing cluster.
*/
+@SuppressWarnings("deprecation")
@Category({MasterTests.class, MediumTests.class})
public class TestAssignmentManagerOnCluster {
private final static byte[] FAMILY = Bytes.toBytes("FAMILY");
@@ -831,6 +834,58 @@ public class TestAssignmentManagerOnCluster {
}
/**
+ * Test that SSH keeps retrying region assignment until an extra region server joins
+ */
+ @Test (timeout=300000)
+ public void testSSHWaitForServerToAssignRegion() throws Exception {
+ TableName table = TableName.valueOf("testSSHWaitForServerToAssignRegion");
+ MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
+ boolean startAServer = false;
+ try {
+ HTableDescriptor desc = new HTableDescriptor(table);
+ desc.addFamily(new HColumnDescriptor(FAMILY));
+ admin.createTable(desc);
+
+ HMaster master = cluster.getMaster();
+ final ServerManager serverManager = master.getServerManager();
+ MyLoadBalancer.countRegionServers = Integer.valueOf(
+ serverManager.countOfRegionServers());
+ HRegionServer rs = TEST_UTIL.getRSForFirstRegionInTable(table);
+ assertNotNull("First region should be assigned", rs);
+ final ServerName serverName = rs.getServerName();
+ // Wait till SSH has tried to assign regions several times
+ int counter = MyLoadBalancer.counter.get() + 5;
+ cluster.killRegionServer(serverName);
+ startAServer = true;
+ cluster.waitForRegionServerToStop(serverName, -1);
+ while (counter > MyLoadBalancer.counter.get()) {
+ Thread.sleep(1000);
+ }
+ cluster.startRegionServer();
+ startAServer = false;
+ // Wait till the dead server is processed by SSH
+ TEST_UTIL.waitFor(120000, 1000, new Waiter.Predicate<Exception>() {
+ @Override
+ public boolean evaluate() throws Exception {
+ return serverManager.isServerDead(serverName)
+ && !serverManager.areDeadServersInProgress();
+ }
+ });
+ TEST_UTIL.waitUntilNoRegionsInTransition(300000);
+
+ rs = TEST_UTIL.getRSForFirstRegionInTable(table);
+ assertTrue("First region should be re-assigned to a different server",
+ rs != null && !serverName.equals(rs.getServerName()));
+ } finally {
+ MyLoadBalancer.countRegionServers = null;
+ TEST_UTIL.deleteTable(table);
+ if (startAServer) {
+ cluster.startRegionServer();
+ }
+ }
+ }
+
+ /**
* Test force unassign/assign a region of a disabled table
*/
@Test (timeout=60000)
@@ -1121,6 +1176,9 @@ public class TestAssignmentManagerOnCluster {
// For this region, if specified, always assign to nowhere
static volatile String controledRegion = null;
+ static volatile Integer countRegionServers = null;
+ static AtomicInteger counter = new AtomicInteger(0);
+
@Override
public ServerName randomAssignment(HRegionInfo regionInfo,
List<ServerName> servers) {
@@ -1129,6 +1187,21 @@ public class TestAssignmentManagerOnCluster {
}
return super.randomAssignment(regionInfo, servers);
}
+
+ @Override
+ public Map<ServerName, List<HRegionInfo>> roundRobinAssignment(
+ List<HRegionInfo> regions, List<ServerName> servers) {
+ if (countRegionServers != null && services != null) {
+ int regionServers = services.getServerManager().countOfRegionServers();
+ if (regionServers < countRegionServers.intValue()) {
+ // Let's wait till more region servers join in.
+ // Before that, fail region assignments.
+ counter.incrementAndGet();
+ return null;
+ }
+ }
+ return super.roundRobinAssignment(regions, servers);
+ }
}
public static class MyMaster extends HMaster {