You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by li...@apache.org on 2012/10/30 07:14:06 UTC
svn commit: r1403626 - in /hbase/branches/0.89-fb/src:
main/java/org/apache/hadoop/hbase/master/
main/java/org/apache/hadoop/hbase/regionserver/
main/java/org/apache/hadoop/hbase/util/
test/java/org/apache/hadoop/hbase/master/
Author: liyin
Date: Tue Oct 30 06:14:06 2012
New Revision: 1403626
URL: http://svn.apache.org/viewvc?rev=1403626&view=rev
Log:
[master] [0.89-fb] Ensure that new master does not split logs of recently checked in RS
Author: aaiyer
Summary: Sev on cell 13 saw a scenario where, upon master failover, the new master was splitting the logs for regionservers that were still running. This happens when there is an error in the SplitLog for one of the logs and the master does not acknowledge new servers.
Test Plan:
run MR tests.
One failure. Also fails without the diff.
Also, adding a unit test.
Reviewers: kranganathan, kannan
Reviewed By: kranganathan
CC: hbase-eng@
Differential Revision: https://phabricator.fb.com/D611291
Modified:
hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/regionserver/SplitLogWorker.java
hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/util/InjectionEvent.java
hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/master/TestLogSplitOnMasterFailover.java
Modified: hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/master/HMaster.java?rev=1403626&r1=1403625&r2=1403626&view=diff
==============================================================================
--- hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/master/HMaster.java (original)
+++ hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/master/HMaster.java Tue Oct 30 06:14:06 2012
@@ -1061,7 +1061,9 @@ public class HMaster extends HasThread i
Path logDir = status.getPath();
String serverName = logDir.getName();
LOG.info("Found log folder : " + serverName);
- if (!clusterStateRecovery.liveRegionServersAtStartup().contains(serverName)) {
+ if (!clusterStateRecovery.liveRegionServersAtStartup().contains(serverName)
+ // If the server has since checked in with the new master, don't kill it.
+ && serverManager.getServerInfo(serverName) == null) {
LOG.info("Log folder " + status.getPath() + " doesn't belong " +
"to a known region server, splitting");
serverNames.add(serverName);
Modified: hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/regionserver/SplitLogWorker.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/regionserver/SplitLogWorker.java?rev=1403626&r1=1403625&r2=1403626&view=diff
==============================================================================
--- hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/regionserver/SplitLogWorker.java (original)
+++ hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/regionserver/SplitLogWorker.java Tue Oct 30 06:14:06 2012
@@ -41,6 +41,8 @@ import org.apache.hadoop.hbase.master.Sp
import org.apache.hadoop.hbase.regionserver.wal.HLogSplitter;
import org.apache.hadoop.hbase.util.CancelableProgressable;
import org.apache.hadoop.hbase.util.FSUtils;
+import org.apache.hadoop.hbase.util.InjectionEvent;
+import org.apache.hadoop.hbase.util.InjectionHandler;
import org.apache.hadoop.hbase.zookeeper.RecoverableZooKeeper;
import org.apache.hadoop.hbase.zookeeper.ZKSplitLog;
import org.apache.hadoop.hbase.zookeeper.ZKSplitLog.TaskState;
@@ -136,6 +138,8 @@ public class SplitLogWorker implements R
try {
FileStatus st;
try {
+ InjectionHandler.processEventIO(InjectionEvent.SPLITLOGWORKER_SPLIT_LOG_START);
+
st = fs.getFileStatus(new Path(filename));
t1 = System.currentTimeMillis();
Modified: hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/util/InjectionEvent.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/util/InjectionEvent.java?rev=1403626&r1=1403625&r2=1403626&view=diff
==============================================================================
--- hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/util/InjectionEvent.java (original)
+++ hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/util/InjectionEvent.java Tue Oct 30 06:14:06 2012
@@ -32,5 +32,6 @@ public enum InjectionEvent {
HMASTER_ALTER_TABLE,
HMASTER_ENABLE_TABLE,
HMASTER_DISABLE_TABLE,
- ZKUNASSIGNEDWATCHER_REGION_OPENED
+ ZKUNASSIGNEDWATCHER_REGION_OPENED,
+ SPLITLOGWORKER_SPLIT_LOG_START
}
Modified: hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/master/TestLogSplitOnMasterFailover.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/master/TestLogSplitOnMasterFailover.java?rev=1403626&r1=1403625&r2=1403626&view=diff
==============================================================================
--- hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/master/TestLogSplitOnMasterFailover.java (original)
+++ hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/master/TestLogSplitOnMasterFailover.java Tue Oct 30 06:14:06 2012
@@ -48,8 +48,10 @@ import org.apache.hadoop.hbase.io.hfile.
import org.apache.hadoop.hbase.regionserver.HRegionServer;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.FSUtils;
+import org.apache.hadoop.hbase.util.InjectionEvent;
import org.apache.hadoop.hbase.util.Threads;
import org.apache.hadoop.hbase.zookeeper.ZooKeeperWrapper;
+import org.apache.hadoop.hbase.util.InjectionHandler;
import org.junit.Test;
/**
@@ -248,6 +250,33 @@ public class TestLogSplitOnMasterFailove
runTest();
}
+ /**
+  * Runs the shared runTest() scenario with distributed log splitting enabled
+  * and a SplitLogKillInjectionHandler installed, so that the first few
+  * split-log attempts fail with an IOException before later ones succeed.
+  *
+  * NOTE(review): the injection handler installed here is not removed in this
+  * method; confirm that test teardown resets it so other tests are unaffected.
+  */
+ @Test(timeout=180000)
+ public void testWithDistributedLogSplittingAndErrors() throws Exception {
+ // Install an injection handler that reacts to
+ // InjectionEvent.SPLITLOGWORKER_SPLIT_LOG_START.
+ ZooKeeperWrapper.setNamespaceForTesting();
+ conf.setBoolean(HConstants.DISTRIBUTED_LOG_SPLITTING_KEY, true);
+ InjectionHandler.set(new SplitLogKillInjectionHandler());
+ runTest();
+ }
+
+ /**
+  * Injection handler that makes the first few log-split attempts fail.
+  * Every SPLITLOGWORKER_SPLIT_LOG_START event is delayed by 50ms, and the
+  * first four of them are failed with an IOException. All other events are
+  * ignored.
+  */
+ static class SplitLogKillInjectionHandler extends InjectionHandler {
+   // Number of split-log start events seen so far. Updated only under the
+   // class lock because events may arrive from several worker threads.
+   static int count = 0;
+
+   /**
+    * Fails the first four split-log start events; later ones only sleep.
+    *
+    * @param event the injection point being processed
+    * @param args unused
+    * @throws IOException for each of the first four split-log start events
+    */
+   @Override
+   protected void _processEventIO(InjectionEvent event, Object... args) throws IOException {
+     if (event != InjectionEvent.SPLITLOGWORKER_SPLIT_LOG_START) {
+       return;
+     }
+     final int attempt;
+     // ++ on a shared static int is not atomic; take the class lock so that
+     // concurrent callers each observe a distinct attempt number, and use
+     // that snapshot (not a re-read of the field) for the decisions below.
+     synchronized (SplitLogKillInjectionHandler.class) {
+       attempt = ++count;
+     }
+     LOG.debug("Processing a split log event. Count = " + attempt);
+     Threads.sleep(50); // make it take a bit of time. sleep 50ms.
+     if (attempt < 5) {
+       throw new IOException("Failing for the test");
+     }
+   }
+ }
+
+
private void runTest() throws Exception {
startMiniCluster(NUM_MASTERS, NUM_RS);
Thread.currentThread().setName(getClass().getSimpleName());
@@ -301,7 +330,15 @@ public class TestLogSplitOnMasterFailove
masters = miniCluster().getMasters();
assertEquals(1, masters.size());
-
+
+ // Start a few new regionservers.
+ final int EXTRA_RS = 2;
+ for (int i = NUM_RS; i < NUM_RS + EXTRA_RS; ++i) {
+ miniCluster().startRegionServer();
+ otherRsNames.add(
+ miniCluster().getRegionServer(i).getServerInfo().getServerName());
+ }
+
// wait for an active master to show up and be ready
assertTrue(miniCluster().waitForActiveAndReadyMaster());