You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by li...@apache.org on 2012/10/30 07:14:06 UTC

svn commit: r1403626 - in /hbase/branches/0.89-fb/src: main/java/org/apache/hadoop/hbase/master/ main/java/org/apache/hadoop/hbase/regionserver/ main/java/org/apache/hadoop/hbase/util/ test/java/org/apache/hadoop/hbase/master/

Author: liyin
Date: Tue Oct 30 06:14:06 2012
New Revision: 1403626

URL: http://svn.apache.org/viewvc?rev=1403626&view=rev
Log:
[master] [0.89-fb] Ensure that new master does not split logs of recently checked in RS

Author: aaiyer

Summary: Sev on cell 13 saw a scenario where, upon master failover, the new master was splitting the logs of regionservers that were still running. This happens when there is an error in the SplitLog for one of the logs and the master, when choosing which log folders to split, considers only the regionservers that were live at its startup and ignores servers that have since checked in with it.

Test Plan:
run MR tests.

One failure, but the same test also fails without this diff.

Also adds a unit test (testWithDistributedLogSplittingAndErrors in TestLogSplitOnMasterFailover).

Reviewers: kranganathan, kannan

Reviewed By: kranganathan

CC: hbase-eng@

Differential Revision: https://phabricator.fb.com/D611291

Modified:
    hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
    hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/regionserver/SplitLogWorker.java
    hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/util/InjectionEvent.java
    hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/master/TestLogSplitOnMasterFailover.java

Modified: hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/master/HMaster.java?rev=1403626&r1=1403625&r2=1403626&view=diff
==============================================================================
--- hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/master/HMaster.java (original)
+++ hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/master/HMaster.java Tue Oct 30 06:14:06 2012
@@ -1061,7 +1061,9 @@ public class HMaster extends HasThread i
             Path logDir = status.getPath();
             String serverName = logDir.getName();
             LOG.info("Found log folder : " + serverName);
-            if (!clusterStateRecovery.liveRegionServersAtStartup().contains(serverName)) {
+            if (!clusterStateRecovery.liveRegionServersAtStartup().contains(serverName)
+                // If a server now checked in with the new master, don't kill it.
+                && serverManager.getServerInfo(serverName) == null) {
               LOG.info("Log folder " + status.getPath() + " doesn't belong " +
                   "to a known region server, splitting");
               serverNames.add(serverName);

Modified: hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/regionserver/SplitLogWorker.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/regionserver/SplitLogWorker.java?rev=1403626&r1=1403625&r2=1403626&view=diff
==============================================================================
--- hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/regionserver/SplitLogWorker.java (original)
+++ hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/regionserver/SplitLogWorker.java Tue Oct 30 06:14:06 2012
@@ -41,6 +41,8 @@ import org.apache.hadoop.hbase.master.Sp
 import org.apache.hadoop.hbase.regionserver.wal.HLogSplitter;
 import org.apache.hadoop.hbase.util.CancelableProgressable;
 import org.apache.hadoop.hbase.util.FSUtils;
+import org.apache.hadoop.hbase.util.InjectionEvent;
+import org.apache.hadoop.hbase.util.InjectionHandler;
 import org.apache.hadoop.hbase.zookeeper.RecoverableZooKeeper;
 import org.apache.hadoop.hbase.zookeeper.ZKSplitLog;
 import org.apache.hadoop.hbase.zookeeper.ZKSplitLog.TaskState;
@@ -136,6 +138,8 @@ public class SplitLogWorker implements R
         try {
           FileStatus st;
           try {
+          InjectionHandler.processEventIO(InjectionEvent.SPLITLOGWORKER_SPLIT_LOG_START);
+          
           st = fs.getFileStatus(new Path(filename));
 
           t1  = System.currentTimeMillis();

Modified: hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/util/InjectionEvent.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/util/InjectionEvent.java?rev=1403626&r1=1403625&r2=1403626&view=diff
==============================================================================
--- hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/util/InjectionEvent.java (original)
+++ hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/util/InjectionEvent.java Tue Oct 30 06:14:06 2012
@@ -32,5 +32,6 @@ public enum InjectionEvent {
   HMASTER_ALTER_TABLE,
   HMASTER_ENABLE_TABLE,
   HMASTER_DISABLE_TABLE,
-  ZKUNASSIGNEDWATCHER_REGION_OPENED
+  ZKUNASSIGNEDWATCHER_REGION_OPENED,
+  SPLITLOGWORKER_SPLIT_LOG_START
 }

Modified: hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/master/TestLogSplitOnMasterFailover.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/master/TestLogSplitOnMasterFailover.java?rev=1403626&r1=1403625&r2=1403626&view=diff
==============================================================================
--- hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/master/TestLogSplitOnMasterFailover.java (original)
+++ hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/master/TestLogSplitOnMasterFailover.java Tue Oct 30 06:14:06 2012
@@ -48,8 +48,10 @@ import org.apache.hadoop.hbase.io.hfile.
 import org.apache.hadoop.hbase.regionserver.HRegionServer;
 import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.hbase.util.FSUtils;
+import org.apache.hadoop.hbase.util.InjectionEvent;
 import org.apache.hadoop.hbase.util.Threads;
 import org.apache.hadoop.hbase.zookeeper.ZooKeeperWrapper;
+import org.apache.hadoop.hbase.util.InjectionHandler;
 import org.junit.Test;
 
 /**
@@ -248,6 +250,33 @@ public class TestLogSplitOnMasterFailove
     runTest();
   }
 
+  @Test(timeout=180000)
+  public void testWithDistributedLogSplittingAndErrors() throws Exception {
+    // install a handler that injects failures at InjectionEvent.SPLITLOGWORKER_SPLIT_LOG_START.
+    ZooKeeperWrapper.setNamespaceForTesting();
+    conf.setBoolean(HConstants.DISTRIBUTED_LOG_SPLITTING_KEY, true);
+    InjectionHandler.set(new SplitLogKillInjectionHandler());
+    runTest();
+  }
+  
+  static  class SplitLogKillInjectionHandler extends InjectionHandler {
+      static int count = 0;
+      
+      @Override
+      // delay every split log attempt by 50ms and fail those seen while count < 5.
+      protected void _processEventIO(InjectionEvent event, Object... args) throws IOException{
+        if (event == InjectionEvent.SPLITLOGWORKER_SPLIT_LOG_START) {
+          count++;
+          LOG.debug("Processing a split log event. Count = " + count);
+          Threads.sleep(50); // make it take a bit of time. sleep 50ms.
+          if (count < 5) {
+            throw new IOException("Failing for the test");
+          }
+        }
+      }
+   }
+    
+  
   private void runTest() throws Exception {
     startMiniCluster(NUM_MASTERS, NUM_RS);
     Thread.currentThread().setName(getClass().getSimpleName());
@@ -301,7 +330,15 @@ public class TestLogSplitOnMasterFailove
 
     masters = miniCluster().getMasters();
     assertEquals(1, masters.size());
-
+    
+    // Start a few new regionservers.
+    final int EXTRA_RS = 2;
+    for (int i = NUM_RS; i < NUM_RS + EXTRA_RS; ++i) {
+      miniCluster().startRegionServer();
+      otherRsNames.add(
+          miniCluster().getRegionServer(i).getServerInfo().getServerName());
+    }
+    
     // wait for an active master to show up and be ready
     assertTrue(miniCluster().waitForActiveAndReadyMaster());