You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by mb...@apache.org on 2012/04/18 02:22:54 UTC

svn commit: r1327337 - in /hbase/trunk/src/test/java/org/apache/hadoop/hbase: MiniHBaseCluster.java regionserver/TestFSErrorsExposed.java

Author: mbautin
Date: Wed Apr 18 00:22:54 2012
New Revision: 1327337

URL: http://svn.apache.org/viewvc?rev=1327337&view=rev
Log:
[jira] [HBASE-5763] Fix random failures in TestFSErrorsExposed

Summary:
TestFSErrorsExposed frequently fails due to unclean mini-cluster shutdown.
The fix brings the datanodes back up, waits for some time, and preemptively
kills all regionservers and the master before shutdown.

This is the trunk fix. The 89-fb patch is at D2739.

Test Plan: Run TestFSErrorsExposed 100 times

Reviewers: stack, tedyu, jdcryans, lhofhansl, jmhsieh, JIRA

Reviewed By: tedyu

Differential Revision: https://reviews.facebook.net/D2793

Modified:
    hbase/trunk/src/test/java/org/apache/hadoop/hbase/MiniHBaseCluster.java
    hbase/trunk/src/test/java/org/apache/hadoop/hbase/regionserver/TestFSErrorsExposed.java

Modified: hbase/trunk/src/test/java/org/apache/hadoop/hbase/MiniHBaseCluster.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/test/java/org/apache/hadoop/hbase/MiniHBaseCluster.java?rev=1327337&r1=1327336&r2=1327337&view=diff
==============================================================================
--- hbase/trunk/src/test/java/org/apache/hadoop/hbase/MiniHBaseCluster.java (original)
+++ hbase/trunk/src/test/java/org/apache/hadoop/hbase/MiniHBaseCluster.java Wed Apr 18 00:22:54 2012
@@ -36,6 +36,8 @@ import org.apache.hadoop.hbase.regionser
 import org.apache.hadoop.hbase.regionserver.HRegionServer;
 import org.apache.hadoop.hbase.security.User;
 import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.JVMClusterUtil.MasterThread;
+import org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread;
 import org.apache.hadoop.hbase.util.JVMClusterUtil;
 import org.apache.hadoop.hbase.util.Threads;
 import org.apache.hadoop.io.MapWritable;
@@ -529,4 +531,17 @@ public class MiniHBaseCluster {
     }
     return count;
   }
+
+  /**
+   * Simulates killing all regionservers and masters by aborting each one.
+   * Useful when the mini-cluster cannot be brought back for a clean shutdown.
+   */
+  public void killAll() {
+    for (RegionServerThread rst : getRegionServerThreads()) {
+      rst.getRegionServer().abort("killAll");
+    }
+    for (MasterThread masterThread : getMasterThreads()) {
+      masterThread.getMaster().abort("killAll", new Throwable());
+    }
+  }
 }

Modified: hbase/trunk/src/test/java/org/apache/hadoop/hbase/regionserver/TestFSErrorsExposed.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/test/java/org/apache/hadoop/hbase/regionserver/TestFSErrorsExposed.java?rev=1327337&r1=1327336&r2=1327337&view=diff
==============================================================================
--- hbase/trunk/src/test/java/org/apache/hadoop/hbase/regionserver/TestFSErrorsExposed.java (original)
+++ hbase/trunk/src/test/java/org/apache/hadoop/hbase/regionserver/TestFSErrorsExposed.java Wed Apr 18 00:22:54 2012
@@ -50,7 +50,6 @@ import org.apache.hadoop.hbase.util.Byte
 import org.junit.Test;
 import org.junit.experimental.categories.Category;
 
-
 /**
  * Test cases that ensure that file system level errors are bubbled up
  * appropriately to clients, rather than swallowed.
@@ -163,13 +162,16 @@ public class TestFSErrorsExposed {
    * removes the data from HDFS underneath it, and ensures that
    * errors are bubbled to the client.
    */
-  @Test
+  @Test(timeout=5 * 60 * 1000)
   public void testFullSystemBubblesFSErrors() throws Exception {
     try {
       // We set it not to run or it will trigger server shutdown while sync'ing
       // because all the datanodes are bad
       util.getConfiguration().setInt(
           "hbase.regionserver.optionallogflushinterval", Integer.MAX_VALUE);
+
+      util.getConfiguration().setInt("hbase.client.retries.number", 3);
+
       util.startMiniCluster(1);
       byte[] tableName = Bytes.toBytes("table");
       byte[] fam = Bytes.toBytes("fam");
@@ -204,7 +206,11 @@ public class TestFSErrorsExposed {
         assertTrue(e.getMessage().contains("Could not seek"));
       }
 
+      // Restart data nodes so that HBase can shut down cleanly.
+      util.getDFSCluster().restartDataNodes();
+
     } finally {
+      util.getMiniHBaseCluster().killAll();
       util.shutdownMiniCluster();
     }
   }
@@ -232,7 +238,7 @@ public class TestFSErrorsExposed {
       for (SoftReference<FaultyInputStream> is: inStreams) {
         is.get().startFaults();
       }
-    } 
+    }
   }
 
   static class FaultyInputStream extends FSDataInputStream {
@@ -265,4 +271,3 @@ public class TestFSErrorsExposed {
   public org.apache.hadoop.hbase.ResourceCheckerJUnitRule cu =
     new org.apache.hadoop.hbase.ResourceCheckerJUnitRule();
 }
-