You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by ns...@apache.org on 2011/10/11 19:44:55 UTC

svn commit: r1181971 - in /hbase/branches/0.89/src: main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java test/java/org/apache/hadoop/hbase/regionserver/TestHRegionServerFileSystemFailure.java

Author: nspiegelberg
Date: Tue Oct 11 17:44:55 2011
New Revision: 1181971

URL: http://svn.apache.org/viewvc?rev=1181971&view=rev
Log:
Fixed the TestHRegionServerFileSystemFailure test.

Summary:
This test was taking too long to run and at times would time
out. The primary problem with the test was that all of HDFS (namenode +
datanodes) was killed, and hence the regionserver's DFSClient would
retry for a long time before giving up and dying. Also, at the same time
we were still loading the cluster with data even after the main part of
the test was over. In this diff I stop the loading just before we bring
down the mini cluster, and I also kill only the namenode; this leaves the
regionservers in a reasonably sane state, since they can still write to
the datanodes.

Also, this test was primarily designed for namenode failures, so the
test still serves its purpose.

The test now runs in under 2 minutes on my dev cluster.

Test Plan: 1) Run the test multiple times (ran it about 600 times in the end).

Reviewers: kannan, liyintang, mbautin

Reviewed By: kannan

CC: hbase@lists, , kannan, pritam, liyintang

Differential Revision: 307053

Revert Plan:
Tags:

- begin *PUBLIC* platform impact section -
Bugzilla: #
- end platform impact -

Modified:
    hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
    hbase/branches/0.89/src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegionServerFileSystemFailure.java

Modified: hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java?rev=1181971&r1=1181970&r2=1181971&view=diff
==============================================================================
--- hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java (original)
+++ hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java Tue Oct 11 17:44:55 2011
@@ -679,6 +679,14 @@ public class HRegionServer implements HR
     if (!killed) {
       this.zooKeeperWrapper.close();
       join();
+      if ((this.fs != null) && (stopRequested.get() || abortRequested)) {
+        // Finally attempt to close the Filesystem, to flush out any open streams.
+        try {
+          this.fs.close();
+        } catch (IOException ie) {
+          LOG.error("Could not close FileSystem", ie);
+        }
+      }
     }
     LOG.info(Thread.currentThread().getName() + " exiting");
   }
@@ -937,14 +945,6 @@ public class HRegionServer implements HR
         FSUtils.checkFileSystemAvailable(this.fs, false);
       } catch (IOException e) {
         abort("File System not available", e);
-        // Wait for all threads to exit cleanly.
-        join();
-        // Finally attempt to close the Filesystem, to flush out any open streams.
-        try {
-          this.fs.close();
-        } catch (IOException ie) {
-          LOG.error("Could not close FileSystem", ie);
-        }
         this.fsOk = false;
       }
     }

Modified: hbase/branches/0.89/src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegionServerFileSystemFailure.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89/src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegionServerFileSystemFailure.java?rev=1181971&r1=1181970&r2=1181971&view=diff
==============================================================================
--- hbase/branches/0.89/src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegionServerFileSystemFailure.java (original)
+++ hbase/branches/0.89/src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegionServerFileSystemFailure.java Tue Oct 11 17:44:55 2011
@@ -1,13 +1,18 @@
 package org.apache.hadoop.hbase.regionserver;
 
 import java.io.IOException;
+import java.util.List;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.HBaseConfiguration;
 import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.MiniHBaseCluster;
 import org.apache.hadoop.hbase.client.HTable;
+import org.apache.hadoop.hbase.master.HMaster;
 import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread;
 
 import static org.junit.Assert.*;
 
@@ -18,13 +23,18 @@ import org.junit.Test;
 public class TestHRegionServerFileSystemFailure {
   private static final Log LOG = LogFactory
       .getLog(TestHRegionServerFileSystemFailure.class);
-  private final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
+  private static HBaseTestingUtility TEST_UTIL;
   private static byte[][] FAMILIES = { Bytes.toBytes("f1"),
       Bytes.toBytes("f2"), Bytes.toBytes("f3"), Bytes.toBytes("f4") };
   private static final int nLoaders = 10;
+  private static Configuration conf;
 
   @BeforeClass
   public static void setUpBeforeClass() throws Exception {
+    conf = new HBaseConfiguration().create();
+    conf.setBoolean("ipc.client.ping", true);
+    conf.setInt("ipc.ping.interval", 5000);
+    TEST_UTIL = new HBaseTestingUtility(conf);
     TEST_UTIL.startMiniCluster(3);
   }
 
@@ -60,28 +70,35 @@ public class TestHRegionServerFileSystem
   public void testHRegionServerFileSystemFailure() throws Exception {
     // Build some data.
     byte[] tableName = Bytes.toBytes("testCloseHRegion");
-    TEST_UTIL.createTable(tableName, FAMILIES);
-    HTable table = new HTable(tableName);
-    for (int i = 0; i < FAMILIES.length; i++) {
-      byte[] columnFamily = FAMILIES[i];
-      TEST_UTIL.createMultiRegions(table, columnFamily);
-    }
+    HTable table = TEST_UTIL.createTable(tableName, FAMILIES);
 
     for (int i = 0; i < nLoaders; i++) {
       new TableLoader(table).start();
     }
 
     // Wait for loaders to build up some data.
-    Thread.sleep(10000);
+    Thread.sleep(1000);
 
-    // Pick a regionserver.
-    Configuration conf = TEST_UTIL.getConfiguration();
-    HRegionServer server = TEST_UTIL.getHBaseCluster().getRegionServer(0);
 
     // Bring down HDFS.
-    TEST_UTIL.shutdownMiniDFSCluster();
+    TEST_UTIL.getDFSCluster().shutdownNameNode();
 
-    // Verify checkFileSystem returns false and doesn't throw Exceptions.
-    assertFalse(server.checkFileSystem());
+    // Verify checkFileSystem returns false.
+    List <RegionServerThread> servers = TEST_UTIL.getHBaseCluster().getLiveRegionServerThreads();
+    for(RegionServerThread serverThread : servers) {
+      HRegionServer server = serverThread.getRegionServer();
+      if (serverThread.isAlive() && !server.isStopRequested()) {
+        assertFalse(server.checkFileSystem());
+        break;
+      }
+    }
+
+    // Bring namenode, hbasemaster back up so we cleanup properly.
+    TEST_UTIL.getDFSCluster().restartNameNode();
+    HMaster master = TEST_UTIL.getMiniHBaseCluster().getMaster();
+    if (!master.isAlive()) {
+      master = HMaster.constructMaster(MiniHBaseCluster.MiniHBaseClusterMaster.class, conf);
+      master.start();
+    }
   }
 }