You are viewing a plain text version of this content. The canonical link for it is here.
Posted to by on 2017/09/11 07:42:42 UTC

[15/50] [abbrv] hbase git commit: HBASE-18543 [AMv2] Fixed and re-enabled TestMasterFailover

HBASE-18543 [AMv2] Fixed and re-enabled TestMasterFailover

  * testSimpleMasterFailover - fixed and verified
  * testPendingOpenOrCloseWhenMasterFailover - removed as logic is based on old code and no longer relevant. TestServerCrashProcedure tests assignments with crashing master and region servers
  * testMetaInTransitionWhenMasterFailover - verified that it is fixed by patch for HBASE-18511.

Signed-off-by: Michael Stack <>


Branch: refs/heads/HBASE-18467
Commit: 5847c901a743586f3afc45fc6e73e5c07a3c7b70
Parents: 6752eba
Author: Umesh Agashe <>
Authored: Thu Sep 7 13:42:36 2017 -0700
Committer: Michael Stack <>
Committed: Thu Sep 7 16:45:52 2017 -0700

 .../hadoop/hbase/master/TestMasterFailover.java | 177 ++-----------------
 1 file changed, 10 insertions(+), 167 deletions(-)
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java
index cf57aa3..9cbc197 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java
@@ -1,4 +1,4 @@
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
@@ -28,23 +28,13 @@ import java.util.List;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hbase.ClusterStatus;
-import org.apache.hadoop.hbase.HBaseConfiguration;
 import org.apache.hadoop.hbase.HBaseTestingUtility;
-import org.apache.hadoop.hbase.HColumnDescriptor;
-import org.apache.hadoop.hbase.HConstants;
 import org.apache.hadoop.hbase.HRegionInfo;
 import org.apache.hadoop.hbase.HTableDescriptor;
-import org.apache.hadoop.hbase.MetaTableAccessor;
 import org.apache.hadoop.hbase.MiniHBaseCluster;
 import org.apache.hadoop.hbase.ServerName;
-import org.apache.hadoop.hbase.TableName;
-import org.apache.hadoop.hbase.client.RegionLocator;
-import org.apache.hadoop.hbase.client.Table;
-import org.apache.hadoop.hbase.master.assignment.RegionStates;
-import org.apache.hadoop.hbase.master.assignment.RegionStateStore;
 import org.apache.hadoop.hbase.master.RegionState.State;
 import org.apache.hadoop.hbase.regionserver.HRegion;
 import org.apache.hadoop.hbase.regionserver.HRegionServer;
@@ -53,32 +43,15 @@ import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
 import org.apache.hadoop.hbase.testclassification.FlakeyTests;
 import org.apache.hadoop.hbase.testclassification.LargeTests;
 import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.hbase.util.FSTableDescriptors;
-import org.apache.hadoop.hbase.util.FSUtils;
 import org.apache.hadoop.hbase.util.JVMClusterUtil.MasterThread;
 import org.apache.hadoop.hbase.zookeeper.MetaTableLocator;
-import org.junit.Ignore;
 import org.junit.Test;
 import org.junit.experimental.categories.Category;
 @Category({FlakeyTests.class, LargeTests.class})
-@Ignore // Needs to be rewritten for AMv2. Uses tricks not ordained when up on AMv2.
 public class TestMasterFailover {
   private static final Log LOG = LogFactory.getLog(TestMasterFailover.class);
-  HRegion createRegion(final HRegionInfo  hri, final Path rootdir, final Configuration c,
-      final HTableDescriptor htd)
-  throws IOException {
-    HRegion r = HBaseTestingUtility.createRegionAndWAL(hri, rootdir, c, htd);
-    // The above call to create a region will create an wal file.  Each
-    // log file create will also create a running thread to do syncing.  We need
-    // to close out this log else we will have a running thread trying to sync
-    // the file system continuously which is ugly when dfs is taken away at the
-    // end of the test.
-    HBaseTestingUtility.closeRegionAndWAL(r);
-    return r;
-  }
   // TODO: Next test to add is with testing permutations of the RIT or the RS
   //       killed are hosting ROOT and hbase:meta regions.
@@ -92,7 +65,6 @@ public class TestMasterFailover {
    * Starts with three masters.  Kills a backup master.  Then kills the active
    * master.  Ensures the final master becomes active and we can still contact
    * the cluster.
-   * @throws Exception
   @Test (timeout=240000)
   public void testSimpleMasterFailover() throws Exception {
@@ -157,7 +129,7 @@ public class TestMasterFailover {
     assertEquals(2, masterThreads.size());
     int rsCount = masterThreads.get(activeIndex).getMaster().getClusterStatus().getServersSize();
     LOG.info("Active master " + active.getServerName() + " managing " + rsCount +  " regions servers");
-    assertEquals(4, rsCount);
+    assertEquals(3, rsCount);
     // Check that ClusterStatus reports the correct active and backup masters
@@ -190,143 +162,13 @@ public class TestMasterFailover {
     int rss = status.getServersSize();
     LOG.info("Active master " + mastername.getServerName() + " managing " +
       rss +  " region servers");
-    assertEquals(4, rss);
+    assertEquals(3, rss);
     // Stop the cluster
-   * Test region in pending_open/close when master failover
-   */
-  @Test (timeout=180000)
-  public void testPendingOpenOrCloseWhenMasterFailover() throws Exception {
-    final int NUM_MASTERS = 1;
-    final int NUM_RS = 1;
-    // Create config to use for this cluster
-    Configuration conf = HBaseConfiguration.create();
-    // Start the cluster
-    HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(conf);
-    TEST_UTIL.startMiniCluster(NUM_MASTERS, NUM_RS);
-    MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
-    log("Cluster started");
-    // get all the master threads
-    List<MasterThread> masterThreads = cluster.getMasterThreads();
-    assertEquals(1, masterThreads.size());
-    // only one master thread, let's wait for it to be initialized
-    assertTrue(cluster.waitForActiveAndReadyMaster());
-    HMaster master = masterThreads.get(0).getMaster();
-    assertTrue(master.isActiveMaster());
-    assertTrue(master.isInitialized());
-    // Create a table with a region online
-    Table onlineTable = TEST_UTIL.createTable(TableName.valueOf("onlineTable"), "family");
-    onlineTable.close();
-    // Create a table in META, so it has a region offline
-    HTableDescriptor offlineTable = new HTableDescriptor(
-      TableName.valueOf(Bytes.toBytes("offlineTable")));
-    offlineTable.addFamily(new HColumnDescriptor(Bytes.toBytes("family")));
-    FileSystem filesystem = FileSystem.get(conf);
-    Path rootdir = FSUtils.getRootDir(conf);
-    FSTableDescriptors fstd = new FSTableDescriptors(conf, filesystem, rootdir);
-    fstd.createTableDescriptor(offlineTable);
-    HRegionInfo hriOffline = new HRegionInfo(offlineTable.getTableName(), null, null);
-    createRegion(hriOffline, rootdir, conf, offlineTable);
-    MetaTableAccessor.addRegionToMeta(master.getConnection(), hriOffline);
-    log("Regions in hbase:meta and namespace have been created");
-    // at this point we only expect 3 regions to be assigned out
-    // (catalogs and namespace, + 1 online region)
-    assertEquals(3, cluster.countServedRegions());
-    HRegionInfo hriOnline = null;
-    try (RegionLocator locator =
-        TEST_UTIL.getConnection().getRegionLocator(TableName.valueOf("onlineTable"))) {
-      hriOnline = locator.getRegionLocation(HConstants.EMPTY_START_ROW).getRegionInfo();
-    }
-    RegionStates regionStates = master.getAssignmentManager().getRegionStates();
-    RegionStateStore stateStore = master.getAssignmentManager().getRegionStateStore();
-    // Put the online region in pending_close. It is actually already opened.
-    // This is to simulate that the region close RPC is not sent out before failover
-    RegionState oldState = regionStates.getRegionState(hriOnline);
-    RegionState newState = new RegionState(hriOnline, State.CLOSING, oldState.getServerName());
-    stateStore.updateRegionState(HConstants.NO_SEQNUM, -1, newState, oldState);
-    // Put the offline region in pending_open. It is actually not opened yet.
-    // This is to simulate that the region open RPC is not sent out before failover
-    oldState = new RegionState(hriOffline, State.OFFLINE);
-    newState = new RegionState(hriOffline, State.OPENING, newState.getServerName());
-    stateStore.updateRegionState(HConstants.NO_SEQNUM, -1, newState, oldState);
-    HRegionInfo failedClose = new HRegionInfo(offlineTable.getTableName(), null, null);
-    createRegion(failedClose, rootdir, conf, offlineTable);
-    MetaTableAccessor.addRegionToMeta(master.getConnection(), failedClose);
-    oldState = new RegionState(failedClose, State.CLOSING);
-    newState = new RegionState(failedClose, State.FAILED_CLOSE, newState.getServerName());
-    stateStore.updateRegionState(HConstants.NO_SEQNUM, -1, newState, oldState);
-    HRegionInfo failedOpen = new HRegionInfo(offlineTable.getTableName(), null, null);
-    createRegion(failedOpen, rootdir, conf, offlineTable);
-    MetaTableAccessor.addRegionToMeta(master.getConnection(), failedOpen);
-    // Simulate a region transitioning to failed open when the region server reports the
-    // transition as FAILED_OPEN
-    oldState = new RegionState(failedOpen, State.OPENING);
-    newState = new RegionState(failedOpen, State.FAILED_OPEN, newState.getServerName());
-    stateStore.updateRegionState(HConstants.NO_SEQNUM, -1, newState, oldState);
-    HRegionInfo failedOpenNullServer = new HRegionInfo(offlineTable.getTableName(), null, null);
-    LOG.info("Failed open NUll server " + failedOpenNullServer.getEncodedName());
-    createRegion(failedOpenNullServer, rootdir, conf, offlineTable);
-    MetaTableAccessor.addRegionToMeta(master.getConnection(), failedOpenNullServer);
-    // Simulate a region transitioning to failed open when the master couldn't find a plan for
-    // the region
-    oldState = new RegionState(failedOpenNullServer, State.OFFLINE);
-    newState = new RegionState(failedOpenNullServer, State.FAILED_OPEN, null);
-    stateStore.updateRegionState(HConstants.NO_SEQNUM, -1, newState, oldState);
-    // Stop the master
-    log("Aborting master");
-    cluster.abortMaster(0);
-    cluster.waitOnMaster(0);
-    log("Master has aborted");
-    // Start up a new master
-    log("Starting up a new master");
-    master = cluster.startMaster().getMaster();
-    log("Waiting for master to be ready");
-    cluster.waitForActiveAndReadyMaster();
-    log("Master is ready");
-    // Wait till no region in transition any more
-    TEST_UTIL.waitUntilNoRegionsInTransition(60000);
-    // Get new region states since master restarted
-    regionStates = master.getAssignmentManager().getRegionStates();
-    // Both pending_open (RPC sent/not yet) regions should be online
-    assertTrue(regionStates.isRegionOnline(hriOffline));
-    assertTrue(regionStates.isRegionOnline(hriOnline));
-    assertTrue(regionStates.isRegionOnline(failedClose));
-    assertTrue(regionStates.isRegionOnline(failedOpenNullServer));
-    assertTrue(regionStates.isRegionOnline(failedOpen));
-    log("Done with verification, shutting down cluster");
-    // Done, shutdown the cluster
-    TEST_UTIL.shutdownMiniCluster();
-  }
-  /**
    * Test meta in transition when master failover
   @Test(timeout = 180000)
@@ -361,9 +203,9 @@ public class TestMasterFailover {
     // meta should remain where it was
     RegionState metaState =
-    assertEquals("hbase:meta should be onlined on RS",
+    assertEquals("hbase:meta should be online on RS",
       metaState.getServerName(), rs.getServerName());
-    assertEquals("hbase:meta should be onlined on RS",
+    assertEquals("hbase:meta should be online on RS",
       metaState.getState(), State.OPEN);
     // Start up a new master
@@ -376,9 +218,9 @@ public class TestMasterFailover {
     // ensure meta is still deployed on RS
     metaState =
-    assertEquals("hbase:meta should be onlined on RS",
+    assertEquals("hbase:meta should be online on RS",
       metaState.getServerName(), rs.getServerName());
-    assertEquals("hbase:meta should be onlined on RS",
+    assertEquals("hbase:meta should be online on RS",
       metaState.getState(), State.OPEN);
     // Update meta state as OPENING, then kill master
@@ -408,9 +250,9 @@ public class TestMasterFailover {
     metaState =
-    assertEquals("hbase:meta should be onlined on RS",
+    assertEquals("hbase:meta should be online on RS",
       metaState.getServerName(), rs.getServerName());
-    assertEquals("hbase:meta should be onlined on RS",
+    assertEquals("hbase:meta should be online on RS",
       metaState.getState(), State.OPEN);
     // Update meta state as CLOSING, then kill master
@@ -431,6 +273,7 @@ public class TestMasterFailover {
     // Start up a new master
     log("Starting up a new master");
     activeMaster = cluster.startMaster().getMaster();
+    assertNotNull(activeMaster);
     log("Waiting for master to be ready");
     log("Master is ready");