You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by bu...@apache.org on 2017/09/11 07:42:42 UTC
[15/50] [abbrv] hbase git commit: HBASE-18543 [AMv2] Fixed and re-enabled TestMasterFailover
HBASE-18543 [AMv2] Fixed and re-enabled TestMasterFailover
* testSimpleMasterFailover - fixed and verified
* testPendingOpenOrCloseWhenMasterFailover - removed because its logic is based on old code and is no longer relevant; TestServerCrashProcedure already tests assignments with crashing master and region servers
* testMetaInTransitionWhenMasterFailover - verified that it is fixed by the patch for HBASE-18511.
Signed-off-by: Michael Stack <st...@apache.org>
Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/5847c901
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/5847c901
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/5847c901
Branch: refs/heads/HBASE-18467
Commit: 5847c901a743586f3afc45fc6e73e5c07a3c7b70
Parents: 6752eba
Author: Umesh Agashe <ua...@cloudera.com>
Authored: Thu Sep 7 13:42:36 2017 -0700
Committer: Michael Stack <st...@apache.org>
Committed: Thu Sep 7 16:45:52 2017 -0700
----------------------------------------------------------------------
.../hadoop/hbase/master/TestMasterFailover.java | 177 ++-----------------
1 file changed, 10 insertions(+), 167 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hbase/blob/5847c901/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java
index cf57aa3..9cbc197 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java
@@ -1,4 +1,4 @@
-/**
+/*
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
@@ -28,23 +28,13 @@ import java.util.List;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.ClusterStatus;
-import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HBaseTestingUtility;
-import org.apache.hadoop.hbase.HColumnDescriptor;
-import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.HTableDescriptor;
-import org.apache.hadoop.hbase.MetaTableAccessor;
import org.apache.hadoop.hbase.MiniHBaseCluster;
import org.apache.hadoop.hbase.ServerName;
-import org.apache.hadoop.hbase.TableName;
-import org.apache.hadoop.hbase.client.RegionLocator;
-import org.apache.hadoop.hbase.client.Table;
-import org.apache.hadoop.hbase.master.assignment.RegionStates;
-import org.apache.hadoop.hbase.master.assignment.RegionStateStore;
import org.apache.hadoop.hbase.master.RegionState.State;
import org.apache.hadoop.hbase.regionserver.HRegion;
import org.apache.hadoop.hbase.regionserver.HRegionServer;
@@ -53,32 +43,15 @@ import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
import org.apache.hadoop.hbase.testclassification.FlakeyTests;
import org.apache.hadoop.hbase.testclassification.LargeTests;
import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.hbase.util.FSTableDescriptors;
-import org.apache.hadoop.hbase.util.FSUtils;
import org.apache.hadoop.hbase.util.JVMClusterUtil.MasterThread;
import org.apache.hadoop.hbase.zookeeper.MetaTableLocator;
-import org.junit.Ignore;
import org.junit.Test;
import org.junit.experimental.categories.Category;
@Category({FlakeyTests.class, LargeTests.class})
-@Ignore // Needs to be rewritten for AMv2. Uses tricks not ordained when up on AMv2.
public class TestMasterFailover {
private static final Log LOG = LogFactory.getLog(TestMasterFailover.class);
- HRegion createRegion(final HRegionInfo hri, final Path rootdir, final Configuration c,
- final HTableDescriptor htd)
- throws IOException {
- HRegion r = HBaseTestingUtility.createRegionAndWAL(hri, rootdir, c, htd);
- // The above call to create a region will create an wal file. Each
- // log file create will also create a running thread to do syncing. We need
- // to close out this log else we will have a running thread trying to sync
- // the file system continuously which is ugly when dfs is taken away at the
- // end of the test.
- HBaseTestingUtility.closeRegionAndWAL(r);
- return r;
- }
-
// TODO: Next test to add is with testing permutations of the RIT or the RS
// killed are hosting ROOT and hbase:meta regions.
@@ -92,7 +65,6 @@ public class TestMasterFailover {
* Starts with three masters. Kills a backup master. Then kills the active
* master. Ensures the final master becomes active and we can still contact
* the cluster.
- * @throws Exception
*/
@Test (timeout=240000)
public void testSimpleMasterFailover() throws Exception {
@@ -157,7 +129,7 @@ public class TestMasterFailover {
assertEquals(2, masterThreads.size());
int rsCount = masterThreads.get(activeIndex).getMaster().getClusterStatus().getServersSize();
LOG.info("Active master " + active.getServerName() + " managing " + rsCount + " regions servers");
- assertEquals(4, rsCount);
+ assertEquals(3, rsCount);
// Check that ClusterStatus reports the correct active and backup masters
assertNotNull(active);
@@ -190,143 +162,13 @@ public class TestMasterFailover {
int rss = status.getServersSize();
LOG.info("Active master " + mastername.getServerName() + " managing " +
rss + " region servers");
- assertEquals(4, rss);
+ assertEquals(3, rss);
// Stop the cluster
TEST_UTIL.shutdownMiniCluster();
}
/**
- * Test region in pending_open/close when master failover
- */
- @Test (timeout=180000)
- public void testPendingOpenOrCloseWhenMasterFailover() throws Exception {
- final int NUM_MASTERS = 1;
- final int NUM_RS = 1;
-
- // Create config to use for this cluster
- Configuration conf = HBaseConfiguration.create();
-
- // Start the cluster
- HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(conf);
- TEST_UTIL.startMiniCluster(NUM_MASTERS, NUM_RS);
- MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
- log("Cluster started");
-
- // get all the master threads
- List<MasterThread> masterThreads = cluster.getMasterThreads();
- assertEquals(1, masterThreads.size());
-
- // only one master thread, let's wait for it to be initialized
- assertTrue(cluster.waitForActiveAndReadyMaster());
- HMaster master = masterThreads.get(0).getMaster();
- assertTrue(master.isActiveMaster());
- assertTrue(master.isInitialized());
-
- // Create a table with a region online
- Table onlineTable = TEST_UTIL.createTable(TableName.valueOf("onlineTable"), "family");
- onlineTable.close();
- // Create a table in META, so it has a region offline
- HTableDescriptor offlineTable = new HTableDescriptor(
- TableName.valueOf(Bytes.toBytes("offlineTable")));
- offlineTable.addFamily(new HColumnDescriptor(Bytes.toBytes("family")));
-
- FileSystem filesystem = FileSystem.get(conf);
- Path rootdir = FSUtils.getRootDir(conf);
- FSTableDescriptors fstd = new FSTableDescriptors(conf, filesystem, rootdir);
- fstd.createTableDescriptor(offlineTable);
-
- HRegionInfo hriOffline = new HRegionInfo(offlineTable.getTableName(), null, null);
- createRegion(hriOffline, rootdir, conf, offlineTable);
- MetaTableAccessor.addRegionToMeta(master.getConnection(), hriOffline);
-
- log("Regions in hbase:meta and namespace have been created");
-
- // at this point we only expect 3 regions to be assigned out
- // (catalogs and namespace, + 1 online region)
- assertEquals(3, cluster.countServedRegions());
- HRegionInfo hriOnline = null;
- try (RegionLocator locator =
- TEST_UTIL.getConnection().getRegionLocator(TableName.valueOf("onlineTable"))) {
- hriOnline = locator.getRegionLocation(HConstants.EMPTY_START_ROW).getRegionInfo();
- }
- RegionStates regionStates = master.getAssignmentManager().getRegionStates();
- RegionStateStore stateStore = master.getAssignmentManager().getRegionStateStore();
-
- // Put the online region in pending_close. It is actually already opened.
- // This is to simulate that the region close RPC is not sent out before failover
- RegionState oldState = regionStates.getRegionState(hriOnline);
- RegionState newState = new RegionState(hriOnline, State.CLOSING, oldState.getServerName());
- stateStore.updateRegionState(HConstants.NO_SEQNUM, -1, newState, oldState);
-
- // Put the offline region in pending_open. It is actually not opened yet.
- // This is to simulate that the region open RPC is not sent out before failover
- oldState = new RegionState(hriOffline, State.OFFLINE);
- newState = new RegionState(hriOffline, State.OPENING, newState.getServerName());
- stateStore.updateRegionState(HConstants.NO_SEQNUM, -1, newState, oldState);
-
- HRegionInfo failedClose = new HRegionInfo(offlineTable.getTableName(), null, null);
- createRegion(failedClose, rootdir, conf, offlineTable);
- MetaTableAccessor.addRegionToMeta(master.getConnection(), failedClose);
-
- oldState = new RegionState(failedClose, State.CLOSING);
- newState = new RegionState(failedClose, State.FAILED_CLOSE, newState.getServerName());
- stateStore.updateRegionState(HConstants.NO_SEQNUM, -1, newState, oldState);
-
- HRegionInfo failedOpen = new HRegionInfo(offlineTable.getTableName(), null, null);
- createRegion(failedOpen, rootdir, conf, offlineTable);
- MetaTableAccessor.addRegionToMeta(master.getConnection(), failedOpen);
-
- // Simulate a region transitioning to failed open when the region server reports the
- // transition as FAILED_OPEN
- oldState = new RegionState(failedOpen, State.OPENING);
- newState = new RegionState(failedOpen, State.FAILED_OPEN, newState.getServerName());
- stateStore.updateRegionState(HConstants.NO_SEQNUM, -1, newState, oldState);
-
- HRegionInfo failedOpenNullServer = new HRegionInfo(offlineTable.getTableName(), null, null);
- LOG.info("Failed open NUll server " + failedOpenNullServer.getEncodedName());
- createRegion(failedOpenNullServer, rootdir, conf, offlineTable);
- MetaTableAccessor.addRegionToMeta(master.getConnection(), failedOpenNullServer);
-
- // Simulate a region transitioning to failed open when the master couldn't find a plan for
- // the region
- oldState = new RegionState(failedOpenNullServer, State.OFFLINE);
- newState = new RegionState(failedOpenNullServer, State.FAILED_OPEN, null);
- stateStore.updateRegionState(HConstants.NO_SEQNUM, -1, newState, oldState);
-
- // Stop the master
- log("Aborting master");
- cluster.abortMaster(0);
- cluster.waitOnMaster(0);
- log("Master has aborted");
-
- // Start up a new master
- log("Starting up a new master");
- master = cluster.startMaster().getMaster();
- log("Waiting for master to be ready");
- cluster.waitForActiveAndReadyMaster();
- log("Master is ready");
-
- // Wait till no region in transition any more
- TEST_UTIL.waitUntilNoRegionsInTransition(60000);
-
- // Get new region states since master restarted
- regionStates = master.getAssignmentManager().getRegionStates();
-
- // Both pending_open (RPC sent/not yet) regions should be online
- assertTrue(regionStates.isRegionOnline(hriOffline));
- assertTrue(regionStates.isRegionOnline(hriOnline));
- assertTrue(regionStates.isRegionOnline(failedClose));
- assertTrue(regionStates.isRegionOnline(failedOpenNullServer));
- assertTrue(regionStates.isRegionOnline(failedOpen));
-
- log("Done with verification, shutting down cluster");
-
- // Done, shutdown the cluster
- TEST_UTIL.shutdownMiniCluster();
- }
-
- /**
* Test meta in transition when master failover
*/
@Test(timeout = 180000)
@@ -361,9 +203,9 @@ public class TestMasterFailover {
// meta should remain where it was
RegionState metaState =
MetaTableLocator.getMetaRegionState(rs.getZooKeeper());
- assertEquals("hbase:meta should be onlined on RS",
+ assertEquals("hbase:meta should be online on RS",
metaState.getServerName(), rs.getServerName());
- assertEquals("hbase:meta should be onlined on RS",
+ assertEquals("hbase:meta should be online on RS",
metaState.getState(), State.OPEN);
// Start up a new master
@@ -376,9 +218,9 @@ public class TestMasterFailover {
// ensure meta is still deployed on RS
metaState =
MetaTableLocator.getMetaRegionState(activeMaster.getZooKeeper());
- assertEquals("hbase:meta should be onlined on RS",
+ assertEquals("hbase:meta should be online on RS",
metaState.getServerName(), rs.getServerName());
- assertEquals("hbase:meta should be onlined on RS",
+ assertEquals("hbase:meta should be online on RS",
metaState.getState(), State.OPEN);
// Update meta state as OPENING, then kill master
@@ -408,9 +250,9 @@ public class TestMasterFailover {
metaState =
MetaTableLocator.getMetaRegionState(activeMaster.getZooKeeper());
- assertEquals("hbase:meta should be onlined on RS",
+ assertEquals("hbase:meta should be online on RS",
metaState.getServerName(), rs.getServerName());
- assertEquals("hbase:meta should be onlined on RS",
+ assertEquals("hbase:meta should be online on RS",
metaState.getState(), State.OPEN);
// Update meta state as CLOSING, then kill master
@@ -431,6 +273,7 @@ public class TestMasterFailover {
// Start up a new master
log("Starting up a new master");
activeMaster = cluster.startMaster().getMaster();
+ assertNotNull(activeMaster);
log("Waiting for master to be ready");
cluster.waitForActiveAndReadyMaster();
log("Master is ready");