You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by ns...@apache.org on 2011/10/25 00:30:49 UTC
svn commit: r1188421 - in /hbase/branches/0.89-fb/src:
main/java/org/apache/hadoop/hbase/ main/java/org/apache/hadoop/hbase/master/
main/java/org/apache/hadoop/hbase/regionserver/
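main/java/org/apache/hadoop/hbase/replication/master/ main/java/org/apache/hadoop/hbase/util/
main/java/org/apache/hadoop/hbase/zookeeper/ test/java/org/apache/hadoop/hbase/
test/java/org/apache/hadoop/hbase/master/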
Author: nspiegelberg
Date: Mon Oct 24 22:30:48 2011
New Revision: 1188421
URL: http://svn.apache.org/viewvc?rev=1188421&view=rev
Log:
First version of TestMasterFailover for hbase-89
**** 89 MASTER ONLY ****
Summary: An initial unit test with multiple masters for hbase-89. This is only
one test from the open source trunk's TestMasterFailover. Multiple changes are
being made in master code to be able to "kill" a master within a unit test
without shutting down the cluster.
Test Plan: Unit tests (including the new one), dev cluster.
Reviewers: kannan, kranganathan, nspiegelberg
Reviewed By: kannan
CC: hbase-eng@lists, mbautin, kannan, pkhemani
Differential Revision: 341948
Revert Plan: OK
Task ID: 558162
Added:
hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java
Modified:
hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/Chore.java
hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/LocalHBaseCluster.java
hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/master/BaseScanner.java
hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/master/MetaScanner.java
hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/master/ProcessRegionOpen.java
hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/master/RegionManager.java
hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/master/RootScanner.java
hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java
hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/master/ZKMasterAddressWatcher.java
hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/master/ZKUnassignedWatcher.java
hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/regionserver/SplitLogWorker.java
hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/replication/master/ReplicationLogCleaner.java
hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/util/JVMClusterUtil.java
hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/zookeeper/ZooKeeperWrapper.java
hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java
hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/MiniHBaseCluster.java
Modified: hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/Chore.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/Chore.java?rev=1188421&r1=1188420&r2=1188421&view=diff
==============================================================================
--- hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/Chore.java (original)
+++ hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/Chore.java Mon Oct 24 22:30:48 2011
@@ -38,9 +38,22 @@ import org.apache.hadoop.hbase.util.Slee
public abstract class Chore extends Thread {
private final Log LOG = LogFactory.getLog(this.getClass());
private final Sleeper sleeper;
+
+ /**
+ * This variable might belong to someone else, e.g. HMaster. Setting this
+ * variable might trigger cluster shutdown. To shut down this thread only,
+ * use {@link #threadStop}.
+ */
protected volatile AtomicBoolean stop;
/**
+ * Unlike {@link #stop}, this indicates that the current thread should shut
+ * down. We use this flag when the master requests a shutdown of base
+ * scanners, but we don't want to shut down the whole cluster.
+ */
+ private volatile boolean threadStop = false;
+
+ /**
* @param p Period at which we should run. Will be adjusted appropriately
* should we find work and it takes time to complete.
* @param s When this flag is set to true, this thread will cleanup and exit
@@ -59,7 +72,7 @@ public abstract class Chore extends Thre
public void run() {
try {
boolean initialChoreComplete = false;
- while (!this.stop.get()) {
+ while (!this.stop.get() && !threadStop) {
long startTime = System.currentTimeMillis();
try {
if (!initialChoreComplete) {
@@ -112,4 +125,9 @@ public abstract class Chore extends Thre
protected void sleep() {
this.sleeper.sleep();
}
+
+ protected void shutdownThisThread() {
+ threadStop = true;
+ }
+
}
Modified: hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/LocalHBaseCluster.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/LocalHBaseCluster.java?rev=1188421&r1=1188420&r2=1188421&view=diff
==============================================================================
--- hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/LocalHBaseCluster.java (original)
+++ hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/LocalHBaseCluster.java Mon Oct 24 22:30:48 2011
@@ -56,8 +56,8 @@ import org.apache.hadoop.hbase.util.JVMC
*/
public class LocalHBaseCluster {
static final Log LOG = LogFactory.getLog(LocalHBaseCluster.class);
- private final List<JVMClusterUtil.MasterThread> masterThreads =
- new CopyOnWriteArrayList<JVMClusterUtil.MasterThread>();
+ private final List<HMaster> masters =
+ new CopyOnWriteArrayList<HMaster>();
private final List<JVMClusterUtil.RegionServerThread> regionThreads =
new CopyOnWriteArrayList<JVMClusterUtil.RegionServerThread>();
private final static int DEFAULT_NO = 1;
@@ -147,20 +147,18 @@ public class LocalHBaseCluster {
return rst;
}
- public JVMClusterUtil.MasterThread addMaster() throws IOException {
- return addMaster(new Configuration(conf), this.masterThreads.size());
+ public HMaster addMaster() throws IOException {
+ return addMaster(new Configuration(conf), this.masters.size());
}
- public JVMClusterUtil.MasterThread addMaster(Configuration c, final int index)
+ public HMaster addMaster(Configuration c, final int index)
throws IOException {
// Create each master with its own Configuration instance so each has
// its HConnection instance rather than share (see HBASE_INSTANCES down in
// the guts of HConnectionManager.
- JVMClusterUtil.MasterThread mt =
- JVMClusterUtil.createMasterThread(c,
- this.masterClass, index);
- this.masterThreads.add(mt);
- return mt;
+ HMaster m = JVMClusterUtil.createMaster(c, this.masterClass, index);
+ this.masters.add(m);
+ return m;
}
/**
@@ -175,10 +173,41 @@ public class LocalHBaseCluster {
* @return the HMaster thread
*/
public HMaster getMaster() {
- if (masterThreads.size() != 1) {
+ if (masters.size() != 1) {
throw new AssertionError("one master expected");
}
- return this.masterThreads.get(0).getMaster();
+ return this.masters.get(0);
+ }
+
+ /**
+ * @return Read-only list of master threads.
+ */
+ public List<HMaster> getMasters() {
+ return Collections.unmodifiableList(this.masters);
+ }
+
+ /**
+ * Wait for the specified master to stop
+ * Removes this thread from list of running threads.
+ * @param serverNumber
+ * @return Name of master that just went down.
+ */
+ public String waitOnMasterStop(int serverNumber) {
+ HMaster master = masters.remove(serverNumber);
+ boolean interrupted = false;
+ while (master.isAlive()) {
+ try {
+ LOG.info("Waiting on " +
+ master.getServerName().toString());
+ master.join();
+ } catch (InterruptedException e) {
+ interrupted = true;
+ }
+ }
+ if (interrupted) {
+ Thread.currentThread().interrupt();
+ }
+ return master.getName();
}
/**
@@ -241,8 +270,8 @@ public class LocalHBaseCluster {
}
}
}
- if (this.masterThreads != null) {
- for (Thread t : this.masterThreads) {
+ if (this.masters != null) {
+ for (Thread t : this.masters) {
if (t.isAlive()) {
try {
t.join();
@@ -258,14 +287,14 @@ public class LocalHBaseCluster {
* Start the cluster.
*/
public void startup() throws IOException {
- JVMClusterUtil.startup(this.masterThreads, this.regionThreads);
+ JVMClusterUtil.startup(this.masters, this.regionThreads);
}
/**
* Shut down the mini HBase cluster
*/
public void shutdown() {
- JVMClusterUtil.shutdown(this.masterThreads, this.regionThreads);
+ JVMClusterUtil.shutdown(this.masters, this.regionThreads);
}
/**
Modified: hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/master/BaseScanner.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/master/BaseScanner.java?rev=1188421&r1=1188420&r2=1188421&view=diff
==============================================================================
--- hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/master/BaseScanner.java (original)
+++ hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/master/BaseScanner.java Mon Oct 24 22:30:48 2011
@@ -611,6 +611,7 @@ abstract class BaseScanner extends Chore
public void interruptAndStop() {
synchronized(scannerLock){
if (isAlive()) {
+ shutdownThisThread();
super.interrupt();
LOG.info("Interrupted");
}
Modified: hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/master/HMaster.java?rev=1188421&r1=1188420&r2=1188421&view=diff
==============================================================================
--- hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/master/HMaster.java (original)
+++ hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/master/HMaster.java Mon Oct 24 22:30:48 2011
@@ -133,13 +133,24 @@ public class HMaster extends Thread impl
private static final Log LOG = LogFactory.getLog(HMaster.class.getName());
private static final String LOCALITY_SNAPSHOT_FILE_NAME = "regionLocality-snapshot";
- // We start out with closed flag on. Its set to off after construction.
- // Use AtomicBoolean rather than plain boolean because we want other threads
- // able to set shutdown flag. Using AtomicBoolean can pass a reference
- // rather than have them have to know about the hosting Master class.
- final AtomicBoolean closed = new AtomicBoolean(true);
- // TODO: Is this separate flag necessary?
- private final AtomicBoolean shutdownRequested = new AtomicBoolean(false);
+ /**
+ * We start out with closed flag on. Its set to off after construction. Use
+ * AtomicBoolean rather than plain boolean because we want other threads able
+ * to set this flag and trigger a cluster shutdown. Using AtomicBoolean can
+ * pass a reference rather than have them have to know about the hosting
+ * Master class. This also has disadvantages, because this instance passed
+ * to another object can be confused with another thread's own shutdown flag.
+ */
+ private final AtomicBoolean closed = new AtomicBoolean(true);
+
+ /**
+ * This flag indicates that cluster shutdown has been requested. This is
+ * different from {@link #closed} in that it is initially false, but is
+ * set to true at shutdown and remains true from then on. For killing one
+ * instance of the master, see {@link #killed}.
+ */
+ private final AtomicBoolean clusterShutdownRequested =
+ new AtomicBoolean(false);
private final Configuration conf;
private final Path rootdir;
@@ -148,7 +159,7 @@ public class HMaster extends Thread impl
private final int numRetries;
// Metrics is set when we call run.
- private final MasterMetrics metrics;
+ private MasterMetrics metrics;
final Lock splitLogLock = new ReentrantLock();
final boolean distributedLogSplitting;
@@ -171,12 +182,12 @@ public class HMaster extends Thread impl
private final HServerAddress address;
private final ServerConnection connection;
- private final ServerManager serverManager;
- private final RegionManager regionManager;
+ private ServerManager serverManager;
+ private RegionManager regionManager;
private long lastFragmentationQuery = -1L;
private Map<String, Integer> fragmentation = null;
- private final RegionServerOperationQueue regionServerOperationQueue;
+ private RegionServerOperationQueue regionServerOperationQueue;
// True if this is the master that started the cluster.
boolean isClusterStartup;
@@ -187,9 +198,14 @@ public class HMaster extends Thread impl
private long applyPreferredAssignmentPeriod = 0l;
private long holdRegionForBestLocalityPeriod = 0l;
- // flag set after we become the active master (used for testing)
+ /** True if the master is being killed. No cluster shutdown is done. */
+ private volatile boolean killed = false;
+
+ /** Flag set after we become the active master (used for testing). */
private volatile boolean isActiveMaster = false;
+ public static final String MASTER_ID_CONF_KEY = "hbase.test.master.id";
+
/**
* Constructor
* @param conf configuration
@@ -202,7 +218,8 @@ public class HMaster extends Thread impl
// number of RS ephemeral nodes. RS ephemeral nodes are created only after
// the primary master has written the address to ZK. So this has to be done
// before we race to write our address to zookeeper.
- zooKeeperWrapper = ZooKeeperWrapper.createInstance(conf, HMaster.class.getName());
+ zooKeeperWrapper = ZooKeeperWrapper.createInstance(conf,
+ getZKWrapperName());
isClusterStartup = (zooKeeperWrapper.scanRSDirectory().size() == 0);
// Get my address and create an rpc server instance. The rpc-server port
@@ -254,12 +271,12 @@ public class HMaster extends Thread impl
// TODO: Bring up the UI to redirect to active Master.
zooKeeperWrapper.registerListener(this);
this.zkMasterAddressWatcher =
- new ZKMasterAddressWatcher(this.zooKeeperWrapper, this.shutdownRequested);
+ new ZKMasterAddressWatcher(this.zooKeeperWrapper, this.clusterShutdownRequested);
zooKeeperWrapper.registerListener(zkMasterAddressWatcher);
// if we're a backup master, stall until a primary to writes his address
- if (conf.getBoolean(HConstants.MASTER_TYPE_BACKUP, HConstants.DEFAULT_MASTER_TYPE_BACKUP)) {
-
+ if (conf.getBoolean(HConstants.MASTER_TYPE_BACKUP,
+ HConstants.DEFAULT_MASTER_TYPE_BACKUP)) {
// ephemeral node expiry will be detected between about 40 to 60 seconds;
// plus add a little extra since only ZK leader can expire nodes, and
// leader maybe a little bit delayed in getting info about the pings.
@@ -275,26 +292,46 @@ public class HMaster extends Thread impl
throw new IOException("Interrupted waiting for master address");
}
}
+ }
+
+ private void stallIfBackupMaster() {
+ if (!this.zkMasterAddressWatcher.writeAddressToZooKeeper(this.address,
+ true)) {
+ LOG.info("Failed to write master address to ZooKeeper, not starting (" +
+ "closed=" + closed.get() + ")");
+ return;
+ }
- this.zkMasterAddressWatcher.writeAddressToZooKeeper(this.address, true);
isActiveMaster = true;
this.regionServerOperationQueue =
new RegionServerOperationQueue(this.conf, this.closed);
serverManager = new ServerManager(this);
-
// Start the unassigned watcher - which will create the unassigned region
// in ZK. This is needed before RegionManager() constructor tries to assign
// the root region.
- ZKUnassignedWatcher.start(this.conf, this);
+ try {
+ ZKUnassignedWatcher.start(this.conf, this);
+ } catch (IOException e) {
+ LOG.error("Failed to start ZK unassigned region watcher", e);
+ throw new RuntimeException(e);
+ }
+
// start the "close region" executor service
- HBaseEventType.RS2ZK_REGION_CLOSED.startMasterExecutorService(address.toString());
+ HBaseEventType.RS2ZK_REGION_CLOSED.startMasterExecutorService(
+ address.toString());
// start the "open region" executor service
- HBaseEventType.RS2ZK_REGION_OPENED.startMasterExecutorService(address.toString());
+ HBaseEventType.RS2ZK_REGION_OPENED.startMasterExecutorService(
+ address.toString());
// start the region manager
- regionManager = new RegionManager(this);
+ try {
+ regionManager = new RegionManager(this);
+ } catch (IOException e) {
+ LOG.error("Failed to instantiate region manager");
+ throw new RuntimeException(e);
+ }
setName(MASTER);
this.metrics = new MasterMetrics(MASTER, this.serverManager);
@@ -499,8 +536,8 @@ public class HMaster extends Thread impl
return this.fs;
}
- public AtomicBoolean getShutdownRequested() {
- return this.shutdownRequested;
+ public AtomicBoolean getClusterShutdownRequested() {
+ return this.clusterShutdownRequested;
}
AtomicBoolean getClosed() {
@@ -562,6 +599,12 @@ public class HMaster extends Thread impl
/** Main processing loop */
@Override
public void run() {
+ stallIfBackupMaster();
+
+ if (closed.get()) {
+ LOG.info("Master is closing, not starting the main loop");
+ return;
+ }
MonitoredTask startupStatus =
TaskMonitor.get().createStatus("Master startup");
startupStatus.setDescription("Master startup");
@@ -581,7 +624,7 @@ public class HMaster extends Thread impl
/* Main processing loop */
FINISHED: while (!this.closed.get()) {
// check if we should be shutting down
- if (this.shutdownRequested.get()) {
+ if (clusterShutdownRequested.get()) {
// The region servers won't all exit until we stop scanning the
// meta regions
this.regionManager.stopScanners();
@@ -613,14 +656,26 @@ public class HMaster extends Thread impl
}
startupStatus.cleanup();
- if (!this.shutdownRequested.get()) { // shutdown not by request
- shutdown(); // indicated that master is shutting down
- startShutdown(); // get started with shutdown: stop scanners etc.
+ if (!this.clusterShutdownRequested.get()) { // shutdown not by request
+ if (!killed) {
+ // This would trigger a cluster shutdown, so we are not doing this when
+ // the master is being "killed" in a unit test.
+ shutdown();
+ }
+
+ // Get started with shutdown: stop scanners, etc. This does not lead to
+ // a cluster shutdown.
+ startShutdown();
+ } else if (killed) {
+ startShutdown();
+ }
+
+ if (!killed) {
+ // Wait for all the remaining region servers to report in. Only doing
+ // this when the cluster is shutting down.
+ this.serverManager.letRegionServersShutdown();
}
- // Wait for all the remaining region servers to report in.
- this.serverManager.letRegionServersShutdown();
-
/*
* Clean up and close up shop
*/
@@ -633,7 +688,15 @@ public class HMaster extends Thread impl
}
}
this.rpcServer.stop();
- this.regionManager.stop();
+
+ if (killed) {
+ regionManager.joinThreads();
+ } else {
+ // Compared to the above, this will clear the RS directory. We are not
+ // doing that when the master is being "killed" in a unit test.
+ regionManager.stop();
+ }
+
this.zooKeeperWrapper.close();
HBaseExecutorService.shutdown();
LOG.info("HMaster main thread exiting");
@@ -965,7 +1028,7 @@ public class HMaster extends Thread impl
// splitLogManager must be started before starting rpcServer because
// region-servers dying will trigger log splitting
this.splitLogManager = new SplitLogManager(zooKeeperWrapper, conf,
- this.shutdownRequested, address.toString());
+ this.clusterShutdownRequested, address.toString());
this.splitLogManager.finishInitialization();
}
// Start the server so that region servers are running before we start
@@ -991,7 +1054,7 @@ public class HMaster extends Thread impl
}
/*
- * Start shutting down the master
+ * Start shutting down the master. This does NOT trigger a cluster shutdown.
*/
void startShutdown() {
this.closed.set(true);
@@ -1059,7 +1122,7 @@ public class HMaster extends Thread impl
@Override
public void shutdown() {
LOG.info("Cluster shutdown requested. Starting to quiesce servers");
- this.shutdownRequested.set(true);
+ this.clusterShutdownRequested.set(true);
this.zooKeeperWrapper.setClusterState(false);
if (splitLogManager != null) {
this.splitLogManager.stop();
@@ -1736,7 +1799,7 @@ public class HMaster extends Thread impl
cluster.startup();
} else {
HMaster master = constructMaster(masterClass, conf);
- if (master.shutdownRequested.get()) {
+ if (master.clusterShutdownRequested.get()) {
LOG.info("Won't bring the Master up as a shutdown is requested");
return;
}
@@ -1819,7 +1882,25 @@ public class HMaster extends Thread impl
}
public void stopMaster() {
+ LOG.info("Master stop requested, isActiveMaster=" + isActiveMaster);
closed.set(true);
+ // If we are a backup master, we need to interrupt wait
+ if (!isActiveMaster) {
+ zkMasterAddressWatcher.cancelMasterZNodeWait();
+ }
+ }
+
+ public void killMaster() {
+ killed = true;
+ stopMaster();
+ }
+
+ boolean isKilled() {
+ return killed;
+ }
+
+ String getZKWrapperName() {
+ return HMaster.class.getName() + conf.get(MASTER_ID_CONF_KEY, "");
}
public SplitLogManager getSplitLogManager() {
Modified: hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/master/MetaScanner.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/master/MetaScanner.java?rev=1188421&r1=1188420&r2=1188421&view=diff
==============================================================================
--- hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/master/MetaScanner.java (original)
+++ hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/master/MetaScanner.java Mon Oct 24 22:30:48 2011
@@ -51,7 +51,7 @@ class MetaScanner extends BaseScanner {
* @param master
*/
public MetaScanner(HMaster master) {
- super(master, false, master.getShutdownRequested());
+ super(master, false, master.getClusterShutdownRequested());
}
// Don't retry if we get an error while scanning. Errors are most often
Modified: hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/master/ProcessRegionOpen.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/master/ProcessRegionOpen.java?rev=1188421&r1=1188420&r2=1188421&view=diff
==============================================================================
--- hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/master/ProcessRegionOpen.java (original)
+++ hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/master/ProcessRegionOpen.java Mon Oct 24 22:30:48 2011
@@ -117,7 +117,7 @@ public class ProcessRegionOpen extends P
}
ZooKeeperWrapper zkWrapper =
ZooKeeperWrapper.getInstance(master.getConfiguration(),
- HMaster.class.getName());
+ master.getZKWrapperName());
zkWrapper.deleteUnassignedRegion(regionInfo.getEncodedName());
return true;
}
Modified: hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/master/RegionManager.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/master/RegionManager.java?rev=1188421&r1=1188420&r2=1188421&view=diff
==============================================================================
--- hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/master/RegionManager.java (original)
+++ hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/master/RegionManager.java Mon Oct 24 22:30:48 2011
@@ -201,7 +201,7 @@ public class RegionManager {
this.master = master;
this.zkWrapper =
- ZooKeeperWrapper.getInstance(conf, HMaster.class.getName());
+ ZooKeeperWrapper.getInstance(conf, master.getZKWrapperName());
this.maxAssignInOneGo = conf.getInt("hbase.regions.percheckin", 10);
this.loadBalancer = new LoadBalancer(conf);
@@ -242,7 +242,7 @@ public class RegionManager {
void reassignRootRegion() {
unsetRootRegion();
- if (!master.getShutdownRequested().get()) {
+ if (!master.getClusterShutdownRequested().get()) {
synchronized (regionsInTransition) {
String regionName = HRegionInfo.ROOT_REGIONINFO.getRegionNameAsString();
byte[] data = null;
@@ -781,22 +781,29 @@ public class RegionManager {
/** Stop the region assigner */
public void stop() {
+ joinThreads();
+ master.getZooKeeperWrapper().clearRSDirectory();
+ master.getZooKeeperWrapper().close();
+ }
+
+ /**
+ * Terminate all threads but don't clean up any state.
+ */
+ public void joinThreads() {
try {
if (rootScannerThread.isAlive()) {
rootScannerThread.join(); // Wait for the root scanner to finish.
}
- } catch (Exception iex) {
+ } catch (InterruptedException iex) {
LOG.warn("root scanner", iex);
}
try {
if (metaScannerThread.isAlive()) {
metaScannerThread.join(); // Wait for meta scanner to finish.
}
- } catch(Exception iex) {
+ } catch (InterruptedException iex) {
LOG.warn("meta scanner", iex);
}
- master.getZooKeeperWrapper().clearRSDirectory();
- master.getZooKeeperWrapper().close();
}
/**
@@ -1342,7 +1349,7 @@ public class RegionManager {
*/
public void waitForRootRegionLocation() {
synchronized (rootRegionLocation) {
- while (!master.getShutdownRequested().get() &&
+ while (!master.getClusterShutdownRequested().get() &&
!master.isClosed() && rootRegionLocation.get() == null) {
// rootRegionLocation will be filled in when we get an 'open region'
// regionServerReport message from the HRegionServer that has been
Modified: hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/master/RootScanner.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/master/RootScanner.java?rev=1188421&r1=1188420&r2=1188421&view=diff
==============================================================================
--- hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/master/RootScanner.java (original)
+++ hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/master/RootScanner.java Mon Oct 24 22:30:48 2011
@@ -31,7 +31,7 @@ class RootScanner extends BaseScanner {
* @param master
*/
public RootScanner(HMaster master) {
- super(master, true, master.getShutdownRequested());
+ super(master, true, master.getClusterShutdownRequested());
}
/**
Modified: hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java?rev=1188421&r1=1188420&r2=1188421&view=diff
==============================================================================
--- hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java (original)
+++ hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java Mon Oct 24 22:30:48 2011
@@ -147,13 +147,13 @@ public class ServerManager {
60 * 1000);
this.minimumServerCount = c.getInt("hbase.regions.server.count.min", 0);
this.serverMonitorThread = new ServerMonitor(metaRescanInterval,
- this.master.getShutdownRequested());
+ this.master.getClusterShutdownRequested());
String n = Thread.currentThread().getName();
Threads.setDaemonThreadRunning(this.serverMonitorThread,
n + ".serverMonitor");
this.oldLogCleaner = new OldLogsCleaner(
c.getInt("hbase.master.meta.thread.rescanfrequency",60 * 1000),
- this.master.getShutdownRequested(), c,
+ this.master.getClusterShutdownRequested(), c,
master.getFileSystem(), master.getOldLogDir());
Threads.setDaemonThreadRunning(oldLogCleaner,
n + ".oldLogCleaner");
@@ -282,7 +282,7 @@ public class ServerManager {
this.quiescedServers.incrementAndGet();
}
}
- if (this.master.getShutdownRequested().get()) {
+ if (this.master.getClusterShutdownRequested().get()) {
if (quiescedServers.get() >= serversToServerInfo.size()) {
// If the only servers we know about are meta servers, then we can
// proceed with shutdown
@@ -300,7 +300,7 @@ public class ServerManager {
return new HMsg [] {HMsg.REGIONSERVER_QUIESCE};
}
}
- if (this.master.isClosed()) {
+ if (this.master.isClosed() && !master.isKilled()) {
// Tell server to shut down if we are shutting down. This should
// happen after check of MSG_REPORT_EXITING above, since region server
// will send us one of these messages after it gets MSG_REGIONSERVER_STOP
@@ -395,7 +395,7 @@ public class ServerManager {
": MSG_REPORT_EXITING");
// Get all the regions the server was serving reassigned
// (if we are not shutting down).
- if (!master.closed.get()) {
+ if (!master.isClosed()) {
for (int i = 1; i < msgs.length; i++) {
LOG.info("Processing " + msgs[i] + " from " +
serverInfo.getServerName());
Modified: hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/master/ZKMasterAddressWatcher.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/master/ZKMasterAddressWatcher.java?rev=1188421&r1=1188420&r2=1188421&view=diff
==============================================================================
--- hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/master/ZKMasterAddressWatcher.java (original)
+++ hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/master/ZKMasterAddressWatcher.java Mon Oct 24 22:30:48 2011
@@ -89,14 +89,16 @@ class ZKMasterAddressWatcher implements
* Wait for master address to be available. This sets a watch in ZooKeeper and
* blocks until the master address ZNode gets deleted.
*/
- public synchronized void waitForMasterAddressAvailability() {
- while (zookeeper.readMasterAddress(zookeeper) != null) {
+ private synchronized void waitForMasterAddressAvailability() {
+ while (!requestShutdown.get() &&
+ zookeeper.readMasterAddress(zookeeper) != null) {
try {
LOG.debug("Waiting for master address ZNode to be deleted " +
"(Also watching cluster state node)");
this.zookeeper.setClusterStateWatch();
wait();
} catch (InterruptedException e) {
+ Thread.currentThread().interrupt();
}
}
}
@@ -112,7 +114,7 @@ class ZKMasterAddressWatcher implements
waitForMasterAddressAvailability();
// Check if we need to shutdown instead of taking control
if (this.requestShutdown.get()) {
- LOG.debug("Won't start Master because cluster is shuting down");
+ LOG.debug("Won't start Master because of requested shutdown");
return false;
}
if(this.zookeeper.writeMasterAddress(address)) {
@@ -133,4 +135,10 @@ class ZKMasterAddressWatcher implements
public void setZookeeper(ZooKeeperWrapper zookeeper) {
this.zookeeper = zookeeper;
}
+
+ synchronized void cancelMasterZNodeWait() {
+ requestShutdown.set(true);
+ notifyAll();
+ }
+
}
\ No newline at end of file
Modified: hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/master/ZKUnassignedWatcher.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/master/ZKUnassignedWatcher.java?rev=1188421&r1=1188420&r2=1188421&view=diff
==============================================================================
--- hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/master/ZKUnassignedWatcher.java (original)
+++ hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/master/ZKUnassignedWatcher.java Mon Oct 24 22:30:48 2011
@@ -56,7 +56,7 @@ public class ZKUnassignedWatcher impleme
throws IOException {
this.serverName = master.getHServerAddress().toString();
this.serverManager = master.getServerManager();
- zkWrapper = ZooKeeperWrapper.getInstance(conf, HMaster.class.getName());
+ zkWrapper = ZooKeeperWrapper.getInstance(conf, master.getZKWrapperName());
String unassignedZNode = zkWrapper.getRegionInTransitionZNode();
// If the UNASSIGNED ZNode exists and this is a fresh cluster start, then
Modified: hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/regionserver/SplitLogWorker.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/regionserver/SplitLogWorker.java?rev=1188421&r1=1188420&r2=1188421&view=diff
==============================================================================
--- hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/regionserver/SplitLogWorker.java (original)
+++ hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/regionserver/SplitLogWorker.java Mon Oct 24 22:30:48 2011
@@ -199,7 +199,7 @@ public class SplitLogWorker implements R
* try to grab every task that has been put up
*/
private void taskLoop() {
- while (true) {
+ while (!exitWorker) {
int seq_start = taskReadySeq;
List<String> paths = getTaskList();
if (paths == null) {
@@ -213,7 +213,7 @@ public class SplitLogWorker implements R
// don't call ZKSplitLog.getNodeName() because that will lead to
// double encoding of the path name
grabTask(watcher.getZNode(watcher.splitLogZNode, paths.get(idx)));
- if (exitWorker == true) {
+ if (exitWorker) {
return;
}
}
@@ -222,9 +222,10 @@ public class SplitLogWorker implements R
try {
taskReadyLock.wait();
} catch (InterruptedException e) {
- LOG.warn("SplitLogWorker inteurrpted while waiting for task," +
- " exiting", e);
- assert exitWorker == true;
+ LOG.info("SplitLogWorker interrupted while waiting for task," +
+ " exiting: " + e.toString() + (exitWorker ? "" :
+ " (ERROR: exitWorker is not set, exiting anyway)"));
+ exitWorker = true;
return;
}
}
Modified: hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/replication/master/ReplicationLogCleaner.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/replication/master/ReplicationLogCleaner.java?rev=1188421&r1=1188420&r2=1188421&view=diff
==============================================================================
--- hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/replication/master/ReplicationLogCleaner.java (original)
+++ hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/replication/master/ReplicationLogCleaner.java Mon Oct 24 22:30:48 2011
@@ -49,11 +49,14 @@ public class ReplicationLogCleaner imple
private Configuration conf;
private ReplicationZookeeperWrapper zkHelper;
private Set<String> hlogs = new HashSet<String>();
+ private final String zkWrapperName;
/**
* Instantiates the cleaner, does nothing more.
*/
- public ReplicationLogCleaner() {}
+ public ReplicationLogCleaner(String zkWrapperName) {
+ this.zkWrapperName = zkWrapperName;
+ }
@Override
public boolean isLogDeletable(Path filePath) {
@@ -121,8 +124,7 @@ public class ReplicationLogCleaner imple
this.ttlCleaner.setConf(conf);
try {
this.zkHelper = new ReplicationZookeeperWrapper(
- ZooKeeperWrapper.createInstance(this.conf,
- HMaster.class.getName()),
+ ZooKeeperWrapper.createInstance(this.conf, zkWrapperName),
this.conf, new AtomicBoolean(true), null);
} catch (IOException e) {
LOG.error(e);
Modified: hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/util/JVMClusterUtil.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/util/JVMClusterUtil.java?rev=1188421&r1=1188420&r2=1188421&view=diff
==============================================================================
--- hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/util/JVMClusterUtil.java (original)
+++ hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/util/JVMClusterUtil.java Mon Oct 24 22:30:48 2011
@@ -96,41 +96,7 @@ public class JVMClusterUtil {
}
/**
- * Datastructure to hold Master Thread and Master instance
- */
- public static class MasterThread extends Thread {
- private final HMaster master;
-
- public MasterThread(final HMaster m, final int index) {
- super(m, "Master:" + index);
- this.master = m;
- }
-
- /** @return the master */
- public HMaster getMaster() {
- return this.master;
- }
-
- /**
- * Block until the master has come online, indicating it is ready
- * to be used.
- */
- public void waitForServerOnline() {
- // The server is marked online after init begins but before race to become
- // the active master.
- while (!this.master.isMasterRunning() &&
- !this.master.getShutdownRequested().get()) {
- try {
- Thread.sleep(1000);
- } catch (InterruptedException e) {
- // continue waiting
- }
- }
- }
- }
-
- /**
- * Creates a {@link MasterThread}.
+ * Creates a {@link HMaster}.
* Call 'start' on the returned thread to make it run.
* @param c Configuration to use.
* @param hmc Class to create.
@@ -138,13 +104,16 @@ public class JVMClusterUtil {
* @throws IOException
* @return Master added.
*/
- public static JVMClusterUtil.MasterThread createMasterThread(
+ public static HMaster createMaster(
final Configuration c, final Class<? extends HMaster> hmc,
final int index)
throws IOException {
- HMaster server;
+ Configuration masterConf = new Configuration(c);
+ masterConf.setInt(HMaster.MASTER_ID_CONF_KEY, index);
+
+ HMaster master;
try {
- server = hmc.getConstructor(Configuration.class).newInstance(c);
+ master = hmc.getConstructor(Configuration.class).newInstance(masterConf);
} catch (InvocationTargetException ite) {
Throwable target = ite.getTargetException();
throw new RuntimeException("Failed construction of Master: " +
@@ -155,7 +124,7 @@ public class JVMClusterUtil {
ioe.initCause(e);
throw ioe;
}
- return new JVMClusterUtil.MasterThread(server, index);
+ return master;
}
/**
@@ -164,10 +133,10 @@ public class JVMClusterUtil {
* @param regionServers
* @return Address to use contacting master.
*/
- public static String startup(final List<JVMClusterUtil.MasterThread> masters,
+ public static String startup(final List<HMaster> masters,
final List<JVMClusterUtil.RegionServerThread> regionservers) throws IOException {
if (masters != null) {
- for (JVMClusterUtil.MasterThread t : masters) {
+ for (HMaster t : masters) {
t.start();
}
}
@@ -181,9 +150,9 @@ public class JVMClusterUtil {
}
// Wait for an active master
while (true) {
- for (JVMClusterUtil.MasterThread t : masters) {
- if (t.master.isActiveMaster()) {
- return t.master.getServerName().toString();
+ for (HMaster t : masters) {
+ if (t.isActiveMaster()) {
+ return t.getServerName().toString();
}
}
try {
@@ -198,15 +167,17 @@ public class JVMClusterUtil {
* @param master
* @param regionservers
*/
- public static void shutdown(final List<MasterThread> masters,
+ public static void shutdown(final List<HMaster> masters,
final List<RegionServerThread> regionservers) {
LOG.debug("Shutting down HBase Cluster");
if (masters != null) {
- for (JVMClusterUtil.MasterThread t : masters) {
- if (t.master.isActiveMaster()) {
- t.master.shutdown();
+ for (HMaster t : masters) {
+ if (t.isActiveMaster()) {
+ // This will trigger cluster shutdown.
+ t.shutdown();
} else {
- t.master.stopMaster();
+ // This will only stop this particular master.
+ t.stopMaster();
}
}
}
@@ -226,7 +197,7 @@ public class JVMClusterUtil {
}
if (masters != null) {
- for (JVMClusterUtil.MasterThread t : masters) {
+ for (HMaster t : masters) {
while (t.isAlive()) {
try {
t.join();
Modified: hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/zookeeper/ZooKeeperWrapper.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/zookeeper/ZooKeeperWrapper.java?rev=1188421&r1=1188420&r2=1188421&view=diff
==============================================================================
--- hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/zookeeper/ZooKeeperWrapper.java (original)
+++ hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/zookeeper/ZooKeeperWrapper.java Mon Oct 24 22:30:48 2011
@@ -165,8 +165,9 @@ public class ZooKeeperWrapper implements
// creates only one instance
public static ZooKeeperWrapper createInstance(Configuration conf, String name)
throws IOException {
- if (getInstance(conf, name) != null) {
- return getInstance(conf, name);
+ ZooKeeperWrapper zkw = getInstance(conf, name);
+ if (zkw != null) {
+ return zkw;
}
ZooKeeperWrapper.createLock.lock();
try {
Modified: hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java?rev=1188421&r1=1188420&r2=1188421&view=diff
==============================================================================
--- hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java (original)
+++ hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java Mon Oct 24 22:30:48 2011
@@ -226,12 +226,11 @@ public class HBaseTestingUtility {
* Can only create one.
* @param dir Where to home your dfs cluster.
* @param servers How many DNs to start.
- * @throws Exception
* @see {@link #shutdownMiniDFSCluster()}
* @return The mini dfs cluster created.
*/
public MiniDFSCluster startMiniDFSCluster(int servers, final File dir)
- throws Exception {
+ throws IOException {
// This does the following to home the minidfscluster
// base_dir = new File(System.getProperty("test.build.data", "build/test/data"), "dfs/");
// Some tests also do this:
@@ -270,7 +269,7 @@ public class HBaseTestingUtility {
}
private MiniZooKeeperCluster startMiniZKCluster(final File dir)
- throws Exception {
+ throws IOException, InterruptedException {
if (this.zkCluster != null) {
throw new IOException("Cluster already running at " + dir);
}
@@ -298,7 +297,25 @@ public class HBaseTestingUtility {
* @see {@link #shutdownMiniDFSCluster()}
*/
public MiniHBaseCluster startMiniCluster() throws Exception {
- return startMiniCluster(1);
+ return startMiniCluster(1, 1);
+ }
+
+ /**
+ * Start up a minicluster of hbase, optionally dfs, and zookeeper.
+ * Modifies Configuration. Homes the cluster data directory under a random
+ * subdirectory in a directory under System property test.build.data.
+ * Directory is cleaned up on exit.
+ * @param numSlaves Number of slaves to start up. We'll start this many
+ * datanodes and regionservers. If numSlaves is > 1, then make sure
+ * hbase.regionserver.info.port is -1 (i.e. no ui per regionserver) otherwise
+ * bind errors.
+ * @throws Exception
+ * @see {@link #shutdownMiniCluster()}
+ * @return Mini hbase cluster instance created.
+ */
+ public MiniHBaseCluster startMiniCluster(final int numSlaves)
+ throws IOException, InterruptedException {
+ return startMiniCluster(1, numSlaves);
}
/**
@@ -306,16 +323,19 @@ public class HBaseTestingUtility {
* Modifies Configuration. Homes the cluster data directory under a random
* subdirectory in a directory under System property test.build.data.
* Directory is cleaned up on exit.
- * @param servers Number of servers to start up. We'll start this many
- * datanodes and regionservers. If servers is > 1, then make sure
+ * @param numMasters Number of masters to start up. We'll start this many
+ * hbase masters. If numMasters > 1, you can find the active/primary master
+ * with {@link MiniHBaseCluster#getMaster()}.
+ * @param numSlaves Number of slave servers to start up. We'll start this
+ * many datanodes and regionservers. If numSlaves is > 1, then make sure
* hbase.regionserver.info.port is -1 (i.e. no ui per regionserver) otherwise
* bind errors.
* @throws Exception
* @see {@link #shutdownMiniCluster()}
* @return Mini hbase cluster instance created.
*/
- public MiniHBaseCluster startMiniCluster(final int servers)
- throws Exception {
+ public MiniHBaseCluster startMiniCluster(final int numMasters,
+ final int numSlaves) throws IOException, InterruptedException {
LOG.info("Starting up minicluster");
// If we already put up a cluster, fail.
isRunningCluster();
@@ -325,7 +345,7 @@ public class HBaseTestingUtility {
System.setProperty(TEST_DIRECTORY_KEY, this.clusterTestBuildDir.getPath());
// Bring up mini dfs cluster. This spews a bunch of warnings about missing
// scheme. Complaints are 'Scheme is undefined for build/test/data/dfs/name1'.
- startMiniDFSCluster(servers, this.clusterTestBuildDir);
+ startMiniDFSCluster(numSlaves, this.clusterTestBuildDir);
// Mangle conf so fs parameter points to minidfs we just started up
FileSystem fs = this.dfsCluster.getFileSystem();
@@ -344,7 +364,7 @@ public class HBaseTestingUtility {
this.conf.set(HConstants.HBASE_DIR, hbaseRootdir.toString());
fs.mkdirs(hbaseRootdir);
FSUtils.setVersion(fs, hbaseRootdir);
- this.hbaseCluster = new MiniHBaseCluster(this.conf, servers);
+ this.hbaseCluster = new MiniHBaseCluster(this.conf, numMasters, numSlaves);
// Don't leave here till we've done a successful scan of the .META.
HTable t = new HTable(this.conf, HConstants.META_TABLE_NAME);
ResultScanner s = t.getScanner(new Scan());
Modified: hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/MiniHBaseCluster.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/MiniHBaseCluster.java?rev=1188421&r1=1188420&r2=1188421&view=diff
==============================================================================
--- hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/MiniHBaseCluster.java (original)
+++ hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/MiniHBaseCluster.java Mon Oct 24 22:30:48 2011
@@ -509,4 +509,56 @@ public class MiniHBaseCluster {
throws IOException {
((MiniHBaseClusterMaster)getMaster()).addMessage(hrs.getHServerInfo(), msg);
}
+
+ /**
+ * @return List of master threads.
+ */
+ public List<HMaster> getMasters() {
+ return this.hbaseCluster.getMasters();
+ }
+
+ /**
+ * Kill the specified master cleanly. Does not result in a cluster shutdown.
+ *
+ * @param serverNumber Used as index into a list.
+ * @return the master that was stopped
+ */
+ public HMaster killMaster(int serverNumber) {
+ HMaster server = hbaseCluster.getMasters().get(serverNumber);
+ LOG.info("Killing master " + server.toString());
+ server.killMaster();
+ return server;
+ }
+
+ /**
+ * Wait for the specified master to stop. Removes this thread from list
+ * of running threads.
+ * @param serverNumber
+ * @return Name of master that just went down.
+ */
+ public String waitOnMasterStop(final int serverNumber) {
+ return this.hbaseCluster.waitOnMasterStop(serverNumber);
+ }
+
+ /**
+ * Blocks until one of the masters reports itself as the active master. No
+ * further readiness or initialization check is performed here.
+ *
+ * @return true if an active master becomes available. false if there are no
+ * masters left.
+ * @throws InterruptedException
+ */
+ public boolean waitForActiveAndReadyMaster() throws InterruptedException {
+ List<HMaster> masters;
+ while ((masters = getMasters()).size() > 0) {
+ for (HMaster master : masters) {
+ if (master.isActiveMaster()) {
+ return true;
+ }
+ }
+ Thread.sleep(200);
+ }
+ return false;
+ }
+
}
Added: hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java?rev=1188421&view=auto
==============================================================================
--- hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java (added)
+++ hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java Mon Oct 24 22:30:48 2011
@@ -0,0 +1,121 @@
+/*
+ * Copyright 2011 The Apache Software Foundation
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.master;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import java.util.List;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.MiniHBaseCluster;
+import org.junit.Test;
+
+/**
+ * Exercises master failover on a mini cluster that runs several masters.
+ */
+public class TestMasterFailover {
+  private static final Log LOG = LogFactory.getLog(TestMasterFailover.class);
+
+  /**
+   * Simple test of master failover.
+   * <p>
+   * Starts with three masters. Kills a backup master. Then kills the active
+   * master. Ensures the remaining master becomes active.
+   *
+   * @throws Exception on any failure starting, using or stopping the cluster
+   */
+  @Test
+  public void testSimpleMasterFailover() throws Exception {
+    final int NUM_MASTERS = 3;
+    final int NUM_RS = 3;
+
+    // Start the cluster
+    HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
+    TEST_UTIL.startMiniCluster(NUM_MASTERS, NUM_RS);
+    try {
+      MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
+
+      // get all the master threads; the size assertions below expect this
+      // list to shrink as masters are stopped
+      List<HMaster> masters = cluster.getMasters();
+
+      // make sure all masters came online
+      for (HMaster mt : masters) {
+        assertTrue(mt.isAlive());
+      }
+
+      // verify only one is the active master and we have right number
+      int numActive = 0;
+      int activeIndex = -1;
+      String activeName = null;
+      for (int i = 0; i < masters.size(); i++) {
+        if (masters.get(i).isActiveMaster()) {
+          numActive++;
+          activeIndex = i;
+          activeName = masters.get(i).getServerName();
+        }
+      }
+      assertEquals(1, numActive);
+      assertEquals(NUM_MASTERS, masters.size());
+
+      // attempt to stop one of the inactive masters
+      int backupIndex = (activeIndex + 1) % masters.size();
+      LOG.debug("\n\nStopping backup master (#" + backupIndex + ")\n");
+      cluster.killMaster(backupIndex);
+      cluster.waitOnMasterStop(backupIndex);
+
+      // verify still one active master and it's the same. Recount from
+      // scratch so the assertions below check the current state rather than
+      // values left over from the first scan.
+      numActive = 0;
+      activeIndex = -1;
+      for (int i = 0; i < masters.size(); i++) {
+        if (masters.get(i).isActiveMaster()) {
+          numActive++;
+          activeIndex = i;
+          assertEquals(activeName, masters.get(i).getServerName());
+        }
+      }
+      assertEquals(1, numActive);
+      assertEquals(2, masters.size());
+
+      // kill the active master
+      LOG.debug("\n\nStopping the active master (#" + activeIndex + ")\n");
+      cluster.killMaster(activeIndex);
+      cluster.waitOnMasterStop(activeIndex);
+
+      // wait for an active master to show up and be ready
+      assertTrue(cluster.waitForActiveAndReadyMaster());
+
+      LOG.debug("\n\nVerifying backup master is now active\n");
+      // should only have one master now
+      assertEquals(1, masters.size());
+      // and he should be active
+      assertTrue(masters.get(0).isActiveMaster());
+    } finally {
+      // Stop the cluster even when an assertion above fails
+      TEST_UTIL.shutdownMiniCluster();
+    }
+  }
+
+}