You are viewing a plain text version of this content. The canonical link was not preserved in this plain text copy.
Posted to commits@hbase.apache.org by jd...@apache.org on 2009/10/29 02:24:03 UTC
svn commit: r830820 - in /hadoop/hbase/trunk: ./
src/java/org/apache/hadoop/hbase/master/ src/test/org/apache/hadoop/hbase/
Author: jdcryans
Date: Thu Oct 29 01:24:03 2009
New Revision: 830820
URL: http://svn.apache.org/viewvc?rev=830820&view=rev
Log:
HBASE-1921 When the Master's session times out and there's only one, cluster is wedged
Modified:
hadoop/hbase/trunk/CHANGES.txt
hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/HMaster.java
hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/RegionManager.java
hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/ZKMasterAddressWatcher.java
hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/TestZooKeeper.java
Modified: hadoop/hbase/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/CHANGES.txt?rev=830820&r1=830819&r2=830820&view=diff
==============================================================================
--- hadoop/hbase/trunk/CHANGES.txt (original)
+++ hadoop/hbase/trunk/CHANGES.txt Thu Oct 29 01:24:03 2009
@@ -143,6 +143,7 @@
HBASE-1756 Refactor HLog (changing package first)
HBASE-1926 Remove unused xmlenc jar from trunk
HBASE-1936 HLog group commit
+ HBASE-1921 When the Master's session times out and there's only one, cluster is wedged
OPTIMIZATIONS
HBASE-410 [testing] Speed up the test suite
Modified: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/HMaster.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/HMaster.java?rev=830820&r1=830819&r2=830820&view=diff
==============================================================================
--- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/HMaster.java (original)
+++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/HMaster.java Thu Oct 29 01:24:03 2009
@@ -126,7 +126,7 @@
// Metrics is set when we call run.
private final MasterMetrics metrics;
// Our zk client.
- private final ZooKeeperWrapper zooKeeperWrapper;
+ private ZooKeeperWrapper zooKeeperWrapper;
// Watcher for master address and for cluster shutdown.
private final ZKMasterAddressWatcher zkMasterAddressWatcher;
// A Sleeper that sleeps for threadWakeFrequency; sleep if nothing todo.
@@ -187,7 +187,7 @@
this.zooKeeperWrapper = new ZooKeeperWrapper(conf, this);
this.zkMasterAddressWatcher =
new ZKMasterAddressWatcher(this.zooKeeperWrapper, this.shutdownRequested);
- this.zkMasterAddressWatcher.writeAddressToZooKeeper(this.address);
+ this.zkMasterAddressWatcher.writeAddressToZooKeeper(this.address, true);
serverManager = new ServerManager(this);
regionManager = new RegionManager(this);
@@ -1131,8 +1131,26 @@
(event.getType().equals(EventType.NodeDeleted) &&
event.getPath().equals(this.zooKeeperWrapper.getMasterElectionZNode())) &&
!shutdownRequested.get()) {
- LOG.error("Master lost its znode, killing itself now");
- System.exit(1);
+
+ LOG.info("Master lost its znode, trying to get a new one");
+
+ // Can we still be the master? If not, goodbye
+
+ zooKeeperWrapper.close();
+ try {
+ zooKeeperWrapper = new ZooKeeperWrapper(conf, this);
+ this.zkMasterAddressWatcher.setZookeeper(zooKeeperWrapper);
+ if(!this.zkMasterAddressWatcher.
+ writeAddressToZooKeeper(this.address,false)) {
+ throw new Exception("Another Master is currently active");
+ }
+
+ // Verify the cluster to see if anything happened while we were away
+ joinCluster();
+ } catch (Exception e) {
+ LOG.error("Killing master because of", e);
+ System.exit(1);
+ }
}
}
Modified: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/RegionManager.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/RegionManager.java?rev=830820&r1=830819&r2=830820&view=diff
==============================================================================
--- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/RegionManager.java (original)
+++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/RegionManager.java Thu Oct 29 01:24:03 2009
@@ -125,8 +125,6 @@
regionsToFlush = Collections.synchronizedSortedMap(
new TreeMap<byte[],Pair<HRegionInfo,HServerAddress>>
(Bytes.BYTES_COMPARATOR));
-
- private final ZooKeeperWrapper zooKeeperWrapper;
private final int zooKeeperNumRetries;
private final int zooKeeperPause;
@@ -143,7 +141,6 @@
// Scans the meta table
metaScannerThread = new MetaScanner(master);
- zooKeeperWrapper = master.getZooKeeperWrapper();
zooKeeperNumRetries = conf.getInt(ZOOKEEPER_RETRIES, DEFAULT_ZOOKEEPER_RETRIES);
zooKeeperPause = conf.getInt(ZOOKEEPER_PAUSE, DEFAULT_ZOOKEEPER_PAUSE);
@@ -602,8 +599,8 @@
} catch(Exception iex) {
LOG.warn("meta scanner", iex);
}
- zooKeeperWrapper.clearRSDirectory();
- zooKeeperWrapper.close();
+ master.getZooKeeperWrapper().clearRSDirectory();
+ master.getZooKeeperWrapper().close();
}
/**
@@ -1121,7 +1118,7 @@
private void writeRootRegionLocationToZooKeeper(HServerAddress address) {
for (int attempt = 0; attempt < zooKeeperNumRetries; ++attempt) {
- if (zooKeeperWrapper.writeRootRegionLocation(address)) {
+ if (master.getZooKeeperWrapper().writeRootRegionLocation(address)) {
return;
}
Modified: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/ZKMasterAddressWatcher.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/ZKMasterAddressWatcher.java?rev=830820&r1=830819&r2=830820&view=diff
==============================================================================
--- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/ZKMasterAddressWatcher.java (original)
+++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/ZKMasterAddressWatcher.java Thu Oct 29 01:24:03 2009
@@ -41,13 +41,14 @@
*/
class ZKMasterAddressWatcher implements Watcher {
private static final Log LOG = LogFactory.getLog(ZKMasterAddressWatcher.class);
- private final ZooKeeperWrapper zookeeper;
+
+ private ZooKeeperWrapper zookeeper;
private final AtomicBoolean requestShutdown;
/**
* Create this watcher using passed ZooKeeperWrapper instance.
* @param zk ZooKeeper
- * @param requestShutdown Flag to set to request shutdown.
+ * @param flag Flag to set to request shutdown.
*/
ZKMasterAddressWatcher(final ZooKeeperWrapper zk, final AtomicBoolean flag) {
this.requestShutdown = flag;
@@ -98,17 +99,30 @@
* address (or until cluster shutdown).
* @param address Address whose format is HServerAddress.toString
*/
- void writeAddressToZooKeeper(final HServerAddress address) {
- while (true) {
+ boolean writeAddressToZooKeeper(
+ final HServerAddress address, boolean retry) {
+ do {
waitForMasterAddressAvailability();
// Check if we need to shutdown instead of taking control
- if (this.requestShutdown.get()) return;
+ if (this.requestShutdown.get()) {
+ LOG.debug("Won't start Master because cluster is shuting down");
+ return false;
+ }
if(this.zookeeper.writeMasterAddress(address)) {
this.zookeeper.setClusterState(true);
// Watch our own node
this.zookeeper.readMasterAddress(this);
- return;
+ return true;
}
- }
+ } while(retry);
+ return false;
+ }
+
+ /**
+ * Reset the ZK in case a new connection is required
+ * @param zookeeper new instance
+ */
+ public void setZookeeper(ZooKeeperWrapper zookeeper) {
+ this.zookeeper = zookeeper;
}
}
\ No newline at end of file
Modified: hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/TestZooKeeper.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/TestZooKeeper.java?rev=830820&r1=830819&r2=830820&view=diff
==============================================================================
--- hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/TestZooKeeper.java (original)
+++ hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/TestZooKeeper.java Thu Oct 29 01:24:03 2009
@@ -95,41 +95,46 @@
connection.relocateRegion(HConstants.ROOT_TABLE_NAME, HConstants.EMPTY_BYTE_ARRAY);
}
- public void testRegionServerSessionExpired() {
- try {
- this.conf.setBoolean("hbase.regionserver.restart.on.zk.expire", true);
- new HTable(conf, HConstants.META_TABLE_NAME);
-
- ZooKeeperWrapper zkw = new ZooKeeperWrapper(conf, EmptyWatcher.instance);
- String quorumServers = zkw.getQuorumServers();
- int sessionTimeout = 5 * 1000; // 5 seconds
-
- HRegionServer rs = cluster.getRegionServer(0);
- ZooKeeperWrapper rsZK = rs.getZooKeeperWrapper();
- long sessionID = rsZK.getSessionID();
- byte[] password = rsZK.getSessionPassword();
-
- ZooKeeper zk = new ZooKeeper(quorumServers, sessionTimeout, EmptyWatcher.instance, sessionID, password);
- zk.close();
+ public void testRegionServerSessionExpired() throws Exception{
+ this.conf.setBoolean("hbase.regionserver.restart.on.zk.expire", true);
+ new HTable(conf, HConstants.META_TABLE_NAME);
+ HRegionServer rs = cluster.getRegionServer(0);
+ sessionExpirationHelper(rs.getZooKeeperWrapper());
+ }
- Thread.sleep(sessionTimeout * 3L);
+ public void testMasterSessionExpired() throws Exception {
+ new HTable(conf, HConstants.META_TABLE_NAME);
+ HMaster master = cluster.getMaster();
+ sessionExpirationHelper(master.getZooKeeperWrapper());
+ }
+
+ public void sessionExpirationHelper(ZooKeeperWrapper nodeZK) throws Exception{
+ ZooKeeperWrapper zkw = new ZooKeeperWrapper(conf, EmptyWatcher.instance);
+ String quorumServers = zkw.getQuorumServers();
+ int sessionTimeout = 5 * 1000; // 5 seconds
+
+ byte[] password = nodeZK.getSessionPassword();
+ long sessionID = nodeZK.getSessionID();
+
+ ZooKeeper zk = new ZooKeeper(quorumServers,
+ sessionTimeout, EmptyWatcher.instance, sessionID, password);
+ zk.close();
+
+ Thread.sleep(sessionTimeout * 3L);
+
+ new HTable(conf, HConstants.META_TABLE_NAME);
+
+ HBaseAdmin admin = new HBaseAdmin(conf);
+ HTableDescriptor desc = new HTableDescriptor("test");
+ HColumnDescriptor family = new HColumnDescriptor("fam");
+ desc.addFamily(family);
+ admin.createTable(desc);
+
+ HTable table = new HTable("test");
+ Put put = new Put(Bytes.toBytes("testrow"));
+ put.add(Bytes.toBytes("fam"), Bytes.toBytes("col"), Bytes.toBytes("testdata"));
+ table.put(put);
- new HTable(conf, HConstants.META_TABLE_NAME);
-
- HBaseAdmin admin = new HBaseAdmin(conf);
- HTableDescriptor desc = new HTableDescriptor("test");
- HColumnDescriptor family = new HColumnDescriptor("fam");
- desc.addFamily(family);
- admin.createTable(desc);
-
- HTable table = new HTable("test");
- Put put = new Put(Bytes.toBytes("testrow"));
- put.add(Bytes.toBytes("fam"), Bytes.toBytes("col"), Bytes.toBytes("testdata"));
- table.put(put);
- } catch (Exception e) {
- e.printStackTrace();
- fail();
- }
}
public void testMultipleZK() {