You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by st...@apache.org on 2012/05/18 19:44:05 UTC
svn commit: r1340185 - in /hbase/trunk: bin/
src/main/java/org/apache/hadoop/hbase/master/
src/main/java/org/apache/hadoop/hbase/regionserver/
src/main/java/org/apache/hadoop/hbase/zookeeper/
src/test/java/org/apache/hadoop/hbase/zookeeper/
Author: stack
Date: Fri May 18 17:44:04 2012
New Revision: 1340185
URL: http://svn.apache.org/viewvc?rev=1340185&view=rev
Log:
HBASE-5926 Delete the master znode after a master crash
Modified:
hbase/trunk/bin/hbase-daemon.sh
hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/ActiveMasterManager.java
hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/HMasterCommandLine.java
hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
hbase/trunk/src/main/java/org/apache/hadoop/hbase/zookeeper/MasterAddressTracker.java
hbase/trunk/src/test/java/org/apache/hadoop/hbase/zookeeper/TestZooKeeperNodeTracker.java
Modified: hbase/trunk/bin/hbase-daemon.sh
URL: http://svn.apache.org/viewvc/hbase/trunk/bin/hbase-daemon.sh?rev=1340185&r1=1340184&r2=1340185&view=diff
==============================================================================
--- hbase/trunk/bin/hbase-daemon.sh (original)
+++ hbase/trunk/bin/hbase-daemon.sh Fri May 18 17:44:04 2012
@@ -73,9 +73,13 @@ hbase_rotate_log ()
cleanZNode() {
if [ -f $HBASE_ZNODE_FILE ]; then
- #call ZK to delete the node
- ZNODE=`cat $HBASE_ZNODE_FILE`
- $bin/hbase zkcli delete $ZNODE > /dev/null 2>&1
+ if [ "$command" = "master" ]; then
+ $bin/hbase master clear > /dev/null 2>&1
+ else
+ #call ZK to delete the node
+ ZNODE=`cat $HBASE_ZNODE_FILE`
+ $bin/hbase zkcli delete $ZNODE > /dev/null 2>&1
+ fi
rm $HBASE_ZNODE_FILE
fi
}
Modified: hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/ActiveMasterManager.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/ActiveMasterManager.java?rev=1340185&r1=1340184&r2=1340185&view=diff
==============================================================================
--- hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/ActiveMasterManager.java (original)
+++ hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/ActiveMasterManager.java Fri May 18 17:44:04 2012
@@ -25,6 +25,7 @@ import java.util.concurrent.atomic.Atomi
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.ZNodeClearer;
import org.apache.hadoop.hbase.DeserializationException;
import org.apache.hadoop.hbase.Server;
import org.apache.hadoop.hbase.ServerName;
@@ -141,12 +142,17 @@ class ActiveMasterManager extends ZooKee
try {
String backupZNode =
ZKUtil.joinZNode(this.watcher.backupMasterAddressesZNode, this.sn.toString());
- if (MasterAddressTracker.setMasterAddress(this.watcher, this.watcher.getMasterAddressZNode(), this.sn)) {
+ if (MasterAddressTracker.setMasterAddress(this.watcher,
+ this.watcher.getMasterAddressZNode(), this.sn)) {
+
// If we were a backup master before, delete our ZNode from the backup
// master directory since we are the active now
LOG.info("Deleting ZNode for " + backupZNode + " from backup master directory");
ZKUtil.deleteNodeFailSilent(this.watcher, backupZNode);
+ // Save the znode in a file; this allows the launch scripts to check whether we crashed
+ ZNodeClearer.writeMyEphemeralNodeOnDisk(this.sn.toString());
+
// We are the master, return
startupStatus.setStatus("Successfully registered as active master.");
this.clusterHasActiveMaster.set(true);
@@ -189,6 +195,10 @@ class ActiveMasterManager extends ZooKee
currentMaster + "; master was restarted? Deleting node.");
// Hurry along the expiration of the znode.
ZKUtil.deleteNode(this.watcher, this.watcher.getMasterAddressZNode());
+
+ // We may have failed to delete the znode at the previous step, but
+ // we delete the file anyway: a second attempt to delete the znode is likely to fail again.
+ ZNodeClearer.deleteMyEphemeralNodeOnDisk();
} else {
msg = "Another master is the active master, " + currentMaster +
"; waiting to become the next active master";
@@ -249,6 +259,9 @@ class ActiveMasterManager extends ZooKee
}
if (activeMaster != null && activeMaster.equals(this.sn)) {
ZKUtil.deleteNode(watcher, watcher.getMasterAddressZNode());
+ // We may have failed to delete the znode at the previous step, but
+ // we delete the file anyway: a second attempt to delete the znode is likely to fail again.
+ ZNodeClearer.deleteMyEphemeralNodeOnDisk();
}
} catch (KeeperException e) {
LOG.error(this.watcher.prefix("Error deleting our own master address node"), e);
Modified: hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/HMasterCommandLine.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/HMasterCommandLine.java?rev=1340185&r1=1340184&r2=1340185&view=diff
==============================================================================
--- hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/HMasterCommandLine.java (original)
+++ hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/HMasterCommandLine.java Fri May 18 17:44:04 2012
@@ -31,6 +31,7 @@ import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.ZNodeClearer;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.LocalHBaseCluster;
import org.apache.hadoop.hbase.MasterNotRunningException;
@@ -47,9 +48,10 @@ public class HMasterCommandLine extends
private static final Log LOG = LogFactory.getLog(HMasterCommandLine.class);
private static final String USAGE =
- "Usage: Master [opts] start|stop\n" +
+ "Usage: Master [opts] start|stop|clear\n" +
" start Start Master. If local mode, start Master and RegionServer in same JVM\n" +
" stop Start cluster shutdown; Master signals RegionServer shutdown\n" +
+ " clear Delete the master znode in ZooKeeper after a master crashes\n "+
" where [opts] are:\n" +
" --minServers=<servers> Minimum RegionServers needed to host user tables.\n" +
" --backup Master should start in backup mode";
@@ -105,6 +107,8 @@ public class HMasterCommandLine extends
return startMaster();
} else if ("stop".equals(command)) {
return stopMaster();
+ } else if ("clear".equals(command)) {
+ return (ZNodeClearer.clear(getConf()) ? 0 : -1);
} else {
usage("Invalid command: " + command);
return -1;
Modified: hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java?rev=1340185&r1=1340184&r2=1340185&view=diff
==============================================================================
--- hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java (original)
+++ hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java Fri May 18 17:44:04 2012
@@ -19,9 +19,6 @@
*/
package org.apache.hadoop.hbase.regionserver;
-import java.io.BufferedWriter;
-import java.io.File;
-import java.io.FileWriter;
import java.io.IOException;
import java.io.StringWriter;
import java.lang.Thread.UncaughtExceptionHandler;
@@ -82,6 +79,7 @@ import org.apache.hadoop.hbase.TableDesc
import org.apache.hadoop.hbase.UnknownRowLockException;
import org.apache.hadoop.hbase.UnknownScannerException;
import org.apache.hadoop.hbase.YouAreDeadException;
+import org.apache.hadoop.hbase.ZNodeClearer;
import org.apache.hadoop.hbase.catalog.CatalogTracker;
import org.apache.hadoop.hbase.catalog.MetaEditor;
import org.apache.hadoop.hbase.catalog.MetaReader;
@@ -865,7 +863,7 @@ public class HRegionServer implements C
}
// We may have failed to delete the znode at the previous step, but
// we delete the file anyway: a second attempt to delete the znode is likely to fail again.
- deleteMyEphemeralNodeOnDisk();
+ ZNodeClearer.deleteMyEphemeralNodeOnDisk();
this.zooKeeper.close();
LOG.info("stopping server " + this.serverNameFromMasterPOV +
"; zookeeper connection closed.");
@@ -1054,7 +1052,7 @@ public class HRegionServer implements C
createMyEphemeralNode();
// Save it in a file, this will allow to see if we crash
- writeMyEphemeralNodeOnDisk();
+ ZNodeClearer.writeMyEphemeralNodeOnDisk(getMyEphemeralNodePath());
// Master sent us hbase.rootdir to use. Should be fully qualified
// path with file system specification included. Set 'fs.defaultFS'
@@ -1086,52 +1084,11 @@ public class HRegionServer implements C
}
}
- private String getMyEphemeralNodePath() {
- return ZKUtil.joinZNode(this.zooKeeper.rsZNode, getServerName().toString());
- }
-
- private String getMyEphemeralNodeFileName() {
- return System.getenv().get("HBASE_ZNODE_FILE");
- }
-
private void createMyEphemeralNode() throws KeeperException {
ZKUtil.createEphemeralNodeAndWatch(this.zooKeeper, getMyEphemeralNodePath(),
HConstants.EMPTY_BYTE_ARRAY);
}
- private void writeMyEphemeralNodeOnDisk() throws IOException {
- String fileName = getMyEphemeralNodeFileName();
-
- if (fileName == null) {
- LOG.warn("No filename given to save the znode used, it won't be saved " +
- "(Environment variable HBASE_ZNODE_FILE is not set).");
- return;
- }
-
- FileWriter fstream = new FileWriter(fileName);
- BufferedWriter out = new BufferedWriter(fstream);
- try {
- out.write(getMyEphemeralNodePath() + "\n");
- } finally {
- try {
- out.close();
- } finally {
- fstream.close();
- }
- }
- }
-
- private void deleteMyEphemeralNodeOnDisk(){
- String fileName = getMyEphemeralNodeFileName();
-
- if (fileName == null){
- return;
- }
-
- File f = new File(fileName);
- f.delete();
- }
-
private void deleteMyEphemeralNode() throws KeeperException {
ZKUtil.deleteNode(this.zooKeeper, getMyEphemeralNodePath());
}
@@ -3915,4 +3872,8 @@ public class HRegionServer implements C
return stoppable.isStopped();
}
}
+
+ private String getMyEphemeralNodePath() {
+ return ZKUtil.joinZNode(this.zooKeeper.rsZNode, getServerName().toString());
+ }
}
Modified: hbase/trunk/src/main/java/org/apache/hadoop/hbase/zookeeper/MasterAddressTracker.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/main/java/org/apache/hadoop/hbase/zookeeper/MasterAddressTracker.java?rev=1340185&r1=1340184&r2=1340185&view=diff
==============================================================================
--- hbase/trunk/src/main/java/org/apache/hadoop/hbase/zookeeper/MasterAddressTracker.java (original)
+++ hbase/trunk/src/main/java/org/apache/hadoop/hbase/zookeeper/MasterAddressTracker.java Fri May 18 17:44:04 2012
@@ -27,6 +27,7 @@ import org.apache.hadoop.hbase.protobuf.
import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos;
import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos;
import org.apache.zookeeper.KeeperException;
+import org.apache.zookeeper.data.Stat;
/**
* Manages the location of the current active Master for the RegionServer.
@@ -153,4 +154,28 @@ public class MasterAddressTracker extend
mbuilder.setMaster(snbuilder.build());
return ProtobufUtil.prependPBMagic(mbuilder.build().toByteArray());
}
+
+ /**
+ * Deletes the master znode if its content is the same as the given parameter.
+ */
+ public static boolean deleteIfEquals(ZooKeeperWatcher zkw, final String content) {
+ if (content == null){
+ throw new IllegalArgumentException("Content must not be null");
+ }
+
+ try {
+ Stat stat = new Stat();
+ byte[] data = ZKUtil.getDataNoWatch(zkw, zkw.getMasterAddressZNode(), stat);
+ ServerName sn = ServerName.parseFrom(data);
+ if (sn != null && content.equals(sn.toString())) {
+ return (ZKUtil.deleteNode(zkw, zkw.getMasterAddressZNode(), stat.getVersion()));
+ }
+ } catch (KeeperException e) {
+ LOG.warn("Can't get or delete the master znode", e);
+ } catch (DeserializationException e) {
+ LOG.warn("Can't get or delete the master znode", e);
+ }
+
+ return false;
+ }
}
\ No newline at end of file
Modified: hbase/trunk/src/test/java/org/apache/hadoop/hbase/zookeeper/TestZooKeeperNodeTracker.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/test/java/org/apache/hadoop/hbase/zookeeper/TestZooKeeperNodeTracker.java?rev=1340185&r1=1340184&r2=1340185&view=diff
==============================================================================
--- hbase/trunk/src/test/java/org/apache/hadoop/hbase/zookeeper/TestZooKeeperNodeTracker.java (original)
+++ hbase/trunk/src/test/java/org/apache/hadoop/hbase/zookeeper/TestZooKeeperNodeTracker.java Fri May 18 17:44:04 2012
@@ -26,16 +26,20 @@ import static org.junit.Assert.assertNul
import static org.junit.Assert.assertTrue;
import java.io.IOException;
+import java.lang.reflect.Method;
import java.util.Random;
import java.util.concurrent.Semaphore;
+import junit.framework.Assert;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hbase.*;
+import org.apache.hadoop.hbase.client.HConnectionManager;
import org.apache.hadoop.hbase.master.TestActiveMasterManager.NodeDeletionListener;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.Threads;
import org.apache.zookeeper.CreateMode;
+import org.apache.zookeeper.KeeperException;
import org.apache.zookeeper.WatchedEvent;
import org.apache.zookeeper.Watcher;
import org.apache.zookeeper.ZooDefs.Ids;
@@ -313,6 +317,38 @@ public class TestZooKeeperNodeTracker {
public void process(WatchedEvent event) {}
}
+ @Test
+ public void testCleanZNode() throws Exception {
+ ZooKeeperWatcher zkw = new ZooKeeperWatcher(TEST_UTIL.getConfiguration(),
+ "testNodeTracker", new TestZooKeeperNodeTracker.StubAbortable());
+
+ final ServerName sn = new ServerName("127.0.0.1:52",45L);
+
+ ZKUtil.createAndFailSilent(zkw,
+ TEST_UTIL.getConfiguration().get(HConstants.ZOOKEEPER_ZNODE_PARENT,
+ HConstants.DEFAULT_ZOOKEEPER_ZNODE_PARENT));
+
+ final String nodeName = zkw.getMasterAddressZNode();
+
+ // Check that we manage the case when there is no data
+ ZKUtil.createAndFailSilent(zkw, nodeName);
+ MasterAddressTracker.deleteIfEquals(zkw, sn.toString());
+ Assert.assertFalse(ZKUtil.getData(zkw, nodeName) == null);
+
+ // Check that we don't delete if we're not supposed to
+ ZKUtil.setData(zkw, nodeName, MasterAddressTracker.toByteArray(sn));
+ MasterAddressTracker.deleteIfEquals(zkw, new ServerName("127.0.0.2:52",45L).toString());
+ Assert.assertFalse(ZKUtil.getData(zkw, nodeName) == null);
+
+ // Check that we delete when we're supposed to
+ ZKUtil.setData(zkw, nodeName,MasterAddressTracker.toByteArray(sn));
+ MasterAddressTracker.deleteIfEquals(zkw, sn.toString());
+ Assert.assertTrue( ZKUtil.getData(zkw, nodeName)== null );
+
+ // Check that we support the case when the znode does not exist
+ MasterAddressTracker.deleteIfEquals(zkw, sn.toString()); // must not throw an exception
+ }
+
@org.junit.Rule
public org.apache.hadoop.hbase.ResourceCheckerJUnitRule cu =
new org.apache.hadoop.hbase.ResourceCheckerJUnitRule();