You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by st...@apache.org on 2012/05/18 19:44:05 UTC

svn commit: r1340185 - in /hbase/trunk: bin/ src/main/java/org/apache/hadoop/hbase/master/ src/main/java/org/apache/hadoop/hbase/regionserver/ src/main/java/org/apache/hadoop/hbase/zookeeper/ src/test/java/org/apache/hadoop/hbase/zookeeper/

Author: stack
Date: Fri May 18 17:44:04 2012
New Revision: 1340185

URL: http://svn.apache.org/viewvc?rev=1340185&view=rev
Log:
HBASE-5926 Delete the master znode after a master crash

Modified:
    hbase/trunk/bin/hbase-daemon.sh
    hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/ActiveMasterManager.java
    hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/HMasterCommandLine.java
    hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
    hbase/trunk/src/main/java/org/apache/hadoop/hbase/zookeeper/MasterAddressTracker.java
    hbase/trunk/src/test/java/org/apache/hadoop/hbase/zookeeper/TestZooKeeperNodeTracker.java

Modified: hbase/trunk/bin/hbase-daemon.sh
URL: http://svn.apache.org/viewvc/hbase/trunk/bin/hbase-daemon.sh?rev=1340185&r1=1340184&r2=1340185&view=diff
==============================================================================
--- hbase/trunk/bin/hbase-daemon.sh (original)
+++ hbase/trunk/bin/hbase-daemon.sh Fri May 18 17:44:04 2012
@@ -73,9 +73,13 @@ hbase_rotate_log ()
 
 cleanZNode() {
   if [ -f $HBASE_ZNODE_FILE ]; then
-    #call ZK to delete the node
-    ZNODE=`cat $HBASE_ZNODE_FILE`
-    $bin/hbase zkcli delete $ZNODE > /dev/null 2>&1
+    if [ "$command" = "master" ]; then
+      $bin/hbase master clear > /dev/null 2>&1
+    else
+      #call ZK to delete the node
+      ZNODE=`cat $HBASE_ZNODE_FILE`
+      $bin/hbase zkcli delete $ZNODE > /dev/null 2>&1
+    fi
     rm $HBASE_ZNODE_FILE
   fi
 }

Modified: hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/ActiveMasterManager.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/ActiveMasterManager.java?rev=1340185&r1=1340184&r2=1340185&view=diff
==============================================================================
--- hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/ActiveMasterManager.java (original)
+++ hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/ActiveMasterManager.java Fri May 18 17:44:04 2012
@@ -25,6 +25,7 @@ import java.util.concurrent.atomic.Atomi
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.ZNodeClearer;
 import org.apache.hadoop.hbase.DeserializationException;
 import org.apache.hadoop.hbase.Server;
 import org.apache.hadoop.hbase.ServerName;
@@ -141,12 +142,17 @@ class ActiveMasterManager extends ZooKee
       try {
         String backupZNode =
             ZKUtil.joinZNode(this.watcher.backupMasterAddressesZNode, this.sn.toString());
-        if (MasterAddressTracker.setMasterAddress(this.watcher, this.watcher.getMasterAddressZNode(), this.sn)) {
+        if (MasterAddressTracker.setMasterAddress(this.watcher,
+            this.watcher.getMasterAddressZNode(), this.sn)) {
+
           // If we were a backup master before, delete our ZNode from the backup
           // master directory since we are the active now
           LOG.info("Deleting ZNode for " + backupZNode + " from backup master directory");
           ZKUtil.deleteNodeFailSilent(this.watcher, backupZNode);
 
+          // Save the znode in a file; this allows the launch scripts to check whether we crashed
+          ZNodeClearer.writeMyEphemeralNodeOnDisk(this.sn.toString());
+
           // We are the master, return
           startupStatus.setStatus("Successfully registered as active master.");
           this.clusterHasActiveMaster.set(true);
@@ -189,6 +195,10 @@ class ActiveMasterManager extends ZooKee
               currentMaster + "; master was restarted? Deleting node.");
             // Hurry along the expiration of the znode.
             ZKUtil.deleteNode(this.watcher, this.watcher.getMasterAddressZNode());
+
+            // We may have failed to delete the znode at the previous step, but
+            //  we delete the file anyway: a second attempt to delete the znode is likely to fail again.
+            ZNodeClearer.deleteMyEphemeralNodeOnDisk();
           } else {
             msg = "Another master is the active master, " + currentMaster +
               "; waiting to become the next active master";
@@ -249,6 +259,9 @@ class ActiveMasterManager extends ZooKee
       }
       if (activeMaster != null &&  activeMaster.equals(this.sn)) {
         ZKUtil.deleteNode(watcher, watcher.getMasterAddressZNode());
+        // We may have failed to delete the znode at the previous step, but
+        //  we delete the file anyway: a second attempt to delete the znode is likely to fail again.
+        ZNodeClearer.deleteMyEphemeralNodeOnDisk();
       }
     } catch (KeeperException e) {
       LOG.error(this.watcher.prefix("Error deleting our own master address node"), e);

Modified: hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/HMasterCommandLine.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/HMasterCommandLine.java?rev=1340185&r1=1340184&r2=1340185&view=diff
==============================================================================
--- hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/HMasterCommandLine.java (original)
+++ hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/HMasterCommandLine.java Fri May 18 17:44:04 2012
@@ -31,6 +31,7 @@ import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.ZNodeClearer;
 import org.apache.hadoop.hbase.HConstants;
 import org.apache.hadoop.hbase.LocalHBaseCluster;
 import org.apache.hadoop.hbase.MasterNotRunningException;
@@ -47,9 +48,10 @@ public class HMasterCommandLine extends 
   private static final Log LOG = LogFactory.getLog(HMasterCommandLine.class);
 
   private static final String USAGE =
-    "Usage: Master [opts] start|stop\n" +
+    "Usage: Master [opts] start|stop|clear\n" +
     " start  Start Master. If local mode, start Master and RegionServer in same JVM\n" +
     " stop   Start cluster shutdown; Master signals RegionServer shutdown\n" +
+    " clear  Delete the master znode in ZooKeeper after a master crashes\n" +
     " where [opts] are:\n" +
     "   --minServers=<servers>    Minimum RegionServers needed to host user tables.\n" +
     "   --backup                  Master should start in backup mode";
@@ -105,6 +107,8 @@ public class HMasterCommandLine extends 
       return startMaster();
     } else if ("stop".equals(command)) {
       return stopMaster();
+    } else if ("clear".equals(command)) {
+      return (ZNodeClearer.clear(getConf()) ? 0 : -1);
     } else {
       usage("Invalid command: " + command);
       return -1;

Modified: hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java?rev=1340185&r1=1340184&r2=1340185&view=diff
==============================================================================
--- hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java (original)
+++ hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java Fri May 18 17:44:04 2012
@@ -19,9 +19,6 @@
  */
 package org.apache.hadoop.hbase.regionserver;
 
-import java.io.BufferedWriter;
-import java.io.File;
-import java.io.FileWriter;
 import java.io.IOException;
 import java.io.StringWriter;
 import java.lang.Thread.UncaughtExceptionHandler;
@@ -82,6 +79,7 @@ import org.apache.hadoop.hbase.TableDesc
 import org.apache.hadoop.hbase.UnknownRowLockException;
 import org.apache.hadoop.hbase.UnknownScannerException;
 import org.apache.hadoop.hbase.YouAreDeadException;
+import org.apache.hadoop.hbase.ZNodeClearer;
 import org.apache.hadoop.hbase.catalog.CatalogTracker;
 import org.apache.hadoop.hbase.catalog.MetaEditor;
 import org.apache.hadoop.hbase.catalog.MetaReader;
@@ -865,7 +863,7 @@ public class  HRegionServer implements C
     }
     // We may have failed to delete the znode at the previous step, but
     //  we delete the file anyway: a second attempt to delete the znode is likely to fail again.
-    deleteMyEphemeralNodeOnDisk();
+    ZNodeClearer.deleteMyEphemeralNodeOnDisk();
     this.zooKeeper.close();
     LOG.info("stopping server " + this.serverNameFromMasterPOV +
       "; zookeeper connection closed.");
@@ -1054,7 +1052,7 @@ public class  HRegionServer implements C
       createMyEphemeralNode();
 
       // Save it in a file, this will allow to see if we crash
-      writeMyEphemeralNodeOnDisk();
+      ZNodeClearer.writeMyEphemeralNodeOnDisk(getMyEphemeralNodePath());
 
       // Master sent us hbase.rootdir to use. Should be fully qualified
       // path with file system specification included. Set 'fs.defaultFS'
@@ -1086,52 +1084,11 @@ public class  HRegionServer implements C
     }
   }
 
-  private String getMyEphemeralNodePath() {
-    return ZKUtil.joinZNode(this.zooKeeper.rsZNode, getServerName().toString());
-  }
-
-  private String getMyEphemeralNodeFileName() {
-    return System.getenv().get("HBASE_ZNODE_FILE");
-  }
-
   private void createMyEphemeralNode() throws KeeperException {
     ZKUtil.createEphemeralNodeAndWatch(this.zooKeeper, getMyEphemeralNodePath(),
       HConstants.EMPTY_BYTE_ARRAY);
   }
 
-  private void writeMyEphemeralNodeOnDisk() throws IOException {
-    String fileName = getMyEphemeralNodeFileName();
-
-    if (fileName == null) {
-      LOG.warn("No filename given to save the znode used, it won't be saved " +
-          "(Environment variable HBASE_ZNODE_FILE is not set).");
-      return;
-    }
-
-    FileWriter fstream = new FileWriter(fileName);
-    BufferedWriter out = new BufferedWriter(fstream);
-    try {
-      out.write(getMyEphemeralNodePath() + "\n");
-    } finally {
-      try {
-        out.close();
-      } finally {
-        fstream.close();
-      }
-    }
-  }
-
-  private void deleteMyEphemeralNodeOnDisk(){
-        String fileName = getMyEphemeralNodeFileName();
-
-        if (fileName == null){
-         return;
-       }
-
-        File f = new File(fileName);
-       f.delete();
-      }
-
   private void deleteMyEphemeralNode() throws KeeperException {
     ZKUtil.deleteNode(this.zooKeeper, getMyEphemeralNodePath());
   }
@@ -3915,4 +3872,8 @@ public class  HRegionServer implements C
       return stoppable.isStopped();
     }
   }
+
+  private String getMyEphemeralNodePath() { // rsZNode joined with this server's name
+    return ZKUtil.joinZNode(this.zooKeeper.rsZNode, getServerName().toString());
+  }
 }

Modified: hbase/trunk/src/main/java/org/apache/hadoop/hbase/zookeeper/MasterAddressTracker.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/main/java/org/apache/hadoop/hbase/zookeeper/MasterAddressTracker.java?rev=1340185&r1=1340184&r2=1340185&view=diff
==============================================================================
--- hbase/trunk/src/main/java/org/apache/hadoop/hbase/zookeeper/MasterAddressTracker.java (original)
+++ hbase/trunk/src/main/java/org/apache/hadoop/hbase/zookeeper/MasterAddressTracker.java Fri May 18 17:44:04 2012
@@ -27,6 +27,7 @@ import org.apache.hadoop.hbase.protobuf.
 import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos;
 import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos;
 import org.apache.zookeeper.KeeperException;
+import org.apache.zookeeper.data.Stat;
 
 /**
  * Manages the location of the current active Master for the RegionServer.
@@ -153,4 +154,28 @@ public class MasterAddressTracker extend
      mbuilder.setMaster(snbuilder.build());
      return ProtobufUtil.prependPBMagic(mbuilder.build().toByteArray());
    }
+
+  /**
+   * Deletes the master znode if the server name stored in it equals {@code content}.
+   */
+  public static boolean deleteIfEquals(ZooKeeperWatcher zkw, final String content) {
+    if (content == null){
+      throw new IllegalArgumentException("Content must not be null");
+    }
+
+    try {
+      Stat stat = new Stat(); // handed to the read so its version can be passed to the delete
+      byte[] data = ZKUtil.getDataNoWatch(zkw, zkw.getMasterAddressZNode(), stat);
+      ServerName sn = ServerName.parseFrom(data);
+      if (sn != null && content.equals(sn.toString())) {
+        return (ZKUtil.deleteNode(zkw, zkw.getMasterAddressZNode(), stat.getVersion()));
+      }
+    } catch (KeeperException e) {
+      LOG.warn("Can't get or delete the master znode", e);
+    } catch (DeserializationException e) {
+      LOG.warn("Can't get or delete the master znode", e);
+    }
+
+    return false; // no matching server name was found, or an error was logged above
+  }
 }
\ No newline at end of file

Modified: hbase/trunk/src/test/java/org/apache/hadoop/hbase/zookeeper/TestZooKeeperNodeTracker.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/test/java/org/apache/hadoop/hbase/zookeeper/TestZooKeeperNodeTracker.java?rev=1340185&r1=1340184&r2=1340185&view=diff
==============================================================================
--- hbase/trunk/src/test/java/org/apache/hadoop/hbase/zookeeper/TestZooKeeperNodeTracker.java (original)
+++ hbase/trunk/src/test/java/org/apache/hadoop/hbase/zookeeper/TestZooKeeperNodeTracker.java Fri May 18 17:44:04 2012
@@ -26,16 +26,20 @@ import static org.junit.Assert.assertNul
 import static org.junit.Assert.assertTrue;
 
 import java.io.IOException;
+import java.lang.reflect.Method;
 import java.util.Random;
 import java.util.concurrent.Semaphore;
 
+import junit.framework.Assert;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.hbase.*;
+import org.apache.hadoop.hbase.client.HConnectionManager;
 import org.apache.hadoop.hbase.master.TestActiveMasterManager.NodeDeletionListener;
 import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.hbase.util.Threads;
 import org.apache.zookeeper.CreateMode;
+import org.apache.zookeeper.KeeperException;
 import org.apache.zookeeper.WatchedEvent;
 import org.apache.zookeeper.Watcher;
 import org.apache.zookeeper.ZooDefs.Ids;
@@ -313,6 +317,38 @@ public class TestZooKeeperNodeTracker {
     public void process(WatchedEvent event) {}
   }
 
+  @Test
+  public void testCleanZNode() throws Exception { // exercises MasterAddressTracker.deleteIfEquals
+    ZooKeeperWatcher zkw = new ZooKeeperWatcher(TEST_UTIL.getConfiguration(),
+        "testNodeTracker", new TestZooKeeperNodeTracker.StubAbortable());
+
+    final ServerName sn = new ServerName("127.0.0.1:52",45L);
+
+    ZKUtil.createAndFailSilent(zkw,
+        TEST_UTIL.getConfiguration().get(HConstants.ZOOKEEPER_ZNODE_PARENT,
+            HConstants.DEFAULT_ZOOKEEPER_ZNODE_PARENT));
+
+    final String nodeName =  zkw.getMasterAddressZNode();
+
+    // Case 1: the znode exists but holds no data; deleteIfEquals must leave it in place
+    ZKUtil.createAndFailSilent(zkw, nodeName);
+    MasterAddressTracker.deleteIfEquals(zkw, sn.toString());
+    Assert.assertFalse(ZKUtil.getData(zkw, nodeName) == null);
+
+    // Case 2: the znode holds sn but we pass a different server name; it must not be deleted
+    ZKUtil.setData(zkw, nodeName, MasterAddressTracker.toByteArray(sn));
+    MasterAddressTracker.deleteIfEquals(zkw, new ServerName("127.0.0.2:52",45L).toString());
+    Assert.assertFalse(ZKUtil.getData(zkw, nodeName) == null);
+
+    // Case 3: the znode holds sn and we pass sn; the znode must be deleted
+    ZKUtil.setData(zkw, nodeName,MasterAddressTracker.toByteArray(sn));
+    MasterAddressTracker.deleteIfEquals(zkw, sn.toString());
+    Assert.assertTrue( ZKUtil.getData(zkw, nodeName)== null );
+
+    // Case 4: the znode no longer exists (deleted in case 3); the call must still return normally
+    MasterAddressTracker.deleteIfEquals(zkw, sn.toString()); // must not throw an exception
+  }
+
   @org.junit.Rule
   public org.apache.hadoop.hbase.ResourceCheckerJUnitRule cu =
     new org.apache.hadoop.hbase.ResourceCheckerJUnitRule();