You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by st...@apache.org on 2012/04/23 22:29:45 UTC

svn commit: r1329430 - in /hbase/trunk: bin/hbase-daemon.sh src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java

Author: stack
Date: Mon Apr 23 20:29:45 2012
New Revision: 1329430

URL: http://svn.apache.org/viewvc?rev=1329430&view=rev
Log:
HBASE-5844 Delete the region server's znode after a region server crash

Modified:
    hbase/trunk/bin/hbase-daemon.sh
    hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java

Modified: hbase/trunk/bin/hbase-daemon.sh
URL: http://svn.apache.org/viewvc/hbase/trunk/bin/hbase-daemon.sh?rev=1329430&r1=1329429&r2=1329430&view=diff
==============================================================================
--- hbase/trunk/bin/hbase-daemon.sh (original)
+++ hbase/trunk/bin/hbase-daemon.sh Mon Apr 23 20:29:45 2012
@@ -32,6 +32,15 @@
 #
 # Modelled after $HADOOP_HOME/bin/hadoop-daemon.sh
 
+cleanZNode() {
+  if [ -f $HBASE_ZNODE_FILE ]; then
+    #call ZK to delete the node
+    ZNODE=`cat $HBASE_ZNODE_FILE`
+    echo "Region Server $HBASE_ZNODE_FILE didn't stop properly. Cleaning ZNode ($ZNODE) to trigger an immediate recovery."
+    $bin/hbase zkcli delete $ZNODE > /dev/null 2>&1
+  fi
+}
+
 usage="Usage: hbase-daemon.sh [--config <conf-dir>]\
  (start|stop|restart) <hbase-command> \
  <args...>"
@@ -96,7 +105,7 @@ fi
 mkdir -p "$HBASE_LOG_DIR"
 
 if [ "$HBASE_PID_DIR" = "" ]; then
-  HBASE_PID_DIR=/tmp
+  export HBASE_PID_DIR=/tmp
 fi
 
 if [ "$HBASE_IDENT_STRING" = "" ]; then
@@ -121,6 +130,7 @@ logout=$HBASE_LOG_DIR/$HBASE_LOG_PREFIX.
 loggc=$HBASE_LOG_DIR/$HBASE_LOG_PREFIX.gc
 loglog="${HBASE_LOG_DIR}/${HBASE_LOGFILE}"
 pid=$HBASE_PID_DIR/hbase-$HBASE_IDENT_STRING-$command.pid
+export HBASE_ZNODE_FILE=$HBASE_PID_DIR/hbase-$HBASE_IDENT_STRING-$command.znode
 
 if [ "$HBASE_USE_GC_LOGFILE" = "true" ]; then
   export HBASE_GC_OPTS=" -Xloggc:${loggc}"
@@ -148,9 +158,9 @@ case $startStop in
     # Add to the command log file vital stats on our environment.
     echo "`date` Starting $command on `hostname`" >> $loglog
     echo "`ulimit -a`" >> $loglog 2>&1
-    nohup nice -n $HBASE_NICENESS "$HBASE_HOME"/bin/hbase \
+    (nohup nice -n $HBASE_NICENESS "$HBASE_HOME"/bin/hbase \
         --config "${HBASE_CONF_DIR}" \
-        $command "$@" $startStop > "$logout" 2>&1 < /dev/null &
+        $command "$@" $startStop > "$logout" 2>&1 < /dev/null ; cleanZNode) &
     echo $! > $pid
     sleep 1; head "$logout"
     ;;

Modified: hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java?rev=1329430&r1=1329429&r2=1329430&view=diff
==============================================================================
--- hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java (original)
+++ hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java Mon Apr 23 20:29:45 2012
@@ -19,6 +19,9 @@
  */
 package org.apache.hadoop.hbase.regionserver;
 
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.FileWriter;
 import java.io.IOException;
 import java.io.StringWriter;
 import java.lang.Thread.UncaughtExceptionHandler;
@@ -719,6 +722,9 @@ public class HRegionServer extends Regio
     } catch (KeeperException e) {
       LOG.warn("Failed deleting my ephemeral node", e);
     }
+    // We may have failed to delete the znode in the previous step, but we
+    // delete the file anyway: a second attempt to delete the znode would likely fail too.
+    deleteMyEphemeralNodeOnDisk();
     this.zooKeeper.close();
     LOG.info("stopping server " + this.serverNameFromMasterPOV +
       "; zookeeper connection closed.");
@@ -895,6 +901,9 @@ public class HRegionServer extends Regio
       // Set our ephemeral znode up in zookeeper now we have a name.
       createMyEphemeralNode();
 
+      // Save it in a file; this lets us detect afterwards whether we crashed
+      writeMyEphemeralNodeOnDisk();
+
       // Master sent us hbase.rootdir to use. Should be fully qualified
       // path with file system specification included. Set 'fs.defaultFS'
       // to match the filesystem on hbase.rootdir else underlying hadoop hdfs
@@ -929,11 +938,49 @@ public class HRegionServer extends Regio
     return ZKUtil.joinZNode(this.zooKeeper.rsZNode, getServerName().toString());
   }
 
+  private String getMyEphemeralNodeFileName(){
+    return System.getenv().get("HBASE_ZNODE_FILE");
+  }
+
+
   private void createMyEphemeralNode() throws KeeperException {
     ZKUtil.createEphemeralNodeAndWatch(this.zooKeeper, getMyEphemeralNodePath(),
       HConstants.EMPTY_BYTE_ARRAY);
   }
 
+  private void writeMyEphemeralNodeOnDisk() throws IOException {
+    String fileName = getMyEphemeralNodeFileName();
+
+    if (fileName==null){
+      LOG.warn("No filename given to save the znode used, it won't be saved "+
+      "(Environment variable HBASE_ZNODE_FILE is not set).");
+      return;
+    }
+
+    FileWriter fstream = new FileWriter(fileName);
+    BufferedWriter out = new BufferedWriter(fstream);
+    try {
+      out.write(getMyEphemeralNodePath()+"\n");
+    } finally {
+      try {
+        out.close();
+      } finally {
+        fstream.close();
+      }
+    }
+  }
+
+  private void deleteMyEphemeralNodeOnDisk(){
+    String fileName = getMyEphemeralNodeFileName();
+
+    if (fileName==null){
+      return;
+    }
+
+    File f = new File(fileName);
+    f.delete();
+  }
+
   private void deleteMyEphemeralNode() throws KeeperException {
     ZKUtil.deleteNode(this.zooKeeper, getMyEphemeralNodePath());
   }