You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by ap...@apache.org on 2009/04/03 03:24:45 UTC

svn commit: r761498 - in /hadoop/hbase/trunk: CHANGES.txt src/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java src/java/org/apache/hadoop/hbase/zookeeper/ZooKeeperWrapper.java

Author: apurtell
Date: Fri Apr  3 01:24:45 2009
New Revision: 761498

URL: http://svn.apache.org/viewvc?rev=761498&view=rev
Log:
HBASE-1205 RegionServers should find new master when a new master comes up

Modified:
    hadoop/hbase/trunk/CHANGES.txt
    hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
    hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/zookeeper/ZooKeeperWrapper.java

Modified: hadoop/hbase/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/CHANGES.txt?rev=761498&r1=761497&r2=761498&view=diff
==============================================================================
--- hadoop/hbase/trunk/CHANGES.txt (original)
+++ hadoop/hbase/trunk/CHANGES.txt Fri Apr  3 01:24:45 2009
@@ -132,6 +132,8 @@
                (Tim Sell via Stack)
    HBASE-1186  Memory-aware Maps with LRU eviction for cell cache 
                (Jonathan Gray via Andrew Purtell)
+   HBASE-1205  RegionServers should find new master when a new master comes up
+               (Nitay Joffe via Andrew Purtell)
 
 Release 0.19.0 - 01/21/2009
   INCOMPATIBLE CHANGES

Modified: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java?rev=761498&r1=761497&r2=761498&view=diff
==============================================================================
--- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java (original)
+++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java Fri Apr  3 01:24:45 2009
@@ -104,12 +104,16 @@
 import org.apache.hadoop.io.Writable;
 import org.apache.hadoop.util.Progressable;
 import org.apache.hadoop.util.StringUtils;
+import org.apache.zookeeper.WatchedEvent;
+import org.apache.zookeeper.Watcher;
+import org.apache.zookeeper.Watcher.Event.EventType;
 
 /**
  * HRegionServer makes a set of HRegions available to clients.  It checks in with
  * the HMaster. There are many HRegionServers in a single HBase deployment.
  */
-public class HRegionServer implements HConstants, HRegionInterface, HBaseRPCErrorHandler, Runnable {
+public class HRegionServer implements HConstants, HRegionInterface,
+    HBaseRPCErrorHandler, Runnable, Watcher {
   static final Log LOG = LogFactory.getLog(HRegionServer.class);
   private static final HMsg REPORT_EXITING = new HMsg(Type.MSG_REPORT_EXITING);
   private static final HMsg REPORT_QUIESCED = new HMsg(Type.MSG_REPORT_QUIESCED);
@@ -215,6 +219,9 @@
 
   private final ZooKeeperWrapper zooKeeperWrapper;
 
+  // A sleeper that sleeps for msgInterval.
+  private final Sleeper sleeper;
+
   /**
    * Starts a HRegionServer at the default location
    * @param conf
@@ -247,6 +254,8 @@
     this.serverLeaseTimeout =
       conf.getInt("hbase.master.lease.period", 120 * 1000);
 
+    sleeper = new Sleeper(this.msgInterval, this.stopRequested);
+
     // Cache flushing thread.
     this.cacheFlusher = new MemcacheFlusher(conf, this);
     
@@ -287,6 +296,8 @@
         "hbase-958 debugging");
     }
     this.zooKeeperWrapper = new ZooKeeperWrapper(conf);
+    watchMasterAddress();
+
     boolean startCodeOk = false; 
     while(!startCodeOk) {
       serverInfo.setStartCode(System.currentTimeMillis());
@@ -307,7 +318,32 @@
     for(int i = 0; i < nbBlocks; i++)  {
       reservedSpace.add(new byte[DEFAULT_SIZE_RESERVATION_BLOCK]);
     }
-    
+  }
+
+  /**
+   * We register ourselves as a watcher on the master address ZNode. This is
+   * called by ZooKeeper when we get an event on that ZNode. When this method
+   * is called it means either our master has died, or a new one has come up.
+   * Either way we need to update our knowledge of the master.
+   * @param event WatchedEvent from ZooKeeper.
+   */
+  public void process(WatchedEvent event) {
+    EventType type = event.getType();
+    LOG.info("Got ZooKeeper event, state: " + event.getState() + ", type: " +
+              type + ", path: " + event.getPath());
+    if (type == EventType.NodeCreated) {
+      getMaster();
+    }
+
+    // ZooKeeper watches are one time only, so we need to re-register our watch.
+    watchMasterAddress();
+  }
+
+  private void watchMasterAddress() {
+    while (!stopRequested.get() && !zooKeeperWrapper.watchMasterAddress(this)) {
+      LOG.warn("Unable to set watcher on ZooKeeper master address. Retrying.");
+      sleeper.sleep();
+    }
   }
 
   /**
@@ -317,10 +353,8 @@
    */
   public void run() {
     boolean quiesceRequested = false;
-    // A sleeper that sleeps for msgInterval.
-    Sleeper sleeper = new Sleeper(this.msgInterval, this.stopRequested);
     try {
-      init(reportForDuty(sleeper));
+      init(reportForDuty());
       long lastMsg = 0;
       // Now ask master what it wants us to do and tell it what we have done
       for (int tries = 0; !stopRequested.get() && isHealthy();) {
@@ -391,7 +425,7 @@
               switch(msgs[i].getType()) {
               case MSG_CALL_SERVER_STARTUP:
                 // We the MSG_CALL_SERVER_STARTUP on startup but we can also
-                // get it when the master is panicing because for instance
+                // get it when the master is panicking because for instance
                 // the HDFS has been yanked out from under it.  Be wary of
                 // this message.
                 if (checkFileSystem()) {
@@ -412,7 +446,7 @@
                     LOG.fatal("error restarting server", e);
                     break;
                   }
-                  reportForDuty(sleeper);
+                  reportForDuty();
                   restart = true;
                 } else {
                   LOG.fatal("file system available check failed. " +
@@ -1124,16 +1158,12 @@
     Threads.shutdown(this.compactSplitThread);
     Threads.shutdown(this.logRoller);
   }
-  
-  /*
-   * Let the master know we're here
-   * Run initialization using parameters passed us by the master.
-   */
-  private MapWritable reportForDuty(final Sleeper sleeper) {
+
+  private boolean getMaster() {
     HServerAddress masterAddress = null;
     while (masterAddress == null) {
       if (stopRequested.get()) {
-        return null;
+        return false;
       }
       try {
         masterAddress = zooKeeperWrapper.readMasterAddressOrThrow();
@@ -1144,9 +1174,7 @@
       }
     }
 
-    if (LOG.isDebugEnabled()) {
-      LOG.debug("Telling master at " + masterAddress + " that we are up");
-    }
+    LOG.info("Telling master at " + masterAddress + " that we are up");
     HMasterRegionInterface master = null;
     while (!stopRequested.get() && master == null) {
       try {
@@ -1162,6 +1190,17 @@
       }
     }
     this.hbaseMaster = master;
+    return true;
+  }
+
+  /*
+   * Let the master know we're here
+   * Run initialization using parameters passed us by the master.
+   */
+  private MapWritable reportForDuty() {
+    if (!getMaster()) {
+      return null;
+    }
     MapWritable result = null;
     long lastMsg = 0;
     while(!stopRequested.get()) {

Modified: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/zookeeper/ZooKeeperWrapper.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/zookeeper/ZooKeeperWrapper.java?rev=761498&r1=761497&r2=761498&view=diff
==============================================================================
--- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/zookeeper/ZooKeeperWrapper.java (original)
+++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/zookeeper/ZooKeeperWrapper.java Fri Apr  3 01:24:45 2009
@@ -224,6 +224,26 @@
     return readAddress(masterElectionZNode, watcher);
   }
 
+  /**
+   * Set a watcher on the master address ZNode. The watcher will be set unless
+   * an exception occurs with ZooKeeper.
+   * @param watcher Watcher to set on master address ZNode.
+   * @return true if watcher was set, false otherwise.
+   */
+  public boolean watchMasterAddress(Watcher watcher) {
+    try {
+      zooKeeper.exists(masterElectionZNode, watcher);
+    } catch (KeeperException e) {
+      LOG.warn("Failed to set watcher on ZNode " + masterElectionZNode, e);
+      return false;
+    } catch (InterruptedException e) {
+      LOG.warn("Failed to set watcher on ZNode " + masterElectionZNode, e);
+      return false;
+    }
+    LOG.debug("Set watcher on master address ZNode " + masterElectionZNode);
+    return true;
+  }
+
   private HServerAddress readAddress(String znode, Watcher watcher) {
     try {
       return readAddressOrThrow(znode, watcher);