You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by jd...@apache.org on 2009/10/29 02:24:03 UTC

svn commit: r830820 - in /hadoop/hbase/trunk: ./ src/java/org/apache/hadoop/hbase/master/ src/test/org/apache/hadoop/hbase/

Author: jdcryans
Date: Thu Oct 29 01:24:03 2009
New Revision: 830820

URL: http://svn.apache.org/viewvc?rev=830820&view=rev
Log:
HBASE-1921  When the Master's ZooKeeper session times out and it is the only Master, the cluster is wedged

Modified:
    hadoop/hbase/trunk/CHANGES.txt
    hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/HMaster.java
    hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/RegionManager.java
    hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/ZKMasterAddressWatcher.java
    hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/TestZooKeeper.java

Modified: hadoop/hbase/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/CHANGES.txt?rev=830820&r1=830819&r2=830820&view=diff
==============================================================================
--- hadoop/hbase/trunk/CHANGES.txt (original)
+++ hadoop/hbase/trunk/CHANGES.txt Thu Oct 29 01:24:03 2009
@@ -143,6 +143,7 @@
    HBASE-1756  Refactor HLog (changing package first)
    HBASE-1926  Remove unused xmlenc jar from trunk
    HBASE-1936  HLog group commit
+   HBASE-1921  When the Master's session times out and there's only one, cluster is wedged
 
   OPTIMIZATIONS
    HBASE-410   [testing] Speed up the test suite

Modified: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/HMaster.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/HMaster.java?rev=830820&r1=830819&r2=830820&view=diff
==============================================================================
--- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/HMaster.java (original)
+++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/HMaster.java Thu Oct 29 01:24:03 2009
@@ -126,7 +126,7 @@
   // Metrics is set when we call run.
   private final MasterMetrics metrics;
   // Our zk client.
-  private final ZooKeeperWrapper zooKeeperWrapper;
+  private ZooKeeperWrapper zooKeeperWrapper;
   // Watcher for master address and for cluster shutdown.
   private final ZKMasterAddressWatcher zkMasterAddressWatcher;
   // A Sleeper that sleeps for threadWakeFrequency; sleep if nothing todo.
@@ -187,7 +187,7 @@
     this.zooKeeperWrapper = new ZooKeeperWrapper(conf, this);
     this.zkMasterAddressWatcher =
       new ZKMasterAddressWatcher(this.zooKeeperWrapper, this.shutdownRequested);
-    this.zkMasterAddressWatcher.writeAddressToZooKeeper(this.address);
+    this.zkMasterAddressWatcher.writeAddressToZooKeeper(this.address, true);
     
     serverManager = new ServerManager(this);
     regionManager = new RegionManager(this);
@@ -1131,8 +1131,26 @@
       (event.getType().equals(EventType.NodeDeleted) && 
         event.getPath().equals(this.zooKeeperWrapper.getMasterElectionZNode())) &&
         !shutdownRequested.get()) {
-      LOG.error("Master lost its znode, killing itself now");
-      System.exit(1);
+
+      LOG.info("Master lost its znode, trying to get a new one");
+
+      // Can we still be the master? If not, goodbye
+
+      zooKeeperWrapper.close();
+      try {
+        zooKeeperWrapper = new ZooKeeperWrapper(conf, this);
+        this.zkMasterAddressWatcher.setZookeeper(zooKeeperWrapper);
+        if(!this.zkMasterAddressWatcher.
+            writeAddressToZooKeeper(this.address,false)) {
+          throw new Exception("Another Master is currently active");
+        }
+
+        // Verify the cluster to see if anything happened while we were away
+        joinCluster();
+      } catch (Exception e) {
+        LOG.error("Killing master because of", e);
+        System.exit(1);
+      }
     }
   }
 

Modified: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/RegionManager.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/RegionManager.java?rev=830820&r1=830819&r2=830820&view=diff
==============================================================================
--- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/RegionManager.java (original)
+++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/RegionManager.java Thu Oct 29 01:24:03 2009
@@ -125,8 +125,6 @@
     regionsToFlush = Collections.synchronizedSortedMap(
         new TreeMap<byte[],Pair<HRegionInfo,HServerAddress>>
         (Bytes.BYTES_COMPARATOR));
-
-  private final ZooKeeperWrapper zooKeeperWrapper;
   private final int zooKeeperNumRetries;
   private final int zooKeeperPause;
 
@@ -143,7 +141,6 @@
     // Scans the meta table
     metaScannerThread = new MetaScanner(master);
 
-    zooKeeperWrapper = master.getZooKeeperWrapper();
     zooKeeperNumRetries = conf.getInt(ZOOKEEPER_RETRIES, DEFAULT_ZOOKEEPER_RETRIES);
     zooKeeperPause = conf.getInt(ZOOKEEPER_PAUSE, DEFAULT_ZOOKEEPER_PAUSE);
 
@@ -602,8 +599,8 @@
     } catch(Exception iex) {
       LOG.warn("meta scanner", iex);
     }
-    zooKeeperWrapper.clearRSDirectory();
-    zooKeeperWrapper.close();
+    master.getZooKeeperWrapper().clearRSDirectory();
+    master.getZooKeeperWrapper().close();
   }
   
   /**
@@ -1121,7 +1118,7 @@
 
   private void writeRootRegionLocationToZooKeeper(HServerAddress address) {
     for (int attempt = 0; attempt < zooKeeperNumRetries; ++attempt) {
-      if (zooKeeperWrapper.writeRootRegionLocation(address)) {
+      if (master.getZooKeeperWrapper().writeRootRegionLocation(address)) {
         return;
       }
 

Modified: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/ZKMasterAddressWatcher.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/ZKMasterAddressWatcher.java?rev=830820&r1=830819&r2=830820&view=diff
==============================================================================
--- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/ZKMasterAddressWatcher.java (original)
+++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/ZKMasterAddressWatcher.java Thu Oct 29 01:24:03 2009
@@ -41,13 +41,14 @@
  */
 class ZKMasterAddressWatcher implements Watcher {
   private static final Log LOG = LogFactory.getLog(ZKMasterAddressWatcher.class);
-  private final ZooKeeperWrapper zookeeper;
+
+  private ZooKeeperWrapper zookeeper;
   private final AtomicBoolean requestShutdown;
 
   /**
    * Create this watcher using passed ZooKeeperWrapper instance.
    * @param zk ZooKeeper
-   * @param requestShutdown Flag to set to request shutdown.
+   * @param flag Flag to set to request shutdown.
    */
   ZKMasterAddressWatcher(final ZooKeeperWrapper zk, final AtomicBoolean flag) {
     this.requestShutdown = flag;
@@ -98,17 +99,30 @@
    * address (or until cluster shutdown).
    * @param address Address whose format is HServerAddress.toString
    */
-  void writeAddressToZooKeeper(final HServerAddress address) {
-    while (true) {
+  boolean writeAddressToZooKeeper(
+      final HServerAddress address, boolean retry) {
+    do {
       waitForMasterAddressAvailability();
       // Check if we need to shutdown instead of taking control
-      if (this.requestShutdown.get()) return;
+      if (this.requestShutdown.get()) {
+        LOG.debug("Won't start Master because cluster is shuting down");
+        return false;
+      }
       if(this.zookeeper.writeMasterAddress(address)) {
         this.zookeeper.setClusterState(true);
         // Watch our own node
         this.zookeeper.readMasterAddress(this);
-        return;
+        return true;
       }
-    }
+    } while(retry);
+    return false;
+  }
+
+  /**
+   * Reset the ZK in case a new connection is required
+   * @param zookeeper new instance
+   */
+  public void setZookeeper(ZooKeeperWrapper zookeeper) {
+    this.zookeeper = zookeeper;
   }
 }
\ No newline at end of file

Modified: hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/TestZooKeeper.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/TestZooKeeper.java?rev=830820&r1=830819&r2=830820&view=diff
==============================================================================
--- hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/TestZooKeeper.java (original)
+++ hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/TestZooKeeper.java Thu Oct 29 01:24:03 2009
@@ -95,41 +95,46 @@
     connection.relocateRegion(HConstants.ROOT_TABLE_NAME, HConstants.EMPTY_BYTE_ARRAY);
   }
 
-  public void testRegionServerSessionExpired() {
-    try {
-      this.conf.setBoolean("hbase.regionserver.restart.on.zk.expire", true);
-      new HTable(conf, HConstants.META_TABLE_NAME);
-  
-      ZooKeeperWrapper zkw = new ZooKeeperWrapper(conf, EmptyWatcher.instance);
-      String quorumServers = zkw.getQuorumServers();
-      int sessionTimeout = 5 * 1000; // 5 seconds
-
-      HRegionServer rs = cluster.getRegionServer(0);
-      ZooKeeperWrapper rsZK = rs.getZooKeeperWrapper();
-      long sessionID = rsZK.getSessionID();
-      byte[] password = rsZK.getSessionPassword();
-  
-      ZooKeeper zk = new ZooKeeper(quorumServers, sessionTimeout, EmptyWatcher.instance, sessionID, password);
-      zk.close();
+  public void testRegionServerSessionExpired() throws Exception{
+    this.conf.setBoolean("hbase.regionserver.restart.on.zk.expire", true);
+    new HTable(conf, HConstants.META_TABLE_NAME);
+    HRegionServer rs = cluster.getRegionServer(0);
+    sessionExpirationHelper(rs.getZooKeeperWrapper());
+  }
 
-      Thread.sleep(sessionTimeout * 3L);
+  public void testMasterSessionExpired() throws Exception {
+    new HTable(conf, HConstants.META_TABLE_NAME);
+    HMaster master = cluster.getMaster();
+    sessionExpirationHelper(master.getZooKeeperWrapper());
+  }
+
+  public void sessionExpirationHelper(ZooKeeperWrapper nodeZK) throws Exception{
+    ZooKeeperWrapper zkw = new ZooKeeperWrapper(conf, EmptyWatcher.instance);
+    String quorumServers = zkw.getQuorumServers();
+    int sessionTimeout = 5 * 1000; // 5 seconds
+
+    byte[] password = nodeZK.getSessionPassword();
+    long sessionID = nodeZK.getSessionID();
+
+    ZooKeeper zk = new ZooKeeper(quorumServers,
+        sessionTimeout, EmptyWatcher.instance, sessionID, password);
+    zk.close();
+
+    Thread.sleep(sessionTimeout * 3L);
+
+    new HTable(conf, HConstants.META_TABLE_NAME);
+
+    HBaseAdmin admin = new HBaseAdmin(conf);
+    HTableDescriptor desc = new HTableDescriptor("test");
+    HColumnDescriptor family = new HColumnDescriptor("fam");
+    desc.addFamily(family);
+    admin.createTable(desc);
+
+    HTable table = new HTable("test");
+    Put put = new Put(Bytes.toBytes("testrow"));
+    put.add(Bytes.toBytes("fam"), Bytes.toBytes("col"), Bytes.toBytes("testdata"));
+    table.put(put);
 
-      new HTable(conf, HConstants.META_TABLE_NAME);
-  
-      HBaseAdmin admin = new HBaseAdmin(conf);
-      HTableDescriptor desc = new HTableDescriptor("test");
-      HColumnDescriptor family = new HColumnDescriptor("fam");
-      desc.addFamily(family);
-      admin.createTable(desc);
-  
-      HTable table = new HTable("test");
-      Put put = new Put(Bytes.toBytes("testrow"));
-      put.add(Bytes.toBytes("fam"), Bytes.toBytes("col"), Bytes.toBytes("testdata"));
-      table.put(put);
-    } catch (Exception e) {
-      e.printStackTrace();
-      fail();
-    }
   }
   
   public void testMultipleZK() {