You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by te...@apache.org on 2012/01/17 05:41:58 UTC

svn commit: r1232299 - in /hbase/branches/0.92: ./ src/main/java/org/apache/hadoop/hbase/catalog/ src/main/java/org/apache/hadoop/hbase/client/ src/main/java/org/apache/hadoop/hbase/master/ src/main/java/org/apache/hadoop/hbase/regionserver/ src/main/j...

Author: tedyu
Date: Tue Jan 17 04:41:57 2012
New Revision: 1232299

URL: http://svn.apache.org/viewvc?rev=1232299&view=rev
Log:
HBASE-5153  Add retry logic in HConnectionImplementation#resetZooKeeperTrackers (Jieshan)

Added:
    hbase/branches/0.92/src/main/java/org/apache/hadoop/hbase/client/ClosedConnectionException.java
    hbase/branches/0.92/src/main/java/org/apache/hadoop/hbase/client/ConnectionUtils.java
Modified:
    hbase/branches/0.92/CHANGES.txt
    hbase/branches/0.92/src/main/java/org/apache/hadoop/hbase/catalog/CatalogTracker.java
    hbase/branches/0.92/src/main/java/org/apache/hadoop/hbase/client/HBaseAdmin.java
    hbase/branches/0.92/src/main/java/org/apache/hadoop/hbase/client/HConnection.java
    hbase/branches/0.92/src/main/java/org/apache/hadoop/hbase/client/HConnectionManager.java
    hbase/branches/0.92/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
    hbase/branches/0.92/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
    hbase/branches/0.92/src/main/java/org/apache/hadoop/hbase/replication/ReplicationZookeeper.java
    hbase/branches/0.92/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java
    hbase/branches/0.92/src/main/java/org/apache/hadoop/hbase/zookeeper/ZooKeeperNodeTracker.java
    hbase/branches/0.92/src/main/java/org/apache/hadoop/hbase/zookeeper/ZooKeeperWatcher.java
    hbase/branches/0.92/src/test/java/org/apache/hadoop/hbase/client/TestFromClientSide.java
    hbase/branches/0.92/src/test/java/org/apache/hadoop/hbase/coprocessor/TestMasterCoprocessorExceptionWithAbort.java
    hbase/branches/0.92/src/test/java/org/apache/hadoop/hbase/coprocessor/TestMasterCoprocessorExceptionWithRemove.java
    hbase/branches/0.92/src/test/java/org/apache/hadoop/hbase/regionserver/TestMasterAddressManager.java
    hbase/branches/0.92/src/test/java/org/apache/hadoop/hbase/zookeeper/TestZooKeeperNodeTracker.java

Modified: hbase/branches/0.92/CHANGES.txt
URL: http://svn.apache.org/viewvc/hbase/branches/0.92/CHANGES.txt?rev=1232299&r1=1232298&r2=1232299&view=diff
==============================================================================
--- hbase/branches/0.92/CHANGES.txt (original)
+++ hbase/branches/0.92/CHANGES.txt Tue Jan 17 04:41:57 2012
@@ -925,6 +925,11 @@ Release 0.92.0 - Unreleased
    HBASE-3025  Coprocessor based access control
    HBASE-2418  Support for ZooKeeper authentication
 
+Release 0.90.7 - Unreleased
+
+  BUG FIXES
+   HBASE-5153  Add retry logic in HConnectionImplementation#resetZooKeeperTrackers (Jieshan)
+
 Release 0.90.6 - Unreleased
 
   BUG FIXES

Modified: hbase/branches/0.92/src/main/java/org/apache/hadoop/hbase/catalog/CatalogTracker.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.92/src/main/java/org/apache/hadoop/hbase/catalog/CatalogTracker.java?rev=1232299&r1=1232298&r2=1232299&view=diff
==============================================================================
--- hbase/branches/0.92/src/main/java/org/apache/hadoop/hbase/catalog/CatalogTracker.java (original)
+++ hbase/branches/0.92/src/main/java/org/apache/hadoop/hbase/catalog/CatalogTracker.java Tue Jan 17 04:41:57 2012
@@ -230,8 +230,8 @@ public class CatalogTracker {
   public void start() throws IOException, InterruptedException {
     LOG.debug("Starting catalog tracker " + this);
     try {
-      this.rootRegionTracker.start();
-      this.metaNodeTracker.start();
+      this.rootRegionTracker.start(true);
+      this.metaNodeTracker.start(true);
     } catch (RuntimeException e) {
       Throwable t = e.getCause();
       this.abortable.abort(e.getMessage(), t);

Added: hbase/branches/0.92/src/main/java/org/apache/hadoop/hbase/client/ClosedConnectionException.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.92/src/main/java/org/apache/hadoop/hbase/client/ClosedConnectionException.java?rev=1232299&view=auto
==============================================================================
--- hbase/branches/0.92/src/main/java/org/apache/hadoop/hbase/client/ClosedConnectionException.java (added)
+++ hbase/branches/0.92/src/main/java/org/apache/hadoop/hbase/client/ClosedConnectionException.java Tue Jan 17 04:41:57 2012
@@ -0,0 +1,36 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.client;
+
+import java.io.IOException;
+
+/**
+ * Thrown when a client call is made on an {@link HConnection} that is already closed.
+ */
+public class ClosedConnectionException extends IOException {
+  private static final long serialVersionUID = 8792360655678089586L;
+
+  public ClosedConnectionException() {
+    super();
+  }
+
+  public ClosedConnectionException(String s) {
+    super(s);
+  }
+}

Added: hbase/branches/0.92/src/main/java/org/apache/hadoop/hbase/client/ConnectionUtils.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.92/src/main/java/org/apache/hadoop/hbase/client/ConnectionUtils.java?rev=1232299&view=auto
==============================================================================
--- hbase/branches/0.92/src/main/java/org/apache/hadoop/hbase/client/ConnectionUtils.java (added)
+++ hbase/branches/0.92/src/main/java/org/apache/hadoop/hbase/client/ConnectionUtils.java Tue Jan 17 04:41:57 2012
@@ -0,0 +1,42 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.client;
+
+import org.apache.hadoop.hbase.HConstants;
+
+
+/**
+ * Static helpers shared by client connection code such as {@link HConnection}
+ * and {@link ServerCallable}.
+ */
+public class ConnectionUtils {
+  /**
+   * Calculates the back-off pause for a retry: <code>pause</code> multiplied by
+   * the {@link HConstants#RETRY_BACKOFF} entry at index <code>tries</code>.
+   * @param pause base pause; callers pass the result to Thread.sleep (millis)
+   * @param tries tries so far; values past the table end use its last entry
+   * @return How long to wait after <code>tries</code> retries
+   */
+  public static long getPauseTime(final long pause, final int tries) {
+    int ntries = tries; // NOTE(review): negative values are not guarded here
+    if (ntries >= HConstants.RETRY_BACKOFF.length) {
+      ntries = HConstants.RETRY_BACKOFF.length - 1; // reuse the last entry
+    }
+    return pause * HConstants.RETRY_BACKOFF[ntries];
+  }
+}

Modified: hbase/branches/0.92/src/main/java/org/apache/hadoop/hbase/client/HBaseAdmin.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.92/src/main/java/org/apache/hadoop/hbase/client/HBaseAdmin.java?rev=1232299&r1=1232298&r2=1232299&view=diff
==============================================================================
--- hbase/branches/0.92/src/main/java/org/apache/hadoop/hbase/client/HBaseAdmin.java (original)
+++ hbase/branches/0.92/src/main/java/org/apache/hadoop/hbase/client/HBaseAdmin.java Tue Jan 17 04:41:57 2012
@@ -113,7 +113,7 @@ public class HBaseAdmin implements Abort
         this.connection = HConnectionManager.getConnection(this.conf);
       }
       try { // Sleep
-        Thread.sleep(getPauseTime(tries));
+        Thread.sleep(ConnectionUtils.getPauseTime(this.pause, tries));
       } catch (InterruptedException e) {
         Thread.currentThread().interrupt();
         // we should delete connection between client and zookeeper
@@ -274,14 +274,6 @@ public class HBaseAdmin implements Abort
     return this.connection.getHTableDescriptor(tableName);
   }
 
-  private long getPauseTime(int tries) {
-    int triesCount = tries;
-    if (triesCount >= HConstants.RETRY_BACKOFF.length) {
-      triesCount = HConstants.RETRY_BACKOFF.length - 1;
-    }
-    return this.pause * HConstants.RETRY_BACKOFF[triesCount];
-  }
-
   /**
    * Creates a new table.
    * Synchronous operation.
@@ -402,7 +394,7 @@ public class HBaseAdmin implements Abort
             " of " + numRegs + " regions are online; retries exhausted.");
         }
         try { // Sleep
-          Thread.sleep(getPauseTime(tries));
+          Thread.sleep(ConnectionUtils.getPauseTime(this.pause, tries));
         } catch (InterruptedException e) {
           throw new InterruptedIOException("Interrupted when opening" +
               " regions; " + actualRegCount.get() + " of " + numRegs + 
@@ -530,7 +522,7 @@ public class HBaseAdmin implements Abort
         }
       }
       try {
-        Thread.sleep(getPauseTime(tries));
+        Thread.sleep(ConnectionUtils.getPauseTime(this.pause, tries));
       } catch (InterruptedException e) {
         // continue
       }
@@ -611,7 +603,7 @@ public class HBaseAdmin implements Abort
       if (enabled) {
         break;
       }
-      long sleep = getPauseTime(tries);
+      long sleep = ConnectionUtils.getPauseTime(this.pause, tries);
       if (LOG.isDebugEnabled()) {
         LOG.debug("Sleeping= " + sleep + "ms, waiting for all regions to be " +
           "enabled in " + Bytes.toString(tableName));
@@ -752,7 +744,7 @@ public class HBaseAdmin implements Abort
       if (disabled) {
         break;
       }
-      long sleep = getPauseTime(tries);
+      long sleep = ConnectionUtils.getPauseTime(this.pause, tries);
       if (LOG.isDebugEnabled()) {
         LOG.debug("Sleeping= " + sleep + "ms, waiting for all regions to be " +
           "disabled in " + Bytes.toString(tableName));

Modified: hbase/branches/0.92/src/main/java/org/apache/hadoop/hbase/client/HConnection.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.92/src/main/java/org/apache/hadoop/hbase/client/HConnection.java?rev=1232299&r1=1232298&r2=1232299&view=diff
==============================================================================
--- hbase/branches/0.92/src/main/java/org/apache/hadoop/hbase/client/HConnection.java (original)
+++ hbase/branches/0.92/src/main/java/org/apache/hadoop/hbase/client/HConnection.java Tue Jan 17 04:41:57 2012
@@ -145,6 +145,12 @@ public interface HConnection extends Abo
    * Allows flushing the region cache.
    */
   public void clearRegionCache();
+  
+  /**
+   * Closes the original connection and creates a new one.
+   * @throws ZooKeeperConnectionException  if unable to connect to zookeeper
+   */
+  public void resetZooKeeperTrackersWithRetries() throws ZooKeeperConnectionException;
 
   /**
    * Allows flushing the region cache of all locations that pertain to

Modified: hbase/branches/0.92/src/main/java/org/apache/hadoop/hbase/client/HConnectionManager.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.92/src/main/java/org/apache/hadoop/hbase/client/HConnectionManager.java?rev=1232299&r1=1232298&r2=1232299&view=diff
==============================================================================
--- hbase/branches/0.92/src/main/java/org/apache/hadoop/hbase/client/HConnectionManager.java (original)
+++ hbase/branches/0.92/src/main/java/org/apache/hadoop/hbase/client/HConnectionManager.java Tue Jan 17 04:41:57 2012
@@ -566,35 +566,70 @@ public class HConnectionManager {
           HConstants.HBASE_CLIENT_PREFETCH_LIMIT,
           HConstants.DEFAULT_HBASE_CLIENT_PREFETCH_LIMIT);
 
-      setupZookeeperTrackers();
+      setupZookeeperTrackers(true);
 
       this.master = null;
       this.masterChecked = false;
     }
 
-    private synchronized void setupZookeeperTrackers()
+    private synchronized boolean setupZookeeperTrackers(boolean allowAbort)
         throws ZooKeeperConnectionException{
       // initialize zookeeper and master address manager
       this.zooKeeper = getZooKeeperWatcher();
-      masterAddressTracker = new MasterAddressTracker(this.zooKeeper, this);
-      masterAddressTracker.start();
+      this.masterAddressTracker = new MasterAddressTracker(this.zooKeeper, this);
 
       this.rootRegionTracker = new RootRegionTracker(this.zooKeeper, this);
-      this.rootRegionTracker.start();
-
+      if (!this.masterAddressTracker.start(allowAbort)) {
+        this.masterAddressTracker.stop();
+        this.masterAddressTracker = null;
+        this.zooKeeper = null;
+        return false;
+      }
+      if (!this.rootRegionTracker.start(allowAbort)) {
+        this.masterAddressTracker.stop();
+        this.rootRegionTracker.stop();
+        this.masterAddressTracker = null;
+        this.rootRegionTracker = null;
+        this.zooKeeper = null;
+        return false;
+      }
       this.clusterId = new ClusterId(this.zooKeeper, this);
+      return true;
     }
 
-    private synchronized void resetZooKeeperTrackers()
+    @Override
+    public synchronized void resetZooKeeperTrackersWithRetries()
         throws ZooKeeperConnectionException {
       LOG.info("Trying to reconnect to zookeeper");
-      masterAddressTracker.stop();
-      masterAddressTracker = null;
-      rootRegionTracker.stop();
-      rootRegionTracker = null;
-      clusterId = null;
+      if (this.masterAddressTracker != null) {
+        this.masterAddressTracker.stop();
+        this.masterAddressTracker = null;
+      }
+      if (this.rootRegionTracker != null) {
+        this.rootRegionTracker.stop();
+        this.rootRegionTracker = null;
+      }
       this.zooKeeper = null;
-      setupZookeeperTrackers();
+      this.clusterId = null;
+      for (int tries = 0; tries < this.numRetries; tries++) {
+        boolean isLastTime = (tries == (this.numRetries - 1));
+        try {
+          if (setupZookeeperTrackers(isLastTime)) {
+            break;
+          }
+        } catch (ZooKeeperConnectionException zkce) {
+          if (isLastTime) {
+            throw zkce;
+          }
+        }
+        LOG.info("Tried to reconnect to zookeeper but failed,  already tried "
+            + tries + " times.");
+        try {
+          Thread.sleep(ConnectionUtils.getPauseTime(this.pause, tries));
+        } catch (InterruptedException e1) {
+          Thread.currentThread().interrupt();
+        }
+      }
     }
 
     public Configuration getConfiguration() {
@@ -791,7 +826,9 @@ public class HConnectionManager {
     private HRegionLocation locateRegion(final byte [] tableName,
       final byte [] row, boolean useCache)
     throws IOException {
-      if (this.closed) throw new IOException(toString() + " closed");
+      if (this.closed) {
+        throw new ClosedConnectionException(toString() + " closed");
+      }
       if (tableName == null || tableName.length == 0) {
         throw new IllegalArgumentException(
             "table name cannot be null or zero length");
@@ -1013,7 +1050,8 @@ public class HConnectionManager {
                 ((metaLocation == null)? "null": "{" + metaLocation + "}") +
                 ", attempt=" + tries + " of " +
                 this.numRetries + " failed; retrying after sleep of " +
-                getPauseTime(tries) + " because: " + e.getMessage());
+                ConnectionUtils.getPauseTime(this.pause, tries) + " because: "
+                + e.getMessage());
             }
           } else {
             throw e;
@@ -1316,6 +1354,9 @@ public class HConnectionManager {
 
     public <T> T getRegionServerWithRetries(ServerCallable<T> callable)
     throws IOException, RuntimeException {
+      if (this.closed) {
+        throw new ClosedConnectionException(toString() + " closed");
+      }
       List<RetriesExhaustedException.ThrowableWithExtraContext> exceptions =
         new ArrayList<RetriesExhaustedException.ThrowableWithExtraContext>();
       for(int tries = 0; tries < numRetries; tries++) {
@@ -1359,6 +1400,9 @@ public class HConnectionManager {
 
     public <T> T getRegionServerWithoutRetries(ServerCallable<T> callable)
         throws IOException, RuntimeException {
+      if (this.closed) {
+        throw new ClosedConnectionException(toString() + " closed");
+      }
       try {
         callable.beforeCall();
         callable.connect(false);
@@ -1713,11 +1757,12 @@ public class HConnectionManager {
 
     @Override
     public void abort(final String msg, Throwable t) {
-      if (t instanceof KeeperException.SessionExpiredException) {
+      if (t instanceof KeeperException.SessionExpiredException
+          || t instanceof KeeperException.ConnectionLossException) {
         try {
           LOG.info("This client just lost it's session with ZooKeeper, trying" +
               " to reconnect.");
-          resetZooKeeperTrackers();
+          resetZooKeeperTrackersWithRetries();
           LOG.info("Reconnected successfully. This disconnect could have been" +
               " caused by a network partition or a long-running GC pause," +
               " either way it's recommended that you verify your environment.");

Modified: hbase/branches/0.92/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.92/src/main/java/org/apache/hadoop/hbase/master/HMaster.java?rev=1232299&r1=1232298&r2=1232299&view=diff
==============================================================================
--- hbase/branches/0.92/src/main/java/org/apache/hadoop/hbase/master/HMaster.java (original)
+++ hbase/branches/0.92/src/main/java/org/apache/hadoop/hbase/master/HMaster.java Tue Jan 17 04:41:57 2012
@@ -394,7 +394,7 @@ implements HMasterInterface, HMasterRegi
     // Set the cluster as up.  If new RSs, they'll be waiting on this before
     // going ahead with their startup.
     this.clusterStatusTracker = new ClusterStatusTracker(getZooKeeper(), this);
-    this.clusterStatusTracker.start();
+    this.clusterStatusTracker.start(true);
     boolean wasUp = this.clusterStatusTracker.isClusterUp();
     if (!wasUp) this.clusterStatusTracker.setClusterUp();
 

Modified: hbase/branches/0.92/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.92/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java?rev=1232299&r1=1232298&r2=1232299&view=diff
==============================================================================
--- hbase/branches/0.92/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java (original)
+++ hbase/branches/0.92/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java Tue Jan 17 04:41:57 2012
@@ -552,13 +552,13 @@ public class HRegionServer implements HR
     // block until a master is available.  No point in starting up if no master
     // running.
     this.masterAddressManager = new MasterAddressTracker(this.zooKeeper, this);
-    this.masterAddressManager.start();
+    this.masterAddressManager.start(true);
     blockAndCheckIfStopped(this.masterAddressManager);
 
     // Wait on cluster being up.  Master will set this flag up in zookeeper
     // when ready.
     this.clusterStatusTracker = new ClusterStatusTracker(this.zooKeeper, this);
-    this.clusterStatusTracker.start();
+    this.clusterStatusTracker.start(true);
     blockAndCheckIfStopped(this.clusterStatusTracker);
 
     // Create the catalog tracker and start it;

Modified: hbase/branches/0.92/src/main/java/org/apache/hadoop/hbase/replication/ReplicationZookeeper.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.92/src/main/java/org/apache/hadoop/hbase/replication/ReplicationZookeeper.java?rev=1232299&r1=1232298&r2=1232299&view=diff
==============================================================================
--- hbase/branches/0.92/src/main/java/org/apache/hadoop/hbase/replication/ReplicationZookeeper.java (original)
+++ hbase/branches/0.92/src/main/java/org/apache/hadoop/hbase/replication/ReplicationZookeeper.java Tue Jan 17 04:41:57 2012
@@ -162,7 +162,7 @@ public class ReplicationZookeeper {
     // Set a tracker on replicationStateNodeNode
     this.statusTracker =
         new ReplicationStatusTracker(this.zookeeper, abortable);
-    statusTracker.start();
+    statusTracker.start(true);
     readReplicationStateZnode();
   }
 

Modified: hbase/branches/0.92/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.92/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java?rev=1232299&r1=1232298&r2=1232299&view=diff
==============================================================================
--- hbase/branches/0.92/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java (original)
+++ hbase/branches/0.92/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java Tue Jan 17 04:41:57 2012
@@ -626,7 +626,7 @@ public class HBaseFsck {
         }
         
       });
-    rootRegionTracker.start();
+    rootRegionTracker.start(true);
     ServerName sn = null;
     try {
       sn = rootRegionTracker.getRootRegionLocation();

Modified: hbase/branches/0.92/src/main/java/org/apache/hadoop/hbase/zookeeper/ZooKeeperNodeTracker.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.92/src/main/java/org/apache/hadoop/hbase/zookeeper/ZooKeeperNodeTracker.java?rev=1232299&r1=1232298&r2=1232299&view=diff
==============================================================================
--- hbase/branches/0.92/src/main/java/org/apache/hadoop/hbase/zookeeper/ZooKeeperNodeTracker.java (original)
+++ hbase/branches/0.92/src/main/java/org/apache/hadoop/hbase/zookeeper/ZooKeeperNodeTracker.java Tue Jan 17 04:41:57 2012
@@ -69,8 +69,12 @@ public abstract class ZooKeeperNodeTrack
    *
    * <p>Use {@link #blockUntilAvailable()} to block until the node is available
    * or {@link #getData(boolean)} to get the data of the node if it is available.
+   * 
+   * @param allowAbort If allowAbort is false, the abortable should not abort when a
+   *          KeeperException occur. 
+   * @return start result. true if start successfully.
    */
-  public synchronized void start() {
+  public synchronized boolean start(boolean allowAbort) {
     this.watcher.registerListener(this);
     try {
       if(ZKUtil.watchAndCheckExists(watcher, node)) {
@@ -80,15 +84,21 @@ public abstract class ZooKeeperNodeTrack
         } else {
           // It existed but now does not, try again to ensure a watch is set
           LOG.debug("Try starting again because there is no data from " + node);
-          start();
+          return start(allowAbort);
         }
       }
+      return true;
     } catch (KeeperException e) {
-      abortable.abort("Unexpected exception during initialization, aborting", e);
+      if (allowAbort && (abortable != null)) {
+        abortable.abort("Unexpected exception during initialization, aborting",
+            e);
+      }
+      return false;
     }
   }
 
   public synchronized void stop() {
+    this.watcher.unregisterListener(this);
     this.stopped = true;
     notifyAll();
   }
@@ -173,7 +183,9 @@ public abstract class ZooKeeperNodeTrack
         nodeDeleted(path);
       }
     } catch(KeeperException e) {
-      abortable.abort("Unexpected exception handling nodeCreated event", e);
+      if (abortable != null) {
+        abortable.abort("Unexpected exception handling nodeCreated event", e);
+      }
     }
   }
 
@@ -187,7 +199,9 @@ public abstract class ZooKeeperNodeTrack
           this.data = null;
         }
       } catch(KeeperException e) {
-        abortable.abort("Unexpected exception handling nodeDeleted event", e);
+        if (abortable != null) {
+          abortable.abort("Unexpected exception handling nodeDeleted event", e);
+        }
       }
     }
   }

Modified: hbase/branches/0.92/src/main/java/org/apache/hadoop/hbase/zookeeper/ZooKeeperWatcher.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.92/src/main/java/org/apache/hadoop/hbase/zookeeper/ZooKeeperWatcher.java?rev=1232299&r1=1232298&r2=1232299&view=diff
==============================================================================
--- hbase/branches/0.92/src/main/java/org/apache/hadoop/hbase/zookeeper/ZooKeeperWatcher.java (original)
+++ hbase/branches/0.92/src/main/java/org/apache/hadoop/hbase/zookeeper/ZooKeeperWatcher.java Tue Jan 17 04:41:57 2012
@@ -220,6 +220,14 @@ public class ZooKeeperWatcher implements
   public void registerListener(ZooKeeperListener listener) {
     listeners.add(listener);
   }
+  
+  /**
+   * Unregisters the specified listener by removing it from <code>listeners</code>.
+   * @param listener the listener to remove
+   */
+  public void unregisterListener(ZooKeeperListener listener) {
+    listeners.remove(listener);
+  }
 
   /**
    * Register the specified listener to receive ZooKeeper events and add it as

Modified: hbase/branches/0.92/src/test/java/org/apache/hadoop/hbase/client/TestFromClientSide.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.92/src/test/java/org/apache/hadoop/hbase/client/TestFromClientSide.java?rev=1232299&r1=1232298&r2=1232299&view=diff
==============================================================================
--- hbase/branches/0.92/src/test/java/org/apache/hadoop/hbase/client/TestFromClientSide.java (original)
+++ hbase/branches/0.92/src/test/java/org/apache/hadoop/hbase/client/TestFromClientSide.java Tue Jan 17 04:41:57 2012
@@ -43,6 +43,7 @@ import java.util.concurrent.ExecutorServ
 import java.util.concurrent.Executors;
 import java.util.concurrent.SynchronousQueue;
 import java.util.concurrent.ThreadPoolExecutor;
+
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
@@ -56,6 +57,7 @@ import org.apache.hadoop.hbase.HTableDes
 import org.apache.hadoop.hbase.KeyValue;
 import org.apache.hadoop.hbase.filter.BinaryComparator;
 import org.apache.hadoop.hbase.filter.CompareFilter;
+import org.apache.hadoop.hbase.filter.CompareFilter.CompareOp;
 import org.apache.hadoop.hbase.filter.Filter;
 import org.apache.hadoop.hbase.filter.FilterList;
 import org.apache.hadoop.hbase.filter.KeyOnlyFilter;
@@ -65,13 +67,12 @@ import org.apache.hadoop.hbase.filter.Re
 import org.apache.hadoop.hbase.filter.RowFilter;
 import org.apache.hadoop.hbase.filter.SingleColumnValueFilter;
 import org.apache.hadoop.hbase.filter.WhileMatchFilter;
-import org.apache.hadoop.hbase.filter.CompareFilter.CompareOp;
 import org.apache.hadoop.hbase.io.hfile.BlockCache;
 import org.apache.hadoop.hbase.io.hfile.CacheConfig;
 import org.apache.hadoop.hbase.regionserver.HRegion;
 import org.apache.hadoop.hbase.regionserver.Store;
-import org.apache.hadoop.hbase.regionserver.compactions.CompactionRequest;
 import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.zookeeper.KeeperException;
 import org.junit.After;
 import org.junit.AfterClass;
 import org.junit.Before;
@@ -4308,4 +4309,34 @@ public class TestFromClientSide {
         System.currentTimeMillis() + ", cur=" + store.getNumberOfstorefiles());
     assertEquals(count, store.getNumberOfstorefiles());
   }
+  
+  /**
+   * Verifies that a table put still works after the connection has been
+   * aborted with a ZooKeeper ConnectionLossException.
+   * @throws IOException if table creation or the initial put fails
+   */
+  @Test
+  public void testConnectionResetAfterAbort() throws IOException {
+    final byte[] COLUMN_FAMILY = Bytes.toBytes("columnfam");
+    final byte[] COLUMN = Bytes.toBytes("col");
+    HTable table = TEST_UTIL.createTable(
+        Bytes.toBytes("testConnectionRecover"), new byte[][] { COLUMN_FAMILY });
+    Put put01 = new Put(Bytes.toBytes("testrow1"));
+    put01.add(COLUMN_FAMILY, COLUMN, Bytes.toBytes("testValue"));
+    table.put(put01);
+
+    // At this time, abort the connection.
+    HConnection conn = table.getConnection();
+    conn.abort("Test Connection Abort", new KeeperException.ConnectionLossException());
+    boolean putSuccess = true;
+    // This put should succeed: the connection is expected to have recovered.
+    try {
+      Put put02 = new Put(Bytes.toBytes("testrow1"));
+      put02.add(COLUMN_FAMILY, COLUMN, Bytes.toBytes("testValue"));
+      table.put(put02);
+    } catch (IOException ioe) {
+      putSuccess = false;
+    }
+    assertTrue(putSuccess);
+  }  
 }

Modified: hbase/branches/0.92/src/test/java/org/apache/hadoop/hbase/coprocessor/TestMasterCoprocessorExceptionWithAbort.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.92/src/test/java/org/apache/hadoop/hbase/coprocessor/TestMasterCoprocessorExceptionWithAbort.java?rev=1232299&r1=1232298&r2=1232299&view=diff
==============================================================================
--- hbase/branches/0.92/src/test/java/org/apache/hadoop/hbase/coprocessor/TestMasterCoprocessorExceptionWithAbort.java (original)
+++ hbase/branches/0.92/src/test/java/org/apache/hadoop/hbase/coprocessor/TestMasterCoprocessorExceptionWithAbort.java Tue Jan 17 04:41:57 2012
@@ -183,7 +183,7 @@ public class TestMasterCoprocessorExcept
           }
         });
 
-    masterTracker.start();
+    masterTracker.start(true);
     zkw.registerListener(masterTracker);
 
     // Test (part of the) output that should have be printed by master when it aborts:

Modified: hbase/branches/0.92/src/test/java/org/apache/hadoop/hbase/coprocessor/TestMasterCoprocessorExceptionWithRemove.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.92/src/test/java/org/apache/hadoop/hbase/coprocessor/TestMasterCoprocessorExceptionWithRemove.java?rev=1232299&r1=1232298&r2=1232299&view=diff
==============================================================================
--- hbase/branches/0.92/src/test/java/org/apache/hadoop/hbase/coprocessor/TestMasterCoprocessorExceptionWithRemove.java (original)
+++ hbase/branches/0.92/src/test/java/org/apache/hadoop/hbase/coprocessor/TestMasterCoprocessorExceptionWithRemove.java Tue Jan 17 04:41:57 2012
@@ -169,7 +169,7 @@ public class TestMasterCoprocessorExcept
           }
         });
 
-    masterTracker.start();
+    masterTracker.start(true);
     zkw.registerListener(masterTracker);
 
     // Test (part of the) output that should have be printed by master when it aborts:

Modified: hbase/branches/0.92/src/test/java/org/apache/hadoop/hbase/regionserver/TestMasterAddressManager.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.92/src/test/java/org/apache/hadoop/hbase/regionserver/TestMasterAddressManager.java?rev=1232299&r1=1232298&r2=1232299&view=diff
==============================================================================
--- hbase/branches/0.92/src/test/java/org/apache/hadoop/hbase/regionserver/TestMasterAddressManager.java (original)
+++ hbase/branches/0.92/src/test/java/org/apache/hadoop/hbase/regionserver/TestMasterAddressManager.java Tue Jan 17 04:41:57 2012
@@ -64,7 +64,7 @@ public class TestMasterAddressManager {
 
     // Should not have a master yet
     MasterAddressTracker addressManager = new MasterAddressTracker(zk, null);
-    addressManager.start();
+    addressManager.start(true);
     assertFalse(addressManager.hasMaster());
     zk.registerListener(addressManager);
 

Modified: hbase/branches/0.92/src/test/java/org/apache/hadoop/hbase/zookeeper/TestZooKeeperNodeTracker.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.92/src/test/java/org/apache/hadoop/hbase/zookeeper/TestZooKeeperNodeTracker.java?rev=1232299&r1=1232298&r2=1232299&view=diff
==============================================================================
--- hbase/branches/0.92/src/test/java/org/apache/hadoop/hbase/zookeeper/TestZooKeeperNodeTracker.java (original)
+++ hbase/branches/0.92/src/test/java/org/apache/hadoop/hbase/zookeeper/TestZooKeeperNodeTracker.java Tue Jan 17 04:41:57 2012
@@ -72,7 +72,7 @@ public class TestZooKeeperNodeTracker {
     ZooKeeperWatcher zk = new ZooKeeperWatcher(TEST_UTIL.getConfiguration(),
       "testInterruptible", abortable);
     final TestTracker tracker = new TestTracker(zk, "/xyz", abortable);
-    tracker.start();
+    tracker.start(true);
     Thread t = new Thread() {
       @Override
       public void run() {
@@ -105,7 +105,7 @@ public class TestZooKeeperNodeTracker {
 
     // Start a ZKNT with no node currently available
     TestTracker localTracker = new TestTracker(zk, node, abortable);
-    localTracker.start();
+    localTracker.start(true);
     zk.registerListener(localTracker);
 
     // Make sure we don't have a node
@@ -120,7 +120,7 @@ public class TestZooKeeperNodeTracker {
 
     // Now, start a new ZKNT with the node already available
     TestTracker secondTracker = new TestTracker(zk, node, null);
-    secondTracker.start();
+    secondTracker.start(true);
     zk.registerListener(secondTracker);
 
     // Put up an additional zk listener so we know when zk event is done
@@ -213,7 +213,7 @@ public class TestZooKeeperNodeTracker {
 
     public WaitToGetDataThread(ZooKeeperWatcher zk, String node) {
       tracker = new TestTracker(zk, node, null);
-      tracker.start();
+      tracker.start(true);
       zk.registerListener(tracker);
       hasData = false;
     }