You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by st...@apache.org on 2014/10/29 22:44:40 UTC

git commit: HBASE-12376 HBaseAdmin leaks ZK connections if failure starting watchers (ConnectionLossException)

Repository: hbase
Updated Branches:
  refs/heads/0.98 36be60e50 -> 8e64e1bbe


HBASE-12376 HBaseAdmin leaks ZK connections if failure starting watchers (ConnectionLossException)


Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/8e64e1bb
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/8e64e1bb
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/8e64e1bb

Branch: refs/heads/0.98
Commit: 8e64e1bbe0fce72074eae14c80541ef3e1365429
Parents: 36be60e
Author: stack <st...@apache.org>
Authored: Wed Oct 29 14:43:59 2014 -0700
Committer: stack <st...@apache.org>
Committed: Wed Oct 29 14:44:15 2014 -0700

----------------------------------------------------------------------
 .../hadoop/hbase/catalog/CatalogTracker.java    | 12 ++++-
 .../apache/hadoop/hbase/client/HBaseAdmin.java  | 49 ++++++++++++++-----
 .../apache/hadoop/hbase/client/TestAdmin.java   | 50 +++++++++++++++++++-
 3 files changed, 97 insertions(+), 14 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hbase/blob/8e64e1bb/hbase-client/src/main/java/org/apache/hadoop/hbase/catalog/CatalogTracker.java
----------------------------------------------------------------------
diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/catalog/CatalogTracker.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/catalog/CatalogTracker.java
index 6a7a6ff..09c66cc 100644
--- a/hbase-client/src/main/java/org/apache/hadoop/hbase/catalog/CatalogTracker.java
+++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/catalog/CatalogTracker.java
@@ -17,7 +17,9 @@
  */
 package org.apache.hadoop.hbase.catalog;
 
+import com.google.common.annotations.VisibleForTesting;
 import com.google.common.base.Stopwatch;
+
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.hbase.classification.InterfaceAudience;
@@ -113,7 +115,7 @@ public class CatalogTracker {
   private boolean instantiatedzkw = false;
   private Abortable abortable;
 
-  private boolean stopped = false;
+  private volatile boolean stopped = false;
 
   static final byte [] META_REGION_NAME =
     HRegionInfo.FIRST_META_REGIONINFO.getRegionName();
@@ -203,6 +205,14 @@ public class CatalogTracker {
   }
 
   /**
+   * @return True if we are stopped. Call only after start else indeterminate answer.
+   */
+  @VisibleForTesting
+  public boolean isStopped() {
+    return this.stopped;
+  }
+
+  /**
    * Stop working.
    * Interrupts any ongoing waits.
    */

http://git-wip-us.apache.org/repos/asf/hbase/blob/8e64e1bb/hbase-client/src/main/java/org/apache/hadoop/hbase/client/HBaseAdmin.java
----------------------------------------------------------------------
diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/HBaseAdmin.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/HBaseAdmin.java
index 022c382..583920c 100644
--- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/HBaseAdmin.java
+++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/HBaseAdmin.java
@@ -25,11 +25,9 @@ import java.io.InterruptedIOException;
 import java.net.SocketTimeoutException;
 import java.util.ArrayList;
 import java.util.Arrays;
-import java.util.HashMap;
 import java.util.LinkedList;
 import java.util.List;
 import java.util.Map;
-import java.util.Properties;
 import java.util.concurrent.atomic.AtomicInteger;
 import java.util.concurrent.atomic.AtomicReference;
 import java.util.regex.Pattern;
@@ -58,10 +56,10 @@ import org.apache.hadoop.hbase.TableNotEnabledException;
 import org.apache.hadoop.hbase.TableNotFoundException;
 import org.apache.hadoop.hbase.UnknownRegionException;
 import org.apache.hadoop.hbase.ZooKeeperConnectionException;
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.hbase.classification.InterfaceStability;
 import org.apache.hadoop.hbase.catalog.CatalogTracker;
 import org.apache.hadoop.hbase.catalog.MetaReader;
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.classification.InterfaceStability;
 import org.apache.hadoop.hbase.client.MetaScanner.MetaScannerVisitor;
 import org.apache.hadoop.hbase.client.MetaScanner.MetaScannerVisitorBase;
 import org.apache.hadoop.hbase.exceptions.DeserializationException;
@@ -103,6 +101,8 @@ import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.DeleteTableReques
 import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.DisableTableRequest;
 import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.DispatchMergingRegionsRequest;
 import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.EnableTableRequest;
+import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.ExecProcedureRequest;
+import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.ExecProcedureResponse;
 import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.GetClusterStatusRequest;
 import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.GetCompletedSnapshotsRequest;
 import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.GetNamespaceDescriptorRequest;
@@ -110,6 +110,8 @@ import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.GetSchemaAlterSta
 import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.GetSchemaAlterStatusResponse;
 import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.GetTableDescriptorsRequest;
 import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.GetTableDescriptorsResponse;
+import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.IsProcedureDoneRequest;
+import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.IsProcedureDoneResponse;
 import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.IsRestoreSnapshotDoneRequest;
 import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.IsRestoreSnapshotDoneResponse;
 import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.IsSnapshotDoneRequest;
@@ -131,15 +133,11 @@ import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.StopMasterRequest
 import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.TruncateTableRequest;
 import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.UnassignRegionRequest;
 import org.apache.hadoop.hbase.regionserver.wal.FailedLogCloseException;
+import org.apache.hadoop.hbase.snapshot.ClientSnapshotDescriptionUtils;
 import org.apache.hadoop.hbase.snapshot.HBaseSnapshotException;
 import org.apache.hadoop.hbase.snapshot.RestoreSnapshotException;
 import org.apache.hadoop.hbase.snapshot.SnapshotCreationException;
 import org.apache.hadoop.hbase.snapshot.UnknownSnapshotException;
-import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.ExecProcedureRequest;
-import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.ExecProcedureResponse;
-import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.IsProcedureDoneRequest;
-import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.IsProcedureDoneResponse;
-import org.apache.hadoop.hbase.snapshot.ClientSnapshotDescriptionUtils;
 import org.apache.hadoop.hbase.util.Addressing;
 import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
@@ -148,6 +146,7 @@ import org.apache.hadoop.ipc.RemoteException;
 import org.apache.hadoop.util.StringUtils;
 import org.apache.zookeeper.KeeperException;
 
+import com.google.common.annotations.VisibleForTesting;
 import com.google.protobuf.ByteString;
 import com.google.protobuf.ServiceException;
 
@@ -225,20 +224,46 @@ public class HBaseAdmin implements Abortable, Closeable {
    * @throws IOException
    * @see #cleanupCatalogTracker(CatalogTracker)
    */
-  private synchronized CatalogTracker getCatalogTracker()
+  @VisibleForTesting
+  synchronized CatalogTracker getCatalogTracker()
   throws ZooKeeperConnectionException, IOException {
+    boolean succeeded = false;
     CatalogTracker ct = null;
     try {
       ct = new CatalogTracker(this.conf);
-      ct.start();
+      startCatalogTracker(ct);
+      succeeded = true;
     } catch (InterruptedException e) {
       // Let it out as an IOE for now until we redo all so tolerate IEs
       throw (InterruptedIOException)new InterruptedIOException("Interrupted").initCause(e);
+    } finally {
+      // If we did not succeed but created a catalogtracker, clean it up. CT has a ZK instance
+      // in it and we'll leak if we don't do the 'stop'.
+      if (!succeeded && ct != null) {
+        try {
+          ct.stop();
+        } catch (RuntimeException re) {
+          LOG.error("Failed to clean up HBase's internal catalog tracker after a failed initialization. " +
+            "We may have leaked network connections to ZooKeeper; they won't be cleaned up until " +
+            "the JVM exits. If you see a large number of stale connections to ZooKeeper this is likely " +
+            "the cause. The following exception details will be needed for assistance from the " +
+            "HBase community.", re);
+        }
+        ct = null;
+      }
     }
     return ct;
   }
 
-  private void cleanupCatalogTracker(final CatalogTracker ct) {
+  @VisibleForTesting
+  CatalogTracker startCatalogTracker(final CatalogTracker ct)
+  throws IOException, InterruptedException {
+    ct.start();
+    return ct;
+  }
+
+  @VisibleForTesting
+  void cleanupCatalogTracker(final CatalogTracker ct) {
     ct.stop();
   }
 

http://git-wip-us.apache.org/repos/asf/hbase/blob/8e64e1bb/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestAdmin.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestAdmin.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestAdmin.java
index d6ca7be..8a00de8 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestAdmin.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestAdmin.java
@@ -33,6 +33,7 @@ import java.util.Map;
 import java.util.Set;
 import java.util.concurrent.atomic.AtomicBoolean;
 import java.util.concurrent.atomic.AtomicInteger;
+import java.util.concurrent.atomic.AtomicReference;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
@@ -69,7 +70,13 @@ import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.hbase.util.Pair;
 import org.apache.hadoop.hbase.zookeeper.ZKTableReadOnly;
 import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
-import org.junit.*;
+import org.apache.zookeeper.KeeperException;
+import org.junit.After;
+import org.junit.AfterClass;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.BeforeClass;
+import org.junit.Test;
 import org.junit.experimental.categories.Category;
 
 import com.google.protobuf.ServiceException;
@@ -115,6 +122,47 @@ public class TestAdmin {
   }
 
   @Test (timeout=300000)
+  public void testFailedCatalogTrackerGetCleansUpProperly()
+  throws ZooKeeperConnectionException, IOException {
+    // An HBaseAdmin that we can make fail when it goes to get catalogtracker.
+    final AtomicBoolean fail = new AtomicBoolean(false);
+    final AtomicReference<CatalogTracker> internalCt = new AtomicReference<CatalogTracker>();
+    HBaseAdmin doctoredAdmin = new HBaseAdmin(this.admin.getConfiguration()) {
+      @Override
+      protected CatalogTracker startCatalogTracker(CatalogTracker ct)
+      throws IOException, InterruptedException {
+        internalCt.set(ct);
+        super.startCatalogTracker(ct);
+        if (fail.get()) {
+          throw new IOException("Intentional test fail",
+            new KeeperException.ConnectionLossException());
+        }
+        return ct;
+      }
+    };
+    try {
+      CatalogTracker ct = doctoredAdmin.getCatalogTracker();
+      assertFalse(ct.isStopped());
+      doctoredAdmin.cleanupCatalogTracker(ct);
+      assertTrue(ct.isStopped());
+      // Now have mess with our doctored admin and make the start of catalog tracker 'fail'.
+      fail.set(true);
+      boolean expectedException = false;
+      try {
+        doctoredAdmin.getCatalogTracker();
+      } catch (IOException ioe) {
+        assertTrue(ioe.getCause() instanceof KeeperException.ConnectionLossException);
+        expectedException = true;
+      }
+      if (!expectedException) fail("Didn't get expected exception!");
+      // Assert that the internally made ct was properly shutdown.
+      assertTrue("Internal CatalogTracker not closed down", internalCt.get().isStopped());
+    } finally {
+      doctoredAdmin.close();
+    }
+  }
+
+  @Test (timeout=300000)
   public void testSplitFlushCompactUnknownTable() throws InterruptedException {
     final String unknowntable = "fubar";
     Exception exception = null;