You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by st...@apache.org on 2014/10/29 22:44:40 UTC
git commit: HBASE-12376 HBaseAdmin leaks ZK connections if failure
starting watchers (ConnectionLossException)
Repository: hbase
Updated Branches:
refs/heads/0.98 36be60e50 -> 8e64e1bbe
HBASE-12376 HBaseAdmin leaks ZK connections if failure starting watchers (ConnectionLossException)
Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/8e64e1bb
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/8e64e1bb
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/8e64e1bb
Branch: refs/heads/0.98
Commit: 8e64e1bbe0fce72074eae14c80541ef3e1365429
Parents: 36be60e
Author: stack <st...@apache.org>
Authored: Wed Oct 29 14:43:59 2014 -0700
Committer: stack <st...@apache.org>
Committed: Wed Oct 29 14:44:15 2014 -0700
----------------------------------------------------------------------
.../hadoop/hbase/catalog/CatalogTracker.java | 12 ++++-
.../apache/hadoop/hbase/client/HBaseAdmin.java | 49 ++++++++++++++-----
.../apache/hadoop/hbase/client/TestAdmin.java | 50 +++++++++++++++++++-
3 files changed, 97 insertions(+), 14 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hbase/blob/8e64e1bb/hbase-client/src/main/java/org/apache/hadoop/hbase/catalog/CatalogTracker.java
----------------------------------------------------------------------
diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/catalog/CatalogTracker.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/catalog/CatalogTracker.java
index 6a7a6ff..09c66cc 100644
--- a/hbase-client/src/main/java/org/apache/hadoop/hbase/catalog/CatalogTracker.java
+++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/catalog/CatalogTracker.java
@@ -17,7 +17,9 @@
*/
package org.apache.hadoop.hbase.catalog;
+import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Stopwatch;
+
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
@@ -113,7 +115,7 @@ public class CatalogTracker {
private boolean instantiatedzkw = false;
private Abortable abortable;
- private boolean stopped = false;
+ private volatile boolean stopped = false;
static final byte [] META_REGION_NAME =
HRegionInfo.FIRST_META_REGIONINFO.getRegionName();
@@ -203,6 +205,14 @@ public class CatalogTracker {
}
/**
+ * @return True if we are stopped. Call only after start else indeterminate answer.
+ */
+ @VisibleForTesting
+ public boolean isStopped() {
+ return this.stopped;
+ }
+
+ /**
* Stop working.
* Interrupts any ongoing waits.
*/
http://git-wip-us.apache.org/repos/asf/hbase/blob/8e64e1bb/hbase-client/src/main/java/org/apache/hadoop/hbase/client/HBaseAdmin.java
----------------------------------------------------------------------
diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/HBaseAdmin.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/HBaseAdmin.java
index 022c382..583920c 100644
--- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/HBaseAdmin.java
+++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/HBaseAdmin.java
@@ -25,11 +25,9 @@ import java.io.InterruptedIOException;
import java.net.SocketTimeoutException;
import java.util.ArrayList;
import java.util.Arrays;
-import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
-import java.util.Properties;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicReference;
import java.util.regex.Pattern;
@@ -58,10 +56,10 @@ import org.apache.hadoop.hbase.TableNotEnabledException;
import org.apache.hadoop.hbase.TableNotFoundException;
import org.apache.hadoop.hbase.UnknownRegionException;
import org.apache.hadoop.hbase.ZooKeeperConnectionException;
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.hbase.classification.InterfaceStability;
import org.apache.hadoop.hbase.catalog.CatalogTracker;
import org.apache.hadoop.hbase.catalog.MetaReader;
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.classification.InterfaceStability;
import org.apache.hadoop.hbase.client.MetaScanner.MetaScannerVisitor;
import org.apache.hadoop.hbase.client.MetaScanner.MetaScannerVisitorBase;
import org.apache.hadoop.hbase.exceptions.DeserializationException;
@@ -103,6 +101,8 @@ import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.DeleteTableReques
import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.DisableTableRequest;
import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.DispatchMergingRegionsRequest;
import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.EnableTableRequest;
+import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.ExecProcedureRequest;
+import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.ExecProcedureResponse;
import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.GetClusterStatusRequest;
import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.GetCompletedSnapshotsRequest;
import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.GetNamespaceDescriptorRequest;
@@ -110,6 +110,8 @@ import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.GetSchemaAlterSta
import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.GetSchemaAlterStatusResponse;
import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.GetTableDescriptorsRequest;
import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.GetTableDescriptorsResponse;
+import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.IsProcedureDoneRequest;
+import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.IsProcedureDoneResponse;
import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.IsRestoreSnapshotDoneRequest;
import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.IsRestoreSnapshotDoneResponse;
import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.IsSnapshotDoneRequest;
@@ -131,15 +133,11 @@ import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.StopMasterRequest
import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.TruncateTableRequest;
import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.UnassignRegionRequest;
import org.apache.hadoop.hbase.regionserver.wal.FailedLogCloseException;
+import org.apache.hadoop.hbase.snapshot.ClientSnapshotDescriptionUtils;
import org.apache.hadoop.hbase.snapshot.HBaseSnapshotException;
import org.apache.hadoop.hbase.snapshot.RestoreSnapshotException;
import org.apache.hadoop.hbase.snapshot.SnapshotCreationException;
import org.apache.hadoop.hbase.snapshot.UnknownSnapshotException;
-import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.ExecProcedureRequest;
-import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.ExecProcedureResponse;
-import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.IsProcedureDoneRequest;
-import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.IsProcedureDoneResponse;
-import org.apache.hadoop.hbase.snapshot.ClientSnapshotDescriptionUtils;
import org.apache.hadoop.hbase.util.Addressing;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
@@ -148,6 +146,7 @@ import org.apache.hadoop.ipc.RemoteException;
import org.apache.hadoop.util.StringUtils;
import org.apache.zookeeper.KeeperException;
+import com.google.common.annotations.VisibleForTesting;
import com.google.protobuf.ByteString;
import com.google.protobuf.ServiceException;
@@ -225,20 +224,46 @@ public class HBaseAdmin implements Abortable, Closeable {
* @throws IOException
* @see #cleanupCatalogTracker(CatalogTracker)
*/
- private synchronized CatalogTracker getCatalogTracker()
+ @VisibleForTesting
+ synchronized CatalogTracker getCatalogTracker()
throws ZooKeeperConnectionException, IOException {
+ boolean succeeded = false;
CatalogTracker ct = null;
try {
ct = new CatalogTracker(this.conf);
- ct.start();
+ startCatalogTracker(ct);
+ succeeded = true;
} catch (InterruptedException e) {
// Let it out as an IOE for now until we redo all so tolerate IEs
throw (InterruptedIOException)new InterruptedIOException("Interrupted").initCause(e);
+ } finally {
+ // If we did not succeed but created a catalogtracker, clean it up. CT has a ZK instance
+ // in it and we'll leak if we don't do the 'stop'.
+ if (!succeeded && ct != null) {
+ try {
+ ct.stop();
+ } catch (RuntimeException re) {
+ LOG.error("Failed to clean up HBase's internal catalog tracker after a failed initialization. " +
+ "We may have leaked network connections to ZooKeeper; they won't be cleaned up until " +
+ "the JVM exits. If you see a large number of stale connections to ZooKeeper this is likely " +
+ "the cause. The following exception details will be needed for assistance from the " +
+ "HBase community.", re);
+ }
+ ct = null;
+ }
}
return ct;
}
- private void cleanupCatalogTracker(final CatalogTracker ct) {
+ @VisibleForTesting
+ CatalogTracker startCatalogTracker(final CatalogTracker ct)
+ throws IOException, InterruptedException {
+ ct.start();
+ return ct;
+ }
+
+ @VisibleForTesting
+ void cleanupCatalogTracker(final CatalogTracker ct) {
ct.stop();
}
http://git-wip-us.apache.org/repos/asf/hbase/blob/8e64e1bb/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestAdmin.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestAdmin.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestAdmin.java
index d6ca7be..8a00de8 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestAdmin.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestAdmin.java
@@ -33,6 +33,7 @@ import java.util.Map;
import java.util.Set;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;
+import java.util.concurrent.atomic.AtomicReference;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
@@ -69,7 +70,13 @@ import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.Pair;
import org.apache.hadoop.hbase.zookeeper.ZKTableReadOnly;
import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
-import org.junit.*;
+import org.apache.zookeeper.KeeperException;
+import org.junit.After;
+import org.junit.AfterClass;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.BeforeClass;
+import org.junit.Test;
import org.junit.experimental.categories.Category;
import com.google.protobuf.ServiceException;
@@ -115,6 +122,47 @@ public class TestAdmin {
}
@Test (timeout=300000)
+ public void testFailedCatalogTrackerGetCleansUpProperly()
+ throws ZooKeeperConnectionException, IOException {
+ // An HBaseAdmin that we can make fail when it goes to get catalogtracker.
+ final AtomicBoolean fail = new AtomicBoolean(false);
+ final AtomicReference<CatalogTracker> internalCt = new AtomicReference<CatalogTracker>();
+ HBaseAdmin doctoredAdmin = new HBaseAdmin(this.admin.getConfiguration()) {
+ @Override
+ protected CatalogTracker startCatalogTracker(CatalogTracker ct)
+ throws IOException, InterruptedException {
+ internalCt.set(ct);
+ super.startCatalogTracker(ct);
+ if (fail.get()) {
+ throw new IOException("Intentional test fail",
+ new KeeperException.ConnectionLossException());
+ }
+ return ct;
+ }
+ };
+ try {
+ CatalogTracker ct = doctoredAdmin.getCatalogTracker();
+ assertFalse(ct.isStopped());
+ doctoredAdmin.cleanupCatalogTracker(ct);
+ assertTrue(ct.isStopped());
+ // Now have mess with our doctored admin and make the start of catalog tracker 'fail'.
+ fail.set(true);
+ boolean expectedException = false;
+ try {
+ doctoredAdmin.getCatalogTracker();
+ } catch (IOException ioe) {
+ assertTrue(ioe.getCause() instanceof KeeperException.ConnectionLossException);
+ expectedException = true;
+ }
+ if (!expectedException) fail("Didn't get expected exception!");
+ // Assert that the internally made ct was properly shutdown.
+ assertTrue("Internal CatalogTracker not closed down", internalCt.get().isStopped());
+ } finally {
+ doctoredAdmin.close();
+ }
+ }
+
+ @Test (timeout=300000)
public void testSplitFlushCompactUnknownTable() throws InterruptedException {
final String unknowntable = "fubar";
Exception exception = null;