You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by zh...@apache.org on 2018/01/24 09:50:50 UTC

[08/34] hbase git commit: HBASE-19794 TestZooKeeper hangs

HBASE-19794 TestZooKeeper hangs

Kill backup master first

Add some cleanup around NamespaceManager

Shorten the timeout waiting on the namespace manager as a workaround
until we have a better solution for interrupting ongoing client RPCs.
Do it in general for all tests.

Signed-off-by: zhangduo <zh...@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/27d00f58
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/27d00f58
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/27d00f58

Branch: refs/heads/HBASE-19064
Commit: 27d00f5861336cb9f8f01174be33f8c3c431980f
Parents: 7fe4aa6
Author: Michael Stack <st...@apache.org>
Authored: Sat Jan 20 20:11:10 2018 +0800
Committer: zhangduo <zh...@apache.org>
Committed: Sun Jan 21 20:59:16 2018 +0800

----------------------------------------------------------------------
 .../src/main/resources/hbase-default.xml        |  6 ++++
 .../apache/hadoop/hbase/ZKNamespaceManager.java |  4 +++
 .../hbase/master/ClusterSchemaServiceImpl.java  |  4 +++
 .../org/apache/hadoop/hbase/master/HMaster.java |  8 ++++--
 .../hbase/master/TableNamespaceManager.java     | 27 +++++++++++++++++-
 .../apache/hadoop/hbase/MiniHBaseCluster.java   | 16 +++++++++--
 .../hbase/client/TestZKAsyncRegistry.java       | 30 +++++++++-----------
 7 files changed, 72 insertions(+), 23 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hbase/blob/27d00f58/hbase-common/src/main/resources/hbase-default.xml
----------------------------------------------------------------------
diff --git a/hbase-common/src/main/resources/hbase-default.xml b/hbase-common/src/main/resources/hbase-default.xml
index bfae33c..26865de 100644
--- a/hbase-common/src/main/resources/hbase-default.xml
+++ b/hbase-common/src/main/resources/hbase-default.xml
@@ -1802,4 +1802,10 @@ possible configurations would overwhelm and obscure the important.
       Number of rows in a batch operation above which a warning will be logged.
     </description>
   </property>
+  <property>
+    <name>hbase.master.wait.on.service.seconds</name>
+    <value>30</value>
+    <description>Default is 5 minutes. Make it 30 seconds for tests. See
+    HBASE-19794 for some context.</description>
+  </property>
 </configuration>

http://git-wip-us.apache.org/repos/asf/hbase/blob/27d00f58/hbase-server/src/main/java/org/apache/hadoop/hbase/ZKNamespaceManager.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/ZKNamespaceManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/ZKNamespaceManager.java
index c6dc405..53b2439 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/ZKNamespaceManager.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/ZKNamespaceManager.java
@@ -76,6 +76,10 @@ public class ZKNamespaceManager extends ZKListener {
     }
   }
 
+  public void stop() throws IOException {
+    this.watcher.unregisterListener(this);
+  }
+
   public NamespaceDescriptor get(String name) {
     return cache.get(name);
   }

http://git-wip-us.apache.org/repos/asf/hbase/blob/27d00f58/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ClusterSchemaServiceImpl.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ClusterSchemaServiceImpl.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ClusterSchemaServiceImpl.java
index 4527bc0..4dd8de0 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ClusterSchemaServiceImpl.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ClusterSchemaServiceImpl.java
@@ -69,6 +69,10 @@ class ClusterSchemaServiceImpl extends AbstractService implements ClusterSchemaS
   protected void doStop() {
     // This is no stop for the table manager.
     notifyStopped();
+    TableNamespaceManager tnsm = getTableNamespaceManager();
+    if (tnsm != null) {
+      tnsm.stop("Stopping");
+    }
   }
 
   @Override

http://git-wip-us.apache.org/repos/asf/hbase/blob/27d00f58/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
index 0dd7f62..09b18bc 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
@@ -2692,9 +2692,11 @@ public class HMaster extends HRegionServer implements MasterServices {
 
   @Override
   public void stop(String msg) {
-    super.stop(msg);
-    if (this.activeMasterManager != null) {
-      this.activeMasterManager.stop();
+    if (!isStopped()) {
+      super.stop(msg);
+      if (this.activeMasterManager != null) {
+        this.activeMasterManager.stop();
+      }
     }
   }
 

http://git-wip-us.apache.org/repos/asf/hbase/blob/27d00f58/hbase-server/src/main/java/org/apache/hadoop/hbase/master/TableNamespaceManager.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/TableNamespaceManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/TableNamespaceManager.java
index 5de7dc5..47b27f4 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/TableNamespaceManager.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/TableNamespaceManager.java
@@ -32,6 +32,7 @@ import org.apache.hadoop.hbase.DoNotRetryIOException;
 import org.apache.hadoop.hbase.HTableDescriptor;
 import org.apache.hadoop.hbase.MetaTableAccessor;
 import org.apache.hadoop.hbase.NamespaceDescriptor;
+import org.apache.hadoop.hbase.Stoppable;
 import org.apache.hadoop.hbase.TableName;
 import org.apache.hadoop.hbase.ZKNamespaceManager;
 import org.apache.hadoop.hbase.client.Delete;
@@ -68,8 +69,9 @@ import org.slf4j.LoggerFactory;
 @edu.umd.cs.findbugs.annotations.SuppressWarnings(value="IS2_INCONSISTENT_SYNC",
   justification="TODO: synchronize access on nsTable but it is done in tiers above and this " +
     "class is going away/shrinking")
-public class TableNamespaceManager {
+public class TableNamespaceManager implements Stoppable {
   private static final Logger LOG = LoggerFactory.getLogger(TableNamespaceManager.class);
+  private volatile boolean stopped = false;
 
   private Configuration conf;
   private MasterServices masterServices;
@@ -368,4 +370,27 @@ public class TableNamespaceManager {
     }
     return maxRegions;
   }
+
+  @Override
+  public boolean isStopped() {
+    return this.stopped;
+  }
+
+  @Override
+  public void stop(String why) {
+    if (this.stopped) {
+      return;
+    }
+    try {
+      this.zkNamespaceManager.stop();
+    } catch (IOException ioe) {
+      LOG.warn("Failed NamespaceManager close", ioe);
+    }
+    try {
+      this.nsTable.close();
+    } catch (IOException ioe) {
+      LOG.warn("Failed Namespace Table close", ioe);
+    }
+    this.stopped = true;
+  }
 }

http://git-wip-us.apache.org/repos/asf/hbase/blob/27d00f58/hbase-server/src/test/java/org/apache/hadoop/hbase/MiniHBaseCluster.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/MiniHBaseCluster.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/MiniHBaseCluster.java
index 3a40477..378f6ec 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/MiniHBaseCluster.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/MiniHBaseCluster.java
@@ -837,12 +837,22 @@ public class MiniHBaseCluster extends HBaseCluster {
    * impossible to bring the mini-cluster back for clean shutdown.
    */
   public void killAll() {
+    // Do backups first.
+    MasterThread activeMaster = null;
+    for (MasterThread masterThread : getMasterThreads()) {
+      if (!masterThread.getMaster().isActiveMaster()) {
+        masterThread.getMaster().abort("killAll");
+      } else {
+        activeMaster = masterThread;
+      }
+    }
+    // Do active after.
+    if (activeMaster != null) {
+      activeMaster.getMaster().abort("killAll");
+    }
     for (RegionServerThread rst : getRegionServerThreads()) {
       rst.getRegionServer().abort("killAll");
     }
-    for (MasterThread masterThread : getMasterThreads()) {
-      masterThread.getMaster().abort("killAll", new Throwable());
-    }
   }
 
   @Override

http://git-wip-us.apache.org/repos/asf/hbase/blob/27d00f58/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestZKAsyncRegistry.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestZKAsyncRegistry.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestZKAsyncRegistry.java
index 3478549..4f9a71f 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestZKAsyncRegistry.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestZKAsyncRegistry.java
@@ -27,10 +27,8 @@ import java.io.IOException;
 import java.util.concurrent.ExecutionException;
 import java.util.concurrent.atomic.AtomicBoolean;
 import java.util.stream.IntStream;
-
 import org.apache.commons.io.IOUtils;
 import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.hbase.ClusterId;
 import org.apache.hadoop.hbase.HBaseTestingUtility;
 import org.apache.hadoop.hbase.HConstants;
 import org.apache.hadoop.hbase.HRegionLocation;
@@ -104,7 +102,7 @@ public class TestZKAsyncRegistry {
     String expectedClusterId = TEST_UTIL.getHBaseCluster().getMaster().getClusterId();
     assertEquals("Expected " + expectedClusterId + ", found=" + clusterId,
         expectedClusterId, clusterId);
-    assertEquals(TEST_UTIL.getHBaseCluster().getClusterStatus().getServersSize(),
+    assertEquals(TEST_UTIL.getHBaseCluster().getClusterMetrics().getLiveServerMetrics().size(),
       REGISTRY.getCurrentNrHRS().get().intValue());
     assertEquals(TEST_UTIL.getHBaseCluster().getMaster().getServerName(),
       REGISTRY.getMasterAddress().get());
@@ -122,19 +120,19 @@ public class TestZKAsyncRegistry {
 
   @Test
   public void testIndependentZKConnections() throws IOException {
-    ReadOnlyZKClient zk1 = REGISTRY.getZKClient();
-
-    Configuration otherConf = new Configuration(TEST_UTIL.getConfiguration());
-    otherConf.set(HConstants.ZOOKEEPER_QUORUM, "127.0.0.1");
-    try (ZKAsyncRegistry otherRegistry = new ZKAsyncRegistry(otherConf)) {
-      ReadOnlyZKClient zk2 = otherRegistry.getZKClient();
-
-      assertNotSame("Using a different configuration / quorum should result in different backing " +
-          "zk connection.",
-        zk1, zk2);
-      assertNotEquals(
-        "Using a different configrution / quorum should be reflected in the " + "zk connection.",
-        zk1.getConnectString(), zk2.getConnectString());
+    try (ReadOnlyZKClient zk1 = REGISTRY.getZKClient()) {
+      Configuration otherConf = new Configuration(TEST_UTIL.getConfiguration());
+      otherConf.set(HConstants.ZOOKEEPER_QUORUM, "127.0.0.1");
+      try (ZKAsyncRegistry otherRegistry = new ZKAsyncRegistry(otherConf)) {
+        ReadOnlyZKClient zk2 = otherRegistry.getZKClient();
+        assertNotSame("Using a different configuration / quorum should result in different " +
+          "backing zk connection.", zk1, zk2);
+        assertNotEquals(
+          "Using a different configrution / quorum should be reflected in the zk connection.",
+          zk1.getConnectString(), zk2.getConnectString());
+      }
+    } finally {
+      LOG.info("DONE!");
     }
   }
 }