You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by je...@apache.org on 2015/06/09 18:47:57 UTC

hbase git commit: HBASE-13845 Expire of one region server carrying meta can bring down the master

Repository: hbase
Updated Branches:
  refs/heads/branch-1 14db858a2 -> d37d9c43d


HBASE-13845 Expire of one region server carrying meta can bring down the master


Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/d37d9c43
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/d37d9c43
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/d37d9c43

Branch: refs/heads/branch-1
Commit: d37d9c43de6c919a58ff34548a36af1e22e6cc2a
Parents: 14db858
Author: Jerry He <je...@apache.org>
Authored: Tue Jun 9 09:47:33 2015 -0700
Committer: Jerry He <je...@apache.org>
Committed: Tue Jun 9 09:47:33 2015 -0700

----------------------------------------------------------------------
 .../master/procedure/ServerCrashProcedure.java  |   4 +-
 .../hbase/master/TestMetaShutdownHandler.java   | 143 +++++++++++++++++++
 2 files changed, 146 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hbase/blob/d37d9c43/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ServerCrashProcedure.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ServerCrashProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ServerCrashProcedure.java
index 63d99e7..6eeaf6f 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ServerCrashProcedure.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ServerCrashProcedure.java
@@ -707,7 +707,9 @@ implements ServerProcedureInterface {
       services.getAssignmentManager().assignMeta(HRegionInfo.FIRST_META_REGIONINFO);
     } else if (serverName.equals(services.getMetaTableLocator().
         getMetaRegionLocation(services.getZooKeeper()))) {
-      throw new IOException("hbase:meta is onlined on the dead server " + this.serverName);
+      // hbase:meta still appears to be online on the server that the master is
+      // expiring and considers dead. Re-assign hbase:meta anyway.
+      services.getAssignmentManager().assignMeta(HRegionInfo.FIRST_META_REGIONINFO);
     } else {
       LOG.info("Skip assigning hbase:meta because it is online at "
           + services.getMetaTableLocator().getMetaRegionLocation(services.getZooKeeper()));

http://git-wip-us.apache.org/repos/asf/hbase/blob/d37d9c43/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMetaShutdownHandler.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMetaShutdownHandler.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMetaShutdownHandler.java
new file mode 100644
index 0000000..619d367
--- /dev/null
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMetaShutdownHandler.java
@@ -0,0 +1,143 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.master;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotEquals;
+import static org.junit.Assert.assertTrue;
+import java.io.IOException;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.CoordinatedStateManager;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.HRegionInfo;
+import org.apache.hadoop.hbase.testclassification.MediumTests;
+import org.apache.hadoop.hbase.MiniHBaseCluster;
+import org.apache.hadoop.hbase.MiniHBaseCluster.MiniHBaseClusterRegionServer;
+import org.apache.hadoop.hbase.ServerName;
+import org.apache.hadoop.hbase.Waiter;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.zookeeper.MetaTableLocator;
+import org.apache.hadoop.hbase.zookeeper.ZKUtil;
+import org.apache.zookeeper.KeeperException;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+
+/**
+ * Tests that hbase:meta is reassigned when the region server carrying it expires.
+ */
+@Category(MediumTests.class)
+public class TestMetaShutdownHandler {
+  private final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
+  final static Configuration conf = TEST_UTIL.getConfiguration();
+
+  @BeforeClass
+  public static void setUpBeforeClass() throws Exception {
+    TEST_UTIL.startMiniCluster(1, 3, null, null, MyRegionServer.class);
+  }
+
+  @AfterClass
+  public static void tearDownAfterClass() throws Exception {
+    TEST_UTIL.shutdownMiniCluster();
+  }
+
+  /**
+   * Tests the expiration handling of a meta-carrying region server.
+   * Once the MiniHBaseCluster is up, the test deletes the ephemeral
+   * znode of the meta-carrying region server, which triggers the
+   * expiration of that region server on the master.
+   * Meanwhile, MyRegionServer delays its own abort by 30 seconds so
+   * that the region server is still up during the master's server
+   * shutdown handling (SSH).
+   * The test then checks that meta is assigned to a different server.
+   */
+  @Test (timeout=180000)
+  public void testExpireMetaRegionServer() throws Exception {
+    MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
+
+    HMaster master = cluster.getMaster();
+    RegionStates regionStates = master.getAssignmentManager().getRegionStates();
+    ServerName metaServerName = regionStates.getRegionServerOfRegion(
+      HRegionInfo.FIRST_META_REGIONINFO);
+    if (master.getServerName().equals(metaServerName) || metaServerName == null
+        || !metaServerName.equals(cluster.getServerHoldingMeta())) {
+      // Move meta onto the first live region server
+      metaServerName = cluster.getLiveRegionServerThreads()
+          .get(0).getRegionServer().getServerName();
+      master.move(HRegionInfo.FIRST_META_REGIONINFO.getEncodedNameAsBytes(),
+        Bytes.toBytes(metaServerName.getServerName()));
+      TEST_UTIL.waitUntilNoRegionsInTransition(60000);
+    }
+    RegionState metaState =
+        MetaTableLocator.getMetaRegionState(master.getZooKeeper());
+    assertEquals("Meta should be not in transition",
+      metaState.getState(), RegionState.State.OPEN);
+    assertNotEquals("Meta should be moved off master",
+      metaServerName, master.getServerName());
+
+    // Delete the ephemeral node of the meta-carrying region server.
+    // This will trigger the expiration of this region server on the master.
+    String rsEphemeralNodePath =
+        ZKUtil.joinZNode(master.getZooKeeper().rsZNode, metaServerName.toString());
+    ZKUtil.deleteNode(master.getZooKeeper(), rsEphemeralNodePath);
+    // Wait until the server is offline and no dead servers are in progress
+    final ServerManager serverManager = master.getServerManager();
+    final ServerName priorMetaServerName = metaServerName;
+    TEST_UTIL.waitFor(120000, 200, new Waiter.Predicate<Exception>() {
+      @Override
+      public boolean evaluate() throws Exception {
+        return !serverManager.isServerOnline(priorMetaServerName)
+            && !serverManager.areDeadServersInProgress();
+      }
+    });
+
+    TEST_UTIL.waitUntilNoRegionsInTransition(60000);
+    // Now, make sure meta is assigned
+    assertTrue("Meta should be assigned",
+      regionStates.isRegionOnline(HRegionInfo.FIRST_META_REGIONINFO));
+    // Now, make sure the meta state in ZooKeeper is OPEN and matches the master's view
+    metaState = MetaTableLocator.getMetaRegionState(master.getZooKeeper());
+    assertEquals("Meta should be not in transition",
+      metaState.getState(), RegionState.State.OPEN);
+    assertEquals("Meta should be assigned", metaState.getServerName(),
+      regionStates.getRegionServerOfRegion(HRegionInfo.FIRST_META_REGIONINFO));
+    assertNotEquals("Meta should be assigned on a different server",
+      metaState.getServerName(), metaServerName);
+  }
+
+  public static class MyRegionServer extends MiniHBaseClusterRegionServer {
+
+    public MyRegionServer(Configuration conf, CoordinatedStateManager cp)
+      throws IOException, KeeperException,
+        InterruptedException {
+      super(conf, cp);
+    }
+
+    @Override
+    public void abort(String reason, Throwable cause) {
+      // Sleep for 30 seconds to slow down the region server abort
+      try {
+        Thread.sleep(30*1000);
+      } catch (InterruptedException e) {
+        return; // NOTE(review): abort is skipped, interrupt flag not restored - confirm
+      }
+      super.abort(reason, cause);
+    }
+  }
+}