You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by hu...@apache.org on 2021/03/18 18:36:08 UTC
[hbase] branch branch-2.3 updated: HBASE-25639 meta replica state
is not respected during active master switch (#3052)
This is an automated email from the ASF dual-hosted git repository.
huaxiangsun pushed a commit to branch branch-2.3
in repository https://gitbox.apache.org/repos/asf/hbase.git
The following commit(s) were added to refs/heads/branch-2.3 by this push:
new 0a4ddd6 HBASE-25639 meta replica state is not respected during active master switch (#3052)
0a4ddd6 is described below
commit 0a4ddd6c3bcf50ce7f3563e1699145d38cdf9de0
Author: huaxiangsun <hu...@apache.org>
AuthorDate: Thu Mar 18 11:35:46 2021 -0700
HBASE-25639 meta replica state is not respected during active master switch (#3052)
Signed-off-by: stack <st...@duboce.net>
---
.../org/apache/hadoop/hbase/master/HMaster.java | 2 +-
.../hadoop/hbase/master/MasterMetaBootstrap.java | 27 +++---
.../hbase/master/assignment/AssignmentManager.java | 36 +++++---
.../master/TestMasterFailoverWithMetaReplica.java | 102 +++++++++++++++++++++
4 files changed, 138 insertions(+), 29 deletions(-)
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
index b78f7f3..5f8b63d 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
@@ -1018,7 +1018,7 @@ public class HMaster extends HRegionServer implements MasterServices {
RegionState rs = this.assignmentManager.getRegionStates().
getRegionState(RegionInfoBuilder.FIRST_META_REGIONINFO);
LOG.info("hbase:meta {}", rs);
- if (rs != null && rs.isOffline()) {
+ if ((rs == null) || (rs != null && rs.isOffline())) {
Optional<InitMetaProcedure> optProc = procedureExecutor.getProcedures().stream()
.filter(p -> p instanceof InitMetaProcedure).map(o -> (InitMetaProcedure) o).findAny();
initMetaProc = optProc.orElseGet(() -> {
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterMetaBootstrap.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterMetaBootstrap.java
index da8d228..800ae97 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterMetaBootstrap.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterMetaBootstrap.java
@@ -21,7 +21,6 @@ package org.apache.hadoop.hbase.master;
import java.io.IOException;
import java.util.List;
import org.apache.hadoop.hbase.HConstants;
-import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.client.RegionInfoBuilder;
import org.apache.hadoop.hbase.client.RegionReplicaUtil;
@@ -61,25 +60,25 @@ class MasterMetaBootstrap {
throw new IllegalStateException("hbase:meta must be initialized first before we can " +
"assign out its replicas");
}
- ServerName metaServername = MetaTableLocator.getMetaRegionLocation(this.master.getZooKeeper());
+
for (int i = 1; i < numReplicas; i++) {
// Get current meta state for replica from zk.
- RegionState metaState = MetaTableLocator.getMetaRegionState(master.getZooKeeper(), i);
RegionInfo hri = RegionReplicaUtil.getRegionInfoForReplica(
- RegionInfoBuilder.FIRST_META_REGIONINFO, i);
- LOG.debug(hri.getRegionNameAsString() + " replica region state from zookeeper=" + metaState);
- if (metaServername.equals(metaState.getServerName())) {
- metaState = null;
- LOG.info(hri.getRegionNameAsString() +
- " old location is same as current hbase:meta location; setting location as null...");
- }
+ RegionInfoBuilder.FIRST_META_REGIONINFO, i);
+
+ RegionState rs = assignmentManager.getRegionStates().getRegionState(hri);
+ LOG.debug(hri.getRegionNameAsString() + " replica region state from zookeeper=" + rs);
+
// These assigns run inline. All is blocked till they complete. Only interrupt is shutting
// down hosting server which calls AM#stop.
- if (metaState != null && metaState.getServerName() != null) {
- // Try to retain old assignment.
- assignmentManager.assignAsync(hri, metaState.getServerName());
- } else {
+ if (rs == null) {
assignmentManager.assignAsync(hri);
+ } else if (rs != null && rs.isOffline()) {
+ if (rs.getServerName() != null) {
+ assignmentManager.assignAsync(hri, rs.getServerName());
+ } else {
+ assignmentManager.assignAsync(hri);
+ }
}
}
unassignExcessMetaReplica(numReplicas);
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java
index edc7dee..880de2a 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java
@@ -45,6 +45,7 @@ import org.apache.hadoop.hbase.client.DoNotRetryRegionException;
import org.apache.hadoop.hbase.client.MasterSwitchType;
import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.client.RegionInfoBuilder;
+import org.apache.hadoop.hbase.client.RegionReplicaUtil;
import org.apache.hadoop.hbase.client.RegionStatesCount;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.TableState;
@@ -225,21 +226,28 @@ public class AssignmentManager {
ZKWatcher zkw = master.getZooKeeper();
// it could be null in some tests
if (zkw != null) {
- // here we are still in the early steps of active master startup. There is only one thread(us)
- // can access AssignmentManager and create region node, so here we do not need to lock the
- // region node.
- RegionState regionState = MetaTableLocator.getMetaRegionState(zkw);
- RegionStateNode regionNode =
- regionStates.getOrCreateRegionStateNode(RegionInfoBuilder.FIRST_META_REGIONINFO);
- regionNode.setRegionLocation(regionState.getServerName());
- regionNode.setState(regionState.getState());
- if (regionNode.getProcedure() != null) {
- regionNode.getProcedure().stateLoaded(this, regionNode);
- }
- if (regionState.getServerName() != null) {
- regionStates.addRegionToServer(regionNode);
+ List<String> metaZNodes = zkw.getMetaReplicaNodes();
+ LOG.debug("hbase:meta replica znodes: {}", metaZNodes);
+ for (String metaZNode : metaZNodes) {
+ int replicaId = zkw.getZNodePaths().getMetaReplicaIdFromZnode(metaZNode);
+ // here we are still in the early steps of active master startup. There is only one thread(us)
+ // can access AssignmentManager and create region node, so here we do not need to lock the
+ // region node.
+ RegionState regionState = MetaTableLocator.getMetaRegionState(zkw, replicaId);
+ RegionStateNode regionNode = regionStates.getOrCreateRegionStateNode(regionState.getRegion());
+ regionNode.setRegionLocation(regionState.getServerName());
+ regionNode.setState(regionState.getState());
+ if (regionNode.getProcedure() != null) {
+ regionNode.getProcedure().stateLoaded(this, regionNode);
+ }
+ if (regionState.getServerName() != null) {
+ regionStates.addRegionToServer(regionNode);
+ }
+ if (RegionReplicaUtil.isDefaultReplica(replicaId)) {
+ setMetaAssigned(regionState.getRegion(), regionState.getState() == State.OPEN);
+ }
+ LOG.debug("Loaded hbase:meta {}", regionNode);
}
- setMetaAssigned(regionState.getRegion(), regionState.getState() == State.OPEN);
}
}
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterFailoverWithMetaReplica.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterFailoverWithMetaReplica.java
new file mode 100644
index 0000000..c58dc06
--- /dev/null
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterFailoverWithMetaReplica.java
@@ -0,0 +1,102 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.master;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+import java.util.List;
+import org.apache.hadoop.hbase.HBaseClassTestRule;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.MiniHBaseCluster;
+import org.apache.hadoop.hbase.StartMiniClusterOption;
+import org.apache.hadoop.hbase.client.RegionInfo;
+import org.apache.hadoop.hbase.client.RegionInfoBuilder;
+import org.apache.hadoop.hbase.client.RegionReplicaUtil;
+import org.apache.hadoop.hbase.master.assignment.AssignmentTestingUtil;
+import org.apache.hadoop.hbase.testclassification.LargeTests;
+import org.apache.hadoop.hbase.testclassification.MasterTests;
+import org.apache.hadoop.hbase.util.JVMClusterUtil;
+import org.junit.ClassRule;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+
+@Category({MasterTests.class, LargeTests.class})
+public class TestMasterFailoverWithMetaReplica {
+
+ @ClassRule
+ public static final HBaseClassTestRule CLASS_RULE =
+ HBaseClassTestRule.forClass(TestMasterFailoverWithMetaReplica.class);
+ private static final int num_of_meta_replica = 2;
+
+ /**
+ * Test that during master failover, there is no state change for meta replica regions
+ *
+ * @throws Exception
+ */
+ @Test
+ public void testMasterFailoverWithMetaReplica() throws Exception {
+ // Start the cluster
+ HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
+ TEST_UTIL.getConfiguration().setInt(HConstants.META_REPLICAS_NUM, num_of_meta_replica);
+
+ StartMiniClusterOption option = StartMiniClusterOption.builder()
+ .numMasters(2).numRegionServers(num_of_meta_replica).build();
+ TEST_UTIL.startMiniCluster(option);
+ MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
+
+ assertTrue(cluster.waitForActiveAndReadyMaster());
+ HMaster oldMaster = cluster.getMaster();
+
+ // Make sure meta replica regions are assigned.
+ for (int replicaId = 1; replicaId < num_of_meta_replica; replicaId++) {
+ RegionInfo h = RegionReplicaUtil
+ .getRegionInfoForReplica(RegionInfoBuilder.FIRST_META_REGIONINFO, replicaId);
+ AssignmentTestingUtil.waitForAssignment(oldMaster.getAssignmentManager(), h);
+ }
+
+ int oldProcedureNum = oldMaster.getProcedures().size();
+
+ int activeIndex = -1;
+ List<JVMClusterUtil.MasterThread> masterThreads = cluster.getMasterThreads();
+
+ for (int i = 0; i < masterThreads.size(); i++) {
+ if (masterThreads.get(i).getMaster().isActiveMaster()) {
+ activeIndex = i;
+ }
+ }
+
+ // Stop the active master and wait for new master to come online.
+ cluster.stopMaster(activeIndex);
+ cluster.waitOnMaster(activeIndex);
+ assertTrue(cluster.waitForActiveAndReadyMaster());
+ // double check this is actually a new master
+ HMaster newMaster = cluster.getMaster();
+ assertFalse(oldMaster == newMaster);
+
+ int newProcedureNum = newMaster.getProcedures().size();
+
+ // Make sure all region servers report back and there is no new procedures.
+ assertEquals(newMaster.getServerManager().getOnlineServers().size(), num_of_meta_replica);
+ assertEquals(oldProcedureNum, newProcedureNum);
+
+ // Stop the cluster
+ TEST_UTIL.shutdownMiniCluster();
+ }
+}