You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by wc...@apache.org on 2020/07/03 14:43:59 UTC
[hbase] branch branch-2.2 updated: HBASE-24562: Stabilize master startup with meta replicas enabled (#1997)
This is an automated email from the ASF dual-hosted git repository.
wchevreuil pushed a commit to branch branch-2.2
in repository https://gitbox.apache.org/repos/asf/hbase.git
The following commit(s) were added to refs/heads/branch-2.2 by this push:
new 899422b HBASE-24562: Stabilize master startup with meta replicas enabled (#1997)
899422b is described below
commit 899422be44b4b2c5fbd3b9fa79ba782d965ac0d8
Author: BukrosSzabolcs <bu...@gmail.com>
AuthorDate: Fri Jul 3 16:43:41 2020 +0200
HBASE-24562: Stabilize master startup with meta replicas enabled (#1997)
Signed-off-by: Wellington Chevreuil <wc...@apache.org>
---
.../org/apache/hadoop/hbase/master/HMaster.java | 6 +-
.../hadoop/hbase/master/MasterMetaBootstrap.java | 4 +-
.../hbase/master/assignment/AssignmentManager.java | 34 +++++++++-
.../hadoop/hbase/client/TestMetaWithReplicas.java | 77 ++++++++++++++++++++++
4 files changed, 115 insertions(+), 6 deletions(-)
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
index e87560c..0dc2997 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
@@ -1131,7 +1131,11 @@ public class HMaster extends HRegionServer implements MasterServices {
assignmentManager.checkIfShouldMoveSystemRegionAsync();
status.setStatus("Assign meta replicas");
MasterMetaBootstrap metaBootstrap = createMetaBootstrap();
- metaBootstrap.assignMetaReplicas();
+ try {
+ metaBootstrap.assignMetaReplicas();
+ } catch (IOException | KeeperException e){
+ LOG.error("Assigning meta replica failed: ", e);
+ }
status.setStatus("Starting quota manager");
initQuotaManager();
if (QuotaUtil.isQuotaEnabled(conf)) {
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterMetaBootstrap.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterMetaBootstrap.java
index e57817e..1cf6cf1 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterMetaBootstrap.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterMetaBootstrap.java
@@ -81,9 +81,9 @@ class MasterMetaBootstrap {
// down hosting server which calls AM#stop.
if (metaState != null && metaState.getServerName() != null) {
// Try to retain old assignment.
- assignmentManager.assign(hri, metaState.getServerName());
+ assignmentManager.assignAsync(hri, metaState.getServerName());
} else {
- assignmentManager.assign(hri);
+ assignmentManager.assignAsync(hri);
}
}
unassignExcessMetaReplica(numReplicas);
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java
index 9a5796f..8afe691 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java
@@ -589,9 +589,9 @@ public class AssignmentManager {
}
}
- // TODO: Need an async version of this for hbck2.
- public long assign(RegionInfo regionInfo, ServerName sn) throws IOException {
- // TODO: should we use getRegionStateNode?
+ private TransitRegionStateProcedure createAssignProcedure(RegionInfo regionInfo, ServerName sn)
+ throws IOException {
+ // TODO: should we use getRegionStateNode?
RegionStateNode regionNode = regionStates.getOrCreateRegionStateNode(regionInfo);
TransitRegionStateProcedure proc;
regionNode.lock();
@@ -602,6 +602,12 @@ public class AssignmentManager {
} finally {
regionNode.unlock();
}
+ return proc;
+ }
+
+ // TODO: Need an async version of this for hbck2.
+ public long assign(RegionInfo regionInfo, ServerName sn) throws IOException {
+ TransitRegionStateProcedure proc = createAssignProcedure(regionInfo, sn);
ProcedureSyncWait.submitAndWaitProcedure(master.getMasterProcedureExecutor(), proc);
return proc.getProcId();
}
@@ -610,6 +616,28 @@ public class AssignmentManager {
return assign(regionInfo, null);
}
+ /**
+ * Submits a procedure that assigns a region to a target server without waiting for it to finish
+ * @param regionInfo the region we would like to assign
+ * @param sn target server name
+ * @return submitProcedure
+ * @throws IOException if preTransitCheck fails
+ */
+ public Future<byte[]> assignAsync(RegionInfo regionInfo, ServerName sn) throws IOException {
+ TransitRegionStateProcedure proc = createAssignProcedure(regionInfo, sn);
+ return ProcedureSyncWait.submitProcedure(master.getMasterProcedureExecutor(), proc);
+ }
+
+ /**
+ * Submits a procedure that assigns a region without waiting for it to finish
+ * @param regionInfo the region we would like to assign
+ * @return submitProcedure
+ * @throws IOException if preTransitCheck fails
+ */
+ public Future<byte[]> assignAsync(RegionInfo regionInfo) throws IOException {
+ return assignAsync(regionInfo, null);
+ }
+
public long unassign(RegionInfo regionInfo) throws IOException {
RegionStateNode regionNode = regionStates.getRegionStateNode(regionInfo);
if (regionNode == null) {
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestMetaWithReplicas.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestMetaWithReplicas.java
index ddf5688..cbfac28 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestMetaWithReplicas.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestMetaWithReplicas.java
@@ -23,12 +23,14 @@ import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
+import java.io.IOException;
import java.util.Arrays;
import java.util.Collection;
import java.util.EnumSet;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
+import java.util.concurrent.Future;
import java.util.concurrent.atomic.AtomicBoolean;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.Abortable;
@@ -41,8 +43,12 @@ import org.apache.hadoop.hbase.MetaTableAccessor;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.TableNotFoundException;
+import org.apache.hadoop.hbase.master.HMaster;
+import org.apache.hadoop.hbase.master.MasterServices;
import org.apache.hadoop.hbase.master.assignment.AssignmentManager;
import org.apache.hadoop.hbase.master.assignment.AssignmentTestingUtil;
+import org.apache.hadoop.hbase.master.assignment.RegionStateNode;
+import org.apache.hadoop.hbase.master.assignment.TransitRegionStateProcedure;
import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
import org.apache.hadoop.hbase.regionserver.StorefileRefresherChore;
import org.apache.hadoop.hbase.testclassification.LargeTests;
@@ -52,7 +58,9 @@ import org.apache.hadoop.hbase.zookeeper.MetaTableLocator;
import org.apache.hadoop.hbase.zookeeper.ZKUtil;
import org.apache.hadoop.hbase.zookeeper.ZKWatcher;
import org.apache.hadoop.hbase.zookeeper.ZNodePaths;
+import org.apache.zookeeper.KeeperException;
import org.junit.After;
+import org.junit.Assert;
import org.junit.Before;
import org.junit.ClassRule;
import org.junit.Rule;
@@ -350,4 +358,73 @@ public class TestMetaWithReplicas {
assertNotEquals(3, i);
}
}
+
+ @Test
+ public void testFailedReplicaAssigment() throws InterruptedException, IOException {
+ //using our rigged master, to force a failed meta replica assignment
+ TEST_UTIL.getMiniHBaseCluster().getConfiguration().setClass(HConstants.MASTER_IMPL,
+ BrokenMetaReplicaMaster.class, HMaster.class);
+ TEST_UTIL.getMiniHBaseCluster().stopMaster(0).join();
+ HMaster newMaster = TEST_UTIL.getMiniHBaseCluster().startMaster().getMaster();
+ //waiting for master to come up
+ TEST_UTIL.waitFor(30000, () -> newMaster.isInitialized());
+ TEST_UTIL.getMiniHBaseCluster().getConfiguration().unset(HConstants.MASTER_IMPL);
+
+ AssignmentManager am = newMaster.getAssignmentManager();
+ //showing one of the replicas got assigned
+ RegionInfo metaReplicaHri = RegionReplicaUtil.getRegionInfoForReplica(
+ RegionInfoBuilder.FIRST_META_REGIONINFO, 1);
+ TEST_UTIL.waitFor(30000, () ->
+ am.getRegionStates().getOrCreateRegionStateNode(metaReplicaHri) != null &&
+ am.getRegionStates().getOrCreateRegionStateNode(metaReplicaHri).getRegionLocation()
+ != null);
+ RegionStateNode metaReplicaRegionNode =
+ am.getRegionStates().getOrCreateRegionStateNode(metaReplicaHri);
+ Assert.assertNotNull(metaReplicaRegionNode.getRegionLocation());
+ //showing one of the replicas failed to be assigned
+ RegionInfo metaReplicaHri2 = RegionReplicaUtil.getRegionInfoForReplica(
+ RegionInfoBuilder.FIRST_META_REGIONINFO, 2);
+ RegionStateNode metaReplicaRegionNode2 =
+ am.getRegionStates().getOrCreateRegionStateNode(metaReplicaHri2);
+ Assert.assertNull(metaReplicaRegionNode2.getRegionLocation());
+
+ //showing master is active and running
+ Assert.assertFalse(newMaster.isStopping());
+ Assert.assertFalse(newMaster.isStopped());
+ Assert.assertTrue(newMaster.isActiveMaster());
+ }
+
+ public static class BrokenTransitRegionStateProcedure extends TransitRegionStateProcedure {
+ protected BrokenTransitRegionStateProcedure() {
+ //super(env, hri, assignCandidate, forceNewPlan, type);
+ super(null, null, null, false,TransitionType.ASSIGN);
+ }
+ }
+
+ public static class BrokenMetaReplicaMaster extends HMaster{
+ public BrokenMetaReplicaMaster(final Configuration conf) throws IOException, KeeperException {
+ super(conf);
+ }
+
+ @Override
+ public AssignmentManager createAssignmentManager(MasterServices master) {
+ return new BrokenMasterMetaAssignmentManager(master);
+ }
+ }
+
+ public static class BrokenMasterMetaAssignmentManager extends AssignmentManager{
+ MasterServices master;
+ public BrokenMasterMetaAssignmentManager(final MasterServices master) {
+ super(master);
+ this.master = master;
+ }
+
+ public Future<byte[]> assignAsync(RegionInfo regionInfo, ServerName sn) throws IOException {
+ RegionStateNode regionNode = getRegionStates().getOrCreateRegionStateNode(regionInfo);
+ if (regionNode.getRegionInfo().getReplicaId() == 2) {
+ regionNode.setProcedure(new BrokenTransitRegionStateProcedure());
+ }
+ return super.assignAsync(regionInfo, sn);
+ }
+ }
}