You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by wc...@apache.org on 2020/06/26 17:09:34 UTC
[hbase] branch branch-2.3 updated: HBASE-24562: Stabilize master
startup with meta replicas enabled (#1903)
This is an automated email from the ASF dual-hosted git repository.
wchevreuil pushed a commit to branch branch-2.3
in repository https://gitbox.apache.org/repos/asf/hbase.git
The following commit(s) were added to refs/heads/branch-2.3 by this push:
new 5e09359 HBASE-24562: Stabilize master startup with meta replicas enabled (#1903)
5e09359 is described below
commit 5e0935977daba8755a4a02e3fb9f85d5f6225428
Author: BukrosSzabolcs <bu...@gmail.com>
AuthorDate: Wed Jun 24 19:38:36 2020 +0200
HBASE-24562: Stabilize master startup with meta replicas enabled (#1903)
Signed-off-by: Wellington Chevreuil <wc...@apache.org>
Signed-off-by: Huaxiang Sun <hu...@apache.com>
(cherry picked from commit 8cdb2cca4461d6adad3f44af001055848a205370)
---
.../org/apache/hadoop/hbase/master/HMaster.java | 6 +-
.../hadoop/hbase/master/MasterMetaBootstrap.java | 4 +-
.../hbase/master/assignment/AssignmentManager.java | 34 ++++++++-
.../hadoop/hbase/client/TestMetaWithReplicas.java | 83 ++++++++++++++++++++--
4 files changed, 115 insertions(+), 12 deletions(-)
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
index 689e2a9..211f157 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
@@ -1140,7 +1140,11 @@ public class HMaster extends HRegionServer implements MasterServices {
assignmentManager.checkIfShouldMoveSystemRegionAsync();
status.setStatus("Assign meta replicas");
MasterMetaBootstrap metaBootstrap = createMetaBootstrap();
- metaBootstrap.assignMetaReplicas();
+ try {
+ metaBootstrap.assignMetaReplicas();
+ } catch (IOException | KeeperException e){
+ LOG.error("Assigning meta replica failed: ", e);
+ }
status.setStatus("Starting quota manager");
initQuotaManager();
if (QuotaUtil.isQuotaEnabled(conf)) {
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterMetaBootstrap.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterMetaBootstrap.java
index e57817e..1cf6cf1 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterMetaBootstrap.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterMetaBootstrap.java
@@ -81,9 +81,9 @@ class MasterMetaBootstrap {
// down hosting server which calls AM#stop.
if (metaState != null && metaState.getServerName() != null) {
// Try to retain old assignment.
- assignmentManager.assign(hri, metaState.getServerName());
+ assignmentManager.assignAsync(hri, metaState.getServerName());
} else {
- assignmentManager.assign(hri);
+ assignmentManager.assignAsync(hri);
}
}
unassignExcessMetaReplica(numReplicas);
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java
index e64d838..1df8678 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java
@@ -589,9 +589,9 @@ public class AssignmentManager {
}
}
- // TODO: Need an async version of this for hbck2.
- public long assign(RegionInfo regionInfo, ServerName sn) throws IOException {
- // TODO: should we use getRegionStateNode?
+ private TransitRegionStateProcedure createAssignProcedure(RegionInfo regionInfo, ServerName sn)
+ throws IOException {
+ // TODO: should we use getRegionStateNode?
RegionStateNode regionNode = regionStates.getOrCreateRegionStateNode(regionInfo);
TransitRegionStateProcedure proc;
regionNode.lock();
@@ -602,6 +602,12 @@ public class AssignmentManager {
} finally {
regionNode.unlock();
}
+ return proc;
+ }
+
+ // TODO: Need an async version of this for hbck2; check whether assignAsync already covers it.
+ public long assign(RegionInfo regionInfo, ServerName sn) throws IOException {
+ TransitRegionStateProcedure proc = createAssignProcedure(regionInfo, sn);
ProcedureSyncWait.submitAndWaitProcedure(master.getMasterProcedureExecutor(), proc);
return proc.getProcId();
}
@@ -610,6 +616,28 @@ public class AssignmentManager {
return assign(regionInfo, null);
}
+ /**
+ * Submits a procedure that assigns a region to a target server without waiting for it to finish
+ * @param regionInfo the region we would like to assign
+ * @param sn target server name; may be null (the single-argument overload passes null)
+ * @return the future returned by ProcedureSyncWait.submitProcedure for the submitted procedure
+ * @throws IOException if creating the assign procedure fails (createAssignProcedure declares it)
+ */
+ public Future<byte[]> assignAsync(RegionInfo regionInfo, ServerName sn) throws IOException {
+ TransitRegionStateProcedure proc = createAssignProcedure(regionInfo, sn);
+ return ProcedureSyncWait.submitProcedure(master.getMasterProcedureExecutor(), proc);
+ }
+
+ /**
+ * Submits a procedure that assigns a region without waiting for it to finish
+ * @param regionInfo the region we would like to assign
+ * @return the future returned by the two-argument assignAsync, called with a null server name
+ * @throws IOException if the two-argument assignAsync throws it
+ */
+ public Future<byte[]> assignAsync(RegionInfo regionInfo) throws IOException {
+ return assignAsync(regionInfo, null);
+ }
+
public long unassign(RegionInfo regionInfo) throws IOException {
RegionStateNode regionNode = regionStates.getRegionStateNode(regionInfo);
if (regionNode == null) {
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestMetaWithReplicas.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestMetaWithReplicas.java
index 5db646f..a5a98a6 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestMetaWithReplicas.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestMetaWithReplicas.java
@@ -23,12 +23,14 @@ import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
+import java.io.IOException;
import java.util.Arrays;
import java.util.Collection;
import java.util.EnumSet;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
+import java.util.concurrent.Future;
import java.util.concurrent.atomic.AtomicBoolean;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.Abortable;
@@ -42,9 +44,12 @@ import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.StartMiniClusterOption;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.TableNotFoundException;
+import org.apache.hadoop.hbase.master.HMaster;
+import org.apache.hadoop.hbase.master.MasterServices;
import org.apache.hadoop.hbase.master.assignment.AssignmentManager;
import org.apache.hadoop.hbase.master.assignment.AssignmentTestingUtil;
-import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
+import org.apache.hadoop.hbase.master.assignment.RegionStateNode;
+import org.apache.hadoop.hbase.master.assignment.TransitRegionStateProcedure;
import org.apache.hadoop.hbase.regionserver.StorefileRefresherChore;
import org.apache.hadoop.hbase.testclassification.LargeTests;
import org.apache.hadoop.hbase.util.Bytes;
@@ -54,6 +59,7 @@ import org.apache.hadoop.hbase.zookeeper.ZKUtil;
import org.apache.hadoop.hbase.zookeeper.ZKWatcher;
import org.apache.hadoop.hbase.zookeeper.ZNodePaths;
import org.junit.After;
+import org.junit.Assert;
import org.junit.Before;
import org.junit.ClassRule;
import org.junit.Rule;
@@ -63,6 +69,8 @@ import org.junit.rules.TestName;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
+
/**
* Tests the scenarios where replicas are enabled for the meta table
*/
@@ -163,7 +171,7 @@ public class TestMetaWithReplicas {
conf.get("zookeeper.znode.metaserver", "meta-region-server"));
// check that the data in the znode is parseable (this would also mean the znode exists)
byte[] data = ZKUtil.getData(zkw, primaryMetaZnode);
- ProtobufUtil.toServerName(data);
+ ProtobufUtil.parseServerNameFrom(data);
for (int i = 1; i < 3; i++) {
String secZnode = ZNodePaths.joinZNode(baseZNode,
conf.get("zookeeper.znode.metaserver", "meta-region-server") + "-" + i);
@@ -171,7 +179,7 @@ public class TestMetaWithReplicas {
assertTrue(str.equals(secZnode));
// check that the data in the znode is parseable (this would also mean the znode exists)
data = ZKUtil.getData(zkw, secZnode);
- ProtobufUtil.toServerName(data);
+ ProtobufUtil.parseServerNameFrom(data);
}
}
@@ -198,7 +206,7 @@ public class TestMetaWithReplicas {
String primaryMetaZnode = ZNodePaths.joinZNode(baseZNode,
conf.get("zookeeper.znode.metaserver", "meta-region-server"));
byte[] data = ZKUtil.getData(zkw, primaryMetaZnode);
- ServerName primary = ProtobufUtil.toServerName(data);
+ ServerName primary = ProtobufUtil.parseServerNameFrom(data);
LOG.info("Primary=" + primary.toString());
TableName TABLE = TableName.valueOf("testShutdownHandling");
@@ -304,7 +312,7 @@ public class TestMetaWithReplicas {
conf.get("zookeeper.znode.metaserver", "meta-region-server"));
// check that the data in the znode is parseable (this would also mean the znode exists)
byte[] data = ZKUtil.getData(zkw, primaryMetaZnode);
- ServerName currentServer = ProtobufUtil.toServerName(data);
+ ServerName currentServer = ProtobufUtil.parseServerNameFrom(data);
Collection<ServerName> liveServers = TEST_UTIL.getAdmin()
.getClusterMetrics(EnumSet.of(Option.LIVE_SERVERS)).getLiveServerMetrics().keySet();
ServerName moveToServer = null;
@@ -326,7 +334,7 @@ public class TestMetaWithReplicas {
do {
Thread.sleep(10);
data = ZKUtil.getData(zkw, primaryMetaZnode);
- currentServer = ProtobufUtil.toServerName(data);
+ currentServer = ProtobufUtil.parseServerNameFrom(data);
i++;
} while (!moveToServer.equals(currentServer) && i < max); //wait for 10 seconds overall
assertNotEquals(max, i);
@@ -353,4 +361,67 @@ public class TestMetaWithReplicas {
assertNotEquals(3, i);
}
}
+
+ @Test
+ public void testFailedReplicaAssigment() throws InterruptedException, IOException {
+ // Restart with the rigged master so that assigning meta replica 2 fails; startup must survive it.
+ TEST_UTIL.getMiniHBaseCluster().getConfiguration().setClass(HConstants.MASTER_IMPL, BrokenMetaReplicaMaster.class, HMaster.class);
+ TEST_UTIL.getMiniHBaseCluster().stopMaster(0).join();
+ HMaster newMaster = TEST_UTIL.getMiniHBaseCluster().startMaster().getMaster();
+ // Wait for the new master to report that it is initialized.
+ TEST_UTIL.waitFor(30000, () -> newMaster.isInitialized());
+ TEST_UTIL.getMiniHBaseCluster().getConfiguration().unset(HConstants.MASTER_IMPL);
+
+
+ AssignmentManager am = newMaster.getAssignmentManager();
+ // Replica 1 should have been assigned: its region state node has a location.
+ RegionInfo metaReplicaHri = RegionReplicaUtil.getRegionInfoForReplica(
+ RegionInfoBuilder.FIRST_META_REGIONINFO, 1);
+ RegionStateNode metaReplicaRegionNode = am.getRegionStates().getOrCreateRegionStateNode(metaReplicaHri);
+ Assert.assertNotNull(metaReplicaRegionNode.getRegionLocation());
+ // Replica 2 should have failed to be assigned: its region state node has no location.
+ RegionInfo metaReplicaHri2 = RegionReplicaUtil.getRegionInfoForReplica(
+ RegionInfoBuilder.FIRST_META_REGIONINFO, 2);
+ RegionStateNode metaReplicaRegionNode2 = am.getRegionStates().getOrCreateRegionStateNode(metaReplicaHri2);
+ Assert.assertNull(metaReplicaRegionNode2.getRegionLocation());
+
+ // The master must still be active and running despite the failed replica assignment.
+ Assert.assertFalse(newMaster.isStopping());
+ Assert.assertFalse(newMaster.isStopped());
+ Assert.assertTrue(newMaster.isActiveMaster());
+ }
+
+ public static class BrokenTransitRegionStateProcedure extends TransitRegionStateProcedure {
+ protected BrokenTransitRegionStateProcedure() {
+ // Superclass arguments are presumably (env, hri, assignCandidate, forceNewPlan, type) — TODO confirm.
+ super(null, null, null, false,TransitionType.ASSIGN);
+ }
+ }
+
+ /** HMaster whose createAssignmentManager returns a BrokenMasterMetaAssignmentManager. */
+ public static class BrokenMetaReplicaMaster extends HMaster{
+ public BrokenMetaReplicaMaster(final Configuration conf) throws IOException {
+ super(conf);
+ }
+
+ @Override
+ public AssignmentManager createAssignmentManager(MasterServices master) {
+ return new BrokenMasterMetaAssignmentManager(master);
+ }
+ }
+
+ /**
+ * AssignmentManager that, for the region with replica id 2, sets a
+ * BrokenTransitRegionStateProcedure on the region state node before delegating to the regular
+ * assignAsync; every other region is passed straight through.
+ */
+ public static class BrokenMasterMetaAssignmentManager extends AssignmentManager{
+ MasterServices master;
+ public BrokenMasterMetaAssignmentManager(final MasterServices master) {
+ super(master);
+ this.master = master;
+ }
+
+ @Override
+ public Future<byte[]> assignAsync(RegionInfo regionInfo, ServerName sn) throws IOException {
+ RegionStateNode regionNode = getRegionStates().getOrCreateRegionStateNode(regionInfo);
+ // Presumably the pre-set procedure makes the following assign of replica 2 fail — TODO confirm.
+ if (regionNode.getRegionInfo().getReplicaId() == 2) {
+ regionNode.setProcedure(new BrokenTransitRegionStateProcedure());
+ }
+ return super.assignAsync(regionInfo, sn);
+ }
+ }
}