You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by ps...@apache.org on 2021/03/11 16:21:06 UTC
[hbase] branch master updated: HBASE-25587 [hbck2] Schedule SCP for
all unknown servers (#2978)
This is an automated email from the ASF dual-hosted git repository.
psomogyi pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hbase.git
The following commit(s) were added to refs/heads/master by this push:
new 0cc1ae4 HBASE-25587 [hbck2] Schedule SCP for all unknown servers (#2978)
0cc1ae4 is described below
commit 0cc1ae48ed2768bcc0aea32d55823266ac141926
Author: Peter Somogyi <ps...@apache.org>
AuthorDate: Thu Mar 11 17:20:36 2021 +0100
HBASE-25587 [hbck2] Schedule SCP for all unknown servers (#2978)
Signed-off-by: Wellington Chevreuil <wc...@apache.org>
---
.../org/apache/hadoop/hbase/client/HBaseHbck.java | 16 +++++++
.../java/org/apache/hadoop/hbase/client/Hbck.java | 2 +
.../src/main/protobuf/server/master/Master.proto | 10 ++++
.../hadoop/hbase/master/MasterRpcServices.java | 25 ++++++++++
.../apache/hadoop/hbase/master/ServerManager.java | 11 +++++
.../hadoop/hbase/master/procedure/TestHBCKSCP.java | 30 +++++++-----
.../hbase/master/procedure/TestHBCKSCPUnknown.java | 55 ++++++++++++++++++++++
7 files changed, 137 insertions(+), 12 deletions(-)
diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/HBaseHbck.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/HBaseHbck.java
index 2562593..d153ef7 100644
--- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/HBaseHbck.java
+++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/HBaseHbck.java
@@ -45,6 +45,8 @@ import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.HbckServic
import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.RegionSpecifierAndState;
import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.RunHbckChoreRequest;
import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.RunHbckChoreResponse;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.ScheduleSCPsForUnknownServersRequest;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.ScheduleSCPsForUnknownServersResponse;
import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.ScheduleServerCrashProcedureResponse;
import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.UnassignsResponse;
@@ -201,6 +203,20 @@ public class HBaseHbck implements Hbck {
}
@Override
+ public List<Long> scheduleSCPsForUnknownServers() throws IOException {
+ try {
+ ScheduleSCPsForUnknownServersResponse response =
+ this.hbck.scheduleSCPsForUnknownServers(
+ rpcControllerFactory.newController(),
+ ScheduleSCPsForUnknownServersRequest.newBuilder().build());
+ return response.getPidList();
+ } catch (ServiceException se) {
+ LOG.debug("Failed to run ServerCrashProcedures for unknown servers", se);
+ throw new IOException(se);
+ }
+ }
+
+ @Override
public boolean runHbckChore() throws IOException {
try {
RunHbckChoreResponse response = this.hbck.runHbckChore(rpcControllerFactory.newController(),
diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/Hbck.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/Hbck.java
index 42e8594..c518a87 100644
--- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/Hbck.java
+++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/Hbck.java
@@ -129,6 +129,8 @@ public interface Hbck extends Abortable, Closeable {
List<Long> scheduleServerCrashProcedures(List<ServerName> serverNames) throws IOException;
+ List<Long> scheduleSCPsForUnknownServers() throws IOException;
+
/**
* Request HBCK chore to run at master side.
*
diff --git a/hbase-protocol-shaded/src/main/protobuf/server/master/Master.proto b/hbase-protocol-shaded/src/main/protobuf/server/master/Master.proto
index 118ce77..45b4149 100644
--- a/hbase-protocol-shaded/src/main/protobuf/server/master/Master.proto
+++ b/hbase-protocol-shaded/src/main/protobuf/server/master/Master.proto
@@ -1225,6 +1225,12 @@ message ScheduleServerCrashProcedureResponse {
repeated uint64 pid = 1;
}
+message ScheduleSCPsForUnknownServersRequest {}
+
+message ScheduleSCPsForUnknownServersResponse {
+ repeated uint64 pid = 1;
+}
+
message FixMetaRequest {}
message FixMetaResponse {}
@@ -1264,6 +1270,10 @@ service HbckService {
rpc ScheduleServerCrashProcedure(ScheduleServerCrashProcedureRequest)
returns(ScheduleServerCrashProcedureResponse);
+ /** Schedule a ServerCrashProcedure for each unknown server */
+ rpc ScheduleSCPsForUnknownServers(ScheduleSCPsForUnknownServersRequest)
+ returns(ScheduleSCPsForUnknownServersResponse);
+
/**
* Request HBCK chore to run at master side.
*/
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterRpcServices.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterRpcServices.java
index 8f2f0da..21239db 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterRpcServices.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterRpcServices.java
@@ -2732,6 +2732,31 @@ public class MasterRpcServices extends RSRpcServices implements
}
@Override
+ public MasterProtos.ScheduleSCPsForUnknownServersResponse scheduleSCPsForUnknownServers(
+ RpcController controller, MasterProtos.ScheduleSCPsForUnknownServersRequest request)
+ throws ServiceException {
+
+ List<Long> pids = new ArrayList<>();
+ final Set<ServerName> serverNames =
+ master.getAssignmentManager().getRegionStates().getRegionStates().stream()
+ .map(RegionState::getServerName).collect(Collectors.toSet());
+
+ final Set<ServerName> unknownServerNames = serverNames.stream()
+ .filter(sn -> master.getServerManager().isServerUnknown(sn)).collect(Collectors.toSet());
+
+ for (ServerName sn: unknownServerNames) {
+ LOG.info("{} schedule ServerCrashProcedure for unknown {}",
+ this.master.getClientIdAuditPrefix(), sn);
+ if (shouldSubmitSCP(sn)) {
+ pids.add(this.master.getServerManager().expireServer(sn, true));
+ } else {
+ pids.add(Procedure.NO_PROC_ID);
+ }
+ }
+ return MasterProtos.ScheduleSCPsForUnknownServersResponse.newBuilder().addAllPid(pids).build();
+ }
+
+ @Override
public FixMetaResponse fixMeta(RpcController controller, FixMetaRequest request)
throws ServiceException {
rpcPreCheck("fixMeta");
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java
index 7bbfd0b..6c4020b 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java
@@ -901,6 +901,17 @@ public class ServerManager {
return serverName == null || deadservers.isDeadServer(serverName);
}
+ /**
+ * Check if a server is unknown. A server can be online,
+ * or known to be dead, or unknown to this manager (i.e., not online,
+ * not known to be dead either; it is simply not tracked by the
+ * master any more, for example, a very old previous instance).
+ */
+ public boolean isServerUnknown(ServerName serverName) {
+ return serverName == null
+ || (!onlineServers.containsKey(serverName) && !deadservers.isDeadServer(serverName));
+ }
+
public void shutdownCluster() {
String statusStr = "Cluster shutdown requested of master=" + this.master.getServerName();
LOG.info(statusStr);
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestHBCKSCP.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestHBCKSCP.java
index dda7422..a24b49d 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestHBCKSCP.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestHBCKSCP.java
@@ -40,12 +40,8 @@ import org.apache.hadoop.hbase.master.HMaster;
import org.apache.hadoop.hbase.master.RegionState;
import org.apache.hadoop.hbase.procedure2.Procedure;
import org.apache.hadoop.hbase.regionserver.HRegionServer;
-import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
-import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos;
-
import org.apache.hadoop.hbase.testclassification.LargeTests;
import org.apache.hadoop.hbase.testclassification.MasterTests;
-
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.Pair;
import org.apache.hadoop.hbase.util.Threads;
@@ -57,6 +53,10 @@ import org.junit.rules.TestName;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+import org.apache.hbase.thirdparty.com.google.protobuf.ServiceException;
+import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos;
+
/**
* Test of the HBCK-version of SCP.
@@ -109,14 +109,15 @@ public class TestHBCKSCP extends TestSCPBase {
// not be processing this server 'normally'. Remove it from processing by
// calling 'finish' and then remove it from dead servers so rsServerName
// becomes an 'Unknown Server' even though it is still around.
+ LOG.info("Killing {}", rsServerName);
+ cluster.killRegionServer(rsServerName);
+
master.getServerManager().moveFromOnlineToDeadServers(rsServerName);
master.getServerManager().getDeadServers().removeDeadServer(rsServerName);
master.getAssignmentManager().getRegionStates().removeServer(rsServerName);
// Kill the server. Nothing should happen since an 'Unknown Server' as far
// as the Master is concerned; i.e. no SCP.
- LOG.info("Killing {}", rsServerName);
HRegionServer hrs = cluster.getRegionServer(rsServerName);
- hrs.abort("KILLED");
while (!hrs.isStopped()) {
Threads.sleep(10);
}
@@ -134,12 +135,7 @@ public class TestHBCKSCP extends TestSCPBase {
// I now have 'Unknown Server' references in hbase:meta; i.e. Server references
// with no corresponding SCP. Queue one.
- MasterProtos.ScheduleServerCrashProcedureResponse response =
- master.getMasterRpcServices().scheduleServerCrashProcedure(null,
- MasterProtos.ScheduleServerCrashProcedureRequest.newBuilder().
- addServerName(ProtobufUtil.toServerName(rsServerName)).build());
- assertEquals(1, response.getPidCount());
- long pid = response.getPid(0);
+ long pid = scheduleHBCKSCP(rsServerName, master);
assertNotEquals(Procedure.NO_PROC_ID, pid);
while (master.getMasterProcedureExecutor().getActiveProcIds().contains(pid)) {
Threads.sleep(10);
@@ -155,6 +151,16 @@ public class TestHBCKSCP extends TestSCPBase {
assertFalse(searchMeta(master, rsServerName));
}
+ protected long scheduleHBCKSCP(ServerName rsServerName, HMaster master) throws ServiceException {
+ MasterProtos.ScheduleServerCrashProcedureResponse response =
+ master.getMasterRpcServices().scheduleServerCrashProcedure(null,
+ MasterProtos.ScheduleServerCrashProcedureRequest.newBuilder().
+ addServerName(ProtobufUtil.toServerName(rsServerName)).build());
+ assertEquals(1, response.getPidCount());
+ long pid = response.getPid(0);
+ return pid;
+ }
+
/**
* @return True if we find reference to <code>sn</code> in meta table.
*/
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestHBCKSCPUnknown.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestHBCKSCPUnknown.java
new file mode 100644
index 0000000..6702f40
--- /dev/null
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestHBCKSCPUnknown.java
@@ -0,0 +1,55 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.master.procedure;
+
+import static org.junit.Assert.assertEquals;
+
+import org.apache.hadoop.hbase.HBaseClassTestRule;
+import org.apache.hadoop.hbase.ServerName;
+import org.apache.hadoop.hbase.master.HMaster;
+import org.apache.hadoop.hbase.testclassification.LargeTests;
+import org.apache.hadoop.hbase.testclassification.MasterTests;
+
+import org.junit.ClassRule;
+import org.junit.experimental.categories.Category;
+import org.apache.hbase.thirdparty.com.google.protobuf.ServiceException;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos;
+
+
+/**
+ * Test of the HBCK-version of SCP, scheduled via scheduleSCPsForUnknownServers.
+ * The HBCKSCP is an SCP, except that it reads hbase:meta for the list of Regions that were
+ * on the server-to-process rather than consulting the Master's in-memory state.
+ */
+@Category({ MasterTests.class, LargeTests.class })
+public class TestHBCKSCPUnknown extends TestHBCKSCP {
+
+ @ClassRule
+ public static final HBaseClassTestRule CLASS_RULE =
+ HBaseClassTestRule.forClass(TestHBCKSCPUnknown.class);
+
+ @Override
+ protected long scheduleHBCKSCP(ServerName rsServerName, HMaster master) throws ServiceException {
+ MasterProtos.ScheduleSCPsForUnknownServersResponse response =
+ master.getMasterRpcServices().scheduleSCPsForUnknownServers(null,
+ MasterProtos.ScheduleSCPsForUnknownServersRequest.newBuilder().build());
+ assertEquals(1, response.getPidCount());
+ long pid = response.getPid(0);
+ return pid;
+ }
+}