You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by zh...@apache.org on 2020/06/09 03:26:15 UTC
[hbase] branch branch-2 updated: HBASE-24117 Shutdown
AssignmentManager before ProcedureExecutor may cause SCP to accidentally
skip assigning a region (#1865)
This is an automated email from the ASF dual-hosted git repository.
zhangduo pushed a commit to branch branch-2
in repository https://gitbox.apache.org/repos/asf/hbase.git
The following commit(s) were added to refs/heads/branch-2 by this push:
new c5dacfb HBASE-24117 Shutdown AssignmentManager before ProcedureExecutor may cause SCP to accidentally skip assigning a region (#1865)
c5dacfb is described below
commit c5dacfbbea2547a77bae47513089408eeabb61da
Author: Duo Zhang <zh...@apache.org>
AuthorDate: Tue Jun 9 11:07:16 2020 +0800
HBASE-24117 Shutdown AssignmentManager before ProcedureExecutor may cause SCP to accidentally skip assigning a region (#1865)
Signed-off-by: Michael Stack <st...@apache.org>
---
.../src/main/java/org/apache/hadoop/hbase/master/HMaster.java | 7 +++++--
.../apache/hadoop/hbase/master/procedure/ServerCrashProcedure.java | 7 +++++++
.../hbase/master/assignment/TestCloseRegionWhileRSCrash.java | 2 +-
3 files changed, 13 insertions(+), 3 deletions(-)
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
index dd069d9..c497c12 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
@@ -1502,6 +1502,11 @@ public class HMaster extends HRegionServer implements MasterServices {
LOG.debug("Stopping service threads");
+ // Stop the procedure executor before other services such as the server manager and the
+ // assignment manager, because some running procedures depend on those services. See
+ // HBASE-24117 for an example.
+ stopProcedureExecutor();
+
if (this.quotaManager != null) {
this.quotaManager.stop();
}
@@ -1516,8 +1521,6 @@ public class HMaster extends HRegionServer implements MasterServices {
this.assignmentManager.stop();
}
- stopProcedureExecutor();
-
if (masterRegion != null) {
masterRegion.close(isAborted());
}
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ServerCrashProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ServerCrashProcedure.java
index 076c266..6ca8c0c 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ServerCrashProcedure.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ServerCrashProcedure.java
@@ -24,6 +24,7 @@ import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
+import org.apache.hadoop.hbase.DoNotRetryIOException;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.client.RegionInfoBuilder;
@@ -485,6 +486,12 @@ public class ServerCrashProcedure
// UPDATE: HBCKServerCrashProcedure overrides isMatchingRegionLocation; this check can get
// in the way of our clearing out 'Unknown Servers'.
if (!isMatchingRegionLocation(regionNode)) {
+ // See HBASE-24117. Although we have already changed the shutdown order, it is still worth
+ // double checking here to confirm that we do not skip assignment incorrectly.
+ if (!am.isRunning()) {
+ throw new DoNotRetryIOException(
+ "AssignmentManager has been stopped, can not process assignment any more");
+ }
LOG.info("{} found {} whose regionLocation no longer matches {}, skipping assign...",
this, regionNode, serverName);
continue;
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestCloseRegionWhileRSCrash.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestCloseRegionWhileRSCrash.java
index 4761991..75f73e5 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestCloseRegionWhileRSCrash.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestCloseRegionWhileRSCrash.java
@@ -163,7 +163,7 @@ public class TestCloseRegionWhileRSCrash {
UTIL.shutdownMiniCluster();
}
- @org.junit.Ignore @Test // Until root-cause of flakeyness, HBASE-24117, is addressed.
+ @Test
public void testRetryBackoff() throws IOException, InterruptedException {
HRegionServer srcRs = UTIL.getRSForFirstRegionInTable(TABLE_NAME);
RegionInfo region = srcRs.getRegions(TABLE_NAME).get(0).getRegionInfo();