You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by el...@apache.org on 2018/12/07 22:29:02 UTC

[19/51] [abbrv] hbase git commit: HBASE-21508 Ignore the reportRegionStateTransition call from a dead server

HBASE-21508 Ignore the reportRegionStateTransition call from a dead server

Signed-off-by: Guanghao Zhang <zg...@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/a0e3cb6c
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/a0e3cb6c
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/a0e3cb6c

Branch: refs/heads/HBASE-20952
Commit: a0e3cb6c0cddcb27ac82d01b21aeb0f547c6f13c
Parents: 27c0bf5
Author: Duo Zhang <zh...@apache.org>
Authored: Mon Nov 26 11:28:08 2018 +0800
Committer: Duo Zhang <zh...@apache.org>
Committed: Mon Nov 26 14:32:14 2018 +0800

----------------------------------------------------------------------
 .../org/apache/hadoop/hbase/master/HMaster.java |   3 +-
 .../master/assignment/AssignmentManager.java    | 205 ++++++++++---------
 .../hbase/master/assignment/RegionStates.java   |  16 --
 .../hbase/master/assignment/ServerState.java    |   5 +
 .../master/assignment/ServerStateNode.java      |  49 +++--
 .../assignment/TransitRegionStateProcedure.java |   8 +-
 .../master/procedure/ProcedureSyncWait.java     | 105 ++++++----
 .../master/procedure/ServerCrashProcedure.java  |   2 +-
 .../apache/hadoop/hbase/client/TestAdmin2.java  |   4 +-
 .../master/assignment/MockMasterServices.java   |  26 +--
 .../assignment/TestAssignmentManager.java       |   9 -
 .../assignment/TestAssignmentManagerBase.java   |   2 +-
 ...portRegionStateTransitionFromDeadServer.java | 201 ++++++++++++++++++
 13 files changed, 415 insertions(+), 220 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hbase/blob/a0e3cb6c/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
index e1d3740..ae04283 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
@@ -3800,8 +3800,7 @@ public class HMaster extends HRegionServer implements MasterServices {
     if (offload) {
       final List<ServerName> destServers = this.serverManager.createDestinationServersList();
       for (ServerName server : serversAdded) {
-        final List<RegionInfo> regionsOnServer =
-          this.assignmentManager.getRegionStates().getServerRegionInfoSet(server);
+        final List<RegionInfo> regionsOnServer = this.assignmentManager.getRegionsOnServer(server);
         for (RegionInfo hri : regionsOnServer) {
           ServerName dest = balancer.randomAssignment(hri, destServers);
           if (dest == null) {

http://git-wip-us.apache.org/repos/asf/hbase/blob/a0e3cb6c/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java
index 37e5f0c..a564ea9 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java
@@ -53,7 +53,6 @@ import org.apache.hadoop.hbase.master.MetricsAssignmentManager;
 import org.apache.hadoop.hbase.master.RegionPlan;
 import org.apache.hadoop.hbase.master.RegionState;
 import org.apache.hadoop.hbase.master.RegionState.State;
-import org.apache.hadoop.hbase.master.ServerListener;
 import org.apache.hadoop.hbase.master.TableStateManager;
 import org.apache.hadoop.hbase.master.balancer.FavoredStochasticBalancer;
 import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv;
@@ -99,7 +98,7 @@ import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProto
  * Unassigns are triggered by DisableTable, Split, Merge
  */
 @InterfaceAudience.Private
-public class AssignmentManager implements ServerListener {
+public class AssignmentManager {
   private static final Logger LOG = LoggerFactory.getLogger(AssignmentManager.class);
 
   // TODO: AMv2
@@ -193,9 +192,6 @@ public class AssignmentManager implements ServerListener {
 
     LOG.trace("Starting assignment manager");
 
-    // Register Server Listener
-    master.getServerManager().registerListener(this);
-
     // Start the Assignment Thread
     startAssignmentThread();
 
@@ -275,9 +271,6 @@ public class AssignmentManager implements ServerListener {
     // Stop the RegionStateStore
     regionStates.clear();
 
-    // Unregister Server Listener
-    master.getServerManager().unregisterListener(this);
-
     // Update meta events (for testing)
     if (hasProcExecutor) {
       metaLoadEvent.suspend();
@@ -319,14 +312,31 @@ public class AssignmentManager implements ServerListener {
     return regionStates;
   }
 
+  /**
+   * Returns the regions hosted by the specified server.
+   * <p/>
+   * Notice that, for SCP, after we submit the SCP, no one can change the region list for the
+   * ServerStateNode so we do not need any locks here. And for other usage, this can only give you a
+   * snapshot of the current region list for this server, which means, right after you get the
+   * region list, new regions may be moved to this server or some regions may be moved out from this
+   * server, so you should not use it critically if you need strong consistency.
+   */
+  public List<RegionInfo> getRegionsOnServer(ServerName serverName) {
+    ServerStateNode serverInfo = regionStates.getServerNode(serverName);
+    if (serverInfo == null) {
+      return Collections.emptyList();
+    }
+    return serverInfo.getRegionInfoList();
+  }
+
   public RegionStateStore getRegionStateStore() {
     return regionStateStore;
   }
 
   public List<ServerName> getFavoredNodes(final RegionInfo regionInfo) {
-    return this.shouldAssignRegionsWithFavoredNodes?
-        ((FavoredStochasticBalancer)getBalancer()).getFavoredNodes(regionInfo):
-          ServerName.EMPTY_SERVER_LIST;
+    return this.shouldAssignRegionsWithFavoredNodes
+      ? ((FavoredStochasticBalancer) getBalancer()).getFavoredNodes(regionInfo)
+      : ServerName.EMPTY_SERVER_LIST;
   }
 
   // ============================================================================================
@@ -522,12 +532,11 @@ public class AssignmentManager implements ServerListener {
   }
 
   private List<RegionInfo> getSystemTables(ServerName serverName) {
-    Set<RegionStateNode> regions = this.getRegionStates().getServerNode(serverName).getRegions();
-    if (regions == null) {
+    ServerStateNode serverNode = regionStates.getServerNode(serverName);
+    if (serverNode == null) {
       return Collections.emptyList();
     }
-    return regions.stream().map(RegionStateNode::getRegionInfo)
-      .filter(r -> r.getTable().isSystemTable()).collect(Collectors.toList());
+    return serverNode.getSystemRegionInfoList();
   }
 
   private void preTransitCheck(RegionStateNode regionNode, RegionState.State[] expectedStates)
@@ -817,54 +826,79 @@ public class AssignmentManager implements ServerListener {
   // ============================================================================================
   //  RS Region Transition Report helpers
   // ============================================================================================
-  // TODO: Move this code in MasterRpcServices and call on specific event?
+  private void reportRegionStateTransition(ReportRegionStateTransitionResponse.Builder builder,
+      ServerName serverName, List<RegionStateTransition> transitionList) throws IOException {
+    for (RegionStateTransition transition : transitionList) {
+      switch (transition.getTransitionCode()) {
+        case OPENED:
+        case FAILED_OPEN:
+        case CLOSED:
+          assert transition.getRegionInfoCount() == 1 : transition;
+          final RegionInfo hri = ProtobufUtil.toRegionInfo(transition.getRegionInfo(0));
+          updateRegionTransition(serverName, transition.getTransitionCode(), hri,
+            transition.hasOpenSeqNum() ? transition.getOpenSeqNum() : HConstants.NO_SEQNUM);
+          break;
+        case READY_TO_SPLIT:
+        case SPLIT:
+        case SPLIT_REVERTED:
+          assert transition.getRegionInfoCount() == 3 : transition;
+          final RegionInfo parent = ProtobufUtil.toRegionInfo(transition.getRegionInfo(0));
+          final RegionInfo splitA = ProtobufUtil.toRegionInfo(transition.getRegionInfo(1));
+          final RegionInfo splitB = ProtobufUtil.toRegionInfo(transition.getRegionInfo(2));
+          updateRegionSplitTransition(serverName, transition.getTransitionCode(), parent, splitA,
+            splitB);
+          break;
+        case READY_TO_MERGE:
+        case MERGED:
+        case MERGE_REVERTED:
+          assert transition.getRegionInfoCount() == 3 : transition;
+          final RegionInfo merged = ProtobufUtil.toRegionInfo(transition.getRegionInfo(0));
+          final RegionInfo mergeA = ProtobufUtil.toRegionInfo(transition.getRegionInfo(1));
+          final RegionInfo mergeB = ProtobufUtil.toRegionInfo(transition.getRegionInfo(2));
+          updateRegionMergeTransition(serverName, transition.getTransitionCode(), merged, mergeA,
+            mergeB);
+          break;
+      }
+    }
+  }
+
   public ReportRegionStateTransitionResponse reportRegionStateTransition(
       final ReportRegionStateTransitionRequest req) throws PleaseHoldException {
-    final ReportRegionStateTransitionResponse.Builder builder =
+    ReportRegionStateTransitionResponse.Builder builder =
         ReportRegionStateTransitionResponse.newBuilder();
-    final ServerName serverName = ProtobufUtil.toServerName(req.getServer());
+    ServerName serverName = ProtobufUtil.toServerName(req.getServer());
+    ServerStateNode serverNode = regionStates.getOrCreateServer(serverName);
+    // here we have to acquire a read lock instead of a simple exclusive lock. This is because that
+    // we should not block other reportRegionStateTransition call from the same region server. This
+    // is not only about performance, but also to prevent dead lock. Think of the meta region is
+    // also on the same region server and you hold the lock which blocks the
+    // reportRegionStateTransition for meta, and since meta is not online, you will block inside the
+    // lock protection to wait for meta online...
+    serverNode.readLock().lock();
     try {
-      for (RegionStateTransition transition: req.getTransitionList()) {
-        switch (transition.getTransitionCode()) {
-          case OPENED:
-          case FAILED_OPEN:
-          case CLOSED:
-            assert transition.getRegionInfoCount() == 1 : transition;
-            final RegionInfo hri = ProtobufUtil.toRegionInfo(transition.getRegionInfo(0));
-            updateRegionTransition(serverName, transition.getTransitionCode(), hri,
-                transition.hasOpenSeqNum() ? transition.getOpenSeqNum() : HConstants.NO_SEQNUM);
-            break;
-          case READY_TO_SPLIT:
-          case SPLIT:
-          case SPLIT_REVERTED:
-            assert transition.getRegionInfoCount() == 3 : transition;
-            final RegionInfo parent = ProtobufUtil.toRegionInfo(transition.getRegionInfo(0));
-            final RegionInfo splitA = ProtobufUtil.toRegionInfo(transition.getRegionInfo(1));
-            final RegionInfo splitB = ProtobufUtil.toRegionInfo(transition.getRegionInfo(2));
-            updateRegionSplitTransition(serverName, transition.getTransitionCode(),
-              parent, splitA, splitB);
-            break;
-          case READY_TO_MERGE:
-          case MERGED:
-          case MERGE_REVERTED:
-            assert transition.getRegionInfoCount() == 3 : transition;
-            final RegionInfo merged = ProtobufUtil.toRegionInfo(transition.getRegionInfo(0));
-            final RegionInfo mergeA = ProtobufUtil.toRegionInfo(transition.getRegionInfo(1));
-            final RegionInfo mergeB = ProtobufUtil.toRegionInfo(transition.getRegionInfo(2));
-            updateRegionMergeTransition(serverName, transition.getTransitionCode(),
-              merged, mergeA, mergeB);
-            break;
+      // we only accept reportRegionStateTransition if the region server is online, see the comment
+      // above in submitServerCrash method and HBASE-21508 for more details.
+      if (serverNode.isInState(ServerState.ONLINE)) {
+        try {
+          reportRegionStateTransition(builder, serverName, req.getTransitionList());
+        } catch (PleaseHoldException e) {
+          LOG.trace("Failed transition ", e);
+          throw e;
+        } catch (UnsupportedOperationException | IOException e) {
+          // TODO: at the moment we have a single error message and the RS will abort
+          // if the master says that one of the region transitions failed.
+          LOG.warn("Failed transition", e);
+          builder.setErrorMessage("Failed transition " + e.getMessage());
         }
+      } else {
+        LOG.warn("The region server {} is already dead, skip reportRegionStateTransition call",
+          serverName);
+        builder.setErrorMessage("You are dead");
       }
-    } catch (PleaseHoldException e) {
-      LOG.trace("Failed transition ", e);
-      throw e;
-    } catch (UnsupportedOperationException|IOException e) {
-      // TODO: at the moment we have a single error message and the RS will abort
-      // if the master says that one of the region transitions failed.
-      LOG.warn("Failed transition", e);
-      builder.setErrorMessage("Failed transition " + e.getMessage());
+    } finally {
+      serverNode.readLock().unlock();
     }
+
     return builder.build();
   }
 
@@ -1017,9 +1051,6 @@ public class AssignmentManager implements ServerListener {
     }
     // The Heartbeat tells us of what regions are on the region serve, check the state.
     checkOnlineRegionsReport(serverNode, regionNames);
-
-    // wake report event
-    wakeServerReportEvent(serverNode);
   }
 
   // just check and output possible inconsistency, without actually doing anything
@@ -1061,18 +1092,6 @@ public class AssignmentManager implements ServerListener {
     }
   }
 
-  protected boolean waitServerReportEvent(ServerName serverName, Procedure<?> proc) {
-    final ServerStateNode serverNode = regionStates.getOrCreateServer(serverName);
-    if (serverNode == null) {
-      LOG.warn("serverName=null; {}", proc);
-    }
-    return serverNode.getReportEvent().suspendIfNotReady(proc);
-  }
-
-  protected void wakeServerReportEvent(final ServerStateNode serverNode) {
-    serverNode.getReportEvent().wake(getProcedureScheduler());
-  }
-
   // ============================================================================================
   //  RIT chore
   // ============================================================================================
@@ -1321,13 +1340,27 @@ public class AssignmentManager implements ServerListener {
     return 0;
   }
 
-  public long submitServerCrash(final ServerName serverName, final boolean shouldSplitWal) {
-    boolean carryingMeta = isCarryingMeta(serverName);
-    ProcedureExecutor<MasterProcedureEnv> procExec = this.master.getMasterProcedureExecutor();
-    long pid = procExec.submitProcedure(new ServerCrashProcedure(procExec.getEnvironment(),
-        serverName, shouldSplitWal, carryingMeta));
-    LOG.debug("Added=" + serverName
-        + " to dead servers, submitted shutdown handler to be executed meta=" + carryingMeta);
+  public long submitServerCrash(ServerName serverName, boolean shouldSplitWal) {
+    boolean carryingMeta;
+    long pid;
+    ServerStateNode serverNode = regionStates.getOrCreateServer(serverName);
+    // we hold the write lock here for fencing on reportRegionStateTransition. Once we set the
+    // server state to CRASHED, we will no longer accept the reportRegionStateTransition call from
+    // this server. This is used to simplify the implementation for TRSP and SCP, where we can make
+    // sure that, the region list fetched by SCP will not be changed any more.
+    serverNode.writeLock().lock();
+    try {
+      serverNode.setState(ServerState.CRASHED);
+      carryingMeta = isCarryingMeta(serverName);
+      ProcedureExecutor<MasterProcedureEnv> procExec = this.master.getMasterProcedureExecutor();
+      pid = procExec.submitProcedure(new ServerCrashProcedure(procExec.getEnvironment(), serverName,
+        shouldSplitWal, carryingMeta));
+    } finally {
+      serverNode.writeLock().unlock();
+    }
+    LOG.info(
+      "Added {} to dead servers which carryingMeta={}, submitted ServerCrashProcedure pid={}",
+      serverName, carryingMeta, pid);
     return pid;
   }
 
@@ -1847,22 +1880,6 @@ public class AssignmentManager implements ServerListener {
         .collect(Collectors.toList());
   }
 
-  // ============================================================================================
-  //  Server Helpers
-  // ============================================================================================
-  @Override
-  public void serverAdded(final ServerName serverName) {
-  }
-
-  @Override
-  public void serverRemoved(final ServerName serverName) {
-    final ServerStateNode serverNode = regionStates.getServerNode(serverName);
-    if (serverNode == null) return;
-
-    // just in case, wake procedures waiting for this server report
-    wakeServerReportEvent(serverNode);
-  }
-
   @VisibleForTesting
   MasterServices getMaster() {
     return master;

http://git-wip-us.apache.org/repos/asf/hbase/blob/a0e3cb6c/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStates.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStates.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStates.java
index 2b9c0bd..7b85409 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStates.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStates.java
@@ -357,22 +357,6 @@ public class RegionStates {
         ((hri.isOffline() || hri.isSplit()) && offline);
   }
 
-  /**
-   * Returns the set of regions hosted by the specified server
-   * @param serverName the server we are interested in
-   * @return set of RegionInfo hosted by the specified server
-   */
-  public List<RegionInfo> getServerRegionInfoSet(final ServerName serverName) {
-    ServerStateNode serverInfo = getServerNode(serverName);
-    if (serverInfo == null) {
-      return Collections.emptyList();
-    }
-
-    synchronized (serverInfo) {
-      return serverInfo.getRegionInfoList();
-    }
-  }
-
   // ============================================================================================
   // Split helpers
   // These methods will only be called in ServerCrashProcedure, and at the end of SCP we will remove

http://git-wip-us.apache.org/repos/asf/hbase/blob/a0e3cb6c/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/ServerState.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/ServerState.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/ServerState.java
index 6925c42..3efe6e2 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/ServerState.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/ServerState.java
@@ -30,6 +30,11 @@ enum ServerState {
   ONLINE,
 
   /**
+   * Indicate that the server has crashed, i.e., we have already scheduled a SCP for it.
+   */
+  CRASHED,
+
+  /**
    * Only server which carries meta can have this state. We will split wal for meta and then
    * assign meta first before splitting other wals.
    */

http://git-wip-us.apache.org/repos/asf/hbase/blob/a0e3cb6c/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/ServerStateNode.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/ServerStateNode.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/ServerStateNode.java
index 2042214..6f763aa 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/ServerStateNode.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/ServerStateNode.java
@@ -17,12 +17,15 @@
  */
 package org.apache.hadoop.hbase.master.assignment;
 
-import java.util.ArrayList;
+import java.util.List;
 import java.util.Set;
 import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.locks.Lock;
+import java.util.concurrent.locks.ReadWriteLock;
+import java.util.concurrent.locks.ReentrantReadWriteLock;
+import java.util.stream.Collectors;
 import org.apache.hadoop.hbase.ServerName;
 import org.apache.hadoop.hbase.client.RegionInfo;
-import org.apache.hadoop.hbase.procedure2.ProcedureEvent;
 import org.apache.yetus.audience.InterfaceAudience;
 
 /**
@@ -31,23 +34,16 @@ import org.apache.yetus.audience.InterfaceAudience;
 @InterfaceAudience.Private
 class ServerStateNode implements Comparable<ServerStateNode> {
 
-  private static final class ServerReportEvent extends ProcedureEvent<ServerName> {
-    public ServerReportEvent(final ServerName serverName) {
-      super(serverName);
-    }
-  }
-
-  private final ServerReportEvent reportEvent;
-
   private final Set<RegionStateNode> regions;
   private final ServerName serverName;
 
+  private final ReadWriteLock lock = new ReentrantReadWriteLock();
+
   private volatile ServerState state = ServerState.ONLINE;
 
-  public ServerStateNode(final ServerName serverName) {
+  public ServerStateNode(ServerName serverName) {
     this.serverName = serverName;
     this.regions = ConcurrentHashMap.newKeySet();
-    this.reportEvent = new ServerReportEvent(serverName);
   }
 
   public ServerName getServerName() {
@@ -58,10 +54,6 @@ class ServerStateNode implements Comparable<ServerStateNode> {
     return state;
   }
 
-  public ProcedureEvent<?> getReportEvent() {
-    return reportEvent;
-  }
-
   public boolean isInState(final ServerState... expected) {
     boolean expectedState = false;
     if (expected != null) {
@@ -76,20 +68,17 @@ class ServerStateNode implements Comparable<ServerStateNode> {
     this.state = state;
   }
 
-  public Set<RegionStateNode> getRegions() {
-    return regions;
-  }
-
   public int getRegionCount() {
     return regions.size();
   }
 
-  public ArrayList<RegionInfo> getRegionInfoList() {
-    ArrayList<RegionInfo> hris = new ArrayList<RegionInfo>(regions.size());
-    for (RegionStateNode region : regions) {
-      hris.add(region.getRegionInfo());
-    }
-    return hris;
+  public List<RegionInfo> getRegionInfoList() {
+    return regions.stream().map(RegionStateNode::getRegionInfo).collect(Collectors.toList());
+  }
+
+  public List<RegionInfo> getSystemRegionInfoList() {
+    return regions.stream().filter(RegionStateNode::isSystemTable)
+      .map(RegionStateNode::getRegionInfo).collect(Collectors.toList());
   }
 
   public void addRegion(final RegionStateNode regionNode) {
@@ -100,6 +89,14 @@ class ServerStateNode implements Comparable<ServerStateNode> {
     this.regions.remove(regionNode);
   }
 
+  public Lock readLock() {
+    return lock.readLock();
+  }
+
+  public Lock writeLock() {
+    return lock.writeLock();
+  }
+
   @Override
   public int compareTo(final ServerStateNode other) {
     return getServerName().compareTo(other.getServerName());

http://git-wip-us.apache.org/repos/asf/hbase/blob/a0e3cb6c/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/TransitRegionStateProcedure.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/TransitRegionStateProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/TransitRegionStateProcedure.java
index 90ebf7b..0885a6a 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/TransitRegionStateProcedure.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/TransitRegionStateProcedure.java
@@ -500,11 +500,9 @@ public class TransitRegionStateProcedure
       case REGION_STATE_TRANSITION_CONFIRM_CLOSED:
       case REGION_STATE_TRANSITION_CONFIRM_OPENED:
         // for these 3 states, the region may still be online on the crashed server
-        if (serverName.equals(regionNode.getRegionLocation())) {
-          env.getAssignmentManager().regionClosed(regionNode, false);
-          if (currentState != RegionStateTransitionState.REGION_STATE_TRANSITION_CLOSE) {
-            regionNode.getProcedureEvent().wake(env.getProcedureScheduler());
-          }
+        env.getAssignmentManager().regionClosed(regionNode, false);
+        if (currentState != RegionStateTransitionState.REGION_STATE_TRANSITION_CLOSE) {
+          regionNode.getProcedureEvent().wake(env.getProcedureScheduler());
         }
         break;
       default:

http://git-wip-us.apache.org/repos/asf/hbase/blob/a0e3cb6c/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ProcedureSyncWait.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ProcedureSyncWait.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ProcedureSyncWait.java
index c8ff9f8..9353124 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ProcedureSyncWait.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ProcedureSyncWait.java
@@ -61,51 +61,61 @@ public final class ProcedureSyncWait {
   }
 
   private static class ProcedureFuture implements Future<byte[]> {
-      private final ProcedureExecutor<MasterProcedureEnv> procExec;
-      private final Procedure<?> proc;
+    private final ProcedureExecutor<MasterProcedureEnv> procExec;
+    private final Procedure<?> proc;
 
-      private boolean hasResult = false;
-      private byte[] result = null;
+    private boolean hasResult = false;
+    private byte[] result = null;
 
-      public ProcedureFuture(ProcedureExecutor<MasterProcedureEnv> procExec, Procedure<?> proc) {
-        this.procExec = procExec;
-        this.proc = proc;
-      }
+    public ProcedureFuture(ProcedureExecutor<MasterProcedureEnv> procExec, Procedure<?> proc) {
+      this.procExec = procExec;
+      this.proc = proc;
+    }
 
-      @Override
-      public boolean cancel(boolean mayInterruptIfRunning) { return false; }
+    @Override
+    public boolean cancel(boolean mayInterruptIfRunning) {
+      return false;
+    }
 
-      @Override
-      public boolean isCancelled() { return false; }
+    @Override
+    public boolean isCancelled() {
+      return false;
+    }
 
-      @Override
-      public boolean isDone() { return hasResult; }
+    @Override
+    public boolean isDone() {
+      return hasResult;
+    }
 
-      @Override
-      public byte[] get() throws InterruptedException, ExecutionException {
-        if (hasResult) return result;
-        try {
-          return waitForProcedureToComplete(procExec, proc, Long.MAX_VALUE);
-        } catch (Exception e) {
-          throw new ExecutionException(e);
-        }
+    @Override
+    public byte[] get() throws InterruptedException, ExecutionException {
+      if (hasResult) {
+        return result;
+      }
+      try {
+        return waitForProcedureToComplete(procExec, proc, Long.MAX_VALUE);
+      } catch (Exception e) {
+        throw new ExecutionException(e);
       }
+    }
 
-      @Override
-      public byte[] get(long timeout, TimeUnit unit)
-          throws InterruptedException, ExecutionException, TimeoutException {
-        if (hasResult) return result;
-        try {
-          result = waitForProcedureToComplete(procExec, proc, unit.toMillis(timeout));
-          hasResult = true;
-          return result;
-        } catch (TimeoutIOException e) {
-          throw new TimeoutException(e.getMessage());
-        } catch (Exception e) {
-          throw new ExecutionException(e);
-        }
+    @Override
+    public byte[] get(long timeout, TimeUnit unit)
+        throws InterruptedException, ExecutionException, TimeoutException {
+      if (hasResult) {
+        return result;
+      }
+      try {
+        result = waitForProcedureToComplete(procExec, proc, unit.toMillis(timeout));
+        hasResult = true;
+        return result;
+      } catch (TimeoutIOException e) {
+        throw new TimeoutException(e.getMessage());
+      } catch (Exception e) {
+        throw new ExecutionException(e);
       }
     }
+  }
 
   public static Future<byte[]> submitProcedure(final ProcedureExecutor<MasterProcedureEnv> procExec,
       final Procedure<MasterProcedureEnv> proc) {
@@ -124,9 +134,8 @@ public final class ProcedureSyncWait {
   }
 
   public static byte[] waitForProcedureToCompleteIOE(
-      final ProcedureExecutor<MasterProcedureEnv> procExec,
-      final Procedure<?> proc, final long timeout)
-  throws IOException {
+      final ProcedureExecutor<MasterProcedureEnv> procExec, final Procedure<?> proc,
+      final long timeout) throws IOException {
     try {
       return waitForProcedureToComplete(procExec, proc, timeout);
     } catch (IOException e) {
@@ -139,7 +148,7 @@ public final class ProcedureSyncWait {
   public static byte[] waitForProcedureToComplete(
       final ProcedureExecutor<MasterProcedureEnv> procExec, final Procedure<?> proc,
       final long timeout) throws IOException {
-    waitFor(procExec.getEnvironment(), "pid=" + proc.getProcId(),
+    waitFor(procExec.getEnvironment(), timeout, "pid=" + proc.getProcId(),
       new ProcedureSyncWait.Predicate<Boolean>() {
         @Override
         public Boolean evaluate() throws IOException {
@@ -171,15 +180,25 @@ public final class ProcedureSyncWait {
 
   public static <T> T waitFor(MasterProcedureEnv env, String purpose, Predicate<T> predicate)
       throws IOException {
-    final Configuration conf = env.getMasterConfiguration();
-    final long waitTime = conf.getLong("hbase.master.wait.on.region", 5 * 60 * 1000);
-    final long waitingTimeForEvents = conf.getInt("hbase.master.event.waiting.time", 1000);
+    Configuration conf = env.getMasterConfiguration();
+    long waitTime = conf.getLong("hbase.master.wait.on.region", 5 * 60 * 1000);
+    return waitFor(env, waitTime, purpose, predicate);
+  }
+
+  public static <T> T waitFor(MasterProcedureEnv env, long waitTime, String purpose,
+      Predicate<T> predicate) throws IOException {
+    Configuration conf = env.getMasterConfiguration();
+    long waitingTimeForEvents = conf.getInt("hbase.master.event.waiting.time", 1000);
     return waitFor(env, waitTime, waitingTimeForEvents, purpose, predicate);
   }
 
   public static <T> T waitFor(MasterProcedureEnv env, long waitTime, long waitingTimeForEvents,
       String purpose, Predicate<T> predicate) throws IOException {
-    final long done = EnvironmentEdgeManager.currentTime() + waitTime;
+    long done = EnvironmentEdgeManager.currentTime() + waitTime;
+    if (done <= 0) {
+      // long overflow, usually this means we pass Long.MAX_VALUE as waitTime
+      done = Long.MAX_VALUE;
+    }
     boolean logged = false;
     do {
       T result = predicate.evaluate();

http://git-wip-us.apache.org/repos/asf/hbase/blob/a0e3cb6c/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ServerCrashProcedure.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ServerCrashProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ServerCrashProcedure.java
index 1fcc6eb..048bca8 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ServerCrashProcedure.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ServerCrashProcedure.java
@@ -146,7 +146,7 @@ public class ServerCrashProcedure
           break;
         case SERVER_CRASH_GET_REGIONS:
           this.regionsOnCrashedServer =
-            services.getAssignmentManager().getRegionStates().getServerRegionInfoSet(serverName);
+            services.getAssignmentManager().getRegionsOnServer(serverName);
           // Where to go next? Depends on whether we should split logs at all or
           // if we should do distributed log splitting.
           if (!this.shouldSplitWal) {

http://git-wip-us.apache.org/repos/asf/hbase/blob/a0e3cb6c/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestAdmin2.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestAdmin2.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestAdmin2.java
index 4ab1a8f..97c7b9b 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestAdmin2.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestAdmin2.java
@@ -713,12 +713,12 @@ public class TestAdmin2 {
     assertEquals(3, clusterRegionServers.size());
 
     HashMap<ServerName, List<RegionInfo>> serversToDecommssion = new HashMap<>();
-    // Get a server that has regions. We will decommission two of the servers,
+    // Get a server that has meta online. We will decommission two of the servers,
     // leaving one online.
     int i;
     for (i = 0; i < clusterRegionServers.size(); i++) {
       List<RegionInfo> regionsOnServer = admin.getRegions(clusterRegionServers.get(i));
-      if (regionsOnServer.size() > 0) {
+      if (admin.getRegions(clusterRegionServers.get(i)).stream().anyMatch(p -> p.isMetaRegion())) {
         serversToDecommssion.put(clusterRegionServers.get(i), regionsOnServer);
         break;
       }

http://git-wip-us.apache.org/repos/asf/hbase/blob/a0e3cb6c/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/MockMasterServices.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/MockMasterServices.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/MockMasterServices.java
index 5a1f87d..56467cc 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/MockMasterServices.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/MockMasterServices.java
@@ -20,7 +20,6 @@ package org.apache.hadoop.hbase.master.assignment;
 import static org.mockito.ArgumentMatchers.any;
 
 import java.io.IOException;
-import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
 import java.util.NavigableMap;
@@ -28,7 +27,6 @@ import java.util.SortedSet;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hbase.CoordinatedStateManager;
-import org.apache.hadoop.hbase.ServerLoad;
 import org.apache.hadoop.hbase.ServerMetricsBuilder;
 import org.apache.hadoop.hbase.ServerName;
 import org.apache.hadoop.hbase.TableDescriptors;
@@ -51,7 +49,6 @@ import org.apache.hadoop.hbase.master.balancer.LoadBalancerFactory;
 import org.apache.hadoop.hbase.master.procedure.MasterProcedureConstants;
 import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv;
 import org.apache.hadoop.hbase.master.procedure.RSProcedureDispatcher;
-import org.apache.hadoop.hbase.procedure2.Procedure;
 import org.apache.hadoop.hbase.procedure2.ProcedureEvent;
 import org.apache.hadoop.hbase.procedure2.ProcedureExecutor;
 import org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility;
@@ -93,19 +90,15 @@ public class MockMasterServices extends MockNoopMasterServices {
   private final ClusterConnection connection;
   private final LoadBalancer balancer;
   private final ServerManager serverManager;
-  // Set of regions on a 'server'. Populated externally. Used in below faking 'cluster'.
-  private final NavigableMap<ServerName, SortedSet<byte []>> regionsToRegionServers;
 
-  private final ProcedureEvent initialized = new ProcedureEvent("master initialized");
+  private final ProcedureEvent<?> initialized = new ProcedureEvent<>("master initialized");
   public static final String DEFAULT_COLUMN_FAMILY_NAME = "cf";
   public static final ServerName MOCK_MASTER_SERVERNAME =
       ServerName.valueOf("mockmaster.example.org", 1234, -1L);
 
   public MockMasterServices(Configuration conf,
-      NavigableMap<ServerName, SortedSet<byte []>> regionsToRegionServers)
-  throws IOException {
+      NavigableMap<ServerName, SortedSet<byte[]>> regionsToRegionServers) throws IOException {
     super(conf);
-    this.regionsToRegionServers = regionsToRegionServers;
     Superusers.initialize(conf);
     this.fileSystemManager = new MasterFileSystem(conf);
     this.walManager = new MasterWalManager(this);
@@ -120,15 +113,6 @@ public class MockMasterServices extends MockNoopMasterServices {
       public boolean isTableDisabled(final TableName tableName) {
         return false;
       }
-
-      @Override
-      protected boolean waitServerReportEvent(ServerName serverName, Procedure proc) {
-        // Make a report with current state of the server 'serverName' before we call wait..
-        SortedSet<byte[]> regions = regionsToRegionServers.get(serverName);
-        getAssignmentManager().reportOnlineRegions(serverName,
-          regions == null ? new HashSet<byte[]>() : regions);
-        return super.waitServerReportEvent(serverName, proc);
-      }
     };
     this.balancer = LoadBalancerFactory.getLoadBalancer(conf);
     this.serverManager = new ServerManager(this);
@@ -176,7 +160,7 @@ public class MockMasterServices extends MockNoopMasterServices {
     this.assignmentManager.start();
     for (int i = 0; i < numServes; ++i) {
       ServerName sn = ServerName.valueOf("localhost", 100 + i, 1);
-      serverManager.regionServerReport(sn, new ServerLoad(ServerMetricsBuilder.of(sn)));
+      serverManager.regionServerReport(sn, ServerMetricsBuilder.of(sn));
     }
     this.procedureExecutor.getEnvironment().setEventReady(initialized, true);
   }
@@ -202,7 +186,7 @@ public class MockMasterServices extends MockNoopMasterServices {
       return;
     }
     ServerName sn = ServerName.valueOf(serverName.getAddress().toString(), startCode);
-    serverManager.regionServerReport(sn, new ServerLoad(ServerMetricsBuilder.of(sn)));
+    serverManager.regionServerReport(sn, ServerMetricsBuilder.of(sn));
   }
 
   @Override
@@ -260,7 +244,7 @@ public class MockMasterServices extends MockNoopMasterServices {
   }
 
   @Override
-  public ProcedureEvent getInitializedEvent() {
+  public ProcedureEvent<?> getInitializedEvent() {
     return this.initialized;
   }
 

http://git-wip-us.apache.org/repos/asf/hbase/blob/a0e3cb6c/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestAssignmentManager.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestAssignmentManager.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestAssignmentManager.java
index 94963a0..5ec7cc6 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestAssignmentManager.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestAssignmentManager.java
@@ -27,7 +27,6 @@ import org.apache.hadoop.hbase.HBaseTestingUtility;
 import org.apache.hadoop.hbase.TableName;
 import org.apache.hadoop.hbase.client.RegionInfo;
 import org.apache.hadoop.hbase.client.RegionInfoBuilder;
-import org.apache.hadoop.hbase.exceptions.UnexpectedStateException;
 import org.apache.hadoop.hbase.master.RegionState.State;
 import org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility;
 import org.apache.hadoop.hbase.procedure2.util.StringUtils;
@@ -49,14 +48,6 @@ public class TestAssignmentManager extends TestAssignmentManagerBase {
 
   private static final Logger LOG = LoggerFactory.getLogger(TestAssignmentManager.class);
 
-  @Test(expected = NullPointerException.class)
-  public void testWaitServerReportEventWithNullServer() throws UnexpectedStateException {
-    // Test what happens if we pass in null server. I'd expect it throws NPE.
-    if (this.am.waitServerReportEvent(null, null)) {
-      throw new UnexpectedStateException();
-    }
-  }
-
   @Test
   public void testAssignWithGoodExec() throws Exception {
     // collect AM metrics before test

http://git-wip-us.apache.org/repos/asf/hbase/blob/a0e3cb6c/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestAssignmentManagerBase.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestAssignmentManagerBase.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestAssignmentManagerBase.java
index 5f5a576..7ab37bc 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestAssignmentManagerBase.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestAssignmentManagerBase.java
@@ -186,7 +186,7 @@ public abstract class TestAssignmentManagerBase {
 
   protected byte[] waitOnFuture(final Future<byte[]> future) throws Exception {
     try {
-      return future.get(5, TimeUnit.SECONDS);
+      return future.get(60, TimeUnit.SECONDS);
     } catch (ExecutionException e) {
       LOG.info("ExecutionException", e);
       Exception ee = (Exception) e.getCause();

http://git-wip-us.apache.org/repos/asf/hbase/blob/a0e3cb6c/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestReportRegionStateTransitionFromDeadServer.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestReportRegionStateTransitionFromDeadServer.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestReportRegionStateTransitionFromDeadServer.java
new file mode 100644
index 0000000..6c9e5eb
--- /dev/null
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestReportRegionStateTransitionFromDeadServer.java
@@ -0,0 +1,201 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.master.assignment;
+
+import static org.junit.Assert.assertNotEquals;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.concurrent.CountDownLatch;
+import java.util.concurrent.ExecutionException;
+import java.util.concurrent.Future;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.TimeoutException;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.HBaseClassTestRule;
+import org.apache.hadoop.hbase.HBaseIOException;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.PleaseHoldException;
+import org.apache.hadoop.hbase.ServerName;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.RegionInfo;
+import org.apache.hadoop.hbase.master.HMaster;
+import org.apache.hadoop.hbase.master.MasterServices;
+import org.apache.hadoop.hbase.master.RegionPlan;
+import org.apache.hadoop.hbase.master.ServerManager;
+import org.apache.hadoop.hbase.regionserver.HRegionServer;
+import org.apache.hadoop.hbase.testclassification.MasterTests;
+import org.apache.hadoop.hbase.testclassification.MediumTests;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.zookeeper.KeeperException;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.junit.ClassRule;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+
+import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.ReportRegionStateTransitionRequest;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.ReportRegionStateTransitionResponse;
+
+@Category({ MasterTests.class, MediumTests.class })
+public class TestReportRegionStateTransitionFromDeadServer {
+
+  @ClassRule
+  public static final HBaseClassTestRule CLASS_RULE =
+    HBaseClassTestRule.forClass(TestReportRegionStateTransitionFromDeadServer.class);
+
+  private static final List<ServerName> EXCLUDE_SERVERS = new ArrayList<>();
+
+  private static CountDownLatch ARRIVE_GET_REGIONS;
+  private static CountDownLatch RESUME_GET_REGIONS;
+  private static CountDownLatch ARRIVE_REPORT;
+  private static CountDownLatch RESUME_REPORT;
+
+  private static final class ServerManagerForTest extends ServerManager {
+
+    public ServerManagerForTest(MasterServices master) {
+      super(master);
+    }
+
+    @Override
+    public List<ServerName> createDestinationServersList() {
+      return super.createDestinationServersList(EXCLUDE_SERVERS);
+    }
+  }
+
+  private static final class AssignmentManagerForTest extends AssignmentManager {
+
+    public AssignmentManagerForTest(MasterServices master) {
+      super(master);
+    }
+
+    @Override
+    public List<RegionInfo> getRegionsOnServer(ServerName serverName) {
+      List<RegionInfo> regions = super.getRegionsOnServer(serverName);
+      if (ARRIVE_GET_REGIONS != null) {
+        ARRIVE_GET_REGIONS.countDown();
+        try {
+          RESUME_GET_REGIONS.await();
+        } catch (InterruptedException e) {
+        }
+      }
+      return regions;
+    }
+
+    @Override
+    public ReportRegionStateTransitionResponse reportRegionStateTransition(
+        ReportRegionStateTransitionRequest req) throws PleaseHoldException {
+      if (ARRIVE_REPORT != null && req.getTransitionList().stream()
+        .allMatch(t -> !ProtobufUtil.toRegionInfo(t.getRegionInfo(0)).isMetaRegion())) {
+        ARRIVE_REPORT.countDown();
+        try {
+          RESUME_REPORT.await();
+        } catch (InterruptedException e) {
+        }
+      }
+      return super.reportRegionStateTransition(req);
+    }
+  }
+
+  public static final class HMasterForTest extends HMaster {
+
+    public HMasterForTest(Configuration conf) throws IOException, KeeperException {
+      super(conf);
+    }
+
+    @Override
+    protected AssignmentManager createAssignmentManager(MasterServices master) {
+      return new AssignmentManagerForTest(master);
+    }
+
+    @Override
+    protected ServerManager createServerManager(MasterServices master) throws IOException {
+      setupClusterConnection();
+      return new ServerManagerForTest(master);
+    }
+  }
+
+  private static final HBaseTestingUtility UTIL = new HBaseTestingUtility();
+
+  private static TableName NAME = TableName.valueOf("Report");
+
+  private static byte[] CF = Bytes.toBytes("cf");
+
+  @BeforeClass
+  public static void setUp() throws Exception {
+    UTIL.getConfiguration().setClass(HConstants.MASTER_IMPL, HMasterForTest.class, HMaster.class);
+    UTIL.getConfiguration().setInt("hbase.regionserver.msginterval", 1000);
+    UTIL.startMiniCluster(3);
+    UTIL.getAdmin().balancerSwitch(false, true);
+    UTIL.createTable(NAME, CF);
+    UTIL.waitTableAvailable(NAME);
+  }
+
+  @AfterClass
+  public static void tearDown() throws Exception {
+    UTIL.shutdownMiniCluster();
+  }
+
+  @Test
+  public void test() throws HBaseIOException, InterruptedException, ExecutionException {
+    RegionInfo region = UTIL.getMiniHBaseCluster().getRegions(NAME).get(0).getRegionInfo();
+    AssignmentManager am = UTIL.getMiniHBaseCluster().getMaster().getAssignmentManager();
+    RegionStateNode rsn = am.getRegionStates().getRegionStateNode(region);
+
+    // move from rs0 to rs1, and then kill rs0. Later add rs1 to exclude servers, and at last verify
+    // that the region should not be on rs1 and rs2 both.
+    HRegionServer rs0 = UTIL.getMiniHBaseCluster().getRegionServer(rsn.getRegionLocation());
+    HRegionServer rs1 = UTIL.getOtherRegionServer(rs0);
+    HRegionServer rs2 = UTIL.getMiniHBaseCluster().getRegionServerThreads().stream()
+      .map(t -> t.getRegionServer()).filter(rs -> rs != rs0 && rs != rs1).findAny().get();
+
+    RESUME_REPORT = new CountDownLatch(1);
+    ARRIVE_REPORT = new CountDownLatch(1);
+    Future<?> future =
+      am.moveAsync(new RegionPlan(region, rs0.getServerName(), rs1.getServerName()));
+    ARRIVE_REPORT.await();
+
+    RESUME_GET_REGIONS = new CountDownLatch(1);
+    ARRIVE_GET_REGIONS = new CountDownLatch(1);
+    rs0.abort("For testing!");
+
+    ARRIVE_GET_REGIONS.await();
+    RESUME_REPORT.countDown();
+
+    try {
+      future.get(15, TimeUnit.SECONDS);
+    } catch (TimeoutException e) {
+      // after the fix in HBASE-21508 we will get this exception as the TRSP can not be finished any
+      // more before SCP interrupts it. It's OK.
+    }
+
+    EXCLUDE_SERVERS.add(rs1.getServerName());
+    RESUME_GET_REGIONS.countDown();
+    // wait until there are no running procedures, no SCP and no TRSP
+    UTIL.waitFor(30000, () -> UTIL.getMiniHBaseCluster().getMaster().getMasterProcedureExecutor()
+      .getActiveProcIds().isEmpty());
+    boolean onRS1 = !rs1.getRegions(NAME).isEmpty();
+    boolean onRS2 = !rs2.getRegions(NAME).isEmpty();
+    assertNotEquals(
+      "should either be on rs1 or rs2, but onRS1 is " + onRS1 + " and on RS2 is " + onRS2, onRS1,
+      onRS2);
+  }
+}