You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by zh...@apache.org on 2019/05/13 08:17:42 UTC

[hbase] branch branch-2 updated: HBASE-22365 Region may be opened on two RegionServers

This is an automated email from the ASF dual-hosted git repository.

zhangduo pushed a commit to branch branch-2
in repository https://gitbox.apache.org/repos/asf/hbase.git


The following commit(s) were added to refs/heads/branch-2 by this push:
     new 4b4c3ba  HBASE-22365 Region may be opened on two RegionServers
4b4c3ba is described below

commit 4b4c3ba6e7f221702c2ed11a902819158319b3b1
Author: zhangduo <zh...@apache.org>
AuthorDate: Fri May 10 20:50:59 2019 +0800

    HBASE-22365 Region may be opened on two RegionServers
---
 .../hbase/master/assignment/AssignmentManager.java | 101 ++++++----
 .../master/assignment/CloseRegionProcedure.java    |  22 ++-
 .../master/assignment/OpenRegionProcedure.java     |  64 +++---
 .../assignment/RegionRemoteProcedureBase.java      |  49 +++--
 .../assignment/TransitRegionStateProcedure.java    |  11 +-
 .../master/assignment/TestSCPGetRegionsRace.java   | 218 +++++++++++++++++++++
 6 files changed, 381 insertions(+), 84 deletions(-)

diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java
index c2af566..1dc6c18 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java
@@ -213,6 +213,9 @@ public class AssignmentManager {
       try {
         regionNode.setRegionLocation(regionState.getServerName());
         regionNode.setState(regionState.getState());
+        if (regionNode.getProcedure() != null) {
+          regionNode.getProcedure().stateLoaded(this, regionNode);
+        }
         setMetaAssigned(regionState.getRegion(), regionState.getState() == State.OPEN);
       } finally {
         regionNode.unlock();
@@ -235,14 +238,12 @@ public class AssignmentManager {
       TransitRegionStateProcedure existingProc = regionNode.getProcedure();
       if (existingProc != null) {
         // This is possible, as we will detach the procedure from the RSN before we
-        // actually finish the procedure. This is because that, we will update the region state
-        // directly in the reportTransition method for TRSP, and theoretically the region transition
-        // has been done, so we need to detach the procedure from the RSN. But actually the
-        // procedure has not been marked as done in the pv2 framework yet, so it is possible that we
-        // schedule a new TRSP immediately and when arriving here, we will find out that there are
-        // multiple TRSPs for the region. But we can make sure that, only the last one can take the
-        // charge, the previous ones should have all been finished already.
-        // So here we will compare the proc id, the greater one will win.
+        // actually finish the procedure. This is because that, we will detach the TRSP from the RSN
+        // during execution, at that time, the procedure has not been marked as done in the pv2
+        // framework yet, so it is possible that we schedule a new TRSP immediately and when
+        // arriving here, we will find out that there are multiple TRSPs for the region. But we can
+        // make sure that, only the last one can take the charge, the previous ones should have all
+        // been finished already. So here we will compare the proc id, the greater one will win.
         if (existingProc.getProcId() < proc.getProcId()) {
           // the new one wins, unset and set it to the new one below
           regionNode.unsetProcedure(existingProc);
@@ -1301,7 +1302,6 @@ public class AssignmentManager {
           // In any of these cases, state is empty. For now, presume OFFLINE but there are probably
           // cases where we need to probe more to be sure this correct; TODO informed by experience.
           LOG.info(regionInfo.getEncodedName() + " regionState=null; presuming " + State.OFFLINE);
-
           localState = State.OFFLINE;
         }
         RegionStateNode regionNode = regionStates.getOrCreateRegionStateNode(regionInfo);
@@ -1322,6 +1322,9 @@ public class AssignmentManager {
         } else if (localState == State.OFFLINE || regionInfo.isOffline()) {
           regionStates.addToOfflineRegions(regionNode);
         }
+        if (regionNode.getProcedure() != null) {
+          regionNode.getProcedure().stateLoaded(AssignmentManager.this, regionNode);
+        }
       }
     });
 
@@ -1482,8 +1485,9 @@ public class AssignmentManager {
   private static final State[] STATES_EXPECTED_ON_UNASSIGN_OR_MOVE = { State.OPEN };
 
   // ============================================================================================
-  //  Region Status update
-  //  Should only be called in TransitRegionStateProcedure
+  // Region Status update
+  // Should only be called in TransitRegionStateProcedure(and related procedures), as the locking
+  // and pre-assumptions are very tricky.
   // ============================================================================================
   private void transitStateAndUpdate(RegionStateNode regionNode, RegionState.State newState,
       RegionState.State... expectedStates) throws IOException {
@@ -1512,7 +1516,7 @@ public class AssignmentManager {
     metrics.incrementOperationCounter();
   }
 
-  // should be called within the synchronized block of RegionStateNode.
+  // should be called under the RegionStateNode lock
   // The parameter 'giveUp' means whether we will try to open the region again, if it is true, then
   // we will persist the FAILED_OPEN state into hbase:meta.
   void regionFailedOpen(RegionStateNode regionNode, boolean giveUp) throws IOException {
@@ -1538,24 +1542,7 @@ public class AssignmentManager {
     }
   }
 
-  // should be called within the synchronized block of RegionStateNode
-  void regionOpened(RegionStateNode regionNode) throws IOException {
-    // TODO: OPENING Updates hbase:meta too... we need to do both here and there?
-    // That is a lot of hbase:meta writing.
-    transitStateAndUpdate(regionNode, State.OPEN, STATES_EXPECTED_ON_OPEN);
-    RegionInfo hri = regionNode.getRegionInfo();
-    if (isMetaRegion(hri)) {
-      // Usually we'd set a table ENABLED at this stage but hbase:meta is ALWAYs enabled, it
-      // can't be disabled -- so skip the RPC (besides... enabled is managed by TableStateManager
-      // which is backed by hbase:meta... Avoid setting ENABLED to avoid having to update state
-      // on table that contains state.
-      setMetaAssigned(hri, true);
-    }
-    regionStates.addRegionToServer(regionNode);
-    regionStates.removeFromFailedOpen(hri);
-  }
-
-  // should be called within the synchronized block of RegionStateNode
+  // should be called under the RegionStateNode lock
   void regionClosing(RegionStateNode regionNode) throws IOException {
     transitStateAndUpdate(regionNode, State.CLOSING, STATES_EXPECTED_ON_CLOSING);
 
@@ -1569,18 +1556,36 @@ public class AssignmentManager {
     metrics.incrementOperationCounter();
   }
 
-  // should be called within the synchronized block of RegionStateNode
-  // The parameter 'normally' means whether we are closed cleanly, if it is true, then it means that
-  // we are closed due to a RS crash.
-  void regionClosed(RegionStateNode regionNode, boolean normally) throws IOException {
-    RegionState.State state = regionNode.getState();
+  // for open and close, they will first be persist to the procedure store in
+  // RegionRemoteProcedureBase. So here we will first change the in memory state as it is considered
+  // as succeeded if the persistence to procedure store is succeeded, and then when the
+  // RegionRemoteProcedureBase is woken up, we will persist the RegionStateNode to hbase:meta.
+
+  // should be called under the RegionStateNode lock
+  void regionOpenedWithoutPersistingToMeta(RegionStateNode regionNode) throws IOException {
+    regionNode.transitionState(State.OPEN, STATES_EXPECTED_ON_OPEN);
+    RegionInfo regionInfo = regionNode.getRegionInfo();
+    regionStates.addRegionToServer(regionNode);
+    regionStates.removeFromFailedOpen(regionInfo);
+  }
+
+  // should be called under the RegionStateNode lock
+  void regionClosedWithoutPersistingToMeta(RegionStateNode regionNode) throws IOException {
     ServerName regionLocation = regionNode.getRegionLocation();
-    if (normally) {
-      regionNode.transitionState(State.CLOSED, STATES_EXPECTED_ON_CLOSED);
-    } else {
-      // For SCP
-      regionNode.transitionState(State.ABNORMALLY_CLOSED);
+    regionNode.transitionState(State.CLOSED, STATES_EXPECTED_ON_CLOSED);
+    regionNode.setRegionLocation(null);
+    if (regionLocation != null) {
+      regionNode.setLastHost(regionLocation);
+      regionStates.removeRegionFromServer(regionLocation, regionNode);
     }
+  }
+
+  // should be called under the RegionStateNode lock
+  // for SCP
+  void regionClosedAbnormally(RegionStateNode regionNode) throws IOException {
+    RegionState.State state = regionNode.getState();
+    ServerName regionLocation = regionNode.getRegionLocation();
+    regionNode.transitionState(State.ABNORMALLY_CLOSED);
     regionNode.setRegionLocation(null);
     boolean succ = false;
     try {
@@ -1599,6 +1604,22 @@ public class AssignmentManager {
     }
   }
 
+  void persistToMeta(RegionStateNode regionNode) throws IOException {
+    regionStateStore.updateRegionLocation(regionNode);
+    RegionInfo regionInfo = regionNode.getRegionInfo();
+    if (isMetaRegion(regionInfo) && regionNode.getState() == State.OPEN) {
+      // Usually we'd set a table ENABLED at this stage but hbase:meta is ALWAYs enabled, it
+      // can't be disabled -- so skip the RPC (besides... enabled is managed by TableStateManager
+      // which is backed by hbase:meta... Avoid setting ENABLED to avoid having to update state
+      // on table that contains state.
+      setMetaAssigned(regionInfo, true);
+    }
+  }
+
+  // ============================================================================================
+  // The above methods can only be called in TransitRegionStateProcedure(and related procedures)
+  // ============================================================================================
+
   public void markRegionAsSplit(final RegionInfo parent, final ServerName serverName,
       final RegionInfo daughterA, final RegionInfo daughterB) throws IOException {
     // Update hbase:meta. Parent will be marked offline and split up in hbase:meta.
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/CloseRegionProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/CloseRegionProcedure.java
index 4fd60f0..af31966 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/CloseRegionProcedure.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/CloseRegionProcedure.java
@@ -21,6 +21,7 @@ import java.io.IOException;
 import org.apache.hadoop.hbase.ServerName;
 import org.apache.hadoop.hbase.client.RegionInfo;
 import org.apache.hadoop.hbase.exceptions.UnexpectedStateException;
+import org.apache.hadoop.hbase.master.RegionState.State;
 import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv;
 import org.apache.hadoop.hbase.master.procedure.RSProcedureDispatcher.RegionCloseOperation;
 import org.apache.hadoop.hbase.procedure2.ProcedureMetrics;
@@ -90,8 +91,8 @@ public class CloseRegionProcedure extends RegionRemoteProcedureBase {
   }
 
   @Override
-  protected void reportTransition(RegionStateNode regionNode, TransitionCode transitionCode,
-      long seqId) throws IOException {
+  protected void checkTransition(RegionStateNode regionNode, TransitionCode transitionCode,
+      long seqId) throws UnexpectedStateException {
     if (transitionCode != TransitionCode.CLOSED) {
       throw new UnexpectedStateException("Received report unexpected " + transitionCode +
         " transition, " + regionNode.toShortString() + ", " + this + ", expected CLOSED.");
@@ -99,8 +100,19 @@ public class CloseRegionProcedure extends RegionRemoteProcedureBase {
   }
 
   @Override
-  protected void updateTransition(MasterProcedureEnv env, RegionStateNode regionNode,
-      TransitionCode transitionCode, long seqId) throws IOException {
-    env.getAssignmentManager().regionClosed(regionNode, true);
+  protected void updateTransitionWithoutPersistingToMeta(MasterProcedureEnv env,
+      RegionStateNode regionNode, TransitionCode transitionCode, long seqId) throws IOException {
+    assert transitionCode == TransitionCode.CLOSED;
+    env.getAssignmentManager().regionClosedWithoutPersistingToMeta(regionNode);
+  }
+
+  @Override
+  protected void restoreSucceedState(AssignmentManager am, RegionStateNode regionNode, long seqId)
+      throws IOException {
+    if (regionNode.getState() == State.CLOSED) {
+      // should have already been persisted, ignore
+      return;
+    }
+    am.regionClosedWithoutPersistingToMeta(regionNode);
   }
 }
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/OpenRegionProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/OpenRegionProcedure.java
index 579b757..4f7fcef 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/OpenRegionProcedure.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/OpenRegionProcedure.java
@@ -21,6 +21,7 @@ import java.io.IOException;
 import org.apache.hadoop.hbase.ServerName;
 import org.apache.hadoop.hbase.client.RegionInfo;
 import org.apache.hadoop.hbase.exceptions.UnexpectedStateException;
+import org.apache.hadoop.hbase.master.RegionState.State;
 import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv;
 import org.apache.hadoop.hbase.master.procedure.RSProcedureDispatcher.RegionOpenOperation;
 import org.apache.hadoop.hbase.procedure2.ProcedureMetrics;
@@ -77,19 +78,31 @@ public class OpenRegionProcedure extends RegionRemoteProcedureBase {
     return env.getAssignmentManager().getAssignmentManagerMetrics().getOpenProcMetrics();
   }
 
+  private void regionOpenedWithoutPersistingToMeta(AssignmentManager am, RegionStateNode regionNode,
+      TransitionCode transitionCode, long openSeqNum) throws IOException {
+    if (openSeqNum < regionNode.getOpenSeqNum()) {
+      LOG.warn(
+        "Received report {} transition from {} for {}, pid={} but the new openSeqNum {}" +
+          " is less than the current one {}, ignoring...",
+        transitionCode, targetServer, regionNode, getProcId(), openSeqNum,
+        regionNode.getOpenSeqNum());
+    } else {
+      regionNode.setOpenSeqNum(openSeqNum);
+    }
+    am.regionOpenedWithoutPersistingToMeta(regionNode);
+  }
+
   @Override
-  protected void reportTransition(RegionStateNode regionNode, TransitionCode transitionCode,
-      long seqId) throws IOException {
+  protected void checkTransition(RegionStateNode regionNode, TransitionCode transitionCode,
+      long openSeqNum) throws UnexpectedStateException {
     switch (transitionCode) {
       case OPENED:
-        // this is the openSeqNum
-        if (seqId < 0) {
+        if (openSeqNum < 0) {
           throw new UnexpectedStateException("Received report unexpected " + TransitionCode.OPENED +
-            " transition openSeqNum=" + seqId + ", " + regionNode + ", proc=" + this);
+            " transition openSeqNum=" + openSeqNum + ", " + regionNode + ", proc=" + this);
         }
         break;
       case FAILED_OPEN:
-        // nothing to check
         break;
       default:
         throw new UnexpectedStateException(
@@ -99,27 +112,26 @@ public class OpenRegionProcedure extends RegionRemoteProcedureBase {
   }
 
   @Override
-  protected void updateTransition(MasterProcedureEnv env, RegionStateNode regionNode,
-      TransitionCode transitionCode, long openSeqNum) throws IOException {
-    switch (transitionCode) {
-      case OPENED:
-        if (openSeqNum < regionNode.getOpenSeqNum()) {
-          LOG.warn(
-            "Received report {} transition from {} for {}, pid={} but the new openSeqNum {}" +
-              " is less than the current one {}, ignoring...",
-            transitionCode, targetServer, regionNode, getProcId(), openSeqNum,
-            regionNode.getOpenSeqNum());
-        } else {
-          regionNode.setOpenSeqNum(openSeqNum);
-        }
-        env.getAssignmentManager().regionOpened(regionNode);
-        break;
-      case FAILED_OPEN:
-        env.getAssignmentManager().regionFailedOpen(regionNode, false);
-        break;
-      default:
-        throw new UnexpectedStateException("Unexpected transition code: " + transitionCode);
+  protected void updateTransitionWithoutPersistingToMeta(MasterProcedureEnv env,
+      RegionStateNode regionNode, TransitionCode transitionCode, long openSeqNum)
+      throws IOException {
+    if (transitionCode == TransitionCode.OPENED) {
+      regionOpenedWithoutPersistingToMeta(env.getAssignmentManager(), regionNode, transitionCode,
+        openSeqNum);
+    } else {
+      assert transitionCode == TransitionCode.FAILED_OPEN;
+      // will not persist to meta if giveUp is false
+      env.getAssignmentManager().regionFailedOpen(regionNode, false);
     }
+  }
 
+  @Override
+  protected void restoreSucceedState(AssignmentManager am, RegionStateNode regionNode,
+      long openSeqNum) throws IOException {
+    if (regionNode.getState() == State.OPEN) {
+      // should have already been persisted, ignore
+      return;
+    }
+    regionOpenedWithoutPersistingToMeta(am, regionNode, TransitionCode.OPENED, openSeqNum);
   }
 }
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionRemoteProcedureBase.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionRemoteProcedureBase.java
index 9377d89..fa00958 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionRemoteProcedureBase.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionRemoteProcedureBase.java
@@ -153,9 +153,13 @@ public abstract class RegionRemoteProcedureBase extends Procedure<MasterProcedur
     return false;
   }
 
-  // do some checks to see if the report is valid, without actually updating meta.
-  protected abstract void reportTransition(RegionStateNode regionNode,
-      TransitionCode transitionCode, long seqId) throws IOException;
+  // do some checks to see if the report is valid
+  protected abstract void checkTransition(RegionStateNode regionNode, TransitionCode transitionCode,
+      long seqId) throws UnexpectedStateException;
+
+  // change the in memory state of the regionNode, but do not update meta.
+  protected abstract void updateTransitionWithoutPersistingToMeta(MasterProcedureEnv env,
+      RegionStateNode regionNode, TransitionCode transitionCode, long seqId) throws IOException;
 
   // A bit strange but the procedure store will throw RuntimeException if we can not persist the
   // state, so upper layer should take care of this...
@@ -175,7 +179,7 @@ public abstract class RegionRemoteProcedureBase extends Procedure<MasterProcedur
       throw new UnexpectedStateException("Received report from " + serverName + ", expected " +
         targetServer + ", " + regionNode + ", proc=" + this);
     }
-    reportTransition(regionNode, transitionCode, seqId);
+    checkTransition(regionNode, transitionCode, seqId);
     // this state means we have received the report from RS, does not mean the result is fine, as we
     // may received a FAILED_OPEN.
     this.state = RegionRemoteProcedureBaseState.REGION_REMOTE_PROCEDURE_REPORT_SUCCEED;
@@ -196,13 +200,24 @@ public abstract class RegionRemoteProcedureBase extends Procedure<MasterProcedur
         this.seqId = HConstants.NO_SEQNUM;
       }
     }
+    try {
+      updateTransitionWithoutPersistingToMeta(env, regionNode, transitionCode, seqId);
+    } catch (IOException e) {
+      throw new AssertionError("should not happen", e);
+    }
   }
 
   void serverCrashed(MasterProcedureEnv env, RegionStateNode regionNode, ServerName serverName) {
-    if (state != RegionRemoteProcedureBaseState.REGION_REMOTE_PROCEDURE_DISPATCH) {
+    if (state == RegionRemoteProcedureBaseState.REGION_REMOTE_PROCEDURE_SERVER_CRASH) {
       // should be a retry
       return;
     }
+    RegionRemoteProcedureBaseState oldState = state;
+    // it is possible that the state is in REGION_REMOTE_PROCEDURE_SERVER_CRASH, think of this
+    // sequence
+    // 1. region is open on the target server and the above reportTransition call is succeeded
+    // 2. before we are woken up and update the meta, the target server crashes, and then we arrive
+    // here
     this.state = RegionRemoteProcedureBaseState.REGION_REMOTE_PROCEDURE_SERVER_CRASH;
     boolean succ = false;
     try {
@@ -210,7 +225,21 @@ public abstract class RegionRemoteProcedureBase extends Procedure<MasterProcedur
       succ = true;
     } finally {
       if (!succ) {
-        this.state = RegionRemoteProcedureBaseState.REGION_REMOTE_PROCEDURE_DISPATCH;
+        this.state = oldState;
+      }
+    }
+  }
+
+  protected abstract void restoreSucceedState(AssignmentManager am, RegionStateNode regionNode,
+      long seqId) throws IOException;
+
+  void stateLoaded(AssignmentManager am, RegionStateNode regionNode) {
+    if (state == RegionRemoteProcedureBaseState.REGION_REMOTE_PROCEDURE_REPORT_SUCCEED) {
+      try {
+        restoreSucceedState(am, regionNode, seqId);
+      } catch (IOException e) {
+        // should not happen as we are just restoring the state
+        throw new AssertionError(e);
       }
     }
   }
@@ -224,10 +253,6 @@ public abstract class RegionRemoteProcedureBase extends Procedure<MasterProcedur
     getParent(env).unattachRemoteProc(this);
   }
 
-  // actually update the state to meta
-  protected abstract void updateTransition(MasterProcedureEnv env, RegionStateNode regionNode,
-      TransitionCode transitionCode, long seqId) throws IOException;
-
   @Override
   protected Procedure<MasterProcedureEnv>[] execute(MasterProcedureEnv env)
       throws ProcedureYieldException, ProcedureSuspendedException, InterruptedException {
@@ -254,7 +279,7 @@ public abstract class RegionRemoteProcedureBase extends Procedure<MasterProcedur
           throw new ProcedureSuspendedException();
         }
         case REGION_REMOTE_PROCEDURE_REPORT_SUCCEED:
-          updateTransition(env, regionNode, transitionCode, seqId);
+          env.getAssignmentManager().persistToMeta(regionNode);
           unattach(env);
           return null;
         case REGION_REMOTE_PROCEDURE_DISPATCH_FAIL:
@@ -262,7 +287,7 @@ public abstract class RegionRemoteProcedureBase extends Procedure<MasterProcedur
           unattach(env);
           return null;
         case REGION_REMOTE_PROCEDURE_SERVER_CRASH:
-          env.getAssignmentManager().regionClosed(regionNode, false);
+          env.getAssignmentManager().regionClosedAbnormally(regionNode);
           unattach(env);
           return null;
         default:
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/TransitRegionStateProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/TransitRegionStateProcedure.java
index 9e017fd..6bf5dc2 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/TransitRegionStateProcedure.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/TransitRegionStateProcedure.java
@@ -403,7 +403,7 @@ public class TransitRegionStateProcedure
       remoteProc.serverCrashed(env, regionNode, serverName);
     } else {
       // we are in RUNNING state, just update the region state, and we will process it later.
-      env.getAssignmentManager().regionClosed(regionNode, false);
+      env.getAssignmentManager().regionClosedAbnormally(regionNode);
     }
   }
 
@@ -416,6 +416,15 @@ public class TransitRegionStateProcedure
     this.remoteProc = null;
   }
 
+  // will be called after we finish loading the meta entry for this region.
+  // used to change the state of the region node if we have a sub procedure, as we may not persist
+  // the state to meta yet. See the code in RegionRemoteProcedureBase.execute for more details.
+  void stateLoaded(AssignmentManager am, RegionStateNode regionNode) {
+    if (remoteProc != null) {
+      remoteProc.stateLoaded(am, regionNode);
+    }
+  }
+
   @Override
   protected void rollbackState(MasterProcedureEnv env, RegionStateTransitionState state)
       throws IOException, InterruptedException {
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestSCPGetRegionsRace.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestSCPGetRegionsRace.java
new file mode 100644
index 0000000..cbbdbdc
--- /dev/null
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestSCPGetRegionsRace.java
@@ -0,0 +1,218 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.master.assignment;
+
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertNull;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.concurrent.CountDownLatch;
+import java.util.concurrent.Future;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.HBaseClassTestRule;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.PleaseHoldException;
+import org.apache.hadoop.hbase.ServerName;
+import org.apache.hadoop.hbase.StartMiniClusterOption;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.RegionInfo;
+import org.apache.hadoop.hbase.master.HMaster;
+import org.apache.hadoop.hbase.master.MasterServices;
+import org.apache.hadoop.hbase.master.RegionPlan;
+import org.apache.hadoop.hbase.master.ServerManager;
+import org.apache.hadoop.hbase.master.procedure.ServerCrashProcedure;
+import org.apache.hadoop.hbase.regionserver.HRegionServer;
+import org.apache.hadoop.hbase.testclassification.MasterTests;
+import org.apache.hadoop.hbase.testclassification.MediumTests;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.IdLock;
+import org.apache.zookeeper.KeeperException;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.junit.ClassRule;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+
+import org.apache.hbase.thirdparty.com.google.common.collect.Iterables;
+
+import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.RegionStateTransition.TransitionCode;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.ReportRegionStateTransitionRequest;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.ReportRegionStateTransitionResponse;
+
+/**
+ * Testcase for HBASE-22365.
+ */
+@Category({ MasterTests.class, MediumTests.class })
+public class TestSCPGetRegionsRace {
+
+  @ClassRule
+  public static final HBaseClassTestRule CLASS_RULE =
+    HBaseClassTestRule.forClass(TestSCPGetRegionsRace.class);
+
+  private static final List<ServerName> EXCLUDE_SERVERS = new ArrayList<>();
+
+  private static final class ServerManagerForTest extends ServerManager {
+
+    public ServerManagerForTest(MasterServices master) {
+      super(master);
+    }
+
+    @Override
+    public List<ServerName> createDestinationServersList() {
+      return super.createDestinationServersList(EXCLUDE_SERVERS);
+    }
+  }
+
+  private static CountDownLatch ARRIVE_REPORT;
+
+  private static CountDownLatch RESUME_REPORT;
+
+  private static CountDownLatch ARRIVE_GET;
+
+  private static CountDownLatch RESUME_GET;
+
+  private static final class AssignmentManagerForTest extends AssignmentManager {
+
+    public AssignmentManagerForTest(MasterServices master) {
+      super(master);
+    }
+
+    @Override
+    public ReportRegionStateTransitionResponse reportRegionStateTransition(
+        ReportRegionStateTransitionRequest req) throws PleaseHoldException {
+      if (req.getTransition(0).getTransitionCode() == TransitionCode.CLOSED) {
+        if (ARRIVE_REPORT != null) {
+          ARRIVE_REPORT.countDown();
+          try {
+            RESUME_REPORT.await();
+            RESUME_REPORT = null;
+          } catch (InterruptedException e) {
+            throw new RuntimeException(e);
+          }
+        }
+      }
+      return super.reportRegionStateTransition(req);
+    }
+
+    @Override
+    public List<RegionInfo> getRegionsOnServer(ServerName serverName) {
+      List<RegionInfo> regions = super.getRegionsOnServer(serverName);
+      if (ARRIVE_GET != null) {
+        ARRIVE_GET.countDown();
+        try {
+          RESUME_GET.await();
+          RESUME_GET = null;
+        } catch (InterruptedException e) {
+          throw new RuntimeException(e);
+        }
+      }
+      return regions;
+    }
+
+  }
+
+  public static final class HMasterForTest extends HMaster {
+
+    public HMasterForTest(Configuration conf) throws IOException, KeeperException {
+      super(conf);
+    }
+
+    @Override
+    protected AssignmentManager createAssignmentManager(MasterServices master) {
+      return new AssignmentManagerForTest(master);
+    }
+
+    @Override
+    protected ServerManager createServerManager(MasterServices master) throws IOException {
+      setupClusterConnection();
+      return new ServerManagerForTest(master);
+    }
+  }
+
+  private static final HBaseTestingUtility UTIL = new HBaseTestingUtility();
+
+  private static TableName NAME = TableName.valueOf("Assign");
+
+  private static byte[] CF = Bytes.toBytes("cf");
+
+  @BeforeClass
+  public static void setUp() throws Exception {
+    UTIL.startMiniCluster(StartMiniClusterOption.builder().masterClass(HMasterForTest.class)
+      .numMasters(1).numRegionServers(3).build());
+    UTIL.createTable(NAME, CF);
+    UTIL.waitTableAvailable(NAME);
+    UTIL.getAdmin().balancerSwitch(false, true);
+  }
+
+  @AfterClass
+  public static void tearDown() throws Exception {
+    UTIL.shutdownMiniCluster();
+  }
+
+  @Test
+  public void test() throws Exception {
+    RegionInfo region =
+      Iterables.getOnlyElement(UTIL.getMiniHBaseCluster().getRegions(NAME)).getRegionInfo();
+    HMaster master = UTIL.getMiniHBaseCluster().getMaster();
+    AssignmentManager am = master.getAssignmentManager();
+    RegionStateNode rsn = am.getRegionStates().getRegionStateNode(region);
+    ServerName source = rsn.getRegionLocation();
+    ServerName dest =
+      UTIL.getAdmin().getRegionServers().stream().filter(sn -> !sn.equals(source)).findAny().get();
+
+    ARRIVE_REPORT = new CountDownLatch(1);
+    RESUME_REPORT = new CountDownLatch(1);
+
+    Future<?> future = am.moveAsync(new RegionPlan(region, source, dest));
+
+    ARRIVE_REPORT.await();
+    ARRIVE_REPORT = null;
+    // let's get procedure lock to stop the TRSP
+    IdLock procExecutionLock = master.getMasterProcedureExecutor().getProcExecutionLock();
+    long procId = master.getProcedures().stream()
+      .filter(p -> p instanceof RegionRemoteProcedureBase).findAny().get().getProcId();
+    IdLock.Entry lockEntry = procExecutionLock.getLockEntry(procId);
+    RESUME_REPORT.countDown();
+
+    // kill the source region server
+    ARRIVE_GET = new CountDownLatch(1);
+    RESUME_GET = new CountDownLatch(1);
+    UTIL.getMiniHBaseCluster().killRegionServer(source);
+
+    // wait until we try to get the region list of the region server
+    ARRIVE_GET.await();
+    ARRIVE_GET = null;
+    // release the procedure lock and let the TRSP to finish
+    procExecutionLock.releaseLockEntry(lockEntry);
+    future.get();
+
+    // resume the SCP
+    EXCLUDE_SERVERS.add(dest);
+    RESUME_GET.countDown();
+    // wait until there are no SCPs and TRSPs
+    UTIL.waitFor(60000, () -> master.getProcedures().stream().allMatch(p -> p.isFinished() ||
+      (!(p instanceof ServerCrashProcedure) && !(p instanceof TransitRegionStateProcedure))));
+
+    // assert the region is only on the dest server.
+    HRegionServer rs = UTIL.getMiniHBaseCluster().getRegionServer(dest);
+    assertNotNull(rs.getRegion(region.getEncodedName()));
+    assertNull(UTIL.getOtherRegionServer(rs).getRegion(region.getEncodedName()));
+  }
+}