You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by zh...@apache.org on 2018/11/13 03:34:07 UTC

hbase git commit: HBASE-21463 The checkOnlineRegionsReport can accidentally complete a TRSP

Repository: hbase
Updated Branches:
  refs/heads/master f77008112 -> 55fa8f4b3


HBASE-21463 The checkOnlineRegionsReport can accidentally complete a TRSP


Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/55fa8f4b
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/55fa8f4b
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/55fa8f4b

Branch: refs/heads/master
Commit: 55fa8f4b3359e00009e8739ecb974e475c2f1ecc
Parents: f770081
Author: Duo Zhang <zh...@apache.org>
Authored: Mon Nov 12 11:47:53 2018 +0800
Committer: Duo Zhang <zh...@apache.org>
Committed: Tue Nov 13 11:31:03 2018 +0800

----------------------------------------------------------------------
 .../hbase/procedure2/ProcedureExecutor.java     |   5 +
 .../src/main/protobuf/MasterProcedure.proto     |   3 +-
 .../org/apache/hadoop/hbase/master/HMaster.java |   2 +-
 .../master/assignment/AssignmentManager.java    | 162 ++++++----------
 .../master/procedure/EnableTableProcedure.java  |  37 ++--
 .../master/assignment/MockMasterServices.java   |  12 +-
 .../assignment/TestReportOnlineRegionsRace.java | 188 +++++++++++++++++++
 .../procedure/TestEnableTableProcedure.java     |  47 ++---
 .../TestMasterFailoverWithProcedures.java       |   2 +-
 9 files changed, 291 insertions(+), 167 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hbase/blob/55fa8f4b/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/ProcedureExecutor.java
----------------------------------------------------------------------
diff --git a/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/ProcedureExecutor.java b/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/ProcedureExecutor.java
index c18ca32..cbdb9b8 100644
--- a/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/ProcedureExecutor.java
+++ b/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/ProcedureExecutor.java
@@ -1915,6 +1915,11 @@ public class ProcedureExecutor<TEnvironment> {
     return completed.size();
   }
 
+  @VisibleForTesting
+  public IdLock getProcExecutionLock() {
+    return procExecutionLock;
+  }
+
   // ==========================================================================
   //  Worker Thread
   // ==========================================================================

http://git-wip-us.apache.org/repos/asf/hbase/blob/55fa8f4b/hbase-protocol-shaded/src/main/protobuf/MasterProcedure.proto
----------------------------------------------------------------------
diff --git a/hbase-protocol-shaded/src/main/protobuf/MasterProcedure.proto b/hbase-protocol-shaded/src/main/protobuf/MasterProcedure.proto
index 7a1c1d3..073e444 100644
--- a/hbase-protocol-shaded/src/main/protobuf/MasterProcedure.proto
+++ b/hbase-protocol-shaded/src/main/protobuf/MasterProcedure.proto
@@ -161,7 +161,8 @@ enum EnableTableState {
 message EnableTableStateData {
   required UserInformation user_info = 1;
   required TableName table_name = 2;
-  required bool skip_table_state_check = 3;
+  // not used any more, always false
+  required bool skip_table_state_check = 3[deprecated=true];
 }
 
 enum DisableTableState {

http://git-wip-us.apache.org/repos/asf/hbase/blob/55fa8f4b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
index adf1dcd..df744b6 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
@@ -2570,7 +2570,7 @@ public class HMaster extends HRegionServer implements MasterServices {
         // Note: if the procedure throws exception, we will catch it and rethrow.
         final ProcedurePrepareLatch prepareLatch = ProcedurePrepareLatch.createLatch();
         submitProcedure(new EnableTableProcedure(procedureExecutor.getEnvironment(),
-            tableName, false, prepareLatch));
+            tableName, prepareLatch));
         prepareLatch.await();
 
         getMaster().getMasterCoprocessorHost().postEnableTable(tableName);

http://git-wip-us.apache.org/repos/asf/hbase/blob/55fa8f4b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java
index 765ab6b..37e5f0c 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java
@@ -39,7 +39,6 @@ import org.apache.hadoop.hbase.PleaseHoldException;
 import org.apache.hadoop.hbase.ServerName;
 import org.apache.hadoop.hbase.TableName;
 import org.apache.hadoop.hbase.UnknownRegionException;
-import org.apache.hadoop.hbase.YouAreDeadException;
 import org.apache.hadoop.hbase.client.DoNotRetryRegionException;
 import org.apache.hadoop.hbase.client.RegionInfo;
 import org.apache.hadoop.hbase.client.RegionInfoBuilder;
@@ -162,7 +161,8 @@ public class AssignmentManager implements ServerListener {
     this(master, new RegionStateStore(master));
   }
 
-  public AssignmentManager(final MasterServices master, final RegionStateStore stateStore) {
+  @VisibleForTesting
+  AssignmentManager(final MasterServices master, final RegionStateStore stateStore) {
     this.master = master;
     this.regionStateStore = stateStore;
     this.metrics = new MetricsAssignmentManager();
@@ -979,23 +979,26 @@ public class AssignmentManager implements ServerListener {
   //  RS Status update (report online regions) helpers
   // ============================================================================================
   /**
-   * the master will call this method when the RS send the regionServerReport().
-   * the report will contains the "online regions".
-   * this method will check the the online regions against the in-memory state of the AM,
-   * if there is a mismatch we will try to fence out the RS with the assumption
-   * that something went wrong on the RS side.
+   * The master will call this method when the RS send the regionServerReport(). The report will
+   * contains the "online regions". This method will check the the online regions against the
+   * in-memory state of the AM, and we will log a warn message if there is a mismatch. This is
+   * because that there is no fencing between the reportRegionStateTransition method and
+   * regionServerReport method, so there could be race and introduce inconsistency here, but
+   * actually there is no problem.
+   * <p/>
+   * Please see HBASE-21421 and HBASE-21463 for more details.
    */
-  public void reportOnlineRegions(final ServerName serverName, final Set<byte[]> regionNames)
-      throws YouAreDeadException {
-    if (!isRunning()) return;
+  public void reportOnlineRegions(ServerName serverName, Set<byte[]> regionNames) {
+    if (!isRunning()) {
+      return;
+    }
     if (LOG.isTraceEnabled()) {
-      LOG.trace("ReportOnlineRegions " + serverName + " regionCount=" + regionNames.size() +
-        ", metaLoaded=" + isMetaLoaded() + " " +
-          regionNames.stream().map(element -> Bytes.toStringBinary(element)).
-            collect(Collectors.toList()));
+      LOG.trace("ReportOnlineRegions {} regionCount={}, metaLoaded={} {}", serverName,
+        regionNames.size(), isMetaLoaded(),
+        regionNames.stream().map(Bytes::toStringBinary).collect(Collectors.toList()));
     }
 
-    final ServerStateNode serverNode = regionStates.getOrCreateServer(serverName);
+    ServerStateNode serverNode = regionStates.getOrCreateServer(serverName);
 
     synchronized (serverNode) {
       if (!serverNode.isInState(ServerState.ONLINE)) {
@@ -1003,103 +1006,58 @@ public class AssignmentManager implements ServerListener {
         return;
       }
     }
-
     if (regionNames.isEmpty()) {
       // nothing to do if we don't have regions
-      LOG.trace("no online region found on " + serverName);
-    } else if (!isMetaLoaded()) {
-      // if we are still on startup, discard the report unless is from someone holding meta
-      checkOnlineRegionsReportForMeta(serverNode, regionNames);
-    } else {
-      // The Heartbeat updates us of what regions are only. check and verify the state.
-      checkOnlineRegionsReport(serverNode, regionNames);
+      LOG.trace("no online region found on {}", serverName);
+      return;
     }
+    if (!isMetaLoaded()) {
+      // we are still on startup, skip checking
+      return;
+    }
+    // The Heartbeat tells us of what regions are on the region serve, check the state.
+    checkOnlineRegionsReport(serverNode, regionNames);
 
     // wake report event
     wakeServerReportEvent(serverNode);
   }
 
-  void checkOnlineRegionsReportForMeta(ServerStateNode serverNode, Set<byte[]> regionNames) {
-    try {
-      for (byte[] regionName : regionNames) {
-        final RegionInfo hri = getMetaRegionFromName(regionName);
-        if (hri == null) {
-          if (LOG.isTraceEnabled()) {
-            LOG.trace("Skip online report for region=" + Bytes.toStringBinary(regionName) +
-              " while meta is loading");
-          }
-          continue;
-        }
-
-        RegionStateNode regionNode = regionStates.getOrCreateRegionStateNode(hri);
-        LOG.info("META REPORTED: " + regionNode);
-        regionNode.lock();
-        try {
-          if (!reportTransition(regionNode, serverNode, TransitionCode.OPENED, 0)) {
-            LOG.warn("META REPORTED but no procedure found (complete?); set location=" +
-              serverNode.getServerName());
-            regionNode.setRegionLocation(serverNode.getServerName());
-          } else if (LOG.isTraceEnabled()) {
-            LOG.trace("META REPORTED: " + regionNode);
-          }
-        } finally {
-          regionNode.unlock();
-        }
+  // just check and output possible inconsistency, without actually doing anything
+  private void checkOnlineRegionsReport(ServerStateNode serverNode, Set<byte[]> regionNames) {
+    ServerName serverName = serverNode.getServerName();
+    for (byte[] regionName : regionNames) {
+      if (!isRunning()) {
+        return;
       }
-    } catch (IOException e) {
-      ServerName serverName = serverNode.getServerName();
-      LOG.warn("KILLING " + serverName + ": " + e.getMessage());
-      killRegionServer(serverNode);
-    }
-  }
-
-  void checkOnlineRegionsReport(final ServerStateNode serverNode, final Set<byte[]> regionNames) {
-    final ServerName serverName = serverNode.getServerName();
-    try {
-      for (byte[] regionName: regionNames) {
-        if (!isRunning()) {
-          return;
-        }
-        final RegionStateNode regionNode = regionStates.getRegionStateNodeFromName(regionName);
-        if (regionNode == null) {
-          throw new UnexpectedStateException("Not online: " + Bytes.toStringBinary(regionName));
-        }
-        regionNode.lock();
-        try {
-          if (regionNode.isInState(State.OPENING, State.OPEN)) {
-            if (!regionNode.getRegionLocation().equals(serverName)) {
-              throw new UnexpectedStateException(regionNode.toString() +
-                " reported OPEN on server=" + serverName +
-                " but state has otherwise.");
-            } else if (regionNode.isInState(State.OPENING)) {
-              try {
-                if (!reportTransition(regionNode, serverNode, TransitionCode.OPENED, 0)) {
-                  LOG.warn(regionNode.toString() + " reported OPEN on server=" + serverName +
-                    " but state has otherwise AND NO procedure is running");
-                }
-              } catch (UnexpectedStateException e) {
-                LOG.warn(regionNode.toString() + " reported unexpteced OPEN: " + e.getMessage(), e);
-              }
-            }
-          } else if (!regionNode.isInState(State.CLOSING, State.SPLITTING)) {
-            long diff = regionNode.getLastUpdate() - EnvironmentEdgeManager.currentTime();
-            if (diff > 1000/*One Second... make configurable if an issue*/) {
-              // So, we can get report that a region is CLOSED or SPLIT because a heartbeat
-              // came in at about same time as a region transition. Make sure there is some
-              // elapsed time between killing remote server.
-              throw new UnexpectedStateException(regionNode.toString() +
-                " reported an unexpected OPEN; time since last update=" + diff);
-            }
+      RegionStateNode regionNode = regionStates.getRegionStateNodeFromName(regionName);
+      if (regionNode == null) {
+        LOG.warn("No region state node for {}, it should already be on {}",
+          Bytes.toStringBinary(regionName), serverName);
+        continue;
+      }
+      regionNode.lock();
+      try {
+        long diff = EnvironmentEdgeManager.currentTime() - regionNode.getLastUpdate();
+        if (regionNode.isInState(State.OPENING, State.OPEN)) {
+          // This is possible as a region server has just closed a region but the region server
+          // report is generated before the closing, but arrive after the closing. Make sure there
+          // is some elapsed time so less false alarms.
+          if (!regionNode.getRegionLocation().equals(serverName) && diff > 1000) {
+            LOG.warn("{} reported OPEN on server={} but state has otherwise", regionNode,
+              serverName);
+          }
+        } else if (!regionNode.isInState(State.CLOSING, State.SPLITTING)) {
+          // So, we can get report that a region is CLOSED or SPLIT because a heartbeat
+          // came in at about same time as a region transition. Make sure there is some
+          // elapsed time so less false alarms.
+          if (diff > 1000) {
+            LOG.warn("{} reported an unexpected OPEN on {}; time since last update={}ms",
+              regionNode, serverName, diff);
           }
-        } finally {
-          regionNode.unlock();
         }
+      } finally {
+        regionNode.unlock();
       }
-    } catch (IOException e) {
-      //See HBASE-21421, we can count on reportRegionStateTransition calls
-      //We only log a warming here. It could be a network lag.
-      LOG.warn("Failed to checkOnlineRegionsReport, maybe due to network lag, "
-          + "if this message continues, be careful of double assign", e);
     }
   }
 
@@ -1905,10 +1863,6 @@ public class AssignmentManager implements ServerListener {
     wakeServerReportEvent(serverNode);
   }
 
-  private void killRegionServer(final ServerStateNode serverNode) {
-    master.getServerManager().expireServer(serverNode.getServerName());
-  }
-
   @VisibleForTesting
   MasterServices getMaster() {
     return master;

http://git-wip-us.apache.org/repos/asf/hbase/blob/55fa8f4b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/EnableTableProcedure.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/EnableTableProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/EnableTableProcedure.java
index 144b073..3994304 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/EnableTableProcedure.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/EnableTableProcedure.java
@@ -1,4 +1,4 @@
-/*
+/**
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
@@ -15,7 +15,6 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-
 package org.apache.hadoop.hbase.master.procedure;
 
 import java.io.IOException;
@@ -52,36 +51,30 @@ public class EnableTableProcedure
   private static final Logger LOG = LoggerFactory.getLogger(EnableTableProcedure.class);
 
   private TableName tableName;
-  private boolean skipTableStateCheck;
 
   private Boolean traceEnabled = null;
 
   public EnableTableProcedure() {
-    super();
   }
 
   /**
    * Constructor
    * @param env MasterProcedureEnv
    * @param tableName the table to operate on
-   * @param skipTableStateCheck whether to check table state
    */
-  public EnableTableProcedure(final MasterProcedureEnv env, final TableName tableName,
-      final boolean skipTableStateCheck) {
-    this(env, tableName, skipTableStateCheck, null);
+  public EnableTableProcedure(MasterProcedureEnv env, TableName tableName) {
+    this(env, tableName, null);
   }
 
   /**
    * Constructor
    * @param env MasterProcedureEnv
    * @param tableName the table to operate on
-   * @param skipTableStateCheck whether to check table state
    */
-  public EnableTableProcedure(final MasterProcedureEnv env, final TableName tableName,
-      final boolean skipTableStateCheck, final ProcedurePrepareLatch syncLatch) {
+  public EnableTableProcedure(MasterProcedureEnv env, TableName tableName,
+      ProcedurePrepareLatch syncLatch) {
     super(env, syncLatch);
     this.tableName = tableName;
-    this.skipTableStateCheck = skipTableStateCheck;
   }
 
   @Override
@@ -268,29 +261,27 @@ public class EnableTableProcedure
   }
 
   @Override
-  protected void serializeStateData(ProcedureStateSerializer serializer)
-      throws IOException {
+  protected void serializeStateData(ProcedureStateSerializer serializer) throws IOException {
     super.serializeStateData(serializer);
 
+    // the skipTableStateCheck is false so we still need to set it...
+    @SuppressWarnings("deprecation")
     MasterProcedureProtos.EnableTableStateData.Builder enableTableMsg =
-        MasterProcedureProtos.EnableTableStateData.newBuilder()
-            .setUserInfo(MasterProcedureUtil.toProtoUserInfo(getUser()))
-            .setTableName(ProtobufUtil.toProtoTableName(tableName))
-            .setSkipTableStateCheck(skipTableStateCheck);
+      MasterProcedureProtos.EnableTableStateData.newBuilder()
+        .setUserInfo(MasterProcedureUtil.toProtoUserInfo(getUser()))
+        .setTableName(ProtobufUtil.toProtoTableName(tableName)).setSkipTableStateCheck(false);
 
     serializer.serialize(enableTableMsg.build());
   }
 
   @Override
-  protected void deserializeStateData(ProcedureStateSerializer serializer)
-      throws IOException {
+  protected void deserializeStateData(ProcedureStateSerializer serializer) throws IOException {
     super.deserializeStateData(serializer);
 
     MasterProcedureProtos.EnableTableStateData enableTableMsg =
-        serializer.deserialize(MasterProcedureProtos.EnableTableStateData.class);
+      serializer.deserialize(MasterProcedureProtos.EnableTableStateData.class);
     setUser(MasterProcedureUtil.toUserInfo(enableTableMsg.getUserInfo()));
     tableName = ProtobufUtil.toTableName(enableTableMsg.getTableName());
-    skipTableStateCheck = enableTableMsg.getSkipTableStateCheck();
   }
 
   @Override
@@ -318,7 +309,7 @@ public class EnableTableProcedure
     if (!MetaTableAccessor.tableExists(env.getMasterServices().getConnection(), tableName)) {
       setFailure("master-enable-table", new TableNotFoundException(tableName));
       canTableBeEnabled = false;
-    } else if (!skipTableStateCheck) {
+    } else {
       // There could be multiple client requests trying to disable or enable
       // the table at the same time. Ensure only the first request is honored
       // After that, no other requests can be accepted until the table reaches

http://git-wip-us.apache.org/repos/asf/hbase/blob/55fa8f4b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/MockMasterServices.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/MockMasterServices.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/MockMasterServices.java
index c0dc72c..5a1f87d 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/MockMasterServices.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/MockMasterServices.java
@@ -33,7 +33,6 @@ import org.apache.hadoop.hbase.ServerMetricsBuilder;
 import org.apache.hadoop.hbase.ServerName;
 import org.apache.hadoop.hbase.TableDescriptors;
 import org.apache.hadoop.hbase.TableName;
-import org.apache.hadoop.hbase.YouAreDeadException;
 import org.apache.hadoop.hbase.client.ClusterConnection;
 import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
 import org.apache.hadoop.hbase.client.HConnectionTestingUtility;
@@ -78,7 +77,6 @@ import org.apache.hadoop.hbase.shaded.protobuf.generated.ClientProtos.RegionActi
 import org.apache.hadoop.hbase.shaded.protobuf.generated.ClientProtos.RegionActionResult;
 import org.apache.hadoop.hbase.shaded.protobuf.generated.ClientProtos.ResultOrException;
 
-
 /**
  * A mocked master services.
  * Tries to fake it. May not always work.
@@ -126,13 +124,9 @@ public class MockMasterServices extends MockNoopMasterServices {
       @Override
       protected boolean waitServerReportEvent(ServerName serverName, Procedure proc) {
         // Make a report with current state of the server 'serverName' before we call wait..
-        SortedSet<byte []> regions = regionsToRegionServers.get(serverName);
-        try {
-          getAssignmentManager().reportOnlineRegions(serverName,
-              regions == null? new HashSet<byte []>(): regions);
-        } catch (YouAreDeadException e) {
-          throw new RuntimeException(e);
-        }
+        SortedSet<byte[]> regions = regionsToRegionServers.get(serverName);
+        getAssignmentManager().reportOnlineRegions(serverName,
+          regions == null ? new HashSet<byte[]>() : regions);
         return super.waitServerReportEvent(serverName, proc);
       }
     };

http://git-wip-us.apache.org/repos/asf/hbase/blob/55fa8f4b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestReportOnlineRegionsRace.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestReportOnlineRegionsRace.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestReportOnlineRegionsRace.java
new file mode 100644
index 0000000..371897b
--- /dev/null
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestReportOnlineRegionsRace.java
@@ -0,0 +1,188 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.master.assignment;
+
+import static org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.RegionStateTransitionState.REGION_STATE_TRANSITION_CONFIRM_OPENED_VALUE;
+import static org.junit.Assert.assertEquals;
+
+import java.io.IOException;
+import java.util.Set;
+import java.util.concurrent.CountDownLatch;
+import java.util.concurrent.Future;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.HBaseClassTestRule;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.PleaseHoldException;
+import org.apache.hadoop.hbase.ServerName;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.client.RegionInfo;
+import org.apache.hadoop.hbase.client.Table;
+import org.apache.hadoop.hbase.master.HMaster;
+import org.apache.hadoop.hbase.master.MasterServices;
+import org.apache.hadoop.hbase.master.RegionPlan;
+import org.apache.hadoop.hbase.master.RegionState;
+import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv;
+import org.apache.hadoop.hbase.procedure2.ProcedureExecutor;
+import org.apache.hadoop.hbase.testclassification.MasterTests;
+import org.apache.hadoop.hbase.testclassification.MediumTests;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.IdLock;
+import org.apache.zookeeper.KeeperException;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.junit.ClassRule;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+
+import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.ReportRegionStateTransitionRequest;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.ReportRegionStateTransitionResponse;
+
+@Category({ MasterTests.class, MediumTests.class })
+public class TestReportOnlineRegionsRace {
+
+  @ClassRule
+  public static final HBaseClassTestRule CLASS_RULE =
+    HBaseClassTestRule.forClass(TestReportOnlineRegionsRace.class);
+
+  private static volatile CountDownLatch ARRIVE_RS_REPORT;
+  private static volatile CountDownLatch RESUME_RS_REPORT;
+  private static volatile CountDownLatch FINISH_RS_REPORT;
+
+  private static volatile CountDownLatch RESUME_REPORT_STATE;
+
+  private static final class AssignmentManagerForTest extends AssignmentManager {
+
+    public AssignmentManagerForTest(MasterServices master) {
+      super(master);
+    }
+
+    @Override
+    public void reportOnlineRegions(ServerName serverName, Set<byte[]> regionNames) {
+      if (ARRIVE_RS_REPORT != null) {
+        ARRIVE_RS_REPORT.countDown();
+        try {
+          RESUME_RS_REPORT.await();
+        } catch (InterruptedException e) {
+          throw new RuntimeException(e);
+        }
+      }
+      super.reportOnlineRegions(serverName, regionNames);
+      if (FINISH_RS_REPORT != null) {
+        FINISH_RS_REPORT.countDown();
+      }
+    }
+
+    @Override
+    public ReportRegionStateTransitionResponse reportRegionStateTransition(
+        ReportRegionStateTransitionRequest req) throws PleaseHoldException {
+      if (RESUME_REPORT_STATE != null) {
+        try {
+          RESUME_REPORT_STATE.await();
+        } catch (InterruptedException e) {
+          throw new RuntimeException(e);
+        }
+      }
+      return super.reportRegionStateTransition(req);
+    }
+
+  }
+
+  public static final class HMasterForTest extends HMaster {
+
+    public HMasterForTest(Configuration conf) throws IOException, KeeperException {
+      super(conf);
+    }
+
+    @Override
+    protected AssignmentManager createAssignmentManager(MasterServices master) {
+      return new AssignmentManagerForTest(master);
+    }
+  }
+
+  private static final HBaseTestingUtility UTIL = new HBaseTestingUtility();
+
+  private static TableName NAME = TableName.valueOf("Race");
+
+  private static byte[] CF = Bytes.toBytes("cf");
+
+  @BeforeClass
+  public static void setUp() throws Exception {
+    UTIL.getConfiguration().setClass(HConstants.MASTER_IMPL, HMasterForTest.class, HMaster.class);
+    UTIL.getConfiguration().setInt("hbase.regionserver.msginterval", 1000);
+    UTIL.startMiniCluster(1);
+    UTIL.createTable(NAME, CF);
+    UTIL.waitTableAvailable(NAME);
+  }
+
+  @AfterClass
+  public static void tearDown() throws Exception {
+    UTIL.shutdownMiniCluster();
+  }
+
+  @Test
+  public void testRace() throws Exception {
+    RegionInfo region = UTIL.getMiniHBaseCluster().getRegions(NAME).get(0).getRegionInfo();
+    ProcedureExecutor<MasterProcedureEnv> procExec =
+      UTIL.getMiniHBaseCluster().getMaster().getMasterProcedureExecutor();
+    AssignmentManager am = UTIL.getMiniHBaseCluster().getMaster().getAssignmentManager();
+    RegionStateNode rsn = am.getRegionStates().getRegionStateNode(region);
+
+    // halt a regionServerReport
+    RESUME_RS_REPORT = new CountDownLatch(1);
+    ARRIVE_RS_REPORT = new CountDownLatch(1);
+    FINISH_RS_REPORT = new CountDownLatch(1);
+
+    ARRIVE_RS_REPORT.await();
+
+    // schedule a TRSP to REOPEN the region
+    RESUME_REPORT_STATE = new CountDownLatch(1);
+    Future<byte[]> future =
+      am.moveAsync(new RegionPlan(region, rsn.getRegionLocation(), rsn.getRegionLocation()));
+    TransitRegionStateProcedure proc =
+      procExec.getProcedures().stream().filter(p -> p instanceof TransitRegionStateProcedure)
+        .filter(p -> !p.isFinished()).map(p -> (TransitRegionStateProcedure) p).findAny().get();
+    IdLock procExecLock = procExec.getProcExecutionLock();
+    // a CloseRegionProcedure and then the OpenRegionProcedure we want to block
+    IdLock.Entry lockEntry = procExecLock.getLockEntry(proc.getProcId() + 2);
+    // resume the reportRegionStateTransition to finish the CloseRegionProcedure
+    RESUME_REPORT_STATE.countDown();
+    // wait until we schedule the OpenRegionProcedure
+    UTIL.waitFor(10000,
+      () -> proc.getCurrentStateId() == REGION_STATE_TRANSITION_CONFIRM_OPENED_VALUE);
+    // the region should be in OPENING state
+    assertEquals(RegionState.State.OPENING, rsn.getState());
+    // resume the region server report
+    RESUME_RS_REPORT.countDown();
+    // wait until it finishes, it will find that the region is opened on the rs
+    FINISH_RS_REPORT.await();
+    // let the OpenRegionProcedure go
+    procExecLock.releaseLockEntry(lockEntry);
+    // wait until the TRSP is done
+    future.get();
+
+    // confirm that the region can still be write, i.e, the regionServerReport method should not
+    // change the region state to OPEN
+    try (Table table = UTIL.getConnection().getTableBuilder(NAME, null).setWriteRpcTimeout(1000)
+      .setOperationTimeout(2000).build()) {
+      table.put(
+        new Put(Bytes.toBytes("key")).addColumn(CF, Bytes.toBytes("cq"), Bytes.toBytes("val")));
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/hbase/blob/55fa8f4b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestEnableTableProcedure.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestEnableTableProcedure.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestEnableTableProcedure.java
index 9a244d3..a91ebc4 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestEnableTableProcedure.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestEnableTableProcedure.java
@@ -28,7 +28,6 @@ import org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility;
 import org.apache.hadoop.hbase.testclassification.MasterTests;
 import org.apache.hadoop.hbase.testclassification.MediumTests;
 import org.apache.hadoop.hbase.util.Bytes;
-import org.junit.Assert;
 import org.junit.ClassRule;
 import org.junit.Rule;
 import org.junit.Test;
@@ -37,15 +36,17 @@ import org.junit.rules.TestName;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-@Category({MasterTests.class, MediumTests.class})
+@Category({ MasterTests.class, MediumTests.class })
 public class TestEnableTableProcedure extends TestTableDDLProcedureBase {
 
   @ClassRule
   public static final HBaseClassTestRule CLASS_RULE =
-      HBaseClassTestRule.forClass(TestEnableTableProcedure.class);
+    HBaseClassTestRule.forClass(TestEnableTableProcedure.class);
 
   private static final Logger LOG = LoggerFactory.getLogger(TestEnableTableProcedure.class);
-  @Rule public TestName name = new TestName();
+
+  @Rule
+  public TestName name = new TestName();
 
   @Test
   public void testEnableTable() throws Exception {
@@ -56,15 +57,15 @@ public class TestEnableTableProcedure extends TestTableDDLProcedureBase {
     UTIL.getAdmin().disableTable(tableName);
 
     // Enable the table
-    long procId = procExec.submitProcedure(
-      new EnableTableProcedure(procExec.getEnvironment(), tableName, false));
+    long procId =
+      procExec.submitProcedure(new EnableTableProcedure(procExec.getEnvironment(), tableName));
     // Wait the completion
     ProcedureTestingUtility.waitProcedure(procExec, procId);
     ProcedureTestingUtility.assertProcNotFailed(procExec, procId);
     MasterProcedureTestingUtility.validateTableIsEnabled(getMaster(), tableName);
   }
 
-  @Test(expected=TableNotDisabledException.class)
+  @Test(expected = TableNotDisabledException.class)
   public void testEnableNonDisabledTable() throws Exception {
     final TableName tableName = TableName.valueOf(name.getMethodName());
     final ProcedureExecutor<MasterProcedureEnv> procExec = getMasterProcedureExecutor();
@@ -72,8 +73,8 @@ public class TestEnableTableProcedure extends TestTableDDLProcedureBase {
     MasterProcedureTestingUtility.createTable(procExec, tableName, null, "f1", "f2");
 
     // Enable the table - expect failure
-    long procId1 = procExec.submitProcedure(
-        new EnableTableProcedure(procExec.getEnvironment(), tableName, false));
+    long procId1 =
+      procExec.submitProcedure(new EnableTableProcedure(procExec.getEnvironment(), tableName));
     ProcedureTestingUtility.waitProcedure(procExec, procId1);
 
     Procedure<?> result = procExec.getResult(procId1);
@@ -82,19 +83,11 @@ public class TestEnableTableProcedure extends TestTableDDLProcedureBase {
     assertTrue(
       ProcedureTestingUtility.getExceptionCause(result) instanceof TableNotDisabledException);
 
-    // Enable the table with skipping table state check flag (simulate recovery scenario)
-    long procId2 = procExec.submitProcedure(
-        new EnableTableProcedure(procExec.getEnvironment(), tableName, true));
-    // Wait the completion
-    ProcedureTestingUtility.waitProcedure(procExec, procId2);
-    ProcedureTestingUtility.assertProcNotFailed(procExec, procId2);
-
     // Enable the table - expect failure from ProcedurePrepareLatch
     final ProcedurePrepareLatch prepareLatch = new ProcedurePrepareLatch.CompatibilityLatch();
     procExec.submitProcedure(
-        new EnableTableProcedure(procExec.getEnvironment(), tableName, false, prepareLatch));
+      new EnableTableProcedure(procExec.getEnvironment(), tableName, prepareLatch));
     prepareLatch.await();
-    Assert.fail("Enable should throw exception through latch.");
   }
 
   @Test
@@ -102,9 +95,8 @@ public class TestEnableTableProcedure extends TestTableDDLProcedureBase {
     final TableName tableName = TableName.valueOf(name.getMethodName());
     final ProcedureExecutor<MasterProcedureEnv> procExec = getMasterProcedureExecutor();
 
-    final byte[][] splitKeys = new byte[][] {
-      Bytes.toBytes("a"), Bytes.toBytes("b"), Bytes.toBytes("c")
-    };
+    final byte[][] splitKeys =
+      new byte[][] { Bytes.toBytes("a"), Bytes.toBytes("b"), Bytes.toBytes("c") };
     MasterProcedureTestingUtility.createTable(procExec, tableName, splitKeys, "f1", "f2");
     UTIL.getAdmin().disableTable(tableName);
     ProcedureTestingUtility.waitNoProcedureRunning(procExec);
@@ -112,8 +104,8 @@ public class TestEnableTableProcedure extends TestTableDDLProcedureBase {
     ProcedureTestingUtility.setKillAndToggleBeforeStoreUpdate(procExec, true);
 
     // Start the Enable procedure && kill the executor
-    long procId = procExec.submitProcedure(
-        new EnableTableProcedure(procExec.getEnvironment(), tableName, false));
+    long procId =
+      procExec.submitProcedure(new EnableTableProcedure(procExec.getEnvironment(), tableName));
 
     // Restart the executor and execute the step twice
     MasterProcedureTestingUtility.testRecoveryAndDoubleExecution(procExec, procId);
@@ -126,17 +118,16 @@ public class TestEnableTableProcedure extends TestTableDDLProcedureBase {
     final TableName tableName = TableName.valueOf(name.getMethodName());
     final ProcedureExecutor<MasterProcedureEnv> procExec = getMasterProcedureExecutor();
 
-    final byte[][] splitKeys = new byte[][] {
-      Bytes.toBytes("a"), Bytes.toBytes("b"), Bytes.toBytes("c")
-    };
+    final byte[][] splitKeys =
+      new byte[][] { Bytes.toBytes("a"), Bytes.toBytes("b"), Bytes.toBytes("c") };
     MasterProcedureTestingUtility.createTable(procExec, tableName, splitKeys, "f1", "f2");
     UTIL.getAdmin().disableTable(tableName);
     ProcedureTestingUtility.waitNoProcedureRunning(procExec);
     ProcedureTestingUtility.setKillAndToggleBeforeStoreUpdate(procExec, true);
 
     // Start the Enable procedure && kill the executor
-    long procId = procExec.submitProcedure(
-        new EnableTableProcedure(procExec.getEnvironment(), tableName, false));
+    long procId =
+      procExec.submitProcedure(new EnableTableProcedure(procExec.getEnvironment(), tableName));
 
     int lastStep = 3; // fail before ENABLE_TABLE_SET_ENABLING_TABLE_STATE
     MasterProcedureTestingUtility.testRollbackAndDoubleExecution(procExec, procId, lastStep);

http://git-wip-us.apache.org/repos/asf/hbase/blob/55fa8f4b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestMasterFailoverWithProcedures.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestMasterFailoverWithProcedures.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestMasterFailoverWithProcedures.java
index fe05dbe..5e574b5 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestMasterFailoverWithProcedures.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestMasterFailoverWithProcedures.java
@@ -289,7 +289,7 @@ public class TestMasterFailoverWithProcedures {
 
     // Start the Delete procedure && kill the executor
     long procId = procExec.submitProcedure(
-        new EnableTableProcedure(procExec.getEnvironment(), tableName, false));
+        new EnableTableProcedure(procExec.getEnvironment(), tableName));
     testRecoveryAndDoubleExecution(UTIL, procId, step);
 
     MasterProcedureTestingUtility.validateTableIsEnabled(