You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by zh...@apache.org on 2018/10/18 03:38:08 UTC

hbase git commit: HBASE-21330 ReopenTableRegionsProcedure will enter an infinite loop if we schedule a TRSP at the same time

Repository: hbase
Updated Branches:
  refs/heads/master 3a75505cf -> 5efa5f6de


HBASE-21330 ReopenTableRegionsProcedure will enter an infinite loop if we schedule a TRSP at the same time


Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/5efa5f6d
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/5efa5f6d
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/5efa5f6d

Branch: refs/heads/master
Commit: 5efa5f6de4388fc79919d977b58a351debcc8555
Parents: 3a75505
Author: Duo Zhang <zh...@apache.org>
Authored: Wed Oct 17 18:04:24 2018 +0800
Committer: Duo Zhang <zh...@apache.org>
Committed: Thu Oct 18 11:29:12 2018 +0800

----------------------------------------------------------------------
 .../hadoop/hbase/procedure2/Procedure.java      |  3 +-
 .../procedure/ReopenTableRegionsProcedure.java  | 27 ++++--
 ...ReopenTableRegionsProcedureInfiniteLoop.java | 90 ++++++++++++++++++++
 3 files changed, 114 insertions(+), 6 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hbase/blob/5efa5f6d/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/Procedure.java
----------------------------------------------------------------------
diff --git a/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/Procedure.java b/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/Procedure.java
index 0b7e0c0..191a4b0 100644
--- a/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/Procedure.java
+++ b/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/Procedure.java
@@ -322,7 +322,8 @@ public abstract class Procedure<TEnvironment> implements Comparable<Procedure<TE
    * @see #holdLock(Object)
    * @return true if the procedure has the lock, false otherwise.
    */
-  protected final boolean hasLock() {
+  @VisibleForTesting
+  public final boolean hasLock() {
     return locked;
   }
 

http://git-wip-us.apache.org/repos/asf/hbase/blob/5efa5f6d/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ReopenTableRegionsProcedure.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ReopenTableRegionsProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ReopenTableRegionsProcedure.java
index 5b36f30..0634815 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ReopenTableRegionsProcedure.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ReopenTableRegionsProcedure.java
@@ -70,6 +70,18 @@ public class ReopenTableRegionsProcedure
     return TableOperationType.REGION_EDIT;
   }
 
+  private boolean canSchedule(MasterProcedureEnv env, HRegionLocation loc) {
+    if (loc.getSeqNum() < 0) {
+      return false;
+    }
+    RegionStateNode regionNode =
+      env.getAssignmentManager().getRegionStates().getRegionStateNode(loc.getRegion());
+    // If the region node is null, then at least in the next round we can remove this region and
+    // make progress. The second condition is the normal case: if the region has no TRSP then we
+    // can schedule one to make progress.
+    return regionNode == null || !regionNode.isInTransition();
+  }
+
   @Override
   protected Flow executeFromState(MasterProcedureEnv env, ReopenTableRegionsState state)
       throws ProcedureSuspendedException, ProcedureYieldException, InterruptedException {
@@ -85,8 +97,12 @@ public class ReopenTableRegionsProcedure
         return Flow.HAS_MORE_STATE;
       case REOPEN_TABLE_REGIONS_REOPEN_REGIONS:
         for (HRegionLocation loc : regions) {
-          RegionStateNode regionNode = env.getAssignmentManager().getRegionStates()
-            .getOrCreateRegionStateNode(loc.getRegion());
+          RegionStateNode regionNode =
+            env.getAssignmentManager().getRegionStates().getRegionStateNode(loc.getRegion());
+          // This is possible: the region may already have been merged or split, see HBASE-20921
+          if (regionNode == null) {
+            continue;
+          }
           TransitRegionStateProcedure proc;
           regionNode.lock();
           try {
@@ -108,13 +124,13 @@ public class ReopenTableRegionsProcedure
         if (regions.isEmpty()) {
           return Flow.NO_MORE_STATE;
         }
-        if (regions.stream().anyMatch(l -> l.getSeqNum() >= 0)) {
+        if (regions.stream().anyMatch(loc -> canSchedule(env, loc))) {
           attempt = 0;
           setNextState(ReopenTableRegionsState.REOPEN_TABLE_REGIONS_REOPEN_REGIONS);
           return Flow.HAS_MORE_STATE;
         }
-        // All the regions need to reopen are in OPENING state which means we can not schedule any
-        // MRPs.
+        // We can not schedule a TRSP for any of the regions which need to be reopened, wait for a
+        // while and retry.
         long backoff = ProcedureUtil.getBackoffTimeMs(this.attempt++);
         LOG.info(
           "There are still {} region(s) which need to be reopened for table {} are in " +
@@ -138,6 +154,7 @@ public class ReopenTableRegionsProcedure
     env.getProcedureScheduler().addFront(this);
     return false; // 'false' means that this procedure handled the timeout
   }
+
   @Override
   protected void rollbackState(MasterProcedureEnv env, ReopenTableRegionsState state)
       throws IOException, InterruptedException {

http://git-wip-us.apache.org/repos/asf/hbase/blob/5efa5f6d/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestReopenTableRegionsProcedureInfiniteLoop.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestReopenTableRegionsProcedureInfiniteLoop.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestReopenTableRegionsProcedureInfiniteLoop.java
new file mode 100644
index 0000000..870f3bf
--- /dev/null
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestReopenTableRegionsProcedureInfiniteLoop.java
@@ -0,0 +1,90 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.master.procedure;
+
+import java.io.IOException;
+import org.apache.hadoop.hbase.HBaseClassTestRule;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.RegionInfo;
+import org.apache.hadoop.hbase.master.HMaster;
+import org.apache.hadoop.hbase.master.ServerManager;
+import org.apache.hadoop.hbase.master.assignment.AssignmentManager;
+import org.apache.hadoop.hbase.master.assignment.RegionStateNode;
+import org.apache.hadoop.hbase.master.assignment.TransitRegionStateProcedure;
+import org.apache.hadoop.hbase.procedure2.ProcedureExecutor;
+import org.apache.hadoop.hbase.testclassification.MasterTests;
+import org.apache.hadoop.hbase.testclassification.MediumTests;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.junit.ClassRule;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+
+/**
+ * Testcase for HBASE-21330.
+ */
+@Category({ MasterTests.class, MediumTests.class })
+public class TestReopenTableRegionsProcedureInfiniteLoop {
+
+  @ClassRule
+  public static final HBaseClassTestRule CLASS_RULE =
+    HBaseClassTestRule.forClass(TestReopenTableRegionsProcedureInfiniteLoop.class);
+
+  private static final HBaseTestingUtility UTIL = new HBaseTestingUtility();
+
+  private static TableName TABLE_NAME = TableName.valueOf("InfiniteLoop");
+
+  private static byte[] CF = Bytes.toBytes("cf");
+
+  @BeforeClass
+  public static void setUp() throws Exception {
+    UTIL.getConfiguration().setInt(ServerManager.WAIT_ON_REGIONSERVERS_MINTOSTART, 1);
+    UTIL.startMiniCluster(1);
+    UTIL.createTable(TABLE_NAME, CF);
+  }
+
+  @AfterClass
+  public static void tearDown() throws Exception {
+    UTIL.shutdownMiniCluster();
+  }
+
+  @Test
+  public void testInfiniteLoop() throws IOException {
+    HMaster master = UTIL.getMiniHBaseCluster().getMaster();
+    AssignmentManager am = master.getAssignmentManager();
+    ProcedureExecutor<MasterProcedureEnv> exec = master.getMasterProcedureExecutor();
+    RegionInfo regionInfo = UTIL.getAdmin().getRegions(TABLE_NAME).get(0);
+    RegionStateNode regionNode = am.getRegionStates().getRegionStateNode(regionInfo);
+    long procId;
+    ReopenTableRegionsProcedure proc = new ReopenTableRegionsProcedure(TABLE_NAME);
+    regionNode.lock();
+    try {
+      procId = exec.submitProcedure(proc);
+      UTIL.waitFor(30000, () -> proc.hasLock());
+      TransitRegionStateProcedure trsp =
+        TransitRegionStateProcedure.reopen(exec.getEnvironment(), regionInfo);
+      regionNode.setProcedure(trsp);
+      exec.submitProcedure(trsp);
+    } finally {
+      regionNode.unlock();
+    }
+    UTIL.waitFor(60000, () -> exec.isFinished(procId));
+  }
+}