You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by zh...@apache.org on 2018/10/18 03:38:08 UTC
hbase git commit: HBASE-21330 ReopenTableRegionsProcedure will enter
an infinite loop if we schedule a TRSP at the same time
Repository: hbase
Updated Branches:
refs/heads/master 3a75505cf -> 5efa5f6de
HBASE-21330 ReopenTableRegionsProcedure will enter an infinite loop if we schedule a TRSP at the same time
Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/5efa5f6d
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/5efa5f6d
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/5efa5f6d
Branch: refs/heads/master
Commit: 5efa5f6de4388fc79919d977b58a351debcc8555
Parents: 3a75505
Author: Duo Zhang <zh...@apache.org>
Authored: Wed Oct 17 18:04:24 2018 +0800
Committer: Duo Zhang <zh...@apache.org>
Committed: Thu Oct 18 11:29:12 2018 +0800
----------------------------------------------------------------------
.../hadoop/hbase/procedure2/Procedure.java | 3 +-
.../procedure/ReopenTableRegionsProcedure.java | 27 ++++--
...ReopenTableRegionsProcedureInfiniteLoop.java | 90 ++++++++++++++++++++
3 files changed, 114 insertions(+), 6 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hbase/blob/5efa5f6d/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/Procedure.java
----------------------------------------------------------------------
diff --git a/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/Procedure.java b/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/Procedure.java
index 0b7e0c0..191a4b0 100644
--- a/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/Procedure.java
+++ b/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/Procedure.java
@@ -322,7 +322,8 @@ public abstract class Procedure<TEnvironment> implements Comparable<Procedure<TE
* @see #holdLock(Object)
* @return true if the procedure has the lock, false otherwise.
*/
- protected final boolean hasLock() {
+ @VisibleForTesting
+ public final boolean hasLock() {
return locked;
}
http://git-wip-us.apache.org/repos/asf/hbase/blob/5efa5f6d/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ReopenTableRegionsProcedure.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ReopenTableRegionsProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ReopenTableRegionsProcedure.java
index 5b36f30..0634815 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ReopenTableRegionsProcedure.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ReopenTableRegionsProcedure.java
@@ -70,6 +70,18 @@ public class ReopenTableRegionsProcedure
return TableOperationType.REGION_EDIT;
}
+ private boolean canSchedule(MasterProcedureEnv env, HRegionLocation loc) {
+ if (loc.getSeqNum() < 0) {
+ return false;
+ }
+ RegionStateNode regionNode =
+ env.getAssignmentManager().getRegionStates().getRegionStateNode(loc.getRegion());
+ // If the region node is null, then at least in the next round we can remove this region to make
+ // progress. The second condition is the normal case: if there is no TRSP for the region, then
+ // we can schedule one to make progress.
+ return regionNode == null || !regionNode.isInTransition();
+ }
+
@Override
protected Flow executeFromState(MasterProcedureEnv env, ReopenTableRegionsState state)
throws ProcedureSuspendedException, ProcedureYieldException, InterruptedException {
@@ -85,8 +97,12 @@ public class ReopenTableRegionsProcedure
return Flow.HAS_MORE_STATE;
case REOPEN_TABLE_REGIONS_REOPEN_REGIONS:
for (HRegionLocation loc : regions) {
- RegionStateNode regionNode = env.getAssignmentManager().getRegionStates()
- .getOrCreateRegionStateNode(loc.getRegion());
+ RegionStateNode regionNode =
+ env.getAssignmentManager().getRegionStates().getRegionStateNode(loc.getRegion());
+ // This is possible: the region may have already been merged or split, see HBASE-20921.
+ if (regionNode == null) {
+ continue;
+ }
TransitRegionStateProcedure proc;
regionNode.lock();
try {
@@ -108,13 +124,13 @@ public class ReopenTableRegionsProcedure
if (regions.isEmpty()) {
return Flow.NO_MORE_STATE;
}
- if (regions.stream().anyMatch(l -> l.getSeqNum() >= 0)) {
+ if (regions.stream().anyMatch(loc -> canSchedule(env, loc))) {
attempt = 0;
setNextState(ReopenTableRegionsState.REOPEN_TABLE_REGIONS_REOPEN_REGIONS);
return Flow.HAS_MORE_STATE;
}
- // All the regions need to reopen are in OPENING state which means we can not schedule any
- // MRPs.
+ // We cannot schedule a TRSP for any of the regions that still need to be reopened, so wait
+ // for a while and retry.
long backoff = ProcedureUtil.getBackoffTimeMs(this.attempt++);
LOG.info(
"There are still {} region(s) which need to be reopened for table {} are in " +
@@ -138,6 +154,7 @@ public class ReopenTableRegionsProcedure
env.getProcedureScheduler().addFront(this);
return false; // 'false' means that this procedure handled the timeout
}
+
@Override
protected void rollbackState(MasterProcedureEnv env, ReopenTableRegionsState state)
throws IOException, InterruptedException {
http://git-wip-us.apache.org/repos/asf/hbase/blob/5efa5f6d/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestReopenTableRegionsProcedureInfiniteLoop.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestReopenTableRegionsProcedureInfiniteLoop.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestReopenTableRegionsProcedureInfiniteLoop.java
new file mode 100644
index 0000000..870f3bf
--- /dev/null
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestReopenTableRegionsProcedureInfiniteLoop.java
@@ -0,0 +1,90 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.master.procedure;
+
+import java.io.IOException;
+import org.apache.hadoop.hbase.HBaseClassTestRule;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.RegionInfo;
+import org.apache.hadoop.hbase.master.HMaster;
+import org.apache.hadoop.hbase.master.ServerManager;
+import org.apache.hadoop.hbase.master.assignment.AssignmentManager;
+import org.apache.hadoop.hbase.master.assignment.RegionStateNode;
+import org.apache.hadoop.hbase.master.assignment.TransitRegionStateProcedure;
+import org.apache.hadoop.hbase.procedure2.ProcedureExecutor;
+import org.apache.hadoop.hbase.testclassification.MasterTests;
+import org.apache.hadoop.hbase.testclassification.MediumTests;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.junit.ClassRule;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+
+/**
+ * Testcase for HBASE-21330.
+ */
+@Category({ MasterTests.class, MediumTests.class })
+public class TestReopenTableRegionsProcedureInfiniteLoop {
+
+ @ClassRule
+ public static final HBaseClassTestRule CLASS_RULE =
+ HBaseClassTestRule.forClass(TestReopenTableRegionsProcedureInfiniteLoop.class);
+
+ private static final HBaseTestingUtility UTIL = new HBaseTestingUtility();
+
+ private static TableName TABLE_NAME = TableName.valueOf("InfiniteLoop");
+
+ private static byte[] CF = Bytes.toBytes("cf");
+
+ @BeforeClass
+ public static void setUp() throws Exception {
+ UTIL.getConfiguration().setInt(ServerManager.WAIT_ON_REGIONSERVERS_MINTOSTART, 1);
+ UTIL.startMiniCluster(1);
+ UTIL.createTable(TABLE_NAME, CF);
+ }
+
+ @AfterClass
+ public static void tearDown() throws Exception {
+ UTIL.shutdownMiniCluster();
+ }
+
+ @Test
+ public void testInfiniteLoop() throws IOException {
+ HMaster master = UTIL.getMiniHBaseCluster().getMaster();
+ AssignmentManager am = master.getAssignmentManager();
+ ProcedureExecutor<MasterProcedureEnv> exec = master.getMasterProcedureExecutor();
+ RegionInfo regionInfo = UTIL.getAdmin().getRegions(TABLE_NAME).get(0);
+ RegionStateNode regionNode = am.getRegionStates().getRegionStateNode(regionInfo);
+ long procId;
+ ReopenTableRegionsProcedure proc = new ReopenTableRegionsProcedure(TABLE_NAME);
+ regionNode.lock();
+ try {
+ procId = exec.submitProcedure(proc);
+ UTIL.waitFor(30000, () -> proc.hasLock());
+ TransitRegionStateProcedure trsp =
+ TransitRegionStateProcedure.reopen(exec.getEnvironment(), regionInfo);
+ regionNode.setProcedure(trsp);
+ exec.submitProcedure(trsp);
+ } finally {
+ regionNode.unlock();
+ }
+ UTIL.waitFor(60000, () -> exec.isFinished(procId));
+ }
+}