You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by zh...@apache.org on 2018/08/20 22:16:24 UTC
[3/7] hbase git commit: HBASE-20881 Introduce a region transition
procedure to handle all the state transition for a region
http://git-wip-us.apache.org/repos/asf/hbase/blob/bb349413/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ServerCrashProcedure.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ServerCrashProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ServerCrashProcedure.java
index 775c8c2..db7a872 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ServerCrashProcedure.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ServerCrashProcedure.java
@@ -20,18 +20,17 @@ package org.apache.hadoop.hbase.master.procedure;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
-import java.util.Collections;
-import java.util.Iterator;
import java.util.List;
import org.apache.hadoop.hbase.ServerName;
-import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.client.RegionInfoBuilder;
import org.apache.hadoop.hbase.client.RegionReplicaUtil;
+import org.apache.hadoop.hbase.client.TableState;
import org.apache.hadoop.hbase.master.MasterServices;
import org.apache.hadoop.hbase.master.MasterWalManager;
import org.apache.hadoop.hbase.master.assignment.AssignmentManager;
-import org.apache.hadoop.hbase.master.assignment.RegionTransitionProcedure;
+import org.apache.hadoop.hbase.master.assignment.RegionStateNode;
+import org.apache.hadoop.hbase.master.assignment.TransitRegionStateProcedure;
import org.apache.hadoop.hbase.procedure2.ProcedureMetrics;
import org.apache.hadoop.hbase.procedure2.ProcedureStateSerializer;
import org.apache.hadoop.hbase.procedure2.ProcedureSuspendedException;
@@ -98,7 +97,10 @@ public class ServerCrashProcedure
* #deserializeStateData(InputStream). Do not use directly.
*/
public ServerCrashProcedure() {
- super();
+ }
+
+ public boolean isInRecoverMetaState() {
+ return getCurrentState() == ServerCrashState.SERVER_CRASH_PROCESS_META;
}
@Override
@@ -128,15 +130,7 @@ public class ServerCrashProcedure
setNextState(ServerCrashState.SERVER_CRASH_ASSIGN_META);
break;
case SERVER_CRASH_ASSIGN_META:
- handleRIT(env, Arrays.asList(RegionInfoBuilder.FIRST_META_REGIONINFO));
- addChildProcedure(env.getAssignmentManager()
- .createAssignProcedure(RegionInfoBuilder.FIRST_META_REGIONINFO));
- setNextState(ServerCrashState.SERVER_CRASH_GET_REGIONS);
- break;
- case SERVER_CRASH_PROCESS_META:
- // not used any more but still leave it here to keep compatible as there maybe old SCP
- // which is stored in ProcedureStore which has this state.
- processMeta(env);
+ assignRegions(env, Arrays.asList(RegionInfoBuilder.FIRST_META_REGIONINFO));
setNextState(ServerCrashState.SERVER_CRASH_GET_REGIONS);
break;
case SERVER_CRASH_GET_REGIONS:
@@ -144,8 +138,8 @@ public class ServerCrashProcedure
if (env.getAssignmentManager().waitMetaLoaded(this)) {
throw new ProcedureSuspendedException();
}
- this.regionsOnCrashedServer = services.getAssignmentManager().getRegionStates()
- .getServerRegionInfoSet(serverName);
+ this.regionsOnCrashedServer =
+ services.getAssignmentManager().getRegionStates().getServerRegionInfoSet(serverName);
// Where to go next? Depends on whether we should split logs at all or
// if we should do distributed log splitting.
if (!this.shouldSplitWal) {
@@ -162,26 +156,15 @@ public class ServerCrashProcedure
// If no regions to assign, skip assign and skip to the finish.
// Filter out meta regions. Those are handled elsewhere in this procedure.
// Filter changes this.regionsOnCrashedServer.
- if (filterDefaultMetaRegions(regionsOnCrashedServer)) {
+ if (filterDefaultMetaRegions()) {
if (LOG.isTraceEnabled()) {
- LOG.trace("Assigning regions " +
- RegionInfo.getShortNameToLog(regionsOnCrashedServer) + ", " + this +
- "; cycles=" + getCycles());
+ LOG
+ .trace("Assigning regions " + RegionInfo.getShortNameToLog(regionsOnCrashedServer) +
+ ", " + this + "; cycles=" + getCycles());
}
- // Handle RIT against crashed server. Will cancel any ongoing assigns/unassigns.
- // Returns list of regions we need to reassign.
- // NOTE: there is nothing to stop a dispatch happening AFTER this point. Check for the
- // condition if a dispatch RPC fails inside in AssignProcedure/UnassignProcedure.
- // AssignProcedure just keeps retrying. UnassignProcedure is more complicated. See where
- // it does the check by calling am#isLogSplittingDone.
- List<RegionInfo> toAssign = handleRIT(env, regionsOnCrashedServer);
- AssignmentManager am = env.getAssignmentManager();
- // CreateAssignProcedure will try to use the old location for the region deploy.
- addChildProcedure(am.createAssignProcedures(toAssign));
- setNextState(ServerCrashState.SERVER_CRASH_HANDLE_RIT2);
- } else {
- setNextState(ServerCrashState.SERVER_CRASH_FINISH);
+ assignRegions(env, regionsOnCrashedServer);
}
+ setNextState(ServerCrashState.SERVER_CRASH_FINISH);
break;
case SERVER_CRASH_HANDLE_RIT2:
// Noop. Left in place because we used to call handleRIT here for a second time
@@ -201,28 +184,16 @@ public class ServerCrashProcedure
return Flow.HAS_MORE_STATE;
}
- private void processMeta(final MasterProcedureEnv env) throws IOException {
- LOG.debug("{}; processing hbase:meta", this);
-
- // Assign meta if still carrying it. Check again: region may be assigned because of RIT timeout
- final AssignmentManager am = env.getMasterServices().getAssignmentManager();
- for (RegionInfo hri: am.getRegionStates().getServerRegionInfoSet(serverName)) {
- if (!isDefaultMetaRegion(hri)) {
- continue;
- }
- addChildProcedure(new RecoverMetaProcedure(serverName, this.shouldSplitWal));
+ private boolean filterDefaultMetaRegions() {
+ if (regionsOnCrashedServer == null) {
+ return false;
}
+ regionsOnCrashedServer.removeIf(this::isDefaultMetaRegion);
+ return !regionsOnCrashedServer.isEmpty();
}
- private boolean filterDefaultMetaRegions(final List<RegionInfo> regions) {
- if (regions == null) return false;
- regions.removeIf(this::isDefaultMetaRegion);
- return !regions.isEmpty();
- }
-
- private boolean isDefaultMetaRegion(final RegionInfo hri) {
- return hri.getTable().equals(TableName.META_TABLE_NAME) &&
- RegionReplicaUtil.isDefaultReplica(hri);
+ private boolean isDefaultMetaRegion(RegionInfo hri) {
+ return hri.isMetaRegion() && RegionReplicaUtil.isDefaultReplica(hri);
}
private void splitMetaLogs(MasterProcedureEnv env) throws IOException {
@@ -372,54 +343,37 @@ public class ServerCrashProcedure
}
/**
- * Handle any outstanding RIT that are up against this.serverName, the crashed server.
- * Notify them of crash. Remove assign entries from the passed in <code>regions</code>
- * otherwise we have two assigns going on and they will fight over who has lock.
- * Notify Unassigns. If unable to unassign because server went away, unassigns block waiting
- * on the below callback from a ServerCrashProcedure before proceeding.
- * @param regions Regions on the Crashed Server.
- * @return List of regions we should assign to new homes (not same as regions on crashed server).
+ * Assign the regions on the crashed RS to other RSes.
+ * <p/>
+ * In this method we will go through all the RegionStateNodes of the given regions to find out
+ * whether there is already a TRSP for the region, if so we interrupt it and let it retry on
+ * other server, otherwise we will schedule a TRSP to bring the region online.
+ * <p/>
+ * We will also check whether the table for a region is enabled, if not, we will skip assigning
+ * it.
*/
- private List<RegionInfo> handleRIT(final MasterProcedureEnv env, List<RegionInfo> regions) {
- if (regions == null || regions.isEmpty()) {
- return Collections.emptyList();
- }
+ private void assignRegions(MasterProcedureEnv env, List<RegionInfo> regions) throws IOException {
AssignmentManager am = env.getMasterServices().getAssignmentManager();
- List<RegionInfo> toAssign = new ArrayList<RegionInfo>(regions);
- // Get an iterator so can remove items.
- final Iterator<RegionInfo> it = toAssign.iterator();
- ServerCrashException sce = null;
- while (it.hasNext()) {
- final RegionInfo hri = it.next();
- RegionTransitionProcedure rtp = am.getRegionStates().getRegionTransitionProcedure(hri);
- if (rtp == null) {
- continue;
- }
- // Make sure the RIT is against this crashed server. In the case where there are many
- // processings of a crashed server -- backed up for whatever reason (slow WAL split) --
- // then a previous SCP may have already failed an assign, etc., and it may have a new
- // location target; DO NOT fail these else we make for assign flux.
- ServerName rtpServerName = rtp.getServer(env);
- if (rtpServerName == null) {
- LOG.warn("RIT with ServerName null! " + rtp);
- continue;
- }
- if (!rtpServerName.equals(this.serverName)) continue;
- LOG.info("pid=" + getProcId() + " found RIT " + rtp + "; " +
- rtp.getRegionState(env).toShortString());
- // Notify RIT on server crash.
- if (sce == null) {
- sce = new ServerCrashException(getProcId(), getServerName());
+ for (RegionInfo region : regions) {
+ RegionStateNode regionNode = am.getRegionStates().getOrCreateRegionStateNode(region);
+ regionNode.lock();
+ try {
+ if (regionNode.getProcedure() != null) {
+ LOG.info("{} found RIT {}; {}", this, regionNode.getProcedure(), regionNode);
+ regionNode.getProcedure().serverCrashed(env, regionNode, getServerName());
+ } else {
+ if (env.getMasterServices().getTableStateManager().isTableState(regionNode.getTable(),
+ TableState.State.DISABLING, TableState.State.DISABLED)) {
+ continue;
+ }
+ TransitRegionStateProcedure proc = TransitRegionStateProcedure.assign(env, region, null);
+ regionNode.setProcedure(proc);
+ addChildProcedure(proc);
+ }
+ } finally {
+ regionNode.unlock();
}
- rtp.remoteCallFailed(env, this.serverName, sce);
- // If an assign, remove from passed-in list of regions so we subsequently do not create
- // a new assign; the exisitng assign after the call to remoteCallFailed will recalibrate
- // and assign to a server other than the crashed one; no need to create new assign.
- // If an unassign, do not return this region; the above cancel will wake up the unassign and
- // it will complete. Done.
- it.remove();
}
- return toAssign;
}
@Override
http://git-wip-us.apache.org/repos/asf/hbase/blob/bb349413/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/TruncateTableProcedure.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/TruncateTableProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/TruncateTableProcedure.java
index d6c8607..52da607 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/TruncateTableProcedure.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/TruncateTableProcedure.java
@@ -189,7 +189,7 @@ public class TruncateTableProcedure
@Override
protected TruncateTableState getState(final int stateId) {
- return TruncateTableState.valueOf(stateId);
+ return TruncateTableState.forNumber(stateId);
}
@Override
@@ -203,6 +203,11 @@ public class TruncateTableProcedure
}
@Override
+ protected boolean holdLock(MasterProcedureEnv env) {
+ return true;
+ }
+
+ @Override
public TableName getTableName() {
return tableName;
}
http://git-wip-us.apache.org/repos/asf/hbase/blob/bb349413/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentListener.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentListener.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentListener.java
deleted file mode 100644
index 1f22830..0000000
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentListener.java
+++ /dev/null
@@ -1,294 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.master;
-
-import static org.junit.Assert.assertEquals;
-
-import java.io.IOException;
-import java.util.List;
-import java.util.concurrent.atomic.AtomicInteger;
-import org.apache.hadoop.hbase.HBaseClassTestRule;
-import org.apache.hadoop.hbase.HBaseTestingUtility;
-import org.apache.hadoop.hbase.MiniHBaseCluster;
-import org.apache.hadoop.hbase.ServerName;
-import org.apache.hadoop.hbase.TableName;
-import org.apache.hadoop.hbase.client.Admin;
-import org.apache.hadoop.hbase.client.Put;
-import org.apache.hadoop.hbase.client.RegionInfo;
-import org.apache.hadoop.hbase.client.Table;
-import org.apache.hadoop.hbase.master.assignment.AssignmentManager;
-import org.apache.hadoop.hbase.regionserver.HRegion;
-import org.apache.hadoop.hbase.regionserver.Region;
-import org.apache.hadoop.hbase.testclassification.MasterTests;
-import org.apache.hadoop.hbase.testclassification.MediumTests;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.hbase.util.JVMClusterUtil;
-import org.junit.AfterClass;
-import org.junit.BeforeClass;
-import org.junit.ClassRule;
-import org.junit.Rule;
-import org.junit.Test;
-import org.junit.experimental.categories.Category;
-import org.junit.rules.TestName;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-@Category({MasterTests.class, MediumTests.class})
-public class TestAssignmentListener {
-
- @ClassRule
- public static final HBaseClassTestRule CLASS_RULE =
- HBaseClassTestRule.forClass(TestAssignmentListener.class);
-
- private static final Logger LOG = LoggerFactory.getLogger(TestAssignmentListener.class);
-
- private static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
-
- @Rule
- public TestName name = new TestName();
-
- static class DummyListener {
- protected AtomicInteger modified = new AtomicInteger(0);
-
- public void awaitModifications(int count) throws InterruptedException {
- while (!modified.compareAndSet(count, 0)) {
- Thread.sleep(100);
- }
- }
- }
-
- static class DummyAssignmentListener extends DummyListener implements AssignmentListener {
- private AtomicInteger closeCount = new AtomicInteger(0);
- private AtomicInteger openCount = new AtomicInteger(0);
-
- public DummyAssignmentListener() {
- }
-
- @Override
- public void regionOpened(final RegionInfo regionInfo, final ServerName serverName) {
- LOG.info("Assignment open region=" + regionInfo + " server=" + serverName);
- openCount.incrementAndGet();
- modified.incrementAndGet();
- }
-
- @Override
- public void regionClosed(final RegionInfo regionInfo) {
- LOG.info("Assignment close region=" + regionInfo);
- closeCount.incrementAndGet();
- modified.incrementAndGet();
- }
-
- public void reset() {
- openCount.set(0);
- closeCount.set(0);
- }
-
- public int getLoadCount() {
- return openCount.get();
- }
-
- public int getCloseCount() {
- return closeCount.get();
- }
- }
-
- static class DummyServerListener extends DummyListener implements ServerListener {
- private AtomicInteger removedCount = new AtomicInteger(0);
- private AtomicInteger addedCount = new AtomicInteger(0);
-
- public DummyServerListener() {
- }
-
- @Override
- public void serverAdded(final ServerName serverName) {
- LOG.info("Server added " + serverName);
- addedCount.incrementAndGet();
- modified.incrementAndGet();
- }
-
- @Override
- public void serverRemoved(final ServerName serverName) {
- LOG.info("Server removed " + serverName);
- removedCount.incrementAndGet();
- modified.incrementAndGet();
- }
-
- public void reset() {
- addedCount.set(0);
- removedCount.set(0);
- }
-
- public int getAddedCount() {
- return addedCount.get();
- }
-
- public int getRemovedCount() {
- return removedCount.get();
- }
- }
-
- @BeforeClass
- public static void beforeAllTests() throws Exception {
- TEST_UTIL.startMiniCluster(2);
- }
-
- @AfterClass
- public static void afterAllTests() throws Exception {
- TEST_UTIL.shutdownMiniCluster();
- }
-
- @Test
- public void testServerListener() throws IOException, InterruptedException {
- ServerManager serverManager = TEST_UTIL.getHBaseCluster().getMaster().getServerManager();
-
- DummyServerListener listener = new DummyServerListener();
- serverManager.registerListener(listener);
- try {
- MiniHBaseCluster miniCluster = TEST_UTIL.getMiniHBaseCluster();
-
- // Start a new Region Server
- miniCluster.startRegionServer();
- listener.awaitModifications(1);
- assertEquals(1, listener.getAddedCount());
- assertEquals(0, listener.getRemovedCount());
-
- // Start another Region Server
- listener.reset();
- miniCluster.startRegionServer();
- listener.awaitModifications(1);
- assertEquals(1, listener.getAddedCount());
- assertEquals(0, listener.getRemovedCount());
-
- int nrs = miniCluster.getRegionServerThreads().size();
-
- // Stop a Region Server
- listener.reset();
- miniCluster.stopRegionServer(nrs - 1);
- listener.awaitModifications(1);
- assertEquals(0, listener.getAddedCount());
- assertEquals(1, listener.getRemovedCount());
-
- // Stop another Region Server
- listener.reset();
- miniCluster.stopRegionServer(nrs - 2);
- listener.awaitModifications(1);
- assertEquals(0, listener.getAddedCount());
- assertEquals(1, listener.getRemovedCount());
- } finally {
- serverManager.unregisterListener(listener);
- }
- }
-
- @Test
- public void testAssignmentListener() throws IOException, InterruptedException {
- AssignmentManager am = TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager();
- Admin admin = TEST_UTIL.getAdmin();
-
- DummyAssignmentListener listener = new DummyAssignmentListener();
- am.registerListener(listener);
- try {
- final TableName tableName = TableName.valueOf(name.getMethodName());
- final byte[] FAMILY = Bytes.toBytes("cf");
-
- // Create a new table, with a single region
- LOG.info("Create Table");
- TEST_UTIL.createTable(tableName, FAMILY);
- listener.awaitModifications(1);
- assertEquals(1, listener.getLoadCount());
- assertEquals(0, listener.getCloseCount());
-
- // Add some data
- Table table = TEST_UTIL.getConnection().getTable(tableName);
- try {
- for (int i = 0; i < 10; ++i) {
- byte[] key = Bytes.toBytes("row-" + i);
- Put put = new Put(key);
- put.addColumn(FAMILY, null, key);
- table.put(put);
- }
- } finally {
- table.close();
- }
-
- // Split the table in two
- LOG.info("Split Table");
- listener.reset();
- admin.split(tableName, Bytes.toBytes("row-3"));
- listener.awaitModifications(3);
- assertEquals(2, listener.getLoadCount()); // daughters added
- assertEquals(1, listener.getCloseCount()); // parent removed
-
- // Wait for the Regions to be mergeable
- MiniHBaseCluster miniCluster = TEST_UTIL.getMiniHBaseCluster();
- int mergeable = 0;
- while (mergeable < 2) {
- Thread.sleep(100);
- admin.majorCompact(tableName);
- mergeable = 0;
- for (JVMClusterUtil.RegionServerThread regionThread: miniCluster.getRegionServerThreads()) {
- for (Region region: regionThread.getRegionServer().getRegions(tableName)) {
- mergeable += ((HRegion)region).isMergeable() ? 1 : 0;
- }
- }
- }
-
- // Merge the two regions
- LOG.info("Merge Regions");
- listener.reset();
- List<RegionInfo> regions = admin.getRegions(tableName);
- assertEquals(2, regions.size());
- boolean sameServer = areAllRegionsLocatedOnSameServer(tableName);
- // If the regions are located by different server, we need to move
- // regions to same server before merging. So the expected modifications
- // will increaes to 5. (open + close)
- final int expectedModifications = sameServer ? 3 : 5;
- final int expectedLoadCount = sameServer ? 1 : 2;
- final int expectedCloseCount = sameServer ? 2 : 3;
- admin.mergeRegionsAsync(regions.get(0).getEncodedNameAsBytes(),
- regions.get(1).getEncodedNameAsBytes(), true);
- listener.awaitModifications(expectedModifications);
- assertEquals(1, admin.getRegions(tableName).size());
- assertEquals(expectedLoadCount, listener.getLoadCount()); // new merged region added
- assertEquals(expectedCloseCount, listener.getCloseCount()); // daughters removed
-
- // Delete the table
- LOG.info("Drop Table");
- listener.reset();
- TEST_UTIL.deleteTable(tableName);
- listener.awaitModifications(1);
- assertEquals(0, listener.getLoadCount());
- assertEquals(1, listener.getCloseCount());
- } finally {
- am.unregisterListener(listener);
- }
- }
-
- private boolean areAllRegionsLocatedOnSameServer(TableName TABLE_NAME) {
- MiniHBaseCluster miniCluster = TEST_UTIL.getMiniHBaseCluster();
- int serverCount = 0;
- for (JVMClusterUtil.RegionServerThread regionThread: miniCluster.getRegionServerThreads()) {
- if (!regionThread.getRegionServer().getRegions(TABLE_NAME).isEmpty()) {
- ++serverCount;
- }
- if (serverCount > 1) {
- return false;
- }
- }
- return serverCount == 1;
- }
-}
http://git-wip-us.apache.org/repos/asf/hbase/blob/bb349413/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterAbortAndRSGotKilled.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterAbortAndRSGotKilled.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterAbortAndRSGotKilled.java
index 41a8001..3df4929 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterAbortAndRSGotKilled.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterAbortAndRSGotKilled.java
@@ -20,7 +20,6 @@ package org.apache.hadoop.hbase.master;
import java.io.IOException;
import java.util.Optional;
import java.util.concurrent.CountDownLatch;
-
import org.apache.hadoop.hbase.HBaseClassTestRule;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.TableName;
@@ -30,12 +29,14 @@ import org.apache.hadoop.hbase.coprocessor.ObserverContext;
import org.apache.hadoop.hbase.coprocessor.RegionCoprocessor;
import org.apache.hadoop.hbase.coprocessor.RegionCoprocessorEnvironment;
import org.apache.hadoop.hbase.coprocessor.RegionObserver;
-import org.apache.hadoop.hbase.master.assignment.MoveRegionProcedure;
+import org.apache.hadoop.hbase.master.assignment.RegionStateNode;
+import org.apache.hadoop.hbase.master.assignment.TransitRegionStateProcedure;
import org.apache.hadoop.hbase.regionserver.HRegionServer;
import org.apache.hadoop.hbase.testclassification.MasterTests;
import org.apache.hadoop.hbase.testclassification.MediumTests;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.JVMClusterUtil;
+import org.apache.hadoop.hbase.util.Threads;
import org.junit.AfterClass;
import org.junit.Assert;
import org.junit.BeforeClass;
@@ -45,15 +46,14 @@ import org.junit.experimental.categories.Category;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-
@Category({ MasterTests.class, MediumTests.class })
public class TestMasterAbortAndRSGotKilled {
- private static Logger LOG = LoggerFactory
- .getLogger(TestMasterAbortAndRSGotKilled.class.getName());
+ private static Logger LOG =
+ LoggerFactory.getLogger(TestMasterAbortAndRSGotKilled.class.getName());
@ClassRule
public static final HBaseClassTestRule CLASS_RULE =
- HBaseClassTestRule.forClass(TestMasterAbortAndRSGotKilled.class);
+ HBaseClassTestRule.forClass(TestMasterAbortAndRSGotKilled.class);
private static final HBaseTestingUtility UTIL = new HBaseTestingUtility();
@@ -61,14 +61,12 @@ public class TestMasterAbortAndRSGotKilled {
private static CountDownLatch countDownLatch = new CountDownLatch(1);
-
-
private static byte[] CF = Bytes.toBytes("cf");
@BeforeClass
public static void setUp() throws Exception {
UTIL.getConfiguration().setStrings(CoprocessorHost.REGION_COPROCESSOR_CONF_KEY,
- DelayCloseCP.class.getName());
+ DelayCloseCP.class.getName());
UTIL.startMiniCluster(3);
UTIL.getAdmin().balancerSwitch(false, true);
UTIL.createTable(TABLE_NAME, CF);
@@ -84,48 +82,44 @@ public class TestMasterAbortAndRSGotKilled {
public void test() throws Exception {
JVMClusterUtil.RegionServerThread rsThread = null;
for (JVMClusterUtil.RegionServerThread t : UTIL.getMiniHBaseCluster()
- .getRegionServerThreads()) {
+ .getRegionServerThreads()) {
if (!t.getRegionServer().getRegions(TABLE_NAME).isEmpty()) {
rsThread = t;
break;
}
}
- //find the rs and hri of the table
+ // find the rs and hri of the table
HRegionServer rs = rsThread.getRegionServer();
RegionInfo hri = rs.getRegions(TABLE_NAME).get(0).getRegionInfo();
- MoveRegionProcedure moveRegionProcedure = new MoveRegionProcedure(
- UTIL.getMiniHBaseCluster().getMaster().getMasterProcedureExecutor()
- .getEnvironment(),
- new RegionPlan(hri, rs.getServerName(), rs.getServerName()), true);
- long procID = UTIL.getMiniHBaseCluster().getMaster()
- .getMasterProcedureExecutor().submitProcedure(moveRegionProcedure);
+ TransitRegionStateProcedure moveRegionProcedure = TransitRegionStateProcedure.reopen(
+ UTIL.getMiniHBaseCluster().getMaster().getMasterProcedureExecutor().getEnvironment(), hri);
+ RegionStateNode regionNode = UTIL.getMiniHBaseCluster().getMaster().getAssignmentManager()
+ .getRegionStates().getOrCreateRegionStateNode(hri);
+ regionNode.setProcedure(moveRegionProcedure);
+ UTIL.getMiniHBaseCluster().getMaster().getMasterProcedureExecutor()
+ .submitProcedure(moveRegionProcedure);
countDownLatch.await();
UTIL.getMiniHBaseCluster().stopMaster(0);
UTIL.getMiniHBaseCluster().startMaster();
- //wait until master initialized
- UTIL.waitFor(30000,
- () -> UTIL.getMiniHBaseCluster().getMaster() != null && UTIL
- .getMiniHBaseCluster().getMaster().isInitialized());
+ // wait until master initialized
+ UTIL.waitFor(30000, () -> UTIL.getMiniHBaseCluster().getMaster() != null &&
+ UTIL.getMiniHBaseCluster().getMaster().isInitialized());
Assert.assertTrue("Should be 3 RS after master restart",
- UTIL.getMiniHBaseCluster().getLiveRegionServerThreads().size() == 3);
+ UTIL.getMiniHBaseCluster().getLiveRegionServerThreads().size() == 3);
}
- public static class DelayCloseCP implements RegionCoprocessor,
- RegionObserver {
- @Override
- public void preClose(ObserverContext<RegionCoprocessorEnvironment> c,
- boolean abortRequested) throws IOException {
- try {
- if (!c.getEnvironment().getRegion().getRegionInfo().getTable().isSystemTable()) {
- LOG.error("begin to sleep");
- countDownLatch.countDown();
- //Sleep here so we can stuck the RPC call
- Thread.sleep(10000);
- LOG.error("finish sleep");
- }
- } catch (Throwable t) {
+ public static class DelayCloseCP implements RegionCoprocessor, RegionObserver {
+ @Override
+ public void preClose(ObserverContext<RegionCoprocessorEnvironment> c, boolean abortRequested)
+ throws IOException {
+ if (!c.getEnvironment().getRegion().getRegionInfo().getTable().isSystemTable()) {
+ LOG.info("begin to sleep");
+ countDownLatch.countDown();
+ // Sleep here so we can stuck the RPC call
+ Threads.sleep(10000);
+ LOG.info("finish sleep");
}
}
@@ -134,5 +128,4 @@ public class TestMasterAbortAndRSGotKilled {
return Optional.of(this);
}
}
-
}
http://git-wip-us.apache.org/repos/asf/hbase/blob/bb349413/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMergeTableRegionsWhileRSCrash.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMergeTableRegionsWhileRSCrash.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMergeTableRegionsWhileRSCrash.java
index 9608e5c..7cf794a 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMergeTableRegionsWhileRSCrash.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMergeTableRegionsWhileRSCrash.java
@@ -19,7 +19,6 @@ package org.apache.hadoop.hbase.master;
import java.util.List;
import java.util.concurrent.CountDownLatch;
-
import org.apache.hadoop.hbase.HBaseClassTestRule;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.TableName;
@@ -31,7 +30,7 @@ import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.master.assignment.MergeTableRegionsProcedure;
-import org.apache.hadoop.hbase.master.assignment.UnassignProcedure;
+import org.apache.hadoop.hbase.master.assignment.TransitRegionStateProcedure;
import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv;
import org.apache.hadoop.hbase.procedure2.ProcedureExecutor;
import org.apache.hadoop.hbase.testclassification.MasterTests;
@@ -103,9 +102,9 @@ public class TestMergeTableRegionsWhileRSCrash {
MergeTableRegionsProcedure mergeTableRegionsProcedure = new MergeTableRegionsProcedure(
env, regionInfos.get(0), regionInfos.get(1));
executor.submitProcedure(mergeTableRegionsProcedure);
- UTIL.waitFor(30000, () -> executor.getProcedures().stream()
- .filter(p -> p instanceof UnassignProcedure)
- .map(p -> (UnassignProcedure) p)
+ UTIL.waitFor(30000,
+ () -> executor.getProcedures().stream().filter(p -> p instanceof TransitRegionStateProcedure)
+ .map(p -> (TransitRegionStateProcedure) p)
.anyMatch(p -> TABLE_NAME.equals(p.getTableName())));
UTIL.getMiniHBaseCluster().killRegionServer(
UTIL.getMiniHBaseCluster().getRegionServer(0).getServerName());
http://git-wip-us.apache.org/repos/asf/hbase/blob/bb349413/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestServerCrashProcedureCarryingMetaStuck.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestServerCrashProcedureCarryingMetaStuck.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestServerCrashProcedureCarryingMetaStuck.java
index 748cd0e..88cde00 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestServerCrashProcedureCarryingMetaStuck.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestServerCrashProcedureCarryingMetaStuck.java
@@ -26,7 +26,7 @@ import org.apache.hadoop.hbase.client.AsyncAdmin;
import org.apache.hadoop.hbase.client.AsyncConnection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.RegionInfo;
-import org.apache.hadoop.hbase.master.assignment.AssignProcedure;
+import org.apache.hadoop.hbase.master.assignment.TransitRegionStateProcedure;
import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv;
import org.apache.hadoop.hbase.procedure2.ProcedureExecutor;
import org.apache.hadoop.hbase.regionserver.HRegionServer;
@@ -83,9 +83,10 @@ public class TestServerCrashProcedureCarryingMetaStuck {
rs.abort("For testing!");
UTIL.waitFor(30000,
- () -> executor.getProcedures().stream().filter(p -> p instanceof AssignProcedure)
- .map(p -> (AssignProcedure) p)
- .anyMatch(p -> Bytes.equals(hri.getRegionName(), p.getRegionInfo().getRegionName())));
+ () -> executor.getProcedures().stream()
+ .filter(p -> p instanceof TransitRegionStateProcedure)
+ .map(p -> (TransitRegionStateProcedure) p)
+ .anyMatch(p -> Bytes.equals(hri.getRegionName(), p.getRegion().getRegionName())));
proc.resume();
UTIL.waitFor(30000, () -> executor.isFinished(procId));
// see whether the move region procedure can finish properly
http://git-wip-us.apache.org/repos/asf/hbase/blob/bb349413/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestServerCrashProcedureStuck.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestServerCrashProcedureStuck.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestServerCrashProcedureStuck.java
index 2681657..b6dedbe 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestServerCrashProcedureStuck.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestServerCrashProcedureStuck.java
@@ -26,7 +26,7 @@ import org.apache.hadoop.hbase.client.AsyncAdmin;
import org.apache.hadoop.hbase.client.AsyncConnection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.RegionInfo;
-import org.apache.hadoop.hbase.master.assignment.AssignProcedure;
+import org.apache.hadoop.hbase.master.assignment.TransitRegionStateProcedure;
import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv;
import org.apache.hadoop.hbase.procedure2.ProcedureExecutor;
import org.apache.hadoop.hbase.regionserver.HRegionServer;
@@ -92,9 +92,10 @@ public class TestServerCrashProcedureStuck {
rs.abort("For testing!");
UTIL.waitFor(30000,
- () -> executor.getProcedures().stream().filter(p -> p instanceof AssignProcedure)
- .map(p -> (AssignProcedure) p)
- .anyMatch(p -> Bytes.equals(hri.getRegionName(), p.getRegionInfo().getRegionName())));
+ () -> executor.getProcedures().stream()
+ .filter(p -> p instanceof TransitRegionStateProcedure)
+ .map(p -> (TransitRegionStateProcedure) p)
+ .anyMatch(p -> Bytes.equals(hri.getRegionName(), p.getRegion().getRegionName())));
proc.resume();
UTIL.waitFor(30000, () -> executor.isFinished(procId));
// see whether the move region procedure can finish properly
http://git-wip-us.apache.org/repos/asf/hbase/blob/bb349413/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestSplitRegionWhileRSCrash.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestSplitRegionWhileRSCrash.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestSplitRegionWhileRSCrash.java
index a881575..fe5d1a2 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestSplitRegionWhileRSCrash.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestSplitRegionWhileRSCrash.java
@@ -19,7 +19,6 @@ package org.apache.hadoop.hbase.master;
import java.util.List;
import java.util.concurrent.CountDownLatch;
-
import org.apache.hadoop.hbase.HBaseClassTestRule;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.TableName;
@@ -31,7 +30,7 @@ import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.master.assignment.SplitTableRegionProcedure;
-import org.apache.hadoop.hbase.master.assignment.UnassignProcedure;
+import org.apache.hadoop.hbase.master.assignment.TransitRegionStateProcedure;
import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv;
import org.apache.hadoop.hbase.procedure2.ProcedureExecutor;
import org.apache.hadoop.hbase.testclassification.MasterTests;
@@ -103,8 +102,8 @@ public class TestSplitRegionWhileRSCrash {
executor.submitProcedure(splitProcedure);
LOG.info("SplitProcedure submitted");
UTIL.waitFor(30000, () -> executor.getProcedures().stream()
- .filter(p -> p instanceof UnassignProcedure)
- .map(p -> (UnassignProcedure) p)
+ .filter(p -> p instanceof TransitRegionStateProcedure)
+ .map(p -> (TransitRegionStateProcedure) p)
.anyMatch(p -> TABLE_NAME.equals(p.getTableName())));
UTIL.getMiniHBaseCluster().killRegionServer(
UTIL.getMiniHBaseCluster().getRegionServer(0).getServerName());
http://git-wip-us.apache.org/repos/asf/hbase/blob/bb349413/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/MockMasterServices.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/MockMasterServices.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/MockMasterServices.java
index c4a2f03..3206877 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/MockMasterServices.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/MockMasterServices.java
@@ -319,7 +319,7 @@ public class MockMasterServices extends MockNoopMasterServices {
}
@Override
- public void updateRegionLocation(RegionStates.RegionStateNode regionNode) throws IOException {
+ public void updateRegionLocation(RegionStateNode regionNode) throws IOException {
}
}
http://git-wip-us.apache.org/repos/asf/hbase/blob/bb349413/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestAMAssignWithRandExec.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestAMAssignWithRandExec.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestAMAssignWithRandExec.java
new file mode 100644
index 0000000..71e0a27
--- /dev/null
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestAMAssignWithRandExec.java
@@ -0,0 +1,53 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.master.assignment;
+
+import org.apache.hadoop.hbase.HBaseClassTestRule;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.RegionInfo;
+import org.apache.hadoop.hbase.testclassification.LargeTests;
+import org.apache.hadoop.hbase.testclassification.MasterTests;
+import org.junit.ClassRule;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+@Category({ MasterTests.class, LargeTests.class })
+public class TestAMAssignWithRandExec extends TestAssignmentManagerBase {
+
+ @ClassRule
+ public static final HBaseClassTestRule CLASS_RULE =
+ HBaseClassTestRule.forClass(TestAMAssignWithRandExec.class);
+
+ private static final Logger LOG = LoggerFactory.getLogger(TestAMAssignWithRandExec.class);
+
+ @Test
+ public void testAssignWithRandExec() throws Exception {
+ TableName tableName = TableName.valueOf("testAssignWithRandExec");
+ RegionInfo hri = createRegionInfo(tableName, 1);
+
+ rsDispatcher.setMockRsExecutor(new RandRsExecutor());
+ // Loop a bunch of times so we hit various combos of exceptions.
+ for (int i = 0; i < 10; i++) {
+ LOG.info("ROUND=" + i);
+ TransitRegionStateProcedure proc = createAssignProcedure(hri);
+ waitOnFuture(submitProcedure(proc));
+ }
+ }
+}
http://git-wip-us.apache.org/repos/asf/hbase/blob/bb349413/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestAMServerFailedOpen.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestAMServerFailedOpen.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestAMServerFailedOpen.java
new file mode 100644
index 0000000..b4689e5
--- /dev/null
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestAMServerFailedOpen.java
@@ -0,0 +1,134 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.master.assignment;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.fail;
+
+import java.io.IOException;
+import org.apache.hadoop.hbase.DoNotRetryIOException;
+import org.apache.hadoop.hbase.HBaseClassTestRule;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.RegionInfo;
+import org.apache.hadoop.hbase.client.RetriesExhaustedException;
+import org.apache.hadoop.hbase.testclassification.MasterTests;
+import org.apache.hadoop.hbase.testclassification.MediumTests;
+import org.junit.ClassRule;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+@Category({ MasterTests.class, MediumTests.class })
+public class TestAMServerFailedOpen extends TestAssignmentManagerBase {
+
+ @ClassRule
+ public static final HBaseClassTestRule CLASS_RULE =
+ HBaseClassTestRule.forClass(TestAMServerFailedOpen.class);
+
+ private static final Logger LOG = LoggerFactory.getLogger(TestAMServerFailedOpen.class);
+
+ @Override
+ protected int getAssignMaxAttempts() {
+ // do not need to retry so many times as we will finally fail...
+ return 10;
+ }
+
+ @Test
+ public void testServerNotYetRunning() throws Exception {
+ testRetriesExhaustedFailure(TableName.valueOf(this.name.getMethodName()),
+ new ServerNotYetRunningRsExecutor());
+ }
+
+ private void testRetriesExhaustedFailure(final TableName tableName, final MockRSExecutor executor)
+ throws Exception {
+ RegionInfo hri = createRegionInfo(tableName, 1);
+
+ // collect AM metrics before test
+ collectAssignmentManagerMetrics();
+
+ // Test Assign operation failure
+ rsDispatcher.setMockRsExecutor(executor);
+ try {
+ waitOnFuture(submitProcedure(createAssignProcedure(hri)));
+ fail("unexpected assign completion");
+ } catch (RetriesExhaustedException e) {
+ // expected exception
+ LOG.info("expected exception from assign operation: " + e.getMessage(), e);
+ }
+
+ // Assign the region (without problems)
+ rsDispatcher.setMockRsExecutor(new GoodRsExecutor());
+ waitOnFuture(submitProcedure(createAssignProcedure(hri)));
+
+ // TODO: Currently unassign just keeps trying until it sees a server crash.
+ // There is no count on unassign.
+ /*
+ * // Test Unassign operation failure rsDispatcher.setMockRsExecutor(executor);
+ * waitOnFuture(submitProcedure(createUnassignProcedure(hri)));
+ * assertEquals(assignSubmittedCount + 2, assignProcMetrics.getSubmittedCounter().getCount());
+ * assertEquals(assignFailedCount + 1, assignProcMetrics.getFailedCounter().getCount());
+ * assertEquals(unassignSubmittedCount + 1,
+ * unassignProcMetrics.getSubmittedCounter().getCount()); // TODO: We supposed to have 1 failed
+ * assign, 1 successful assign and a failed unassign // operation. But ProcV2 framework marks
+ * aborted unassign operation as success. Fix it! assertEquals(unassignFailedCount,
+ * unassignProcMetrics.getFailedCounter().getCount());
+ */
+ }
+
+ @Test
+ public void testIOExceptionOnAssignment() throws Exception {
+ // collect AM metrics before test
+ collectAssignmentManagerMetrics();
+
+ testFailedOpen(TableName.valueOf("testExceptionOnAssignment"),
+ new FaultyRsExecutor(new IOException("test fault")));
+
+ assertEquals(assignSubmittedCount + 1, assignProcMetrics.getSubmittedCounter().getCount());
+ assertEquals(assignFailedCount + 1, assignProcMetrics.getFailedCounter().getCount());
+ }
+
+ @Test
+ public void testDoNotRetryExceptionOnAssignment() throws Exception {
+ // collect AM metrics before test
+ collectAssignmentManagerMetrics();
+
+ testFailedOpen(TableName.valueOf("testDoNotRetryExceptionOnAssignment"),
+ new FaultyRsExecutor(new DoNotRetryIOException("test do not retry fault")));
+
+ assertEquals(assignSubmittedCount + 1, assignProcMetrics.getSubmittedCounter().getCount());
+ assertEquals(assignFailedCount + 1, assignProcMetrics.getFailedCounter().getCount());
+ }
+
+ private void testFailedOpen(final TableName tableName, final MockRSExecutor executor)
+ throws Exception {
+ final RegionInfo hri = createRegionInfo(tableName, 1);
+
+ // Test Assign operation failure
+ rsDispatcher.setMockRsExecutor(executor);
+ try {
+ waitOnFuture(submitProcedure(createAssignProcedure(hri)));
+ fail("unexpected assign completion");
+ } catch (RetriesExhaustedException e) {
+ // expected exception
+ LOG.info("REGION STATE " + am.getRegionStates().getRegionStateNode(hri));
+ LOG.info("expected exception from assign operation: " + e.getMessage(), e);
+ assertEquals(true, am.getRegionStates().getRegionState(hri).isFailedOpen());
+ }
+ }
+}
http://git-wip-us.apache.org/repos/asf/hbase/blob/bb349413/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestAssignmentManager.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestAssignmentManager.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestAssignmentManager.java
index 443bbab..94963a0 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestAssignmentManager.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestAssignmentManager.java
@@ -18,151 +18,43 @@
package org.apache.hadoop.hbase.master.assignment;
import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertNotEquals;
import static org.junit.Assert.assertTrue;
-import static org.junit.Assert.fail;
-
-import java.io.IOException;
-import java.io.InterruptedIOException;
-import java.net.SocketTimeoutException;
-import java.util.NavigableMap;
-import java.util.Random;
-import java.util.Set;
-import java.util.SortedSet;
-import java.util.concurrent.ConcurrentSkipListMap;
-import java.util.concurrent.ConcurrentSkipListSet;
-import java.util.concurrent.ExecutionException;
+
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
-import java.util.concurrent.ScheduledExecutorService;
-import java.util.concurrent.TimeUnit;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.hbase.DoNotRetryIOException;
import org.apache.hadoop.hbase.HBaseClassTestRule;
import org.apache.hadoop.hbase.HBaseTestingUtility;
-import org.apache.hadoop.hbase.NotServingRegionException;
-import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.client.RegionInfoBuilder;
-import org.apache.hadoop.hbase.client.RetriesExhaustedException;
import org.apache.hadoop.hbase.exceptions.UnexpectedStateException;
-import org.apache.hadoop.hbase.ipc.ServerNotRunningYetException;
-import org.apache.hadoop.hbase.master.MasterServices;
import org.apache.hadoop.hbase.master.RegionState.State;
-import org.apache.hadoop.hbase.master.procedure.MasterProcedureConstants;
-import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv;
-import org.apache.hadoop.hbase.master.procedure.ProcedureSyncWait;
-import org.apache.hadoop.hbase.master.procedure.RSProcedureDispatcher;
-import org.apache.hadoop.hbase.procedure2.Procedure;
-import org.apache.hadoop.hbase.procedure2.ProcedureMetrics;
import org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility;
-import org.apache.hadoop.hbase.procedure2.store.wal.WALProcedureStore;
import org.apache.hadoop.hbase.procedure2.util.StringUtils;
-import org.apache.hadoop.hbase.regionserver.RegionServerAbortedException;
-import org.apache.hadoop.hbase.regionserver.RegionServerStoppedException;
import org.apache.hadoop.hbase.testclassification.LargeTests;
import org.apache.hadoop.hbase.testclassification.MasterTests;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.hbase.util.FSUtils;
-import org.apache.hadoop.ipc.RemoteException;
-import org.junit.After;
-import org.junit.Before;
import org.junit.ClassRule;
import org.junit.Ignore;
-import org.junit.Rule;
import org.junit.Test;
import org.junit.experimental.categories.Category;
-import org.junit.rules.ExpectedException;
-import org.junit.rules.TestName;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
-import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.CloseRegionRequest;
-import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.CloseRegionResponse;
-import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.ExecuteProceduresRequest;
-import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.ExecuteProceduresResponse;
-import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.OpenRegionRequest;
-import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.OpenRegionRequest.RegionOpenInfo;
-import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.OpenRegionResponse;
-import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.OpenRegionResponse.RegionOpeningState;
-import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.RegionStateTransition;
-import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.RegionStateTransition.TransitionCode;
-import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.ReportRegionStateTransitionRequest;
-
-@Category({MasterTests.class, LargeTests.class})
-public class TestAssignmentManager {
+@Category({ MasterTests.class, LargeTests.class })
+public class TestAssignmentManager extends TestAssignmentManagerBase {
@ClassRule
public static final HBaseClassTestRule CLASS_RULE =
- HBaseClassTestRule.forClass(TestAssignmentManager.class);
+ HBaseClassTestRule.forClass(TestAssignmentManager.class);
private static final Logger LOG = LoggerFactory.getLogger(TestAssignmentManager.class);
- @Rule public TestName name = new TestName();
- @Rule public final ExpectedException exception = ExpectedException.none();
-
- private static final int PROC_NTHREADS = 64;
- private static final int NREGIONS = 1 * 1000;
- private static final int NSERVERS = Math.max(1, NREGIONS / 100);
-
- private HBaseTestingUtility UTIL;
- private MockRSProcedureDispatcher rsDispatcher;
- private MockMasterServices master;
- private AssignmentManager am;
- private NavigableMap<ServerName, SortedSet<byte []>> regionsToRegionServers =
- new ConcurrentSkipListMap<ServerName, SortedSet<byte []>>();
- // Simple executor to run some simple tasks.
- private ScheduledExecutorService executor;
-
- private ProcedureMetrics assignProcMetrics;
- private ProcedureMetrics unassignProcMetrics;
-
- private long assignSubmittedCount = 0;
- private long assignFailedCount = 0;
- private long unassignSubmittedCount = 0;
- private long unassignFailedCount = 0;
-
- private void setupConfiguration(Configuration conf) throws Exception {
- FSUtils.setRootDir(conf, UTIL.getDataTestDir());
- conf.setBoolean(WALProcedureStore.USE_HSYNC_CONF_KEY, false);
- conf.setInt(WALProcedureStore.SYNC_WAIT_MSEC_CONF_KEY, 10);
- conf.setInt(MasterProcedureConstants.MASTER_PROCEDURE_THREADS, PROC_NTHREADS);
- conf.setInt(RSProcedureDispatcher.RS_RPC_STARTUP_WAIT_TIME_CONF_KEY, 1000);
- conf.setInt(AssignmentManager.ASSIGN_MAX_ATTEMPTS, 100); // Have many so we succeed eventually.
- }
-
- @Before
- public void setUp() throws Exception {
- UTIL = new HBaseTestingUtility();
- this.executor = Executors.newSingleThreadScheduledExecutor();
- setupConfiguration(UTIL.getConfiguration());
- master = new MockMasterServices(UTIL.getConfiguration(), this.regionsToRegionServers);
- rsDispatcher = new MockRSProcedureDispatcher(master);
- master.start(NSERVERS, rsDispatcher);
- am = master.getAssignmentManager();
- assignProcMetrics = am.getAssignmentManagerMetrics().getAssignProcMetrics();
- unassignProcMetrics = am.getAssignmentManagerMetrics().getUnassignProcMetrics();
- setUpMeta();
- }
-
- private void setUpMeta() throws Exception {
- rsDispatcher.setMockRsExecutor(new GoodRsExecutor());
- am.assign(RegionInfoBuilder.FIRST_META_REGIONINFO);
- am.wakeMetaLoadedEvent();
- }
-
- @After
- public void tearDown() throws Exception {
- master.stop("tearDown");
- this.executor.shutdownNow();
- }
-
- @Test (expected=NullPointerException.class)
+ @Test(expected = NullPointerException.class)
public void testWaitServerReportEventWithNullServer() throws UnexpectedStateException {
// Test what happens if we pass in null server. I'd expect it throws NPE.
- if (this.am.waitServerReportEvent(null, null)) throw new UnexpectedStateException();
+ if (this.am.waitServerReportEvent(null, null)) {
+ throw new UnexpectedStateException();
+ }
}
@Test
@@ -173,61 +65,48 @@ public class TestAssignmentManager {
testAssign(new GoodRsExecutor());
assertEquals(assignSubmittedCount + NREGIONS,
- assignProcMetrics.getSubmittedCounter().getCount());
+ assignProcMetrics.getSubmittedCounter().getCount());
assertEquals(assignFailedCount, assignProcMetrics.getFailedCounter().getCount());
}
@Test
public void testAssignAndCrashBeforeResponse() throws Exception {
- final TableName tableName = TableName.valueOf("testAssignAndCrashBeforeResponse");
- final RegionInfo hri = createRegionInfo(tableName, 1);
+ TableName tableName = TableName.valueOf("testAssignAndCrashBeforeResponse");
+ RegionInfo hri = createRegionInfo(tableName, 1);
rsDispatcher.setMockRsExecutor(new HangThenRSCrashExecutor());
- AssignProcedure proc = am.createAssignProcedure(hri);
+ TransitRegionStateProcedure proc = createAssignProcedure(hri);
waitOnFuture(submitProcedure(proc));
}
@Test
public void testUnassignAndCrashBeforeResponse() throws Exception {
- final TableName tableName = TableName.valueOf("testAssignAndCrashBeforeResponse");
- final RegionInfo hri = createRegionInfo(tableName, 1);
+ TableName tableName = TableName.valueOf("testAssignAndCrashBeforeResponse");
+ RegionInfo hri = createRegionInfo(tableName, 1);
rsDispatcher.setMockRsExecutor(new HangOnCloseThenRSCrashExecutor());
for (int i = 0; i < HangOnCloseThenRSCrashExecutor.TYPES_OF_FAILURE; i++) {
- AssignProcedure assign = am.createAssignProcedure(hri);
+ TransitRegionStateProcedure assign = createAssignProcedure(hri);
waitOnFuture(submitProcedure(assign));
- UnassignProcedure unassign = am.createUnassignProcedure(hri,
- am.getRegionStates().getRegionServerOfRegion(hri), false);
+ TransitRegionStateProcedure unassign = createUnassignProcedure(hri);
waitOnFuture(submitProcedure(unassign));
}
}
+ // Disabled for now. Since HBASE-18551, this mock is insufficient.
+ @Ignore
@Test
- public void testAssignWithRandExec() throws Exception {
- final TableName tableName = TableName.valueOf("testAssignWithRandExec");
- final RegionInfo hri = createRegionInfo(tableName, 1);
-
- rsDispatcher.setMockRsExecutor(new RandRsExecutor());
- // Loop a bunch of times so we hit various combos of exceptions.
- for (int i = 0; i < 10; i++) {
- LOG.info("ROUND=" + i);
- AssignProcedure proc = am.createAssignProcedure(hri);
- waitOnFuture(submitProcedure(proc));
- }
- }
-
- @Ignore @Test // Disabled for now. Since HBASE-18551, this mock is insufficient.
public void testSocketTimeout() throws Exception {
- final TableName tableName = TableName.valueOf(this.name.getMethodName());
- final RegionInfo hri = createRegionInfo(tableName, 1);
+ TableName tableName = TableName.valueOf(this.name.getMethodName());
+ RegionInfo hri = createRegionInfo(tableName, 1);
// collect AM metrics before test
collectAssignmentManagerMetrics();
rsDispatcher.setMockRsExecutor(new SocketTimeoutRsExecutor(20, 3));
- waitOnFuture(submitProcedure(am.createAssignProcedure(hri)));
+ waitOnFuture(submitProcedure(createAssignProcedure(hri)));
rsDispatcher.setMockRsExecutor(new SocketTimeoutRsExecutor(20, 1));
// exception.expect(ServerCrashException.class);
- waitOnFuture(submitProcedure(am.createUnassignProcedure(hri, null, false)));
+ waitOnFuture(submitProcedure(createUnassignProcedure(hri)));
assertEquals(assignSubmittedCount + 1, assignProcMetrics.getSubmittedCounter().getCount());
assertEquals(assignFailedCount, assignProcMetrics.getFailedCounter().getCount());
@@ -235,113 +114,26 @@ public class TestAssignmentManager {
assertEquals(unassignFailedCount + 1, unassignProcMetrics.getFailedCounter().getCount());
}
- @Test
- public void testServerNotYetRunning() throws Exception {
- testRetriesExhaustedFailure(TableName.valueOf(this.name.getMethodName()),
- new ServerNotYetRunningRsExecutor());
- }
-
- private void testRetriesExhaustedFailure(final TableName tableName,
- final MockRSExecutor executor) throws Exception {
- final RegionInfo hri = createRegionInfo(tableName, 1);
-
- // collect AM metrics before test
- collectAssignmentManagerMetrics();
-
- // Test Assign operation failure
- rsDispatcher.setMockRsExecutor(executor);
- try {
- waitOnFuture(submitProcedure(am.createAssignProcedure(hri)));
- fail("unexpected assign completion");
- } catch (RetriesExhaustedException e) {
- // expected exception
- LOG.info("expected exception from assign operation: " + e.getMessage(), e);
- }
-
- // Assign the region (without problems)
- rsDispatcher.setMockRsExecutor(new GoodRsExecutor());
- waitOnFuture(submitProcedure(am.createAssignProcedure(hri)));
-
- // TODO: Currently unassign just keeps trying until it sees a server crash.
- // There is no count on unassign.
- /*
- // Test Unassign operation failure
- rsDispatcher.setMockRsExecutor(executor);
- waitOnFuture(submitProcedure(am.createUnassignProcedure(hri, null, false)));
-
- assertEquals(assignSubmittedCount + 2, assignProcMetrics.getSubmittedCounter().getCount());
- assertEquals(assignFailedCount + 1, assignProcMetrics.getFailedCounter().getCount());
- assertEquals(unassignSubmittedCount + 1, unassignProcMetrics.getSubmittedCounter().getCount());
-
- // TODO: We supposed to have 1 failed assign, 1 successful assign and a failed unassign
- // operation. But ProcV2 framework marks aborted unassign operation as success. Fix it!
- assertEquals(unassignFailedCount, unassignProcMetrics.getFailedCounter().getCount());
- */
- }
-
-
- @Test
- public void testIOExceptionOnAssignment() throws Exception {
- // collect AM metrics before test
- collectAssignmentManagerMetrics();
-
- testFailedOpen(TableName.valueOf("testExceptionOnAssignment"),
- new FaultyRsExecutor(new IOException("test fault")));
-
- assertEquals(assignSubmittedCount + 1, assignProcMetrics.getSubmittedCounter().getCount());
- assertEquals(assignFailedCount + 1, assignProcMetrics.getFailedCounter().getCount());
- }
-
- @Test
- public void testDoNotRetryExceptionOnAssignment() throws Exception {
- // collect AM metrics before test
- collectAssignmentManagerMetrics();
-
- testFailedOpen(TableName.valueOf("testDoNotRetryExceptionOnAssignment"),
- new FaultyRsExecutor(new DoNotRetryIOException("test do not retry fault")));
-
- assertEquals(assignSubmittedCount + 1, assignProcMetrics.getSubmittedCounter().getCount());
- assertEquals(assignFailedCount + 1, assignProcMetrics.getFailedCounter().getCount());
- }
-
- private void testFailedOpen(final TableName tableName,
- final MockRSExecutor executor) throws Exception {
- final RegionInfo hri = createRegionInfo(tableName, 1);
-
- // Test Assign operation failure
- rsDispatcher.setMockRsExecutor(executor);
- try {
- waitOnFuture(submitProcedure(am.createAssignProcedure(hri)));
- fail("unexpected assign completion");
- } catch (RetriesExhaustedException e) {
- // expected exception
- LOG.info("REGION STATE " + am.getRegionStates().getRegionStateNode(hri));
- LOG.info("expected exception from assign operation: " + e.getMessage(), e);
- assertEquals(true, am.getRegionStates().getRegionState(hri).isFailedOpen());
- }
- }
-
private void testAssign(final MockRSExecutor executor) throws Exception {
testAssign(executor, NREGIONS);
}
- private void testAssign(final MockRSExecutor executor, final int nregions) throws Exception {
+ private void testAssign(MockRSExecutor executor, int nRegions) throws Exception {
rsDispatcher.setMockRsExecutor(executor);
- AssignProcedure[] assignments = new AssignProcedure[nregions];
+ TransitRegionStateProcedure[] assignments = new TransitRegionStateProcedure[nRegions];
long st = System.currentTimeMillis();
bulkSubmit(assignments);
for (int i = 0; i < assignments.length; ++i) {
- ProcedureTestingUtility.waitProcedure(
- master.getMasterProcedureExecutor(), assignments[i]);
+ ProcedureTestingUtility.waitProcedure(master.getMasterProcedureExecutor(), assignments[i]);
assertTrue(assignments[i].toString(), assignments[i].isSuccess());
}
long et = System.currentTimeMillis();
float sec = ((et - st) / 1000.0f);
- LOG.info(String.format("[T] Assigning %dprocs in %s (%.2fproc/sec)",
- assignments.length, StringUtils.humanTimeDiff(et - st), assignments.length / sec));
+ LOG.info(String.format("[T] Assigning %dprocs in %s (%.2fproc/sec)", assignments.length,
+ StringUtils.humanTimeDiff(et - st), assignments.length / sec));
}
@Test
@@ -354,7 +146,7 @@ public class TestAssignmentManager {
rsDispatcher.setMockRsExecutor(new GoodRsExecutor());
- final Future<byte[]> futureA = submitProcedure(am.createAssignProcedure(hri));
+ Future<byte[]> futureA = submitProcedure(createAssignProcedure(hri));
// wait first assign
waitOnFuture(futureA);
@@ -362,7 +154,7 @@ public class TestAssignmentManager {
// Second should be a noop. We should recognize region is already OPEN internally
// and skip out doing nothing.
// wait second assign
- final Future<byte[]> futureB = submitProcedure(am.createAssignProcedure(hri));
+ Future<byte[]> futureB = submitProcedure(createAssignProcedure(hri));
waitOnFuture(futureB);
am.getRegionStates().isRegionInState(hri, State.OPEN);
// TODO: What else can we do to ensure just a noop.
@@ -371,7 +163,6 @@ public class TestAssignmentManager {
// better way?
assertEquals(assignSubmittedCount + 2, assignProcMetrics.getSubmittedCounter().getCount());
assertEquals(assignFailedCount, assignProcMetrics.getFailedCounter().getCount());
-
}
@Test
@@ -385,18 +176,16 @@ public class TestAssignmentManager {
rsDispatcher.setMockRsExecutor(new GoodRsExecutor());
// assign the region first
- waitOnFuture(submitProcedure(am.createAssignProcedure(hri)));
+ waitOnFuture(submitProcedure(createAssignProcedure(hri)));
- final Future<byte[]> futureA = submitProcedure(am.createUnassignProcedure(hri, null, false));
+ final Future<byte[]> futureA = submitProcedure(createUnassignProcedure(hri));
// Wait first unassign.
waitOnFuture(futureA);
am.getRegionStates().isRegionInState(hri, State.CLOSED);
// Second should be a noop. We should recognize region is already CLOSED internally
// and skip out doing nothing.
- final Future<byte[]> futureB =
- submitProcedure(am.createUnassignProcedure(hri,
- ServerName.valueOf("example.org,1234,1"), false));
+ final Future<byte[]> futureB = submitProcedure(createUnassignProcedure(hri));
waitOnFuture(futureB);
// Ensure we are still CLOSED.
am.getRegionStates().isRegionInState(hri, State.CLOSED);
@@ -411,17 +200,17 @@ public class TestAssignmentManager {
}
/**
- * It is possible that when AM send assign meta request to a RS successfully,
- * but RS can not send back any response, which cause master startup hangs forever
+ * It is possible that when AM send assign meta request to a RS successfully, but RS can not send
+ * back any response, which cause master startup hangs forever
*/
@Test
public void testAssignMetaAndCrashBeforeResponse() throws Exception {
tearDown();
// See setUp(), start HBase until set up meta
- UTIL = new HBaseTestingUtility();
+ util = new HBaseTestingUtility();
this.executor = Executors.newSingleThreadScheduledExecutor();
- setupConfiguration(UTIL.getConfiguration());
- master = new MockMasterServices(UTIL.getConfiguration(), this.regionsToRegionServers);
+ setupConfiguration(util.getConfiguration());
+ master = new MockMasterServices(util.getConfiguration(), this.regionsToRegionServers);
rsDispatcher = new MockRSProcedureDispatcher(master);
master.start(NSERVERS, rsDispatcher);
am = master.getAssignmentManager();
@@ -434,418 +223,4 @@ public class TestAssignmentManager {
// set it back as default, see setUpMeta()
am.wakeMetaLoadedEvent();
}
-
- private Future<byte[]> submitProcedure(final Procedure<MasterProcedureEnv> proc) {
- return ProcedureSyncWait.submitProcedure(master.getMasterProcedureExecutor(), proc);
- }
-
- private byte[] waitOnFuture(final Future<byte[]> future) throws Exception {
- try {
- return future.get(5, TimeUnit.SECONDS);
- } catch (ExecutionException e) {
- LOG.info("ExecutionException", e);
- Exception ee = (Exception)e.getCause();
- if (ee instanceof InterruptedIOException) {
- for (Procedure<?> p: this.master.getMasterProcedureExecutor().getProcedures()) {
- LOG.info(p.toStringDetails());
- }
- }
- throw (Exception)e.getCause();
- }
- }
-
- // ============================================================================================
- // Helpers
- // ============================================================================================
- private void bulkSubmit(final AssignProcedure[] procs) throws Exception {
- final Thread[] threads = new Thread[PROC_NTHREADS];
- for (int i = 0; i < threads.length; ++i) {
- final int threadId = i;
- threads[i] = new Thread() {
- @Override
- public void run() {
- TableName tableName = TableName.valueOf("table-" + threadId);
- int n = (procs.length / threads.length);
- int start = threadId * n;
- int stop = start + n;
- for (int j = start; j < stop; ++j) {
- procs[j] = createAndSubmitAssign(tableName, j);
- }
- }
- };
- threads[i].start();
- }
- for (int i = 0; i < threads.length; ++i) {
- threads[i].join();
- }
- for (int i = procs.length - 1; i >= 0 && procs[i] == null; --i) {
- procs[i] = createAndSubmitAssign(TableName.valueOf("table-sync"), i);
- }
- }
-
- private AssignProcedure createAndSubmitAssign(TableName tableName, int regionId) {
- RegionInfo hri = createRegionInfo(tableName, regionId);
- AssignProcedure proc = am.createAssignProcedure(hri);
- master.getMasterProcedureExecutor().submitProcedure(proc);
- return proc;
- }
-
- private RegionInfo createRegionInfo(final TableName tableName, final long regionId) {
- return RegionInfoBuilder.newBuilder(tableName)
- .setStartKey(Bytes.toBytes(regionId))
- .setEndKey(Bytes.toBytes(regionId + 1))
- .setSplit(false)
- .setRegionId(0)
- .build();
- }
-
- private void sendTransitionReport(final ServerName serverName,
- final org.apache.hadoop.hbase.shaded.protobuf.generated.HBaseProtos.RegionInfo regionInfo,
- final TransitionCode state) throws IOException {
- ReportRegionStateTransitionRequest.Builder req =
- ReportRegionStateTransitionRequest.newBuilder();
- req.setServer(ProtobufUtil.toServerName(serverName));
- req.addTransition(RegionStateTransition.newBuilder()
- .addRegionInfo(regionInfo)
- .setTransitionCode(state)
- .setOpenSeqNum(1)
- .build());
- am.reportRegionStateTransition(req.build());
- }
-
- private void doCrash(final ServerName serverName) {
- this.am.submitServerCrash(serverName, false/*No WALs here*/);
- }
-
- private void doRestart(final ServerName serverName) {
- try {
- this.master.restartRegionServer(serverName);
- } catch (IOException e) {
- LOG.warn("Can not restart RS with new startcode");
- }
- }
-
- private class NoopRsExecutor implements MockRSExecutor {
- @Override
- public ExecuteProceduresResponse sendRequest(ServerName server,
- ExecuteProceduresRequest request) throws IOException {
- if (request.getOpenRegionCount() > 0) {
- for (OpenRegionRequest req : request.getOpenRegionList()) {
- for (RegionOpenInfo openReq : req.getOpenInfoList()) {
- execOpenRegion(server, openReq);
- }
- }
- }
- if (request.getCloseRegionCount() > 0) {
- for (CloseRegionRequest req : request.getCloseRegionList()) {
- execCloseRegion(server, req.getRegion().getValue().toByteArray());
- }
- }
- return ExecuteProceduresResponse.newBuilder().build();
- }
-
- protected RegionOpeningState execOpenRegion(ServerName server, RegionOpenInfo regionInfo)
- throws IOException {
- return null;
- }
-
- protected CloseRegionResponse execCloseRegion(ServerName server, byte[] regionName)
- throws IOException {
- return null;
- }
- }
-
- private class GoodRsExecutor extends NoopRsExecutor {
- @Override
- protected RegionOpeningState execOpenRegion(ServerName server, RegionOpenInfo openReq)
- throws IOException {
- sendTransitionReport(server, openReq.getRegion(), TransitionCode.OPENED);
- // Concurrency?
- // Now update the state of our cluster in regionsToRegionServers.
- SortedSet<byte []> regions = regionsToRegionServers.get(server);
- if (regions == null) {
- regions = new ConcurrentSkipListSet<byte[]>(Bytes.BYTES_COMPARATOR);
- regionsToRegionServers.put(server, regions);
- }
- RegionInfo hri = ProtobufUtil.toRegionInfo(openReq.getRegion());
- if (regions.contains(hri.getRegionName())) {
- throw new UnsupportedOperationException(hri.getRegionNameAsString());
- }
- regions.add(hri.getRegionName());
- return RegionOpeningState.OPENED;
- }
-
- @Override
- protected CloseRegionResponse execCloseRegion(ServerName server, byte[] regionName)
- throws IOException {
- RegionInfo hri = am.getRegionInfo(regionName);
- sendTransitionReport(server, ProtobufUtil.toRegionInfo(hri), TransitionCode.CLOSED);
- return CloseRegionResponse.newBuilder().setClosed(true).build();
- }
- }
-
- private static class ServerNotYetRunningRsExecutor implements MockRSExecutor {
- @Override
- public ExecuteProceduresResponse sendRequest(ServerName server, ExecuteProceduresRequest req)
- throws IOException {
- throw new ServerNotRunningYetException("wait on server startup");
- }
- }
-
- private static class FaultyRsExecutor implements MockRSExecutor {
- private final IOException exception;
-
- public FaultyRsExecutor(final IOException exception) {
- this.exception = exception;
- }
-
- @Override
- public ExecuteProceduresResponse sendRequest(ServerName server, ExecuteProceduresRequest req)
- throws IOException {
- throw exception;
- }
- }
-
- private class SocketTimeoutRsExecutor extends GoodRsExecutor {
- private final int maxSocketTimeoutRetries;
- private final int maxServerRetries;
-
- private ServerName lastServer;
- private int sockTimeoutRetries;
- private int serverRetries;
-
- public SocketTimeoutRsExecutor(int maxSocketTimeoutRetries, int maxServerRetries) {
- this.maxServerRetries = maxServerRetries;
- this.maxSocketTimeoutRetries = maxSocketTimeoutRetries;
- }
-
- @Override
- public ExecuteProceduresResponse sendRequest(ServerName server, ExecuteProceduresRequest req)
- throws IOException {
- // SocketTimeoutException should be a temporary problem
- // unless the server will be declared dead.
- if (sockTimeoutRetries++ < maxSocketTimeoutRetries) {
- if (sockTimeoutRetries == 1) assertNotEquals(lastServer, server);
- lastServer = server;
- LOG.debug("Socket timeout for server=" + server + " retries=" + sockTimeoutRetries);
- throw new SocketTimeoutException("simulate socket timeout");
- } else if (serverRetries++ < maxServerRetries) {
- LOG.info("Mark server=" + server + " as dead. serverRetries=" + serverRetries);
- master.getServerManager().moveFromOnlineToDeadServers(server);
- sockTimeoutRetries = 0;
- throw new SocketTimeoutException("simulate socket timeout");
- } else {
- return super.sendRequest(server, req);
- }
- }
- }
-
- /**
- * Takes open request and then returns nothing so acts like a RS that went zombie.
- * No response (so proc is stuck/suspended on the Master and won't wake up.). We
- * then send in a crash for this server after a few seconds; crash is supposed to
- * take care of the suspended procedures.
- */
- private class HangThenRSCrashExecutor extends GoodRsExecutor {
- private int invocations;
-
- @Override
- protected RegionOpeningState execOpenRegion(final ServerName server, RegionOpenInfo openReq)
- throws IOException {
- if (this.invocations++ > 0) {
- // Return w/o problem the second time through here.
- return super.execOpenRegion(server, openReq);
- }
- // The procedure on master will just hang forever because nothing comes back
- // from the RS in this case.
- LOG.info("Return null response from serverName=" + server + "; means STUCK...TODO timeout");
- executor.schedule(new Runnable() {
- @Override
- public void run() {
- LOG.info("Sending in CRASH of " + server);
- doCrash(server);
- }
- }, 1, TimeUnit.SECONDS);
- return null;
- }
- }
-
- /**
- * Takes open request and then returns nothing so acts like a RS that went zombie.
- * No response (so proc is stuck/suspended on the Master and won't wake up.).
- * Different with HangThenRSCrashExecutor, HangThenRSCrashExecutor will create
- * ServerCrashProcedure to handle the server crash. However, this HangThenRSRestartExecutor
- * will restart RS directly, situation for RS crashed when SCP is not enabled.
- */
- private class HangThenRSRestartExecutor extends GoodRsExecutor {
- private int invocations;
-
- @Override
- protected RegionOpeningState execOpenRegion(final ServerName server, RegionOpenInfo openReq)
- throws IOException {
- if (this.invocations++ > 0) {
- // Return w/o problem the second time through here.
- return super.execOpenRegion(server, openReq);
- }
- // The procedure on master will just hang forever because nothing comes back
- // from the RS in this case.
- LOG.info("Return null response from serverName=" + server + "; means STUCK...TODO timeout");
- executor.schedule(new Runnable() {
- @Override
- public void run() {
- LOG.info("Restarting RS of " + server);
- doRestart(server);
- }
- }, 1, TimeUnit.SECONDS);
- return null;
- }
- }
-
- private class HangOnCloseThenRSCrashExecutor extends GoodRsExecutor {
- public static final int TYPES_OF_FAILURE = 6;
- private int invocations;
-
- @Override
- protected CloseRegionResponse execCloseRegion(ServerName server, byte[] regionName)
- throws IOException {
- switch (this.invocations++) {
- case 0: throw new NotServingRegionException("Fake");
- case 1:
- executor.schedule(new Runnable() {
- @Override
- public void run() {
- LOG.info("Sending in CRASH of " + server);
- doCrash(server);
- }
- }, 1, TimeUnit.SECONDS);
- throw new RegionServerAbortedException("Fake!");
- case 2:
- executor.schedule(new Runnable() {
- @Override
- public void run() {
- LOG.info("Sending in CRASH of " + server);
- doCrash(server);
- }
- }, 1, TimeUnit.SECONDS);
- throw new RegionServerStoppedException("Fake!");
- case 3: throw new ServerNotRunningYetException("Fake!");
- case 4:
- LOG.info("Returned null from serverName={}; means STUCK...TODO timeout", server);
- executor.schedule(new Runnable() {
- @Override
- public void run() {
- LOG.info("Sending in CRASH of " + server);
- doCrash(server);
- }
- }, 1, TimeUnit.SECONDS);
- return null;
- default:
- return super.execCloseRegion(server, regionName);
- }
- }
- }
-
- private class RandRsExecutor extends NoopRsExecutor {
- private final Random rand = new Random();
-
- @Override
- public ExecuteProceduresResponse sendRequest(ServerName server, ExecuteProceduresRequest req)
- throws IOException {
- switch (rand.nextInt(5)) {
- case 0: throw new ServerNotRunningYetException("wait on server startup");
- case 1: throw new SocketTimeoutException("simulate socket timeout");
- case 2: throw new RemoteException("java.io.IOException", "unexpected exception");
- default:
- // fall out
- }
- return super.sendRequest(server, req);
- }
-
- @Override
- protected RegionOpeningState execOpenRegion(final ServerName server, RegionOpenInfo openReq)
- throws IOException {
- switch (rand.nextInt(6)) {
- case 0:
- LOG.info("Return OPENED response");
- sendTransitionReport(server, openReq.getRegion(), TransitionCode.OPENED);
- return OpenRegionResponse.RegionOpeningState.OPENED;
- case 1:
- LOG.info("Return transition report that OPENED/ALREADY_OPENED response");
- sendTransitionReport(server, openReq.getRegion(), TransitionCode.OPENED);
- return OpenRegionResponse.RegionOpeningState.ALREADY_OPENED;
- case 2:
- LOG.info("Return transition report that FAILED_OPEN/FAILED_OPENING response");
- sendTransitionReport(server, openReq.getRegion(), TransitionCode.FAILED_OPEN);
- return OpenRegionResponse.RegionOpeningState.FAILED_OPENING;
- default:
- // fall out
- }
- // The procedure on master will just hang forever because nothing comes back
- // from the RS in this case.
- LOG.info("Return null as response; means proc stuck so we send in a crash report after a few seconds...");
- executor.schedule(new Runnable() {
- @Override
- public void run() {
- LOG.info("Delayed CRASHING of " + server);
- doCrash(server);
- }
- }, 5, TimeUnit.SECONDS);
- return null;
- }
-
- @Override
- protected CloseRegionResponse execCloseRegion(ServerName server, byte[] regionName)
- throws IOException {
- CloseRegionResponse.Builder resp = CloseRegionResponse.newBuilder();
- boolean closed = rand.nextBoolean();
- if (closed) {
- RegionInfo hri = am.getRegionInfo(regionName);
- sendTransitionReport(server, ProtobufUtil.toRegionInfo(hri), TransitionCode.CLOSED);
- }
- resp.setClosed(closed);
- return resp.build();
- }
- }
-
- private interface MockRSExecutor {
- ExecuteProceduresResponse sendRequest(ServerName server, ExecuteProceduresRequest req)
- throws IOException;
- }
-
- private class MockRSProcedureDispatcher extends RSProcedureDispatcher {
- private MockRSExecutor mockRsExec;
-
- public MockRSProcedureDispatcher(final MasterServices master) {
- super(master);
- }
-
- public void setMockRsExecutor(final MockRSExecutor mockRsExec) {
- this.mockRsExec = mockRsExec;
- }
-
- @Override
- protected void remoteDispatch(ServerName serverName, Set<RemoteProcedure> remoteProcedures) {
- submitTask(new MockRemoteCall(serverName, remoteProcedures));
- }
-
- private class MockRemoteCall extends ExecuteProceduresRemoteCall {
- public MockRemoteCall(final ServerName serverName,
- final Set<RemoteProcedure> operations) {
- super(serverName, operations);
- }
-
- @Override
- protected ExecuteProceduresResponse sendRequest(final ServerName serverName,
- final ExecuteProceduresRequest request) throws IOException {
- return mockRsExec.sendRequest(serverName, request);
- }
- }
- }
-
- private void collectAssignmentManagerMetrics() {
- assignSubmittedCount = assignProcMetrics.getSubmittedCounter().getCount();
- assignFailedCount = assignProcMetrics.getFailedCounter().getCount();
- unassignSubmittedCount = unassignProcMetrics.getSubmittedCounter().getCount();
- unassignFailedCount = unassignProcMetrics.getFailedCounter().getCount();
- }
}