You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by zh...@apache.org on 2018/08/28 06:49:37 UTC
hbase git commit: HBASE-21017 Revisit the expected states for
open/close
Repository: hbase
Updated Branches:
refs/heads/master bd0435892 -> 3afe9fb7e
HBASE-21017 Revisit the expected states for open/close
Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/3afe9fb7
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/3afe9fb7
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/3afe9fb7
Branch: refs/heads/master
Commit: 3afe9fb7e6ebfa71187cbe131558a83fae61cecd
Parents: bd04358
Author: zhangduo <zh...@apache.org>
Authored: Tue Aug 28 06:46:00 2018 +0800
Committer: Duo Zhang <zh...@apache.org>
Committed: Tue Aug 28 14:49:22 2018 +0800
----------------------------------------------------------------------
.../master/assignment/AssignmentManager.java | 55 +++++++++++++---
.../master/assignment/CloseRegionProcedure.java | 6 ++
.../master/assignment/OpenRegionProcedure.java | 6 ++
.../assignment/RegionRemoteProcedureBase.java | 40 +++++++++---
.../hbase/master/assignment/RegionStates.java | 16 -----
.../hadoop/hbase/client/TestEnableTable.java | 68 --------------------
6 files changed, 88 insertions(+), 103 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hbase/blob/3afe9fb7/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java
index a91f8e4..745703f 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java
@@ -552,7 +552,7 @@ public class AssignmentManager implements ServerListener {
TransitRegionStateProcedure proc;
regionNode.lock();
try {
- preTransitCheck(regionNode, RegionStates.STATES_EXPECTED_ON_OPEN);
+ preTransitCheck(regionNode, STATES_EXPECTED_ON_ASSIGN);
proc = TransitRegionStateProcedure.assign(getProcedureEnvironment(), regionInfo, sn);
regionNode.setProcedure(proc);
} finally {
@@ -573,7 +573,7 @@ public class AssignmentManager implements ServerListener {
TransitRegionStateProcedure proc;
regionNode.lock();
try {
- preTransitCheck(regionNode, RegionStates.STATES_EXPECTED_ON_CLOSE);
+ preTransitCheck(regionNode, STATES_EXPECTED_ON_UNASSIGN_OR_MOVE);
proc = TransitRegionStateProcedure.unassign(getProcedureEnvironment(), regionInfo);
regionNode.setProcedure(proc);
} finally {
@@ -591,7 +591,7 @@ public class AssignmentManager implements ServerListener {
TransitRegionStateProcedure proc;
regionNode.lock();
try {
- preTransitCheck(regionNode, RegionStates.STATES_EXPECTED_ON_CLOSE);
+ preTransitCheck(regionNode, STATES_EXPECTED_ON_UNASSIGN_OR_MOVE);
regionNode.checkOnline();
proc = TransitRegionStateProcedure.move(getProcedureEnvironment(), regionInfo, targetServer);
regionNode.setProcedure(proc);
@@ -1411,6 +1411,35 @@ public class AssignmentManager implements ServerListener {
}
// ============================================================================================
+ // Expected states on region state transition.
+ // Notice that there is expected states for transiting to OPENING state, this is because SCP.
+ // See the comments in regionOpening method for more details.
+ // ============================================================================================
+ private static final State[] STATES_EXPECTED_ON_OPEN = {
+ State.OPENING, // Normal case
+ State.OPEN // Retrying
+ };
+
+ private static final State[] STATES_EXPECTED_ON_CLOSING = {
+ State.OPEN, // Normal case
+ State.CLOSING, // Retrying
+ State.SPLITTING, // Offline the split parent
+ State.MERGING // Offline the merge parents
+ };
+
+ private static final State[] STATES_EXPECTED_ON_CLOSED = {
+ State.CLOSING, // Normal case
+ State.CLOSED // Retrying
+ };
+
+ // This is for manually scheduled region assign, can add other states later if we find out other
+ // usages
+ private static final State[] STATES_EXPECTED_ON_ASSIGN = { State.CLOSED, State.OFFLINE };
+
+ // We only allow unassign or move a region which is in OPEN state.
+ private static final State[] STATES_EXPECTED_ON_UNASSIGN_OR_MOVE = { State.OPEN };
+
+ // ============================================================================================
// Region Status update
// Should only be called in TransitRegionStateProcedure
// ============================================================================================
@@ -1432,7 +1461,10 @@ public class AssignmentManager implements ServerListener {
// should be called within the synchronized block of RegionStateNode
void regionOpening(RegionStateNode regionNode) throws IOException {
- transitStateAndUpdate(regionNode, State.OPENING, RegionStates.STATES_EXPECTED_ON_OPEN);
+ // As in SCP, for performance reason, there is no TRSP attached with this region, we will not
+ // update the region state, which means that the region could be in any state when we want to
+ // assign it after a RS crash. So here we do not pass the expectedStates parameter.
+ transitStateAndUpdate(regionNode, State.OPENING);
regionStates.addRegionToServer(regionNode);
// update the operation count metrics
metrics.incrementOperationCounter();
@@ -1468,7 +1500,7 @@ public class AssignmentManager implements ServerListener {
void regionOpened(RegionStateNode regionNode) throws IOException {
// TODO: OPENING Updates hbase:meta too... we need to do both here and there?
// That is a lot of hbase:meta writing.
- transitStateAndUpdate(regionNode, State.OPEN, RegionStates.STATES_EXPECTED_ON_OPEN);
+ transitStateAndUpdate(regionNode, State.OPEN, STATES_EXPECTED_ON_OPEN);
RegionInfo hri = regionNode.getRegionInfo();
if (isMetaRegion(hri)) {
// Usually we'd set a table ENABLED at this stage but hbase:meta is ALWAYs enabled, it
@@ -1483,8 +1515,7 @@ public class AssignmentManager implements ServerListener {
// should be called within the synchronized block of RegionStateNode
void regionClosing(RegionStateNode regionNode) throws IOException {
- transitStateAndUpdate(regionNode, State.CLOSING, RegionStates.STATES_EXPECTED_ON_CLOSE);
- regionStateStore.updateRegionLocation(regionNode);
+ transitStateAndUpdate(regionNode, State.CLOSING, STATES_EXPECTED_ON_CLOSING);
RegionInfo hri = regionNode.getRegionInfo();
// Set meta has not initialized early. so people trying to create/edit tables will wait
@@ -1502,8 +1533,13 @@ public class AssignmentManager implements ServerListener {
void regionClosed(RegionStateNode regionNode, boolean normally) throws IOException {
RegionState.State state = regionNode.getState();
ServerName regionLocation = regionNode.getRegionLocation();
- regionNode.transitionState(normally ? State.CLOSED : State.ABNORMALLY_CLOSED,
- RegionStates.STATES_EXPECTED_ON_CLOSE);
+ if (normally) {
+ regionNode.transitionState(State.CLOSED, STATES_EXPECTED_ON_CLOSED);
+ } else {
+ // For SCP
+ regionNode.transitionState(State.ABNORMALLY_CLOSED);
+ }
+ regionNode.setRegionLocation(null);
boolean succ = false;
try {
regionStateStore.updateRegionLocation(regionNode);
@@ -1517,7 +1553,6 @@ public class AssignmentManager implements ServerListener {
}
if (regionLocation != null) {
regionNode.setLastHost(regionLocation);
- regionNode.setRegionLocation(null);
regionStates.removeRegionFromServer(regionLocation, regionNode);
}
}
http://git-wip-us.apache.org/repos/asf/hbase/blob/3afe9fb7/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/CloseRegionProcedure.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/CloseRegionProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/CloseRegionProcedure.java
index e446e17..0d22a9c 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/CloseRegionProcedure.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/CloseRegionProcedure.java
@@ -20,6 +20,7 @@ package org.apache.hadoop.hbase.master.assignment;
import java.io.IOException;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.client.RegionInfo;
+import org.apache.hadoop.hbase.master.RegionState;
import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv;
import org.apache.hadoop.hbase.master.procedure.RSProcedureDispatcher.RegionCloseOperation;
import org.apache.hadoop.hbase.procedure2.ProcedureStateSerializer;
@@ -79,4 +80,9 @@ public class CloseRegionProcedure extends RegionRemoteProcedureBase {
assignCandidate = ProtobufUtil.toServerName(data.getAssignCandidate());
}
}
+
+ @Override
+ protected boolean shouldDispatch(RegionStateNode regionNode) {
+ return !regionNode.isInState(RegionState.State.CLOSED, RegionState.State.ABNORMALLY_CLOSED);
+ }
}
http://git-wip-us.apache.org/repos/asf/hbase/blob/3afe9fb7/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/OpenRegionProcedure.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/OpenRegionProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/OpenRegionProcedure.java
index 1a79697..125ef11 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/OpenRegionProcedure.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/OpenRegionProcedure.java
@@ -20,6 +20,7 @@ package org.apache.hadoop.hbase.master.assignment;
import java.io.IOException;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.client.RegionInfo;
+import org.apache.hadoop.hbase.master.RegionState;
import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv;
import org.apache.hadoop.hbase.master.procedure.RSProcedureDispatcher.RegionOpenOperation;
import org.apache.hadoop.hbase.procedure2.ProcedureStateSerializer;
@@ -64,4 +65,9 @@ public class OpenRegionProcedure extends RegionRemoteProcedureBase {
super.deserializeStateData(serializer);
serializer.deserialize(OpenRegionProcedureStateData.class);
}
+
+ @Override
+ protected boolean shouldDispatch(RegionStateNode regionNode) {
+ return !regionNode.isInState(RegionState.State.OPEN);
+ }
}
http://git-wip-us.apache.org/repos/asf/hbase/blob/3afe9fb7/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionRemoteProcedureBase.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionRemoteProcedureBase.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionRemoteProcedureBase.java
index 6770e68..3e05cde 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionRemoteProcedureBase.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionRemoteProcedureBase.java
@@ -77,16 +77,16 @@ public abstract class RegionRemoteProcedureBase extends Procedure<MasterProcedur
throw new UnsupportedOperationException();
}
- private ProcedureEvent<?> getRegionEvent(MasterProcedureEnv env) {
- return env.getAssignmentManager().getRegionStates().getOrCreateRegionStateNode(region)
- .getProcedureEvent();
+ private RegionStateNode getRegionNode(MasterProcedureEnv env) {
+ return env.getAssignmentManager().getRegionStates().getRegionStateNode(region);
}
@Override
- public void remoteCallFailed(MasterProcedureEnv env, ServerName remote,
- IOException exception) {
- ProcedureEvent<?> event = getRegionEvent(env);
- synchronized (event) {
+ public void remoteCallFailed(MasterProcedureEnv env, ServerName remote, IOException exception) {
+ RegionStateNode regionNode = getRegionNode(env);
+ regionNode.lock();
+ try {
+ ProcedureEvent<?> event = regionNode.getProcedureEvent();
if (event.isReady()) {
LOG.warn(
"The procedure event of procedure {} for region {} to server {} is not suspended, " +
@@ -97,6 +97,8 @@ public abstract class RegionRemoteProcedureBase extends Procedure<MasterProcedur
LOG.warn("The remote operation {} for region {} to server {} failed", this, region,
targetServer, exception);
event.wake(env.getProcedureScheduler());
+ } finally {
+ regionNode.unlock();
}
}
@@ -115,6 +117,17 @@ public abstract class RegionRemoteProcedureBase extends Procedure<MasterProcedur
return false;
}
+ /**
+ * Check whether we still need to make the call to RS.
+ * <p/>
+ * Usually this will not happen if we do not allow assigning a already onlined region. But if we
+ * have something wrong in the RSProcedureDispatcher, where we have already sent the request to
+ * RS, but then we tell the upper layer the remote call is failed due to rpc timeout or connection
+ * closed or anything else, then this issue can still happen. So here we add a check to make it
+ * more robust.
+ */
+ protected abstract boolean shouldDispatch(RegionStateNode regionNode);
+
@Override
protected Procedure<MasterProcedureEnv>[] execute(MasterProcedureEnv env)
throws ProcedureYieldException, ProcedureSuspendedException, InterruptedException {
@@ -122,8 +135,15 @@ public abstract class RegionRemoteProcedureBase extends Procedure<MasterProcedur
// we are done, the parent procedure will check whether we are succeeded.
return null;
}
- ProcedureEvent<?> event = getRegionEvent(env);
- synchronized (event) {
+ RegionStateNode regionNode = getRegionNode(env);
+ regionNode.lock();
+ try {
+ if (!shouldDispatch(regionNode)) {
+ return null;
+ }
+ // The code which wakes us up also needs to lock the RSN so here we do not need to synchronize
+ // on the event.
+ ProcedureEvent<?> event = regionNode.getProcedureEvent();
try {
env.getRemoteDispatcher().addOperationToNode(targetServer, this);
} catch (FailedRemoteDispatchException e) {
@@ -136,6 +156,8 @@ public abstract class RegionRemoteProcedureBase extends Procedure<MasterProcedur
event.suspend();
event.suspendIfNotReady(this);
throw new ProcedureSuspendedException();
+ } finally {
+ regionNode.unlock();
}
}
http://git-wip-us.apache.org/repos/asf/hbase/blob/3afe9fb7/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStates.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStates.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStates.java
index 26a6884..eeb9252 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStates.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStates.java
@@ -55,22 +55,6 @@ import org.apache.hbase.thirdparty.com.google.common.annotations.VisibleForTesti
public class RegionStates {
private static final Logger LOG = LoggerFactory.getLogger(RegionStates.class);
- // TODO: need to be more specific, i.e, OPENING vs. OPEN, CLOSING vs. CLOSED.
- static final State[] STATES_EXPECTED_ON_OPEN = new State[] {
- State.OPEN, // State may already be OPEN if we died after receiving the OPEN from regionserver
- // but before complete finish of AssignProcedure. HBASE-20100.
- State.OFFLINE, State.CLOSED, State.ABNORMALLY_CLOSED, // disable/offline
- State.SPLITTING, // ServerCrashProcedure
- State.OPENING, State.FAILED_OPEN, // already in-progress (retrying)
- State.MERGED, State.SPLITTING_NEW
- };
-
- static final State[] STATES_EXPECTED_ON_CLOSE = new State[] {
- State.SPLITTING, State.MERGING, State.OPENING, // ServerCrashProcedure
- State.OPEN, // enabled/open
- State.CLOSING // already in-progress (retrying)
- };
-
// This comparator sorts the RegionStates by time stamp then Region name.
// Comparing by timestamp alone can lead us to discard different RegionStates that happen
// to share a timestamp.
http://git-wip-us.apache.org/repos/asf/hbase/blob/3afe9fb7/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestEnableTable.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestEnableTable.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestEnableTable.java
index 7a1bc55..4e2d6ba 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestEnableTable.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestEnableTable.java
@@ -18,33 +18,26 @@
package org.apache.hadoop.hbase.client;
import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;
import java.util.Optional;
import java.util.concurrent.CountDownLatch;
import org.apache.hadoop.hbase.HBaseClassTestRule;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HConstants;
-import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.MetaTableAccessor;
-import org.apache.hadoop.hbase.MiniHBaseCluster;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.coprocessor.CoprocessorHost;
import org.apache.hadoop.hbase.coprocessor.MasterCoprocessor;
import org.apache.hadoop.hbase.coprocessor.MasterCoprocessorEnvironment;
import org.apache.hadoop.hbase.coprocessor.MasterObserver;
import org.apache.hadoop.hbase.coprocessor.ObserverContext;
-import org.apache.hadoop.hbase.master.HMaster;
import org.apache.hadoop.hbase.testclassification.MasterTests;
import org.apache.hadoop.hbase.testclassification.MediumTests;
import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.hbase.util.JVMClusterUtil;
import org.junit.After;
import org.junit.Before;
import org.junit.ClassRule;
@@ -55,10 +48,6 @@ import org.junit.rules.TestName;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import org.apache.hbase.thirdparty.com.google.common.base.Predicate;
-import org.apache.hbase.thirdparty.com.google.common.collect.Iterables;
-import org.apache.hbase.thirdparty.com.google.common.collect.Lists;
-
@Category({ MasterTests.class, MediumTests.class })
public class TestEnableTable {
@@ -85,63 +74,6 @@ public class TestEnableTable {
TEST_UTIL.shutdownMiniCluster();
}
- @Test
- public void testEnableTableWithNoRegionServers() throws Exception {
- final TableName tableName = TableName.valueOf(name.getMethodName());
- final MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
- final HMaster m = cluster.getMaster();
- final Admin admin = TEST_UTIL.getAdmin();
- final HTableDescriptor desc = new HTableDescriptor(tableName);
- desc.addFamily(new HColumnDescriptor(FAMILYNAME));
- admin.createTable(desc);
- admin.disableTable(tableName);
- TEST_UTIL.waitTableDisabled(tableName.getName());
-
- admin.enableTable(tableName);
- TEST_UTIL.waitTableEnabled(tableName);
- // disable once more
- admin.disableTable(tableName);
-
- TEST_UTIL.waitUntilNoRegionsInTransition(60000);
- // now stop region servers
- JVMClusterUtil.RegionServerThread rs = cluster.getRegionServerThreads().get(0);
- rs.getRegionServer().stop("stop");
- cluster.waitForRegionServerToStop(rs.getRegionServer().getServerName(), 10000);
-
- // We used to enable the table here but AMv2 would hang waiting on a RS to check-in.
- // Revisit.
-
- JVMClusterUtil.RegionServerThread rs2 = cluster.startRegionServer();
- cluster.waitForRegionServerToStart(rs2.getRegionServer().getServerName().getHostname(),
- rs2.getRegionServer().getServerName().getPort(), 60000);
-
- LOG.debug("Now enabling table " + tableName);
- admin.enableTable(tableName);
- assertTrue(admin.isTableEnabled(tableName));
-
- List<HRegionInfo> regions = TEST_UTIL.getAdmin().getTableRegions(tableName);
- assertEquals(1, regions.size());
- for (HRegionInfo region : regions) {
- TEST_UTIL.getAdmin().assign(region.getEncodedNameAsBytes());
- }
- LOG.debug("Waiting for table assigned " + tableName);
- TEST_UTIL.waitUntilAllRegionsAssigned(tableName);
- List<HRegionInfo> onlineRegions = admin.getOnlineRegions(
- rs2.getRegionServer().getServerName());
- ArrayList<HRegionInfo> tableRegions = filterTableRegions(tableName, onlineRegions);
- assertEquals(1, tableRegions.size());
- }
-
- private ArrayList<HRegionInfo> filterTableRegions(final TableName tableName,
- List<HRegionInfo> onlineRegions) {
- return Lists.newArrayList(Iterables.filter(onlineRegions, new Predicate<HRegionInfo>() {
- @Override
- public boolean apply(HRegionInfo input) {
- return input.getTable().equals(tableName);
- }
- }));
- }
-
/**
* We were only clearing rows that had a hregioninfo column in hbase:meta. Mangled rows that
* were missing the hregioninfo because of error were being left behind messing up any