You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by zh...@apache.org on 2018/08/20 22:16:26 UTC
[5/7] hbase git commit: HBASE-20881 Introduce a region transition
procedure to handle all the state transition for a region
http://git-wip-us.apache.org/repos/asf/hbase/blob/bb349413/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionRemoteProcedureBase.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionRemoteProcedureBase.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionRemoteProcedureBase.java
new file mode 100644
index 0000000..6770e68
--- /dev/null
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionRemoteProcedureBase.java
@@ -0,0 +1,157 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.master.assignment;
+
+import java.io.IOException;
+import org.apache.hadoop.hbase.ServerName;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.RegionInfo;
+import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv;
+import org.apache.hadoop.hbase.master.procedure.TableProcedureInterface;
+import org.apache.hadoop.hbase.procedure2.FailedRemoteDispatchException;
+import org.apache.hadoop.hbase.procedure2.Procedure;
+import org.apache.hadoop.hbase.procedure2.ProcedureEvent;
+import org.apache.hadoop.hbase.procedure2.ProcedureStateSerializer;
+import org.apache.hadoop.hbase.procedure2.ProcedureSuspendedException;
+import org.apache.hadoop.hbase.procedure2.ProcedureYieldException;
+import org.apache.hadoop.hbase.procedure2.RemoteProcedureDispatcher.RemoteProcedure;
+import org.apache.hadoop.hbase.procedure2.RemoteProcedureException;
+import org.apache.yetus.audience.InterfaceAudience;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.RegionRemoteProcedureBaseStateData;
+
+/**
+ * The base class for the remote procedures used to open/close a region.
+ * <p/>
+ * Notice that here we do not care about the result of the remote call, if the remote call is
+ * finished, either succeeded or not, we will always finish the procedure. The parent procedure
+ * should take care of the result and try to reschedule if the result is not good.
+ */
+@InterfaceAudience.Private
+public abstract class RegionRemoteProcedureBase extends Procedure<MasterProcedureEnv>
+ implements TableProcedureInterface, RemoteProcedure<MasterProcedureEnv, ServerName> {
+
+ private static final Logger LOG = LoggerFactory.getLogger(RegionRemoteProcedureBase.class);
+
+ protected RegionInfo region;
+
+ private ServerName targetServer;
+
+ private boolean dispatched;
+
+ protected RegionRemoteProcedureBase() {
+ }
+
+ protected RegionRemoteProcedureBase(RegionInfo region, ServerName targetServer) {
+ this.region = region;
+ this.targetServer = targetServer;
+ }
+
+ @Override
+ public void remoteOperationCompleted(MasterProcedureEnv env) {
+ // should not be called since we use reportRegionStateTransition to report the result
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public void remoteOperationFailed(MasterProcedureEnv env, RemoteProcedureException error) {
+ // should not be called since we use reportRegionStateTransition to report the result
+ throw new UnsupportedOperationException();
+ }
+
+ private ProcedureEvent<?> getRegionEvent(MasterProcedureEnv env) {
+ return env.getAssignmentManager().getRegionStates().getOrCreateRegionStateNode(region)
+ .getProcedureEvent();
+ }
+
+ @Override
+ public void remoteCallFailed(MasterProcedureEnv env, ServerName remote,
+ IOException exception) {
+ ProcedureEvent<?> event = getRegionEvent(env);
+ synchronized (event) {
+ if (event.isReady()) {
+ LOG.warn(
+ "The procedure event of procedure {} for region {} to server {} is not suspended, " +
+ "usually this should not happen, but anyway let's skip the following wake up code, ",
+ this, region, targetServer);
+ return;
+ }
+ LOG.warn("The remote operation {} for region {} to server {} failed", this, region,
+ targetServer, exception);
+ event.wake(env.getProcedureScheduler());
+ }
+ }
+
+ @Override
+ public TableName getTableName() {
+ return region.getTable();
+ }
+
+ @Override
+ protected void rollback(MasterProcedureEnv env) throws IOException, InterruptedException {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ protected boolean abort(MasterProcedureEnv env) {
+ return false;
+ }
+
+ @Override
+ protected Procedure<MasterProcedureEnv>[] execute(MasterProcedureEnv env)
+ throws ProcedureYieldException, ProcedureSuspendedException, InterruptedException {
+ if (dispatched) {
+ // we are done, the parent procedure will check whether we are succeeded.
+ return null;
+ }
+ ProcedureEvent<?> event = getRegionEvent(env);
+ synchronized (event) {
+ try {
+ env.getRemoteDispatcher().addOperationToNode(targetServer, this);
+ } catch (FailedRemoteDispatchException e) {
+ LOG.warn("Can not add remote operation {} for region {} to server {}, this usually " +
+ "because the server is alread dead, give up and mark the procedure as complete, " +
+ "the parent procedure will take care of this.", this, region, targetServer, e);
+ return null;
+ }
+ dispatched = true;
+ event.suspend();
+ event.suspendIfNotReady(this);
+ throw new ProcedureSuspendedException();
+ }
+ }
+
+ @Override
+ protected void serializeStateData(ProcedureStateSerializer serializer) throws IOException {
+ serializer.serialize(RegionRemoteProcedureBaseStateData.newBuilder()
+ .setRegion(ProtobufUtil.toRegionInfo(region))
+ .setTargetServer(ProtobufUtil.toServerName(targetServer)).setDispatched(dispatched).build());
+ }
+
+ @Override
+ protected void deserializeStateData(ProcedureStateSerializer serializer) throws IOException {
+ RegionRemoteProcedureBaseStateData data =
+ serializer.deserialize(RegionRemoteProcedureBaseStateData.class);
+ region = ProtobufUtil.toRegionInfo(data.getRegion());
+ targetServer = ProtobufUtil.toServerName(data.getTargetServer());
+ dispatched = data.getDispatched();
+ }
+}
http://git-wip-us.apache.org/repos/asf/hbase/blob/bb349413/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStateNode.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStateNode.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStateNode.java
new file mode 100644
index 0000000..81e6f78
--- /dev/null
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStateNode.java
@@ -0,0 +1,313 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.master.assignment;
+
+import java.util.Arrays;
+import java.util.concurrent.ConcurrentMap;
+import java.util.concurrent.locks.Lock;
+import java.util.concurrent.locks.ReentrantLock;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.ServerName;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.DoNotRetryRegionException;
+import org.apache.hadoop.hbase.client.RegionInfo;
+import org.apache.hadoop.hbase.client.RegionOfflineException;
+import org.apache.hadoop.hbase.exceptions.UnexpectedStateException;
+import org.apache.hadoop.hbase.master.RegionState;
+import org.apache.hadoop.hbase.master.RegionState.State;
+import org.apache.hadoop.hbase.procedure2.ProcedureEvent;
+import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
+import org.apache.yetus.audience.InterfaceAudience;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.hbase.thirdparty.com.google.common.annotations.VisibleForTesting;
+
+/**
+ * Current Region State. Most fields are synchronized with meta region, i.e, we will update meta
+ * immediately after we modify this RegionStateNode, and usually under the lock. The only exception
+ * is {@link #lastHost}, which should not be used for critical condition.
+ * <p/>
+ * Typically, the only way to modify this class is through {@link TransitRegionStateProcedure}, and
+ * we will record the TRSP along with this RegionStateNode to make sure that there could at most one
+ * TRSP. For other operations, such as SCP, we will first get the lock, and then try to schedule a
+ * TRSP. If there is already one, then the solution will be different:
+ * <ul>
+ * <li>For SCP, we will update the region state in meta to tell the TRSP to retry.</li>
+ * <li>For DisableTableProcedure, as we have the xlock, we can make sure that the TRSP has not been
+ * executed yet, so just unset it and attach a new one. The original one will quit immediately when
+ * executing.</li>
+ * <li>For split/merge, we will fail immediately as there is no actual operations yet so no
+ * harm.</li>
+ * <li>For EnableTableProcedure/TruncateTableProcedure, we can make sure that there will be no TRSP
+ * attached with the RSNs.</li>
+ * <li>For other procedures, you'd better use ReopenTableRegionsProcedure. The RTRP will take care
+ * of lots of corner cases when reopening regions.</li>
+ * </ul>
+ * <p/>
+ * Several fields are declared with {@code volatile}, which means you are free to get it without
+ * lock, but usually you should not use these fields without locking for critical condition, as it
+ * will be easily to introduce inconsistency. For example, you are free to dump the status and show
+ * it on web without locking, but if you want to change the state of the RegionStateNode by checking
+ * the current state, you'd better have the lock...
+ */
+@InterfaceAudience.Private
+public class RegionStateNode implements Comparable<RegionStateNode> {
+
+ private static final Logger LOG = LoggerFactory.getLogger(RegionStateNode.class);
+
+ private static final class AssignmentProcedureEvent extends ProcedureEvent<RegionInfo> {
+ public AssignmentProcedureEvent(final RegionInfo regionInfo) {
+ super(regionInfo);
+ }
+ }
+
+ @VisibleForTesting
+ final Lock lock = new ReentrantLock();
+ private final RegionInfo regionInfo;
+ private final ProcedureEvent<?> event;
+ private final ConcurrentMap<RegionInfo, RegionStateNode> ritMap;
+
+ // volatile only for getLastUpdate and test usage, the upper layer should sync on the
+ // RegionStateNode before accessing usually.
+ private volatile TransitRegionStateProcedure procedure = null;
+ private volatile ServerName regionLocation = null;
+ // notice that, the lastHost will only be updated when a region is successfully CLOSED through
+ // UnassignProcedure, so do not use it for critical condition as the data maybe stale and unsync
+ // with the data in meta.
+ private volatile ServerName lastHost = null;
+ /**
+ * A Region-in-Transition (RIT) moves through states. See {@link State} for complete list. A
+ * Region that is opened moves from OFFLINE => OPENING => OPENED.
+ */
+ private volatile State state = State.OFFLINE;
+
+ /**
+ * Updated whenever a call to {@link #setRegionLocation(ServerName)} or
+ * {@link #setState(State, State...)}.
+ */
+ private volatile long lastUpdate = 0;
+
+ private volatile long openSeqNum = HConstants.NO_SEQNUM;
+
+ RegionStateNode(RegionInfo regionInfo, ConcurrentMap<RegionInfo, RegionStateNode> ritMap) {
+ this.regionInfo = regionInfo;
+ this.event = new AssignmentProcedureEvent(regionInfo);
+ this.ritMap = ritMap;
+ }
+
+ /**
+ * @param update new region state this node should be assigned.
+ * @param expected current state should be in this given list of expected states
+ * @return true, if current state is in expected list; otherwise false.
+ */
+ public boolean setState(final State update, final State... expected) {
+ if (!isInState(expected)) {
+ return false;
+ }
+ this.state = update;
+ this.lastUpdate = EnvironmentEdgeManager.currentTime();
+ return true;
+ }
+
+ /**
+ * Put region into OFFLINE mode (set state and clear location).
+ * @return Last recorded server deploy
+ */
+ public ServerName offline() {
+ setState(State.OFFLINE);
+ return setRegionLocation(null);
+ }
+
+ /**
+ * Set new {@link State} but only if currently in <code>expected</code> State (if not, throw
+ * {@link UnexpectedStateException}.
+ */
+ public void transitionState(final State update, final State... expected)
+ throws UnexpectedStateException {
+ if (!setState(update, expected)) {
+ throw new UnexpectedStateException("Expected " + Arrays.toString(expected) +
+ " so could move to " + update + " but current state=" + getState());
+ }
+ }
+
+ public boolean isInState(final State... expected) {
+ if (expected != null && expected.length > 0) {
+ boolean expectedState = false;
+ for (int i = 0; i < expected.length; ++i) {
+ expectedState |= (getState() == expected[i]);
+ }
+ return expectedState;
+ }
+ return true;
+ }
+
+ public boolean isStuck() {
+ return isInState(State.FAILED_OPEN) && getProcedure() != null;
+ }
+
+ public boolean isInTransition() {
+ return getProcedure() != null;
+ }
+
+ public long getLastUpdate() {
+ TransitRegionStateProcedure proc = this.procedure;
+ return proc != null ? proc.getLastUpdate() : lastUpdate;
+ }
+
+ public void setLastHost(final ServerName serverName) {
+ this.lastHost = serverName;
+ }
+
+ public void setOpenSeqNum(final long seqId) {
+ this.openSeqNum = seqId;
+ }
+
+ public ServerName setRegionLocation(final ServerName serverName) {
+ ServerName lastRegionLocation = this.regionLocation;
+ if (LOG.isTraceEnabled() && serverName == null) {
+ LOG.trace("Tracking when we are set to null " + this, new Throwable("TRACE"));
+ }
+ this.regionLocation = serverName;
+ this.lastUpdate = EnvironmentEdgeManager.currentTime();
+ return lastRegionLocation;
+ }
+
+ public void setProcedure(TransitRegionStateProcedure proc) {
+ assert this.procedure == null;
+ this.procedure = proc;
+ ritMap.put(regionInfo, this);
+ }
+
+ public void unsetProcedure(TransitRegionStateProcedure proc) {
+ assert this.procedure == proc;
+ this.procedure = null;
+ ritMap.remove(regionInfo, this);
+ }
+
+ public TransitRegionStateProcedure getProcedure() {
+ return procedure;
+ }
+
+ public ProcedureEvent<?> getProcedureEvent() {
+ return event;
+ }
+
+ public RegionInfo getRegionInfo() {
+ return regionInfo;
+ }
+
+ public TableName getTable() {
+ return getRegionInfo().getTable();
+ }
+
+ public boolean isSystemTable() {
+ return getTable().isSystemTable();
+ }
+
+ public ServerName getLastHost() {
+ return lastHost;
+ }
+
+ public ServerName getRegionLocation() {
+ return regionLocation;
+ }
+
+ public State getState() {
+ return state;
+ }
+
+ public long getOpenSeqNum() {
+ return openSeqNum;
+ }
+
+ public int getFormatVersion() {
+ // we don't have any format for now
+ // it should probably be in regionInfo.getFormatVersion()
+ return 0;
+ }
+
+ public RegionState toRegionState() {
+ return new RegionState(getRegionInfo(), getState(), getLastUpdate(), getRegionLocation());
+ }
+
+ @Override
+ public int compareTo(final RegionStateNode other) {
+ // NOTE: RegionInfo sort by table first, so we are relying on that.
+ // we have a TestRegionState#testOrderedByTable() that check for that.
+ return RegionInfo.COMPARATOR.compare(getRegionInfo(), other.getRegionInfo());
+ }
+
+ @Override
+ public int hashCode() {
+ return getRegionInfo().hashCode();
+ }
+
+ @Override
+ public boolean equals(final Object other) {
+ if (this == other) {
+ return true;
+ }
+ if (!(other instanceof RegionStateNode)) {
+ return false;
+ }
+ return compareTo((RegionStateNode) other) == 0;
+ }
+
+ @Override
+ public String toString() {
+ return toDescriptiveString();
+ }
+
+ public String toShortString() {
+ // rit= is the current Region-In-Transition State -- see State enum.
+ return String.format("rit=%s, location=%s", getState(), getRegionLocation());
+ }
+
+ public String toDescriptiveString() {
+ return String.format("%s, table=%s, region=%s", toShortString(), getTable(),
+ getRegionInfo().getEncodedName());
+ }
+
+ public void checkOnline() throws DoNotRetryRegionException {
+ RegionInfo ri = getRegionInfo();
+ State s = state;
+ if (s != State.OPEN) {
+ throw new DoNotRetryRegionException(ri.getEncodedName() + " is no OPEN; state=" + s);
+ }
+ if (ri.isSplitParent()) {
+ throw new DoNotRetryRegionException(
+ ri.getEncodedName() + " is not online (splitParent=true)");
+ }
+ if (ri.isSplit()) {
+ throw new DoNotRetryRegionException(ri.getEncodedName() + " has split=true");
+ }
+ if (ri.isOffline()) {
+ // RegionOfflineException is not instance of DNRIOE so wrap it.
+ throw new DoNotRetryRegionException(new RegionOfflineException(ri.getEncodedName()));
+ }
+ }
+
+ public void lock() {
+ lock.lock();
+ }
+
+ public void unlock() {
+ lock.unlock();
+ }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hbase/blob/bb349413/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStateStore.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStateStore.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStateStore.java
index aeef835..48ec4fb 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStateStore.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStateStore.java
@@ -127,7 +127,7 @@ public class RegionStateStore {
}
}
- public void updateRegionLocation(RegionStates.RegionStateNode regionStateNode)
+ public void updateRegionLocation(RegionStateNode regionStateNode)
throws IOException {
if (regionStateNode.getRegionInfo().isMetaRegion()) {
updateMetaLocation(regionStateNode.getRegionInfo(), regionStateNode.getRegionLocation(),
http://git-wip-us.apache.org/repos/asf/hbase/blob/bb349413/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStates.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStates.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStates.java
index 9f01293..26a6884 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStates.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStates.java
@@ -1,5 +1,4 @@
/**
- *
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
@@ -16,11 +15,9 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
package org.apache.hadoop.hbase.master.assignment;
import java.util.ArrayList;
-import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
@@ -28,7 +25,6 @@ import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
-import java.util.Set;
import java.util.SortedSet;
import java.util.TreeSet;
import java.util.concurrent.ConcurrentHashMap;
@@ -41,12 +37,9 @@ import org.apache.hadoop.hbase.HRegionLocation;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.RegionInfo;
-import org.apache.hadoop.hbase.exceptions.UnexpectedStateException;
import org.apache.hadoop.hbase.master.RegionState;
import org.apache.hadoop.hbase.master.RegionState.State;
-import org.apache.hadoop.hbase.procedure2.ProcedureEvent;
import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -62,247 +55,22 @@ import org.apache.hbase.thirdparty.com.google.common.annotations.VisibleForTesti
public class RegionStates {
private static final Logger LOG = LoggerFactory.getLogger(RegionStates.class);
- protected static final State[] STATES_EXPECTED_ON_OPEN = new State[] {
+ // TODO: need to be more specific, i.e, OPENING vs. OPEN, CLOSING vs. CLOSED.
+ static final State[] STATES_EXPECTED_ON_OPEN = new State[] {
State.OPEN, // State may already be OPEN if we died after receiving the OPEN from regionserver
// but before complete finish of AssignProcedure. HBASE-20100.
- State.OFFLINE, State.CLOSED, // disable/offline
- State.SPLITTING, State.SPLIT, // ServerCrashProcedure
+ State.OFFLINE, State.CLOSED, State.ABNORMALLY_CLOSED, // disable/offline
+ State.SPLITTING, // ServerCrashProcedure
State.OPENING, State.FAILED_OPEN, // already in-progress (retrying)
+ State.MERGED, State.SPLITTING_NEW
};
- protected static final State[] STATES_EXPECTED_ON_CLOSE = new State[] {
- State.SPLITTING, State.SPLIT, State.MERGING, // ServerCrashProcedure
+ static final State[] STATES_EXPECTED_ON_CLOSE = new State[] {
+ State.SPLITTING, State.MERGING, State.OPENING, // ServerCrashProcedure
State.OPEN, // enabled/open
State.CLOSING // already in-progress (retrying)
};
- private static class AssignmentProcedureEvent extends ProcedureEvent<RegionInfo> {
- public AssignmentProcedureEvent(final RegionInfo regionInfo) {
- super(regionInfo);
- }
- }
-
- private static class ServerReportEvent extends ProcedureEvent<ServerName> {
- public ServerReportEvent(final ServerName serverName) {
- super(serverName);
- }
- }
-
- /**
- * Current Region State.
- * In-memory only. Not persisted.
- */
- // Mutable/Immutable? Changes have to be synchronized or not?
- // Data members are volatile which seems to say multi-threaded access is fine.
- // In the below we do check and set but the check state could change before
- // we do the set because no synchronization....which seems dodgy. Clear up
- // understanding here... how many threads accessing? Do locks make it so one
- // thread at a time working on a single Region's RegionStateNode? Lets presume
- // so for now. Odd is that elsewhere in this RegionStates, we synchronize on
- // the RegionStateNode instance. TODO.
- public static class RegionStateNode implements Comparable<RegionStateNode> {
- private final RegionInfo regionInfo;
- private final ProcedureEvent<?> event;
-
- private volatile RegionTransitionProcedure procedure = null;
- private volatile ServerName regionLocation = null;
- // notice that, the lastHost will only be updated when a region is successfully CLOSED through
- // UnassignProcedure, so do not use it for critical condition as the data maybe stale and unsync
- // with the data in meta.
- private volatile ServerName lastHost = null;
- /**
- * A Region-in-Transition (RIT) moves through states.
- * See {@link State} for complete list. A Region that
- * is opened moves from OFFLINE => OPENING => OPENED.
- */
- private volatile State state = State.OFFLINE;
-
- /**
- * Updated whenever a call to {@link #setRegionLocation(ServerName)}
- * or {@link #setState(State, State...)}.
- */
- private volatile long lastUpdate = 0;
-
- private volatile long openSeqNum = HConstants.NO_SEQNUM;
-
- public RegionStateNode(final RegionInfo regionInfo) {
- this.regionInfo = regionInfo;
- this.event = new AssignmentProcedureEvent(regionInfo);
- }
-
- /**
- * @param update new region state this node should be assigned.
- * @param expected current state should be in this given list of expected states
- * @return true, if current state is in expected list; otherwise false.
- */
- public boolean setState(final State update, final State... expected) {
- if (!isInState(expected)) {
- return false;
- }
- this.state = update;
- this.lastUpdate = EnvironmentEdgeManager.currentTime();
- return true;
- }
-
- /**
- * Put region into OFFLINE mode (set state and clear location).
- * @return Last recorded server deploy
- */
- public ServerName offline() {
- setState(State.OFFLINE);
- return setRegionLocation(null);
- }
-
- /**
- * Set new {@link State} but only if currently in <code>expected</code> State
- * (if not, throw {@link UnexpectedStateException}.
- */
- public void transitionState(final State update, final State... expected)
- throws UnexpectedStateException {
- if (!setState(update, expected)) {
- throw new UnexpectedStateException("Expected " + Arrays.toString(expected) +
- " so could move to " + update + " but current state=" + getState());
- }
- }
-
- public boolean isInState(final State... expected) {
- if (expected != null && expected.length > 0) {
- boolean expectedState = false;
- for (int i = 0; i < expected.length; ++i) {
- expectedState |= (getState() == expected[i]);
- }
- return expectedState;
- }
- return true;
- }
-
- public boolean isStuck() {
- return isInState(State.FAILED_OPEN) && getProcedure() != null;
- }
-
- public boolean isInTransition() {
- return getProcedure() != null;
- }
-
- public long getLastUpdate() {
- return procedure != null ? procedure.getLastUpdate() : lastUpdate;
- }
-
- public void setLastHost(final ServerName serverName) {
- this.lastHost = serverName;
- }
-
- public void setOpenSeqNum(final long seqId) {
- this.openSeqNum = seqId;
- }
-
- public ServerName setRegionLocation(final ServerName serverName) {
- ServerName lastRegionLocation = this.regionLocation;
- if (LOG.isTraceEnabled() && serverName == null) {
- LOG.trace("Tracking when we are set to null " + this, new Throwable("TRACE"));
- }
- this.regionLocation = serverName;
- this.lastUpdate = EnvironmentEdgeManager.currentTime();
- return lastRegionLocation;
- }
-
- public boolean setProcedure(final RegionTransitionProcedure proc) {
- if (this.procedure != null && this.procedure != proc) {
- return false;
- }
- this.procedure = proc;
- return true;
- }
-
- public boolean unsetProcedure(final RegionTransitionProcedure proc) {
- if (this.procedure != null && this.procedure != proc) {
- return false;
- }
- this.procedure = null;
- return true;
- }
-
- public RegionTransitionProcedure getProcedure() {
- return procedure;
- }
-
- public ProcedureEvent<?> getProcedureEvent() {
- return event;
- }
-
- public RegionInfo getRegionInfo() {
- return regionInfo;
- }
-
- public TableName getTable() {
- return getRegionInfo().getTable();
- }
-
- public boolean isSystemTable() {
- return getTable().isSystemTable();
- }
-
- public ServerName getLastHost() {
- return lastHost;
- }
-
- public ServerName getRegionLocation() {
- return regionLocation;
- }
-
- public State getState() {
- return state;
- }
-
- public long getOpenSeqNum() {
- return openSeqNum;
- }
-
- public int getFormatVersion() {
- // we don't have any format for now
- // it should probably be in regionInfo.getFormatVersion()
- return 0;
- }
-
- public RegionState toRegionState() {
- return new RegionState(getRegionInfo(), getState(), getLastUpdate(), getRegionLocation());
- }
-
- @Override
- public int compareTo(final RegionStateNode other) {
- // NOTE: RegionInfo sort by table first, so we are relying on that.
- // we have a TestRegionState#testOrderedByTable() that check for that.
- return RegionInfo.COMPARATOR.compare(getRegionInfo(), other.getRegionInfo());
- }
-
- @Override
- public int hashCode() {
- return getRegionInfo().hashCode();
- }
-
- @Override
- public boolean equals(final Object other) {
- if (this == other) return true;
- if (!(other instanceof RegionStateNode)) return false;
- return compareTo((RegionStateNode)other) == 0;
- }
-
- @Override
- public String toString() {
- return toDescriptiveString();
- }
-
- public String toShortString() {
- // rit= is the current Region-In-Transition State -- see State enum.
- return String.format("rit=%s, location=%s", getState(), getRegionLocation());
- }
-
- public String toDescriptiveString() {
- return String.format("%s, table=%s, region=%s",
- toShortString(), getTable(), getRegionInfo().getEncodedName());
- }
- }
-
// This comparator sorts the RegionStates by time stamp then Region name.
// Comparing by timestamp alone can lead us to discard different RegionStates that happen
// to share a timestamp.
@@ -314,130 +82,6 @@ public class RegionStates {
}
}
- /**
- * Server State.
- */
- public enum ServerState {
- /**
- * Initial state. Available.
- */
- ONLINE,
-
- /**
- * Only server which carries meta can have this state. We will split wal for meta and then
- * assign meta first before splitting other wals.
- */
- SPLITTING_META,
-
- /**
- * Indicate that the meta splitting is done. We need this state so that the UnassignProcedure
- * for meta can safely quit. See the comments in UnassignProcedure.remoteCallFailed for more
- * details.
- */
- SPLITTING_META_DONE,
-
- /**
- * Server expired/crashed. Currently undergoing WAL splitting.
- */
- SPLITTING,
-
- /**
- * WAL splitting done. This state will be used to tell the UnassignProcedure that it can safely
- * quit. See the comments in UnassignProcedure.remoteCallFailed for more details.
- */
- OFFLINE
- }
-
- /**
- * State of Server; list of hosted regions, etc.
- */
- public static class ServerStateNode implements Comparable<ServerStateNode> {
- private final ServerReportEvent reportEvent;
-
- private final Set<RegionStateNode> regions;
- private final ServerName serverName;
-
- private volatile ServerState state = ServerState.ONLINE;
-
- public ServerStateNode(final ServerName serverName) {
- this.serverName = serverName;
- this.regions = ConcurrentHashMap.newKeySet();
- this.reportEvent = new ServerReportEvent(serverName);
- }
-
- public ServerName getServerName() {
- return serverName;
- }
-
- public ServerState getState() {
- return state;
- }
-
- public ProcedureEvent<?> getReportEvent() {
- return reportEvent;
- }
-
- public boolean isInState(final ServerState... expected) {
- boolean expectedState = false;
- if (expected != null) {
- for (int i = 0; i < expected.length; ++i) {
- expectedState |= (state == expected[i]);
- }
- }
- return expectedState;
- }
-
- private void setState(final ServerState state) {
- this.state = state;
- }
-
- public Set<RegionStateNode> getRegions() {
- return regions;
- }
-
- public int getRegionCount() {
- return regions.size();
- }
-
- public ArrayList<RegionInfo> getRegionInfoList() {
- ArrayList<RegionInfo> hris = new ArrayList<RegionInfo>(regions.size());
- for (RegionStateNode region: regions) {
- hris.add(region.getRegionInfo());
- }
- return hris;
- }
-
- public void addRegion(final RegionStateNode regionNode) {
- this.regions.add(regionNode);
- }
-
- public void removeRegion(final RegionStateNode regionNode) {
- this.regions.remove(regionNode);
- }
-
- @Override
- public int compareTo(final ServerStateNode other) {
- return getServerName().compareTo(other.getServerName());
- }
-
- @Override
- public int hashCode() {
- return getServerName().hashCode();
- }
-
- @Override
- public boolean equals(final Object other) {
- if (this == other) return true;
- if (!(other instanceof ServerStateNode)) return false;
- return compareTo((ServerStateNode)other) == 0;
- }
-
- @Override
- public String toString() {
- return String.format("ServerStateNode(%s)", getServerName());
- }
- }
-
public final static RegionStateStampComparator REGION_STATE_STAMP_COMPARATOR =
new RegionStateStampComparator();
@@ -482,22 +126,23 @@ public class RegionStates {
// ==========================================================================
// RegionStateNode helpers
// ==========================================================================
- protected RegionStateNode createRegionStateNode(final RegionInfo regionInfo) {
- RegionStateNode newNode = new RegionStateNode(regionInfo);
+ @VisibleForTesting
+ RegionStateNode createRegionStateNode(RegionInfo regionInfo) {
+ RegionStateNode newNode = new RegionStateNode(regionInfo, regionInTransition);
RegionStateNode oldNode = regionsMap.putIfAbsent(regionInfo.getRegionName(), newNode);
return oldNode != null ? oldNode : newNode;
}
- protected RegionStateNode getOrCreateRegionStateNode(final RegionInfo regionInfo) {
- RegionStateNode node = regionsMap.get(regionInfo.getRegionName());
+ public RegionStateNode getOrCreateRegionStateNode(RegionInfo regionInfo) {
+ RegionStateNode node = getRegionStateNodeFromName(regionInfo.getRegionName());
return node != null ? node : createRegionStateNode(regionInfo);
}
- RegionStateNode getRegionStateNodeFromName(final byte[] regionName) {
+ RegionStateNode getRegionStateNodeFromName(byte[] regionName) {
return regionsMap.get(regionName);
}
- protected RegionStateNode getRegionStateNode(final RegionInfo regionInfo) {
+ public RegionStateNode getRegionStateNode(RegionInfo regionInfo) {
return getRegionStateNodeFromName(regionInfo.getRegionName());
}
@@ -593,7 +238,8 @@ public class RegionStates {
}
private HRegionLocation createRegionForReopen(RegionStateNode node) {
- synchronized (node) {
+ node.lock();
+ try {
if (!include(node, false)) {
return null;
}
@@ -605,6 +251,8 @@ public class RegionStates {
} else {
return null;
}
+ } finally {
+ node.unlock();
}
}
@@ -649,7 +297,8 @@ public class RegionStates {
if (node == null) {
return null;
}
- synchronized (node) {
+ node.lock();
+ try {
if (oldLoc.getSeqNum() >= 0) {
// in OPEN state before
if (node.isInState(State.OPEN)) {
@@ -683,6 +332,8 @@ public class RegionStates {
return new HRegionLocation(node.getRegionInfo(), node.getRegionLocation(), openSeqNum);
}
}
+ } finally {
+ node.unlock();
}
}
@@ -726,8 +377,10 @@ public class RegionStates {
* @return set of RegionInfo hosted by the specified server
*/
public List<RegionInfo> getServerRegionInfoSet(final ServerName serverName) {
- final ServerStateNode serverInfo = getServerNode(serverName);
- if (serverInfo == null) return Collections.emptyList();
+ ServerStateNode serverInfo = getServerNode(serverName);
+ if (serverInfo == null) {
+ return Collections.emptyList();
+ }
synchronized (serverInfo) {
return serverInfo.getRegionInfoList();
@@ -779,10 +432,13 @@ public class RegionStates {
setServerState(serverName, ServerState.OFFLINE);
}
- public void updateRegionState(final RegionInfo regionInfo, final State state) {
- final RegionStateNode regionNode = getOrCreateRegionStateNode(regionInfo);
- synchronized (regionNode) {
+ public void updateRegionState(RegionInfo regionInfo, State state) {
+ RegionStateNode regionNode = getOrCreateRegionStateNode(regionInfo);
+ regionNode.lock();
+ try {
regionNode.setState(state);
+ } finally {
+ regionNode.unlock();
}
}
@@ -799,11 +455,14 @@ public class RegionStates {
return result;
}
- public boolean isRegionInState(final RegionInfo regionInfo, final State... state) {
- final RegionStateNode region = getRegionStateNode(regionInfo);
- if (region != null) {
- synchronized (region) {
- return region.isInState(state);
+ public boolean isRegionInState(RegionInfo regionInfo, State... state) {
+ RegionStateNode regionNode = getRegionStateNode(regionInfo);
+ if (regionNode != null) {
+ regionNode.lock();
+ try {
+ return regionNode.isInState(state);
+ } finally {
+ regionNode.unlock();
}
}
return false;
@@ -866,12 +525,15 @@ public class RegionStates {
return tableRegions;
}
- public ServerName getRegionServerOfRegion(final RegionInfo regionInfo) {
- final RegionStateNode region = getRegionStateNode(regionInfo);
- if (region != null) {
- synchronized (region) {
- ServerName server = region.getRegionLocation();
- return server != null ? server : region.getLastHost();
+ public ServerName getRegionServerOfRegion(RegionInfo regionInfo) {
+ RegionStateNode regionNode = getRegionStateNode(regionInfo);
+ if (regionNode != null) {
+ regionNode.lock();
+ try {
+ ServerName server = regionNode.getRegionLocation();
+ return server != null ? server : regionNode.getLastHost();
+ } finally {
+ regionNode.unlock();
}
}
return null;
@@ -938,20 +600,6 @@ public class RegionStates {
// ==========================================================================
// Region in transition helpers
// ==========================================================================
- protected boolean addRegionInTransition(final RegionStateNode regionNode,
- final RegionTransitionProcedure procedure) {
- if (procedure != null && !regionNode.setProcedure(procedure)) return false;
-
- regionInTransition.put(regionNode.getRegionInfo(), regionNode);
- return true;
- }
-
- protected void removeRegionInTransition(final RegionStateNode regionNode,
- final RegionTransitionProcedure procedure) {
- regionInTransition.remove(regionNode.getRegionInfo());
- regionNode.unsetProcedure(procedure);
- }
-
public boolean hasRegionsInTransition() {
return !regionInTransition.isEmpty();
}
@@ -961,21 +609,17 @@ public class RegionStates {
return node != null ? node.isInTransition() : false;
}
- /**
- * @return If a procedure-in-transition for <code>hri</code>, return it else null.
- */
- public RegionTransitionProcedure getRegionTransitionProcedure(final RegionInfo hri) {
+ public RegionState getRegionTransitionState(RegionInfo hri) {
RegionStateNode node = regionInTransition.get(hri);
- if (node == null) return null;
- return node.getProcedure();
- }
-
- public RegionState getRegionTransitionState(final RegionInfo hri) {
- RegionStateNode node = regionInTransition.get(hri);
- if (node == null) return null;
+ if (node == null) {
+ return null;
+ }
- synchronized (node) {
+ node.lock();
+ try {
return node.isInTransition() ? node.toRegionState() : null;
+ } finally {
+ node.unlock();
}
}
@@ -1110,7 +754,7 @@ public class RegionStates {
serverMap.remove(serverName);
}
- protected ServerStateNode getServerNode(final ServerName serverName) {
+ ServerStateNode getServerNode(final ServerName serverName) {
return serverMap.get(serverName);
}
http://git-wip-us.apache.org/repos/asf/hbase/blob/bb349413/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionTransitionProcedure.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionTransitionProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionTransitionProcedure.java
index 0db8676..2f94765 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionTransitionProcedure.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionTransitionProcedure.java
@@ -1,5 +1,4 @@
/**
- *
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
@@ -24,100 +23,41 @@ import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.exceptions.UnexpectedStateException;
-import org.apache.hadoop.hbase.master.assignment.RegionStates.RegionStateNode;
import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv;
import org.apache.hadoop.hbase.master.procedure.TableProcedureInterface;
-import org.apache.hadoop.hbase.procedure2.FailedRemoteDispatchException;
import org.apache.hadoop.hbase.procedure2.Procedure;
-import org.apache.hadoop.hbase.procedure2.ProcedureStateSerializer;
import org.apache.hadoop.hbase.procedure2.ProcedureSuspendedException;
import org.apache.hadoop.hbase.procedure2.RemoteProcedureDispatcher.RemoteOperation;
import org.apache.hadoop.hbase.procedure2.RemoteProcedureDispatcher.RemoteProcedure;
import org.apache.hadoop.hbase.procedure2.RemoteProcedureException;
import org.apache.yetus.audience.InterfaceAudience;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
import org.apache.hbase.thirdparty.com.google.common.annotations.VisibleForTesting;
import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.RegionTransitionState;
-import org.apache.hadoop.hbase.shaded.protobuf.generated.ProcedureProtos;
import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.RegionStateTransition.TransitionCode;
/**
- * Base class for the Assign and Unassign Procedure.
- *
- * Locking:
- * Takes exclusive lock on the region being assigned/unassigned. Thus, there can only be one
- * RegionTransitionProcedure per region running at a time (see MasterProcedureScheduler).
- *
- * <p>This procedure is asynchronous and responds to external events.
- * The AssignmentManager will notify this procedure when the RS completes
- * the operation and reports the transitioned state
- * (see the Assign and Unassign class for more detail).</p>
- *
- * <p>Procedures move from the REGION_TRANSITION_QUEUE state when they are
- * first submitted, to the REGION_TRANSITION_DISPATCH state when the request
- * to remote server is sent and the Procedure is suspended waiting on external
- * event to be woken again. Once the external event is triggered, Procedure
- * moves to the REGION_TRANSITION_FINISH state.</p>
- *
- * <p>NOTE: {@link AssignProcedure} and {@link UnassignProcedure} should not be thought of
- * as being asymmetric, at least currently.
- * <ul>
- * <li>{@link AssignProcedure} moves through all the above described states and implements methods
- * associated with each while {@link UnassignProcedure} starts at state
- * REGION_TRANSITION_DISPATCH and state REGION_TRANSITION_QUEUE is not supported.</li>
- *
- * <li>When any step in {@link AssignProcedure} fails, failure handler
- * AssignProcedure#handleFailure(MasterProcedureEnv, RegionStateNode) re-attempts the
- * assignment by setting the procedure state to REGION_TRANSITION_QUEUE and forces
- * assignment to a different target server by setting {@link AssignProcedure#forceNewPlan}. When
- * the number of attempts reaches threshold configuration 'hbase.assignment.maximum.attempts',
- * the procedure is aborted. For {@link UnassignProcedure}, similar re-attempts are
- * intentionally not implemented. It is a 'one shot' procedure. See its class doc for how it
- * handles failure.
- * </li>
- * <li>If we find a region in an 'unexpected' state, we'll complain and retry with backoff forever.
- * The 'unexpected' state needs to be fixed either by another running Procedure or by operator
- * intervention (Regions in 'unexpected' state indicates bug or unexpected transition type).
- * For this to work, subclasses need to persist the 'attempt' counter kept in this class when
- * they do serializeStateData and restore it inside their deserializeStateData, just as they do
- * for {@link #regionInfo}.
- * </li>
- * </ul>
- * </p>
- *
- * <p>TODO: Considering it is a priority doing all we can to get make a region available as soon as
- * possible, re-attempting with any target makes sense if specified target fails in case of
- * {@link AssignProcedure}. For {@link UnassignProcedure}, our concern is preventing data loss
- * on failed unassign. See class doc for explanation.
+ * Leave here only for checking if we can successfully start the master.
+ * @deprecated Do not use any more.
+ * @see TransitRegionStateProcedure
*/
+@Deprecated
@InterfaceAudience.Private
-public abstract class RegionTransitionProcedure
- extends Procedure<MasterProcedureEnv>
- implements TableProcedureInterface,
- RemoteProcedure<MasterProcedureEnv, ServerName> {
- private static final Logger LOG = LoggerFactory.getLogger(RegionTransitionProcedure.class);
+public abstract class RegionTransitionProcedure extends Procedure<MasterProcedureEnv>
+ implements TableProcedureInterface, RemoteProcedure<MasterProcedureEnv, ServerName> {
protected final AtomicBoolean aborted = new AtomicBoolean(false);
private RegionTransitionState transitionState = RegionTransitionState.REGION_TRANSITION_QUEUE;
- /**
- * This data member must be persisted. Expectation is that it is done by subclasses in their
- * {@link #serializeStateData(ProcedureStateSerializer)} call, restoring {@link #regionInfo}
- * in their {@link #deserializeStateData(ProcedureStateSerializer)} method.
- */
+
private RegionInfo regionInfo;
- /**
- * Like {@link #regionInfo}, the expectation is that subclasses persist the value of this
- * data member. It is used doing backoff when Procedure gets stuck.
- */
private int attempt;
// Required by the Procedure framework to create the procedure on replay
- public RegionTransitionProcedure() {}
+ public RegionTransitionProcedure() {
+ }
public RegionTransitionProcedure(final RegionInfo regionInfo) {
this.regionInfo = regionInfo;
@@ -128,22 +68,10 @@ public abstract class RegionTransitionProcedure
return regionInfo;
}
- /**
- * This setter is for subclasses to call in their
- * {@link #deserializeStateData(ProcedureStateSerializer)} method. Expectation is that
- * subclasses will persist `regioninfo` in their
- * {@link #serializeStateData(ProcedureStateSerializer)} method and then restore `regionInfo` on
- * deserialization by calling.
- */
protected void setRegionInfo(final RegionInfo regionInfo) {
this.regionInfo = regionInfo;
}
- /**
- * This setter is for subclasses to call in their
- * {@link #deserializeStateData(ProcedureStateSerializer)} method.
- * @see #setRegionInfo(RegionInfo)
- */
protected void setAttempt(int attempt) {
this.attempt = attempt;
}
@@ -155,7 +83,7 @@ public abstract class RegionTransitionProcedure
@Override
public TableName getTableName() {
RegionInfo hri = getRegionInfo();
- return hri != null? hri.getTable(): null;
+ return hri != null ? hri.getTable() : null;
}
public boolean isMeta() {
@@ -168,7 +96,7 @@ public abstract class RegionTransitionProcedure
sb.append(" table=");
sb.append(getTableName());
sb.append(", region=");
- sb.append(getRegionInfo() == null? null: getRegionInfo().getEncodedName());
+ sb.append(getRegionInfo() == null ? null : getRegionInfo().getEncodedName());
}
public RegionStateNode getRegionState(final MasterProcedureEnv env) {
@@ -184,113 +112,26 @@ public abstract class RegionTransitionProcedure
}
protected abstract boolean startTransition(MasterProcedureEnv env, RegionStateNode regionNode)
- throws IOException, ProcedureSuspendedException;
-
- /**
- * Called when the Procedure is in the REGION_TRANSITION_DISPATCH state.
- * In here we do the RPC call to OPEN/CLOSE the region. The suspending of
- * the thread so it sleeps until it gets update that the OPEN/CLOSE has
- * succeeded is complicated. Read the implementations to learn more.
- */
+ throws IOException, ProcedureSuspendedException;
+
protected abstract boolean updateTransition(MasterProcedureEnv env, RegionStateNode regionNode)
- throws IOException, ProcedureSuspendedException;
+ throws IOException, ProcedureSuspendedException;
protected abstract void finishTransition(MasterProcedureEnv env, RegionStateNode regionNode)
- throws IOException, ProcedureSuspendedException;
+ throws IOException, ProcedureSuspendedException;
- protected abstract void reportTransition(MasterProcedureEnv env,
- RegionStateNode regionNode, TransitionCode code, long seqId) throws UnexpectedStateException;
+ protected abstract void reportTransition(MasterProcedureEnv env, RegionStateNode regionNode,
+ TransitionCode code, long seqId) throws UnexpectedStateException;
@Override
public abstract RemoteOperation remoteCallBuild(MasterProcedureEnv env, ServerName serverName);
- /**
- * @return True if processing of fail is complete; the procedure will be woken from its suspend
- * and we'll go back to running through procedure steps:
- * otherwise if false we leave the procedure in suspended state.
- */
- protected abstract boolean remoteCallFailed(MasterProcedureEnv env,
- RegionStateNode regionNode, IOException exception);
+ protected abstract boolean remoteCallFailed(MasterProcedureEnv env, RegionStateNode regionNode,
+ IOException exception);
@Override
public synchronized void remoteCallFailed(final MasterProcedureEnv env,
final ServerName serverName, final IOException exception) {
- final RegionStateNode regionNode = getRegionState(env);
- LOG.warn("Remote call failed {}; {}; {}; exception={}", serverName,
- this, regionNode.toShortString(), exception.getClass().getSimpleName(), exception);
- if (remoteCallFailed(env, regionNode, exception)) {
- // NOTE: This call to wakeEvent puts this Procedure back on the scheduler.
- // Thereafter, another Worker can be in here so DO NOT MESS WITH STATE beyond
- // this method. Just get out of this current processing quickly.
- regionNode.getProcedureEvent().wake(env.getProcedureScheduler());
- }
- // else leave the procedure in suspended state; it is waiting on another call to this callback
- }
-
- /**
- * Be careful! At the end of this method, the procedure has either succeeded
- * and this procedure has been set into a suspended state OR, we failed and
- * this procedure has been put back on the scheduler ready for another worker
- * to pick it up. In both cases, we need to exit the current Worker processing
- * immediately!
- * @return True if we successfully dispatched the call and false if we failed;
- * if failed, we need to roll back any setup done for the dispatch.
- */
- protected boolean addToRemoteDispatcher(final MasterProcedureEnv env,
- final ServerName targetServer) {
- LOG.info("Dispatch {}; {}", this, getRegionState(env).toShortString());
-
- // Put this procedure into suspended mode to wait on report of state change
- // from remote regionserver. Means Procedure associated ProcedureEvent is marked not 'ready'.
- getRegionState(env).getProcedureEvent().suspend();
-
- // Tricky because the below call to addOperationToNode can fail. If it fails, we need to
- // backtrack on stuff like the 'suspend' done above -- tricky as the 'wake' requests us -- and
- // ditto up in the caller; it needs to undo state changes. Inside in remoteCallFailed, it does
- // wake to undo the above suspend.
- try {
- env.getRemoteDispatcher().addOperationToNode(targetServer, this);
- } catch (FailedRemoteDispatchException frde) {
- remoteCallFailed(env, targetServer, frde);
- return false;
- }
- return true;
- }
-
- protected void reportTransition(final MasterProcedureEnv env, final ServerName serverName,
- final TransitionCode code, final long seqId) throws UnexpectedStateException {
- final RegionStateNode regionNode = getRegionState(env);
- if (LOG.isDebugEnabled()) {
- LOG.debug("Received report " + code + " seqId=" + seqId + ", " +
- this + "; " + regionNode.toShortString());
- }
- if (!serverName.equals(regionNode.getRegionLocation())) {
- if (isMeta() && regionNode.getRegionLocation() == null) {
- regionNode.setRegionLocation(serverName);
- } else {
- throw new UnexpectedStateException(String.format(
- "Unexpected state=%s from server=%s; expected server=%s; %s; %s",
- code, serverName, regionNode.getRegionLocation(),
- this, regionNode.toShortString()));
- }
- }
-
- reportTransition(env, regionNode, code, seqId);
-
- // NOTE: This call adds this procedure back on the scheduler.
- // This makes it so this procedure can run again. Another worker will take
- // processing to the next stage. At an extreme, the other worker may run in
- // parallel so DO NOT CHANGE any state hereafter! This should be last thing
- // done in this processing step.
- regionNode.getProcedureEvent().wake(env.getProcedureScheduler());
- }
-
- protected boolean isServerOnline(final MasterProcedureEnv env, final RegionStateNode regionNode) {
- return isServerOnline(env, regionNode.getRegionLocation());
- }
-
- protected boolean isServerOnline(final MasterProcedureEnv env, final ServerName serverName) {
- return env.getMasterServices().getServerManager().isServerOnline(serverName);
}
@Override
@@ -303,108 +144,12 @@ public abstract class RegionTransitionProcedure
}
@Override
- protected Procedure[] execute(final MasterProcedureEnv env) throws ProcedureSuspendedException {
- final AssignmentManager am = env.getAssignmentManager();
- final RegionStateNode regionNode = getRegionState(env);
- if (!am.addRegionInTransition(regionNode, this)) {
- String msg = String.format(
- "There is already another procedure running on this region this=%s owner=%s",
- this, regionNode.getProcedure());
- LOG.warn(msg + " " + this + "; " + regionNode.toShortString());
- setAbortFailure(getClass().getSimpleName(), msg);
- return null;
- }
- try {
- boolean retry;
- do {
- retry = false;
- switch (transitionState) {
- case REGION_TRANSITION_QUEUE:
- // 1. push into the AM queue for balancer policy
- if (!startTransition(env, regionNode)) {
- // The operation figured it is done or it aborted; check getException()
- am.removeRegionInTransition(getRegionState(env), this);
- return null;
- }
- transitionState = RegionTransitionState.REGION_TRANSITION_DISPATCH;
- if (regionNode.getProcedureEvent().suspendIfNotReady(this)) {
- // Why this suspend? Because we want to ensure Store happens before proceed?
- throw new ProcedureSuspendedException();
- }
- break;
-
- case REGION_TRANSITION_DISPATCH:
- // 2. send the request to the target server
- if (!updateTransition(env, regionNode)) {
- // The operation figured it is done or it aborted; check getException()
- am.removeRegionInTransition(regionNode, this);
- return null;
- }
- if (transitionState != RegionTransitionState.REGION_TRANSITION_DISPATCH) {
- retry = true;
- break;
- }
- if (regionNode.getProcedureEvent().suspendIfNotReady(this)) {
- throw new ProcedureSuspendedException();
- }
- break;
-
- case REGION_TRANSITION_FINISH:
- // 3. wait assignment response. completion/failure
- LOG.debug("Finishing {}; {}", this, regionNode.toShortString());
- finishTransition(env, regionNode);
- am.removeRegionInTransition(regionNode, this);
- return null;
- }
- } while (retry);
- // If here, success so clear out the attempt counter so we start fresh each time we get stuck.
- this.attempt = 0;
- } catch (IOException e) {
- long backoff = getBackoffTime(this.attempt++);
- LOG.warn("Failed transition, suspend {}secs {}; {}; waiting on rectified condition fixed " +
- "by other Procedure or operator intervention", backoff / 1000, this,
- regionNode.toShortString(), e);
- getRegionState(env).getProcedureEvent().suspend();
- if (getRegionState(env).getProcedureEvent().suspendIfNotReady(this)) {
- setTimeout(Math.toIntExact(backoff));
- setState(ProcedureProtos.ProcedureState.WAITING_TIMEOUT);
- throw new ProcedureSuspendedException();
- }
- }
-
- return new Procedure[] {this};
- }
-
- private long getBackoffTime(int attempts) {
- long backoffTime = (long)(1000 * Math.pow(2, attempts));
- long maxBackoffTime = 60 * 60 * 1000; // An hour. Hard-coded for for now.
- return backoffTime < maxBackoffTime? backoffTime: maxBackoffTime;
- }
-
- /**
- * At end of timeout, wake ourselves up so we run again.
- */
- @Override
- protected synchronized boolean setTimeoutFailure(MasterProcedureEnv env) {
- setState(ProcedureProtos.ProcedureState.RUNNABLE);
- getRegionState(env).getProcedureEvent().wake(env.getProcedureScheduler());
- return false; // 'false' means that this procedure handled the timeout
+ protected Procedure[] execute(final MasterProcedureEnv env) {
+ return null;
}
@Override
- protected void rollback(final MasterProcedureEnv env) {
- if (isRollbackSupported(transitionState)) {
- // Nothing done up to this point. abort safely.
- // This should happen when something like disableTable() is triggered.
- env.getAssignmentManager().removeRegionInTransition(getRegionState(env), this);
- return;
- }
-
- // There is no rollback for assignment unless we cancel the operation by
- // dropping/disabling the table.
- throw new UnsupportedOperationException("Unhandled state " + transitionState +
- "; there is no rollback for assignment unless we cancel the operation by " +
- "dropping/disabling the table");
+ protected void rollback(MasterProcedureEnv env) {
}
protected abstract boolean isRollbackSupported(final RegionTransitionState state);
@@ -419,54 +164,6 @@ public abstract class RegionTransitionProcedure
}
@Override
- protected boolean waitInitialized(MasterProcedureEnv env) {
- // Unless we are assigning meta, wait for meta to be available and loaded.
- if (isMeta()) {
- return false;
- }
- AssignmentManager am = env.getAssignmentManager();
- return am.waitMetaLoaded(this) || am.waitMetaAssigned(this, regionInfo);
- }
-
- @Override
- protected LockState acquireLock(final MasterProcedureEnv env) {
- // TODO: Revisit this and move it to the executor
- if (env.getProcedureScheduler().waitRegion(this, getRegionInfo())) {
- try {
- LOG.debug(LockState.LOCK_EVENT_WAIT + " pid=" + getProcId() + " " +
- env.getProcedureScheduler().dumpLocks());
- } catch (IOException e) {
- // ignore, just for logging
- }
- return LockState.LOCK_EVENT_WAIT;
- }
- return LockState.LOCK_ACQUIRED;
- }
-
- @Override
- protected void releaseLock(final MasterProcedureEnv env) {
- env.getProcedureScheduler().wakeRegion(this, getRegionInfo());
- }
-
- @Override
- protected boolean holdLock(final MasterProcedureEnv env) {
- return true;
- }
-
- @Override
- protected boolean shouldWaitClientAck(MasterProcedureEnv env) {
- // The operation is triggered internally on the server
- // the client does not know about this procedure.
- return false;
- }
-
- /**
- * Used by ServerCrashProcedure to see if this Assign/Unassign needs processing.
- * @return ServerName the Assign or Unassign is going against.
- */
- public abstract ServerName getServer(final MasterProcedureEnv env);
-
- @Override
public void remoteOperationCompleted(MasterProcedureEnv env) {
// should not be called for region operation until we modified the open/close region procedure
throw new UnsupportedOperationException();
http://git-wip-us.apache.org/repos/asf/hbase/blob/bb349413/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/ServerState.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/ServerState.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/ServerState.java
new file mode 100644
index 0000000..6925c42
--- /dev/null
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/ServerState.java
@@ -0,0 +1,55 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.master.assignment;
+
+import org.apache.yetus.audience.InterfaceAudience;
+
+/**
+ * Server State.
+ */
+@InterfaceAudience.Private
+enum ServerState {
+ /**
+ * Initial state. Available.
+ */
+ ONLINE,
+
+ /**
+ * Only server which carries meta can have this state. We will split wal for meta and then
+ * assign meta first before splitting other wals.
+ */
+ SPLITTING_META,
+
+ /**
+ * Indicate that the meta splitting is done. We need this state so that the UnassignProcedure
+ * for meta can safely quit. See the comments in UnassignProcedure.remoteCallFailed for more
+ * details.
+ */
+ SPLITTING_META_DONE,
+
+ /**
+ * Server expired/crashed. Currently undergoing WAL splitting.
+ */
+ SPLITTING,
+
+ /**
+ * WAL splitting done. This state will be used to tell the UnassignProcedure that it can safely
+ * quit. See the comments in UnassignProcedure.remoteCallFailed for more details.
+ */
+ OFFLINE
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hbase/blob/bb349413/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/ServerStateNode.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/ServerStateNode.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/ServerStateNode.java
new file mode 100644
index 0000000..2042214
--- /dev/null
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/ServerStateNode.java
@@ -0,0 +1,128 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.master.assignment;
+
+import java.util.ArrayList;
+import java.util.Set;
+import java.util.concurrent.ConcurrentHashMap;
+import org.apache.hadoop.hbase.ServerName;
+import org.apache.hadoop.hbase.client.RegionInfo;
+import org.apache.hadoop.hbase.procedure2.ProcedureEvent;
+import org.apache.yetus.audience.InterfaceAudience;
+
+/**
+ * State of Server; list of hosted regions, etc.
+ */
+@InterfaceAudience.Private
+class ServerStateNode implements Comparable<ServerStateNode> {
+
+ private static final class ServerReportEvent extends ProcedureEvent<ServerName> {
+ public ServerReportEvent(final ServerName serverName) {
+ super(serverName);
+ }
+ }
+
+ private final ServerReportEvent reportEvent;
+
+ private final Set<RegionStateNode> regions;
+ private final ServerName serverName;
+
+ private volatile ServerState state = ServerState.ONLINE;
+
+ public ServerStateNode(final ServerName serverName) {
+ this.serverName = serverName;
+ this.regions = ConcurrentHashMap.newKeySet();
+ this.reportEvent = new ServerReportEvent(serverName);
+ }
+
+ public ServerName getServerName() {
+ return serverName;
+ }
+
+ public ServerState getState() {
+ return state;
+ }
+
+ public ProcedureEvent<?> getReportEvent() {
+ return reportEvent;
+ }
+
+ public boolean isInState(final ServerState... expected) {
+ boolean expectedState = false;
+ if (expected != null) {
+ for (int i = 0; i < expected.length; ++i) {
+ expectedState |= (state == expected[i]);
+ }
+ }
+ return expectedState;
+ }
+
+ void setState(final ServerState state) {
+ this.state = state;
+ }
+
+ public Set<RegionStateNode> getRegions() {
+ return regions;
+ }
+
+ public int getRegionCount() {
+ return regions.size();
+ }
+
+ public ArrayList<RegionInfo> getRegionInfoList() {
+ ArrayList<RegionInfo> hris = new ArrayList<RegionInfo>(regions.size());
+ for (RegionStateNode region : regions) {
+ hris.add(region.getRegionInfo());
+ }
+ return hris;
+ }
+
+ public void addRegion(final RegionStateNode regionNode) {
+ this.regions.add(regionNode);
+ }
+
+ public void removeRegion(final RegionStateNode regionNode) {
+ this.regions.remove(regionNode);
+ }
+
+ @Override
+ public int compareTo(final ServerStateNode other) {
+ return getServerName().compareTo(other.getServerName());
+ }
+
+ @Override
+ public int hashCode() {
+ return getServerName().hashCode();
+ }
+
+ @Override
+ public boolean equals(final Object other) {
+ if (this == other) {
+ return true;
+ }
+ if (!(other instanceof ServerStateNode)) {
+ return false;
+ }
+ return compareTo((ServerStateNode) other) == 0;
+ }
+
+ @Override
+ public String toString() {
+ return String.format("ServerStateNode(%s)", getServerName());
+ }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hbase/blob/bb349413/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/SplitTableRegionProcedure.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/SplitTableRegionProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/SplitTableRegionProcedure.java
index 29dbabb..4e292c5 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/SplitTableRegionProcedure.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/SplitTableRegionProcedure.java
@@ -15,7 +15,6 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
package org.apache.hadoop.hbase.master.assignment;
import java.io.IOException;
@@ -32,6 +31,7 @@ import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
+import java.util.stream.Stream;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
@@ -45,13 +45,11 @@ import org.apache.hadoop.hbase.client.MasterSwitchType;
import org.apache.hadoop.hbase.client.Mutation;
import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.client.RegionInfoBuilder;
-import org.apache.hadoop.hbase.client.RegionReplicaUtil;
import org.apache.hadoop.hbase.client.TableDescriptor;
import org.apache.hadoop.hbase.io.hfile.CacheConfig;
import org.apache.hadoop.hbase.master.MasterCoprocessorHost;
import org.apache.hadoop.hbase.master.MasterFileSystem;
import org.apache.hadoop.hbase.master.RegionState.State;
-import org.apache.hadoop.hbase.master.assignment.RegionStates.RegionStateNode;
import org.apache.hadoop.hbase.master.normalizer.NormalizationPlan;
import org.apache.hadoop.hbase.master.procedure.AbstractStateMachineRegionProcedure;
import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv;
@@ -139,16 +137,6 @@ public class SplitTableRegionProcedure
}
/**
- * Check whether there are recovered.edits in the parent closed region.
- * @param env master env
- * @throws IOException IOException
- */
- static boolean hasRecoveredEdits(MasterProcedureEnv env, RegionInfo ri) throws IOException {
- return WALSplitter.hasRecoveredEdits(env.getMasterServices().getFileSystem(),
- env.getMasterConfiguration(), ri);
- }
-
- /**
* Check whether the region is splittable
* @param env MasterProcedureEnv
* @param regionToSplit parent Region to be split
@@ -169,12 +157,13 @@ public class SplitTableRegionProcedure
if (node != null) {
try {
if (bestSplitRow == null || bestSplitRow.length == 0) {
- LOG.info("splitKey isn't explicitly specified, " + " will try to find a best split key from RS");
+ LOG
+ .info("splitKey isn't explicitly specified, will try to find a best split key from RS");
}
// Always set bestSplitRow request as true here,
// need to call Region#checkSplit to check it splittable or not
- GetRegionInfoResponse response =
- Util.getRegionInfoResponse(env, node.getRegionLocation(), node.getRegionInfo(), true);
+ GetRegionInfoResponse response = AssignmentManagerUtil.getRegionInfoResponse(env,
+ node.getRegionLocation(), node.getRegionInfo(), true);
if(bestSplitRow == null || bestSplitRow.length == 0) {
bestSplitRow = response.hasBestSplitRow() ? response.getBestSplitRow().toByteArray() : null;
}
@@ -189,14 +178,17 @@ public class SplitTableRegionProcedure
}
if (!splittable) {
- IOException e = new DoNotRetryIOException(regionToSplit.getShortNameToLog() + " NOT splittable");
- if (splittableCheckIOE != null) e.initCause(splittableCheckIOE);
+ IOException e =
+ new DoNotRetryIOException(regionToSplit.getShortNameToLog() + " NOT splittable");
+ if (splittableCheckIOE != null) {
+ e.initCause(splittableCheckIOE);
+ }
throw e;
}
- if(bestSplitRow == null || bestSplitRow.length == 0) {
- throw new DoNotRetryIOException("Region not splittable because bestSplitPoint = null, "
- + "maybe table is too small for auto split. For force split, try specifying split row");
+ if (bestSplitRow == null || bestSplitRow.length == 0) {
+ throw new DoNotRetryIOException("Region not splittable because bestSplitPoint = null, " +
+ "maybe table is too small for auto split. For force split, try specifying split row");
}
if (Bytes.equals(regionToSplit.getStartKey(), bestSplitRow)) {
@@ -205,9 +197,8 @@ public class SplitTableRegionProcedure
}
if (!regionToSplit.containsRow(bestSplitRow)) {
- throw new DoNotRetryIOException(
- "Split row is not inside region key range splitKey:" + Bytes.toStringBinary(splitRow) +
- " region: " + regionToSplit);
+ throw new DoNotRetryIOException("Split row is not inside region key range splitKey:" +
+ Bytes.toStringBinary(splitRow) + " region: " + regionToSplit);
}
}
@@ -228,8 +219,19 @@ public class SplitTableRegionProcedure
return rid;
}
+ private void removeNonDefaultReplicas(MasterProcedureEnv env) throws IOException {
+ AssignmentManagerUtil.removeNonDefaultReplicas(env, Stream.of(getParentRegion()),
+ getRegionReplication(env));
+ }
+
+ private void checkClosedRegions(MasterProcedureEnv env) throws IOException {
+ // theoretically this should not happen any more after we use TRSP, but anyway let's add a check
+ // here
+ AssignmentManagerUtil.checkClosedRegion(env, getParentRegion());
+ }
+
@Override
- protected Flow executeFromState(final MasterProcedureEnv env, final SplitTableRegionState state)
+ protected Flow executeFromState(MasterProcedureEnv env, SplitTableRegionState state)
throws InterruptedException {
LOG.trace("{} execute state={}", this, state);
@@ -247,24 +249,15 @@ public class SplitTableRegionProcedure
setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_CLOSE_PARENT_REGION);
break;
case SPLIT_TABLE_REGION_CLOSE_PARENT_REGION:
- addChildProcedure(createUnassignProcedures(env, getRegionReplication(env)));
+ addChildProcedure(createUnassignProcedures(env));
setNextState(SplitTableRegionState.SPLIT_TABLE_REGIONS_CHECK_CLOSED_REGIONS);
break;
case SPLIT_TABLE_REGIONS_CHECK_CLOSED_REGIONS:
- if (hasRecoveredEdits(env, getRegion())) {
- // If recovered edits, reopen parent region and then re-run the close by going back to
- // SPLIT_TABLE_REGION_CLOSE_PARENT_REGION. We might have to cycle here a few times
- // (TODO: Add being able to open a region in read-only mode). Open the primary replica
- // in this case only where we just want to pickup the left-out replicated.edits.
- LOG.info("Found recovered.edits under {}, reopen so we pickup these missed edits!",
- getRegion().getEncodedName());
- addChildProcedure(env.getAssignmentManager().createAssignProcedure(getParentRegion()));
- setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_CLOSE_PARENT_REGION);
- } else {
- setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_CREATE_DAUGHTER_REGIONS);
- }
+ checkClosedRegions(env);
+ setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_CREATE_DAUGHTER_REGIONS);
break;
case SPLIT_TABLE_REGION_CREATE_DAUGHTER_REGIONS:
+ removeNonDefaultReplicas(env);
createDaughterRegions(env);
setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_WRITE_MAX_SEQUENCE_ID_FILE);
break;
@@ -285,7 +278,7 @@ public class SplitTableRegionProcedure
setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_OPEN_CHILD_REGIONS);
break;
case SPLIT_TABLE_REGION_OPEN_CHILD_REGIONS:
- addChildProcedure(createAssignProcedures(env, getRegionReplication(env)));
+ addChildProcedure(createAssignProcedures(env));
setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_POST_OPERATION);
break;
case SPLIT_TABLE_REGION_POST_OPERATION:
@@ -544,24 +537,14 @@ public class SplitTableRegionProcedure
/**
* Rollback close parent region
- * @param env MasterProcedureEnv
*/
- private void openParentRegion(final MasterProcedureEnv env) throws IOException {
- // Check whether the region is closed; if so, open it in the same server
- final int regionReplication = getRegionReplication(env);
- final ServerName serverName = getParentRegionServerName(env);
-
- final AssignProcedure[] procs = new AssignProcedure[regionReplication];
- for (int i = 0; i < regionReplication; ++i) {
- final RegionInfo hri = RegionReplicaUtil.getRegionInfoForReplica(getParentRegion(), i);
- procs[i] = env.getAssignmentManager().createAssignProcedure(hri, serverName);
- }
- env.getMasterServices().getMasterProcedureExecutor().submitProcedures(procs);
+ private void openParentRegion(MasterProcedureEnv env) throws IOException {
+ AssignmentManagerUtil.reopenRegionsForRollback(env, Stream.of(getParentRegion()),
+ getRegionReplication(env), getParentRegionServerName(env));
}
/**
* Create daughter regions
- * @param env MasterProcedureEnv
*/
@VisibleForTesting
public void createDaughterRegions(final MasterProcedureEnv env) throws IOException {
@@ -818,35 +801,21 @@ public class SplitTableRegionProcedure
}
private ServerName getParentRegionServerName(final MasterProcedureEnv env) {
- return env.getMasterServices().getAssignmentManager()
- .getRegionStates().getRegionServerOfRegion(getParentRegion());
+ return env.getMasterServices().getAssignmentManager().getRegionStates()
+ .getRegionServerOfRegion(getParentRegion());
}
- private UnassignProcedure[] createUnassignProcedures(final MasterProcedureEnv env,
- final int regionReplication) {
- final UnassignProcedure[] procs = new UnassignProcedure[regionReplication];
- for (int i = 0; i < procs.length; ++i) {
- final RegionInfo hri = RegionReplicaUtil.getRegionInfoForReplica(getParentRegion(), i);
- procs[i] = env.getAssignmentManager().
- createUnassignProcedure(hri, null, true, !RegionReplicaUtil.isDefaultReplica(hri));
- }
- return procs;
+ private TransitRegionStateProcedure[] createUnassignProcedures(MasterProcedureEnv env)
+ throws IOException {
+ return AssignmentManagerUtil.createUnassignProceduresForSplitOrMerge(env,
+ Stream.of(getParentRegion()), getRegionReplication(env));
}
- private AssignProcedure[] createAssignProcedures(final MasterProcedureEnv env,
- final int regionReplication) {
- final ServerName targetServer = getParentRegionServerName(env);
- final AssignProcedure[] procs = new AssignProcedure[regionReplication * 2];
- int procsIdx = 0;
- for (int i = 0; i < regionReplication; ++i) {
- final RegionInfo hri = RegionReplicaUtil.getRegionInfoForReplica(daughter_1_RI, i);
- procs[procsIdx++] = env.getAssignmentManager().createAssignProcedure(hri, targetServer);
- }
- for (int i = 0; i < regionReplication; ++i) {
- final RegionInfo hri = RegionReplicaUtil.getRegionInfoForReplica(daughter_2_RI, i);
- procs[procsIdx++] = env.getAssignmentManager().createAssignProcedure(hri, targetServer);
- }
- return procs;
+ private TransitRegionStateProcedure[] createAssignProcedures(MasterProcedureEnv env)
+ throws IOException {
+ return AssignmentManagerUtil.createAssignProceduresForOpeningNewRegions(env,
+ Stream.of(daughter_1_RI, daughter_2_RI), getRegionReplication(env),
+ getParentRegionServerName(env));
}
private int getRegionReplication(final MasterProcedureEnv env) throws IOException {