You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by zh...@apache.org on 2018/08/20 22:16:25 UTC
[4/7] hbase git commit: HBASE-20881 Introduce a region transition
procedure to handle all the state transition for a region
http://git-wip-us.apache.org/repos/asf/hbase/blob/bb349413/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/TransitRegionStateProcedure.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/TransitRegionStateProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/TransitRegionStateProcedure.java
new file mode 100644
index 0000000..e853b9b
--- /dev/null
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/TransitRegionStateProcedure.java
@@ -0,0 +1,569 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.master.assignment;
+
+import edu.umd.cs.findbugs.annotations.Nullable;
+import java.io.IOException;
+import org.apache.hadoop.hbase.HBaseIOException;
+import org.apache.hadoop.hbase.ServerName;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.RegionInfo;
+import org.apache.hadoop.hbase.client.RegionReplicaUtil;
+import org.apache.hadoop.hbase.client.RetriesExhaustedException;
+import org.apache.hadoop.hbase.exceptions.UnexpectedStateException;
+import org.apache.hadoop.hbase.master.RegionState.State;
+import org.apache.hadoop.hbase.master.procedure.AbstractStateMachineRegionProcedure;
+import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv;
+import org.apache.hadoop.hbase.procedure2.Procedure;
+import org.apache.hadoop.hbase.procedure2.ProcedureMetrics;
+import org.apache.hadoop.hbase.procedure2.ProcedureStateSerializer;
+import org.apache.hadoop.hbase.procedure2.ProcedureSuspendedException;
+import org.apache.hadoop.hbase.procedure2.ProcedureYieldException;
+import org.apache.yetus.audience.InterfaceAudience;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.RegionStateTransitionState;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.RegionStateTransitionStateData;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.ProcedureProtos;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.RegionStateTransition.TransitionCode;
+
+/**
+ * The procedure to deal with the state transition of a region. A region with a TRSP in place is
+ * called RIT, i.e., RegionInTransition.
+ * <p/>
+ * It can be used to assign/unassign/reopen/move a region, and for
+ * {@link #unassign(MasterProcedureEnv, RegionInfo)} and
+ * {@link #reopen(MasterProcedureEnv, RegionInfo)}, you do not need to specify a target server, and
+ * for {@link #assign(MasterProcedureEnv, RegionInfo, ServerName)} and
+ * {@link #move(MasterProcedureEnv, RegionInfo, ServerName)}, if you want to you can provide a
+ * target server. And for {@link #move(MasterProcedureEnv, RegionInfo, ServerName)}, if you do not
+ * specify a targetServer, we will select one randomly.
+ * <p/>
+ * <p/>
+ * The typical state transition for assigning a region is:
+ *
+ * <pre>
+ * GET_ASSIGN_CANDIDATE ------> OPEN -----> CONFIRM_OPENED
+ * </pre>
+ *
+ * Notice that, if there are failures we may go back to the {@code GET_ASSIGN_CANDIDATE} state to
+ * try again.
+ * <p/>
+ * The typical state transition for unassigning a region is:
+ *
+ * <pre>
+ * CLOSE -----> CONFIRM_CLOSED
+ * </pre>
+ *
+ * Here things are a bit different: if there are failures, especially if there is a server
+ * crash, we will go to the {@code GET_ASSIGN_CANDIDATE} state to bring the region online first, and
+ * then go through the normal way to unassign it.
+ * <p/>
+ * The typical state transition for reopening/moving a region is:
+ *
+ * <pre>
+ * CLOSE -----> CONFIRM_CLOSED -----> GET_ASSIGN_CANDIDATE ------> OPEN -----> CONFIRM_OPENED
+ * </pre>
+ *
+ * The retry logic is the same as for the assign/unassign cases above.
+ * <p/>
+ * Notice that, although we allow specifying a target server, it just acts as a candidate; we do
+ * not guarantee that the region will finally be on the target server. If this is important for
+ * you, you should check whether the region is on the target server after the procedure finishes.
+ * <p/>
+ * When you want to schedule a TRSP, please check whether there is still one for this region, and
+ * the check should be under the RegionStateNode lock. We will remove the TRSP from a
+ * RegionStateNode when we are done, see the code in {@code reportTransition} method below. There
+ * could be at most one TRSP for a given region.
+ */
+@InterfaceAudience.Private
+public class TransitRegionStateProcedure
+ extends AbstractStateMachineRegionProcedure<RegionStateTransitionState> {
+
+ private static final Logger LOG = LoggerFactory.getLogger(TransitRegionStateProcedure.class);
+
+ private RegionStateTransitionState initialState;
+
+ private RegionStateTransitionState lastState;
+
+ // the candidate where we want to assign the region to.
+ private ServerName assignCandidate;
+
+ private boolean forceNewPlan;
+
+ private int attempt;
+
+ public TransitRegionStateProcedure() {
+ }
+
+ private TransitRegionStateProcedure(MasterProcedureEnv env, RegionInfo hri,
+ ServerName assignCandidate, boolean forceNewPlan, RegionStateTransitionState initialState,
+ RegionStateTransitionState lastState) {
+ super(env, hri);
+ this.assignCandidate = assignCandidate;
+ this.forceNewPlan = forceNewPlan;
+ this.initialState = initialState;
+ this.lastState = lastState;
+ }
+
+ @Override
+ public TableOperationType getTableOperationType() {
+ // TODO: maybe we should make another type here, REGION_TRANSITION?
+ return TableOperationType.REGION_EDIT;
+ }
+
+ @Override
+ protected boolean waitInitialized(MasterProcedureEnv env) {
+ if (TableName.isMetaTableName(getTableName())) {
+ return false;
+ }
+ // First we need meta to be loaded, and second, if meta is not online then we are likely to
+ // fail when updating meta so we wait until it is assigned.
+ AssignmentManager am = env.getAssignmentManager();
+ return am.waitMetaLoaded(this) || am.waitMetaAssigned(this, getRegion());
+ }
+
+ private void queueAssign(MasterProcedureEnv env, RegionStateNode regionNode)
+ throws ProcedureSuspendedException {
+ // Here the assumption is that, the region must be in CLOSED state, so the region location
+ // will be null. And if we fail to open the region and retry here, the forceNewPlan will be
+ // true, and also we will set the region location to null.
+ boolean retain = false;
+ if (!forceNewPlan) {
+ if (assignCandidate != null) {
+ retain = assignCandidate.equals(regionNode.getLastHost());
+ regionNode.setRegionLocation(assignCandidate);
+ } else if (regionNode.getLastHost() != null) {
+ retain = true;
+ LOG.info("Setting lastHost as the region location {}", regionNode.getLastHost());
+ regionNode.setRegionLocation(regionNode.getLastHost());
+ }
+ }
+ LOG.info("Starting {}; {}; forceNewPlan={}, retain={}", this, regionNode.toShortString(),
+ forceNewPlan, retain);
+ env.getAssignmentManager().queueAssign(regionNode);
+ setNextState(RegionStateTransitionState.REGION_STATE_TRANSITION_OPEN);
+ if (regionNode.getProcedureEvent().suspendIfNotReady(this)) {
+ throw new ProcedureSuspendedException();
+ }
+ }
+
+ private void openRegion(MasterProcedureEnv env, RegionStateNode regionNode) throws IOException {
+ ServerName loc = regionNode.getRegionLocation();
+ if (loc == null) {
+ LOG.warn("No location specified for {}, jump back to state {} to get one", getRegion(),
+ RegionStateTransitionState.REGION_STATE_TRANSITION_GET_ASSIGN_CANDIDATE);
+ setNextState(RegionStateTransitionState.REGION_STATE_TRANSITION_GET_ASSIGN_CANDIDATE);
+ return;
+ }
+ env.getAssignmentManager().regionOpening(regionNode);
+ addChildProcedure(new OpenRegionProcedure(getRegion(), loc));
+ setNextState(RegionStateTransitionState.REGION_STATE_TRANSITION_CONFIRM_OPENED);
+ }
+
+ private Flow confirmOpened(MasterProcedureEnv env, RegionStateNode regionNode)
+ throws IOException {
+ // notice that, for normal case, if we successfully opened a region, we will not arrive here, as
+ // in reportTransition we will call unsetProcedure, and in executeFromState we will return
+ // directly. But if the master is crashed before we finish the procedure, then next time we will
+ // arrive here. So we still need to add code for normal cases.
+ if (regionNode.isInState(State.OPEN)) {
+ attempt = 0;
+ if (lastState == RegionStateTransitionState.REGION_STATE_TRANSITION_CONFIRM_OPENED) {
+ // we are the last state, finish
+ regionNode.unsetProcedure(this);
+ return Flow.NO_MORE_STATE;
+ }
+ // It is possible that we arrive here but confirm opened is not the last state, for example,
+ // when merging or splitting a region, we unassign the region from a RS and the RS crashes,
+ // then there will be recovered edits for this region, we'd better make the region online
+ // again and then unassign it, otherwise we have to fail the merge/split procedure as we may
+ // lose data.
+ setNextState(RegionStateTransitionState.REGION_STATE_TRANSITION_CLOSE);
+ return Flow.HAS_MORE_STATE;
+ }
+
+ if (incrementAndCheckMaxAttempts(env, regionNode)) {
+ env.getAssignmentManager().regionFailedOpen(regionNode, true);
+ setFailure(getClass().getSimpleName(), new RetriesExhaustedException(
+ "Max attempts " + env.getAssignmentManager().getAssignMaxAttempts() + " exceeded"));
+ regionNode.unsetProcedure(this);
+ return Flow.NO_MORE_STATE;
+ }
+ env.getAssignmentManager().regionFailedOpen(regionNode, false);
+ // we failed to assign the region, force a new plan
+ forceNewPlan = true;
+ regionNode.setRegionLocation(null);
+ setNextState(RegionStateTransitionState.REGION_STATE_TRANSITION_GET_ASSIGN_CANDIDATE);
+ // Here we do not throw exception because we want the region to be online ASAP
+ return Flow.HAS_MORE_STATE;
+ }
+
+ private void closeRegion(MasterProcedureEnv env, RegionStateNode regionNode) throws IOException {
+ if (regionNode.isInState(State.OPEN, State.CLOSING, State.MERGING, State.SPLITTING)) {
+ // this is the normal case
+ env.getAssignmentManager().regionClosing(regionNode);
+ addChildProcedure(
+ new CloseRegionProcedure(getRegion(), regionNode.getRegionLocation(), assignCandidate));
+ setNextState(RegionStateTransitionState.REGION_STATE_TRANSITION_CONFIRM_CLOSED);
+ } else {
+ forceNewPlan = true;
+ regionNode.setRegionLocation(null);
+ setNextState(RegionStateTransitionState.REGION_STATE_TRANSITION_GET_ASSIGN_CANDIDATE);
+ }
+ }
+
+ private Flow confirmClosed(MasterProcedureEnv env, RegionStateNode regionNode)
+ throws IOException {
+ // notice that, for normal case, if we successfully closed a region, we will not arrive here, as
+ // in reportTransition we will call unsetProcedure, and in executeFromState we will return
+ // directly. But if the master is crashed before we finish the procedure, then next time we will
+ // arrive here. So we still need to add code for normal cases.
+ if (regionNode.isInState(State.CLOSED)) {
+ attempt = 0;
+ if (lastState == RegionStateTransitionState.REGION_STATE_TRANSITION_CONFIRM_CLOSED) {
+ // we are the last state, finish
+ regionNode.unsetProcedure(this);
+ return Flow.NO_MORE_STATE;
+ }
+ // This means we need to open the region again, should be a move or reopen
+ setNextState(RegionStateTransitionState.REGION_STATE_TRANSITION_GET_ASSIGN_CANDIDATE);
+ return Flow.HAS_MORE_STATE;
+ }
+ if (regionNode.isInState(State.CLOSING)) {
+ // This is possible, think the target RS crashes and restarts immediately, the close region
+ // operation will return a NotServingRegionException soon, we can only recover after SCP takes
+ // care of this RS. So here we throw an IOException to let upper layer to retry with backoff.
+ setNextState(RegionStateTransitionState.REGION_STATE_TRANSITION_CLOSE);
+ throw new HBaseIOException("Failed to close region");
+ }
+ // abnormally closed, need to reopen it, no matter what is the last state, see the comment in
+ // confirmOpened for more details that why we need to reopen the region first even if we just
+ // want to close it.
+ // The only exception is for non-default replica, where we do not need to deal with recovered
+ // edits. Notice that the region will remain in ABNORMALLY_CLOSED state, the upper layer need to
+ // deal with this state. For non-default replica, this is usually the same with CLOSED.
+ assert regionNode.isInState(State.ABNORMALLY_CLOSED);
+ if (!RegionReplicaUtil.isDefaultReplica(getRegion()) &&
+ lastState == RegionStateTransitionState.REGION_STATE_TRANSITION_CONFIRM_CLOSED) {
+ regionNode.unsetProcedure(this);
+ return Flow.NO_MORE_STATE;
+ }
+ attempt = 0;
+ setNextState(RegionStateTransitionState.REGION_STATE_TRANSITION_GET_ASSIGN_CANDIDATE);
+ return Flow.HAS_MORE_STATE;
+ }
+
+ // Override to lock RegionStateNode
+ @SuppressWarnings("rawtypes")
+ @Override
+ protected Procedure[] execute(MasterProcedureEnv env)
+ throws ProcedureSuspendedException, ProcedureYieldException, InterruptedException {
+ RegionStateNode regionNode =
+ env.getAssignmentManager().getRegionStates().getOrCreateRegionStateNode(getRegion());
+ regionNode.lock();
+ try {
+ return super.execute(env);
+ } finally {
+ regionNode.unlock();
+ }
+ }
+
+ private RegionStateNode getRegionStateNode(MasterProcedureEnv env) {
+ return env.getAssignmentManager().getRegionStates().getOrCreateRegionStateNode(getRegion());
+ }
+
+ @Override
+ protected Flow executeFromState(MasterProcedureEnv env, RegionStateTransitionState state)
+ throws ProcedureSuspendedException, ProcedureYieldException, InterruptedException {
+ RegionStateNode regionNode = getRegionStateNode(env);
+ if (regionNode.getProcedure() != this) {
+ // This is possible, and is the normal case, as we will call unsetProcedure in
+ // reportTransition, this means we are already done.
+ // This is because, when we mark the region as OPENED or CLOSED, all the work
+ // should have already been done, and logically we could have another TRSP scheduled for this
+ // region immediately (think of a RS crash at that point...).
+ return Flow.NO_MORE_STATE;
+ }
+ try {
+ switch (state) {
+ case REGION_STATE_TRANSITION_GET_ASSIGN_CANDIDATE:
+ queueAssign(env, regionNode);
+ return Flow.HAS_MORE_STATE;
+ case REGION_STATE_TRANSITION_OPEN:
+ openRegion(env, regionNode);
+ return Flow.HAS_MORE_STATE;
+ case REGION_STATE_TRANSITION_CONFIRM_OPENED:
+ return confirmOpened(env, regionNode);
+ case REGION_STATE_TRANSITION_CLOSE:
+ closeRegion(env, regionNode);
+ return Flow.HAS_MORE_STATE;
+ case REGION_STATE_TRANSITION_CONFIRM_CLOSED:
+ return confirmClosed(env, regionNode);
+ default:
+ throw new UnsupportedOperationException("unhandled state=" + state);
+ }
+ } catch (IOException e) {
+ long backoff = getBackoffTime(this.attempt++);
+ LOG.warn(
+ "Failed transition, suspend {}secs {}; {}; waiting on rectified condition fixed " +
+ "by other Procedure or operator intervention",
+ backoff / 1000, this, regionNode.toShortString(), e);
+ regionNode.getProcedureEvent().suspend();
+ if (regionNode.getProcedureEvent().suspendIfNotReady(this)) {
+ setTimeout(Math.toIntExact(backoff));
+ setState(ProcedureProtos.ProcedureState.WAITING_TIMEOUT);
+ throw new ProcedureSuspendedException();
+ }
+ return Flow.HAS_MORE_STATE;
+ }
+ }
+
+ /**
+ * At end of timeout, wake ourselves up so we run again.
+ */
+ @Override
+ protected synchronized boolean setTimeoutFailure(MasterProcedureEnv env) {
+ setState(ProcedureProtos.ProcedureState.RUNNABLE);
+ getRegionStateNode(env).getProcedureEvent().wake(env.getProcedureScheduler());
+ return false; // 'false' means that this procedure handled the timeout
+ }
+
+ private void reportTransitionOpened(MasterProcedureEnv env, RegionStateNode regionNode,
+ ServerName serverName, TransitionCode code, long openSeqNum) throws IOException {
+ switch (code) {
+ case OPENED:
+ if (openSeqNum < 0) {
+ throw new UnexpectedStateException("Received report unexpected " + code +
+ " transition openSeqNum=" + openSeqNum + ", " + regionNode);
+ }
+ if (openSeqNum <= regionNode.getOpenSeqNum()) {
+ if (openSeqNum != 0) {
+ LOG.warn("Skip update of openSeqNum for {} with {} because the currentSeqNum={}",
+ regionNode, openSeqNum, regionNode.getOpenSeqNum());
+ }
+ } else {
+ regionNode.setOpenSeqNum(openSeqNum);
+ }
+ env.getAssignmentManager().regionOpened(regionNode);
+ if (lastState == RegionStateTransitionState.REGION_STATE_TRANSITION_CONFIRM_OPENED) {
+ // we are done
+ regionNode.unsetProcedure(this);
+ }
+ regionNode.getProcedureEvent().wake(env.getProcedureScheduler());
+ break;
+ case FAILED_OPEN:
+ // just wake up the procedure and see if we can retry
+ regionNode.getProcedureEvent().wake(env.getProcedureScheduler());
+ break;
+ default:
+ throw new UnexpectedStateException(
+ "Received report unexpected " + code + " transition openSeqNum=" + openSeqNum + ", " +
+ regionNode.toShortString() + ", " + this + ", expected OPENED or FAILED_OPEN.");
+ }
+ }
+
+ // we do not need seqId for closing a region
+ private void reportTransitionClosed(MasterProcedureEnv env, RegionStateNode regionNode,
+ ServerName serverName, TransitionCode code) throws IOException {
+ switch (code) {
+ case CLOSED:
+ env.getAssignmentManager().regionClosed(regionNode, true);
+ if (lastState == RegionStateTransitionState.REGION_STATE_TRANSITION_CONFIRM_CLOSED) {
+ // we are done
+ regionNode.unsetProcedure(this);
+ }
+ regionNode.getProcedureEvent().wake(env.getProcedureScheduler());
+ break;
+ default:
+ throw new UnexpectedStateException("Received report unexpected " + code + " transition, " +
+ regionNode.toShortString() + ", " + this + ", expected CLOSED.");
+ }
+ }
+
+ // Should be called with RegionStateNode locked
+ public void reportTransition(MasterProcedureEnv env, RegionStateNode regionNode,
+ ServerName serverName, TransitionCode code, long seqId) throws IOException {
+ switch (getCurrentState()) {
+ case REGION_STATE_TRANSITION_CONFIRM_OPENED:
+ reportTransitionOpened(env, regionNode, serverName, code, seqId);
+ break;
+ case REGION_STATE_TRANSITION_CONFIRM_CLOSED:
+ reportTransitionClosed(env, regionNode, serverName, code);
+ break;
+ default:
+ LOG.warn("{} received unexpected report transition call from {}, code={}, seqId={}", this,
+ serverName, code, seqId);
+ }
+ }
+
+ // Should be called with RegionStateNode locked
+ public void serverCrashed(MasterProcedureEnv env, RegionStateNode regionNode,
+ ServerName serverName) throws IOException {
+ // Notice that, in this method, we do not change the procedure state, instead, we update the
+ // region state in hbase:meta. This is because the procedure state change will not be
+ // persisted until the procedure is woken up and finishes one step; if we crash before that, the
+ // information will be lost. So here we will update the region state in hbase:meta, and when the
+ // procedure is woken up, it will process the error and jump to the correct procedure state.
+ RegionStateTransitionState currentState = getCurrentState();
+ switch (currentState) {
+ case REGION_STATE_TRANSITION_CLOSE:
+ case REGION_STATE_TRANSITION_CONFIRM_CLOSED:
+ case REGION_STATE_TRANSITION_CONFIRM_OPENED:
+ // for these 3 states, the region may still be online on the crashed server
+ if (serverName.equals(regionNode.getRegionLocation())) {
+ env.getAssignmentManager().regionClosed(regionNode, false);
+ if (currentState != RegionStateTransitionState.REGION_STATE_TRANSITION_CLOSE) {
+ regionNode.getProcedureEvent().wake(env.getProcedureScheduler());
+ }
+ }
+ break;
+ default:
+ // If the procedure is in the other 2 states, then actually we should not arrive here, as we
+ // know that the region is not online on any server, so we need to do nothing... But anyway
+ // let's add a log here
+ LOG.warn("{} received unexpected server crash call for region {} from {}", this, regionNode,
+ serverName);
+
+ }
+ }
+
+ private long getBackoffTime(int attempts) {
+ long backoffTime = (long) (1000 * Math.pow(2, attempts));
+ long maxBackoffTime = 60 * 60 * 1000; // An hour. Hard-coded for now.
+ return backoffTime < maxBackoffTime ? backoffTime : maxBackoffTime;
+ }
+
+ private boolean incrementAndCheckMaxAttempts(MasterProcedureEnv env, RegionStateNode regionNode) {
+ int retries = env.getAssignmentManager().getRegionStates().addToFailedOpen(regionNode)
+ .incrementAndGetRetries();
+ int max = env.getAssignmentManager().getAssignMaxAttempts();
+ LOG.info(
+ "Retry=" + retries + " of max=" + max + "; " + this + "; " + regionNode.toShortString());
+ return retries >= max;
+ }
+
+ @Override
+ protected void rollbackState(MasterProcedureEnv env, RegionStateTransitionState state)
+ throws IOException, InterruptedException {
+ // no rollback
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ protected RegionStateTransitionState getState(int stateId) {
+ return RegionStateTransitionState.forNumber(stateId);
+ }
+
+ @Override
+ protected int getStateId(RegionStateTransitionState state) {
+ return state.getNumber();
+ }
+
+ @Override
+ protected RegionStateTransitionState getInitialState() {
+ return initialState;
+ }
+
+ @Override
+ protected void serializeStateData(ProcedureStateSerializer serializer) throws IOException {
+ super.serializeStateData(serializer);
+ RegionStateTransitionStateData.Builder builder = RegionStateTransitionStateData.newBuilder()
+ .setInitialState(initialState).setLastState(lastState).setForceNewPlan(forceNewPlan);
+ if (assignCandidate != null) {
+ builder.setAssignCandidate(ProtobufUtil.toServerName(assignCandidate));
+ }
+ serializer.serialize(builder.build());
+ }
+
+ @Override
+ protected void deserializeStateData(ProcedureStateSerializer serializer) throws IOException {
+ super.deserializeStateData(serializer);
+ RegionStateTransitionStateData data =
+ serializer.deserialize(RegionStateTransitionStateData.class);
+ initialState = data.getInitialState();
+ lastState = data.getLastState();
+ forceNewPlan = data.getForceNewPlan();
+ if (data.hasAssignCandidate()) {
+ assignCandidate = ProtobufUtil.toServerName(data.getAssignCandidate());
+ }
+ }
+
+ @Override
+ protected ProcedureMetrics getProcedureMetrics(MasterProcedureEnv env) {
+ // TODO: need to reimplement the metrics system for assign/unassign
+ if (initialState == RegionStateTransitionState.REGION_STATE_TRANSITION_GET_ASSIGN_CANDIDATE) {
+ return env.getAssignmentManager().getAssignmentManagerMetrics().getAssignProcMetrics();
+ } else {
+ return env.getAssignmentManager().getAssignmentManagerMetrics().getUnassignProcMetrics();
+ }
+ }
+
+ @Override
+ public void toStringClassDetails(StringBuilder sb) {
+ super.toStringClassDetails(sb);
+ if (initialState == RegionStateTransitionState.REGION_STATE_TRANSITION_GET_ASSIGN_CANDIDATE) {
+ sb.append(", ASSIGN");
+ } else if (lastState == RegionStateTransitionState.REGION_STATE_TRANSITION_CONFIRM_CLOSED) {
+ sb.append(", UNASSIGN");
+ } else {
+ sb.append(", REOPEN/MOVE");
+ }
+ }
+
+ private static TransitRegionStateProcedure setOwner(MasterProcedureEnv env,
+ TransitRegionStateProcedure proc) {
+ proc.setOwner(env.getRequestUser().getShortName());
+ return proc;
+ }
+
+ // Be careful that, when you call these 4 methods below, you need to manually attach the returned
+ // procedure to the RegionStateNode, otherwise the procedure will quit immediately without doing
+ // anything. See the comment in executeFromState to find out why we need this assumption.
+ public static TransitRegionStateProcedure assign(MasterProcedureEnv env, RegionInfo region,
+ @Nullable ServerName targetServer) {
+ return setOwner(env,
+ new TransitRegionStateProcedure(env, region, targetServer, false,
+ RegionStateTransitionState.REGION_STATE_TRANSITION_GET_ASSIGN_CANDIDATE,
+ RegionStateTransitionState.REGION_STATE_TRANSITION_CONFIRM_OPENED));
+ }
+
+ public static TransitRegionStateProcedure unassign(MasterProcedureEnv env, RegionInfo region) {
+ return setOwner(env,
+ new TransitRegionStateProcedure(env, region, null, false,
+ RegionStateTransitionState.REGION_STATE_TRANSITION_CLOSE,
+ RegionStateTransitionState.REGION_STATE_TRANSITION_CONFIRM_CLOSED));
+ }
+
+ public static TransitRegionStateProcedure reopen(MasterProcedureEnv env, RegionInfo region) {
+ return setOwner(env,
+ new TransitRegionStateProcedure(env, region, null, false,
+ RegionStateTransitionState.REGION_STATE_TRANSITION_CLOSE,
+ RegionStateTransitionState.REGION_STATE_TRANSITION_CONFIRM_OPENED));
+ }
+
+ public static TransitRegionStateProcedure move(MasterProcedureEnv env, RegionInfo region,
+ @Nullable ServerName targetServer) {
+ return setOwner(env,
+ new TransitRegionStateProcedure(env, region, targetServer, targetServer == null,
+ RegionStateTransitionState.REGION_STATE_TRANSITION_CLOSE,
+ RegionStateTransitionState.REGION_STATE_TRANSITION_CONFIRM_OPENED));
+ }
+}
http://git-wip-us.apache.org/repos/asf/hbase/blob/bb349413/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/UnassignProcedure.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/UnassignProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/UnassignProcedure.java
index 4f58a0f..def8fd5 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/UnassignProcedure.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/UnassignProcedure.java
@@ -20,105 +20,38 @@
package org.apache.hadoop.hbase.master.assignment;
import java.io.IOException;
-
-import org.apache.hadoop.hbase.HConstants;
-import org.apache.hadoop.hbase.NotServingRegionException;
import org.apache.hadoop.hbase.ServerName;
-import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.exceptions.UnexpectedStateException;
-import org.apache.hadoop.hbase.favored.FavoredNodesManager;
-import org.apache.hadoop.hbase.ipc.ServerNotRunningYetException;
-import org.apache.hadoop.hbase.master.RegionState.State;
-import org.apache.hadoop.hbase.master.assignment.RegionStates.RegionStateNode;
import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv;
import org.apache.hadoop.hbase.master.procedure.RSProcedureDispatcher.RegionCloseOperation;
-import org.apache.hadoop.hbase.master.procedure.ServerCrashException;
import org.apache.hadoop.hbase.procedure2.ProcedureMetrics;
import org.apache.hadoop.hbase.procedure2.ProcedureStateSerializer;
import org.apache.hadoop.hbase.procedure2.RemoteProcedureDispatcher.RemoteOperation;
-import org.apache.hadoop.hbase.regionserver.RegionServerAbortedException;
-import org.apache.hadoop.hbase.regionserver.RegionServerStoppedException;
import org.apache.yetus.audience.InterfaceAudience;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-import org.apache.hbase.thirdparty.com.google.common.collect.Lists;
+
import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.RegionTransitionState;
import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.UnassignRegionStateData;
import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.RegionStateTransition.TransitionCode;
/**
- * Procedure that describes the unassignment of a single region.
- * There can only be one RegionTransitionProcedure -- i.e. an assign or an unassign -- per region
- * running at a time, since each procedure takes a lock on the region.
- *
- * <p>The Unassign starts by placing a "close region" request in the Remote Dispatcher
- * queue, and the procedure will then go into a "waiting state" (suspend).
- * The Remote Dispatcher will batch the various requests for that server and
- * they will be sent to the RS for execution.
- * The RS will complete the open operation by calling master.reportRegionStateTransition().
- * The AM will intercept the transition report, and notify this procedure.
- * The procedure will wakeup and finish the unassign by publishing its new state on meta.
- * <p>If we are unable to contact the remote regionserver whether because of ConnectException
- * or socket timeout, we will call expire on the server we were trying to contact. We will remain
- * in suspended state waiting for a wake up from the ServerCrashProcedure that is processing the
- * failed server. The basic idea is that if we notice a crashed server, then we have a
- * responsibility; i.e. we should not let go of the region until we are sure the server that was
- * hosting has had its crash processed. If we let go of the region before then, an assign might
- * run before the logs have been split which would make for data loss.
- *
- * <p>TODO: Rather than this tricky coordination between SCP and this Procedure, instead, work on
- * returning a SCP as our subprocedure; probably needs work on the framework to do this,
- * especially if the SCP already created.
+ * Leave here only for checking if we can successfully start the master.
+ * @deprecated Do not use any more.
+ * @see TransitRegionStateProcedure
*/
+@Deprecated
@InterfaceAudience.Private
public class UnassignProcedure extends RegionTransitionProcedure {
- private static final Logger LOG = LoggerFactory.getLogger(UnassignProcedure.class);
- /**
- * Where to send the unassign RPC.
- */
protected volatile ServerName hostingServer;
- /**
- * The Server we will subsequently assign the region too (can be null).
- */
+
protected volatile ServerName destinationServer;
- // TODO: should this be in a reassign procedure?
- // ...and keep unassign for 'disable' case?
private boolean force;
- /**
- * Whether deleting the region from in-memory states after unassigning the region.
- */
private boolean removeAfterUnassigning;
public UnassignProcedure() {
- // Required by the Procedure framework to create the procedure on replay
- super();
- }
-
- public UnassignProcedure(final RegionInfo regionInfo, final ServerName hostingServer,
- final boolean force, final boolean removeAfterUnassigning) {
- this(regionInfo, hostingServer, null, force, removeAfterUnassigning);
- }
-
- public UnassignProcedure(final RegionInfo regionInfo,
- final ServerName hostingServer, final ServerName destinationServer, final boolean force) {
- this(regionInfo, hostingServer, destinationServer, force, false);
- }
-
- public UnassignProcedure(final RegionInfo regionInfo, final ServerName hostingServer,
- final ServerName destinationServer, final boolean force,
- final boolean removeAfterUnassigning) {
- super(regionInfo);
- this.hostingServer = hostingServer;
- this.destinationServer = destinationServer;
- this.force = force;
- this.removeAfterUnassigning = removeAfterUnassigning;
-
- // we don't need REGION_TRANSITION_QUEUE, we jump directly to sending the request
- setTransitionState(RegionTransitionState.REGION_TRANSITION_DISPATCH);
}
@Override
@@ -138,10 +71,9 @@ public class UnassignProcedure extends RegionTransitionProcedure {
}
@Override
- protected void serializeStateData(ProcedureStateSerializer serializer)
- throws IOException {
- UnassignRegionStateData.Builder state = UnassignRegionStateData.newBuilder()
- .setTransitionState(getTransitionState())
+ protected void serializeStateData(ProcedureStateSerializer serializer) throws IOException {
+ UnassignRegionStateData.Builder state =
+ UnassignRegionStateData.newBuilder().setTransitionState(getTransitionState())
.setHostingServer(ProtobufUtil.toServerName(this.hostingServer))
.setRegionInfo(ProtobufUtil.toRegionInfo(getRegionInfo()));
if (this.destinationServer != null) {
@@ -160,10 +92,8 @@ public class UnassignProcedure extends RegionTransitionProcedure {
}
@Override
- protected void deserializeStateData(ProcedureStateSerializer serializer)
- throws IOException {
- final UnassignRegionStateData state =
- serializer.deserialize(UnassignRegionStateData.class);
+ protected void deserializeStateData(ProcedureStateSerializer serializer) throws IOException {
+ final UnassignRegionStateData state = serializer.deserialize(UnassignRegionStateData.class);
setTransitionState(state.getTransitionState());
setRegionInfo(ProtobufUtil.toRegionInfo(state.getRegionInfo()));
this.hostingServer = ProtobufUtil.toServerName(state.getHostingServer());
@@ -178,7 +108,8 @@ public class UnassignProcedure extends RegionTransitionProcedure {
}
@Override
- protected boolean startTransition(final MasterProcedureEnv env, final RegionStateNode regionNode) {
+ protected boolean startTransition(final MasterProcedureEnv env,
+ final RegionStateNode regionNode) {
// nothing to do here. we skip the step in the constructor
// by jumping to REGION_TRANSITION_DISPATCH
throw new UnsupportedOperationException();
@@ -186,53 +117,18 @@ public class UnassignProcedure extends RegionTransitionProcedure {
@Override
protected boolean updateTransition(final MasterProcedureEnv env, final RegionStateNode regionNode)
- throws IOException {
- // if the region is already closed or offline we can't do much...
- if (regionNode.isInState(State.CLOSED, State.OFFLINE)) {
- LOG.info("Not unassigned " + this + "; " + regionNode.toShortString());
- return false;
- }
-
- // if we haven't started the operation yet, we can abort
- if (aborted.get() && regionNode.isInState(State.OPEN)) {
- setAbortFailure(getClass().getSimpleName(), "abort requested");
- return false;
- }
-
-
- // Mark the region as CLOSING.
- env.getAssignmentManager().markRegionAsClosing(regionNode);
-
- // Add the close region operation to the server dispatch queue.
- if (!addToRemoteDispatcher(env, regionNode.getRegionLocation())) {
- // If addToRemoteDispatcher fails, it calls the callback #remoteCallFailed.
- }
-
- // Return true to keep the procedure running.
+ throws IOException {
return true;
}
@Override
protected void finishTransition(final MasterProcedureEnv env, final RegionStateNode regionNode)
throws IOException {
- AssignmentManager am = env.getAssignmentManager();
- RegionInfo regionInfo = getRegionInfo();
-
- if (!removeAfterUnassigning) {
- am.markRegionAsClosed(regionNode);
- } else {
- // Remove from in-memory states
- am.getRegionStates().deleteRegion(regionInfo);
- env.getMasterServices().getServerManager().removeRegion(regionInfo);
- FavoredNodesManager fnm = env.getMasterServices().getFavoredNodesManager();
- if (fnm != null) {
- fnm.deleteFavoredNodesForRegions(Lists.newArrayList(regionInfo));
- }
- }
}
@Override
- public RemoteOperation remoteCallBuild(final MasterProcedureEnv env, final ServerName serverName) {
+ public RemoteOperation remoteCallBuild(final MasterProcedureEnv env,
+ final ServerName serverName) {
assert serverName.equals(getRegionState(env).getRegionLocation());
return new RegionCloseOperation(this, getRegionInfo(), this.destinationServer);
}
@@ -240,48 +136,6 @@ public class UnassignProcedure extends RegionTransitionProcedure {
@Override
protected void reportTransition(final MasterProcedureEnv env, final RegionStateNode regionNode,
final TransitionCode code, final long seqId) throws UnexpectedStateException {
- switch (code) {
- case CLOSED:
- setTransitionState(RegionTransitionState.REGION_TRANSITION_FINISH);
- break;
- default:
- throw new UnexpectedStateException(String.format(
- "Received report unexpected transition state=%s for region=%s server=%s, expected CLOSED.",
- code, regionNode.getRegionInfo(), regionNode.getRegionLocation()));
- }
- }
-
- /**
- * Our remote call failed but there are a few states where it is safe to proceed with the
- * unassign; e.g. if a server crash and it has had all of its WALs processed, then we can allow
- * this unassign to go to completion.
- * @return True if it is safe to proceed with the unassign.
- */
- private boolean isSafeToProceed(final MasterProcedureEnv env, final RegionStateNode regionNode,
- final IOException exception) {
- if (exception instanceof ServerCrashException) {
- // This exception comes from ServerCrashProcedure AFTER log splitting. Its a signaling
- // exception. SCP found this region as a RIT during its processing of the crash. Its call
- // into here says it is ok to let this procedure go complete.
- return true;
- }
- if (exception instanceof NotServingRegionException) {
- LOG.warn("IS OK? ANY LOGS TO REPLAY; ACTING AS THOUGH ALL GOOD {}", regionNode, exception);
- return true;
- }
- return false;
- }
-
- /**
- * Set it up so when procedure is unsuspended, we'll move to the procedure finish.
- */
- protected void proceed(final MasterProcedureEnv env, final RegionStateNode regionNode) {
- try {
- reportTransition(env, regionNode, TransitionCode.CLOSED, HConstants.NO_SEQNUM);
- } catch (UnexpectedStateException e) {
- // Should never happen.
- throw new RuntimeException(e);
- }
}
/**
@@ -290,61 +144,6 @@ public class UnassignProcedure extends RegionTransitionProcedure {
@Override
protected boolean remoteCallFailed(final MasterProcedureEnv env, final RegionStateNode regionNode,
final IOException exception) {
- // Be careful reading the below; we do returns in middle of the method a few times.
- if (isSafeToProceed(env, regionNode, exception)) {
- proceed(env, regionNode);
- } else if (exception instanceof RegionServerAbortedException ||
- exception instanceof RegionServerStoppedException) {
- // RS is aborting/stopping, we cannot offline the region since the region may need to do WAL
- // recovery. Until we see the RS expiration, stay suspended; return false.
- LOG.info("Ignoring; waiting on ServerCrashProcedure", exception);
- return false;
- } else if (exception instanceof ServerNotRunningYetException) {
- // This should not happen. If it does, procedure will be woken-up and we'll retry.
- // TODO: Needs a pause and backoff?
- LOG.info("Retry", exception);
- } else {
- // We failed to RPC this server. Set it as expired.
- ServerName serverName = regionNode.getRegionLocation();
- LOG.warn("Expiring {}, {} {}; exception={}", serverName, this, regionNode.toShortString(),
- exception.getClass().getSimpleName());
- if (!env.getMasterServices().getServerManager().expireServer(serverName)) {
- // Failed to queue an expire. Lots of possible reasons including it may be already expired.
- // In ServerCrashProcedure and RecoverMetaProcedure, there is a handleRIT stage where we
- // will iterator over all the RIT procedures for the related regions of a crashed RS and
- // fail them with ServerCrashException. You can see the isSafeToProceed method above for
- // more details.
- // This can work for most cases, but since we do not hold the region lock in handleRIT,
- // there could be race that we arrive here after the handleRIT stage of the SCP. So here we
- // need to check whether it is safe to quit.
- // Notice that, the first assumption is that we can only quit after the log splitting is
- // done, as MRP can schedule an AssignProcedure right after us, and if the log splitting has
- // not been done then there will be data loss. And in SCP, we will change the state from
- // SPLITTING to OFFLINE(or SPLITTING_META_DONE for meta log processing) after finishing the
- // log splitting, and then calling handleRIT, so checking the state here can be a safe
- // fence. If the state is not OFFLINE(or SPLITTING_META_DONE), then we can just leave this
- // procedure in suspended state as we can make sure that the handleRIT has not been executed
- // yet and it will wake us up later. And if the state is OFFLINE(or SPLITTING_META_DONE), we
- // can safely quit since there will be no data loss. There could be duplicated
- // AssignProcedures for the same region but it is OK as we will do a check at the beginning
- // of AssignProcedure to prevent double assign. And there we have region lock so there will
- // be no race.
- if (env.getAssignmentManager().isLogSplittingDone(serverName, isMeta())) {
- // Its ok to proceed with this unassign.
- LOG.info("{} is dead and processed; moving procedure to finished state; {}", serverName,
- this);
- proceed(env, regionNode);
- // Return true; wake up the procedure so we can act on proceed.
- return true;
- }
- }
- // Return false so this procedure stays in suspended state. It will be woken up by the
- // ServerCrashProcedure that was scheduled when we called #expireServer above. SCP calls
- // #handleRIT which will call this method only the exception will be a ServerCrashException
- // this time around (See above).
- // TODO: Add a SCP as a new subprocedure that we now come to depend on.
- return false;
- }
return true;
}
@@ -355,11 +154,6 @@ public class UnassignProcedure extends RegionTransitionProcedure {
}
@Override
- public ServerName getServer(final MasterProcedureEnv env) {
- return this.hostingServer;
- }
-
- @Override
protected ProcedureMetrics getProcedureMetrics(MasterProcedureEnv env) {
return env.getAssignmentManager().getAssignmentManagerMetrics().getUnassignProcMetrics();
}
http://git-wip-us.apache.org/repos/asf/hbase/blob/bb349413/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/Util.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/Util.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/Util.java
deleted file mode 100644
index ff04b21..0000000
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/Util.java
+++ /dev/null
@@ -1,72 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.master.assignment;
-
-import java.io.IOException;
-
-import org.apache.hadoop.hbase.ServerName;
-import org.apache.hadoop.hbase.client.RegionInfo;
-import org.apache.hadoop.hbase.ipc.HBaseRpcController;
-import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv;
-import org.apache.yetus.audience.InterfaceAudience;
-
-import org.apache.hbase.thirdparty.com.google.protobuf.ServiceException;
-import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
-import org.apache.hadoop.hbase.shaded.protobuf.RequestConverter;
-import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.AdminService;
-import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.GetRegionInfoRequest;
-import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.GetRegionInfoResponse;
-
-/**
- * Utility for this assignment package only.
- */
-@InterfaceAudience.Private
-class Util {
- private Util() {}
-
- /**
- * Raw call to remote regionserver to get info on a particular region.
- * @throws IOException Let it out so can report this IOE as reason for failure
- */
- static GetRegionInfoResponse getRegionInfoResponse(final MasterProcedureEnv env,
- final ServerName regionLocation, final RegionInfo hri)
- throws IOException {
- return getRegionInfoResponse(env, regionLocation, hri, false);
- }
-
- static GetRegionInfoResponse getRegionInfoResponse(final MasterProcedureEnv env,
- final ServerName regionLocation, final RegionInfo hri, boolean includeBestSplitRow)
- throws IOException {
- // TODO: There is no timeout on this controller. Set one!
- HBaseRpcController controller = env.getMasterServices().getClusterConnection().
- getRpcControllerFactory().newController();
- final AdminService.BlockingInterface admin =
- env.getMasterServices().getClusterConnection().getAdmin(regionLocation);
- GetRegionInfoRequest request = null;
- if (includeBestSplitRow) {
- request = RequestConverter.buildGetRegionInfoRequest(hri.getRegionName(), false, true);
- } else {
- request = RequestConverter.buildGetRegionInfoRequest(hri.getRegionName());
- }
- try {
- return admin.getRegionInfo(controller, request);
- } catch (ServiceException e) {
- throw ProtobufUtil.handleRemoteException(e);
- }
- }
-}
http://git-wip-us.apache.org/repos/asf/hbase/blob/bb349413/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/AbstractStateMachineRegionProcedure.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/AbstractStateMachineRegionProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/AbstractStateMachineRegionProcedure.java
index 3b5e3b5..cf4818c 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/AbstractStateMachineRegionProcedure.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/AbstractStateMachineRegionProcedure.java
@@ -40,13 +40,12 @@ public abstract class AbstractStateMachineRegionProcedure<TState>
extends AbstractStateMachineTableProcedure<TState> {
private RegionInfo hri;
- public AbstractStateMachineRegionProcedure(final MasterProcedureEnv env,
- final RegionInfo hri) {
+ protected AbstractStateMachineRegionProcedure(MasterProcedureEnv env, RegionInfo hri) {
super(env);
this.hri = hri;
}
- public AbstractStateMachineRegionProcedure() {
+ protected AbstractStateMachineRegionProcedure() {
// Required by the Procedure framework to create the procedure on replay
super();
}
@@ -54,7 +53,7 @@ public abstract class AbstractStateMachineRegionProcedure<TState>
/**
* @return The RegionInfo of the region we are operating on.
*/
- protected RegionInfo getRegion() {
+ public RegionInfo getRegion() {
return this.hri;
}
http://git-wip-us.apache.org/repos/asf/hbase/blob/bb349413/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/AbstractStateMachineTableProcedure.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/AbstractStateMachineTableProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/AbstractStateMachineTableProcedure.java
index 4c77f6b..ca4e9d6 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/AbstractStateMachineTableProcedure.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/AbstractStateMachineTableProcedure.java
@@ -28,12 +28,10 @@ import org.apache.hadoop.hbase.TableNotFoundException;
import org.apache.hadoop.hbase.UnknownRegionException;
import org.apache.hadoop.hbase.client.DoNotRetryRegionException;
import org.apache.hadoop.hbase.client.RegionInfo;
-import org.apache.hadoop.hbase.client.RegionOfflineException;
import org.apache.hadoop.hbase.client.TableState;
import org.apache.hadoop.hbase.master.MasterServices;
-import org.apache.hadoop.hbase.master.RegionState;
import org.apache.hadoop.hbase.master.TableStateManager;
-import org.apache.hadoop.hbase.master.assignment.RegionStates;
+import org.apache.hadoop.hbase.master.assignment.RegionStateNode;
import org.apache.hadoop.hbase.procedure2.StateMachineProcedure;
import org.apache.hadoop.hbase.security.User;
import org.apache.yetus.audience.InterfaceAudience;
@@ -180,26 +178,13 @@ public abstract class AbstractStateMachineTableProcedure<TState>
/**
* Check region is online.
*/
- protected static void checkOnline(MasterProcedureEnv env, final RegionInfo ri)
+ protected static void checkOnline(MasterProcedureEnv env, RegionInfo ri)
throws DoNotRetryRegionException {
- RegionStates regionStates = env.getAssignmentManager().getRegionStates();
- RegionState rs = regionStates.getRegionState(ri);
- if (rs == null) {
+ RegionStateNode regionNode =
+ env.getAssignmentManager().getRegionStates().getRegionStateNode(ri);
+ if (regionNode == null) {
throw new UnknownRegionException("No RegionState found for " + ri.getEncodedName());
}
- if (!rs.isOpened()) {
- throw new DoNotRetryRegionException(ri.getEncodedName() + " is not OPEN; regionState=" + rs);
- }
- if (ri.isSplitParent()) {
- throw new DoNotRetryRegionException(ri.getEncodedName() +
- " is not online (splitParent=true)");
- }
- if (ri.isSplit()) {
- throw new DoNotRetryRegionException(ri.getEncodedName() + " has split=true");
- }
- if (ri.isOffline()) {
- // RegionOfflineException is not instance of DNRIOE so wrap it.
- throw new DoNotRetryRegionException(new RegionOfflineException(ri.getEncodedName()));
- }
+ regionNode.checkOnline();
}
}
http://git-wip-us.apache.org/repos/asf/hbase/blob/bb349413/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/CreateTableProcedure.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/CreateTableProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/CreateTableProcedure.java
index faad3dd..bd6c371 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/CreateTableProcedure.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/CreateTableProcedure.java
@@ -160,7 +160,7 @@ public class CreateTableProcedure
@Override
protected CreateTableState getState(final int stateId) {
- return CreateTableState.valueOf(stateId);
+ return CreateTableState.forNumber(stateId);
}
@Override
http://git-wip-us.apache.org/repos/asf/hbase/blob/bb349413/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/DeleteTableProcedure.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/DeleteTableProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/DeleteTableProcedure.java
index 060af01..46dca20 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/DeleteTableProcedure.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/DeleteTableProcedure.java
@@ -177,7 +177,7 @@ public class DeleteTableProcedure
@Override
protected DeleteTableState getState(final int stateId) {
- return DeleteTableState.valueOf(stateId);
+ return DeleteTableState.forNumber(stateId);
}
@Override
@@ -191,6 +191,11 @@ public class DeleteTableProcedure
}
@Override
+ protected boolean holdLock(MasterProcedureEnv env) {
+ return true;
+ }
+
+ @Override
public TableName getTableName() {
return tableName;
}
@@ -297,7 +302,9 @@ public class DeleteTableProcedure
FileStatus[] files = fs.listStatus(tempdir);
if (files != null && files.length > 0) {
for (int i = 0; i < files.length; ++i) {
- if (!files[i].isDir()) continue;
+ if (!files[i].isDirectory()) {
+ continue;
+ }
HFileArchiver.archiveRegion(fs, mfs.getRootDir(), tempTableDir, files[i].getPath());
}
}
@@ -343,7 +350,6 @@ public class DeleteTableProcedure
* There may be items for this table still up in hbase:meta in the case where the
* info:regioninfo column was empty because of some write error. Remove ALL rows from hbase:meta
* that have to do with this table. See HBASE-12980.
- * @throws IOException
*/
private static void cleanAnyRemainingRows(final MasterProcedureEnv env,
final TableName tableName) throws IOException {
http://git-wip-us.apache.org/repos/asf/hbase/blob/bb349413/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/DisableTableProcedure.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/DisableTableProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/DisableTableProcedure.java
index 3a2a952..71d1fc9 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/DisableTableProcedure.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/DisableTableProcedure.java
@@ -51,8 +51,6 @@ public class DisableTableProcedure
private TableName tableName;
private boolean skipTableStateCheck;
- private Boolean traceEnabled = null;
-
public DisableTableProcedure() {
super();
}
@@ -64,8 +62,7 @@ public class DisableTableProcedure
* @param skipTableStateCheck whether to check table state
*/
public DisableTableProcedure(final MasterProcedureEnv env, final TableName tableName,
- final boolean skipTableStateCheck)
- throws HBaseIOException {
+ final boolean skipTableStateCheck) throws HBaseIOException {
this(env, tableName, skipTableStateCheck, null);
}
@@ -77,7 +74,7 @@ public class DisableTableProcedure
*/
public DisableTableProcedure(final MasterProcedureEnv env, final TableName tableName,
final boolean skipTableStateCheck, final ProcedurePrepareLatch syncLatch)
- throws HBaseIOException {
+ throws HBaseIOException {
super(env, syncLatch);
this.tableName = tableName;
preflightChecks(env, true);
@@ -107,7 +104,8 @@ public class DisableTableProcedure
setNextState(DisableTableState.DISABLE_TABLE_MARK_REGIONS_OFFLINE);
break;
case DISABLE_TABLE_MARK_REGIONS_OFFLINE:
- addChildProcedure(env.getAssignmentManager().createUnassignProcedures(tableName));
+ addChildProcedure(
+ env.getAssignmentManager().createUnassignProceduresForDisabling(tableName));
setNextState(DisableTableState.DISABLE_TABLE_ADD_REPLICATION_BARRIER);
break;
case DISABLE_TABLE_ADD_REPLICATION_BARRIER:
@@ -180,7 +178,7 @@ public class DisableTableProcedure
@Override
protected DisableTableState getState(final int stateId) {
- return DisableTableState.valueOf(stateId);
+ return DisableTableState.forNumber(stateId);
}
@Override
@@ -219,6 +217,14 @@ public class DisableTableProcedure
skipTableStateCheck = disableTableMsg.getSkipTableStateCheck();
}
+ // For disabling a table, we do not care whether a region can be online, so hold the table xlock
+ // for ever. This will simplify the logic as we will not conflict with procedures other than
+ // SCP.
+ @Override
+ protected boolean holdLock(MasterProcedureEnv env) {
+ return true;
+ }
+
@Override
public TableName getTableName() {
return tableName;
@@ -233,7 +239,6 @@ public class DisableTableProcedure
* Action before any real action of disabling table. Set the exception in the procedure instead
* of throwing it. This approach is to deal with backward compatible with 1.0.
* @param env MasterProcedureEnv
- * @throws IOException
*/
private boolean prepareDisable(final MasterProcedureEnv env) throws IOException {
boolean canTableBeDisabled = true;
@@ -272,8 +277,6 @@ public class DisableTableProcedure
* Action before disabling table.
* @param env MasterProcedureEnv
* @param state the procedure state
- * @throws IOException
- * @throws InterruptedException
*/
protected void preDisable(final MasterProcedureEnv env, final DisableTableState state)
throws IOException, InterruptedException {
@@ -283,14 +286,11 @@ public class DisableTableProcedure
/**
* Mark table state to Disabling
* @param env MasterProcedureEnv
- * @throws IOException
*/
- protected static void setTableStateToDisabling(
- final MasterProcedureEnv env,
+ private static void setTableStateToDisabling(final MasterProcedureEnv env,
final TableName tableName) throws IOException {
// Set table disabling flag up in zk.
- env.getMasterServices().getTableStateManager().setTableState(
- tableName,
+ env.getMasterServices().getTableStateManager().setTableState(tableName,
TableState.State.DISABLING);
LOG.info("Set {} to state={}", tableName, TableState.State.DISABLING);
}
@@ -298,14 +298,11 @@ public class DisableTableProcedure
/**
* Mark table state to Disabled
* @param env MasterProcedureEnv
- * @throws IOException
*/
- protected static void setTableStateToDisabled(
- final MasterProcedureEnv env,
+ protected static void setTableStateToDisabled(final MasterProcedureEnv env,
final TableName tableName) throws IOException {
// Flip the table to disabled
- env.getMasterServices().getTableStateManager().setTableState(
- tableName,
+ env.getMasterServices().getTableStateManager().setTableState(tableName,
TableState.State.DISABLED);
LOG.info("Set {} to state={}", tableName, TableState.State.DISABLED);
}
@@ -314,8 +311,6 @@ public class DisableTableProcedure
* Action after disabling table.
* @param env MasterProcedureEnv
* @param state the procedure state
- * @throws IOException
- * @throws InterruptedException
*/
protected void postDisable(final MasterProcedureEnv env, final DisableTableState state)
throws IOException, InterruptedException {
@@ -323,23 +318,9 @@ public class DisableTableProcedure
}
/**
- * The procedure could be restarted from a different machine. If the variable is null, we need to
- * retrieve it.
- * @return traceEnabled
- */
- private Boolean isTraceEnabled() {
- if (traceEnabled == null) {
- traceEnabled = LOG.isTraceEnabled();
- }
- return traceEnabled;
- }
-
- /**
* Coprocessor Action.
* @param env MasterProcedureEnv
* @param state the procedure state
- * @throws IOException
- * @throws InterruptedException
*/
private void runCoprocessorAction(final MasterProcedureEnv env, final DisableTableState state)
throws IOException, InterruptedException {
http://git-wip-us.apache.org/repos/asf/hbase/blob/bb349413/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/EnableTableProcedure.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/EnableTableProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/EnableTableProcedure.java
index f2fbb7a..4e6211e 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/EnableTableProcedure.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/EnableTableProcedure.java
@@ -84,7 +84,6 @@ public class EnableTableProcedure
this.skipTableStateCheck = skipTableStateCheck;
}
- @SuppressWarnings("deprecation")
@Override
protected Flow executeFromState(final MasterProcedureEnv env, final EnableTableState state)
throws InterruptedException {
@@ -255,7 +254,7 @@ public class EnableTableProcedure
@Override
protected EnableTableState getState(final int stateId) {
- return EnableTableState.valueOf(stateId);
+ return EnableTableState.forNumber(stateId);
}
@Override
http://git-wip-us.apache.org/repos/asf/hbase/blob/bb349413/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/InitMetaProcedure.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/InitMetaProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/InitMetaProcedure.java
index d984632..024f3ea 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/InitMetaProcedure.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/InitMetaProcedure.java
@@ -18,10 +18,11 @@
package org.apache.hadoop.hbase.master.procedure;
import java.io.IOException;
+import java.util.Arrays;
import java.util.concurrent.CountDownLatch;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.RegionInfoBuilder;
-import org.apache.hadoop.hbase.master.assignment.AssignProcedure;
+import org.apache.hadoop.hbase.master.assignment.TransitRegionStateProcedure;
import org.apache.hadoop.hbase.procedure2.ProcedureStateSerializer;
import org.apache.hadoop.hbase.procedure2.ProcedureSuspendedException;
import org.apache.hadoop.hbase.procedure2.ProcedureYieldException;
@@ -32,7 +33,7 @@ import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.I
/**
* This procedure is used to initialize meta table for a new hbase deploy. It will just schedule an
- * {@link AssignProcedure} to assign meta.
+ * {@link TransitRegionStateProcedure} to assign meta.
*/
@InterfaceAudience.Private
public class InitMetaProcedure extends AbstractStateMachineTableProcedure<InitMetaState> {
@@ -55,7 +56,7 @@ public class InitMetaProcedure extends AbstractStateMachineTableProcedure<InitMe
switch (state) {
case INIT_META_ASSIGN_META:
addChildProcedure(env.getAssignmentManager()
- .createAssignProcedure(RegionInfoBuilder.FIRST_META_REGIONINFO));
+ .createAssignProcedures(Arrays.asList(RegionInfoBuilder.FIRST_META_REGIONINFO)));
return Flow.NO_MORE_STATE;
default:
throw new UnsupportedOperationException("unhandled state=" + state);
http://git-wip-us.apache.org/repos/asf/hbase/blob/bb349413/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/RecoverMetaProcedure.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/RecoverMetaProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/RecoverMetaProcedure.java
index 3b848fa..17b5ae3 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/RecoverMetaProcedure.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/RecoverMetaProcedure.java
@@ -18,37 +18,23 @@
package org.apache.hadoop.hbase.master.procedure;
import java.io.IOException;
-import java.util.Set;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.client.RegionInfo;
-import org.apache.hadoop.hbase.client.RegionInfoBuilder;
-import org.apache.hadoop.hbase.client.RegionReplicaUtil;
import org.apache.hadoop.hbase.master.MasterServices;
-import org.apache.hadoop.hbase.master.assignment.AssignProcedure;
-import org.apache.hadoop.hbase.master.assignment.AssignmentManager;
-import org.apache.hadoop.hbase.master.assignment.RegionTransitionProcedure;
import org.apache.hadoop.hbase.procedure2.ProcedureStateSerializer;
import org.apache.hadoop.hbase.procedure2.ProcedureSuspendedException;
import org.apache.hadoop.hbase.procedure2.ProcedureYieldException;
import org.apache.hadoop.hbase.procedure2.StateMachineProcedure;
-import org.apache.hadoop.hbase.zookeeper.MetaTableLocator;
import org.apache.yetus.audience.InterfaceAudience;
-import org.apache.zookeeper.KeeperException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import org.apache.hbase.thirdparty.com.google.common.base.Preconditions;
-
import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos;
import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.RecoverMetaState;
-
/**
- * This procedure recovers meta from prior shutdown/ crash of a server, and brings meta online by
- * assigning meta region/s. Any place where meta is accessed and requires meta to be online, need to
- * submit this procedure instead of duplicating steps to recover meta in the code.
- * <p/>
+ * Leave here only for checking if we can successfully start the master.
* @deprecated Do not use any more, leave it here only for compatible. The recovery work will be
* done in {@link ServerCrashProcedure} directly, and the initial work for meta table
* will be done by {@link InitMetaProcedure}.
@@ -66,146 +52,17 @@ public class RecoverMetaProcedure
private boolean shouldSplitWal;
private int replicaId;
- private final ProcedurePrepareLatch syncLatch;
private MasterServices master;
- /**
- * Call this constructor to queue up a {@link RecoverMetaProcedure} in response to meta
- * carrying server crash
- * @param failedMetaServer failed/ crashed region server that was carrying meta
- * @param shouldSplitLog split log file of meta region
- */
- public RecoverMetaProcedure(final ServerName failedMetaServer, final boolean shouldSplitLog) {
- this(failedMetaServer, shouldSplitLog, null);
- }
-
- /**
- * Constructor with latch, for blocking/ sync usage
- */
- public RecoverMetaProcedure(final ServerName failedMetaServer, final boolean shouldSplitLog,
- final ProcedurePrepareLatch latch) {
- this.failedMetaServer = failedMetaServer;
- this.shouldSplitWal = shouldSplitLog;
- this.replicaId = RegionInfo.DEFAULT_REPLICA_ID;
- this.syncLatch = latch;
- }
-
- /**
- * This constructor is also used when deserializing from a procedure store; we'll construct one
- * of these then call #deserializeStateData(InputStream). Do not use directly.
- */
public RecoverMetaProcedure() {
- this(null, false);
+
}
@Override
protected Flow executeFromState(MasterProcedureEnv env,
MasterProcedureProtos.RecoverMetaState state)
throws ProcedureSuspendedException, ProcedureYieldException, InterruptedException {
- prepare(env);
-
- if (!isRunRequired()) {
- LOG.info(this + "; Meta already initialized. Skipping run");
- return Flow.NO_MORE_STATE;
- }
-
- try {
- switch (state) {
- case RECOVER_META_PREPARE:
- // If Master is going down or cluster is up, skip this assign by returning NO_MORE_STATE
- if (!master.isClusterUp()) {
- String msg = "Cluster not up! Skipping hbase:meta assign.";
- LOG.warn(msg);
- return Flow.NO_MORE_STATE;
- }
- if (master.isStopping() || master.isStopped()) {
- String msg = "Master stopping=" + master.isStopping() + ", stopped=" +
- master.isStopped() + "; skipping hbase:meta assign.";
- LOG.warn(msg);
- return Flow.NO_MORE_STATE;
- }
- setNextState(RecoverMetaState.RECOVER_META_SPLIT_LOGS);
- break;
- case RECOVER_META_SPLIT_LOGS:
- LOG.info("Start " + this);
- if (shouldSplitWal) {
- // TODO: Matteo. We BLOCK here but most important thing to be doing at this moment.
- AssignmentManager am = env.getMasterServices().getAssignmentManager();
- if (failedMetaServer != null) {
- am.getRegionStates().metaLogSplitting(failedMetaServer);
- master.getMasterWalManager().splitMetaLog(failedMetaServer);
- am.getRegionStates().metaLogSplit(failedMetaServer);
- } else {
- ServerName serverName =
- master.getMetaTableLocator().getMetaRegionLocation(master.getZooKeeper());
- Set<ServerName> previouslyFailedServers =
- master.getMasterWalManager().getFailedServersFromLogFolders();
- if (serverName != null && previouslyFailedServers.contains(serverName)) {
- am.getRegionStates().metaLogSplitting(serverName);
- master.getMasterWalManager().splitMetaLog(serverName);
- am.getRegionStates().metaLogSplit(serverName);
- }
- }
- }
- setNextState(RecoverMetaState.RECOVER_META_ASSIGN_REGIONS);
- break;
- case RECOVER_META_ASSIGN_REGIONS:
- RegionInfo hri = RegionReplicaUtil.getRegionInfoForReplica(
- RegionInfoBuilder.FIRST_META_REGIONINFO, this.replicaId);
-
- AssignProcedure metaAssignProcedure;
- AssignmentManager am = master.getAssignmentManager();
- if (failedMetaServer != null) {
- handleRIT(env, hri, this.failedMetaServer);
- LOG.info(this + "; Assigning meta with new plan; previous server=" + failedMetaServer);
- metaAssignProcedure = am.createAssignProcedure(hri);
- } else {
- // get server carrying meta from zk
- ServerName metaServer =
- MetaTableLocator.getMetaRegionState(master.getZooKeeper()).getServerName();
- LOG.info(this + "; Retaining meta assignment to server=" + metaServer);
- metaAssignProcedure = am.createAssignProcedure(hri, metaServer);
- }
-
- addChildProcedure(metaAssignProcedure);
- return Flow.NO_MORE_STATE;
-
- default:
- throw new UnsupportedOperationException("unhandled state=" + state);
- }
- } catch (IOException|KeeperException e) {
- LOG.warn(this + "; Failed state=" + state + ", retry " + this + "; cycles=" +
- getCycles(), e);
- }
- return Flow.HAS_MORE_STATE;
- }
-
- /**
- * Is the region stuck assigning to this failedMetaServer? If so, cancel the call
- * just as we do over in ServerCrashProcedure#handleRIT except less to do here; less context
- * to carry.
- */
- // NOTE: Make sure any fix or improvement done here is also done in SCP#handleRIT; the methods
- // have overlap.
- private void handleRIT(MasterProcedureEnv env, RegionInfo ri, ServerName crashedServerName) {
- AssignmentManager am = env.getAssignmentManager();
- RegionTransitionProcedure rtp = am.getRegionStates().getRegionTransitionProcedure(ri);
- if (rtp == null) {
- return; // Nothing to do. Not in RIT.
- }
- // Make sure the RIT is against this crashed server. In the case where there are many
- // processings of a crashed server -- backed up for whatever reason (slow WAL split)
- // -- then a previous SCP may have already failed an assign, etc., and it may have a
- // new location target; DO NOT fail these else we make for assign flux.
- ServerName rtpServerName = rtp.getServer(env);
- if (rtpServerName == null) {
- LOG.warn("RIT with ServerName null! " + rtp);
- } else if (rtpServerName.equals(crashedServerName)) {
- LOG.info("pid=" + getProcId() + " found RIT " + rtp + "; " +
- rtp.getRegionState(env).toShortString());
- rtp.remoteCallFailed(env, crashedServerName,
- new ServerCrashException(getProcId(), crashedServerName));
- }
+ return Flow.NO_MORE_STATE;
}
@Override
@@ -241,11 +98,10 @@ public class RecoverMetaProcedure
}
@Override
- protected void serializeStateData(ProcedureStateSerializer serializer)
- throws IOException {
+ protected void serializeStateData(ProcedureStateSerializer serializer) throws IOException {
super.serializeStateData(serializer);
MasterProcedureProtos.RecoverMetaStateData.Builder state =
- MasterProcedureProtos.RecoverMetaStateData.newBuilder().setShouldSplitWal(shouldSplitWal);
+ MasterProcedureProtos.RecoverMetaStateData.newBuilder().setShouldSplitWal(shouldSplitWal);
if (failedMetaServer != null) {
state.setFailedMetaServer(ProtobufUtil.toServerName(failedMetaServer));
}
@@ -254,50 +110,13 @@ public class RecoverMetaProcedure
}
@Override
- protected void deserializeStateData(ProcedureStateSerializer serializer)
- throws IOException {
+ protected void deserializeStateData(ProcedureStateSerializer serializer) throws IOException {
super.deserializeStateData(serializer);
MasterProcedureProtos.RecoverMetaStateData state =
- serializer.deserialize(MasterProcedureProtos.RecoverMetaStateData.class);
+ serializer.deserialize(MasterProcedureProtos.RecoverMetaStateData.class);
this.shouldSplitWal = state.hasShouldSplitWal() && state.getShouldSplitWal();
- this.failedMetaServer = state.hasFailedMetaServer() ?
- ProtobufUtil.toServerName(state.getFailedMetaServer()) : null;
+ this.failedMetaServer =
+ state.hasFailedMetaServer() ? ProtobufUtil.toServerName(state.getFailedMetaServer()) : null;
this.replicaId = state.hasReplicaId() ? state.getReplicaId() : RegionInfo.DEFAULT_REPLICA_ID;
}
-
- @Override
- protected LockState acquireLock(MasterProcedureEnv env) {
- if (env.getProcedureScheduler().waitMetaExclusiveLock(this)) {
- return LockState.LOCK_EVENT_WAIT;
- }
- return LockState.LOCK_ACQUIRED;
- }
-
- @Override
- protected void releaseLock(MasterProcedureEnv env) {
- env.getProcedureScheduler().wakeMetaExclusiveLock(this);
- }
-
- @Override
- protected void completionCleanup(MasterProcedureEnv env) {
- ProcedurePrepareLatch.releaseLatch(syncLatch, this);
- }
-
- /**
- * @return true if failedMetaServer is not null (meta carrying server crashed) or meta is
- * already initialized
- */
- private boolean isRunRequired() {
- return failedMetaServer != null || !master.getAssignmentManager().isMetaAssigned();
- }
-
- /**
- * Prepare for execution
- */
- private void prepare(MasterProcedureEnv env) {
- if (master == null) {
- master = env.getMasterServices();
- Preconditions.checkArgument(master != null);
- }
- }
}
http://git-wip-us.apache.org/repos/asf/hbase/blob/bb349413/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ReopenTableRegionsProcedure.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ReopenTableRegionsProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ReopenTableRegionsProcedure.java
index 8f3aa22..9ccbc78 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ReopenTableRegionsProcedure.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ReopenTableRegionsProcedure.java
@@ -21,11 +21,10 @@ import java.io.IOException;
import java.util.Collections;
import java.util.List;
import java.util.stream.Collectors;
-import org.apache.hadoop.hbase.HBaseIOException;
import org.apache.hadoop.hbase.HRegionLocation;
import org.apache.hadoop.hbase.TableName;
-import org.apache.hadoop.hbase.master.RegionPlan;
-import org.apache.hadoop.hbase.master.assignment.MoveRegionProcedure;
+import org.apache.hadoop.hbase.master.assignment.RegionStateNode;
+import org.apache.hadoop.hbase.master.assignment.TransitRegionStateProcedure;
import org.apache.hadoop.hbase.procedure2.ProcedureStateSerializer;
import org.apache.hadoop.hbase.procedure2.ProcedureSuspendedException;
import org.apache.hadoop.hbase.procedure2.ProcedureYieldException;
@@ -39,8 +38,6 @@ import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.R
/**
* Used for reopening the regions for a table.
- * <p/>
- * Currently we use {@link MoveRegionProcedure} to reopen regions.
*/
@InterfaceAudience.Private
public class ReopenTableRegionsProcedure
@@ -69,16 +66,6 @@ public class ReopenTableRegionsProcedure
return TableOperationType.REGION_EDIT;
}
- private MoveRegionProcedure createReopenProcedure(MasterProcedureEnv env, HRegionLocation loc) {
- try {
- return new MoveRegionProcedure(env,
- new RegionPlan(loc.getRegion(), loc.getServerName(), loc.getServerName()), false);
- } catch (HBaseIOException e) {
- // we skip the checks so this should not happen
- throw new AssertionError(e);
- }
- }
-
@Override
protected Flow executeFromState(MasterProcedureEnv env, ReopenTableRegionsState state)
throws ProcedureSuspendedException, ProcedureYieldException, InterruptedException {
@@ -93,8 +80,22 @@ public class ReopenTableRegionsProcedure
setNextState(ReopenTableRegionsState.REOPEN_TABLE_REGIONS_REOPEN_REGIONS);
return Flow.HAS_MORE_STATE;
case REOPEN_TABLE_REGIONS_REOPEN_REGIONS:
- addChildProcedure(regions.stream().filter(l -> l.getSeqNum() >= 0)
- .map(l -> createReopenProcedure(env, l)).toArray(MoveRegionProcedure[]::new));
+ for (HRegionLocation loc : regions) {
+ RegionStateNode regionNode = env.getAssignmentManager().getRegionStates()
+ .getOrCreateRegionStateNode(loc.getRegion());
+ TransitRegionStateProcedure proc;
+ regionNode.lock();
+ try {
+ if (regionNode.getProcedure() != null) {
+ continue;
+ }
+ proc = TransitRegionStateProcedure.reopen(env, regionNode.getRegionInfo());
+ regionNode.setProcedure(proc);
+ } finally {
+ regionNode.unlock();
+ }
+ addChildProcedure(proc);
+ }
setNextState(ReopenTableRegionsState.REOPEN_TABLE_REGIONS_CONFIRM_REOPENED);
return Flow.HAS_MORE_STATE;
case REOPEN_TABLE_REGIONS_CONFIRM_REOPENED: