You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by st...@apache.org on 2018/10/18 13:22:31 UTC
hbase git commit: HBASE-21269 Forward-port HBASE-21213 [hbck2] bypass leaves behind state in RegionStates when assign/unassign
Repository: hbase
Updated Branches:
refs/heads/branch-2 92b9b0f26 -> 4a609db30
HBASE-21269 Forward-port HBASE-21213 [hbck2] bypass leaves behind state in RegionStates when assign/unassign
Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/4a609db3
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/4a609db3
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/4a609db3
Branch: refs/heads/branch-2
Commit: 4a609db30c348e4f6f2d54bafc245a4042e67645
Parents: 92b9b0f
Author: jingyuntian <ti...@gmail.com>
Authored: Thu Oct 18 17:14:32 2018 +0800
Committer: Michael Stack <st...@apache.org>
Committed: Thu Oct 18 06:22:22 2018 -0700
----------------------------------------------------------------------
.../apache/hadoop/hbase/client/HBaseHbck.java | 15 +-
.../org/apache/hadoop/hbase/client/Hbck.java | 35 +++-
.../hbase/shaded/protobuf/RequestConverter.java | 10 +-
.../hadoop/hbase/procedure2/Procedure.java | 29 +--
.../hbase/procedure2/ProcedureExecutor.java | 54 ++++--
.../hadoop/hbase/procedure2/ProcedureUtil.java | 2 +-
.../procedure2/RemoteProcedureException.java | 4 +
.../hbase/procedure2/TestProcedureBypass.java | 24 ++-
.../src/main/protobuf/Master.proto | 6 +-
.../src/main/protobuf/MasterProcedure.proto | 1 +
.../hbase/rsgroup/RSGroupInfoManagerImpl.java | 3 +-
.../org/apache/hadoop/hbase/master/HMaster.java | 8 +-
.../hadoop/hbase/master/MasterRpcServices.java | 19 +-
.../master/assignment/AssignmentManager.java | 47 ++++-
.../assignment/TransitRegionStateProcedure.java | 9 +-
.../hbase/master/balancer/BaseLoadBalancer.java | 13 +-
.../master/procedure/MasterProcedureUtil.java | 16 ++
.../master/procedure/ProcedureDescriber.java | 3 +-
.../master/procedure/ProcedurePrepareLatch.java | 2 +-
.../master/procedure/ProcedureSyncWait.java | 5 +-
.../hadoop/hbase/TestMetaTableAccessor.java | 4 +-
.../apache/hadoop/hbase/client/TestHbck.java | 3 +-
.../master/assignment/TestRegionBypass.java | 194 +++++++++++++++++++
.../main/ruby/shell/commands/list_procedures.rb | 3 +-
.../apache/hadoop/hbase/client/TestShell.java | 1 -
hbase-shell/src/test/ruby/shell/shell_test.rb | 6 +-
26 files changed, 426 insertions(+), 90 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hbase/blob/4a609db3/hbase-client/src/main/java/org/apache/hadoop/hbase/client/HBaseHbck.java
----------------------------------------------------------------------
diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/HBaseHbck.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/HBaseHbck.java
index a8daa7b..2d08825 100644
--- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/HBaseHbck.java
+++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/HBaseHbck.java
@@ -102,11 +102,12 @@ public class HBaseHbck implements Hbck {
}
@Override
- public List<Long> assigns(List<String> encodedRegionNames) throws IOException {
+ public List<Long> assigns(List<String> encodedRegionNames, boolean override)
+ throws IOException {
try {
MasterProtos.AssignsResponse response =
this.hbck.assigns(rpcControllerFactory.newController(),
- RequestConverter.toAssignRegionsRequest(encodedRegionNames));
+ RequestConverter.toAssignRegionsRequest(encodedRegionNames, override));
return response.getPidList();
} catch (ServiceException se) {
LOG.debug(toCommaDelimitedString(encodedRegionNames), se);
@@ -115,11 +116,12 @@ public class HBaseHbck implements Hbck {
}
@Override
- public List<Long> unassigns(List<String> encodedRegionNames) throws IOException {
+ public List<Long> unassigns(List<String> encodedRegionNames, boolean override)
+ throws IOException {
try {
MasterProtos.UnassignsResponse response =
this.hbck.unassigns(rpcControllerFactory.newController(),
- RequestConverter.toUnassignRegionsRequest(encodedRegionNames));
+ RequestConverter.toUnassignRegionsRequest(encodedRegionNames, override));
return response.getPidList();
} catch (ServiceException se) {
LOG.debug(toCommaDelimitedString(encodedRegionNames), se);
@@ -132,7 +134,8 @@ public class HBaseHbck implements Hbck {
}
@Override
- public List<Boolean> bypassProcedure(List<Long> pids, long waitTime, boolean force)
+ public List<Boolean> bypassProcedure(List<Long> pids, long waitTime, boolean override,
+ boolean recursive)
throws IOException {
MasterProtos.BypassProcedureResponse response = ProtobufUtil.call(
new Callable<MasterProtos.BypassProcedureResponse>() {
@@ -141,7 +144,7 @@ public class HBaseHbck implements Hbck {
try {
return hbck.bypassProcedure(rpcControllerFactory.newController(),
MasterProtos.BypassProcedureRequest.newBuilder().addAllProcId(pids).
- setWaitTime(waitTime).setForce(force).build());
+ setWaitTime(waitTime).setOverride(override).setRecursive(recursive).build());
} catch (Throwable t) {
LOG.error(pids.stream().map(i -> i.toString()).
collect(Collectors.joining(", ")), t);
http://git-wip-us.apache.org/repos/asf/hbase/blob/4a609db3/hbase-client/src/main/java/org/apache/hadoop/hbase/client/Hbck.java
----------------------------------------------------------------------
diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/Hbck.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/Hbck.java
index 5c9a862..5c97d97 100644
--- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/Hbck.java
+++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/Hbck.java
@@ -28,11 +28,14 @@ import org.apache.yetus.audience.InterfaceAudience;
/**
* Hbck fixup tool APIs. Obtain an instance from {@link ClusterConnection#getHbck()} and call
* {@link #close()} when done.
- * <p>WARNING: the below methods can damage the cluster. For experienced users only.
+ * <p>WARNING: the below methods can damage the cluster. It may leave the cluster in an
+ * indeterminate state, e.g. region not assigned, or some hdfs files left behind. After running
+ * any of the below, operators may have to do some clean up on hdfs or schedule some assign
+ * procedures to get regions back online. DO AT YOUR OWN RISK. For experienced users only.
*
* @see ConnectionFactory
* @see ClusterConnection
- * @since 2.2.0
+ * @since 2.0.2, 2.1.1
*/
@InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.HBCK)
public interface Hbck extends Abortable, Closeable {
@@ -49,22 +52,38 @@ public interface Hbck extends Abortable, Closeable {
* -- good if many Regions to online -- and it will schedule the assigns even in the case where
* Master is initializing (as long as the ProcedureExecutor is up). Does NOT call Coprocessor
* hooks.
+ * @param override You need to add the override for case where a region has previously been
+ * bypassed. When a Procedure has been bypassed, a Procedure will have completed
+ * but no other Procedure will be able to make progress on the target entity
+ * (intentionally). This override flag will override this fencing mechanism.
* @param encodedRegionNames Region encoded names; e.g. 1588230740 is the hard-coded encoding
* for hbase:meta region and de00010733901a05f5a2a3a382e27dd4 is an
* example of what a random user-space encoded Region name looks like.
*/
- List<Long> assigns(List<String> encodedRegionNames) throws IOException;
+ List<Long> assigns(List<String> encodedRegionNames, boolean override) throws IOException;
+
+ default List<Long> assigns(List<String> encodedRegionNames) throws IOException {
+ return assigns(encodedRegionNames, false);
+ }
/**
* Like {@link Admin#unassign(byte[], boolean)} but 'raw' in that it can do more than one Region
* at a time -- good if many Regions to offline -- and it will schedule the assigns even in the
* case where Master is initializing (as long as the ProcedureExecutor is up). Does NOT call
* Coprocessor hooks.
+ * @param override You need to add the override for case where a region has previously been
+ * bypassed. When a Procedure has been bypassed, a Procedure will have completed
+ * but no other Procedure will be able to make progress on the target entity
+ * (intentionally). This override flag will override this fencing mechanism.
* @param encodedRegionNames Region encoded names; e.g. 1588230740 is the hard-coded encoding
* for hbase:meta region and de00010733901a05f5a2a3a382e27dd4 is an
* example of what a random user-space encoded Region name looks like.
*/
- List<Long> unassigns(List<String> encodedRegionNames) throws IOException;
+ List<Long> unassigns(List<String> encodedRegionNames, boolean override) throws IOException;
+
+ default List<Long> unassigns(List<String> encodedRegionNames) throws IOException {
+ return unassigns(encodedRegionNames, false);
+ }
/**
* Bypass specified procedure and move it to completion. Procedure is marked completed but
@@ -73,9 +92,13 @@ public interface Hbck extends Abortable, Closeable {
*
* @param pids of procedures to complete.
* @param waitTime wait time in ms for acquiring lock for a procedure
- * @param force if force set to true, we will bypass the procedure even if it is executing.
+ * @param override if override set to true, we will bypass the procedure even if it is executing.
* This is for procedures which can't break out during execution (bugs?).
+ * @param recursive If set, if a parent procedure, we will find and bypass children and then
+ * the parent procedure (Dangerous but useful in case where child procedure has been 'lost').
+ * Does not always work. Experimental.
* @return true if procedure is marked for bypass successfully, false otherwise
*/
- List<Boolean> bypassProcedure(List<Long> pids, long waitTime, boolean force) throws IOException;
+ List<Boolean> bypassProcedure(List<Long> pids, long waitTime, boolean override, boolean recursive)
+ throws IOException;
}
http://git-wip-us.apache.org/repos/asf/hbase/blob/4a609db3/hbase-client/src/main/java/org/apache/hadoop/hbase/shaded/protobuf/RequestConverter.java
----------------------------------------------------------------------
diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/shaded/protobuf/RequestConverter.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/shaded/protobuf/RequestConverter.java
index 6ab81e2..7db8b02 100644
--- a/hbase-client/src/main/java/org/apache/hadoop/hbase/shaded/protobuf/RequestConverter.java
+++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/shaded/protobuf/RequestConverter.java
@@ -1883,16 +1883,18 @@ public final class RequestConverter {
// HBCK2
public static MasterProtos.AssignsRequest toAssignRegionsRequest(
- List<String> encodedRegionNames) {
+ List<String> encodedRegionNames, boolean override) {
MasterProtos.AssignsRequest.Builder b = MasterProtos.AssignsRequest.newBuilder();
- return b.addAllRegion(toEncodedRegionNameRegionSpecifiers(encodedRegionNames)).build();
+ return b.addAllRegion(toEncodedRegionNameRegionSpecifiers(encodedRegionNames)).
+ setOverride(override).build();
}
public static MasterProtos.UnassignsRequest toUnassignRegionsRequest(
- List<String> encodedRegionNames) {
+ List<String> encodedRegionNames, boolean override) {
MasterProtos.UnassignsRequest.Builder b =
MasterProtos.UnassignsRequest.newBuilder();
- return b.addAllRegion(toEncodedRegionNameRegionSpecifiers(encodedRegionNames)).build();
+ return b.addAllRegion(toEncodedRegionNameRegionSpecifiers(encodedRegionNames)).
+ setOverride(override).build();
}
private static List<RegionSpecifier> toEncodedRegionNameRegionSpecifiers(
http://git-wip-us.apache.org/repos/asf/hbase/blob/4a609db3/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/Procedure.java
----------------------------------------------------------------------
diff --git a/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/Procedure.java b/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/Procedure.java
index 0824b51..089caa0 100644
--- a/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/Procedure.java
+++ b/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/Procedure.java
@@ -145,12 +145,16 @@ public abstract class Procedure<TEnvironment> implements Comparable<Procedure<TE
private boolean lockedWhenLoading = false;
/**
- * Used for force complete of the procedure without
- * actually doing any logic in the procedure.
+ * Used for override complete of the procedure without actually doing any logic in the procedure.
* If bypass is set to true, when executing it will return null when
- * {@link #doExecute(Object)} to finish the procedure and releasing any locks
- * it may currently hold.
- * Bypassing a procedure is not like aborting. Aborting a procedure will trigger
+ * {@link #doExecute(Object)} is called to finish the procedure and release any locks
+ * it may currently hold. The bypass does cleanup around the Procedure as far as the
+ * Procedure framework is concerned. It does not clean any internal state that the
+ * Procedures themselves may have set. That is for the Procedures to do themselves
+ * when bypass is called. They should override bypass and do their cleanup in the
+ * overridden bypass method (be sure to call the parent bypass to ensure proper
+ * processing).
+ * <p>Bypassing a procedure is not like aborting. Aborting a procedure will trigger
* a rollback. And since the {@link #abort(Object)} method is overrideable
* Some procedures may have chosen to ignore the aborting.
*/
@@ -175,12 +179,15 @@ public abstract class Procedure<TEnvironment> implements Comparable<Procedure<TE
}
/**
- * set the bypass to true
- * Only called in {@link ProcedureExecutor#bypassProcedure(long, long, boolean)} for now,
- * DO NOT use this method alone, since we can't just bypass
- * one single procedure. We need to bypass its ancestor too. So making it package private
+ * Set the bypass to true.
+ * Only called in {@link ProcedureExecutor#bypassProcedure(long, long, boolean, boolean)} for now.
+ * DO NOT use this method alone, since we can't just bypass one single procedure. We need to
+ * bypass its ancestor too. If your Procedure has set state, it needs to undo it in here.
+ * @param env Current environment. May be null because of context; e.g. pretty-printing
+ * procedure WALs where there is no 'environment' (and where Procedures that require
+ * an 'environment' won't be run).
*/
- void bypass() {
+ protected void bypass(TEnvironment env) {
this.bypass = true;
}
@@ -707,7 +714,7 @@ public abstract class Procedure<TEnvironment> implements Comparable<Procedure<TE
/**
* Will only be called when loading procedures from procedure store, where we need to record
* whether the procedure has already held a lock. Later we will call
- * {@link #doAcquireLock(Object)} to actually acquire the lock.
+ * {@link #doAcquireLock(Object, ProcedureStore)} to actually acquire the lock.
*/
final void lockedWhenLoading() {
this.lockedWhenLoading = true;
http://git-wip-us.apache.org/repos/asf/hbase/blob/4a609db3/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/ProcedureExecutor.java
----------------------------------------------------------------------
diff --git a/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/ProcedureExecutor.java b/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/ProcedureExecutor.java
index 25c72ba..db58ac7 100644
--- a/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/ProcedureExecutor.java
+++ b/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/ProcedureExecutor.java
@@ -318,7 +318,7 @@ public class ProcedureExecutor<TEnvironment> {
private Configuration conf;
/**
- * Created in the {@link #start(int, boolean)} method. Destroyed in {@link #join()} (FIX! Doing
+ * Created in the {@link #init(int, boolean)} method. Destroyed in {@link #join()} (FIX! Doing
* resource handling rather than observing in a #join is unexpected).
* Overridden when we do the ProcedureTestingUtility.testRecoveryAndDoubleExecution trickery
* (Should be ok).
@@ -326,7 +326,7 @@ public class ProcedureExecutor<TEnvironment> {
private ThreadGroup threadGroup;
/**
- * Created in the {@link #start(int, boolean)} method. Terminated in {@link #join()} (FIX! Doing
+ * Created in the {@link #init(int, boolean)} method. Terminated in {@link #join()} (FIX! Doing
* resource handling rather than observing in a #join is unexpected).
* Overridden when we do the ProcedureTestingUtility.testRecoveryAndDoubleExecution trickery
* (Should be ok).
@@ -334,7 +334,7 @@ public class ProcedureExecutor<TEnvironment> {
private CopyOnWriteArrayList<WorkerThread> workerThreads;
/**
- * Created in the {@link #start(int, boolean)} method. Terminated in {@link #join()} (FIX! Doing
+ * Created in the {@link #init(int, boolean)} method. Terminated in {@link #join()} (FIX! Doing
* resource handling rather than observing in a #join is unexpected).
* Overridden when we do the ProcedureTestingUtility.testRecoveryAndDoubleExecution trickery
* (Should be ok).
@@ -1021,7 +1021,7 @@ public class ProcedureExecutor<TEnvironment> {
* Bypass a procedure. If the procedure is set to bypass, all the logic in
* execute/rollback will be ignored and it will return success, whatever.
* It is used to recover buggy stuck procedures, releasing the lock resources
- * and letting other procedures to run. Bypassing one procedure (and its ancestors will
+ * and letting other procedures run. Bypassing one procedure (and its ancestors will
* be bypassed automatically) may leave the cluster in a middle state, e.g. region
* not assigned, or some hdfs files left behind. After getting rid of those stuck procedures,
* the operators may have to do some clean up on hdfs or schedule some assign procedures
@@ -1048,35 +1048,39 @@ public class ProcedureExecutor<TEnvironment> {
* there. We need to restart the master after bypassing, and letting the problematic
* procedure to execute with bypass=true, so in that condition, the procedure can be
* successfully bypassed.
+ * @param recursive We will do an expensive search for children of each pid. EXPENSIVE!
* @return true if bypass success
* @throws IOException IOException
*/
- public List<Boolean> bypassProcedure(List<Long> pids, long lockWait, boolean force)
+ public List<Boolean> bypassProcedure(List<Long> pids, long lockWait, boolean force,
+ boolean recursive)
throws IOException {
List<Boolean> result = new ArrayList<Boolean>(pids.size());
for(long pid: pids) {
- result.add(bypassProcedure(pid, lockWait, force));
+ result.add(bypassProcedure(pid, lockWait, force, recursive));
}
return result;
}
- boolean bypassProcedure(long pid, long lockWait, boolean force) throws IOException {
+ boolean bypassProcedure(long pid, long lockWait, boolean override, boolean recursive)
+ throws IOException {
Preconditions.checkArgument(lockWait > 0, "lockWait should be positive");
- Procedure<TEnvironment> procedure = getProcedure(pid);
+ final Procedure<TEnvironment> procedure = getProcedure(pid);
if (procedure == null) {
- LOG.debug("Procedure with id={} does not exist, skipping bypass", pid);
+ LOG.debug("Procedure pid={} does not exist, skipping bypass", pid);
return false;
}
- LOG.debug("Begin bypass {} with lockWait={}, force={}", procedure, lockWait, force);
+ LOG.debug("Begin bypass {} with lockWait={}, override={}, recursive={}",
+ procedure, lockWait, override, recursive);
IdLock.Entry lockEntry = procExecutionLock.tryLockEntry(procedure.getProcId(), lockWait);
- if (lockEntry == null && !force) {
+ if (lockEntry == null && !override) {
LOG.debug("Waited {} ms, but {} is still running, skipping bypass with force={}",
- lockWait, procedure, force);
+ lockWait, procedure, override);
return false;
} else if (lockEntry == null) {
LOG.debug("Waited {} ms, but {} is still running, begin bypass with force={}",
- lockWait, procedure, force);
+ lockWait, procedure, override);
}
try {
// check whether the procedure is already finished
@@ -1086,8 +1090,25 @@ public class ProcedureExecutor<TEnvironment> {
}
if (procedure.hasChildren()) {
- LOG.debug("{} has children, skipping bypass", procedure);
- return false;
+ if (recursive) {
+ // EXPENSIVE. Checks each live procedure of which there could be many!!!
+ // Is there another way to get children of a procedure?
+ LOG.info("Recursive bypass on children of pid={}", procedure.getProcId());
+ this.procedures.forEachValue(1 /*Single-threaded*/,
+ // Transformer
+ v -> v.getParentProcId() == procedure.getProcId()? v: null,
+ // Consumer
+ v -> {
+ try {
+ bypassProcedure(v.getProcId(), lockWait, override, recursive);
+ } catch (IOException e) {
+ LOG.warn("Recursive bypass of pid={}", v.getProcId(), e);
+ }
+ });
+ } else {
+ LOG.debug("{} has children, skipping bypass", procedure);
+ return false;
+ }
}
// If the procedure has no parent or no child, we are safe to bypass it in whatever state
@@ -1097,6 +1118,7 @@ public class ProcedureExecutor<TEnvironment> {
LOG.debug("Bypassing procedures in RUNNABLE, WAITING and WAITING_TIMEOUT states "
+ "(with no parent), {}",
procedure);
+ // Question: how is the bypass done here?
return false;
}
@@ -1106,7 +1128,7 @@ public class ProcedureExecutor<TEnvironment> {
Procedure<TEnvironment> current = procedure;
while (current != null) {
LOG.debug("Bypassing {}", current);
- current.bypass();
+ current.bypass(getEnvironment());
store.update(procedure);
long parentID = current.getParentProcId();
current = getProcedure(parentID);
http://git-wip-us.apache.org/repos/asf/hbase/blob/4a609db3/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/ProcedureUtil.java
----------------------------------------------------------------------
diff --git a/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/ProcedureUtil.java b/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/ProcedureUtil.java
index 27802ef..b736220 100644
--- a/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/ProcedureUtil.java
+++ b/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/ProcedureUtil.java
@@ -272,7 +272,7 @@ public final class ProcedureUtil {
}
if (proto.getBypass()) {
- proc.bypass();
+ proc.bypass(null);
}
ProcedureStateSerializer serializer = null;
http://git-wip-us.apache.org/repos/asf/hbase/blob/4a609db3/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/RemoteProcedureException.java
----------------------------------------------------------------------
diff --git a/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/RemoteProcedureException.java b/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/RemoteProcedureException.java
index 6f4795d..f2ad17f 100644
--- a/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/RemoteProcedureException.java
+++ b/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/RemoteProcedureException.java
@@ -74,6 +74,10 @@ public class RemoteProcedureException extends ProcedureException {
return new Exception(cause);
}
+ // NOTE: Does not throw DoNotRetryIOE because it does not
+ // have access (DNRIOE is in the client module). Use
+ // MasterProcedureUtil.unwrapRemoteIOException if need to
+ // throw DNRIOE.
public IOException unwrapRemoteIOException() {
final Exception cause = unwrapRemoteException();
if (cause instanceof IOException) {
http://git-wip-us.apache.org/repos/asf/hbase/blob/4a609db3/hbase-procedure/src/test/java/org/apache/hadoop/hbase/procedure2/TestProcedureBypass.java
----------------------------------------------------------------------
diff --git a/hbase-procedure/src/test/java/org/apache/hadoop/hbase/procedure2/TestProcedureBypass.java b/hbase-procedure/src/test/java/org/apache/hadoop/hbase/procedure2/TestProcedureBypass.java
index 0c59f30..7d587fd 100644
--- a/hbase-procedure/src/test/java/org/apache/hadoop/hbase/procedure2/TestProcedureBypass.java
+++ b/hbase-procedure/src/test/java/org/apache/hadoop/hbase/procedure2/TestProcedureBypass.java
@@ -89,7 +89,7 @@ public class TestProcedureBypass {
long id = procExecutor.submitProcedure(proc);
Thread.sleep(500);
//bypass the procedure
- assertTrue(procExecutor.bypassProcedure(id, 30000, false));
+ assertTrue(procExecutor.bypassProcedure(id, 30000, false, false));
htu.waitFor(5000, () -> proc.isSuccess() && proc.isBypass());
LOG.info("{} finished", proc);
}
@@ -100,7 +100,7 @@ public class TestProcedureBypass {
long id = procExecutor.submitProcedure(proc);
Thread.sleep(500);
//bypass the procedure
- assertTrue(procExecutor.bypassProcedure(id, 1000, true));
+ assertTrue(procExecutor.bypassProcedure(id, 1000, true, false));
//Since the procedure is stuck there, we need to restart the executor to recovery.
ProcedureTestingUtility.restart(procExecutor);
htu.waitFor(5000, () -> proc.isSuccess() && proc.isBypass());
@@ -116,7 +116,7 @@ public class TestProcedureBypass {
.size() > 0);
SuspendProcedure suspendProcedure = (SuspendProcedure)procExecutor.getProcedures().stream()
.filter(p -> p.getParentProcId() == rootId).collect(Collectors.toList()).get(0);
- assertTrue(procExecutor.bypassProcedure(suspendProcedure.getProcId(), 1000, false));
+ assertTrue(procExecutor.bypassProcedure(suspendProcedure.getProcId(), 1000, false, false));
htu.waitFor(5000, () -> proc.isSuccess() && proc.isBypass());
LOG.info("{} finished", proc);
}
@@ -128,14 +128,26 @@ public class TestProcedureBypass {
long id = procExecutor.submitProcedure(proc);
Thread.sleep(500);
// bypass the procedure
- assertFalse(procExecutor.bypassProcedure(id, 1000, false));
- assertTrue(procExecutor.bypassProcedure(id, 1000, true));
+ assertFalse(procExecutor.bypassProcedure(id, 1000, false, false));
+ assertTrue(procExecutor.bypassProcedure(id, 1000, true, false));
htu.waitFor(5000, () -> proc.isSuccess() && proc.isBypass());
LOG.info("{} finished", proc);
}
-
+ @Test
+ public void testBypassingProcedureWithParentRecursive() throws Exception {
+ final RootProcedure proc = new RootProcedure();
+ long rootId = procExecutor.submitProcedure(proc);
+ htu.waitFor(5000, () -> procExecutor.getProcedures().stream()
+ .filter(p -> p.getParentProcId() == rootId).collect(Collectors.toList())
+ .size() > 0);
+ SuspendProcedure suspendProcedure = (SuspendProcedure)procExecutor.getProcedures().stream()
+ .filter(p -> p.getParentProcId() == rootId).collect(Collectors.toList()).get(0);
+ assertTrue(procExecutor.bypassProcedure(rootId, 1000, false, true));
+ htu.waitFor(5000, () -> proc.isSuccess() && proc.isBypass());
+ LOG.info("{} finished", proc);
+ }
@AfterClass
public static void tearDown() throws Exception {
http://git-wip-us.apache.org/repos/asf/hbase/blob/4a609db3/hbase-protocol-shaded/src/main/protobuf/Master.proto
----------------------------------------------------------------------
diff --git a/hbase-protocol-shaded/src/main/protobuf/Master.proto b/hbase-protocol-shaded/src/main/protobuf/Master.proto
index b2a3bda..afc6e64 100644
--- a/hbase-protocol-shaded/src/main/protobuf/Master.proto
+++ b/hbase-protocol-shaded/src/main/protobuf/Master.proto
@@ -99,6 +99,7 @@ message MergeTableRegionsResponse {
message AssignRegionRequest {
required RegionSpecifier region = 1;
+ optional bool override = 2 [default = false];
}
message AssignRegionResponse {
@@ -1005,6 +1006,7 @@ message SetTableStateInMetaRequest {
// Region at a time.
message AssignsRequest {
repeated RegionSpecifier region = 1;
+ optional bool override = 2 [default = false];
}
/** Like Admin's AssignRegionResponse except it can
@@ -1019,6 +1021,7 @@ message AssignsResponse {
*/
message UnassignsRequest {
repeated RegionSpecifier region = 1;
+ optional bool override = 2 [default = false];
}
/** Like Admin's UnassignRegionResponse except it can
@@ -1031,7 +1034,8 @@ message UnassignsResponse {
message BypassProcedureRequest {
repeated uint64 proc_id = 1;
optional uint64 waitTime = 2; // wait time in ms to acquire lock on a procedure
- optional bool force = 3; // if true, procedure is marked for bypass even if its executing
+ optional bool override = 3 [default = false]; // if true, procedure is marked for bypass even if its executing
+ optional bool recursive = 4;
}
message BypassProcedureResponse {
http://git-wip-us.apache.org/repos/asf/hbase/blob/4a609db3/hbase-protocol-shaded/src/main/protobuf/MasterProcedure.proto
----------------------------------------------------------------------
diff --git a/hbase-protocol-shaded/src/main/protobuf/MasterProcedure.proto b/hbase-protocol-shaded/src/main/protobuf/MasterProcedure.proto
index d839167..3ccbb41 100644
--- a/hbase-protocol-shaded/src/main/protobuf/MasterProcedure.proto
+++ b/hbase-protocol-shaded/src/main/protobuf/MasterProcedure.proto
@@ -473,6 +473,7 @@ message RegionStateTransitionStateData {
required RegionStateTransitionState lastState = 2;
optional ServerName assign_candidate = 3;
required bool force_new_plan = 4;
+ optional bool override = 5 [default = false];
}
message RegionRemoteProcedureBaseStateData {
http://git-wip-us.apache.org/repos/asf/hbase/blob/4a609db3/hbase-rsgroup/src/main/java/org/apache/hadoop/hbase/rsgroup/RSGroupInfoManagerImpl.java
----------------------------------------------------------------------
diff --git a/hbase-rsgroup/src/main/java/org/apache/hadoop/hbase/rsgroup/RSGroupInfoManagerImpl.java b/hbase-rsgroup/src/main/java/org/apache/hadoop/hbase/rsgroup/RSGroupInfoManagerImpl.java
index 3f7af6b..e124c54 100644
--- a/hbase-rsgroup/src/main/java/org/apache/hadoop/hbase/rsgroup/RSGroupInfoManagerImpl.java
+++ b/hbase-rsgroup/src/main/java/org/apache/hadoop/hbase/rsgroup/RSGroupInfoManagerImpl.java
@@ -65,6 +65,7 @@ import org.apache.hadoop.hbase.master.MasterServices;
import org.apache.hadoop.hbase.master.ServerListener;
import org.apache.hadoop.hbase.master.TableStateManager;
import org.apache.hadoop.hbase.master.assignment.RegionStateNode;
+import org.apache.hadoop.hbase.master.procedure.MasterProcedureUtil;
import org.apache.hadoop.hbase.net.Address;
import org.apache.hadoop.hbase.procedure2.Procedure;
import org.apache.hadoop.hbase.protobuf.ProtobufMagic;
@@ -873,7 +874,7 @@ final class RSGroupInfoManagerImpl implements RSGroupInfoManager {
Procedure<?> result = masterServices.getMasterProcedureExecutor().getResult(procId);
if (result != null && result.isFailed()) {
throw new IOException("Failed to create group table. " +
- result.getException().unwrapRemoteIOException());
+ MasterProcedureUtil.unwrapRemoteIOException(result));
}
}
}
http://git-wip-us.apache.org/repos/asf/hbase/blob/4a609db3/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
index 265bcc6..851890f 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
@@ -1047,7 +1047,7 @@ public class HMaster extends HRegionServer implements MasterServices {
// as procedures run -- in particular SCPs for crashed servers... One should put up hbase:meta
// if it is down. It may take a while to come online. So, wait here until meta is for sure
// available. That's what waitForMetaOnline does.
- if (!waitUntilMetaOnline()) {
+ if (!waitForMetaOnline()) {
return;
}
this.assignmentManager.joinCluster();
@@ -1079,7 +1079,7 @@ public class HMaster extends HRegionServer implements MasterServices {
// Here we expect hbase:namespace to be online. See inside initClusterSchemaService.
// TODO: Fix this. Namespace is a pain being a sort-of system table. Fold it in to hbase:meta.
// isNamespace does like isMeta and waits until namespace is onlined before allowing progress.
- if (!waitUntilNamespaceOnline()) {
+ if (!waitForNamespaceOnline()) {
return;
}
status.setStatus("Starting cluster schema service");
@@ -1163,7 +1163,7 @@ public class HMaster extends HRegionServer implements MasterServices {
* and we will hold here until operator intervention.
*/
@VisibleForTesting
- public boolean waitUntilMetaOnline() throws InterruptedException {
+ public boolean waitForMetaOnline() throws InterruptedException {
return isRegionOnline(RegionInfoBuilder.FIRST_META_REGIONINFO);
}
@@ -1204,7 +1204,7 @@ public class HMaster extends HRegionServer implements MasterServices {
* @return True if namespace table is up/online.
*/
@VisibleForTesting
- public boolean waitUntilNamespaceOnline() throws InterruptedException {
+ public boolean waitForNamespaceOnline() throws InterruptedException {
List<RegionInfo> ris = this.assignmentManager.getRegionStates().
getRegionsOfTable(TableName.NAMESPACE_TABLE_NAME);
if (ris.isEmpty()) {
http://git-wip-us.apache.org/repos/asf/hbase/blob/4a609db3/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterRpcServices.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterRpcServices.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterRpcServices.java
index 4702ad9..1540105 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterRpcServices.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterRpcServices.java
@@ -29,7 +29,6 @@ import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
-import java.util.function.Function;
import java.util.stream.Collectors;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.ClusterMetricsBuilder;
@@ -44,7 +43,6 @@ import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.UnknownRegionException;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
-import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.MasterSwitchType;
import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.client.RegionInfoBuilder;
@@ -595,7 +593,7 @@ public class MasterRpcServices extends RSRpcServices
BalanceRequest request) throws ServiceException {
try {
return BalanceResponse.newBuilder().setBalancerRan(master.balance(
- request.hasForce() ? request.getForce() : false)).build();
+ request.hasForce()? request.getForce(): false)).build();
} catch (IOException ex) {
throw new ServiceException(ex);
}
@@ -1181,7 +1179,8 @@ public class MasterRpcServices extends RSRpcServices
if (executor.isFinished(procId)) {
builder.setState(GetProcedureResultResponse.State.FINISHED);
if (result.isFailed()) {
- IOException exception = result.getException().unwrapRemoteIOException();
+ IOException exception =
+ MasterProcedureUtil.unwrapRemoteIOException(result);
builder.setException(ForeignExceptionUtil.toProtoForeignException(exception));
}
byte[] resultData = result.getResult();
@@ -2346,15 +2345,16 @@ public class MasterRpcServices extends RSRpcServices
MasterProtos.AssignsResponse.Builder responseBuilder =
MasterProtos.AssignsResponse.newBuilder();
try {
+ boolean override = request.getOverride();
for (HBaseProtos.RegionSpecifier rs: request.getRegionList()) {
- // Assign is synchronous as of hbase-2.2. Need an asynchronous one.
RegionInfo ri = getRegionInfo(rs);
if (ri == null) {
LOG.info("Unknown={}", rs);
responseBuilder.addPid(Procedure.NO_PROC_ID);
continue;
}
- responseBuilder.addPid(this.master.getAssignmentManager().assign(ri));
+ responseBuilder.addPid(this.master.getMasterProcedureExecutor().submitProcedure(this.master
+ .getAssignmentManager().createOneAssignProcedure(ri, override)));
}
return responseBuilder.build();
} catch (IOException ioe) {
@@ -2377,15 +2377,16 @@ public class MasterRpcServices extends RSRpcServices
MasterProtos.UnassignsResponse.Builder responseBuilder =
MasterProtos.UnassignsResponse.newBuilder();
try {
+ boolean override = request.getOverride();
for (HBaseProtos.RegionSpecifier rs: request.getRegionList()) {
- // Unassign is synchronous as of hbase-2.2. Need an asynchronous one.
RegionInfo ri = getRegionInfo(rs);
if (ri == null) {
LOG.info("Unknown={}", rs);
responseBuilder.addPid(Procedure.NO_PROC_ID);
continue;
}
- responseBuilder.addPid(this.master.getAssignmentManager().unassign(ri));
+ responseBuilder.addPid(this.master.getMasterProcedureExecutor().submitProcedure(this.master
+ .getAssignmentManager().createOneUnassignProcedure(ri, override)));
}
return responseBuilder.build();
} catch (IOException ioe) {
@@ -2410,7 +2411,7 @@ public class MasterRpcServices extends RSRpcServices
try {
List<Boolean> ret =
master.getMasterProcedureExecutor().bypassProcedure(request.getProcIdList(),
- request.getWaitTime(), request.getForce());
+ request.getWaitTime(), request.getOverride(), request.getRecursive());
return MasterProtos.BypassProcedureResponse.newBuilder().addAllBypassed(ret).build();
} catch (IOException e) {
throw new ServiceException(e);
http://git-wip-us.apache.org/repos/asf/hbase/blob/4a609db3/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java
index 4244f4d..8b5bb6c 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java
@@ -682,10 +682,13 @@ public class AssignmentManager implements ServerListener {
}
private TransitRegionStateProcedure createAssignProcedure(RegionStateNode regionNode,
- ServerName targetServer) {
+ ServerName targetServer, boolean override) {
TransitRegionStateProcedure proc;
regionNode.lock();
try {
+ if(override && regionNode.getProcedure() != null) {
+ regionNode.unsetProcedure(regionNode.getProcedure());
+ }
assert regionNode.getProcedure() == null;
proc = TransitRegionStateProcedure.assign(getProcedureEnvironment(),
regionNode.getRegionInfo(), targetServer);
@@ -696,6 +699,42 @@ public class AssignmentManager implements ServerListener {
return proc;
}
+ private TransitRegionStateProcedure createUnassignProcedure(RegionStateNode regionNode,
+ boolean override) {
+ TransitRegionStateProcedure proc;
+ regionNode.lock();
+ try {
+ if(override && regionNode.getProcedure() != null) {
+ regionNode.unsetProcedure(regionNode.getProcedure());
+ }
+ assert regionNode.getProcedure() == null;
+ proc = TransitRegionStateProcedure.unassign(getProcedureEnvironment(),
+ regionNode.getRegionInfo());
+ regionNode.setProcedure(proc);
+ } finally {
+ regionNode.unlock();
+ }
+ return proc;
+ }
+
+ /**
+ * Create one TransitRegionStateProcedure to assign a region w/o specifying a target server.
+ * This method is intended for use by HBCK2.
+ */
+ public TransitRegionStateProcedure createOneAssignProcedure(RegionInfo hri, boolean override) {
+ RegionStateNode regionNode = regionStates.getOrCreateRegionStateNode(hri);
+ return createAssignProcedure(regionNode, null, override);
+ }
+
+ /**
+ * Create one TransitRegionStateProcedure to unassign a region.
+ * This method is intended for use by HBCK2.
+ */
+ public TransitRegionStateProcedure createOneUnassignProcedure(RegionInfo hri, boolean override) {
+ RegionStateNode regionNode = regionStates.getOrCreateRegionStateNode(hri);
+ return createUnassignProcedure(regionNode, override);
+ }
+
/**
* Create an array of TransitRegionStateProcedure w/o specifying a target server.
* <p/>
@@ -707,8 +746,8 @@ public class AssignmentManager implements ServerListener {
*/
public TransitRegionStateProcedure[] createAssignProcedures(List<RegionInfo> hris) {
return hris.stream().map(hri -> regionStates.getOrCreateRegionStateNode(hri))
- .map(regionNode -> createAssignProcedure(regionNode, null)).sorted(AssignmentManager::compare)
- .toArray(TransitRegionStateProcedure[]::new);
+ .map(regionNode -> createAssignProcedure(regionNode, null, false))
+ .sorted(AssignmentManager::compare).toArray(TransitRegionStateProcedure[]::new);
}
/**
@@ -719,7 +758,7 @@ public class AssignmentManager implements ServerListener {
Map<ServerName, List<RegionInfo>> assignments) {
return assignments.entrySet().stream()
.flatMap(e -> e.getValue().stream().map(hri -> regionStates.getOrCreateRegionStateNode(hri))
- .map(regionNode -> createAssignProcedure(regionNode, e.getKey())))
+ .map(regionNode -> createAssignProcedure(regionNode, e.getKey(), false)))
.sorted(AssignmentManager::compare).toArray(TransitRegionStateProcedure[]::new);
}
http://git-wip-us.apache.org/repos/asf/hbase/blob/4a609db3/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/TransitRegionStateProcedure.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/TransitRegionStateProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/TransitRegionStateProcedure.java
index dd8415a..df1503b 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/TransitRegionStateProcedure.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/TransitRegionStateProcedure.java
@@ -39,6 +39,7 @@ import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+import org.apache.hbase.thirdparty.com.google.common.annotations.VisibleForTesting;
import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.RegionStateTransitionState;
import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.RegionStateTransitionStateData;
@@ -114,7 +115,8 @@ public class TransitRegionStateProcedure
public TransitRegionStateProcedure() {
}
- private TransitRegionStateProcedure(MasterProcedureEnv env, RegionInfo hri,
+ @VisibleForTesting
+ protected TransitRegionStateProcedure(MasterProcedureEnv env, RegionInfo hri,
ServerName assignCandidate, boolean forceNewPlan, RegionStateTransitionState initialState,
RegionStateTransitionState lastState) {
super(env, hri);
@@ -476,8 +478,9 @@ public class TransitRegionStateProcedure
@Override
protected void serializeStateData(ProcedureStateSerializer serializer) throws IOException {
super.serializeStateData(serializer);
- RegionStateTransitionStateData.Builder builder = RegionStateTransitionStateData.newBuilder()
- .setInitialState(initialState).setLastState(lastState).setForceNewPlan(forceNewPlan);
+ RegionStateTransitionStateData.Builder builder =
+ RegionStateTransitionStateData.newBuilder().setInitialState(initialState)
+ .setLastState(lastState).setForceNewPlan(forceNewPlan);
if (assignCandidate != null) {
builder.setAssignCandidate(ProtobufUtil.toServerName(assignCandidate));
}
http://git-wip-us.apache.org/repos/asf/hbase/blob/4a609db3/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BaseLoadBalancer.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BaseLoadBalancer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BaseLoadBalancer.java
index 52a284e..f4abb1c 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BaseLoadBalancer.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BaseLoadBalancer.java
@@ -51,6 +51,7 @@ import org.apache.hadoop.hbase.master.LoadBalancer;
import org.apache.hadoop.hbase.master.MasterServices;
import org.apache.hadoop.hbase.master.RackManager;
import org.apache.hadoop.hbase.master.RegionPlan;
+import org.apache.hadoop.hbase.master.assignment.AssignmentManager;
import org.apache.hadoop.hbase.master.assignment.RegionStates;
import org.apache.hadoop.hbase.master.balancer.BaseLoadBalancer.Cluster.Action.Type;
import org.apache.hbase.thirdparty.com.google.common.annotations.VisibleForTesting;
@@ -1457,11 +1458,13 @@ public abstract class BaseLoadBalancer implements LoadBalancer {
ServerName oldServerName = entry.getValue();
// In the current set of regions even if one has region replica let us go with
// getting the entire snapshot
- if (this.services != null && this.services.getAssignmentManager() != null) { // for tests
- RegionStates states = this.services.getAssignmentManager().getRegionStates();
- if (!hasRegionReplica && states != null &&
- states.isReplicaAvailableForRegion(region)) {
- hasRegionReplica = true;
+ if (this.services != null) { // for tests
+ AssignmentManager am = this.services.getAssignmentManager();
+ if (am != null) {
+ RegionStates states = am.getRegionStates();
+ if (!hasRegionReplica && states != null && states.isReplicaAvailableForRegion(region)) {
+ hasRegionReplica = true;
+ }
}
}
List<ServerName> localServers = new ArrayList<>();
http://git-wip-us.apache.org/repos/asf/hbase/blob/4a609db3/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/MasterProcedureUtil.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/MasterProcedureUtil.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/MasterProcedureUtil.java
index 58263d3..1e488b6 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/MasterProcedureUtil.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/MasterProcedureUtil.java
@@ -19,9 +19,12 @@ package org.apache.hadoop.hbase.master.procedure;
import java.io.IOException;
import java.util.regex.Pattern;
+
+import org.apache.hadoop.hbase.DoNotRetryIOException;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.master.MasterServices;
import org.apache.hadoop.hbase.procedure2.Procedure;
+import org.apache.hadoop.hbase.procedure2.ProcedureException;
import org.apache.hadoop.hbase.procedure2.ProcedureExecutor;
import org.apache.hadoop.hbase.security.User;
import org.apache.hadoop.hbase.util.NonceKey;
@@ -176,4 +179,17 @@ public final class MasterProcedureUtil {
public static int getServerPriority(ServerProcedureInterface proc) {
return proc.hasMetaTableRegion() ? 100 : 1;
}
+
+ /**
+ * This is a version of unwrapRemoteIOException that can do DoNotRetryIOE.
+ * We need to throw DNRIOE to clients if a Procedure has failed, else they will
+ * keep retrying. The default proc.getException().unwrapRemoteException
+ * doesn't have access to DNRIOE from the procedure2 module.
+ */
+ public static IOException unwrapRemoteIOException(Procedure proc) {
+ Exception e = proc.getException().unwrapRemoteException();
+ // Do not retry ProcedureExceptions!
+ return (e instanceof ProcedureException)? new DoNotRetryIOException(e):
+ proc.getException().unwrapRemoteIOException();
+ }
}
http://git-wip-us.apache.org/repos/asf/hbase/blob/4a609db3/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ProcedureDescriber.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ProcedureDescriber.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ProcedureDescriber.java
index 41b30ad..f9fdad8 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ProcedureDescriber.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ProcedureDescriber.java
@@ -69,7 +69,8 @@ public class ProcedureDescriber {
description.put("LAST_UPDATE", new Date(proc.getLastUpdate()));
if (proc.isFailed()) {
- description.put("ERRORS", proc.getException().unwrapRemoteIOException().getMessage());
+ description.put("ERRORS",
+ MasterProcedureUtil.unwrapRemoteIOException(proc).getMessage());
}
description.put("PARAMETERS", parametersToObject(proc));
http://git-wip-us.apache.org/repos/asf/hbase/blob/4a609db3/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ProcedurePrepareLatch.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ProcedurePrepareLatch.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ProcedurePrepareLatch.java
index 2011c0b..f771210 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ProcedurePrepareLatch.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ProcedurePrepareLatch.java
@@ -99,7 +99,7 @@ public abstract class ProcedurePrepareLatch {
@Override
protected void countDown(final Procedure proc) {
if (proc.hasException()) {
- exception = proc.getException().unwrapRemoteIOException();
+ exception = MasterProcedureUtil.unwrapRemoteIOException(proc);
}
latch.countDown();
}
http://git-wip-us.apache.org/repos/asf/hbase/blob/4a609db3/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ProcedureSyncWait.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ProcedureSyncWait.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ProcedureSyncWait.java
index f72905b..c8ff9f8 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ProcedureSyncWait.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ProcedureSyncWait.java
@@ -160,9 +160,10 @@ public final class ProcedureSyncWait {
throw new IOException("The Master is Aborting");
}
+ // If the procedure fails, we should always have an exception captured. Throw it.
+ // Needs to be an IOE to get out of here.
if (proc.hasException()) {
- // If the procedure fails, we should always have an exception captured. Throw it.
- throw proc.getException().unwrapRemoteIOException();
+ throw MasterProcedureUtil.unwrapRemoteIOException(proc);
} else {
return proc.getResult();
}
http://git-wip-us.apache.org/repos/asf/hbase/blob/4a609db3/hbase-server/src/test/java/org/apache/hadoop/hbase/TestMetaTableAccessor.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/TestMetaTableAccessor.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/TestMetaTableAccessor.java
index eaed0bc..0d21fb8 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/TestMetaTableAccessor.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/TestMetaTableAccessor.java
@@ -109,7 +109,7 @@ public class TestMetaTableAccessor {
@Test
public void testIsMetaWhenAllHealthy() throws InterruptedException {
HMaster m = UTIL.getMiniHBaseCluster().getMaster();
- assertTrue(m.waitUntilMetaOnline());
+ assertTrue(m.waitForMetaOnline());
}
@Test
@@ -118,7 +118,7 @@ public class TestMetaTableAccessor {
int index = UTIL.getMiniHBaseCluster().getServerWithMeta();
HRegionServer rsWithMeta = UTIL.getMiniHBaseCluster().getRegionServer(index);
rsWithMeta.abort("TESTING");
- assertTrue(m.waitUntilMetaOnline());
+ assertTrue(m.waitForMetaOnline());
}
/**
http://git-wip-us.apache.org/repos/asf/hbase/blob/4a609db3/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestHbck.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestHbck.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestHbck.java
index 80c5a36..7680845 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestHbck.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestHbck.java
@@ -116,7 +116,8 @@ public class TestHbck {
//bypass the procedure
List<Long> pids = Arrays.<Long>asList(procId);
- List<Boolean> results = TEST_UTIL.getHbck().bypassProcedure(pids, 30000, false);
+ List<Boolean> results =
+ TEST_UTIL.getHbck().bypassProcedure(pids, 30000, false, false);
assertTrue("Failed to by pass procedure!", results.get(0));
TEST_UTIL.waitFor(5000, () -> proc.isSuccess() && proc.isBypass());
LOG.info("{} finished", proc);
http://git-wip-us.apache.org/repos/asf/hbase/blob/4a609db3/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestRegionBypass.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestRegionBypass.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestRegionBypass.java
new file mode 100644
index 0000000..97dc37b
--- /dev/null
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestRegionBypass.java
@@ -0,0 +1,194 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.master.assignment;
+
+import static org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.RegionStateTransitionState.REGION_STATE_TRANSITION_OPEN;
+import static org.junit.Assert.assertTrue;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.concurrent.CountDownLatch;
+
+import org.apache.hadoop.hbase.HBaseClassTestRule;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.ServerName;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.Admin;
+import org.apache.hadoop.hbase.client.RegionInfo;
+import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv;
+import org.apache.hadoop.hbase.procedure2.Procedure;
+import org.apache.hadoop.hbase.procedure2.ProcedureSuspendedException;
+import org.apache.hadoop.hbase.procedure2.ProcedureYieldException;
+import org.apache.hadoop.hbase.testclassification.LargeTests;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.junit.AfterClass;
+import org.junit.Before;
+import org.junit.BeforeClass;
+import org.junit.ClassRule;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+import org.junit.rules.TestName;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.RegionStateTransitionState;
+
+
+/**
+ * Tests bypass on a region assign/unassign
+ */
+@Category({LargeTests.class})
+public class TestRegionBypass {
+ private final static Logger LOG = LoggerFactory.getLogger(TestRegionBypass.class);
+
+ @ClassRule
+ public static final HBaseClassTestRule CLASS_RULE =
+ HBaseClassTestRule.forClass(TestRegionBypass.class);
+
+ @Rule
+ public TestName name = new TestName();
+
+ private static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
+ private TableName tableName;
+
+ @BeforeClass
+ public static void startCluster() throws Exception {
+ TEST_UTIL.startMiniCluster(2);
+ }
+
+ @AfterClass
+ public static void stopCluster() throws Exception {
+ TEST_UTIL.shutdownMiniCluster();
+ }
+
+ @Before
+ public void before() throws IOException {
+ this.tableName = TableName.valueOf(this.name.getMethodName());
+ // Create a table. Has one region at least.
+ TEST_UTIL.createTable(this.tableName, Bytes.toBytes("cf"));
+
+ }
+
+ @Test
+ public void testBypass() throws IOException {
+ Admin admin = TEST_UTIL.getAdmin();
+ MasterProcedureEnv env =
+ TEST_UTIL.getHBaseCluster().getMaster().getMasterProcedureExecutor().getEnvironment();
+ List<RegionInfo> regions = admin.getRegions(this.tableName);
+ for (RegionInfo ri: regions) {
+ admin.unassign(ri.getRegionName(), false);
+ }
+ List<Long> pids = new ArrayList<>(regions.size());
+ for (RegionInfo ri: regions) {
+ Procedure<MasterProcedureEnv> p = new StallingAssignProcedure(env, ri, null, false,
+ RegionStateTransitionState.REGION_STATE_TRANSITION_GET_ASSIGN_CANDIDATE,
+ RegionStateTransitionState.REGION_STATE_TRANSITION_CONFIRM_OPENED);
+ pids.add(TEST_UTIL.getHBaseCluster().getMaster().getMasterProcedureExecutor().
+ submitProcedure(p));
+ }
+ for (Long pid: pids) {
+ while (!TEST_UTIL.getHBaseCluster().getMaster().getMasterProcedureExecutor().isStarted(pid)) {
+ Thread.currentThread().yield();
+ }
+ }
+ List<Procedure<MasterProcedureEnv>> ps =
+ TEST_UTIL.getHBaseCluster().getMaster().getMasterProcedureExecutor().getProcedures();
+ for (Procedure<MasterProcedureEnv> p: ps) {
+ if (p instanceof StallingAssignProcedure) {
+ List<Boolean> bs = TEST_UTIL.getHbck().
+ bypassProcedure(Arrays.asList(p.getProcId()), 1000, true, false);
+ for (Boolean b: bs) {
+ LOG.info("BYPASSED {} {}", p.getProcId(), b);
+ }
+ }
+ }
+ // Try to assign WITHOUT the override flag. Should fail!
+ for (RegionInfo ri: regions) {
+ try {
+ admin.assign(ri.getRegionName());
+ } catch (Throwable dnrioe) {
+ // Expected
+ LOG.info("Expected {}", dnrioe);
+ }
+ }
+ while (!TEST_UTIL.getHBaseCluster().getMaster().getMasterProcedureExecutor().
+ getActiveProcIds().isEmpty()) {
+ Thread.currentThread().yield();
+ }
+ // Now assign with the override flag.
+ for (RegionInfo ri: regions) {
+ TEST_UTIL.getHbck().assigns(Arrays.<String>asList(ri.getEncodedName()), true);
+ }
+ while (!TEST_UTIL.getHBaseCluster().getMaster().getMasterProcedureExecutor().
+ getActiveProcIds().isEmpty()) {
+ Thread.currentThread().yield();
+ }
+ for (RegionInfo ri: regions) {
+ assertTrue(ri.toString(), TEST_UTIL.getMiniHBaseCluster().getMaster().getAssignmentManager().
+ getRegionStates().isRegionOnline(ri));
+ }
+ }
+
+ /**
+ * An AssignProcedure that Stalls just before the finish.
+ */
+ public static class StallingAssignProcedure extends TransitRegionStateProcedure{
+ public final CountDownLatch latch = new CountDownLatch(2);
+
+ public StallingAssignProcedure(){}
+
+ public StallingAssignProcedure(MasterProcedureEnv env, RegionInfo hri,
+ ServerName assignCandidate, boolean forceNewPlan, RegionStateTransitionState initialState,
+ RegionStateTransitionState lastState) {
+ super(env, hri, assignCandidate, forceNewPlan, initialState, lastState);
+ init(env);
+ }
+
+ private void init(MasterProcedureEnv env){
+ RegionStateNode regionNode =
+ env.getAssignmentManager().getRegionStates().getOrCreateRegionStateNode(getRegion());
+ regionNode.setProcedure(this);
+ }
+
+
+ @Override
+ protected Flow executeFromState(MasterProcedureEnv env, RegionStateTransitionState state)
+ throws ProcedureSuspendedException, ProcedureYieldException, InterruptedException {
+ switch (state) {
+ case REGION_STATE_TRANSITION_GET_ASSIGN_CANDIDATE:
+ LOG.info("LATCH1 {}", this.latch.getCount());
+ this.latch.countDown();
+ setNextState(REGION_STATE_TRANSITION_OPEN);
+ return Flow.HAS_MORE_STATE;
+ case REGION_STATE_TRANSITION_OPEN:
+ if (latch.getCount() == 0) {
+ LOG.info("LATCH3 {}", this.latch.getCount());
+ return Flow.NO_MORE_STATE;
+ } else {
+ LOG.info("LATCH2 {}", this.latch.getCount());
+ return Flow.HAS_MORE_STATE;
+ }
+ default:
+ throw new UnsupportedOperationException("unhandled state=" + state);
+ }
+ }
+ }
+}
http://git-wip-us.apache.org/repos/asf/hbase/blob/4a609db3/hbase-shell/src/main/ruby/shell/commands/list_procedures.rb
----------------------------------------------------------------------
diff --git a/hbase-shell/src/main/ruby/shell/commands/list_procedures.rb b/hbase-shell/src/main/ruby/shell/commands/list_procedures.rb
index 68842a0..1d86b8d 100644
--- a/hbase-shell/src/main/ruby/shell/commands/list_procedures.rb
+++ b/hbase-shell/src/main/ruby/shell/commands/list_procedures.rb
@@ -31,8 +31,7 @@ EOF
end
def command
- formatter.header(%w[Id Name State Submitted_Time Last_Update Parameters])
-
+ formatter.header(%w[PID Name State Submitted Last_Update Parameters])
list = JSON.parse(admin.list_procedures)
list.each do |proc|
submitted_time = Time.at(Integer(proc['submittedTime']) / 1000).to_s
http://git-wip-us.apache.org/repos/asf/hbase/blob/4a609db3/hbase-shell/src/test/java/org/apache/hadoop/hbase/client/TestShell.java
----------------------------------------------------------------------
diff --git a/hbase-shell/src/test/java/org/apache/hadoop/hbase/client/TestShell.java b/hbase-shell/src/test/java/org/apache/hadoop/hbase/client/TestShell.java
index ab36edc..9bb8eac 100644
--- a/hbase-shell/src/test/java/org/apache/hadoop/hbase/client/TestShell.java
+++ b/hbase-shell/src/test/java/org/apache/hadoop/hbase/client/TestShell.java
@@ -39,5 +39,4 @@ public class TestShell extends AbstractTestShell {
// Start all ruby tests
jruby.runScriptlet(PathType.ABSOLUTE, "src/test/ruby/tests_runner.rb");
}
-
}
http://git-wip-us.apache.org/repos/asf/hbase/blob/4a609db3/hbase-shell/src/test/ruby/shell/shell_test.rb
----------------------------------------------------------------------
diff --git a/hbase-shell/src/test/ruby/shell/shell_test.rb b/hbase-shell/src/test/ruby/shell/shell_test.rb
index c1e9017..abf9235 100644
--- a/hbase-shell/src/test/ruby/shell/shell_test.rb
+++ b/hbase-shell/src/test/ruby/shell/shell_test.rb
@@ -85,9 +85,9 @@ class ShellTest < Test::Unit::TestCase
define_test "Shell::Shell interactive mode should not throw" do
# incorrect number of arguments
- @shell.command('create', 'foo')
- @shell.command('create', 'foo', 'family_1')
+ @shell.command('create', 'nothrow_table')
+ @shell.command('create', 'nothrow_table', 'family_1')
# create a table that exists
- @shell.command('create', 'foo', 'family_1')
+ @shell.command('create', 'nothrow_table', 'family_1')
end
end