You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by zh...@apache.org on 2018/01/19 13:24:57 UTC
[23/31] hbase git commit: HBASE-19815 Flakey TestAssignmentManager.testAssignWithRandExec (Part Two).
HBASE-19815 Flakey TestAssignmentManager.testAssignWithRandExec (Part Two).
Part One cleaned up a ClassCastException.
Part Two adds the ServerCrashProcedure#handleRIT behavior to RecoverMetaProcedure.
Also adds debug logging to the test.
Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/581fabe7
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/581fabe7
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/581fabe7
Branch: refs/heads/HBASE-19064
Commit: 581fabe7b2177a090af33517f2f7cb1cdab2c64b
Parents: 646770d
Author: Michael Stack <st...@apache.org>
Authored: Wed Jan 17 22:35:35 2018 -0800
Committer: Michael Stack <st...@apache.org>
Committed: Thu Jan 18 11:32:21 2018 -0800
----------------------------------------------------------------------
.../master/procedure/RecoverMetaProcedure.java | 38 +++++++++++++++++---
.../assignment/TestAssignmentManager.java | 9 ++++-
2 files changed, 41 insertions(+), 6 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hbase/blob/581fabe7/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/RecoverMetaProcedure.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/RecoverMetaProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/RecoverMetaProcedure.java
index 50ef3e0..70d0d55 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/RecoverMetaProcedure.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/RecoverMetaProcedure.java
@@ -28,6 +28,8 @@ import org.apache.hadoop.hbase.client.RegionInfoBuilder;
import org.apache.hadoop.hbase.client.RegionReplicaUtil;
import org.apache.hadoop.hbase.master.MasterServices;
import org.apache.hadoop.hbase.master.assignment.AssignProcedure;
+import org.apache.hadoop.hbase.master.assignment.AssignmentManager;
+import org.apache.hadoop.hbase.master.assignment.RegionTransitionProcedure;
import org.apache.hadoop.hbase.procedure2.ProcedureStateSerializer;
import org.apache.hadoop.hbase.procedure2.ProcedureSuspendedException;
import org.apache.hadoop.hbase.procedure2.ProcedureYieldException;
@@ -126,17 +128,17 @@ public class RecoverMetaProcedure
RegionInfoBuilder.FIRST_META_REGIONINFO, this.replicaId);
AssignProcedure metaAssignProcedure;
+ AssignmentManager am = master.getAssignmentManager();
if (failedMetaServer != null) {
- LOG.info(this + "; Assigning meta with new plan. previous meta server=" +
- failedMetaServer);
- metaAssignProcedure = master.getAssignmentManager().createAssignProcedure(hri);
+ handleRIT(env, hri, this.failedMetaServer);
+ LOG.info(this + "; Assigning meta with new plan; previous server=" + failedMetaServer);
+ metaAssignProcedure = am.createAssignProcedure(hri);
} else {
// get server carrying meta from zk
ServerName metaServer =
MetaTableLocator.getMetaRegionState(master.getZooKeeper()).getServerName();
LOG.info(this + "; Retaining meta assignment to server=" + metaServer);
- metaAssignProcedure =
- master.getAssignmentManager().createAssignProcedure(hri, metaServer);
+ metaAssignProcedure = am.createAssignProcedure(hri, metaServer);
}
addChildProcedure(metaAssignProcedure);
@@ -152,6 +154,32 @@ public class RecoverMetaProcedure
return Flow.HAS_MORE_STATE;
}
+ /**
+ * Is the region stuck assigning to this failedMetaServer? If so, cancel the call
+ * just as we do over in ServerCrashProcedure#handleRIT except less to do here; less context
+ * to carry.
+ */
+ private void handleRIT(MasterProcedureEnv env, RegionInfo ri, ServerName crashedServerName) {
+ AssignmentManager am = env.getAssignmentManager();
+ RegionTransitionProcedure rtp = am.getRegionStates().getRegionTransitionProcedure(ri);
+ if (rtp == null) {
+ return; // Nothing to do. Not in RIT.
+ }
+ // Make sure the RIT is against this crashed server. In the case where there are many
+ // processings of a crashed server -- backed up for whatever reason (slow WAL split)
+ // -- then a previous SCP may have already failed an assign, etc., and it may have a
+ // new location target; DO NOT fail these else we make for assign flux.
+ ServerName rtpServerName = rtp.getServer(env);
+ if (rtpServerName == null) {
+ LOG.warn("RIT with ServerName null! " + rtp);
+ } else if (rtpServerName.equals(crashedServerName)) {
+ LOG.info("pid=" + getProcId() + " found RIT " + rtp + "; " +
+ rtp.getRegionState(env).toShortString());
+ rtp.remoteCallFailed(env, crashedServerName,
+ new ServerCrashException(getProcId(), crashedServerName));
+ }
+ }
+
@Override
protected void rollbackState(MasterProcedureEnv env,
MasterProcedureProtos.RecoverMetaState recoverMetaState)
http://git-wip-us.apache.org/repos/asf/hbase/blob/581fabe7/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestAssignmentManager.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestAssignmentManager.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestAssignmentManager.java
index 3ab915b..9b9f624 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestAssignmentManager.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestAssignmentManager.java
@@ -24,6 +24,7 @@ import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
import java.io.IOException;
+import java.io.InterruptedIOException;
import java.net.SocketTimeoutException;
import java.util.NavigableMap;
import java.util.Random;
@@ -206,7 +207,7 @@ public class TestAssignmentManager {
rsDispatcher.setMockRsExecutor(new RandRsExecutor());
// Loop a bunch of times so we hit various combos of exceptions.
for (int i = 0; i < 10; i++) {
- LOG.info("" + i);
+ LOG.info("ROUND=" + i);
AssignProcedure proc = am.createAssignProcedure(hri);
waitOnFuture(submitProcedure(proc));
}
@@ -445,6 +446,12 @@ public class TestAssignmentManager {
return future.get(5, TimeUnit.SECONDS);
} catch (ExecutionException e) {
LOG.info("ExecutionException", e);
+ Exception ee = (Exception)e.getCause();
+ if (ee instanceof InterruptedIOException) {
+ for (Procedure p: this.master.getMasterProcedureExecutor().getProcedures()) {
+ LOG.info(p.toStringDetails());
+ }
+ }
throw (Exception)e.getCause();
}
}