You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by zh...@apache.org on 2017/12/21 13:24:57 UTC
[08/42] hbase git commit: HBASE-19218 Master stuck thinking
hbase:namespace is assigned after restart preventing intialization
Signed-off-by: Li Xiang
HBASE-19218 Master stuck thinking hbase:namespace is assigned after restart preventing intialization
Signed-off-by: Li Xiang <ea...@gmail.com>
Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/7f938dd9
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/7f938dd9
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/7f938dd9
Branch: refs/heads/HBASE-19397
Commit: 7f938dd980c6bfaf988ea12592df0f4f2d1805df
Parents: c811d7f
Author: Michael Stack <st...@apache.org>
Authored: Wed Dec 20 16:32:39 2017 -0800
Committer: Michael Stack <st...@apache.org>
Committed: Wed Dec 20 21:47:10 2017 -0800
----------------------------------------------------------------------
.../procedure2/RemoteProcedureDispatcher.java | 5 +
.../assignment/RegionTransitionProcedure.java | 3 +-
.../master/snapshot/TestAssignProcedure.java | 109 +++++++++++++++++++
3 files changed, 115 insertions(+), 2 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hbase/blob/7f938dd9/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/RemoteProcedureDispatcher.java
----------------------------------------------------------------------
diff --git a/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/RemoteProcedureDispatcher.java b/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/RemoteProcedureDispatcher.java
index 4cee524..2a4fe90 100644
--- a/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/RemoteProcedureDispatcher.java
+++ b/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/RemoteProcedureDispatcher.java
@@ -167,6 +167,11 @@ public abstract class RemoteProcedureDispatcher<TEnv, TRemote extends Comparable
* @return True if we successfully added the operation.
*/
public boolean addOperationToNode(final TRemote key, RemoteProcedure rp) {
+ if (key == null) {
+ // Key is remote server name. Be careful. It could have been nulled by a concurrent
+ // ServerCrashProcedure shutting down outstanding RPC requests. See remoteCallFailed.
+ return false;
+ }
assert key != null : "found null key for node";
BufferNode node = nodeMap.get(key);
if (node == null) {
http://git-wip-us.apache.org/repos/asf/hbase/blob/7f938dd9/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionTransitionProcedure.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionTransitionProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionTransitionProcedure.java
index 17ba75a..e34c703 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionTransitionProcedure.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionTransitionProcedure.java
@@ -183,7 +183,6 @@ public abstract class RegionTransitionProcedure
public void remoteCallFailed(final MasterProcedureEnv env,
final ServerName serverName, final IOException exception) {
final RegionStateNode regionNode = getRegionState(env);
- assert serverName.equals(regionNode.getRegionLocation());
String msg = exception.getMessage() == null? exception.getClass().getSimpleName():
exception.getMessage();
LOG.warn("Remote call failed " + this + "; " + regionNode.toShortString() +
@@ -208,7 +207,7 @@ public abstract class RegionTransitionProcedure
*/
protected boolean addToRemoteDispatcher(final MasterProcedureEnv env,
final ServerName targetServer) {
- assert targetServer.equals(getRegionState(env).getRegionLocation()) :
+ assert targetServer == null || targetServer.equals(getRegionState(env).getRegionLocation()):
"targetServer=" + targetServer + " getRegionLocation=" +
getRegionState(env).getRegionLocation(); // TODO
http://git-wip-us.apache.org/repos/asf/hbase/blob/7f938dd9/hbase-server/src/test/java/org/apache/hadoop/hbase/master/snapshot/TestAssignProcedure.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/snapshot/TestAssignProcedure.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/snapshot/TestAssignProcedure.java
index 1f93ff1..e8a3081 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/snapshot/TestAssignProcedure.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/snapshot/TestAssignProcedure.java
@@ -18,17 +18,30 @@
*/
package org.apache.hadoop.hbase.master.snapshot;
+import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
+import java.util.concurrent.atomic.AtomicBoolean;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.CategoryBasedTimeout;
+import org.apache.hadoop.hbase.HBaseConfiguration;
+import org.apache.hadoop.hbase.Server;
+import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.client.RegionInfoBuilder;
+import org.apache.hadoop.hbase.master.MasterServices;
+import org.apache.hadoop.hbase.master.ServerManager;
import org.apache.hadoop.hbase.master.assignment.AssignProcedure;
+import org.apache.hadoop.hbase.master.assignment.AssignmentManager;
+import org.apache.hadoop.hbase.master.assignment.RegionStates;
+import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv;
+import org.apache.hadoop.hbase.master.procedure.RSProcedureDispatcher;
+import org.apache.hadoop.hbase.procedure2.ProcedureSuspendedException;
import org.apache.hadoop.hbase.testclassification.RegionServerTests;
import org.apache.hadoop.hbase.testclassification.SmallTests;
import org.apache.hadoop.hbase.util.Bytes;
@@ -37,7 +50,9 @@ import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.junit.rules.TestName;
import org.junit.rules.TestRule;
+import org.mockito.Mockito;
+import static junit.framework.TestCase.assertFalse;
import static junit.framework.TestCase.assertTrue;
@@ -50,6 +65,100 @@ public class TestAssignProcedure {
withLookingForStuckThread(true).
build();
+ /**
+ * An override that opens up the updateTransition method inside in AssignProcedure so can call it
+ * below directly in test and mess with targetServer. Used by test
+ * {@link #testTargetServerBeingNulledOnUs()}.
+ */
+ public static class TargetServerBeingNulledOnUsAssignProcedure extends AssignProcedure {
+ public final AtomicBoolean addToRemoteDispatcherWasCalled = new AtomicBoolean(false);
+ public final AtomicBoolean remoteCallFailedWasCalled = new AtomicBoolean(false);
+ private final RegionStates.RegionStateNode rsn;
+
+ public TargetServerBeingNulledOnUsAssignProcedure(RegionInfo regionInfo,
+ RegionStates.RegionStateNode rsn) {
+ super(regionInfo);
+ this.rsn = rsn;
+ }
+
+ /**
+ * Override so can change access from protected to public.
+ */
+ @Override
+ public boolean updateTransition(MasterProcedureEnv env, RegionStates.RegionStateNode regionNode)
+ throws IOException, ProcedureSuspendedException {
+ return super.updateTransition(env, regionNode);
+ }
+
+ @Override
+ protected boolean addToRemoteDispatcher(MasterProcedureEnv env, ServerName targetServer) {
+ // So, mock the ServerCrashProcedure nulling out the targetServer AFTER updateTransition
+ // has been called and BEFORE updateTransition gets to here.
+ // We used to throw a NullPointerException. Now we just say the assign failed so it will
+ // be rescheduled.
+ boolean b = super.addToRemoteDispatcher(env, null);
+ assertFalse(b);
+ // Assert we were actually called.
+ this.addToRemoteDispatcherWasCalled.set(true);
+ return b;
+ }
+
+ @Override
+ public RegionStates.RegionStateNode getRegionState(MasterProcedureEnv env) {
+ // Do this so we don't have to mock a bunch of stuff.
+ return this.rsn;
+ }
+
+ @Override
+ public void remoteCallFailed(final MasterProcedureEnv env,
+ final ServerName serverName, final IOException exception) {
+ // Just skip this remoteCallFailed. Its too hard to mock. Assert it is called though.
+ // Happens after the code we are testing has been called.
+ this.remoteCallFailedWasCalled.set(true);
+ }
+ };
+
+ /**
+ * Test that we deal with ServerCrashProcedure zero'ing out the targetServer in the
+ * RegionStateNode in the midst of our doing an assign. The trickery is done above in
+ * TargetServerBeingNulledOnUsAssignProcedure. We skip a bunch of logic to get at the guts
+ * where the problem happens (We also skip-out the failure handling because it'd take a bunch
+ * of mocking to get it to run). Fix is inside in RemoteProcedureDispatch#addOperationToNode.
+ * It now notices empty targetServer and just returns false so we fall into failure processing
+ * and we'll reassign elsewhere instead of NPE'ing. The fake of ServerCrashProcedure nulling out
+ * the targetServer happens inside in updateTransition just after it was called but before it
+ * gets to the near the end when addToRemoteDispatcher is called. See the
+ * TargetServerBeingNulledOnUsAssignProcedure class above. See HBASE-19218.
+ * Before fix, this test would fail w/ a NullPointerException.
+ */
+ @Test
+ public void testTargetServerBeingNulledOnUs() throws ProcedureSuspendedException, IOException {
+ TableName tn = TableName.valueOf(this.name.getMethodName());
+ RegionInfo ri = RegionInfoBuilder.newBuilder(tn).build();
+ // Create an RSN with location/target server. Will be cleared above in addToRemoteDispatcher to
+ // simulate issue in HBASE-19218
+ RegionStates.RegionStateNode rsn = new RegionStates.RegionStateNode(ri);
+ rsn.setRegionLocation(ServerName.valueOf("server.example.org", 0, 0));
+ MasterProcedureEnv env = Mockito.mock(MasterProcedureEnv.class);
+ AssignmentManager am = Mockito.mock(AssignmentManager.class);
+ ServerManager sm = Mockito.mock(ServerManager.class);
+ Mockito.when(sm.isServerOnline(Mockito.any())).thenReturn(true);
+ MasterServices ms = Mockito.mock(MasterServices.class);
+ Mockito.when(ms.getServerManager()).thenReturn(sm);
+ Configuration configuration = HBaseConfiguration.create();
+ Mockito.when(ms.getConfiguration()).thenReturn(configuration);
+ Mockito.when(env.getAssignmentManager()).thenReturn(am);
+ Mockito.when(env.getMasterServices()).thenReturn(ms);
+ RSProcedureDispatcher rsd = new RSProcedureDispatcher(ms);
+ Mockito.when(env.getRemoteDispatcher()).thenReturn(rsd);
+
+ TargetServerBeingNulledOnUsAssignProcedure assignProcedure =
+ new TargetServerBeingNulledOnUsAssignProcedure(ri, rsn);
+ assignProcedure.updateTransition(env, rsn);
+ assertTrue(assignProcedure.remoteCallFailedWasCalled.get());
+ assertTrue(assignProcedure.addToRemoteDispatcherWasCalled.get());
+ }
+
@Test
public void testSimpleComparator() {
List<AssignProcedure> procedures = new ArrayList<AssignProcedure>();