You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by zh...@apache.org on 2017/12/21 13:24:57 UTC

[08/42] hbase git commit: HBASE-19218 Master stuck thinking hbase:namespace is assigned after restart preventing intialization Signed-off-by: Li Xiang

HBASE-19218 Master stuck thinking hbase:namespace is assigned after restart preventing intialization
Signed-off-by: Li Xiang <ea...@gmail.com>


Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/7f938dd9
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/7f938dd9
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/7f938dd9

Branch: refs/heads/HBASE-19397
Commit: 7f938dd980c6bfaf988ea12592df0f4f2d1805df
Parents: c811d7f
Author: Michael Stack <st...@apache.org>
Authored: Wed Dec 20 16:32:39 2017 -0800
Committer: Michael Stack <st...@apache.org>
Committed: Wed Dec 20 21:47:10 2017 -0800

----------------------------------------------------------------------
 .../procedure2/RemoteProcedureDispatcher.java   |   5 +
 .../assignment/RegionTransitionProcedure.java   |   3 +-
 .../master/snapshot/TestAssignProcedure.java    | 109 +++++++++++++++++++
 3 files changed, 115 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hbase/blob/7f938dd9/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/RemoteProcedureDispatcher.java
----------------------------------------------------------------------
diff --git a/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/RemoteProcedureDispatcher.java b/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/RemoteProcedureDispatcher.java
index 4cee524..2a4fe90 100644
--- a/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/RemoteProcedureDispatcher.java
+++ b/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/RemoteProcedureDispatcher.java
@@ -167,6 +167,11 @@ public abstract class RemoteProcedureDispatcher<TEnv, TRemote extends Comparable
    * @return True if we successfully added the operation.
    */
   public boolean addOperationToNode(final TRemote key, RemoteProcedure rp) {
+    if (key == null) {
+      // Key is remote server name. Be careful. It could have been nulled by a concurrent
+      // ServerCrashProcedure shutting down outstanding RPC requests. See remoteCallFailed.
+      return false;
+    }
     assert key != null : "found null key for node";
     BufferNode node = nodeMap.get(key);
     if (node == null) {

http://git-wip-us.apache.org/repos/asf/hbase/blob/7f938dd9/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionTransitionProcedure.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionTransitionProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionTransitionProcedure.java
index 17ba75a..e34c703 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionTransitionProcedure.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionTransitionProcedure.java
@@ -183,7 +183,6 @@ public abstract class RegionTransitionProcedure
   public void remoteCallFailed(final MasterProcedureEnv env,
       final ServerName serverName, final IOException exception) {
     final RegionStateNode regionNode = getRegionState(env);
-    assert serverName.equals(regionNode.getRegionLocation());
     String msg = exception.getMessage() == null? exception.getClass().getSimpleName():
       exception.getMessage();
     LOG.warn("Remote call failed " + this + "; " + regionNode.toShortString() +
@@ -208,7 +207,7 @@ public abstract class RegionTransitionProcedure
    */
   protected boolean addToRemoteDispatcher(final MasterProcedureEnv env,
       final ServerName targetServer) {
-    assert targetServer.equals(getRegionState(env).getRegionLocation()) :
+    assert targetServer == null || targetServer.equals(getRegionState(env).getRegionLocation()):
       "targetServer=" + targetServer + " getRegionLocation=" +
         getRegionState(env).getRegionLocation(); // TODO
 

http://git-wip-us.apache.org/repos/asf/hbase/blob/7f938dd9/hbase-server/src/test/java/org/apache/hadoop/hbase/master/snapshot/TestAssignProcedure.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/snapshot/TestAssignProcedure.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/snapshot/TestAssignProcedure.java
index 1f93ff1..e8a3081 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/snapshot/TestAssignProcedure.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/snapshot/TestAssignProcedure.java
@@ -18,17 +18,30 @@
  */
 package org.apache.hadoop.hbase.master.snapshot;
 
+import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.List;
+import java.util.concurrent.atomic.AtomicBoolean;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hbase.CategoryBasedTimeout;
+import org.apache.hadoop.hbase.HBaseConfiguration;
+import org.apache.hadoop.hbase.Server;
+import org.apache.hadoop.hbase.ServerName;
 import org.apache.hadoop.hbase.TableName;
 import org.apache.hadoop.hbase.client.RegionInfo;
 import org.apache.hadoop.hbase.client.RegionInfoBuilder;
+import org.apache.hadoop.hbase.master.MasterServices;
+import org.apache.hadoop.hbase.master.ServerManager;
 import org.apache.hadoop.hbase.master.assignment.AssignProcedure;
+import org.apache.hadoop.hbase.master.assignment.AssignmentManager;
+import org.apache.hadoop.hbase.master.assignment.RegionStates;
+import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv;
+import org.apache.hadoop.hbase.master.procedure.RSProcedureDispatcher;
+import org.apache.hadoop.hbase.procedure2.ProcedureSuspendedException;
 import org.apache.hadoop.hbase.testclassification.RegionServerTests;
 import org.apache.hadoop.hbase.testclassification.SmallTests;
 import org.apache.hadoop.hbase.util.Bytes;
@@ -37,7 +50,9 @@ import org.junit.Test;
 import org.junit.experimental.categories.Category;
 import org.junit.rules.TestName;
 import org.junit.rules.TestRule;
+import org.mockito.Mockito;
 
+import static junit.framework.TestCase.assertFalse;
 import static junit.framework.TestCase.assertTrue;
 
 
@@ -50,6 +65,100 @@ public class TestAssignProcedure {
       withLookingForStuckThread(true).
       build();
 
+  /**
+   * An override that opens up the updateTransition method inside in AssignProcedure so can call it
+   * below directly in test and mess with targetServer. Used by test
+   * {@link #testTargetServerBeingNulledOnUs()}.
+   */
+  public static class TargetServerBeingNulledOnUsAssignProcedure extends AssignProcedure {
+    public final AtomicBoolean addToRemoteDispatcherWasCalled = new AtomicBoolean(false);
+    public final AtomicBoolean remoteCallFailedWasCalled = new AtomicBoolean(false);
+    private final RegionStates.RegionStateNode rsn;
+
+    public TargetServerBeingNulledOnUsAssignProcedure(RegionInfo regionInfo,
+        RegionStates.RegionStateNode rsn) {
+      super(regionInfo);
+      this.rsn = rsn;
+    }
+
+    /**
+     * Override so can change access from protected to public.
+     */
+    @Override
+    public boolean updateTransition(MasterProcedureEnv env, RegionStates.RegionStateNode regionNode)
+        throws IOException, ProcedureSuspendedException {
+      return super.updateTransition(env, regionNode);
+    }
+
+    @Override
+    protected boolean addToRemoteDispatcher(MasterProcedureEnv env, ServerName targetServer) {
+      // So, mock the ServerCrashProcedure nulling out the targetServer AFTER updateTransition
+      // has been called and BEFORE updateTransition gets to here.
+      // We used to throw a NullPointerException. Now we just say the assign failed so it will
+      // be rescheduled.
+      boolean b = super.addToRemoteDispatcher(env, null);
+      assertFalse(b);
+      // Assert we were actually called.
+      this.addToRemoteDispatcherWasCalled.set(true);
+      return b;
+    }
+
+    @Override
+    public RegionStates.RegionStateNode getRegionState(MasterProcedureEnv env) {
+      // Do this so we don't have to mock a bunch of stuff.
+      return this.rsn;
+    }
+
+    @Override
+    public void remoteCallFailed(final MasterProcedureEnv env,
+        final ServerName serverName, final IOException exception) {
+      // Just skip this remoteCallFailed. Its too hard to mock. Assert it is called though.
+      // Happens after the code we are testing has been called.
+      this.remoteCallFailedWasCalled.set(true);
+    }
+  };
+
+  /**
+   * Test that we deal with ServerCrashProcedure zero'ing out the targetServer in the
+   * RegionStateNode in the midst of our doing an assign. The trickery is done above in
+   * TargetServerBeingNulledOnUsAssignProcedure. We skip a bunch of logic to get at the guts
+   * where the problem happens (We also skip-out the failure handling because it'd take a bunch
+   * of mocking to get it to run). Fix is inside in RemoteProcedureDispatch#addOperationToNode.
+   * It now notices empty targetServer and just returns false so we fall into failure processing
+   * and we'll reassign elsewhere instead of NPE'ing. The fake of ServerCrashProcedure nulling out
+   * the targetServer happens inside in updateTransition just after it was called but before it
+   * gets to the near the end when addToRemoteDispatcher is called. See the
+   * TargetServerBeingNulledOnUsAssignProcedure class above. See HBASE-19218.
+   * Before fix, this test would fail w/ a NullPointerException.
+   */
+  @Test
+  public void testTargetServerBeingNulledOnUs() throws ProcedureSuspendedException, IOException {
+    TableName tn = TableName.valueOf(this.name.getMethodName());
+    RegionInfo ri = RegionInfoBuilder.newBuilder(tn).build();
+    // Create an RSN with location/target server. Will be cleared above in addToRemoteDispatcher to
+    // simulate issue in HBASE-19218
+    RegionStates.RegionStateNode rsn = new RegionStates.RegionStateNode(ri);
+    rsn.setRegionLocation(ServerName.valueOf("server.example.org", 0, 0));
+    MasterProcedureEnv env = Mockito.mock(MasterProcedureEnv.class);
+    AssignmentManager am = Mockito.mock(AssignmentManager.class);
+    ServerManager sm = Mockito.mock(ServerManager.class);
+    Mockito.when(sm.isServerOnline(Mockito.any())).thenReturn(true);
+    MasterServices ms = Mockito.mock(MasterServices.class);
+    Mockito.when(ms.getServerManager()).thenReturn(sm);
+    Configuration configuration = HBaseConfiguration.create();
+    Mockito.when(ms.getConfiguration()).thenReturn(configuration);
+    Mockito.when(env.getAssignmentManager()).thenReturn(am);
+    Mockito.when(env.getMasterServices()).thenReturn(ms);
+    RSProcedureDispatcher rsd = new RSProcedureDispatcher(ms);
+    Mockito.when(env.getRemoteDispatcher()).thenReturn(rsd);
+
+    TargetServerBeingNulledOnUsAssignProcedure assignProcedure =
+        new TargetServerBeingNulledOnUsAssignProcedure(ri, rsn);
+    assignProcedure.updateTransition(env, rsn);
+    assertTrue(assignProcedure.remoteCallFailedWasCalled.get());
+    assertTrue(assignProcedure.addToRemoteDispatcherWasCalled.get());
+  }
+
   @Test
   public void testSimpleComparator() {
     List<AssignProcedure> procedures = new ArrayList<AssignProcedure>();