You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by wc...@apache.org on 2020/07/03 14:43:59 UTC

[hbase] branch branch-2.2 updated: HBASE-24562: Stabilize master startup with meta replicas enabled (#1997)

This is an automated email from the ASF dual-hosted git repository.

wchevreuil pushed a commit to branch branch-2.2
in repository https://gitbox.apache.org/repos/asf/hbase.git


The following commit(s) were added to refs/heads/branch-2.2 by this push:
     new 899422b  HBASE-24562: Stabilize master startup with meta replicas enabled (#1997)
899422b is described below

commit 899422be44b4b2c5fbd3b9fa79ba782d965ac0d8
Author: BukrosSzabolcs <bu...@gmail.com>
AuthorDate: Fri Jul 3 16:43:41 2020 +0200

    HBASE-24562: Stabilize master startup with meta replicas enabled (#1997)
    
    Signed-off-by: Wellington Chevreuil <wc...@apache.org>
---
 .../org/apache/hadoop/hbase/master/HMaster.java    |  6 +-
 .../hadoop/hbase/master/MasterMetaBootstrap.java   |  4 +-
 .../hbase/master/assignment/AssignmentManager.java | 34 +++++++++-
 .../hadoop/hbase/client/TestMetaWithReplicas.java  | 77 ++++++++++++++++++++++
 4 files changed, 115 insertions(+), 6 deletions(-)

diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
index e87560c..0dc2997 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
@@ -1131,7 +1131,11 @@ public class HMaster extends HRegionServer implements MasterServices {
     assignmentManager.checkIfShouldMoveSystemRegionAsync();
     status.setStatus("Assign meta replicas");
     MasterMetaBootstrap metaBootstrap = createMetaBootstrap();
-    metaBootstrap.assignMetaReplicas();
+    try {
+      metaBootstrap.assignMetaReplicas();
+    } catch (IOException | KeeperException e){
+      LOG.error("Assigning meta replica failed: ", e);
+    }
     status.setStatus("Starting quota manager");
     initQuotaManager();
     if (QuotaUtil.isQuotaEnabled(conf)) {
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterMetaBootstrap.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterMetaBootstrap.java
index e57817e..1cf6cf1 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterMetaBootstrap.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterMetaBootstrap.java
@@ -81,9 +81,9 @@ class MasterMetaBootstrap {
       // down hosting server which calls AM#stop.
       if (metaState != null && metaState.getServerName() != null) {
         // Try to retain old assignment.
-        assignmentManager.assign(hri, metaState.getServerName());
+        assignmentManager.assignAsync(hri, metaState.getServerName());
       } else {
-        assignmentManager.assign(hri);
+        assignmentManager.assignAsync(hri);
       }
     }
     unassignExcessMetaReplica(numReplicas);
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java
index 9a5796f..8afe691 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java
@@ -589,9 +589,9 @@ public class AssignmentManager {
     }
   }
 
-  // TODO: Need an async version of this for hbck2.
-  public long assign(RegionInfo regionInfo, ServerName sn) throws IOException {
-    // TODO: should we use getRegionStateNode?
+  private TransitRegionStateProcedure createAssignProcedure(RegionInfo regionInfo, ServerName sn)
+    throws IOException {
+     // TODO: should we use getRegionStateNode?
     RegionStateNode regionNode = regionStates.getOrCreateRegionStateNode(regionInfo);
     TransitRegionStateProcedure proc;
     regionNode.lock();
@@ -602,6 +602,12 @@ public class AssignmentManager {
     } finally {
       regionNode.unlock();
     }
+    return proc;
+  }
+
+  // TODO: Need an async version of this for hbck2.
+  public long assign(RegionInfo regionInfo, ServerName sn) throws IOException {
+    TransitRegionStateProcedure proc = createAssignProcedure(regionInfo, sn);
     ProcedureSyncWait.submitAndWaitProcedure(master.getMasterProcedureExecutor(), proc);
     return proc.getProcId();
   }
@@ -610,6 +616,28 @@ public class AssignmentManager {
     return assign(regionInfo, null);
   }
 
+  /**
+   * Builds an assign procedure for the region and submits it without waiting for it to finish.
+   * @param regionInfo the region we would like to assign
+   * @param sn target server name; the single-argument overload passes {@code null} here
+   * @return the future returned by {@link ProcedureSyncWait#submitProcedure} for the procedure
+   * @throws IOException if preTransitCheck fails
+   */
+  public Future<byte[]> assignAsync(RegionInfo regionInfo, ServerName sn) throws IOException {
+    TransitRegionStateProcedure proc = createAssignProcedure(regionInfo, sn);
+    return ProcedureSyncWait.submitProcedure(master.getMasterProcedureExecutor(), proc);
+  }
+
+  /**
+   * Submits an assign procedure for the region with no target server ({@code null}), not waiting.
+   * @param regionInfo the region we would like to assign
+   * @return the future returned by {@link #assignAsync(RegionInfo, ServerName)}
+   * @throws IOException if preTransitCheck fails
+   */
+  public Future<byte[]> assignAsync(RegionInfo regionInfo) throws IOException {
+    return assignAsync(regionInfo, null);
+  }
+
   public long unassign(RegionInfo regionInfo) throws IOException {
     RegionStateNode regionNode = regionStates.getRegionStateNode(regionInfo);
     if (regionNode == null) {
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestMetaWithReplicas.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestMetaWithReplicas.java
index ddf5688..cbfac28 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestMetaWithReplicas.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestMetaWithReplicas.java
@@ -23,12 +23,14 @@ import static org.junit.Assert.assertNotNull;
 import static org.junit.Assert.assertTrue;
 import static org.junit.Assert.fail;
 
+import java.io.IOException;
 import java.util.Arrays;
 import java.util.Collection;
 import java.util.EnumSet;
 import java.util.HashSet;
 import java.util.List;
 import java.util.Set;
+import java.util.concurrent.Future;
 import java.util.concurrent.atomic.AtomicBoolean;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hbase.Abortable;
@@ -41,8 +43,12 @@ import org.apache.hadoop.hbase.MetaTableAccessor;
 import org.apache.hadoop.hbase.ServerName;
 import org.apache.hadoop.hbase.TableName;
 import org.apache.hadoop.hbase.TableNotFoundException;
+import org.apache.hadoop.hbase.master.HMaster;
+import org.apache.hadoop.hbase.master.MasterServices;
 import org.apache.hadoop.hbase.master.assignment.AssignmentManager;
 import org.apache.hadoop.hbase.master.assignment.AssignmentTestingUtil;
+import org.apache.hadoop.hbase.master.assignment.RegionStateNode;
+import org.apache.hadoop.hbase.master.assignment.TransitRegionStateProcedure;
 import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
 import org.apache.hadoop.hbase.regionserver.StorefileRefresherChore;
 import org.apache.hadoop.hbase.testclassification.LargeTests;
@@ -52,7 +58,9 @@ import org.apache.hadoop.hbase.zookeeper.MetaTableLocator;
 import org.apache.hadoop.hbase.zookeeper.ZKUtil;
 import org.apache.hadoop.hbase.zookeeper.ZKWatcher;
 import org.apache.hadoop.hbase.zookeeper.ZNodePaths;
+import org.apache.zookeeper.KeeperException;
 import org.junit.After;
+import org.junit.Assert;
 import org.junit.Before;
 import org.junit.ClassRule;
 import org.junit.Rule;
@@ -350,4 +358,73 @@ public class TestMetaWithReplicas {
       assertNotEquals(3, i);
     }
   }
+
+  @Test
+  public void testFailedReplicaAssigment() throws InterruptedException, IOException {
+    // Use the rigged master (BrokenMetaReplicaMaster) to force a failed meta replica assignment.
+    TEST_UTIL.getMiniHBaseCluster().getConfiguration().setClass(HConstants.MASTER_IMPL,
+        BrokenMetaReplicaMaster.class, HMaster.class);
+    TEST_UTIL.getMiniHBaseCluster().stopMaster(0).join();
+    HMaster newMaster = TEST_UTIL.getMiniHBaseCluster().startMaster().getMaster();
+    // Wait for the new master to report that it is initialized.
+    TEST_UTIL.waitFor(30000, () -> newMaster.isInitialized());
+    TEST_UTIL.getMiniHBaseCluster().getConfiguration().unset(HConstants.MASTER_IMPL);
+
+    AssignmentManager am = newMaster.getAssignmentManager();
+    // Show that meta replica 1 got assigned: wait until its region node has a location.
+    RegionInfo metaReplicaHri = RegionReplicaUtil.getRegionInfoForReplica(
+      RegionInfoBuilder.FIRST_META_REGIONINFO, 1);
+    TEST_UTIL.waitFor(30000, () ->
+        am.getRegionStates().getOrCreateRegionStateNode(metaReplicaHri) != null &&
+        am.getRegionStates().getOrCreateRegionStateNode(metaReplicaHri).getRegionLocation()
+            != null);
+    RegionStateNode metaReplicaRegionNode =
+        am.getRegionStates().getOrCreateRegionStateNode(metaReplicaHri);
+    Assert.assertNotNull(metaReplicaRegionNode.getRegionLocation());
+    // Show that meta replica 2 (the one the rigged manager breaks) has no region location.
+    RegionInfo metaReplicaHri2 = RegionReplicaUtil.getRegionInfoForReplica(
+      RegionInfoBuilder.FIRST_META_REGIONINFO, 2);
+    RegionStateNode metaReplicaRegionNode2 =
+        am.getRegionStates().getOrCreateRegionStateNode(metaReplicaHri2);
+    Assert.assertNull(metaReplicaRegionNode2.getRegionLocation());
+
+    // Show that the master is still active and neither stopping nor stopped.
+    Assert.assertFalse(newMaster.isStopping());
+    Assert.assertFalse(newMaster.isStopped());
+    Assert.assertTrue(newMaster.isActiveMaster());
+  }
+
+  public static class BrokenTransitRegionStateProcedure extends TransitRegionStateProcedure {
+    protected BrokenTransitRegionStateProcedure() {
+      // super(env, hri, assignCandidate, forceNewPlan, type): null env/hri/candidate, ASSIGN type.
+      super(null, null, null, false,TransitionType.ASSIGN);
+    }
+  }
+
+  public static class BrokenMetaReplicaMaster extends HMaster{
+    public BrokenMetaReplicaMaster(final Configuration conf) throws IOException, KeeperException {
+      super(conf);
+    }
+
+    @Override
+    public AssignmentManager createAssignmentManager(MasterServices master) {
+      return new BrokenMasterMetaAssignmentManager(master);
+    }
+  }
+
+  public static class BrokenMasterMetaAssignmentManager extends AssignmentManager{
+    MasterServices master;
+    public BrokenMasterMetaAssignmentManager(final MasterServices master) {
+      super(master);
+      this.master = master;
+    }
+
+    public Future<byte[]> assignAsync(RegionInfo regionInfo, ServerName sn) throws IOException {
+      RegionStateNode regionNode = getRegionStates().getOrCreateRegionStateNode(regionInfo);
+      if (regionNode.getRegionInfo().getReplicaId() == 2) {
+        regionNode.setProcedure(new BrokenTransitRegionStateProcedure());
+      }
+      return super.assignAsync(regionInfo, sn);
+    }
+  }
 }