You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by wc...@apache.org on 2020/06/24 17:38:49 UTC

[hbase] branch master updated: HBASE-24562: Stabilize master startup with meta replicas enabled (#1903)

This is an automated email from the ASF dual-hosted git repository.

wchevreuil pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hbase.git


The following commit(s) were added to refs/heads/master by this push:
     new 8cdb2cc  HBASE-24562: Stabilize master startup with meta replicas enabled (#1903)
8cdb2cc is described below

commit 8cdb2cca4461d6adad3f44af001055848a205370
Author: BukrosSzabolcs <bu...@gmail.com>
AuthorDate: Wed Jun 24 19:38:36 2020 +0200

    HBASE-24562: Stabilize master startup with meta replicas enabled (#1903)
    
    Signed-off-by: Wellington Chevreuil <wc...@apache.org>
    Signed-off-by: Huaxiang Sun <hu...@apache.com>
---
 .../org/apache/hadoop/hbase/master/HMaster.java    |  6 +-
 .../hadoop/hbase/master/MasterMetaBootstrap.java   |  4 +-
 .../hbase/master/assignment/AssignmentManager.java | 34 ++++++++++-
 .../hadoop/hbase/client/TestMetaWithReplicas.java  | 70 ++++++++++++++++++++++
 4 files changed, 108 insertions(+), 6 deletions(-)

diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
index 9fdb0e5..5a9404a 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
@@ -1166,7 +1166,11 @@ public class HMaster extends HRegionServer implements MasterServices {
     assignmentManager.checkIfShouldMoveSystemRegionAsync();
     status.setStatus("Assign meta replicas");
     MasterMetaBootstrap metaBootstrap = createMetaBootstrap();
-    metaBootstrap.assignMetaReplicas();
+    try {
+      metaBootstrap.assignMetaReplicas();
+    } catch (IOException | KeeperException e){
+      LOG.error("Assigning meta replica failed: ", e);
+    }
     status.setStatus("Starting quota manager");
     initQuotaManager();
     if (QuotaUtil.isQuotaEnabled(conf)) {
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterMetaBootstrap.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterMetaBootstrap.java
index 6e38bdd..ef12d07 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterMetaBootstrap.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterMetaBootstrap.java
@@ -81,9 +81,9 @@ class MasterMetaBootstrap {
       // down hosting server which calls AM#stop.
       if (metaState != null && metaState.getServerName() != null) {
         // Try to retain old assignment.
-        assignmentManager.assign(hri, metaState.getServerName());
+        assignmentManager.assignAsync(hri, metaState.getServerName());
       } else {
-        assignmentManager.assign(hri);
+        assignmentManager.assignAsync(hri);
       }
     }
     unassignExcessMetaReplica(numReplicas);
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java
index ef6e30c..4c42237 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java
@@ -595,9 +595,9 @@ public class AssignmentManager {
     }
   }
 
-  // TODO: Need an async version of this for hbck2.
-  public long assign(RegionInfo regionInfo, ServerName sn) throws IOException {
-    // TODO: should we use getRegionStateNode?
+  private TransitRegionStateProcedure createAssignProcedure(RegionInfo regionInfo, ServerName sn)
+    throws IOException {
+     // TODO: should we use getRegionStateNode?
     RegionStateNode regionNode = regionStates.getOrCreateRegionStateNode(regionInfo);
     TransitRegionStateProcedure proc;
     regionNode.lock();
@@ -608,6 +608,12 @@ public class AssignmentManager {
     } finally {
       regionNode.unlock();
     }
+    return proc;
+  }
+
+  // TODO: Need an async version of this for hbck2.
+  public long assign(RegionInfo regionInfo, ServerName sn) throws IOException {
+    TransitRegionStateProcedure proc = createAssignProcedure(regionInfo, sn);
     ProcedureSyncWait.submitAndWaitProcedure(master.getMasterProcedureExecutor(), proc);
     return proc.getProcId();
   }
@@ -616,6 +622,28 @@ public class AssignmentManager {
     return assign(regionInfo, null);
   }
 
+  /**
+   * Submits a procedure that assigns a region to a target server without waiting for it to finish
+   * @param regionInfo the region we would like to assign
+   * @param sn target server name
+   * @return
+   * @throws IOException
+   */
+  public Future<byte[]> assignAsync(RegionInfo regionInfo, ServerName sn) throws IOException {
+    TransitRegionStateProcedure proc = createAssignProcedure(regionInfo, sn);
+    return ProcedureSyncWait.submitProcedure(master.getMasterProcedureExecutor(), proc);
+  }
+
+  /**
+   * Submits a procedure that assigns a region without waiting for it to finish
+   * @param regionInfo the region we would like to assign
+   * @return
+   * @throws IOException
+   */
+  public Future<byte[]> assignAsync(RegionInfo regionInfo) throws IOException {
+    return assignAsync(regionInfo, null);
+  }
+
   public long unassign(RegionInfo regionInfo) throws IOException {
     RegionStateNode regionNode = regionStates.getRegionStateNode(regionInfo);
     if (regionNode == null) {
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestMetaWithReplicas.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestMetaWithReplicas.java
index 56e96fb..65dc38e 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestMetaWithReplicas.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestMetaWithReplicas.java
@@ -23,12 +23,14 @@ import static org.junit.Assert.assertNotNull;
 import static org.junit.Assert.assertTrue;
 import static org.junit.Assert.fail;
 
+import java.io.IOException;
 import java.util.Arrays;
 import java.util.Collection;
 import java.util.EnumSet;
 import java.util.HashSet;
 import java.util.List;
 import java.util.Set;
+import java.util.concurrent.Future;
 import java.util.concurrent.atomic.AtomicBoolean;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hbase.Abortable;
@@ -42,8 +44,12 @@ import org.apache.hadoop.hbase.ServerName;
 import org.apache.hadoop.hbase.StartMiniClusterOption;
 import org.apache.hadoop.hbase.TableName;
 import org.apache.hadoop.hbase.TableNotFoundException;
+import org.apache.hadoop.hbase.master.HMaster;
+import org.apache.hadoop.hbase.master.MasterServices;
 import org.apache.hadoop.hbase.master.assignment.AssignmentManager;
 import org.apache.hadoop.hbase.master.assignment.AssignmentTestingUtil;
+import org.apache.hadoop.hbase.master.assignment.RegionStateNode;
+import org.apache.hadoop.hbase.master.assignment.TransitRegionStateProcedure;
 import org.apache.hadoop.hbase.regionserver.StorefileRefresherChore;
 import org.apache.hadoop.hbase.testclassification.LargeTests;
 import org.apache.hadoop.hbase.util.Bytes;
@@ -53,6 +59,7 @@ import org.apache.hadoop.hbase.zookeeper.ZKUtil;
 import org.apache.hadoop.hbase.zookeeper.ZKWatcher;
 import org.apache.hadoop.hbase.zookeeper.ZNodePaths;
 import org.junit.After;
+import org.junit.Assert;
 import org.junit.Before;
 import org.junit.ClassRule;
 import org.junit.Rule;
@@ -355,4 +362,67 @@ public class TestMetaWithReplicas {
       assertNotEquals(3, i);
     }
   }
+
+  @Test
+  public void testFailedReplicaAssigment() throws InterruptedException, IOException {
+    //using our rigged master, to force a failed meta replica assignment
+    TEST_UTIL.getMiniHBaseCluster().getConfiguration().setClass(HConstants.MASTER_IMPL, BrokenMetaReplicaMaster.class, HMaster.class);
+    TEST_UTIL.getMiniHBaseCluster().stopMaster(0).join();
+    HMaster newMaster = TEST_UTIL.getMiniHBaseCluster().startMaster().getMaster();
+    //waiting for master to come up
+    TEST_UTIL.waitFor(30000, () -> newMaster.isInitialized());
+    TEST_UTIL.getMiniHBaseCluster().getConfiguration().unset(HConstants.MASTER_IMPL);
+
+
+    AssignmentManager am = newMaster.getAssignmentManager();
+    //showing one of the replicas got assigned
+    RegionInfo metaReplicaHri = RegionReplicaUtil.getRegionInfoForReplica(
+      RegionInfoBuilder.FIRST_META_REGIONINFO, 1);
+    RegionStateNode metaReplicaRegionNode = am.getRegionStates().getOrCreateRegionStateNode(metaReplicaHri);
+    Assert.assertNotNull(metaReplicaRegionNode.getRegionLocation());
+    //showing one of the replicas failed to be assigned
+    RegionInfo metaReplicaHri2 = RegionReplicaUtil.getRegionInfoForReplica(
+      RegionInfoBuilder.FIRST_META_REGIONINFO, 2);
+    RegionStateNode metaReplicaRegionNode2 = am.getRegionStates().getOrCreateRegionStateNode(metaReplicaHri2);
+    Assert.assertNull(metaReplicaRegionNode2.getRegionLocation());
+
+    //showing master is active and running
+    Assert.assertFalse(newMaster.isStopping());
+    Assert.assertFalse(newMaster.isStopped());
+    Assert.assertTrue(newMaster.isActiveMaster());
+  }
+
+  public static class BrokenTransitRegionStateProcedure extends TransitRegionStateProcedure {
+    protected BrokenTransitRegionStateProcedure() {
+      //super(env, hri, assignCandidate, forceNewPlan, type);
+      super(null, null, null, false,TransitionType.ASSIGN);
+    }
+  }
+
+  public static class BrokenMetaReplicaMaster extends HMaster{
+    public BrokenMetaReplicaMaster(final Configuration conf) throws IOException {
+      super(conf);
+    }
+
+    @Override
+    public AssignmentManager createAssignmentManager(MasterServices master) {
+      return new BrokenMasterMetaAssignmentManager(master);
+    }
+  }
+
+  public static class BrokenMasterMetaAssignmentManager extends AssignmentManager{
+    MasterServices master;
+    public BrokenMasterMetaAssignmentManager(final MasterServices master) {
+      super(master);
+      this.master = master;
+    }
+
+    public Future<byte[]> assignAsync(RegionInfo regionInfo, ServerName sn) throws IOException {
+      RegionStateNode regionNode = getRegionStates().getOrCreateRegionStateNode(regionInfo);
+      if (regionNode.getRegionInfo().getReplicaId() == 2) {
+        regionNode.setProcedure(new BrokenTransitRegionStateProcedure());
+      }
+      return super.assignAsync(regionInfo, sn);
+    }
+  }
 }