You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by hu...@apache.org on 2020/06/26 18:22:31 UTC

[hbase] branch branch-2.3 updated: HBASE-24552 Replica region needs to check if primary region directory exists at file system in TransitRegionStateProcedure (#1924) (#1972)

This is an automated email from the ASF dual-hosted git repository.

huaxiangsun pushed a commit to branch branch-2.3
in repository https://gitbox.apache.org/repos/asf/hbase.git


The following commit(s) were added to refs/heads/branch-2.3 by this push:
     new 809a623  HBASE-24552 Replica region needs to check if primary region directory exists at file system in TransitRegionStateProcedure (#1924) (#1972)
809a623 is described below

commit 809a62386bc5d90a663567d7a633e6259595877f
Author: huaxiangsun <hu...@apache.org>
AuthorDate: Fri Jun 26 11:22:12 2020 -0700

    HBASE-24552 Replica region needs to check if primary region directory exists at file system in TransitRegionStateProcedure (#1924) (#1972)
    
    Signed-off-by:  stack <st...@apache.org>
---
 .../assignment/TransitRegionStateProcedure.java    | 13 ++++
 .../apache/hadoop/hbase/HBaseTestingUtility.java   | 19 +++++
 .../apache/hadoop/hbase/master/TestMetaFixer.java  | 36 +++------
 .../master/assignment/TestRegionReplicaSplit.java  | 86 ++++++++++++++++------
 4 files changed, 103 insertions(+), 51 deletions(-)

diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/TransitRegionStateProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/TransitRegionStateProcedure.java
index d88f812..ed992fe 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/TransitRegionStateProcedure.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/TransitRegionStateProcedure.java
@@ -338,6 +338,19 @@ public class TransitRegionStateProcedure
     try {
       switch (state) {
         case REGION_STATE_TRANSITION_GET_ASSIGN_CANDIDATE:
+          // Need to do some sanity check for replica region, if the region does not exist at
+          // master, do not try to assign the replica region, log error and return.
+          if (!RegionReplicaUtil.isDefaultReplica(regionNode.getRegionInfo())) {
+            RegionInfo defaultRI =
+              RegionReplicaUtil.getRegionInfoForDefaultReplica(regionNode.getRegionInfo());
+            if (env.getMasterServices().getAssignmentManager().getRegionStates().
+              getRegionStateNode(defaultRI) == null) {
+              LOG.error(
+                "Cannot assign replica region {} because its primary region {} does not exist.",
+                regionNode.getRegionInfo(), defaultRI);
+              return Flow.NO_MORE_STATE;
+            }
+          }
           queueAssign(env, regionNode);
           return Flow.HAS_MORE_STATE;
         case REGION_STATE_TRANSITION_OPEN:
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java
index e078653..58b4457 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java
@@ -51,6 +51,7 @@ import java.util.TreeSet;
 import java.util.concurrent.TimeUnit;
 import java.util.concurrent.atomic.AtomicReference;
 import java.util.stream.Collectors;
+import java.util.function.BooleanSupplier;
 import org.apache.commons.io.FileUtils;
 import org.apache.commons.lang3.RandomStringUtils;
 import org.apache.hadoop.conf.Configuration;
@@ -4467,4 +4468,22 @@ public class HBaseTestingUtility extends HBaseZKTestingUtility {
           ColumnFamilyDescriptor.COMPARATOR.compare(it.next(), it2.next()));
     }
   }
+
+  /**
+   * Await the successful return of {@code condition}, sleeping {@code sleepMillis} between
+   * invocations.
+   */
+  public static void await(final long sleepMillis, final BooleanSupplier condition)
+    throws InterruptedException {
+    try {
+      while (!condition.getAsBoolean()) {
+        Thread.sleep(sleepMillis);
+      }
+    } catch (RuntimeException e) {
+      if (e.getCause() instanceof AssertionError) {
+        throw (AssertionError) e.getCause();
+      }
+      throw e;
+    }
+  }
 }
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMetaFixer.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMetaFixer.java
index a36ef88..bc69f93 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMetaFixer.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMetaFixer.java
@@ -25,7 +25,6 @@ import java.util.Collections;
 import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
-import java.util.function.BooleanSupplier;
 import org.apache.hadoop.hbase.Cell;
 import org.apache.hadoop.hbase.CellBuilderFactory;
 import org.apache.hadoop.hbase.CellBuilderType;
@@ -114,7 +113,8 @@ public class TestMetaFixer {
 
     // wait for RITs to settle -- those are the fixed regions being assigned -- or until the
     // watchdog TestRule terminates the test.
-    await(50, () -> isNotEmpty(services.getAssignmentManager().getRegionsInTransition()));
+    HBaseTestingUtility.await(50,
+      () -> isNotEmpty(services.getAssignmentManager().getRegionsInTransition()));
 
     ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), tn);
     assertEquals(originalCount, ris.size());
@@ -191,7 +191,7 @@ public class TestMetaFixer {
     MetaFixer fixer = new MetaFixer(services);
     fixer.fixOverlaps(report);
 
-    await(10, () -> {
+    HBaseTestingUtility. await(10, () -> {
       try {
         if (cj.scan() > 0) {
           // It submits GC once, then it will immediately kick off another GC to test if
@@ -215,7 +215,7 @@ public class TestMetaFixer {
     });
 
     // Wait until all GCs settled down
-    await(10, () -> {
+    HBaseTestingUtility.await(10, () -> {
       return services.getMasterProcedureExecutor().getActiveProcIds().isEmpty();
     });
 
@@ -258,7 +258,7 @@ public class TestMetaFixer {
       fixer.fixOverlaps(report);
       AssignmentManager am = services.getAssignmentManager();
 
-      await(200, () -> {
+      HBaseTestingUtility.await(200, () -> {
         try {
           cj.scan();
           final CatalogJanitor.Report postReport = cj.getLastReport();
@@ -291,7 +291,7 @@ public class TestMetaFixer {
       report = cj.getLastReport();
       fixer.fixOverlaps(report);
 
-      await(20, () -> {
+      HBaseTestingUtility.await(20, () -> {
         try {
           // Make sure it GC only once.
           return (cj.scan() > 0);
@@ -359,7 +359,7 @@ public class TestMetaFixer {
     fixer.fixOverlaps(report);
 
     // Wait until all procedures settled down
-    await(200, () -> {
+    HBaseTestingUtility.await(200, () -> {
       return services.getMasterProcedureExecutor().getActiveProcIds().isEmpty();
     });
 
@@ -371,7 +371,7 @@ public class TestMetaFixer {
     fixer.fixOverlaps(report);
 
     // Wait until all procedures settled down
-    await(200, () -> {
+    HBaseTestingUtility.await(200, () -> {
       return services.getMasterProcedureExecutor().getActiveProcIds().isEmpty();
     });
 
@@ -414,7 +414,7 @@ public class TestMetaFixer {
     assertEquals(1, MetaFixer.calculateMerges(10, report.getOverlaps()).size());
     MetaFixer fixer = new MetaFixer(services);
     fixer.fixOverlaps(report);
-    await(10, () -> {
+    HBaseTestingUtility.await(10, () -> {
       try {
         services.getCatalogJanitor().scan();
         final CatalogJanitor.Report postReport = services.getCatalogJanitor().getLastReport();
@@ -424,22 +424,4 @@ public class TestMetaFixer {
       }
     });
   }
-
-  /**
-   * Await the successful return of {@code condition}, sleeping {@code sleepMillis} between
-   * invocations.
-   */
-  private static void await(final long sleepMillis, final BooleanSupplier condition)
-    throws InterruptedException {
-    try {
-      while (!condition.getAsBoolean()) {
-        Thread.sleep(sleepMillis);
-      }
-    } catch (RuntimeException e) {
-      if (e.getCause() instanceof AssertionError) {
-        throw (AssertionError) e.getCause();
-      }
-      throw e;
-    }
-  }
 }
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestRegionReplicaSplit.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestRegionReplicaSplit.java
index de45183..0d3790f 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestRegionReplicaSplit.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestRegionReplicaSplit.java
@@ -18,6 +18,7 @@
  */
 package org.apache.hadoop.hbase.master.assignment;
 
+import static org.junit.Assert.assertNotEquals;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.List;
@@ -28,6 +29,7 @@ import org.apache.hadoop.hbase.HBaseTestingUtility;
 import org.apache.hadoop.hbase.HConstants;
 import org.apache.hadoop.hbase.TableName;
 import org.apache.hadoop.hbase.client.RegionInfo;
+import org.apache.hadoop.hbase.client.RegionInfoBuilder;
 import org.apache.hadoop.hbase.client.RegionReplicaTestHelper;
 import org.apache.hadoop.hbase.client.Table;
 import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
@@ -56,7 +58,6 @@ public class TestRegionReplicaSplit {
   private static final Logger LOG = LoggerFactory.getLogger(TestRegionReplicaSplit.class);
 
   private static final int NB_SERVERS = 4;
-  private static Table table;
 
   private static final HBaseTestingUtility HTU = new HBaseTestingUtility();
   private static final byte[] f = HConstants.CATALOG_FAMILY;
@@ -65,21 +66,19 @@ public class TestRegionReplicaSplit {
   public static void beforeClass() throws Exception {
     HTU.getConfiguration().setInt("hbase.master.wait.on.regionservers.mintostart", 3);
     HTU.startMiniCluster(NB_SERVERS);
-    final TableName tableName = TableName.valueOf(TestRegionReplicaSplit.class.getSimpleName());
-
-    // Create table then get the single region for our new table.
-    createTable(tableName);
   }
 
   @Rule
   public TestName name = new TestName();
 
-  private static void createTable(final TableName tableName) throws IOException {
+  private static Table createTableAndLoadData(final TableName tableName) throws IOException {
     TableDescriptorBuilder builder = TableDescriptorBuilder.newBuilder(tableName);
     builder.setRegionReplication(3);
     // create a table with 3 replication
-    table = HTU.createTable(builder.build(), new byte[][] { f }, getSplits(2),
+    Table table = HTU.createTable(builder.build(), new byte[][] { f }, getSplits(2),
       new Configuration(HTU.getConfiguration()));
+    HTU.loadTable(HTU.getConnection().getTable(tableName), f);
+    return table;
   }
 
   private static byte[][] getSplits(int numRegions) {
@@ -92,35 +91,74 @@ public class TestRegionReplicaSplit {
   @AfterClass
   public static void afterClass() throws Exception {
     HRegionServer.TEST_SKIP_REPORTING_TRANSITION = false;
-    table.close();
     HTU.shutdownMiniCluster();
   }
 
   @Test
   public void testRegionReplicaSplitRegionAssignment() throws Exception {
-    HTU.loadNumericRows(table, f, 0, 3);
-    // split the table
-    List<RegionInfo> regions = new ArrayList<RegionInfo>();
-    for (RegionServerThread rs : HTU.getMiniHBaseCluster().getRegionServerThreads()) {
-      for (Region r : rs.getRegionServer().getRegions(table.getName())) {
-        regions.add(r.getRegionInfo());
+    TableName tn = TableName.valueOf(this.name.getMethodName());
+    Table table = null;
+    try {
+      table = createTableAndLoadData(tn);
+      HTU.loadNumericRows(table, f, 0, 3);
+      // split the table
+      List<RegionInfo> regions = new ArrayList<RegionInfo>();
+      for (RegionServerThread rs : HTU.getMiniHBaseCluster().getRegionServerThreads()) {
+        for (Region r : rs.getRegionServer().getRegions(table.getName())) {
+          regions.add(r.getRegionInfo());
+        }
+      }
+      // There are 6 regions before split, 9 regions after split.
+      HTU.getAdmin().split(table.getName(), Bytes.toBytes(1));
+      int count = 0;
+      while (true) {
+        for (RegionServerThread rs : HTU.getMiniHBaseCluster().getRegionServerThreads()) {
+          for (Region r : rs.getRegionServer().getRegions(table.getName())) {
+            count++;
+          }
+        }
+        if (count >= 9) {
+          break;
+        }
+        count = 0;
+      }
+      RegionReplicaTestHelper.assertReplicaDistributed(HTU, table);
+    } finally {
+      if (table != null) {
+        HTU.deleteTable(tn);
       }
     }
-    // There are 6 regions before split, 9 regions after split.
-    HTU.getAdmin().split(table.getName(), Bytes.toBytes(1));
-    int count = 0;
-    while (true) {
+  }
+
+  @Test
+  public void testAssignFakeReplicaRegion() throws Exception {
+    TableName tn = TableName.valueOf(this.name.getMethodName());
+    Table table = null;
+    try {
+      table = createTableAndLoadData(tn);
+      final RegionInfo fakeHri =
+        RegionInfoBuilder.newBuilder(table.getName()).setStartKey(Bytes.toBytes("a"))
+          .setEndKey(Bytes.toBytes("b")).setReplicaId(1)
+          .setRegionId(System.currentTimeMillis()).build();
+
+      // To test AssignProcedure can defend this case.
+      HTU.getMiniHBaseCluster().getMaster().getAssignmentManager().assign(fakeHri);
+      // Wait until all assigns are done.
+      HBaseTestingUtility.await(50, () -> {
+        return HTU.getMiniHBaseCluster().getMaster().getMasterProcedureExecutor().getActiveProcIds()
+          .isEmpty();
+      });
+
+      // Make sure the region is not online.
       for (RegionServerThread rs : HTU.getMiniHBaseCluster().getRegionServerThreads()) {
         for (Region r : rs.getRegionServer().getRegions(table.getName())) {
-          count++;
+          assertNotEquals(r.getRegionInfo(), fakeHri);
         }
       }
-      if (count >= 9) {
-        break;
+    } finally {
+      if (table != null) {
+        HTU.deleteTable(tn);
       }
-      count = 0;
     }
-
-    RegionReplicaTestHelper.assertReplicaDistributed(HTU, table);
   }
 }