You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by hu...@apache.org on 2020/06/26 18:22:31 UTC
[hbase] branch branch-2.3 updated: HBASE-24552 Replica region needs
to check if primary region directory exists at file system in
TransitRegionStateProcedure (#1924) (#1972)
This is an automated email from the ASF dual-hosted git repository.
huaxiangsun pushed a commit to branch branch-2.3
in repository https://gitbox.apache.org/repos/asf/hbase.git
The following commit(s) were added to refs/heads/branch-2.3 by this push:
new 809a623 HBASE-24552 Replica region needs to check if primary region directory exists at file system in TransitRegionStateProcedure (#1924) (#1972)
809a623 is described below
commit 809a62386bc5d90a663567d7a633e6259595877f
Author: huaxiangsun <hu...@apache.org>
AuthorDate: Fri Jun 26 11:22:12 2020 -0700
HBASE-24552 Replica region needs to check if primary region directory exists at file system in TransitRegionStateProcedure (#1924) (#1972)
Signed-off-by: stack <st...@apache.org>
---
.../assignment/TransitRegionStateProcedure.java | 13 ++++
.../apache/hadoop/hbase/HBaseTestingUtility.java | 19 +++++
.../apache/hadoop/hbase/master/TestMetaFixer.java | 36 +++------
.../master/assignment/TestRegionReplicaSplit.java | 86 ++++++++++++++++------
4 files changed, 103 insertions(+), 51 deletions(-)
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/TransitRegionStateProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/TransitRegionStateProcedure.java
index d88f812..ed992fe 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/TransitRegionStateProcedure.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/TransitRegionStateProcedure.java
@@ -338,6 +338,19 @@ public class TransitRegionStateProcedure
try {
switch (state) {
case REGION_STATE_TRANSITION_GET_ASSIGN_CANDIDATE:
+ // Need to do some sanity check for replica region, if the region does not exist at
+ // master, do not try to assign the replica region, log error and return.
+ if (!RegionReplicaUtil.isDefaultReplica(regionNode.getRegionInfo())) {
+ RegionInfo defaultRI =
+ RegionReplicaUtil.getRegionInfoForDefaultReplica(regionNode.getRegionInfo());
+ if (env.getMasterServices().getAssignmentManager().getRegionStates().
+ getRegionStateNode(defaultRI) == null) {
+ LOG.error(
+ "Cannot assign replica region {} because its primary region {} does not exist.",
+ regionNode.getRegionInfo(), defaultRI);
+ return Flow.NO_MORE_STATE;
+ }
+ }
queueAssign(env, regionNode);
return Flow.HAS_MORE_STATE;
case REGION_STATE_TRANSITION_OPEN:
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java
index e078653..58b4457 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java
@@ -51,6 +51,7 @@ import java.util.TreeSet;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicReference;
import java.util.stream.Collectors;
+import java.util.function.BooleanSupplier;
import org.apache.commons.io.FileUtils;
import org.apache.commons.lang3.RandomStringUtils;
import org.apache.hadoop.conf.Configuration;
@@ -4467,4 +4468,22 @@ public class HBaseTestingUtility extends HBaseZKTestingUtility {
ColumnFamilyDescriptor.COMPARATOR.compare(it.next(), it2.next()));
}
}
+
+ /**
+ * Await the successful return of {@code condition}, sleeping {@code sleepMillis} between
+ * invocations.
+ */
+ public static void await(final long sleepMillis, final BooleanSupplier condition)
+ throws InterruptedException {
+ try {
+ while (!condition.getAsBoolean()) {
+ Thread.sleep(sleepMillis);
+ }
+ } catch (RuntimeException e) {
+ if (e.getCause() instanceof AssertionError) {
+ throw (AssertionError) e.getCause();
+ }
+ throw e;
+ }
+ }
}
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMetaFixer.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMetaFixer.java
index a36ef88..bc69f93 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMetaFixer.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMetaFixer.java
@@ -25,7 +25,6 @@ import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
-import java.util.function.BooleanSupplier;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellBuilderFactory;
import org.apache.hadoop.hbase.CellBuilderType;
@@ -114,7 +113,8 @@ public class TestMetaFixer {
// wait for RITs to settle -- those are the fixed regions being assigned -- or until the
// watchdog TestRule terminates the test.
- await(50, () -> isNotEmpty(services.getAssignmentManager().getRegionsInTransition()));
+ HBaseTestingUtility.await(50,
+ () -> isNotEmpty(services.getAssignmentManager().getRegionsInTransition()));
ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), tn);
assertEquals(originalCount, ris.size());
@@ -191,7 +191,7 @@ public class TestMetaFixer {
MetaFixer fixer = new MetaFixer(services);
fixer.fixOverlaps(report);
- await(10, () -> {
+ HBaseTestingUtility. await(10, () -> {
try {
if (cj.scan() > 0) {
// It submits GC once, then it will immediately kick off another GC to test if
@@ -215,7 +215,7 @@ public class TestMetaFixer {
});
// Wait until all GCs settled down
- await(10, () -> {
+ HBaseTestingUtility.await(10, () -> {
return services.getMasterProcedureExecutor().getActiveProcIds().isEmpty();
});
@@ -258,7 +258,7 @@ public class TestMetaFixer {
fixer.fixOverlaps(report);
AssignmentManager am = services.getAssignmentManager();
- await(200, () -> {
+ HBaseTestingUtility.await(200, () -> {
try {
cj.scan();
final CatalogJanitor.Report postReport = cj.getLastReport();
@@ -291,7 +291,7 @@ public class TestMetaFixer {
report = cj.getLastReport();
fixer.fixOverlaps(report);
- await(20, () -> {
+ HBaseTestingUtility.await(20, () -> {
try {
// Make sure it GC only once.
return (cj.scan() > 0);
@@ -359,7 +359,7 @@ public class TestMetaFixer {
fixer.fixOverlaps(report);
// Wait until all procedures settled down
- await(200, () -> {
+ HBaseTestingUtility.await(200, () -> {
return services.getMasterProcedureExecutor().getActiveProcIds().isEmpty();
});
@@ -371,7 +371,7 @@ public class TestMetaFixer {
fixer.fixOverlaps(report);
// Wait until all procedures settled down
- await(200, () -> {
+ HBaseTestingUtility.await(200, () -> {
return services.getMasterProcedureExecutor().getActiveProcIds().isEmpty();
});
@@ -414,7 +414,7 @@ public class TestMetaFixer {
assertEquals(1, MetaFixer.calculateMerges(10, report.getOverlaps()).size());
MetaFixer fixer = new MetaFixer(services);
fixer.fixOverlaps(report);
- await(10, () -> {
+ HBaseTestingUtility.await(10, () -> {
try {
services.getCatalogJanitor().scan();
final CatalogJanitor.Report postReport = services.getCatalogJanitor().getLastReport();
@@ -424,22 +424,4 @@ public class TestMetaFixer {
}
});
}
-
- /**
- * Await the successful return of {@code condition}, sleeping {@code sleepMillis} between
- * invocations.
- */
- private static void await(final long sleepMillis, final BooleanSupplier condition)
- throws InterruptedException {
- try {
- while (!condition.getAsBoolean()) {
- Thread.sleep(sleepMillis);
- }
- } catch (RuntimeException e) {
- if (e.getCause() instanceof AssertionError) {
- throw (AssertionError) e.getCause();
- }
- throw e;
- }
- }
}
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestRegionReplicaSplit.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestRegionReplicaSplit.java
index de45183..0d3790f 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestRegionReplicaSplit.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestRegionReplicaSplit.java
@@ -18,6 +18,7 @@
*/
package org.apache.hadoop.hbase.master.assignment;
+import static org.junit.Assert.assertNotEquals;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
@@ -28,6 +29,7 @@ import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.RegionInfo;
+import org.apache.hadoop.hbase.client.RegionInfoBuilder;
import org.apache.hadoop.hbase.client.RegionReplicaTestHelper;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
@@ -56,7 +58,6 @@ public class TestRegionReplicaSplit {
private static final Logger LOG = LoggerFactory.getLogger(TestRegionReplicaSplit.class);
private static final int NB_SERVERS = 4;
- private static Table table;
private static final HBaseTestingUtility HTU = new HBaseTestingUtility();
private static final byte[] f = HConstants.CATALOG_FAMILY;
@@ -65,21 +66,19 @@ public class TestRegionReplicaSplit {
public static void beforeClass() throws Exception {
HTU.getConfiguration().setInt("hbase.master.wait.on.regionservers.mintostart", 3);
HTU.startMiniCluster(NB_SERVERS);
- final TableName tableName = TableName.valueOf(TestRegionReplicaSplit.class.getSimpleName());
-
- // Create table then get the single region for our new table.
- createTable(tableName);
}
@Rule
public TestName name = new TestName();
- private static void createTable(final TableName tableName) throws IOException {
+ private static Table createTableAndLoadData(final TableName tableName) throws IOException {
TableDescriptorBuilder builder = TableDescriptorBuilder.newBuilder(tableName);
builder.setRegionReplication(3);
// create a table with 3 replication
- table = HTU.createTable(builder.build(), new byte[][] { f }, getSplits(2),
+ Table table = HTU.createTable(builder.build(), new byte[][] { f }, getSplits(2),
new Configuration(HTU.getConfiguration()));
+ HTU.loadTable(HTU.getConnection().getTable(tableName), f);
+ return table;
}
private static byte[][] getSplits(int numRegions) {
@@ -92,35 +91,74 @@ public class TestRegionReplicaSplit {
@AfterClass
public static void afterClass() throws Exception {
HRegionServer.TEST_SKIP_REPORTING_TRANSITION = false;
- table.close();
HTU.shutdownMiniCluster();
}
@Test
public void testRegionReplicaSplitRegionAssignment() throws Exception {
- HTU.loadNumericRows(table, f, 0, 3);
- // split the table
- List<RegionInfo> regions = new ArrayList<RegionInfo>();
- for (RegionServerThread rs : HTU.getMiniHBaseCluster().getRegionServerThreads()) {
- for (Region r : rs.getRegionServer().getRegions(table.getName())) {
- regions.add(r.getRegionInfo());
+ TableName tn = TableName.valueOf(this.name.getMethodName());
+ Table table = null;
+ try {
+ table = createTableAndLoadData(tn);
+ HTU.loadNumericRows(table, f, 0, 3);
+ // split the table
+ List<RegionInfo> regions = new ArrayList<RegionInfo>();
+ for (RegionServerThread rs : HTU.getMiniHBaseCluster().getRegionServerThreads()) {
+ for (Region r : rs.getRegionServer().getRegions(table.getName())) {
+ regions.add(r.getRegionInfo());
+ }
+ }
+ // There are 6 regions before split, 9 regions after split.
+ HTU.getAdmin().split(table.getName(), Bytes.toBytes(1));
+ int count = 0;
+ while (true) {
+ for (RegionServerThread rs : HTU.getMiniHBaseCluster().getRegionServerThreads()) {
+ for (Region r : rs.getRegionServer().getRegions(table.getName())) {
+ count++;
+ }
+ }
+ if (count >= 9) {
+ break;
+ }
+ count = 0;
+ }
+ RegionReplicaTestHelper.assertReplicaDistributed(HTU, table);
+ } finally {
+ if (table != null) {
+ HTU.deleteTable(tn);
}
}
- // There are 6 regions before split, 9 regions after split.
- HTU.getAdmin().split(table.getName(), Bytes.toBytes(1));
- int count = 0;
- while (true) {
+ }
+
+ @Test
+ public void testAssignFakeReplicaRegion() throws Exception {
+ TableName tn = TableName.valueOf(this.name.getMethodName());
+ Table table = null;
+ try {
+ table = createTableAndLoadData(tn);
+ final RegionInfo fakeHri =
+ RegionInfoBuilder.newBuilder(table.getName()).setStartKey(Bytes.toBytes("a"))
+ .setEndKey(Bytes.toBytes("b")).setReplicaId(1)
+ .setRegionId(System.currentTimeMillis()).build();
+
+ // To test AssignProcedure can defend this case.
+ HTU.getMiniHBaseCluster().getMaster().getAssignmentManager().assign(fakeHri);
+ // Wait until all assigns are done.
+ HBaseTestingUtility.await(50, () -> {
+ return HTU.getMiniHBaseCluster().getMaster().getMasterProcedureExecutor().getActiveProcIds()
+ .isEmpty();
+ });
+
+ // Make sure the region is not online.
for (RegionServerThread rs : HTU.getMiniHBaseCluster().getRegionServerThreads()) {
for (Region r : rs.getRegionServer().getRegions(table.getName())) {
- count++;
+ assertNotEquals(r.getRegionInfo(), fakeHri);
}
}
- if (count >= 9) {
- break;
+ } finally {
+ if (table != null) {
+ HTU.deleteTable(tn);
}
- count = 0;
}
-
- RegionReplicaTestHelper.assertReplicaDistributed(HTU, table);
}
}