You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by hu...@apache.org on 2018/01/03 20:33:56 UTC
hbase git commit: HBASE-18625 Splitting of region with replica,
doesn't update region list in serverHolding. A server crash leads to
overlap.
Repository: hbase
Updated Branches:
refs/heads/branch-1 6faed49ad -> fd33b3589
HBASE-18625 Splitting of region with replica, doesn't update region list in serverHolding. A server crash leads to overlap.
Signed-off-by: ramkrishna.s.vasudevan<ra...@intel.com>
Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/fd33b358
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/fd33b358
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/fd33b358
Branch: refs/heads/branch-1
Commit: fd33b3589e9ce03ed478552628403d5c08ae2394
Parents: 6faed49
Author: huaxiang sun <hu...@apache.org>
Authored: Wed Jan 3 12:24:05 2018 -0800
Committer: Huaxiang Sun <hs...@cloudera.com>
Committed: Wed Jan 3 12:32:44 2018 -0800
----------------------------------------------------------------------
.../hadoop/hbase/master/RegionStates.java | 33 ++++++-
.../TestCatalogJanitorInMemoryStates.java | 91 +++++++++++++++++---
2 files changed, 112 insertions(+), 12 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hbase/blob/fd33b358/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStates.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStates.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStates.java
index 599e649..7ce6257 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStates.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStates.java
@@ -596,6 +596,19 @@ public class RegionStates {
}
/**
+ * Checks whether serverHoldings has the region under the given server. Used in unit tests.
+ */
+ @VisibleForTesting
+ synchronized boolean existsInServerHoldings(final ServerName serverName,
+ final HRegionInfo hri) {
+ Set<HRegionInfo> oldRegions = serverHoldings.get(serverName);
+ if (oldRegions != null) {
+ return oldRegions.contains(hri);
+ }
+ return false;
+ }
+
+ /**
* A dead server's wals have been split so that all the regions
* used to be open on it can be safely assigned now. Mark them assignable.
*/
@@ -664,8 +677,26 @@ public class RegionStates {
deleteRegion(hri);
return;
}
+
+ /*
+ * One tricky case: if the region here is a replica region and its primary region is in
+ * SPLIT state, its newState should be the same as the primary's (SPLIT), not OFFLINE.
+ */
State newState =
- expectedState == null ? State.OFFLINE : expectedState;
+ expectedState == null ? State.OFFLINE : expectedState;
+
+ if ((expectedState == null) && !RegionReplicaUtil.isDefaultReplica(hri)) {
+ RegionState primateState = getRegionState(
+ RegionReplicaUtil.getRegionInfoForDefaultReplica(hri));
+ if ((primateState != null) && (primateState.getState() == State.SPLIT)) {
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Update region " + hri + "to SPLIT, from primary region " +
+ RegionReplicaUtil.getRegionInfoForDefaultReplica(hri));
+ }
+ newState = State.SPLIT;
+ }
+ }
+
updateRegionState(hri, newState);
String encodedName = hri.getEncodedName();
synchronized (this) {
http://git-wip-us.apache.org/repos/asf/hbase/blob/fd33b358/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestCatalogJanitorInMemoryStates.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestCatalogJanitorInMemoryStates.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestCatalogJanitorInMemoryStates.java
index 34cf19f..5ec3d6a 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestCatalogJanitorInMemoryStates.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestCatalogJanitorInMemoryStates.java
@@ -18,21 +18,20 @@
*/
package org.apache.hadoop.hbase.master;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.*;
import org.apache.hadoop.hbase.client.*;
-import org.apache.hadoop.hbase.master.AssignmentManager;
-import org.apache.hadoop.hbase.regionserver.HRegion;
import org.apache.hadoop.hbase.testclassification.MasterTests;
import org.apache.hadoop.hbase.testclassification.MediumTests;
-import org.apache.hadoop.hbase.testclassification.SmallTests;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.PairOfSameType;
import org.apache.hadoop.hbase.util.Threads;
-import static org.junit.Assert.assertArrayEquals;
-import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
import org.junit.AfterClass;
@@ -44,12 +43,6 @@ import org.junit.experimental.categories.Category;
import org.junit.rules.TestName;
import org.junit.rules.TestRule;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;
-
-import static org.junit.Assert.assertFalse;
-import static org.junit.Assert.assertTrue;
import static org.junit.Assert.assertNotNull;
@Category({MasterTests.class, MediumTests.class})
@@ -126,6 +119,57 @@ public class TestCatalogJanitorInMemoryStates {
}
+ /**
+ * Test that after a parent region with a replica is split, both the primary parent region
+ * and the replica parent region are removed from AM's serverHoldings.
+ */
+ @Test(timeout = 180000)
+ public void testInMemoryForReplicaParentCleanup() throws IOException, InterruptedException {
+ final AssignmentManager am = TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager();
+ final CatalogJanitor janitor = TEST_UTIL.getHBaseCluster().getMaster().catalogJanitorChore;
+
+ final TableName tableName = TableName.valueOf("testInMemoryForReplicaParentCleanup");
+ HTableDescriptor hdt = TEST_UTIL.createTableDescriptor(tableName.getNameAsString());
+ hdt.setRegionReplication(2);
+ TEST_UTIL.createTable(hdt, new byte[][] { FAMILY }, TEST_UTIL.getConfiguration());
+
+ RegionLocator locator = TEST_UTIL.getConnection().getRegionLocator(tableName);
+ List<HRegionLocation> allRegionLocations = locator.getAllRegionLocations();
+
+ // There are two regions, one for primary, one for the replica.
+ assertTrue(allRegionLocations.size() == 2);
+
+ HRegionLocation replicaParentRegion, primaryParentRegion;
+ if (RegionReplicaUtil.isDefaultReplica(
+ allRegionLocations.get(0).getRegionInfo().getReplicaId())) {
+ primaryParentRegion = allRegionLocations.get(0);
+ replicaParentRegion = allRegionLocations.get(1);
+ } else {
+ primaryParentRegion = allRegionLocations.get(1);
+ replicaParentRegion = allRegionLocations.get(0);
+ }
+
+ List<HRegionLocation> primaryDaughters = splitRegion(primaryParentRegion.getRegionInfo(),
+ Bytes.toBytes("a"));
+
+ // Wait until the replica parent region is offline.
+ while (am.getRegionStates().isRegionOnline(replicaParentRegion.getRegionInfo())) {
+ Thread.sleep(100);
+ }
+
+ assertNotNull("Should have found daughter regions for " + primaryDaughters, primaryDaughters);
+
+ // check that primary parent region is not in AM's serverHoldings
+ assertFalse("Primary Parent region should have been removed from RegionState's serverHoldings",
+ am.getRegionStates().existsInServerHoldings(primaryParentRegion.getServerName(),
+ primaryParentRegion.getRegionInfo()));
+
+ // check that replica parent region is not in AM's serverHoldings
+ assertFalse("Primary Parent region should have been removed from RegionState's serverHoldings",
+ am.getRegionStates().existsInServerHoldings(replicaParentRegion.getServerName(),
+ replicaParentRegion.getRegionInfo()));
+ }
+
/*
* Splits a region
* @param t Region to split.
@@ -152,6 +196,31 @@ public class TestCatalogJanitorInMemoryStates {
}
/*
+* Splits a region at the given split point.
+* @param r Region to split.
+* @return List of daughter region locations; empty if no daughters were found
+* @throws IOException, InterruptedException
+*/
+ private List<HRegionLocation> splitRegion(final HRegionInfo r, final byte[] splitPoint)
+ throws IOException, InterruptedException {
+ List<HRegionLocation> locations = new ArrayList<>();
+ // Split this table in two.
+ Admin admin = TEST_UTIL.getHBaseAdmin();
+ Connection connection = TEST_UTIL.getConnection();
+ admin.splitRegion(r.getEncodedNameAsBytes(), splitPoint);
+ admin.close();
+ PairOfSameType<HRegionInfo> regions = waitOnDaughters(r);
+ if (regions != null) {
+ try (RegionLocator rl = connection.getRegionLocator(r.getTable())) {
+ locations.add(rl.getRegionLocation(regions.getFirst().getEncodedNameAsBytes()));
+ locations.add(rl.getRegionLocation(regions.getSecond().getEncodedNameAsBytes()));
+ }
+ return locations;
+ }
+ return locations;
+ }
+
+ /*
* Wait on region split. May return because we waited long enough on the split
* and it didn't happen. Caller should check.
* @param r