You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by hu...@apache.org on 2018/01/03 20:33:56 UTC

hbase git commit: HBASE-18625 Splitting of region with replica, doesn't update region list in serverHolding. A server crash leads to overlap.

Repository: hbase
Updated Branches:
  refs/heads/branch-1 6faed49ad -> fd33b3589


HBASE-18625 Splitting of region with replica, doesn't update region list in serverHolding. A server crash leads to overlap.

Signed-off-by: ramkrishna.s.vasudevan<ra...@intel.com>


Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/fd33b358
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/fd33b358
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/fd33b358

Branch: refs/heads/branch-1
Commit: fd33b3589e9ce03ed478552628403d5c08ae2394
Parents: 6faed49
Author: huaxiang sun <hu...@apache.org>
Authored: Wed Jan 3 12:24:05 2018 -0800
Committer: Huaxiang Sun <hs...@cloudera.com>
Committed: Wed Jan 3 12:32:44 2018 -0800

----------------------------------------------------------------------
 .../hadoop/hbase/master/RegionStates.java       | 33 ++++++-
 .../TestCatalogJanitorInMemoryStates.java       | 91 +++++++++++++++++---
 2 files changed, 112 insertions(+), 12 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hbase/blob/fd33b358/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStates.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStates.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStates.java
index 599e649..7ce6257 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStates.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStates.java
@@ -596,6 +596,19 @@ public class RegionStates {
   }
 
   /**
+   * Used in some unit tests
+   */
+  @VisibleForTesting
+  synchronized boolean existsInServerHoldings(final ServerName serverName,
+      final HRegionInfo hri) {
+    Set<HRegionInfo> oldRegions = serverHoldings.get(serverName);
+    if (oldRegions != null) {
+      return oldRegions.contains(hri);
+    }
+    return false;
+  }
+
+  /**
    * A dead server's wals have been split so that all the regions
    * used to be open on it can be safely assigned now. Mark them assignable.
    */
@@ -664,8 +677,26 @@ public class RegionStates {
       deleteRegion(hri);
       return;
     }
+
+    /*
+     * One tricky case: if the region here is a replica region and its primary region is in
+     * SPLIT state, its newState should be the same as the primary's (SPLIT), not OFFLINE.
+     */
     State newState =
-      expectedState == null ? State.OFFLINE : expectedState;
+        expectedState == null ? State.OFFLINE : expectedState;
+
+    if ((expectedState == null) && !RegionReplicaUtil.isDefaultReplica(hri)) {
+      RegionState primateState = getRegionState(
+          RegionReplicaUtil.getRegionInfoForDefaultReplica(hri));
+      if ((primateState != null) && (primateState.getState() == State.SPLIT)) {
+        if (LOG.isDebugEnabled()) {
+          LOG.debug("Update region " + hri + "to SPLIT, from primary region " +
+              RegionReplicaUtil.getRegionInfoForDefaultReplica(hri));
+        }
+        newState = State.SPLIT;
+      }
+    }
+
     updateRegionState(hri, newState);
     String encodedName = hri.getEncodedName();
     synchronized (this) {

http://git-wip-us.apache.org/repos/asf/hbase/blob/fd33b358/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestCatalogJanitorInMemoryStates.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestCatalogJanitorInMemoryStates.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestCatalogJanitorInMemoryStates.java
index 34cf19f..5ec3d6a 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestCatalogJanitorInMemoryStates.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestCatalogJanitorInMemoryStates.java
@@ -18,21 +18,20 @@
  */
 package org.apache.hadoop.hbase.master;
 
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hbase.*;
 import org.apache.hadoop.hbase.client.*;
-import org.apache.hadoop.hbase.master.AssignmentManager;
-import org.apache.hadoop.hbase.regionserver.HRegion;
 import org.apache.hadoop.hbase.testclassification.MasterTests;
 import org.apache.hadoop.hbase.testclassification.MediumTests;
-import org.apache.hadoop.hbase.testclassification.SmallTests;
 import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.hbase.util.PairOfSameType;
 import org.apache.hadoop.hbase.util.Threads;
-import static org.junit.Assert.assertArrayEquals;
-import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.assertTrue;
 import org.junit.AfterClass;
@@ -44,12 +43,6 @@ import org.junit.experimental.categories.Category;
 import org.junit.rules.TestName;
 import org.junit.rules.TestRule;
 
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;
-
-import static org.junit.Assert.assertFalse;
-import static org.junit.Assert.assertTrue;
 import static org.junit.Assert.assertNotNull;
 
 @Category({MasterTests.class, MediumTests.class})
@@ -126,6 +119,57 @@ public class TestCatalogJanitorInMemoryStates {
 
   }
 
+  /**
+   * Test that after the primary parent region is split, both the primary parent region and its
+   * replica parent region are removed from AM's serverHoldings.
+   */
+  @Test(timeout = 180000)
+  public void testInMemoryForReplicaParentCleanup() throws IOException, InterruptedException {
+    final AssignmentManager am = TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager();
+    final CatalogJanitor janitor = TEST_UTIL.getHBaseCluster().getMaster().catalogJanitorChore;
+
+    final TableName tableName = TableName.valueOf("testInMemoryForReplicaParentCleanup");
+    HTableDescriptor hdt = TEST_UTIL.createTableDescriptor(tableName.getNameAsString());
+    hdt.setRegionReplication(2);
+    TEST_UTIL.createTable(hdt, new byte[][] { FAMILY }, TEST_UTIL.getConfiguration());
+
+    RegionLocator locator = TEST_UTIL.getConnection().getRegionLocator(tableName);
+    List<HRegionLocation> allRegionLocations = locator.getAllRegionLocations();
+
+    // There are two regions, one for primary, one for the replica.
+    assertTrue(allRegionLocations.size() == 2);
+
+    HRegionLocation replicaParentRegion, primaryParentRegion;
+    if (RegionReplicaUtil.isDefaultReplica(
+        allRegionLocations.get(0).getRegionInfo().getReplicaId())) {
+      primaryParentRegion = allRegionLocations.get(0);
+      replicaParentRegion = allRegionLocations.get(1);
+    } else {
+      primaryParentRegion = allRegionLocations.get(1);
+      replicaParentRegion = allRegionLocations.get(0);
+    }
+
+    List<HRegionLocation> primaryDaughters = splitRegion(primaryParentRegion.getRegionInfo(),
+        Bytes.toBytes("a"));
+
+    // Wait until the replica parent region is offline.
+    while (am.getRegionStates().isRegionOnline(replicaParentRegion.getRegionInfo())) {
+      Thread.sleep(100);
+    }
+
+    assertNotNull("Should have found daughter regions for " + primaryDaughters, primaryDaughters);
+
+    // check that primary parent region is not in AM's serverHoldings
+    assertFalse("Primary Parent region should have been removed from RegionState's serverHoldings",
+        am.getRegionStates().existsInServerHoldings(primaryParentRegion.getServerName(),
+            primaryParentRegion.getRegionInfo()));
+
+    // check that replica parent region is not in AM's serverHoldings
+    assertFalse("Primary Parent region should have been removed from RegionState's serverHoldings",
+        am.getRegionStates().existsInServerHoldings(replicaParentRegion.getServerName(),
+            replicaParentRegion.getRegionInfo()));
+  }
+
   /*
  * Splits a region
  * @param t Region to split.
@@ -152,6 +196,31 @@ public class TestCatalogJanitorInMemoryStates {
   }
 
   /*
+* Splits a region at the given split point.
+* @param r Region to split.
+* @return List of daughter region locations (empty if the daughters did not show up)
+* @throws IOException, InterruptedException
+*/
+  private List<HRegionLocation> splitRegion(final HRegionInfo r, final byte[] splitPoint)
+      throws IOException, InterruptedException {
+    List<HRegionLocation> locations = new ArrayList<>();
+    // Split this table in two.
+    Admin admin = TEST_UTIL.getHBaseAdmin();
+    Connection connection = TEST_UTIL.getConnection();
+    admin.splitRegion(r.getEncodedNameAsBytes(), splitPoint);
+    admin.close();
+    PairOfSameType<HRegionInfo> regions = waitOnDaughters(r);
+    if (regions != null) {
+      try (RegionLocator rl = connection.getRegionLocator(r.getTable())) {
+        locations.add(rl.getRegionLocation(regions.getFirst().getEncodedNameAsBytes()));
+        locations.add(rl.getRegionLocation(regions.getSecond().getEncodedNameAsBytes()));
+      }
+      return locations;
+    }
+    return locations;
+  }
+
+  /*
    * Wait on region split. May return because we waited long enough on the split
    * and it didn't happen.  Caller should check.
    * @param r