You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by ap...@apache.org on 2014/12/16 03:32:06 UTC

hbase git commit: HBASE-12686 Failures in split before PONR not clearing the daughter regions from regions in transition during rollback (Vandana Ayyalasomayajula)

Repository: hbase
Updated Branches:
  refs/heads/0.98 b5f645e02 -> a5d2abaa9


HBASE-12686 Failures in split before PONR not clearing the daughter regions from regions in transition during rollback (Vandana Ayyalasomayajula)


Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/a5d2abaa
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/a5d2abaa
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/a5d2abaa

Branch: refs/heads/0.98
Commit: a5d2abaa9f3798ed83d459d45c4c999dea0094bd
Parents: b5f645e
Author: Andrew Purtell <ap...@apache.org>
Authored: Mon Dec 15 18:30:40 2014 -0800
Committer: Andrew Purtell <ap...@apache.org>
Committed: Mon Dec 15 18:30:40 2014 -0800

----------------------------------------------------------------------
 .../hadoop/hbase/master/AssignmentManager.java  | 38 +++++++++++++++-----
 .../TestSplitTransactionOnCluster.java          | 34 ++++++++++++++++++
 2 files changed, 64 insertions(+), 8 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hbase/blob/a5d2abaa/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
index fc80d9c..f0fe635 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
@@ -27,6 +27,7 @@ import java.util.HashSet;
 import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
+import java.util.Map.Entry;
 import java.util.NavigableMap;
 import java.util.Set;
 import java.util.TreeMap;
@@ -170,6 +171,9 @@ public class AssignmentManager extends ZooKeeperListener {
   private final Map<String, PairOfSameType<HRegionInfo>> mergingRegions
     = new HashMap<String, PairOfSameType<HRegionInfo>>();
 
+  private final Map<HRegionInfo, PairOfSameType<HRegionInfo>> splitRegions
+  = new HashMap<HRegionInfo, PairOfSameType<HRegionInfo>>();
+
   /**
    * The sleep time for which the assignment will wait before retrying in case of hbase:meta assignment
    * failure due to lack of availability of region plan or bad region plan
@@ -1308,14 +1312,30 @@ public class AssignmentManager extends ZooKeeperListener {
             boolean disabled = getZKTable().isDisablingOrDisabledTable(regionInfo.getTable());
             ServerName serverName = rs.getServerName();
             if (serverManager.isServerOnline(serverName)) {
-              if (rs.isOnServer(serverName)
-                  && (rs.isOpened() || rs.isSplitting())) {
-                regionOnline(regionInfo, serverName);
-                if (disabled) {
-                  // if server is offline, no hurt to unassign again
-                  LOG.info("Opened " + regionNameStr
-                    + "but this table is disabled, triggering close of region");
-                  unassign(regionInfo);
+              if (rs.isOnServer(serverName) && (rs.isOpened() || rs.isSplitting())) {
+                synchronized (regionStates) {
+                  regionOnline(regionInfo, serverName);
+                  if (rs.isSplitting() && splitRegions.containsKey(regionInfo)) {
+                    // Check if the daughter regions are still there; if they are present, offline
+                    // them, as this is the case of a rollback.
+                    HRegionInfo hri_a = splitRegions.get(regionInfo).getFirst();
+                    HRegionInfo hri_b = splitRegions.get(regionInfo).getSecond();
+                    if (!regionStates.isRegionInTransition(hri_a.getEncodedName())) {
+                      LOG.warn("Split daughter region not in transition " + hri_a);
+                    }
+                    if (!regionStates.isRegionInTransition(hri_b.getEncodedName())) {
+                      LOG.warn("Split daughter region not in transition" + hri_b);
+                    }
+                    regionOffline(hri_a);
+                    regionOffline(hri_b);
+                    splitRegions.remove(regionInfo);
+                  }
+                  if (disabled) {
+                    // if server is offline, no hurt to unassign again
+                    LOG.info("Opened " + regionNameStr
+                        + "but this table is disabled, triggering close of region");
+                    unassign(regionInfo);
+                  }
                 }
               } else if (rs.isMergingNew()) {
                 synchronized (regionStates) {
@@ -3980,6 +4000,7 @@ public class AssignmentManager extends ZooKeeperListener {
     }
 
     synchronized (regionStates) {
+      splitRegions.put(p, new PairOfSameType<HRegionInfo>(hri_a, hri_b));
       regionStates.updateRegionState(hri_a, State.SPLITTING_NEW, sn);
       regionStates.updateRegionState(hri_b, State.SPLITTING_NEW, sn);
       regionStates.updateRegionState(rt, State.SPLITTING);
@@ -3995,6 +4016,7 @@ public class AssignmentManager extends ZooKeeperListener {
         regionOffline(p, State.SPLIT);
         regionOnline(hri_a, sn);
         regionOnline(hri_b, sn);
+        splitRegions.remove(p);
       }
     }
 

http://git-wip-us.apache.org/repos/asf/hbase/blob/a5d2abaa/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransactionOnCluster.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransactionOnCluster.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransactionOnCluster.java
index e6b9725..b6744e0 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransactionOnCluster.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransactionOnCluster.java
@@ -1162,6 +1162,40 @@ public class TestSplitTransactionOnCluster {
     return(null);
   }
 
+  @Test
+  public void testFailedSplit() throws Exception { // HBASE-12686: failed-split rollback
+    TableName tableName = TableName.valueOf("testFailedSplit");
+    byte[] colFamily = Bytes.toBytes("info");
+    TESTING_UTIL.createTable(tableName, colFamily);
+    HTable table = new HTable(TESTING_UTIL.getConfiguration(), tableName);
+    try {
+      TESTING_UTIL.loadTable(table, colFamily);
+      List<HRegionInfo> regions = TESTING_UTIL.getHBaseAdmin().getTableRegions(tableName);
+      assertTrue(regions.size() == 1); // precondition: the table starts with a single region
+      final HRegion actualRegion = cluster.getRegions(tableName).get(0);
+      actualRegion.getCoprocessorHost().load(FailingSplitRegionObserver.class,
+        Coprocessor.PRIORITY_USER, actualRegion.getBaseConf());
+
+      // The following split would fail, presumably due to the observer loaded above.
+      admin.split(tableName.getNameAsString());
+      FailingSplitRegionObserver.latch.await(); // wait on the observer's latch before polling
+      LOG.info("Waiting for region to come out of RIT");
+      TESTING_UTIL.waitFor(60000, 1000, new Waiter.Predicate<Exception>() {
+        @Override
+        public boolean evaluate() throws Exception {
+          RegionStates regionStates = cluster.getMaster().getAssignmentManager().getRegionStates();
+          Map<String, RegionState> rit = regionStates.getRegionsInTransition();
+          return !rit.containsKey(actualRegion.getRegionInfo().getEncodedName());
+        }
+      });
+      regions = TESTING_UTIL.getHBaseAdmin().getTableRegions(tableName);
+      assertTrue(regions.size() == 1); // still exactly one region after the failed split
+    } finally {
+      table.close();
+      TESTING_UTIL.deleteTable(tableName);
+    }
+  }
+
   private List<HRegion> checkAndGetDaughters(byte[] tableName)
       throws InterruptedException {
     List<HRegion> daughters = null;