You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by ap...@apache.org on 2014/12/16 03:32:06 UTC
hbase git commit: HBASE-12686 Failures in split before PONR not
clearing the daughter regions from regions in transition during rollback
(Vandana Ayyalasomayajula)
Repository: hbase
Updated Branches:
refs/heads/0.98 b5f645e02 -> a5d2abaa9
HBASE-12686 Failures in split before PONR not clearing the daughter regions from regions in transition during rollback (Vandana Ayyalasomayajula)
Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/a5d2abaa
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/a5d2abaa
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/a5d2abaa
Branch: refs/heads/0.98
Commit: a5d2abaa9f3798ed83d459d45c4c999dea0094bd
Parents: b5f645e
Author: Andrew Purtell <ap...@apache.org>
Authored: Mon Dec 15 18:30:40 2014 -0800
Committer: Andrew Purtell <ap...@apache.org>
Committed: Mon Dec 15 18:30:40 2014 -0800
----------------------------------------------------------------------
.../hadoop/hbase/master/AssignmentManager.java | 38 +++++++++++++++-----
.../TestSplitTransactionOnCluster.java | 34 ++++++++++++++++++
2 files changed, 64 insertions(+), 8 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hbase/blob/a5d2abaa/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
index fc80d9c..f0fe635 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
@@ -27,6 +27,7 @@ import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
+import java.util.Map.Entry;
import java.util.NavigableMap;
import java.util.Set;
import java.util.TreeMap;
@@ -170,6 +171,9 @@ public class AssignmentManager extends ZooKeeperListener {
private final Map<String, PairOfSameType<HRegionInfo>> mergingRegions
= new HashMap<String, PairOfSameType<HRegionInfo>>();
+ private final Map<HRegionInfo, PairOfSameType<HRegionInfo>> splitRegions
+ = new HashMap<HRegionInfo, PairOfSameType<HRegionInfo>>();
+
/**
* The sleep time for which the assignment will wait before retrying in case of hbase:meta assignment
* failure due to lack of availability of region plan or bad region plan
@@ -1308,14 +1312,30 @@ public class AssignmentManager extends ZooKeeperListener {
boolean disabled = getZKTable().isDisablingOrDisabledTable(regionInfo.getTable());
ServerName serverName = rs.getServerName();
if (serverManager.isServerOnline(serverName)) {
- if (rs.isOnServer(serverName)
- && (rs.isOpened() || rs.isSplitting())) {
- regionOnline(regionInfo, serverName);
- if (disabled) {
- // if server is offline, no hurt to unassign again
- LOG.info("Opened " + regionNameStr
- + "but this table is disabled, triggering close of region");
- unassign(regionInfo);
+ if (rs.isOnServer(serverName) && (rs.isOpened() || rs.isSplitting())) {
+ synchronized (regionStates) {
+ regionOnline(regionInfo, serverName);
+ if (rs.isSplitting() && splitRegions.containsKey(regionInfo)) {
+ // Check if the daughter regions are still there; if they are present, offline them,
+ // as this is the case of a rollback.
+ HRegionInfo hri_a = splitRegions.get(regionInfo).getFirst();
+ HRegionInfo hri_b = splitRegions.get(regionInfo).getSecond();
+ if (!regionStates.isRegionInTransition(hri_a.getEncodedName())) {
+ LOG.warn("Split daughter region not in transition " + hri_a);
+ }
+ if (!regionStates.isRegionInTransition(hri_b.getEncodedName())) {
+ LOG.warn("Split daughter region not in transition" + hri_b);
+ }
+ regionOffline(hri_a);
+ regionOffline(hri_b);
+ splitRegions.remove(regionInfo);
+ }
+ if (disabled) {
+ // if server is offline, no hurt to unassign again
+ LOG.info("Opened " + regionNameStr
+ + "but this table is disabled, triggering close of region");
+ unassign(regionInfo);
+ }
}
} else if (rs.isMergingNew()) {
synchronized (regionStates) {
@@ -3980,6 +4000,7 @@ public class AssignmentManager extends ZooKeeperListener {
}
synchronized (regionStates) {
+ splitRegions.put(p, new PairOfSameType<HRegionInfo>(hri_a, hri_b));
regionStates.updateRegionState(hri_a, State.SPLITTING_NEW, sn);
regionStates.updateRegionState(hri_b, State.SPLITTING_NEW, sn);
regionStates.updateRegionState(rt, State.SPLITTING);
@@ -3995,6 +4016,7 @@ public class AssignmentManager extends ZooKeeperListener {
regionOffline(p, State.SPLIT);
regionOnline(hri_a, sn);
regionOnline(hri_b, sn);
+ splitRegions.remove(p);
}
}
http://git-wip-us.apache.org/repos/asf/hbase/blob/a5d2abaa/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransactionOnCluster.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransactionOnCluster.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransactionOnCluster.java
index e6b9725..b6744e0 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransactionOnCluster.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransactionOnCluster.java
@@ -1162,6 +1162,40 @@ public class TestSplitTransactionOnCluster {
return(null);
}
+ @Test
+ public void testFailedSplit() throws Exception {
+ TableName tableName = TableName.valueOf("testFailedSplit");
+ byte[] colFamily = Bytes.toBytes("info");
+ TESTING_UTIL.createTable(tableName, colFamily);
+ HTable table = new HTable(TESTING_UTIL.getConfiguration(), tableName);
+ try {
+ TESTING_UTIL.loadTable(table, colFamily);
+ List<HRegionInfo> regions = TESTING_UTIL.getHBaseAdmin().getTableRegions(tableName);
+ assertTrue(regions.size() == 1);
+ final HRegion actualRegion = cluster.getRegions(tableName).get(0);
+ actualRegion.getCoprocessorHost().load(FailingSplitRegionObserver.class,
+ Coprocessor.PRIORITY_USER, actualRegion.getBaseConf());
+
+ // The following split would fail.
+ admin.split(tableName.getNameAsString());
+ FailingSplitRegionObserver.latch.await();
+ LOG.info("Waiting for region to come out of RIT");
+ TESTING_UTIL.waitFor(60000, 1000, new Waiter.Predicate<Exception>() {
+ @Override
+ public boolean evaluate() throws Exception {
+ RegionStates regionStates = cluster.getMaster().getAssignmentManager().getRegionStates();
+ Map<String, RegionState> rit = regionStates.getRegionsInTransition();
+ return !rit.containsKey(actualRegion.getRegionInfo().getEncodedName());
+ }
+ });
+ regions = TESTING_UTIL.getHBaseAdmin().getTableRegions(tableName);
+ assertTrue(regions.size() == 1);
+ } finally {
+ table.close();
+ TESTING_UTIL.deleteTable(tableName);
+ }
+ }
+
private List<HRegion> checkAndGetDaughters(byte[] tableName)
throws InterruptedException {
List<HRegion> daughters = null;