You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by te...@apache.org on 2013/04/04 22:21:17 UTC
svn commit: r1464724 - in /hbase/branches/0.94/src:
main/java/org/apache/hadoop/hbase/master/
main/java/org/apache/hadoop/hbase/master/handler/
test/java/org/apache/hadoop/hbase/master/
test/java/org/apache/hadoop/hbase/regionserver/
Author: tedyu
Date: Thu Apr 4 20:21:17 2013
New Revision: 1464724
URL: http://svn.apache.org/r1464724
Log:
HBASE-8127 Region of a disabling or disabled table could be stuck in transition state when RS dies during Master initialization (Rajeshbabu)
Modified:
hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java
hbase/branches/0.94/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManager.java
hbase/branches/0.94/src/test/java/org/apache/hadoop/hbase/regionserver/TestRSKilledWhenMasterInitializing.java
Modified: hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java?rev=1464724&r1=1464723&r2=1464724&view=diff
==============================================================================
--- hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java (original)
+++ hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java Thu Apr 4 20:21:17 2013
@@ -2836,6 +2836,12 @@ public class AssignmentManager extends Z
// see HBASE-5916
if (actualDeadServers.contains(deadServer.getKey())) {
for (Pair<HRegionInfo, Result> deadRegion : deadServer.getValue()) {
+ HRegionInfo hri = deadRegion.getFirst();
+ // Delete the znode of a region in transition if its table is disabled or disabling. If a
+ // region server went down during master initialization, SSH (ServerShutdownHandler) cannot
+ // handle the regions of partially disabled tables because the in-memory region state
+ // information may not be available to the master.
+ deleteNodeAndOfflineRegion(hri);
nodes.remove(deadRegion.getFirst().getEncodedName());
}
continue;
@@ -2884,6 +2890,22 @@ public class AssignmentManager extends Z
}
}
+ /**
+ * If the table is disabling/disabled, delete the region's in-transition znode and offline it.
+ * @param hri region to offline.
+ */
+ public void deleteNodeAndOfflineRegion(HRegionInfo hri) {
+ if (zkTable.isDisablingOrDisabledTable(hri.getTableNameAsString())) {
+ try {
+ // If the table is partially disabled, delete the znode if it exists, whatever its state.
+ ZKAssign.deleteNodeFailSilent(this.master.getZooKeeper(), hri);
+ } catch (KeeperException ke) {
+ this.master.abort("Unexpected ZK exception deleting unassigned node " + hri, ke);
+ }
+ regionOffline(hri);
+ }
+ }
+
/*
* Presumes caller has taken care of necessary locking modifying servers Map.
* @param hsi
@@ -3580,7 +3602,7 @@ public class AssignmentManager extends Z
out.writeLong(stamp.get());
}
}
-
+
public void stop() {
this.timeoutMonitor.interrupt();
this.timerUpdater.interrupt();
Modified: hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java?rev=1464724&r1=1464723&r2=1464724&view=diff
==============================================================================
--- hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java (original)
+++ hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java Thu Apr 4 20:21:17 2013
@@ -288,31 +288,29 @@ public class ServerShutdownHandler exten
// If the table was partially disabled and the RS went down, we should clear the RIT
// and remove the node for the region. The rit that we use may be stale in case the table
// was in DISABLING state but though we did assign we will not be clearing the znode in
- // CLOSING state. Doing this will have no harm. See HBASE-5927
- toAssign = checkForDisablingOrDisabledTables(ritsGoingToServer, toAssign, rit, assignmentManager);
+ // CLOSING state. Doing this will do no harm. The rit can be null if the region server went
+ // down during master startup. In that case, any znodes that exist for partially disabled
+ // table regions are deleted during master startup only. See HBASE-8127.
+ toAssign =
+ checkForDisablingOrDisabledTables(ritsGoingToServer, toAssign, rit, e.getKey(),
+ assignmentManager);
}
return toAssign;
}
private List<HRegionInfo> checkForDisablingOrDisabledTables(Set<HRegionInfo> regionsFromRIT,
- List<HRegionInfo> toAssign, RegionState rit, AssignmentManager assignmentManager) {
- if (rit == null) {
- return toAssign;
- }
- if (!rit.isClosing() && !rit.isPendingClose()) {
- return toAssign;
- }
- if (!assignmentManager.getZKTable().isDisablingOrDisabledTable(
- rit.getRegion().getTableNameAsString())) {
- return toAssign;
- }
- HRegionInfo hri = rit.getRegion();
- AssignmentManager am = assignmentManager;
- am.deleteClosingOrClosedNode(hri);
- am.regionOffline(hri);
- // To avoid region assignment if table is in disabling or disabled state.
- toAssign.remove(hri);
- regionsFromRIT.remove(hri);
+ List<HRegionInfo> toAssign, RegionState rit, HRegionInfo hri,
+ AssignmentManager assignmentManager) {
+ boolean disabled =
+ assignmentManager.getZKTable().isDisablingOrDisabledTable(hri.getTableNameAsString());
+ if (disabled) {
+ // To avoid region assignment if table is in disabling or disabled state.
+ toAssign.remove(hri);
+ regionsFromRIT.remove(hri);
+ }
+ if (rit != null && disabled) {
+ assignmentManager.deleteNodeAndOfflineRegion(hri);
+ }
return toAssign;
}
Modified: hbase/branches/0.94/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManager.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.94/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManager.java?rev=1464724&r1=1464723&r2=1464724&view=diff
==============================================================================
--- hbase/branches/0.94/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManager.java (original)
+++ hbase/branches/0.94/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManager.java Thu Apr 4 20:21:17 2013
@@ -431,10 +431,18 @@ public class TestAssignmentManager {
@Test
public void testSSHWhenDisableTableInProgress()
throws KeeperException, IOException {
- testCaseWithPartiallyDisabledState(TableState.DISABLING);
- testCaseWithPartiallyDisabledState(TableState.DISABLED);
+ testCaseWithPartiallyDisabledState(TableState.DISABLING, false);
+ testCaseWithPartiallyDisabledState(TableState.DISABLED, false);
}
+ @Test
+ public void testSSHWhenDisablingTableRegionsInOpeningState()
+ throws KeeperException, IOException {
+ testCaseWithPartiallyDisabledState(TableState.DISABLING, true);
+ testCaseWithPartiallyDisabledState(TableState.DISABLED, true);
+ }
+
+
/**
* To test if the split region is removed from RIT if the region was in SPLITTING state
* but the RS has actually completed the splitting in META but went down. See HBASE-6070
@@ -500,7 +508,8 @@ public class TestAssignmentManager {
}
}
- private void testCaseWithPartiallyDisabledState(TableState state) throws KeeperException, IOException, NodeExistsException {
+ private void testCaseWithPartiallyDisabledState(TableState state, boolean opening)
+ throws KeeperException, IOException, NodeExistsException {
// Create and startup an executor. This is used by AssignmentManager
// handling zk callbacks.
ExecutorService executor = startupMasterExecutor("testSSHWhenDisableTableInProgress");
@@ -511,20 +520,32 @@ public class TestAssignmentManager {
// Create an AM.
AssignmentManager am = new AssignmentManager(this.server, this.serverManager, ct, balancer,
executor);
- // adding region to regions and servers maps.
- am.regionOnline(REGIONINFO, SERVERNAME_A);
- // adding region in pending close.
- am.regionsInTransition.put(REGIONINFO.getEncodedName(), new RegionState(REGIONINFO,
- State.PENDING_CLOSE, System.currentTimeMillis(), SERVERNAME_A));
-
+ if (opening) {
+ am.regionsInTransition.put(REGIONINFO.getEncodedName(), new RegionState(REGIONINFO,
+ State.OPENING, System.currentTimeMillis(), SERVERNAME_A));
+ } else {
+ // adding region to regions and servers maps.
+ am.regionOnline(REGIONINFO, SERVERNAME_A);
+ // adding region in pending close.
+ am.regionsInTransition.put(REGIONINFO.getEncodedName(), new RegionState(REGIONINFO,
+ State.PENDING_CLOSE, System.currentTimeMillis(), SERVERNAME_A));
+ }
if (state == TableState.DISABLING) {
am.getZKTable().setDisablingTable(REGIONINFO.getTableNameAsString());
} else {
am.getZKTable().setDisabledTable(REGIONINFO.getTableNameAsString());
}
+ RegionTransitionData data = null;
+ if (opening) {
+ data =
+ new RegionTransitionData(EventType.RS_ZK_REGION_OPENING, REGIONINFO.getRegionName(),
+ SERVERNAME_A);
- RegionTransitionData data = new RegionTransitionData(EventType.M_ZK_REGION_CLOSING,
- REGIONINFO.getRegionName(), SERVERNAME_A);
+ } else {
+ data =
+ new RegionTransitionData(EventType.M_ZK_REGION_CLOSING, REGIONINFO.getRegionName(),
+ SERVERNAME_A);
+ }
String node = ZKAssign.getNodeName(this.watcher, REGIONINFO.getEncodedName());
// create znode in M_ZK_REGION_CLOSING state.
ZKUtil.createAndWatch(this.watcher, node, data.getBytes());
@@ -534,12 +555,8 @@ public class TestAssignmentManager {
// check znode deleted or not.
// In both cases the znode should be deleted.
assertTrue("The znode should be deleted.",ZKUtil.checkExists(this.watcher, node) == -1);
- // check whether in rit or not. In the DISABLING case also the below assert will be true
- // but the piece of code added for HBASE-5927 will not do that.
- if (state == TableState.DISABLED) {
- assertTrue("Region state of region in pending close should be removed from rit.",
- am.regionsInTransition.isEmpty());
- }
+ assertTrue("Region state of region in pending close should be removed from rit.",
+ am.regionsInTransition.isEmpty());
} finally {
executor.shutdown();
am.shutdown();
Modified: hbase/branches/0.94/src/test/java/org/apache/hadoop/hbase/regionserver/TestRSKilledWhenMasterInitializing.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.94/src/test/java/org/apache/hadoop/hbase/regionserver/TestRSKilledWhenMasterInitializing.java?rev=1464724&r1=1464723&r2=1464724&view=diff
==============================================================================
--- hbase/branches/0.94/src/test/java/org/apache/hadoop/hbase/regionserver/TestRSKilledWhenMasterInitializing.java (original)
+++ hbase/branches/0.94/src/test/java/org/apache/hadoop/hbase/regionserver/TestRSKilledWhenMasterInitializing.java Thu Apr 4 20:21:17 2013
@@ -25,7 +25,6 @@ import static org.junit.Assert.assertTru
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
-import java.util.Set;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
@@ -43,6 +42,7 @@ import org.apache.hadoop.hbase.client.HT
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.master.AssignmentManager;
import org.apache.hadoop.hbase.master.HMaster;
import org.apache.hadoop.hbase.master.MasterFileSystem;
import org.apache.hadoop.hbase.master.ServerManager;
@@ -51,6 +51,7 @@ import org.apache.hadoop.hbase.util.Byte
import org.apache.hadoop.hbase.util.JVMClusterUtil.MasterThread;
import org.apache.hadoop.hbase.util.Threads;
import org.apache.hadoop.hbase.zookeeper.ZKAssign;
+import org.apache.hadoop.hbase.zookeeper.ZKTable;
import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
import org.apache.zookeeper.KeeperException;
import org.junit.AfterClass;
@@ -64,7 +65,7 @@ public class TestRSKilledWhenMasterIniti
private static final HBaseTestingUtility TESTUTIL = new HBaseTestingUtility();
private static final int NUM_MASTERS = 1;
- private static final int NUM_RS = 4;
+ private static final int NUM_RS = 5;
@BeforeClass
public static void setUpBeforeClass() throws Exception {
@@ -72,7 +73,7 @@ public class TestRSKilledWhenMasterIniti
Configuration conf = TESTUTIL.getConfiguration();
conf.setClass(HConstants.MASTER_IMPL, TestingMaster.class, HMaster.class);
conf.setInt(ServerManager.WAIT_ON_REGIONSERVERS_MINTOSTART, 3);
- conf.setInt(ServerManager.WAIT_ON_REGIONSERVERS_MAXTOSTART, 4);
+ conf.setInt(ServerManager.WAIT_ON_REGIONSERVERS_MAXTOSTART, 5);
// Start up the cluster.
TESTUTIL.startMiniCluster(NUM_MASTERS, NUM_RS);
@@ -234,6 +235,61 @@ public class TestRSKilledWhenMasterIniti
assertEquals(3, count);
}
+ @Test (timeout=180000)
+ public void testMasterFailoverWhenDisablingTableRegionsInRITOnDeadRS() throws Exception {
+ MiniHBaseCluster cluster = TESTUTIL.getHBaseCluster();
+ HMaster master = cluster.getMaster();
+ // disable load balancing on this master
+ master.balanceSwitch(false);
+
+ final String table = "testMasterFailoverWhenDisablingTableRegionsInRITOnDeadRS";
+ byte [] FAMILY = Bytes.toBytes("family");
+ byte[][] SPLIT_KEYS =
+ new byte[][] {Bytes.toBytes("a"), Bytes.toBytes("b"), Bytes.toBytes("c"),
+ Bytes.toBytes("d") };
+ HTableDescriptor htd = new HTableDescriptor(table);
+ HColumnDescriptor hcd = new HColumnDescriptor(FAMILY);
+ htd.addFamily(hcd);
+ TESTUTIL.getHBaseAdmin().createTable(htd, SPLIT_KEYS);
+ AssignmentManager am = cluster.getMaster().getAssignmentManager();
+ List<HRegionInfo> regionsOfTable = null;
+ while ((regionsOfTable = am.getRegionsOfTable(table.getBytes())).size()
+ != (SPLIT_KEYS.length + 1)) {
+ Thread.sleep(10);
+ }
+ HRegionInfo closingRegion = regionsOfTable.get(0);
+ ServerName serverName = am.getRegionServerOfRegion(closingRegion);
+ HRegionServer deadRS = null;
+ for (int i = 0; i < cluster.getRegionServerThreads().size(); i++) {
+ deadRS = cluster.getRegionServer(i);
+ if (deadRS.getServerName().equals(serverName)) {
+ break;
+ }
+ }
+
+ // Mark the table as disabling in ZK and create a CLOSING znode for one of its regions
+ ZKTable zkTable = am.getZKTable();
+ zkTable.setDisablingTable(table);
+ ZKAssign.createNodeClosing(master.getZooKeeper(), closingRegion, serverName);
+
+ // Stop the master
+ abortMaster(cluster);
+ master = startMasterAndWaitUntilLogSplit(cluster);
+ deadRS.kill();
+ deadRS.join();
+ waitUntilMasterIsInitialized(master);
+ am = cluster.getMaster().getAssignmentManager();
+ zkTable = am.getZKTable();
+ // wait for no more RIT
+ ZKAssign.blockUntilNoRIT(master.getZooKeeper());
+ while (!master.getAssignmentManager().getZKTable().isDisabledTable(table)) {
+ Thread.sleep(10);
+ }
+ assertTrue("Table should be disabled state.", zkTable.isDisabledTable(table));
+ HBaseAdmin admin = new HBaseAdmin(master.getConfiguration());
+ admin.deleteTable(table);
+ }
+
private void abortMaster(MiniHBaseCluster cluster)
throws InterruptedException {
for (MasterThread mt : cluster.getLiveMasterThreads()) {