You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by ra...@apache.org on 2012/05/25 18:54:09 UTC
svn commit: r1342724 - in /hbase/trunk/src:
main/java/org/apache/hadoop/hbase/master/
main/java/org/apache/hadoop/hbase/master/handler/
main/java/org/apache/hadoop/hbase/protobuf/
test/java/org/apache/hadoop/hbase/master/
Author: ramkrishna
Date: Fri May 25 16:54:09 2012
New Revision: 1342724
URL: http://svn.apache.org/viewvc?rev=1342724&view=rev
Log:
HBASE-6070 AM.nodeDeleted and SSH races creating problems for regions under SPLIT (Ramkrishna)
Modified:
hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java
hbase/trunk/src/main/java/org/apache/hadoop/hbase/protobuf/ProtobufUtil.java
hbase/trunk/src/test/java/org/apache/hadoop/hbase/master/Mocking.java
hbase/trunk/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManager.java
Modified: hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java?rev=1342724&r1=1342723&r2=1342724&view=diff
==============================================================================
--- hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java (original)
+++ hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java Fri May 25 16:54:09 2012
@@ -1145,7 +1145,7 @@ public class AssignmentManager extends Z
RegionState rs = this.regionsInTransition.get(regionName);
if (rs != null) {
HRegionInfo regionInfo = rs.getRegion();
- if (rs.isSplitting() || rs.isSplit()) {
+ if (rs.isSplit()) {
LOG.debug("Ephemeral node deleted, regionserver crashed?, " +
"clearing from RIT; rs=" + rs);
regionOffline(rs.getRegion());
Modified: hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java?rev=1342724&r1=1342723&r2=1342724&view=diff
==============================================================================
--- hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java (original)
+++ hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java Fri May 25 16:54:09 2012
@@ -294,7 +294,7 @@ public class ServerShutdownHandler exten
this.server.getCatalogTracker())) {
ServerName addressFromAM = this.services.getAssignmentManager()
.getRegionServerOfRegion(e.getKey());
- if (rit != null && !rit.isClosing() && !rit.isPendingClose()) {
+ if (rit != null && !rit.isClosing() && !rit.isPendingClose() && !rit.isSplitting()) {
// Skip regions that were in transition unless CLOSING or
// PENDING_CLOSE
LOG.info("Skip assigning region " + rit.toString());
@@ -307,6 +307,16 @@ public class ServerShutdownHandler exten
} else {
toAssignRegions.add(e.getKey());
}
+ } else if (rit != null && (rit.isSplitting() || rit.isSplit())) {
+ // This will happen when the RS went down and the call back for the SPLIITING or SPLIT
+ // has not yet happened for node Deleted event. In that case if the region was actually
+ // split
+ // but the RS had gone down before completing the split process then will not try to
+ // assign the parent region again. In that case we should make the region offline and
+ // also delete the region from RIT.
+ HRegionInfo region = rit.getRegion();
+ AssignmentManager am = this.services.getAssignmentManager();
+ am.regionOffline(region);
}
// If the table was partially disabled and the RS went down, we should clear the RIT
// and remove the node for the region.
Modified: hbase/trunk/src/main/java/org/apache/hadoop/hbase/protobuf/ProtobufUtil.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/main/java/org/apache/hadoop/hbase/protobuf/ProtobufUtil.java?rev=1342724&r1=1342723&r2=1342724&view=diff
==============================================================================
--- hbase/trunk/src/main/java/org/apache/hadoop/hbase/protobuf/ProtobufUtil.java (original)
+++ hbase/trunk/src/main/java/org/apache/hadoop/hbase/protobuf/ProtobufUtil.java Fri May 25 16:54:09 2012
@@ -1039,6 +1039,7 @@ public final class ProtobufUtil {
RequestConverter.buildGetRequest(regionName, get);
try {
GetResponse response = client.get(null, request);
+ if (response == null) return null;
return toResult(response.getResult());
} catch (ServiceException se) {
throw getRemoteException(se);
Modified: hbase/trunk/src/test/java/org/apache/hadoop/hbase/master/Mocking.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/test/java/org/apache/hadoop/hbase/master/Mocking.java?rev=1342724&r1=1342723&r2=1342724&view=diff
==============================================================================
--- hbase/trunk/src/test/java/org/apache/hadoop/hbase/master/Mocking.java (original)
+++ hbase/trunk/src/test/java/org/apache/hadoop/hbase/master/Mocking.java Fri May 25 16:54:09 2012
@@ -67,6 +67,22 @@ public class Mocking {
Bytes.toBytes(sn.getStartcode())));
return new Result(kvs);
}
+
+
+ /**
+ * @param sn
+ * ServerName to use making startcode and server in meta
+ * @param hri
+ * Region to serialize into HRegionInfo
+ * @return A mocked up Result that fakes a Get on a row in the <code>.META.</code> table.
+ * @throws IOException
+ */
+ static Result getMetaTableRowResultAsSplitRegion(final HRegionInfo hri, final ServerName sn)
+ throws IOException {
+ hri.setOffline(true);
+ hri.setSplit(true);
+ return getMetaTableRowResult(hri, sn);
+ }
/**
* Fakes the regionserver-side zk transitions of a region open.
Modified: hbase/trunk/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManager.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManager.java?rev=1342724&r1=1342723&r2=1342724&view=diff
==============================================================================
--- hbase/trunk/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManager.java (original)
+++ hbase/trunk/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManager.java Fri May 25 16:54:09 2012
@@ -405,7 +405,7 @@ public class TestAssignmentManager {
AssignmentManager am = new AssignmentManager(this.server,
this.serverManager, ct, balancer, executor, null);
try {
- processServerShutdownHandler(ct, am);
+ processServerShutdownHandler(ct, am, false);
} finally {
executor.shutdown();
am.shutdown();
@@ -429,6 +429,70 @@ public class TestAssignmentManager {
testCaseWithPartiallyDisabledState(Table.State.DISABLING);
testCaseWithPartiallyDisabledState(Table.State.DISABLED);
}
+
+
+ /**
+ * To test if the split region is removed from RIT if the region was in SPLITTING state but the RS
+ * has actually completed the splitting in META but went down. See HBASE-6070 and also HBASE-5806
+ *
+ * @throws KeeperException
+ * @throws IOException
+ */
+ @Test
+ public void testSSHWhenSplitRegionInProgress() throws KeeperException, IOException, Exception {
+ // true indicates the region is split but still in RIT
+ testCaseWithSplitRegionPartial(true);
+ // false indicate the region is not split
+ testCaseWithSplitRegionPartial(false);
+ }
+
+ private void testCaseWithSplitRegionPartial(boolean regionSplitDone) throws KeeperException,
+ IOException, NodeExistsException, InterruptedException, ServiceException {
+ // Create and startup an executor. This is used by AssignmentManager
+ // handling zk callbacks.
+ ExecutorService executor = startupMasterExecutor("testSSHWhenSplitRegionInProgress");
+
+ // We need a mocked catalog tracker.
+ CatalogTracker ct = Mockito.mock(CatalogTracker.class);
+ LoadBalancer balancer = LoadBalancerFactory.getLoadBalancer(server.getConfiguration());
+ // Create an AM.
+ AssignmentManagerWithExtrasForTesting am = setUpMockedAssignmentManager(this.server,
+ this.serverManager);
+ // adding region to regions and servers maps.
+ am.regionOnline(REGIONINFO, SERVERNAME_A);
+ // adding region in pending close.
+ am.regionsInTransition.put(REGIONINFO.getEncodedName(), new RegionState(REGIONINFO,
+ State.SPLITTING, System.currentTimeMillis(), SERVERNAME_A));
+ am.getZKTable().setEnabledTable(REGIONINFO.getTableNameAsString());
+ RegionTransition data = RegionTransition.createRegionTransition(EventType.RS_ZK_REGION_SPLITTING,
+ REGIONINFO.getRegionName(), SERVERNAME_A);
+ String node = ZKAssign.getNodeName(this.watcher, REGIONINFO.getEncodedName());
+ // create znode in M_ZK_REGION_CLOSING state.
+ ZKUtil.createAndWatch(this.watcher, node, data.toByteArray());
+
+ try {
+ processServerShutdownHandler(ct, am, regionSplitDone);
+ // check znode deleted or not.
+ // In both cases the znode should be deleted.
+
+ if (regionSplitDone) {
+ assertTrue("Region state of region in SPLITTING should be removed from rit.",
+ am.regionsInTransition.isEmpty());
+ } else {
+ while (!am.assignInvoked) {
+ Thread.sleep(1);
+ }
+ assertTrue("Assign should be invoked.", am.assignInvoked);
+ }
+ } finally {
+ REGIONINFO.setOffline(false);
+ REGIONINFO.setSplit(false);
+ executor.shutdown();
+ am.shutdown();
+ // Clean up all znodes
+ ZKAssign.deleteAllNodes(this.watcher);
+ }
+ }
private void testCaseWithPartiallyDisabledState(Table.State state) throws KeeperException,
IOException, NodeExistsException, ServiceException {
@@ -460,7 +524,7 @@ public class TestAssignmentManager {
// create znode in M_ZK_REGION_CLOSING state.
ZKUtil.createAndWatch(this.watcher, node, data.toByteArray());
try {
- processServerShutdownHandler(ct, am);
+ processServerShutdownHandler(ct, am, false);
// check znode deleted or not.
// In both cases the znode should be deleted.
assertTrue("The znode should be deleted.", ZKUtil.checkExists(this.watcher, node) == -1);
@@ -480,7 +544,7 @@ public class TestAssignmentManager {
}
}
- private void processServerShutdownHandler(CatalogTracker ct, AssignmentManager am)
+ private void processServerShutdownHandler(CatalogTracker ct, AssignmentManager am, boolean splitRegion)
throws IOException, ServiceException {
// Make sure our new AM gets callbacks; once registered, can't unregister.
// Thats ok because we make a new zk watcher for each test.
@@ -490,7 +554,14 @@ public class TestAssignmentManager {
// Make an RS Interface implementation. Make it so a scanner can go against it.
ClientProtocol implementation = Mockito.mock(ClientProtocol.class);
// Get a meta row result that has region up on SERVERNAME_A
- Result r = Mocking.getMetaTableRowResult(REGIONINFO, SERVERNAME_A);
+
+ Result r = null;
+ if (splitRegion) {
+ r = Mocking.getMetaTableRowResultAsSplitRegion(REGIONINFO, SERVERNAME_A);
+ } else {
+ r = Mocking.getMetaTableRowResult(REGIONINFO, SERVERNAME_A);
+ }
+
ScanResponse.Builder builder = ScanResponse.newBuilder();
builder.setMoreResults(true);
builder.addResult(ProtobufUtil.toResult(r));
@@ -830,6 +901,7 @@ public class TestAssignmentManager {
// Ditto for ct
private final CatalogTracker ct;
boolean processRITInvoked = false;
+ boolean assignInvoked = false;
AtomicBoolean gate = new AtomicBoolean(true);
public AssignmentManagerWithExtrasForTesting(final Server master,
@@ -865,6 +937,17 @@ public class TestAssignmentManager {
super.assign(region, setOfflineInZK, forceNewPlan, hijack);
this.gate.set(true);
}
+
+ @Override
+ public ServerName getRegionServerOfRegion(HRegionInfo hri) {
+ return SERVERNAME_A;
+ }
+
+ @Override
+ public void assign(java.util.List<HRegionInfo> regions, java.util.List<ServerName> servers)
+ {
+ assignInvoked = true;
+ };
/** reset the watcher */
void setWatcher(ZooKeeperWatcher watcher) {
this.watcher = watcher;