You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by la...@apache.org on 2012/12/26 23:31:14 UTC
svn commit: r1426066 -
/hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransactionOnCluster.java
Author: larsh
Date: Wed Dec 26 22:31:14 2012
New Revision: 1426066
URL: http://svn.apache.org/viewvc?rev=1426066&view=rev
Log:
HBASE-7438 TestSplitTransactionOnCluster has too many infinite loops
Modified:
hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransactionOnCluster.java
Modified: hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransactionOnCluster.java
URL: http://svn.apache.org/viewvc/hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransactionOnCluster.java?rev=1426066&r1=1426065&r2=1426066&view=diff
==============================================================================
--- hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransactionOnCluster.java (original)
+++ hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransactionOnCluster.java Wed Dec 26 22:31:14 2012
@@ -21,8 +21,10 @@ package org.apache.hadoop.hbase.regionse
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertNotSame;
import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
import java.io.IOException;
import java.util.List;
@@ -175,12 +177,8 @@ public class TestSplitTransactionOnClust
// Now crash the server
cluster.abortRegionServer(tableRegionIndex);
waitUntilRegionServerDead();
+ awaitDaughters(tableName, daughters.size());
- // Wait till regions are back on line again.
- while(cluster.getRegions(tableName).size() < daughters.size()) {
- LOG.info("Waiting for repair to happen");
- Thread.sleep(1000);
- }
// Assert daughters are online.
regions = cluster.getRegions(tableName);
for (HRegion r: regions) {
@@ -295,11 +293,7 @@ public class TestSplitTransactionOnClust
// Now crash the server
cluster.abortRegionServer(tableRegionIndex);
waitUntilRegionServerDead();
- // Wait till regions are back on line again.
- while(cluster.getRegions(tableName).size() < daughters.size()) {
- LOG.info("Waiting for repair to happen");
- Thread.sleep(1000);
- }
+ awaitDaughters(tableName, daughters.size());
// Assert daughters are online.
regions = cluster.getRegions(tableName);
for (HRegion r: regions) {
@@ -357,21 +351,18 @@ public class TestSplitTransactionOnClust
if (r.getRegionInfo().equals(daughter)) daughterRegion = r;
}
assertTrue(daughterRegion != null);
- while (true) {
+ for (int i=0; i<100; i++) {
if (!daughterRegion.hasReferences()) break;
Threads.sleep(100);
}
+ assertFalse("Waiting for refereces to be compacted", daughterRegion.hasReferences());
split(daughter, server, regionCount);
// Get list of daughters
daughters = cluster.getRegions(tableName);
// Now crash the server
cluster.abortRegionServer(tableRegionIndex);
waitUntilRegionServerDead();
- // Wait till regions are back on line again.
- while(cluster.getRegions(tableName).size() < daughters.size()) {
- LOG.info("Waiting for repair to happen");
- Thread.sleep(1000);
- }
+ awaitDaughters(tableName, daughters.size());
// Assert daughters are online and ONLY the original daughters -- that
// fixup didn't insert one during server shutdown recover.
regions = cluster.getRegions(tableName);
@@ -508,12 +499,14 @@ public class TestSplitTransactionOnClust
byte[] data = ZKUtil.getDataNoWatch(t.getConnection()
.getZooKeeperWatcher(), node, stat);
// ZKUtil.create
- while (data != null) {
+ for (int i=0; data != null && i<60; i++) {
Thread.sleep(1000);
data = ZKUtil.getDataNoWatch(t.getConnection().getZooKeeperWatcher(),
node, stat);
}
+ assertNull("Waited too long for ZK node to be removed: "+node, data);
+
MockMasterWithoutCatalogJanitor master = abortAndWaitForMaster();
this.admin = new HBaseAdmin(TESTING_UTIL.getConfiguration());
@@ -559,9 +552,12 @@ public class TestSplitTransactionOnClust
htd.addFamily(new HColumnDescriptor("cf"));
admin.createTable(htd);
HTable t = new HTable(cluster.getConfiguration(), tableName);
- while (!(cluster.getRegions(tableName).size() == 1)) {
+ // wait for up to 10s
+ for (int i=0; cluster.getRegions(tableName).size() != 1 && i<100; i++) {
Thread.sleep(100);
}
+ assertTrue("waited too long for table to get online",
+ cluster.getRegions(tableName).size() == 1);
final List<HRegion> regions = cluster.getRegions(tableName);
HRegionInfo hri = getAndCheckSingleTableRegion(regions);
int regionServerIndex = cluster.getServerWith(regions.get(0).getRegionName());
@@ -584,9 +580,10 @@ public class TestSplitTransactionOnClust
}
}
}.start();
- while (!callRollBack) {
+ for (int i=0; !callRollBack && i<100; i++) {
Thread.sleep(100);
}
+ assertTrue("Waited too long for rollback", callRollBack);
SplitTransaction st = null;
st = new MockedSplitTransaction(regions.get(0), Bytes.toBytes("row2"));
try {
@@ -597,15 +594,19 @@ public class TestSplitTransactionOnClust
LOG.debug("Rollback started :"+ e.getMessage());
st.rollback(regionServer, regionServer);
}
- while (!firstSplitCompleted) {
+ for (int i=0; !firstSplitCompleted && i<100; i++) {
Thread.sleep(100);
}
+ assertTrue("fist split did not complete", firstSplitCompleted);
+
RegionStates regionStates = cluster.getMaster().getAssignmentManager().getRegionStates();
Map<String, RegionState> rit = regionStates.getRegionsInTransition();
- while (rit.containsKey(hri.getTableNameAsString())) {
+ for (int i=0; rit.containsKey(hri.getTableNameAsString()) && i<100; i++) {
Thread.sleep(100);
}
+ // Fail if the key polled by the loop above is still present once the retries are exhausted.
+ assertFalse("region still in transition", rit.containsKey(hri.getTableNameAsString()));
+
List<HRegion> onlineRegions = regionServer.getOnlineRegions(tableName);
// Region server side split is successful.
assertEquals("The parent region should be splitted", 2, onlineRegions.size());
@@ -845,10 +846,12 @@ public class TestSplitTransactionOnClust
final int regionCount)
throws IOException, InterruptedException {
this.admin.split(hri.getRegionNameAsString());
- while (ProtobufUtil.getOnlineRegions(server).size() <= regionCount) {
+ for (int i = 0; ProtobufUtil.getOnlineRegions(server).size() <= regionCount && i < 100; i++) {
LOG.debug("Waiting on region to split");
Thread.sleep(100);
}
+ assertFalse("Waited too long for split",
+ ProtobufUtil.getOnlineRegions(server).size() <= regionCount);
}
private void removeDaughterFromMeta(final byte [] regionName) throws IOException {
@@ -895,13 +898,15 @@ public class TestSplitTransactionOnClust
Bytes.toBytes(hrs.getServerName().toString()));
}
// Wait till table region is up on the server that is NOT carrying .META..
- while (true) {
+ for (int i=0; i<100; i++) {
tableRegionIndex = cluster.getServerWith(hri.getRegionName());
if (tableRegionIndex != -1 && tableRegionIndex != metaServerIndex) break;
LOG.debug("Waiting on region move off the .META. server; current index " +
tableRegionIndex + " and metaServerIndex=" + metaServerIndex);
Thread.sleep(100);
}
+ assertTrue("Region not moved off .META. server", tableRegionIndex != -1
+ && tableRegionIndex != metaServerIndex);
// Verify for sure table region is not on same server as .META.
tableRegionIndex = cluster.getServerWith(hri.getRegionName());
assertTrue(tableRegionIndex != -1);
@@ -939,11 +944,24 @@ public class TestSplitTransactionOnClust
private void waitUntilRegionServerDead() throws InterruptedException {
// Wait until the master processes the RS shutdown
- while (cluster.getMaster().getClusterStatus().
- getServers().size() == NB_SERVERS) {
+ for (int i=0; cluster.getMaster().getClusterStatus().
+ getServers().size() == NB_SERVERS && i<100; i++) {
LOG.info("Waiting on server to go down");
Thread.sleep(100);
}
+ assertFalse("Waited too long for RS to die", cluster.getMaster().getClusterStatus().
+ getServers().size() == NB_SERVERS);
+ }
+
+ private void awaitDaughters(byte[] tableName, int numDaughters) throws InterruptedException {
+ // Wait till regions are back on line again.
+ for (int i=0; cluster.getRegions(tableName).size() < numDaughters && i<60; i++) {
+ LOG.info("Waiting for repair to happen");
+ Thread.sleep(1000);
+ }
+ if (cluster.getRegions(tableName).size() < numDaughters) {
+ fail("Waiting too long for daughter regions");
+ }
}
public static class MockMasterWithoutCatalogJanitor extends HMaster {