You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by dh...@apache.org on 2010/06/16 23:21:46 UTC
svn commit: r955395 - in /hadoop/common/branches/branch-0.20-append:
CHANGES.txt src/test/org/apache/hadoop/hdfs/MiniDFSCluster.java
src/test/org/apache/hadoop/hdfs/server/namenode/TestNodeCount.java
Author: dhruba
Date: Wed Jun 16 21:21:45 2010
New Revision: 955395
URL: http://svn.apache.org/viewvc?rev=955395&view=rev
Log:
HDFS-1215. Fix unit test TestNodeCount.
(Todd Lipcon via dhruba)
Modified:
hadoop/common/branches/branch-0.20-append/CHANGES.txt
hadoop/common/branches/branch-0.20-append/src/test/org/apache/hadoop/hdfs/MiniDFSCluster.java
hadoop/common/branches/branch-0.20-append/src/test/org/apache/hadoop/hdfs/server/namenode/TestNodeCount.java
Modified: hadoop/common/branches/branch-0.20-append/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20-append/CHANGES.txt?rev=955395&r1=955394&r2=955395&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.20-append/CHANGES.txt (original)
+++ hadoop/common/branches/branch-0.20-append/CHANGES.txt Wed Jun 16 21:21:45 2010
@@ -41,6 +41,9 @@ Release 0.20-append - Unreleased
HDFS-927. DFSInputStream retries too many times for new block locations.
(Todd Lipcon via dhruba)
+ HDFS-1215. Fix unit test TestNodeCount.
+ (Todd Lipcon via dhruba)
+
Release 0.20.3 - Unreleased
NEW FEATURES
Modified: hadoop/common/branches/branch-0.20-append/src/test/org/apache/hadoop/hdfs/MiniDFSCluster.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20-append/src/test/org/apache/hadoop/hdfs/MiniDFSCluster.java?rev=955395&r1=955394&r2=955395&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.20-append/src/test/org/apache/hadoop/hdfs/MiniDFSCluster.java (original)
+++ hadoop/common/branches/branch-0.20-append/src/test/org/apache/hadoop/hdfs/MiniDFSCluster.java Wed Jun 16 21:21:45 2010
@@ -731,6 +731,14 @@ public class MiniDFSCluster {
* Wait until the cluster is active and running.
*/
public void waitActive() throws IOException {
+ waitActive(true);
+ }
+
+ /**
+ * Wait until the cluster is active.
+ * @param waitHeartbeats if true, will wait until all DNs have sent a heartbeat
+ */
+ public void waitActive(boolean waitHeartbeats) throws IOException {
if (nameNode == null) {
return;
}
@@ -739,7 +747,8 @@ public class MiniDFSCluster {
DFSClient client = new DFSClient(addr, conf);
// make sure all datanodes are alive and sent heartbeat
- while (shouldWait(client.datanodeReport(DatanodeReportType.LIVE))) {
+ while (shouldWait(client.datanodeReport(DatanodeReportType.LIVE),
+ waitHeartbeats)) {
try {
Thread.sleep(100);
} catch (InterruptedException e) {
@@ -749,10 +758,17 @@ public class MiniDFSCluster {
client.close();
}
- private synchronized boolean shouldWait(DatanodeInfo[] dnInfo) {
+ private synchronized boolean shouldWait(DatanodeInfo[] dnInfo,
+ boolean waitHeartbeats) {
if (dnInfo.length != numDataNodes) {
return true;
}
+
+ // If we don't need heartbeats, we're done.
+ if (!waitHeartbeats) {
+ return false;
+ }
+
// make sure all datanodes have sent first heartbeat to namenode,
// using (capacity == 0) as proxy.
for (DatanodeInfo dn : dnInfo) {
Modified: hadoop/common/branches/branch-0.20-append/src/test/org/apache/hadoop/hdfs/server/namenode/TestNodeCount.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20-append/src/test/org/apache/hadoop/hdfs/server/namenode/TestNodeCount.java?rev=955395&r1=955394&r2=955395&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.20-append/src/test/org/apache/hadoop/hdfs/server/namenode/TestNodeCount.java (original)
+++ hadoop/common/branches/branch-0.20-append/src/test/org/apache/hadoop/hdfs/server/namenode/TestNodeCount.java Wed Jun 16 21:21:45 2010
@@ -3,6 +3,8 @@ package org.apache.hadoop.hdfs.server.na
import java.util.Collection;
import java.util.Iterator;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
@@ -19,6 +21,8 @@ import junit.framework.TestCase;
* so NN makes right decision for under/over-replicated blocks
*/
public class TestNodeCount extends TestCase {
+ static final Log LOG = LogFactory.getLog(TestNodeCount.class);
+
public void testNodeCount() throws Exception {
// start a mini dfs cluster of 2 nodes
final Configuration conf = new Configuration();
@@ -41,8 +45,9 @@ public class TestNodeCount extends TestC
// start two new nodes
cluster.startDataNodes(conf, 2, true, null, null);
- cluster.waitActive();
+ cluster.waitActive(false);
+ LOG.info("Bringing down first DN");
// bring down first datanode
DatanodeDescriptor datanode = datanodes[0];
DataNodeProperties dnprop = cluster.stopDataNode(datanode.getName());
@@ -51,21 +56,22 @@ public class TestNodeCount extends TestC
datanode.setLastUpdate(0); // mark it dead
namesystem.heartbeatCheck();
}
+
+ LOG.info("Waiting for block to be replicated");
// the block will be replicated
DFSTestUtil.waitReplication(fs, FILE_PATH, REPLICATION_FACTOR);
+ LOG.info("Restarting first datanode");
// restart the first datanode
cluster.restartDataNode(dnprop);
- cluster.waitActive();
-
+ cluster.waitActive(false);
+
+ LOG.info("Waiting for excess replicas to be detected");
+
// check if excessive replica is detected
- NumberReplicas num = null;
- do {
- synchronized (namesystem) {
- num = namesystem.countNodes(block);
- }
- } while (num.excessReplicas() == 0);
-
+ waitForExcessReplicasToChange(namesystem, block, 0);
+
+ LOG.info("Finding a non-excess node");
// find out a non-excess node
Iterator<DatanodeDescriptor> iter = namesystem.blocksMap.nodeIterator(block);
DatanodeDescriptor nonExcessDN = null;
@@ -78,7 +84,8 @@ public class TestNodeCount extends TestC
}
}
assertTrue(nonExcessDN!=null);
-
+
+ LOG.info("Stopping non-excess node: " + nonExcessDN);
// bring down non excessive datanode
dnprop = cluster.stopDataNode(nonExcessDN.getName());
// make sure that NN detects that the datanode is down
@@ -86,22 +93,46 @@ public class TestNodeCount extends TestC
nonExcessDN.setLastUpdate(0); // mark it dead
namesystem.heartbeatCheck();
}
-
+
+ LOG.info("Waiting for live replicas to hit repl factor");
// The block should be replicated
+ NumberReplicas num;
do {
num = namesystem.countNodes(block);
} while (num.liveReplicas() != REPLICATION_FACTOR);
+ LOG.info("Restarting first DN");
// restart the first datanode
cluster.restartDataNode(dnprop);
- cluster.waitActive();
-
+ cluster.waitActive(false);
+
+ LOG.info("Waiting for excess replicas to be detected");
// check if excessive replica is detected
- do {
- num = namesystem.countNodes(block);
- } while (num.excessReplicas() == 2);
+ waitForExcessReplicasToChange(namesystem, block, 2);
} finally {
cluster.shutdown();
}
}
+
+ private void waitForExcessReplicasToChange(
+ FSNamesystem namesystem,
+ Block block,
+ int oldReplicas) throws Exception
+ {
+ NumberReplicas num;
+ long startChecking = System.currentTimeMillis();
+ do {
+ synchronized (namesystem) {
+ num = namesystem.countNodes(block);
+ }
+ Thread.sleep(100);
+ if (System.currentTimeMillis() - startChecking > 30000) {
+ namesystem.metaSave("TestNodeCount.meta");
+ LOG.warn("Dumping meta into log directory");
+ fail("Timed out waiting for excess replicas to change");
+ }
+
+ } while (num.excessReplicas() == oldReplicas);
+ }
+
}