You are viewing a plain text version of this content. The canonical link for it is here.
Posted to hdfs-commits@hadoop.apache.org by el...@apache.org on 2012/09/21 07:48:42 UTC

svn commit: r1388331 - in /hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs: CHANGES.txt src/test/java/org/apache/hadoop/hdfs/DFSTestUtil.java src/test/java/org/apache/hadoop/hdfs/TestDatanodeBlockScanner.java

Author: eli
Date: Fri Sep 21 05:48:42 2012
New Revision: 1388331

URL: http://svn.apache.org/viewvc?rev=1388331&view=rev
Log:
HDFS-3931. TestDatanodeBlockScanner#testBlockCorruptionPolicy2 is broken. Contributed by Andy Isaacson

Modified:
    hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
    hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/DFSTestUtil.java
    hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDatanodeBlockScanner.java

Modified: hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt?rev=1388331&r1=1388330&r2=1388331&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt (original)
+++ hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt Fri Sep 21 05:48:42 2012
@@ -254,6 +254,9 @@ Release 2.0.3-alpha - Unreleased 
     HDFS-3932. NameNode Web UI broken if the rpc-address is set to the wildcard.
     (Colin Patrick McCabe via eli)
 
+    HDFS-3931. TestDatanodeBlockScanner#testBlockCorruptionPolicy2 is broken.
+    (Andy Isaacson via eli)
+
 Release 2.0.2-alpha - 2012-09-07 
 
   INCOMPATIBLE CHANGES

Modified: hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/DFSTestUtil.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/DFSTestUtil.java?rev=1388331&r1=1388330&r2=1388331&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/DFSTestUtil.java (original)
+++ hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/DFSTestUtil.java Fri Sep 21 05:48:42 2012
@@ -506,7 +506,7 @@ public class DFSTestUtil {
   public static void waitReplication(FileSystem fs, Path fileName, short replFactor)
       throws IOException, InterruptedException, TimeoutException {
     boolean correctReplFactor;
-    final int ATTEMPTS = 20;
+    final int ATTEMPTS = 40;
     int count = 0;
 
     do {

Modified: hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDatanodeBlockScanner.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDatanodeBlockScanner.java?rev=1388331&r1=1388330&r2=1388331&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDatanodeBlockScanner.java (original)
+++ hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDatanodeBlockScanner.java Fri Sep 21 05:48:42 2012
@@ -269,6 +269,7 @@ public class TestDatanodeBlockScanner {
     conf.setLong(DFSConfigKeys.DFS_NAMENODE_REPLICATION_INTERVAL_KEY, 3);
     conf.setLong(DFSConfigKeys.DFS_HEARTBEAT_INTERVAL_KEY, 3L);
     conf.setBoolean(DFSConfigKeys.DFS_NAMENODE_REPLICATION_CONSIDERLOAD_KEY, false);
+    conf.setLong(DFSConfigKeys.DFS_NAMENODE_REPLICATION_PENDING_TIMEOUT_SEC_KEY, 5L);
 
     MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(numDataNodes).build();
     cluster.waitActive();
@@ -276,35 +277,47 @@ public class TestDatanodeBlockScanner {
     Path file1 = new Path("/tmp/testBlockCorruptRecovery/file");
     DFSTestUtil.createFile(fs, file1, 1024, numReplicas, 0);
     ExtendedBlock block = DFSTestUtil.getFirstBlock(fs, file1);
+    final int ITERATIONS = 10;
 
     // Wait until block is replicated to numReplicas
     DFSTestUtil.waitReplication(fs, file1, numReplicas);
 
-    // Corrupt numCorruptReplicas replicas of block 
-    int[] corruptReplicasDNIDs = new int[numCorruptReplicas];
-    for (int i=0, j=0; (j != numCorruptReplicas) && (i < numDataNodes); i++) {
-      if (corruptReplica(block, i)) {
-        corruptReplicasDNIDs[j++] = i;
-        LOG.info("successfully corrupted block " + block + " on node " 
-                 + i + " " + cluster.getDataNodes().get(i).getDisplayName());
+    for (int k = 0; ; k++) {
+      // Corrupt numCorruptReplicas replicas of block 
+      int[] corruptReplicasDNIDs = new int[numCorruptReplicas];
+      for (int i=0, j=0; (j != numCorruptReplicas) && (i < numDataNodes); i++) {
+        if (corruptReplica(block, i)) {
+          corruptReplicasDNIDs[j++] = i;
+          LOG.info("successfully corrupted block " + block + " on node " 
+                   + i + " " + cluster.getDataNodes().get(i).getDisplayName());
+        }
+      }
+      
+      // Restart the datanodes containing corrupt replicas 
+      // so they would be reported to namenode and re-replicated
+      // They MUST be restarted in reverse order from highest to lowest index,
+      // because the act of restarting them removes them from the ArrayList
+      // and causes the indexes of all nodes above them in the list to change.
+      for (int i = numCorruptReplicas - 1; i >= 0 ; i--) {
+        LOG.info("restarting node with corrupt replica: position " 
+            + i + " node " + corruptReplicasDNIDs[i] + " " 
+            + cluster.getDataNodes().get(corruptReplicasDNIDs[i]).getDisplayName());
+        cluster.restartDataNode(corruptReplicasDNIDs[i]);
       }
-    }
-    
-    // Restart the datanodes containing corrupt replicas 
-    // so they would be reported to namenode and re-replicated
-    // They MUST be restarted in reverse order from highest to lowest index,
-    // because the act of restarting them removes them from the ArrayList
-    // and causes the indexes of all nodes above them in the list to change.
-    for (int i = numCorruptReplicas - 1; i >= 0 ; i--) {
-      LOG.info("restarting node with corrupt replica: position " 
-          + i + " node " + corruptReplicasDNIDs[i] + " " 
-          + cluster.getDataNodes().get(corruptReplicasDNIDs[i]).getDisplayName());
-      cluster.restartDataNode(corruptReplicasDNIDs[i]);
-    }
 
-    // Loop until all corrupt replicas are reported
-    DFSTestUtil.waitCorruptReplicas(fs, cluster.getNamesystem(), file1, 
-        block, numCorruptReplicas);
+      // Loop until all corrupt replicas are reported
+      try {
+        DFSTestUtil.waitCorruptReplicas(fs, cluster.getNamesystem(), file1, 
+                                        block, numCorruptReplicas);
+      } catch(TimeoutException e) {
+        if (k > ITERATIONS) {
+          throw e;
+        }
+        LOG.info("Timed out waiting for corrupt replicas, trying again, iteration " + k);
+        continue;
+      }
+      break;
+    }
     
     // Loop until the block recovers after replication
     DFSTestUtil.waitReplication(fs, file1, numReplicas);