Posted to hdfs-commits@hadoop.apache.org by su...@apache.org on 2010/09/14 19:14:48 UTC

svn commit: r996995 - /hadoop/hdfs/branches/HDFS-1052/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/TestListCorruptFileBlocks.java

Author: suresh
Date: Tue Sep 14 17:14:48 2010
New Revision: 996995

URL: http://svn.apache.org/viewvc?rev=996995&view=rev
Log:
Merging changes from trunk

Added:
    hadoop/hdfs/branches/HDFS-1052/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/TestListCorruptFileBlocks.java

Added: hadoop/hdfs/branches/HDFS-1052/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/TestListCorruptFileBlocks.java
URL: http://svn.apache.org/viewvc/hadoop/hdfs/branches/HDFS-1052/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/TestListCorruptFileBlocks.java?rev=996995&view=auto
==============================================================================
--- hadoop/hdfs/branches/HDFS-1052/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/TestListCorruptFileBlocks.java (added)
+++ hadoop/hdfs/branches/HDFS-1052/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/TestListCorruptFileBlocks.java Tue Sep 14 17:14:48 2010
@@ -0,0 +1,269 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.server.namenode;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.RandomAccessFile;
+import java.nio.ByteBuffer;
+import java.nio.channels.FileChannel;
+import java.util.Collection;
+import java.util.Random;
+
+import junit.framework.TestCase;
+
+import org.apache.commons.logging.Log;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.hdfs.BlockMissingException;
+import org.apache.hadoop.hdfs.DFSTestUtil;
+import org.apache.hadoop.hdfs.HdfsConfiguration;
+import org.apache.hadoop.hdfs.MiniDFSCluster;
+
+/**
+ * This class tests the listCorruptFileBlocks API.
+ * We create 3 files, intentionally delete their blocks, and then use
+ * listCorruptFileBlocks to validate that we get the list of corrupt
+ * files/blocks. We also test the "paging" support by calling the API
+ * with a block name from a previous call and validating that the
+ * subsequent blocks/files are returned.
+ */
+public class TestListCorruptFileBlocks extends TestCase {
+  static Log LOG = NameNode.stateChangeLog;
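+
+  /*
+   * A minimal sketch (hypothetical caller, not executed by this test) of how
+   * a client could page through every corrupt file block: pass the name of
+   * the last block seen as the cookie of the next call, until an empty batch
+   * comes back, as exercised in testListCorruptFileBlocks below.
+   *
+   *   String cookie = null;
+   *   Collection<FSNamesystem.CorruptFileBlockInfo> batch;
+   *   do {
+   *     batch = namenode.listCorruptFileBlocks("/", cookie);
+   *     for (FSNamesystem.CorruptFileBlockInfo info : batch) {
+   *       cookie = info.block.getBlockName();
+   *     }
+   *   } while (!batch.isEmpty());
+   */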
+
+  /** check if nn.listCorruptFileBlocks() returns a file that has corrupted blocks */
+  public void testListCorruptFilesCorruptedBlock() throws Exception {
+    MiniDFSCluster cluster = null;
+    Random random = new Random();
+    
+    try {
+      Configuration conf = new HdfsConfiguration();
+      conf.setInt("dfs.datanode.directoryscan.interval", 1); // datanode scans directories
+      conf.setInt("dfs.blockreport.intervalMsec", 3 * 1000); // datanode sends block reports
+      cluster = new MiniDFSCluster(conf, 1, true, null);
+      FileSystem fs = cluster.getFileSystem();
+
+      // create two files with one block each
+      DFSTestUtil util = new DFSTestUtil("testCorruptFilesCorruptedBlock", 2, 1, 512);
+      util.createFiles(fs, "/srcdat10");
+
+      // fetch bad file list from namenode. There should be none.
+      final NameNode namenode = cluster.getNameNode();
+      Collection<FSNamesystem.CorruptFileBlockInfo> badFiles = namenode
+          .listCorruptFileBlocks("/", null);
+      assertTrue("Namenode has " + badFiles.size()
+          + " corrupt files. Expecting None.", badFiles.size() == 0);
+
+      // Now deliberately corrupt one block
+      File data_dir = new File(System.getProperty("test.build.data"),
+      "dfs/data/data1/current/finalized");
+      assertTrue("data directory does not exist", data_dir.exists());
+      File[] blocks = data_dir.listFiles();
+      assertTrue("Blocks do not exist in data-dir", (blocks != null) && (blocks.length > 0));
+      for (int idx = 0; idx < blocks.length; idx++) {
+        if (blocks[idx].getName().startsWith("blk_") &&
+            blocks[idx].getName().endsWith(".meta")) {
+          //
+          // Corrupt the replica by overwriting the last two bytes of its
+          // .meta file (which holds the block's checksums) with random data.
+          //
+          RandomAccessFile file = new RandomAccessFile(blocks[idx], "rw");
+          FileChannel channel = file.getChannel();
+          long position = channel.size() - 2;
+          int length = 2;
+          byte[] buffer = new byte[length];
+          random.nextBytes(buffer);
+          channel.write(ByteBuffer.wrap(buffer), position);
+          file.close();
+          LOG.info("Deliberately corrupting file " + blocks[idx].getName() +
+              " at offset " + position + " length " + length);
+
+          // read all files to trigger detection of corrupted replica
+          try {
+            util.checkFiles(fs, "/srcdat10");
+          } catch (BlockMissingException e) {
+            System.out.println("Received BlockMissingException as expected.");
+          } catch (IOException e) {
+            assertTrue("Corrupted replicas not handled properly. Expecting BlockMissingException " +
+                " but received IOException " + e, false);
+          }
+          break;
+        }
+      }
+
+      // fetch bad file list from namenode. There should be one file.
+      badFiles = namenode.listCorruptFileBlocks("/", null);
+      LOG.info("Namenode has bad files. " + badFiles.size());
+      assertTrue("Namenode has " + badFiles.size() + " bad files. Expecting 1.",
+          badFiles.size() == 1);
+      util.cleanup(fs, "/srcdat10");
+    } finally {
+      if (cluster != null) { cluster.shutdown(); }
+    }
+  }
+  
+  /** deliberately remove blocks from a file and validate the listCorruptFileBlocks API */
+  public void testListCorruptFileBlocks() throws Exception {
+    Configuration conf = new Configuration();
+    conf.setLong("dfs.blockreport.intervalMsec", 1000);
+    conf.setInt("dfs.datanode.directoryscan.interval", 1); // datanode scans
+                                                           // directories
+    FileSystem fs = null;
+
+    MiniDFSCluster cluster = null;
+    try {
+      cluster = new MiniDFSCluster(conf, 1, true, null);
+      cluster.waitActive();
+      fs = cluster.getFileSystem();
+      DFSTestUtil util = new DFSTestUtil("testGetCorruptFiles", 3, 1, 1024);
+      util.createFiles(fs, "/corruptData");
+
+      final NameNode namenode = cluster.getNameNode();
+      Collection<FSNamesystem.CorruptFileBlockInfo> corruptFileBlocks = namenode
+          .listCorruptFileBlocks("/corruptData", null);
+      int numCorrupt = corruptFileBlocks.size();
+      assertTrue(numCorrupt == 0);
+      // delete the blocks
+      File baseDir = new File(System.getProperty("test.build.data",
+          "build/test/data"), "dfs/data");
+      for (int i = 0; i < 8; i++) {
+        File data_dir = new File(baseDir, "data" + (i + 1)
+            + MiniDFSCluster.FINALIZED_DIR_NAME);
+        File[] blocks = data_dir.listFiles();
+        if (blocks == null)
+          continue;
+        // assertTrue("Blocks do not exist in data-dir", (blocks != null) &&
+        // (blocks.length > 0));
+        for (int idx = 0; idx < blocks.length; idx++) {
+          if (!blocks[idx].getName().startsWith("blk_")) {
+            continue;
+          }
+          LOG.info("Deliberately removing file " + blocks[idx].getName());
+          assertTrue("Cannot remove file.", blocks[idx].delete());
+        }
+      }
+
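+      // Poll the namenode (up to ~30 seconds) until the block reports catch
+      // up and all 3 files are reported as having corrupt blocks.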
+      int count = 0;
+      corruptFileBlocks = namenode.listCorruptFileBlocks("/corruptData", null);
+      numCorrupt = corruptFileBlocks.size();
+      while (numCorrupt < 3) {
+        Thread.sleep(1000);
+        corruptFileBlocks = namenode
+            .listCorruptFileBlocks("/corruptData", null);
+        numCorrupt = corruptFileBlocks.size();
+        count++;
+        if (count > 30)
+          break;
+      }
+      // Validate we get all the corrupt files
+      LOG.info("Namenode has bad files. " + numCorrupt);
+      assertTrue(numCorrupt == 3);
+      // Test the paging: pass a block name from a previous call as the
+      // cookie and expect only the subsequent corrupt files/blocks back.
+
+      FSNamesystem.CorruptFileBlockInfo[] cfb = corruptFileBlocks
+          .toArray(new FSNamesystem.CorruptFileBlockInfo[0]);
+      // now get the 2nd and 3rd corrupt files
+      Collection<FSNamesystem.CorruptFileBlockInfo> nextCorruptFileBlocks = namenode
+          .listCorruptFileBlocks("/corruptData", cfb[0].block.getBlockName());
+      FSNamesystem.CorruptFileBlockInfo[] ncfb = nextCorruptFileBlocks
+          .toArray(new FSNamesystem.CorruptFileBlockInfo[0]);
+      numCorrupt = nextCorruptFileBlocks.size();
+      assertTrue(numCorrupt == 2);
+      assertTrue(ncfb[0].block.getBlockName()
+          .equalsIgnoreCase(cfb[1].block.getBlockName()));
+
+      corruptFileBlocks = namenode.listCorruptFileBlocks("/corruptData",
+          ncfb[1].block.getBlockName());
+      numCorrupt = corruptFileBlocks.size();
+      assertTrue(numCorrupt == 0);
+      // Do a listing on a dir which doesn't have any corrupt blocks and
+      // validate that none are reported
+      util.createFiles(fs, "/goodData");
+      corruptFileBlocks = namenode.listCorruptFileBlocks("/goodData", null);
+      numCorrupt = corruptFileBlocks.size();
+      assertTrue(numCorrupt == 0);
+      util.cleanup(fs, "/corruptData");
+      util.cleanup(fs, "/goodData");
+    } finally {
+      if (cluster != null) {
+        cluster.shutdown();
+      }
+    }
+  }
+  
+  /** check if nn.listCorruptFileBlocks() returns the right limit */
+  public void testMaxCorruptFiles() throws Exception {
+    MiniDFSCluster cluster = null;
+    try {
+      Configuration conf = new HdfsConfiguration();
+      conf.setInt("dfs.datanode.directoryscan.interval", 15); // datanode scans directories
+      conf.setInt("dfs.blockreport.intervalMsec", 3 * 1000); // datanode sends block reports
+      cluster = new MiniDFSCluster(conf, 1, true, null);
+      FileSystem fs = cluster.getFileSystem();
+      final int maxCorruptFileBlocks = 
+        FSNamesystem.DEFAULT_MAX_CORRUPT_FILEBLOCKS_RETURNED;
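+      // The namenode caps each listCorruptFileBlocks response at this limit;
+      // corrupting 3x as many files lets us verify that exactly the cap is
+      // returned.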
+
+      // create 3 * maxCorruptFileBlocks files with one block each
+      DFSTestUtil util = new DFSTestUtil("testMaxCorruptFiles", 
+          maxCorruptFileBlocks * 3, 1, 512);
+      util.createFiles(fs, "/srcdat2", (short) 1);
+      util.waitReplication(fs, "/srcdat2", (short) 1);
+
+      // verify that there are no bad blocks.
+      final NameNode namenode = cluster.getNameNode();
+      Collection<FSNamesystem.CorruptFileBlockInfo> badFiles = namenode
+          .listCorruptFileBlocks("/srcdat2", null);
+      assertTrue("Namenode has " + badFiles.size() + " corrupt files. Expecting none.",
+          badFiles.size() == 0);
+
+      // Now deliberately remove blocks from all files
+      File baseDir = new File(System.getProperty("test.build.data",
+          "build/test/data"), "dfs/data");
+      for (int i = 0; i < 8; i++) {
+        File data_dir = new File(baseDir, "data" + (i + 1)
+            + MiniDFSCluster.FINALIZED_DIR_NAME);
+        File[] blocks = data_dir.listFiles();
+        if (blocks == null)
+          continue;
+
+        for (int idx = 0; idx < blocks.length; idx++) {
+          if (!blocks[idx].getName().startsWith("blk_")) {
+            continue;
+          }
+          assertTrue("Cannot remove file.", blocks[idx].delete());
+        }
+      }
+
+      badFiles = namenode.listCorruptFileBlocks("/srcdat2", null);
+
+      while (badFiles.size() < maxCorruptFileBlocks) {
+        LOG.info("# of corrupt files is: " + badFiles.size());
+        Thread.sleep(10000);
+        badFiles = namenode.listCorruptFileBlocks("/srcdat2", null);
+      }
+      LOG.info("Namenode has bad files. " + badFiles.size());
+      assertTrue("Namenode has " + badFiles.size() + " bad files. Expecting " + 
+          maxCorruptFileBlocks + ".",
+          badFiles.size() == maxCorruptFileBlocks);
+      util.cleanup(fs, "/srcdat2");
+    } finally {
+      if (cluster != null) { cluster.shutdown(); }
+    }
+  }
+
+}