Posted to hdfs-commits@hadoop.apache.org by sh...@apache.org on 2010/09/11 23:53:08 UTC

svn commit: r996213 - in /hadoop/hdfs/trunk: ./ src/java/org/apache/hadoop/hdfs/protocol/ src/java/org/apache/hadoop/hdfs/server/namenode/ src/java/org/apache/hadoop/hdfs/tools/ src/test/hdfs/org/apache/hadoop/hdfs/ src/test/hdfs/org/apache/hadoop/hdfs...

Author: shv
Date: Sat Sep 11 21:53:07 2010
New Revision: 996213

URL: http://svn.apache.org/viewvc?rev=996213&view=rev
Log:
HDFS-1111. Introduce getCorruptFileBlocks() for fsck. Contributed by Sriram Rao.
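
In practice the new interface is exercised through the fsck tool rather than called directly: a hypothetical invocation such as "bin/hdfs fsck /some/dir -list-corruptfileblocks" (assuming the usual hdfs fsck wrapper script) prints one "blockName<TAB>path" line per missing block under /some/dir, fetching the listing from the namenode in batches of at most 100 entries; see the FSNamesystem.java and DFSck.java hunks below.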

Added:
    hadoop/hdfs/trunk/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/TestListCorruptFileBlocks.java   (with props)
Modified:
    hadoop/hdfs/trunk/CHANGES.txt
    hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/protocol/ClientProtocol.java
    hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/server/namenode/BlockManager.java
    hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
    hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java
    hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/server/namenode/NamenodeFsck.java
    hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/server/namenode/UnderReplicatedBlocks.java
    hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/tools/DFSck.java
    hadoop/hdfs/trunk/src/test/hdfs/org/apache/hadoop/hdfs/TestFileCorruption.java
    hadoop/hdfs/trunk/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/TestCorruptFilesJsp.java
    hadoop/hdfs/trunk/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/TestFsck.java
    hadoop/hdfs/trunk/src/webapps/hdfs/corrupt_files.jsp

Modified: hadoop/hdfs/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/hdfs/trunk/CHANGES.txt?rev=996213&r1=996212&r2=996213&view=diff
==============================================================================
--- hadoop/hdfs/trunk/CHANGES.txt (original)
+++ hadoop/hdfs/trunk/CHANGES.txt Sat Sep 11 21:53:07 2010
@@ -124,6 +124,8 @@ Trunk (unreleased changes)
     HDFS-1205. FSDatasetAsyncDiskService should name its threads.
     (Todd Lipcon via eli)
 
+    HDFS-1111. Introduce getCorruptFileBlocks() for fsck. (Sriram Rao via shv)
+
   OPTIMIZATIONS
 
     HDFS-1140. Speedup INode.getPathComponents. (Dmytro Molkov via shv)

Modified: hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/protocol/ClientProtocol.java
URL: http://svn.apache.org/viewvc/hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/protocol/ClientProtocol.java?rev=996213&r1=996212&r2=996213&view=diff
==============================================================================
--- hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/protocol/ClientProtocol.java (original)
+++ hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/protocol/ClientProtocol.java Sat Sep 11 21:53:07 2010
@@ -26,7 +26,6 @@ import org.apache.hadoop.classification.
 import org.apache.hadoop.classification.InterfaceStability;
 import org.apache.hadoop.fs.ContentSummary;
 import org.apache.hadoop.fs.CreateFlag;
-import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FsServerDefaults;
 import org.apache.hadoop.fs.Options;
 import org.apache.hadoop.fs.FileAlreadyExistsException;
@@ -68,9 +67,9 @@ public interface ClientProtocol extends 
    * Compared to the previous version the following changes have been introduced:
    * (Only the latest change is reflected.
    * The log of historical changes can be retrieved from the svn).
-   * 63: remove getBlockLocations optimization
+   * 64: Remove ClientProtocol changes related to -list-corruptfiles
    */
-  public static final long versionID = 63L;
+  public static final long versionID = 64L;
   
   ///////////////////////////////////////
   // File contents
@@ -665,14 +664,6 @@ public interface ClientProtocol extends 
    * @throws IOException
    */
   public void metaSave(String filename) throws IOException;
-
-  /**
-   * @return Array of FileStatus objects referring to corrupted files.
-   *         The server could return all or a few of the files that are corrupt.
-   * @throws AccessControlException
-   * @throws IOException
-   */
-  FileStatus[] getCorruptFiles() throws AccessControlException, IOException; 
   
   /**
    * Get the file info for a specific file or directory.

Modified: hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/server/namenode/BlockManager.java
URL: http://svn.apache.org/viewvc/hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/server/namenode/BlockManager.java?rev=996213&r1=996212&r2=996213&view=diff
==============================================================================
--- hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/server/namenode/BlockManager.java (original)
+++ hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/server/namenode/BlockManager.java Sat Sep 11 21:53:07 2010
@@ -24,7 +24,6 @@ import java.util.Collection;
 import java.util.Collections;
 import java.util.HashSet;
 import java.util.Iterator;
-import java.util.LinkedHashSet;
 import java.util.LinkedList;
 import java.util.List;
 import java.util.Map;
@@ -1724,27 +1723,13 @@ public class BlockManager {
                                    Long startingBlockId) {
     return corruptReplicas.getCorruptReplicaBlockIds(numExpectedBlocks,
                                                      startingBlockId);
-  }  
-  
+  }
+
   /**
-   * @return inodes of files with corrupt blocks, with a maximum of 
-   * MAX_CORRUPT_FILES_RETURNED inodes listed in total
+   * Return an iterator over the set of blocks for which there are no replicas.
    */
-  INode[] getCorruptInodes() {
-    LinkedHashSet<INode> set = new LinkedHashSet<INode>();
-
-    for (Block blk : 
-            neededReplications.getQueue(
-                UnderReplicatedBlocks.QUEUE_WITH_CORRUPT_BLOCKS)){
-      INode inode = blocksMap.getINode(blk);
-      if (inode != null && countNodes(blk).liveReplicas() == 0) {
-        set.add(inode);
-        if (set.size() >= this.maxCorruptFilesReturned) {
-          break;  
-        }
-      } 
-    }
-    return set.toArray(new INode[set.size()]);
+  BlockIterator getCorruptReplicaBlockIterator() {
+    return neededReplications
+        .iterator(UnderReplicatedBlocks.QUEUE_WITH_CORRUPT_BLOCKS);
   }
-  
 }

Modified: hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
URL: http://svn.apache.org/viewvc/hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java?rev=996213&r1=996212&r2=996213&view=diff
==============================================================================
--- hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java (original)
+++ hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java Sat Sep 11 21:53:07 2010
@@ -52,6 +52,7 @@ import org.apache.hadoop.net.Node;
 import org.apache.hadoop.net.NodeBase;
 import org.apache.hadoop.net.ScriptBasedMapping;
 import org.apache.hadoop.hdfs.server.namenode.LeaseManager.Lease;
+import org.apache.hadoop.hdfs.server.namenode.UnderReplicatedBlocks.BlockIterator;
 import org.apache.hadoop.hdfs.server.protocol.BlocksWithLocations;
 import org.apache.hadoop.hdfs.server.protocol.DatanodeCommand;
 import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration;
@@ -67,7 +68,6 @@ import org.apache.hadoop.hdfs.DFSConfigK
 import org.apache.hadoop.fs.ContentSummary;
 import org.apache.hadoop.fs.CreateFlag;
 import org.apache.hadoop.fs.FileAlreadyExistsException;
-import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FsServerDefaults;
 import org.apache.hadoop.fs.InvalidPathException;
 import org.apache.hadoop.fs.ParentNotDirectoryException;
@@ -88,8 +88,8 @@ import java.io.File;
 import java.io.FileWriter;
 import java.io.FileNotFoundException;
 import java.io.IOException;
-import java.io.PrintWriter;
 import java.io.DataOutputStream;
+import java.io.PrintWriter;
 import java.lang.management.ManagementFactory;
 import java.net.InetAddress;
 import java.net.URI;
@@ -162,6 +162,7 @@ public class FSNamesystem implements FSC
   public static final Log auditLog = LogFactory.getLog(
       FSNamesystem.class.getName() + ".audit");
 
+  static final int DEFAULT_MAX_CORRUPT_FILEBLOCKS_RETURNED = 100;
   static int BLOCK_DELETION_INCREMENT = 1000;
   private boolean isPermissionEnabled;
   private UserGroupInformation fsOwner;
@@ -4464,37 +4465,57 @@ public class FSNamesystem implements FSC
     return blockManager.getCorruptReplicaBlockIds(numExpectedBlocks,
                                                   startingBlockId);
   }
-
+  
+  static class CorruptFileBlockInfo {
+    String path;
+    Block block;
+    
+    public CorruptFileBlockInfo(String p, Block b) {
+      path = p;
+      block = b;
+    }
+    
+    public String toString() {
+      return block.getBlockName() + "\t" + path;
+    }
+  }
   /**
-   * @return Array of FileStatus objects representing files with 
-   * corrupted blocks.
+   * @param path Restrict corrupt files to this portion of namespace.
+   * @param startBlockAfter Support for continuation; the set of files we return
+   *  back is ordered by blockid; startBlockAfter tells where to start from
+   * @return a list in which each entry describes a corrupt file/block
    * @throws AccessControlException
    * @throws IOException
    */
-  synchronized FileStatus[] getCorruptFiles() 
-    throws AccessControlException, IOException {
-    
+  synchronized Collection<CorruptFileBlockInfo> listCorruptFileBlocks(String path,
+      String startBlockAfter) throws AccessControlException, IOException {
+
     checkSuperuserPrivilege();
-    
-    INode[] inodes = blockManager.getCorruptInodes();
-    FileStatus[] ret = new FileStatus[inodes.length];
-    
-    int i = 0;
-    for (INode inode: inodes) {
-      String src = inode.getFullPathName();
-      ret[i++] = new FileStatus(inode.computeContentSummary().getLength(), 
-          inode.isDirectory(), 
-          ((INodeFile)inode).getReplication(), 
-          ((INodeFile)inode).getPreferredBlockSize(),
-          inode.getModificationTime(),
-          inode.getAccessTime(),
-          inode.getFsPermission(),
-          inode.getUserName(),
-          inode.getGroupName(),
-          new Path(src));
+    long startBlockId = 0;
+    // print a limited # of corrupt files per call
+    int count = 0;
+    ArrayList<CorruptFileBlockInfo> corruptFiles = new ArrayList<CorruptFileBlockInfo>();
+    
+    if (startBlockAfter != null) {
+      startBlockId = Block.filename2id(startBlockAfter);
+    }
+    BlockIterator blkIterator = blockManager.getCorruptReplicaBlockIterator();
+    while (blkIterator.hasNext()) {
+      Block blk = blkIterator.next();
+      INode inode = blockManager.getINode(blk);
+      if (inode != null && blockManager.countNodes(blk).liveReplicas() == 0) {
+        String src = FSDirectory.getFullPathName(inode);
+        if (((startBlockAfter == null) || (blk.getBlockId() > startBlockId))
+            && (src.startsWith(path))) {
+          corruptFiles.add(new CorruptFileBlockInfo(src, blk));
+          count++;
+          if (count >= DEFAULT_MAX_CORRUPT_FILEBLOCKS_RETURNED)
+            break;
+        }
+      }
     }
-
-    return ret;
+    LOG.info("list corrupt file blocks returned: " + count);
+    return corruptFiles;
   }
   
   public synchronized ArrayList<DatanodeDescriptor> getDecommissioningNodes() {
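
To make the continuation contract above concrete, here is a minimal, hypothetical paging loop over listCorruptFileBlocks(). It is a sketch only: it assumes caller code living in the org.apache.hadoop.hdfs.server.namenode package (the method and the CorruptFileBlockInfo.block field are package-private), an FSNamesystem reference named fsn, and the usual java.util.Collection / java.io.IOException imports; none of this is part of the patch itself.

    // Sketch: drain the corrupt-file listing one batch at a time.  Each call
    // returns at most DEFAULT_MAX_CORRUPT_FILEBLOCKS_RETURNED (100) entries.
    static void printAllCorruptFileBlocks(FSNamesystem fsn) throws IOException {
      String cookie = null;                      // startBlockAfter; null on the first call
      while (true) {
        Collection<FSNamesystem.CorruptFileBlockInfo> batch =
            fsn.listCorruptFileBlocks("/", cookie);
        if (batch.isEmpty()) {
          return;                                // nothing (more) to report
        }
        FSNamesystem.CorruptFileBlockInfo last = null;
        for (FSNamesystem.CorruptFileBlockInfo info : batch) {
          System.out.println(info);              // prints "<blockName>\t<path>"
          last = info;
        }
        cookie = last.block.getBlockName();      // resume after the last block returned
      }
    }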

Modified: hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java
URL: http://svn.apache.org/viewvc/hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java?rev=996213&r1=996212&r2=996213&view=diff
==============================================================================
--- hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java (original)
+++ hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java Sat Sep 11 21:53:07 2010
@@ -33,7 +33,6 @@ import org.apache.hadoop.classification.
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.ContentSummary;
 import org.apache.hadoop.fs.CreateFlag;
-import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.FsServerDefaults;
 import org.apache.hadoop.fs.Options;
@@ -1118,12 +1117,20 @@ public class NameNode implements Namenod
     namesystem.metaSave(filename);
   }
 
-  /** {@inheritDoc} */
-  public FileStatus[] getCorruptFiles() 
-    throws AccessControlException, IOException {
-    
-    return namesystem.getCorruptFiles();
-    
+  /**
+   * 
+   * @param path
+   *          Sub-tree used in querying corrupt files
+   * @param startBlockAfter
+   *          Paging support---pass in the last block returned from the previous
+   *          call and some # of corrupt blocks after that point are returned
+   * @return a list in which each entry describes a corrupt file/block
+   * @throws AccessControlException
+   * @throws IOException
+   */
+  public Collection<FSNamesystem.CorruptFileBlockInfo> listCorruptFileBlocks(String path,
+      String startBlockAfter) throws AccessControlException, IOException {
+    return namesystem.listCorruptFileBlocks(path, startBlockAfter);
   }
   
   /** {@inheritDoc} */

Modified: hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/server/namenode/NamenodeFsck.java
URL: http://svn.apache.org/viewvc/hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/server/namenode/NamenodeFsck.java?rev=996213&r1=996212&r2=996213&view=diff
==============================================================================
--- hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/server/namenode/NamenodeFsck.java (original)
+++ hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/server/namenode/NamenodeFsck.java Sat Sep 11 21:53:07 2010
@@ -24,6 +24,7 @@ import java.net.InetAddress;
 import java.net.InetSocketAddress;
 import java.net.Socket;
 import java.util.ArrayList;
+import java.util.Collection;
 import java.util.Date;
 import java.util.Iterator;
 import java.util.List;
@@ -35,8 +36,6 @@ import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileStatus;
-import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hdfs.BlockReader;
 import org.apache.hadoop.hdfs.DFSClient;
 import org.apache.hadoop.hdfs.protocol.Block;
@@ -105,9 +104,14 @@ public class NamenodeFsck {
   private boolean showBlocks = false;
   private boolean showLocations = false;
   private boolean showRacks = false;
-  private boolean showCorruptFiles = false;
+  private boolean showCorruptFileBlocks = false;
   private int fixing = FIXING_NONE;
   private String path = "/";
+
+  // We return back N files that are corrupt; the list of files returned is
+  // ordered by block id; to allow continuation support, pass in the last block
+  // # from previous call
+  private String startBlockAfter = null;
   
   private final Configuration conf;
   private final PrintWriter out;
@@ -145,7 +149,12 @@ public class NamenodeFsck {
       else if (key.equals("locations")) { this.showLocations = true; }
       else if (key.equals("racks")) { this.showRacks = true; }
       else if (key.equals("openforwrite")) {this.showOpenFiles = true; }
-      else if (key.equals("corruptfiles")) {this.showCorruptFiles = true; }
+      else if (key.equals("listcorruptfileblocks")) {
+        this.showCorruptFileBlocks = true;
+      }
+      else if (key.equals("startblockafter")) {
+        this.startBlockAfter = pmap.get("startblockafter")[0]; 
+      }
     }
   }
   
@@ -164,8 +173,8 @@ public class NamenodeFsck {
       final HdfsFileStatus file = namenode.getFileInfo(path);
       if (file != null) {
 
-        if (showCorruptFiles) {
-          listCorruptFiles();
+        if (showCorruptFileBlocks) {
+          listCorruptFileBlocks();
           return;
         }
         
@@ -205,53 +214,25 @@ public class NamenodeFsck {
     }
   }
  
-  static String buildSummaryResultForListCorruptFiles(int corruptFilesCount,
-      String pathName) {
-
-    String summary = "";
-
-    if (corruptFilesCount == 0) {
-      summary = "Unable to locate any corrupt files under '" + pathName
-          + "'.\n\nPlease run a complete fsck to confirm if '" + pathName
-          + "' " + HEALTHY_STATUS;
-    } else if (corruptFilesCount == 1) {
-      summary = "There is at least 1 corrupt file under '" + pathName
-          + "', which " + CORRUPT_STATUS;
-    } else if (corruptFilesCount > 1) {
-      summary = "There are at least " + corruptFilesCount
-          + " corrupt files under '" + pathName + "', which " + CORRUPT_STATUS;
+  private void listCorruptFileBlocks() throws AccessControlException,
+      IOException {
+    Collection<FSNamesystem.CorruptFileBlockInfo> corruptFiles = namenode
+        .listCorruptFileBlocks(path, startBlockAfter);
+    int numCorruptFiles = corruptFiles.size();
+    String filler;
+    if (numCorruptFiles > 0) {
+      filler = Integer.toString(numCorruptFiles);
+    } else if (startBlockAfter == null) {
+      filler = "no";
     } else {
-      throw new IllegalArgumentException("corruptFilesCount must be positive");
+      filler = "no more";
     }
-
-    return summary;
-  }
-
-  private void listCorruptFiles() throws AccessControlException, IOException {
-    int matchedCorruptFilesCount = 0;
-    // directory representation of path
-    String pathdir = path.endsWith(Path.SEPARATOR) ? path : path + Path.SEPARATOR;
-    FileStatus[] corruptFileStatuses = namenode.getCorruptFiles();
-
-    for (FileStatus fileStatus : corruptFileStatuses) {
-      String currentPath = fileStatus.getPath().toString();
-      if (currentPath.startsWith(pathdir) || currentPath.equals(path)) {
-        matchedCorruptFilesCount++;
-        
-        // print the header before listing first item
-        if (matchedCorruptFilesCount == 1 ) {
-          out.println("Here are a few files that may be corrupted:");
-          out.println("===========================================");
-        }
-        
-        out.println(currentPath);
-      }
+    for (FSNamesystem.CorruptFileBlockInfo c : corruptFiles) {
+      out.println(c.toString());
     }
-
+    out.println("\n\nThe filesystem under path '" + path + "' has " + filler
+        + " CORRUPT files");
     out.println();
-    out.println(buildSummaryResultForListCorruptFiles(matchedCorruptFilesCount,
-        path));
-
   }
   
   private void check(String parent, HdfsFileStatus file, Result res) throws IOException {
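
For reference, the listing this method writes (and which DFSck parses further down in this patch) is one tab-separated "blockName<TAB>path" line per corrupt block, followed by a blank gap and a one-line summary. A hypothetical response for two missing blocks under /data, with invented block names and paths, would look like:

    blk_1001	/data/part-00000
    blk_1002	/data/part-00003


    The filesystem under path '/data' has 2 CORRUPT files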

Modified: hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/server/namenode/UnderReplicatedBlocks.java
URL: http://svn.apache.org/viewvc/hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/server/namenode/UnderReplicatedBlocks.java?rev=996213&r1=996212&r2=996213&view=diff
==============================================================================
--- hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/server/namenode/UnderReplicatedBlocks.java (original)
+++ hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/server/namenode/UnderReplicatedBlocks.java Sat Sep 11 21:53:07 2010
@@ -195,52 +195,66 @@ class UnderReplicatedBlocks implements I
     }
   }
 
-  /* returns an interator of all blocks in a given priority queue */
-  public synchronized Iterable<Block> getQueue(int priority) {
-    if (priority < 0 || priority >= LEVEL) {
-      return null;
-    }
-    return priorityQueues.get(priority);
+  /* returns an iterator of all blocks in a given priority queue */
+  synchronized BlockIterator iterator(int level) {
+    return new BlockIterator(level);
   }
-  
+    
   /* return an iterator of all the under replication blocks */
   public synchronized BlockIterator iterator() {
     return new BlockIterator();
   }
   
-    class BlockIterator implements Iterator<Block> {
-      private int level;
-      private List<Iterator<Block>> iterators = new ArrayList<Iterator<Block>>();
-      BlockIterator()  
-      {
-        level=0;
-        for(int i=0; i<LEVEL; i++) {
-          iterators.add(priorityQueues.get(i).iterator());
-        }
-      }
-              
-      private void update() {
-        while(level< LEVEL-1 && !iterators.get(level).hasNext()) {
-          level++;
-        }
-      }
-              
-      public Block next() {
-        update();
-        return iterators.get(level).next();
+  class BlockIterator implements Iterator<Block> {
+    private int level;
+    private boolean isIteratorForLevel = false;
+    private List<Iterator<Block>> iterators = new ArrayList<Iterator<Block>>();
+
+    BlockIterator()  
+    {
+      level=0;
+      for(int i=0; i<LEVEL; i++) {
+        iterators.add(priorityQueues.get(i).iterator());
       }
-              
-      public boolean hasNext() {
-        update();
-        return iterators.get(level).hasNext();
+    }
+
+    BlockIterator(int l) {
+      level = l;
+      isIteratorForLevel = true;
+      iterators.add(priorityQueues.get(level).iterator());
+    }
+
+    private void update() {
+      if (isIteratorForLevel)
+        return;
+      while(level< LEVEL-1 && !iterators.get(level).hasNext()) {
+        level++;
       }
-              
-      public void remove() {
+    }
+
+    public Block next() {
+      if (isIteratorForLevel)
+        return iterators.get(0).next();
+      update();
+      return iterators.get(level).next();
+    }
+
+    public boolean hasNext() {
+      if (isIteratorForLevel)
+        return iterators.get(0).hasNext();
+      update();
+      return iterators.get(level).hasNext();
+    }
+
+    public void remove() {
+      if (isIteratorForLevel) 
+        iterators.get(0).remove();
+      else
         iterators.get(level).remove();
-      }
-      
-      public int getPriority() {
-        return level;
+    }
+
+    public int getPriority() {
+      return level;
     };
-  }
+  }  
 }

Modified: hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/tools/DFSck.java
URL: http://svn.apache.org/viewvc/hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/tools/DFSck.java?rev=996213&r1=996212&r2=996213&view=diff
==============================================================================
--- hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/tools/DFSck.java (original)
+++ hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/tools/DFSck.java Sat Sep 11 21:53:07 2010
@@ -31,7 +31,6 @@ import org.apache.hadoop.conf.Configurat
 import org.apache.hadoop.conf.Configured;
 import org.apache.hadoop.hdfs.server.namenode.NameNode;
 import org.apache.hadoop.hdfs.server.namenode.NamenodeFsck;
-import org.apache.hadoop.hdfs.DFSConfigKeys;
 import org.apache.hadoop.hdfs.HdfsConfiguration;
 import org.apache.hadoop.security.Krb5AndCertsSslSocketConnector;
 import org.apache.hadoop.security.SecurityUtil;
@@ -85,14 +84,16 @@ public class DFSck extends Configured im
    * Print fsck usage information
    */
   static void printUsage() {
-    System.err.println("Usage: DFSck <path> [-list-corruptfiles | [-move | -delete | -openforwrite ] [-files [-blocks [-locations | -racks]]]] ");
+    System.err.println("Usage: DFSck <path> [-list-corruptfileblocks | " +
+        "[-move | -delete | -openforwrite] " +
+        "[-files [-blocks [-locations | -racks]]]]");
     System.err.println("\t<path>\tstart checking from this path");
     System.err.println("\t-move\tmove corrupted files to /lost+found");
     System.err.println("\t-delete\tdelete corrupted files");
     System.err.println("\t-files\tprint out files being checked");
     System.err.println("\t-openforwrite\tprint out files opened for write");
-    System.err.println("\t-list-corruptfiles\tprint out corrupt files up to a "+
-        "maximum defined by property dfs.corruptfilesreturned.max");
+    System.err.println("\t-list-corruptfileblocks\tprint out list of missing "
+        + "blocks and files they belong to");
     System.err.println("\t-blocks\tprint out block report");
     System.err.println("\t-locations\tprint out locations for every block");
     System.err.println("\t-racks\tprint out network topology for data-node locations");
@@ -123,6 +124,67 @@ public class DFSck extends Configured im
       throw new IOException(e);
     }
   }
+  
+  /*
+   * To get the list, we need to call iteratively until the server says
+   * there is no more left.
+   */
+  private Integer listCorruptFileBlocks(String dir, String baseUrl)
+      throws IOException {
+    int errCode = -1;
+    int numCorrupt = 0;
+    String lastBlock = null;
+    final String noCorruptLine = "has no CORRUPT files";
+    final String noMoreCorruptLine = "has no more CORRUPT files";
+    boolean allDone = false;
+    while (!allDone) {
+      final StringBuffer url = new StringBuffer(baseUrl);
+      if (lastBlock != null) {
+        url.append("&startblockafter=").append(lastBlock);
+      }
+      URL path = new URL(url.toString());
+      SecurityUtil.fetchServiceTicket(path);
+      URLConnection connection = path.openConnection();
+      InputStream stream = connection.getInputStream();
+      BufferedReader input = new BufferedReader(new InputStreamReader(
+          stream, "UTF-8"));
+      try {
+        String line = null;
+        while ((line = input.readLine()) != null) {
+          if ((line.endsWith(noCorruptLine)) || 
+              (line.endsWith(noMoreCorruptLine)) ||
+              (line.endsWith(NamenodeFsck.NONEXISTENT_STATUS))) {
+            allDone = true;
+            break;
+          }
+          if ((line.isEmpty())
+              || (line.startsWith("FSCK started by")) 
+              || (line.startsWith("The filesystem under path")))
+            continue;
+          numCorrupt++;
+          if (numCorrupt == 1) {
+            System.out.println("The list of corrupt files under path '" 
+                + dir + "' are:");
+          }
+          System.out.println(line);
+          try {
+            // Get the block # that we need to send in next call
+            lastBlock = line.split("\t")[0];
+          } catch (Exception e) {
+            allDone = true;
+            break;
+          }
+        }
+      } finally {
+        input.close();
+      }
+    }
+    System.out.println("The filesystem under path '" + dir + "' has " 
+        + numCorrupt + " CORRUPT files");
+    if (numCorrupt == 0)
+      errCode = 0;
+    return errCode;
+  }
             
   private int doWork(final String[] args) throws IOException {
     String proto = "http://";
@@ -141,15 +203,22 @@ public class DFSck extends Configured im
       if (!args[idx].startsWith("-")) { dir = args[idx]; break; }
     }
     url.append(URLEncoder.encode(dir, "UTF-8"));
+    boolean doListCorruptFileBlocks = false;
     for (int idx = 0; idx < args.length; idx++) {
       if (args[idx].equals("-move")) { url.append("&move=1"); }
       else if (args[idx].equals("-delete")) { url.append("&delete=1"); }
       else if (args[idx].equals("-files")) { url.append("&files=1"); }
       else if (args[idx].equals("-openforwrite")) { url.append("&openforwrite=1"); }
-      else if (args[idx].equals("-list-corruptfiles")) { url.append("&corruptfiles=1"); }
       else if (args[idx].equals("-blocks")) { url.append("&blocks=1"); }
       else if (args[idx].equals("-locations")) { url.append("&locations=1"); }
       else if (args[idx].equals("-racks")) { url.append("&racks=1"); }
+      else if (args[idx].equals("-list-corruptfileblocks")) {
+        url.append("&listcorruptfileblocks=1");
+        doListCorruptFileBlocks = true;
+      }
+    }
+    if (doListCorruptFileBlocks) {
+      return listCorruptFileBlocks(dir, url.toString());
     }
     URL path = new URL(url.toString());
     SecurityUtil.fetchServiceTicket(path);
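
Taken together with the NamenodeFsck change above, the client now drives the fsck servlet iteratively instead of receiving the whole list in one response. A sketch of the request sequence for a target directory, with the base fsck URL elided and the block name blk_1001 invented purely for illustration:

    <base fsck URL for the path>&listcorruptfileblocks=1
    <base fsck URL for the path>&listcorruptfileblocks=1&startblockafter=blk_1001
    ...

Each response is read line by line; the first tab-separated column of the last entry seen becomes the next startblockafter value, and the loop stops once a line ends with "has no CORRUPT files", "has no more CORRUPT files", or the non-existent-path status.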

Modified: hadoop/hdfs/trunk/src/test/hdfs/org/apache/hadoop/hdfs/TestFileCorruption.java
URL: http://svn.apache.org/viewvc/hadoop/hdfs/trunk/src/test/hdfs/org/apache/hadoop/hdfs/TestFileCorruption.java?rev=996213&r1=996212&r2=996213&view=diff
==============================================================================
--- hadoop/hdfs/trunk/src/test/hdfs/org/apache/hadoop/hdfs/TestFileCorruption.java (original)
+++ hadoop/hdfs/trunk/src/test/hdfs/org/apache/hadoop/hdfs/TestFileCorruption.java Sat Sep 11 21:53:07 2010
@@ -22,12 +22,7 @@ import java.io.DataInputStream;
 import java.io.DataOutputStream;
 import java.io.File;
 import java.io.FileOutputStream;
-import java.io.IOException;
-import java.io.RandomAccessFile;
-import java.nio.ByteBuffer;
-import java.nio.channels.FileChannel;
 import java.util.ArrayList;
-import java.util.Random;
 
 import junit.framework.TestCase;
 
@@ -35,11 +30,9 @@ import org.apache.commons.logging.Log;
 import org.apache.commons.logging.impl.Log4JLogger;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.ChecksumException;
-import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hdfs.protocol.Block;
-import org.apache.hadoop.hdfs.protocol.ClientProtocol;
 import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
 import org.apache.hadoop.hdfs.server.common.GenerationStamp;
 import org.apache.hadoop.hdfs.server.datanode.DataNode;
@@ -188,172 +181,4 @@ public class TestFileCorruption extends 
     }
     return new Block(blockId, blocks[idx].length(), blockTimeStamp);
   }
-
-  /** check if ClientProtocol.getCorruptFiles() returns a file that has missing blocks */
-  public void testCorruptFilesMissingBlock() throws Exception {
-    MiniDFSCluster cluster = null;
-    try {
-      Configuration conf = new HdfsConfiguration();
-      conf.setInt("dfs.datanode.directoryscan.interval", 1); // datanode scans directories
-      conf.setInt("dfs.blockreport.intervalMsec", 3 * 1000); // datanode sends block reports
-      cluster = new MiniDFSCluster(conf, 1, true, null);
-      FileSystem fs = cluster.getFileSystem();
-
-      // create two files with one block each
-      DFSTestUtil util = new DFSTestUtil("testCorruptFilesMissingBlock", 2, 1, 512);
-      util.createFiles(fs, "/srcdat");
-
-      // verify that there are no bad blocks.
-      ClientProtocol namenode = DFSClient.createNamenode(conf);
-      FileStatus[] badFiles = namenode.getCorruptFiles();
-      assertTrue("Namenode has " + badFiles.length + " corrupt files. Expecting none.",
-          badFiles.length == 0);
-
-      // Now deliberately remove one block
-      File data_dir = new File(System.getProperty("test.build.data"),
-      "dfs/data/data1/current/finalized");
-      assertTrue("data directory does not exist", data_dir.exists());
-      File[] blocks = data_dir.listFiles();
-      assertTrue("Blocks do not exist in data-dir", (blocks != null) && (blocks.length > 0));
-      for (int idx = 0; idx < blocks.length; idx++) {
-        if (!blocks[idx].getName().startsWith("blk_")) {
-          continue;
-        }
-        LOG.info("Deliberately removing file "+blocks[idx].getName());
-        assertTrue("Cannot remove file.", blocks[idx].delete());
-        break;
-      }
-
-      badFiles = namenode.getCorruptFiles();
-      while (badFiles.length == 0) {
-        Thread.sleep(1000);
-        badFiles = namenode.getCorruptFiles();
-      }
-      LOG.info("Namenode has bad files. " + badFiles.length);
-      assertTrue("Namenode has " + badFiles.length + " bad files. Expecting 1.",
-          badFiles.length == 1);
-      util.cleanup(fs, "/srcdat");
-    } finally {
-      if (cluster != null) { cluster.shutdown(); }
-    }
-  }
-
-  /** check if ClientProtocol.getCorruptFiles() returns the right limit */
-  public void testMaxCorruptFiles() throws Exception {
-    MiniDFSCluster cluster = null;
-    try {
-      Configuration conf = new HdfsConfiguration();
-      conf.setInt("dfs.corruptfilesreturned.max", 2);
-      conf.setInt("dfs.datanode.directoryscan.interval", 1); // datanode scans directories
-      conf.setInt("dfs.blockreport.intervalMsec", 3 * 1000); // datanode sends block reports
-      cluster = new MiniDFSCluster(conf, 1, true, null);
-      FileSystem fs = cluster.getFileSystem();
-
-      // create two files with one block each
-      DFSTestUtil util = new DFSTestUtil("testMaxCorruptFiles", 4, 1, 512);
-      util.createFiles(fs, "/srcdat2");
-
-      // verify that there are no bad blocks.
-      ClientProtocol namenode = DFSClient.createNamenode(conf);
-      FileStatus[] badFiles = namenode.getCorruptFiles();
-      assertTrue("Namenode has " + badFiles.length + " corrupt files. Expecting none.",
-          badFiles.length == 0);
-
-      // Now deliberately remove one block
-      File data_dir = new File(System.getProperty("test.build.data"),
-      "dfs/data/data1/current/finalized");
-      assertTrue("data directory does not exist", data_dir.exists());
-      File[] blocks = data_dir.listFiles();
-      assertTrue("Blocks do not exist in data-dir", (blocks != null) && (blocks.length > 0));
-      for (int idx = 0; idx < blocks.length; idx++) {
-        if (!blocks[idx].getName().startsWith("blk_")) {
-          continue;
-        }
-        LOG.info("Deliberately removing file "+blocks[idx].getName());
-        assertTrue("Cannot remove file.", blocks[idx].delete());
-      }
-
-      badFiles = namenode.getCorruptFiles();
-      while (badFiles.length < 2) {
-        badFiles = namenode.getCorruptFiles();
-        Thread.sleep(10000);
-      }
-      badFiles = namenode.getCorruptFiles(); // once more since time has passed
-      LOG.info("Namenode has bad files. " + badFiles.length);
-      assertTrue("Namenode has " + badFiles.length + " bad files. Expecting 2.",
-          badFiles.length == 2);
-      util.cleanup(fs, "/srcdat2");
-    } finally {
-      if (cluster != null) { cluster.shutdown(); }
-    }
-  }
-
-  /** check if ClientProtocol.getCorruptFiles() returns a file that has corrupted blocks */
-  public void testCorruptFilesCorruptedBlock() throws Exception {
-    MiniDFSCluster cluster = null;
-    Random random = new Random();
-    
-    try {
-      Configuration conf = new HdfsConfiguration();
-      conf.setInt("dfs.datanode.directoryscan.interval", 1); // datanode scans directories
-      conf.setInt("dfs.blockreport.intervalMsec", 3 * 1000); // datanode sends block reports
-      cluster = new MiniDFSCluster(conf, 1, true, null);
-      FileSystem fs = cluster.getFileSystem();
-
-      // create two files with one block each
-      DFSTestUtil util = new DFSTestUtil("testCorruptFilesCorruptedBlock", 2, 1, 512);
-      util.createFiles(fs, "/srcdat10");
-
-      // fetch bad file list from namenode. There should be none.
-      ClientProtocol namenode = DFSClient.createNamenode(conf);
-      FileStatus[] badFiles = namenode.getCorruptFiles();
-      assertTrue("Namenode has " + badFiles.length + " corrupt files. Expecting None.",
-          badFiles.length == 0);
-
-      // Now deliberately corrupt one block
-      File data_dir = new File(System.getProperty("test.build.data"),
-      "dfs/data/data1/current/finalized");
-      assertTrue("data directory does not exist", data_dir.exists());
-      File[] blocks = data_dir.listFiles();
-      assertTrue("Blocks do not exist in data-dir", (blocks != null) && (blocks.length > 0));
-      for (int idx = 0; idx < blocks.length; idx++) {
-        if (blocks[idx].getName().startsWith("blk_") &&
-            blocks[idx].getName().endsWith(".meta")) {
-          //
-          // shorten .meta file
-          //
-          RandomAccessFile file = new RandomAccessFile(blocks[idx], "rw");
-          FileChannel channel = file.getChannel();
-          long position = channel.size() - 2;
-          int length = 2;
-          byte[] buffer = new byte[length];
-          random.nextBytes(buffer);
-          channel.write(ByteBuffer.wrap(buffer), position);
-          file.close();
-          LOG.info("Deliberately corrupting file " + blocks[idx].getName() +
-              " at offset " + position + " length " + length);
-
-          // read all files to trigger detection of corrupted replica
-          try {
-            util.checkFiles(fs, "/srcdat10");
-          } catch (BlockMissingException e) {
-            System.out.println("Received BlockMissingException as expected.");
-          } catch (IOException e) {
-            assertTrue("Corrupted replicas not handled properly. Expecting BlockMissingException " +
-                " but received IOException " + e, false);
-          }
-          break;
-        }
-      }
-
-      // fetch bad file list from namenode. There should be one file.
-      badFiles = namenode.getCorruptFiles();
-      LOG.info("Namenode has bad files. " + badFiles.length);
-      assertTrue("Namenode has " + badFiles.length + " bad files. Expecting 1.",
-          badFiles.length == 1);
-      util.cleanup(fs, "/srcdat10");
-    } finally {
-      if (cluster != null) { cluster.shutdown(); }
-    }
-  }
 }

Modified: hadoop/hdfs/trunk/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/TestCorruptFilesJsp.java
URL: http://svn.apache.org/viewvc/hadoop/hdfs/trunk/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/TestCorruptFilesJsp.java?rev=996213&r1=996212&r2=996213&view=diff
==============================================================================
--- hadoop/hdfs/trunk/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/TestCorruptFilesJsp.java (original)
+++ hadoop/hdfs/trunk/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/TestCorruptFilesJsp.java Sat Sep 11 21:53:07 2010
@@ -20,20 +20,18 @@ package org.apache.hadoop.hdfs.server.na
 import static org.junit.Assert.assertTrue;
 
 import java.net.URL;
+import java.util.Collection;
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.ChecksumException;
 import org.apache.hadoop.fs.FSDataInputStream;
-import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hdfs.DFSClient;
 import org.apache.hadoop.hdfs.DFSConfigKeys;
 import org.apache.hadoop.hdfs.DFSTestUtil;
 import org.apache.hadoop.hdfs.HdfsConfiguration;
 import org.apache.hadoop.hdfs.MiniDFSCluster;
 import org.apache.hadoop.hdfs.TestDatanodeBlockScanner;
-import org.apache.hadoop.hdfs.protocol.ClientProtocol;
 import org.junit.Test;
 
 /** A JUnit test for corrupt_files.jsp */
@@ -66,10 +64,11 @@ public class TestCorruptFilesJsp  {
       }
 
       // verify there are not corrupt files
-      ClientProtocol namenode = DFSClient.createNamenode(conf);
-      FileStatus[] badFiles = namenode.getCorruptFiles();
-      assertTrue("There are " + badFiles.length
-          + " corrupt files, but expecting none", badFiles.length == 0);
+      final NameNode namenode = cluster.getNameNode();
+      Collection<FSNamesystem.CorruptFileBlockInfo> badFiles = namenode
+          .listCorruptFileBlocks("/", null);
+      assertTrue("There are " + badFiles.size()
+          + " corrupt files, but expecting none", badFiles.size() == 0);
 
       // Check if webui agrees
       URL url = new URL("http://"
@@ -95,9 +94,9 @@ public class TestCorruptFilesJsp  {
       }
 
       // verify if all corrupt files were reported to NN
-      badFiles = namenode.getCorruptFiles();
-      assertTrue("Expecting 3 corrupt files, but got " + badFiles.length,
-          badFiles.length == 3);
+      badFiles = namenode.listCorruptFileBlocks("/", null);
+      assertTrue("Expecting 3 corrupt files, but got " + badFiles.size(),
+          badFiles.size() == 3);
 
       // Check if webui agrees
       url = new URL("http://"

Modified: hadoop/hdfs/trunk/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/TestFsck.java
URL: http://svn.apache.org/viewvc/hadoop/hdfs/trunk/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/TestFsck.java?rev=996213&r1=996212&r2=996213&view=diff
==============================================================================
--- hadoop/hdfs/trunk/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/TestFsck.java (original)
+++ hadoop/hdfs/trunk/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/TestFsck.java Sat Sep 11 21:53:07 2010
@@ -28,6 +28,7 @@ import java.io.RandomAccessFile;
 import java.net.InetSocketAddress;
 import java.nio.channels.FileChannel;
 import java.security.PrivilegedExceptionAction;
+import java.util.Collection;
 import java.util.Random;
 import java.util.regex.Pattern;
 
@@ -35,10 +36,7 @@ import junit.framework.TestCase;
 
 import org.apache.commons.logging.impl.Log4JLogger;
 import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.ChecksumException;
-import org.apache.hadoop.fs.FSDataInputStream;
 import org.apache.hadoop.fs.FSDataOutputStream;
-import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.permission.FsPermission;
@@ -47,8 +45,6 @@ import org.apache.hadoop.hdfs.DFSConfigK
 import org.apache.hadoop.hdfs.DFSTestUtil;
 import org.apache.hadoop.hdfs.HdfsConfiguration;
 import org.apache.hadoop.hdfs.MiniDFSCluster;
-import org.apache.hadoop.hdfs.TestDatanodeBlockScanner;
-import org.apache.hadoop.hdfs.protocol.ClientProtocol;
 import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
 import org.apache.hadoop.hdfs.tools.DFSck;
 import org.apache.hadoop.io.IOUtils;
@@ -455,121 +451,68 @@ public class TestFsck extends TestCase {
     }
   }
   
-  /**
-   * Check if NamenodeFsck.buildSummaryResultForListCorruptFiles constructs the
-   * proper string according to the number of corrupt files
-   */
-  public void testbuildResultForListCorruptFile() {
-    assertEquals("Verifying result for zero corrupt files",
-        "Unable to locate any corrupt files under '/'.\n\n"
-            + "Please run a complete fsck to confirm if '/' "
-            + NamenodeFsck.HEALTHY_STATUS, NamenodeFsck
-            .buildSummaryResultForListCorruptFiles(0, "/"));
-
-    assertEquals("Verifying result for one corrupt file",
-        "There is at least 1 corrupt file under '/', which "
-            + NamenodeFsck.CORRUPT_STATUS, NamenodeFsck
-            .buildSummaryResultForListCorruptFiles(1, "/"));
-
-    assertEquals("Verifying result for than one corrupt file",
-        "There are at least 100 corrupt files under '/', which "
-            + NamenodeFsck.CORRUPT_STATUS, NamenodeFsck
-            .buildSummaryResultForListCorruptFiles(100, "/"));
-
-    try {
-      NamenodeFsck.buildSummaryResultForListCorruptFiles(-1, "/");
-      fail("NamenodeFsck.buildSummaryResultForListCorruptFiles should "
-          + "have thrown IllegalArgumentException for non-positive argument");
-    } catch (IllegalArgumentException e) {
-      // expected result
-    }
-  }
-  
   /** check if option -list-corruptfiles of fsck command works properly */
-  public void testCorruptFilesOption() throws Exception {
+  public void testFsckListCorruptFilesBlocks() throws Exception {
+    Configuration conf = new Configuration();
+    conf.setLong("dfs.blockreport.intervalMsec", 1000);
+    conf.setInt("dfs.datanode.directoryscan.interval", 1);
+    FileSystem fs = null;
+
     MiniDFSCluster cluster = null;
     try {
-
-      final int FILE_SIZE = 512;
-      // the files and directories are intentionally prefixes of each other in
-      // order to verify if fsck can distinguish correctly whether the path
-      // supplied by user is a file or a directory
-      Path[] filepaths = { new Path("/audiobook"), new Path("/audio/audio1"),
-          new Path("/audio/audio2"), new Path("/audio/audio") };
-
-      Configuration conf = new HdfsConfiguration();
-      conf.setInt("dfs.datanode.directoryscan.interval", 1); // datanode scans
-                                                             // directories
-      conf.setInt("dfs.blockreport.intervalMsec", 3 * 1000); // datanode sends
-                                                             // block reports
       cluster = new MiniDFSCluster(conf, 1, true, null);
-      FileSystem fs = cluster.getFileSystem();
-
-      // create files
-      for (Path filepath : filepaths) {
-        DFSTestUtil.createFile(fs, filepath, FILE_SIZE, (short) 1, 0L);
-        DFSTestUtil.waitReplication(fs, filepath, (short) 1);
-      }
-
-      // verify there are not corrupt files
-      ClientProtocol namenode = DFSClient.createNamenode(conf);
-      FileStatus[] badFiles = namenode.getCorruptFiles();
-      assertTrue("There are " + badFiles.length
-          + " corrupt files, but expecting none", badFiles.length == 0);
-
-      // Check if fsck -list-corruptfiles agree
-      String outstr = runFsck(conf, 0, true, "/", "-list-corruptfiles");
-      assertTrue(outstr.contains(NamenodeFsck
-          .buildSummaryResultForListCorruptFiles(0, "/")));
-
-      // Now corrupt all the files except for the last one
-      for (int idx = 0; idx < filepaths.length - 1; idx++) {
-        String blockName = DFSTestUtil.getFirstBlock(fs, filepaths[idx])
-            .getBlockName();
-        TestDatanodeBlockScanner.corruptReplica(blockName, 0);
-
-        // read the file so that the corrupt block is reported to NN
-        FSDataInputStream in = fs.open(filepaths[idx]);
-        try {
-          in.readFully(new byte[FILE_SIZE]);
-        } catch (ChecksumException ignored) { // checksum error is expected.
+      cluster.waitActive();
+      fs = cluster.getFileSystem();
+      DFSTestUtil util = new DFSTestUtil("testGetCorruptFiles", 3, 1, 1024);
+      util.createFiles(fs, "/corruptData", (short) 1);
+      util.waitReplication(fs, "/corruptData", (short) 1);
+
+      // String outStr = runFsck(conf, 0, true, "/corruptData", "-list-corruptfileblocks");
+      String outStr = runFsck(conf, 0, false, "/corruptData", "-list-corruptfileblocks");
+      System.out.println("1. good fsck out: " + outStr);
+      assertTrue(outStr.contains("has 0 CORRUPT files"));
+      // delete the blocks
+      File baseDir = new File(System.getProperty("test.build.data",
+      "build/test/data"),"dfs/data");
+      for (int i=0; i<8; i++) {
+        File data_dir = new File(baseDir, "data" +(i+1)+ MiniDFSCluster.FINALIZED_DIR_NAME);
+        File[] blocks = data_dir.listFiles();
+        if (blocks == null)
+          continue;
+
+        for (int idx = 0; idx < blocks.length; idx++) {
+          if (!blocks[idx].getName().startsWith("blk_")) {
+            continue;
+          }
+          assertTrue("Cannot remove file.", blocks[idx].delete());
         }
-        in.close();
       }
 
-      // verify if all corrupt files were reported to NN
-      badFiles = namenode.getCorruptFiles();
-      assertTrue("Expecting 3 corrupt files, but got " + badFiles.length,
-          badFiles.length == 3);
-
-      // check the corrupt file
-      String corruptFile = "/audiobook";
-      outstr = runFsck(conf, 1, true, corruptFile, "-list-corruptfiles");
-      assertTrue(outstr.contains(NamenodeFsck
-          .buildSummaryResultForListCorruptFiles(1, corruptFile)));
-
-      // check corrupt dir
-      String corruptDir = "/audio";
-      outstr = runFsck(conf, 1, true, corruptDir, "-list-corruptfiles");
-      assertTrue(outstr.contains("/audio/audio1"));
-      assertTrue(outstr.contains("/audio/audio2"));
-      assertTrue(outstr.contains(NamenodeFsck
-          .buildSummaryResultForListCorruptFiles(2, corruptDir)));
-
-      // check healthy file
-      String healthyFile = "/audio/audio";
-      outstr = runFsck(conf, 0, true, healthyFile, "-list-corruptfiles");
-      assertTrue(outstr.contains(NamenodeFsck
-          .buildSummaryResultForListCorruptFiles(0, healthyFile)));
-
-      // clean up
-      for (Path filepath : filepaths) {
-        fs.delete(filepath, false);
+      // wait for the namenode to see the corruption
+      final NameNode namenode = cluster.getNameNode();
+      Collection<FSNamesystem.CorruptFileBlockInfo> corruptFileBlocks = namenode
+          .listCorruptFileBlocks("/corruptData", null);
+      int numCorrupt = corruptFileBlocks.size();
+      while (numCorrupt == 0) {
+        Thread.sleep(1000);
+        corruptFileBlocks = namenode
+            .listCorruptFileBlocks("/corruptData", null);
+        numCorrupt = corruptFileBlocks.size();
       }
+      outStr = runFsck(conf, -1, true, "/corruptData", "-list-corruptfileblocks");
+      System.out.println("2. bad fsck out: " + outStr);
+      assertTrue(outStr.contains("has 3 CORRUPT files"));
+
+      // Do a listing on a dir which doesn't have any corrupt blocks and validate
+      util.createFiles(fs, "/goodData");
+      outStr = runFsck(conf, 0, true, "/goodData", "-list-corruptfileblocks");
+      System.out.println("3. good fsck out: " + outStr);
+      assertTrue(outStr.contains("has 0 CORRUPT files"));
+      util.cleanup(fs,"/corruptData");
+      util.cleanup(fs, "/goodData");
     } finally {
-      if (cluster != null) {
-        cluster.shutdown();
-      }
+      if (cluster != null) {cluster.shutdown();}
     }
   }
+
 }

Added: hadoop/hdfs/trunk/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/TestListCorruptFileBlocks.java
URL: http://svn.apache.org/viewvc/hadoop/hdfs/trunk/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/TestListCorruptFileBlocks.java?rev=996213&view=auto
==============================================================================
--- hadoop/hdfs/trunk/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/TestListCorruptFileBlocks.java (added)
+++ hadoop/hdfs/trunk/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/TestListCorruptFileBlocks.java Sat Sep 11 21:53:07 2010
@@ -0,0 +1,269 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.server.namenode;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.RandomAccessFile;
+import java.nio.ByteBuffer;
+import java.nio.channels.FileChannel;
+import java.util.Collection;
+import java.util.Random;
+
+import junit.framework.TestCase;
+
+import org.apache.commons.logging.Log;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.hdfs.BlockMissingException;
+import org.apache.hadoop.hdfs.DFSTestUtil;
+import org.apache.hadoop.hdfs.HdfsConfiguration;
+import org.apache.hadoop.hdfs.MiniDFSCluster;
+
+/**
+ * This class tests the listCorruptFileBlocks API.
+ * We create 3 files; intentionally delete their blocks
+ * Use listCorruptFileBlocks to validate that we get the list of corrupt
+ * files/blocks; also test the "paging" support by calling the API
+ * with a block # from a previous call and validate that the subsequent
+ * blocks/files are also returned.
+ */
+public class TestListCorruptFileBlocks extends TestCase {
+  static Log LOG = NameNode.stateChangeLog;
+
+  /** check if nn.getCorruptFiles() returns a file that has corrupted blocks */
+  public void testListCorruptFilesCorruptedBlock() throws Exception {
+    MiniDFSCluster cluster = null;
+    Random random = new Random();
+    
+    try {
+      Configuration conf = new HdfsConfiguration();
+      conf.setInt("dfs.datanode.directoryscan.interval", 1); // datanode scans directories
+      conf.setInt("dfs.blockreport.intervalMsec", 3 * 1000); // datanode sends block reports
+      cluster = new MiniDFSCluster(conf, 1, true, null);
+      FileSystem fs = cluster.getFileSystem();
+
+      // create two files with one block each
+      DFSTestUtil util = new DFSTestUtil("testCorruptFilesCorruptedBlock", 2, 1, 512);
+      util.createFiles(fs, "/srcdat10");
+
+      // fetch bad file list from namenode. There should be none.
+      final NameNode namenode = cluster.getNameNode();
+      Collection<FSNamesystem.CorruptFileBlockInfo> badFiles = namenode
+          .listCorruptFileBlocks("/", null);
+      assertTrue("Namenode has " + badFiles.size()
+          + " corrupt files. Expecting None.", badFiles.size() == 0);
+
+      // Now deliberately corrupt one block
+      File data_dir = new File(System.getProperty("test.build.data"),
+      "dfs/data/data1/current/finalized");
+      assertTrue("data directory does not exist", data_dir.exists());
+      File[] blocks = data_dir.listFiles();
+      assertTrue("Blocks do not exist in data-dir", (blocks != null) && (blocks.length > 0));
+      for (int idx = 0; idx < blocks.length; idx++) {
+        if (blocks[idx].getName().startsWith("blk_") &&
+            blocks[idx].getName().endsWith(".meta")) {
+          //
+          // shorten .meta file
+          //
+          RandomAccessFile file = new RandomAccessFile(blocks[idx], "rw");
+          FileChannel channel = file.getChannel();
+          long position = channel.size() - 2;
+          int length = 2;
+          byte[] buffer = new byte[length];
+          random.nextBytes(buffer);
+          channel.write(ByteBuffer.wrap(buffer), position);
+          file.close();
+          LOG.info("Deliberately corrupting file " + blocks[idx].getName() +
+              " at offset " + position + " length " + length);
+
+          // read all files to trigger detection of corrupted replica
+          try {
+            util.checkFiles(fs, "/srcdat10");
+          } catch (BlockMissingException e) {
+            System.out.println("Received BlockMissingException as expected.");
+          } catch (IOException e) {
+            assertTrue("Corrupted replicas not handled properly. Expecting BlockMissingException " +
+                " but received IOException " + e, false);
+          }
+          break;
+        }
+      }
+
+      // fetch bad file list from namenode. There should be one file.
+      badFiles = namenode.listCorruptFileBlocks("/", null);
+      LOG.info("Namenode has bad files. " + badFiles.size());
+      assertTrue("Namenode has " + badFiles.size() + " bad files. Expecting 1.",
+          badFiles.size() == 1);
+      util.cleanup(fs, "/srcdat10");
+    } finally {
+      if (cluster != null) { cluster.shutdown(); }
+    }
+  }
+  
+  // deliberately remove blocks from a file and validate the list-corrupt-file-blocks API
+  public void testlistCorruptFileBlocks() throws Exception {
+    Configuration conf = new Configuration();
+    conf.setLong("dfs.blockreport.intervalMsec", 1000);
+    conf.setInt("dfs.datanode.directoryscan.interval", 1); // datanode scans
+                                                           // directories
+    FileSystem fs = null;
+
+    MiniDFSCluster cluster = null;
+    try {
+      cluster = new MiniDFSCluster(conf, 1, true, null);
+      cluster.waitActive();
+      fs = cluster.getFileSystem();
+      DFSTestUtil util = new DFSTestUtil("testGetCorruptFiles", 3, 1, 1024);
+      util.createFiles(fs, "/corruptData");
+
+      final NameNode namenode = cluster.getNameNode();
+      Collection<FSNamesystem.CorruptFileBlockInfo> corruptFileBlocks = namenode
+          .listCorruptFileBlocks("/corruptData", null);
+      int numCorrupt = corruptFileBlocks.size();
+      assertTrue(numCorrupt == 0);
+      // delete the blocks
+      File baseDir = new File(System.getProperty("test.build.data",
+          "build/test/data"), "dfs/data");
+      for (int i = 0; i < 8; i++) {
+        File data_dir = new File(baseDir, "data" + (i + 1)
+            + MiniDFSCluster.FINALIZED_DIR_NAME);
+        File[] blocks = data_dir.listFiles();
+        if (blocks == null)
+          continue;
+        // assertTrue("Blocks do not exist in data-dir", (blocks != null) &&
+        // (blocks.length > 0));
+        for (int idx = 0; idx < blocks.length; idx++) {
+          if (!blocks[idx].getName().startsWith("blk_")) {
+            continue;
+          }
+          LOG.info("Deliberately removing file " + blocks[idx].getName());
+          assertTrue("Cannot remove file.", blocks[idx].delete());
+          // break;
+        }
+      }
+
+      int count = 0;
+      corruptFileBlocks = namenode.listCorruptFileBlocks("/corruptData", null);
+      numCorrupt = corruptFileBlocks.size();
+      while (numCorrupt < 3) {
+        Thread.sleep(1000);
+        corruptFileBlocks = namenode
+            .listCorruptFileBlocks("/corruptData", null);
+        numCorrupt = corruptFileBlocks.size();
+        count++;
+        if (count > 30)
+          break;
+      }
+      // Validate we get all the corrupt files
+      LOG.info("Namenode has bad files. " + numCorrupt);
+      assertTrue(numCorrupt == 3);
+      // test the paging here
+
+      FSNamesystem.CorruptFileBlockInfo[] cfb = corruptFileBlocks
+          .toArray(new FSNamesystem.CorruptFileBlockInfo[0]);
+      // now get the 2nd and 3rd file that is corrupt
+      Collection<FSNamesystem.CorruptFileBlockInfo> nextCorruptFileBlocks = namenode
+          .listCorruptFileBlocks("/corruptData", cfb[0].block.getBlockName());
+      FSNamesystem.CorruptFileBlockInfo[] ncfb = nextCorruptFileBlocks
+          .toArray(new FSNamesystem.CorruptFileBlockInfo[0]);
+      numCorrupt = nextCorruptFileBlocks.size();
+      assertTrue(numCorrupt == 2);
+      assertTrue(ncfb[0].block.getBlockName()
+          .equalsIgnoreCase(cfb[1].block.getBlockName()));
+
+      corruptFileBlocks = namenode.listCorruptFileBlocks("/corruptData",
+          ncfb[1].block.getBlockName());
+      numCorrupt = corruptFileBlocks.size();
+      assertTrue(numCorrupt == 0);
+      // Do a listing on a dir which doesn't have any corrupt blocks and
+      // validate
+      util.createFiles(fs, "/goodData");
+      corruptFileBlocks = namenode.listCorruptFileBlocks("/goodData", null);
+      numCorrupt = corruptFileBlocks.size();
+      assertTrue(numCorrupt == 0);
+      util.cleanup(fs, "/corruptData");
+      util.cleanup(fs, "/goodData");
+    } finally {
+      if (cluster != null) {
+        cluster.shutdown();
+      }
+    }
+  }
+  
+  /** check if NN.listCorruptFiles() returns the right limit */
+  public void testMaxCorruptFiles() throws Exception {
+    MiniDFSCluster cluster = null;
+    try {
+      Configuration conf = new HdfsConfiguration();
+      conf.setInt("dfs.datanode.directoryscan.interval", 15); // datanode scans directories
+      conf.setInt("dfs.blockreport.intervalMsec", 3 * 1000); // datanode sends block reports
+      cluster = new MiniDFSCluster(conf, 1, true, null);
+      FileSystem fs = cluster.getFileSystem();
+      final int maxCorruptFileBlocks = 
+        FSNamesystem.DEFAULT_MAX_CORRUPT_FILEBLOCKS_RETURNED;
+
+      // create 3 * maxCorruptFileBlocks files with one block each
+      DFSTestUtil util = new DFSTestUtil("testMaxCorruptFiles", 
+          maxCorruptFileBlocks * 3, 1, 512);
+      util.createFiles(fs, "/srcdat2", (short) 1);
+      util.waitReplication(fs, "/srcdat2", (short) 1);
+
+      // verify that there are no bad blocks.
+      final NameNode namenode = cluster.getNameNode();
+      Collection<FSNamesystem.CorruptFileBlockInfo> badFiles = namenode
+          .listCorruptFileBlocks("/srcdat2", null);
+      assertTrue("Namenode has " + badFiles.size() + " corrupt files. Expecting none.",
+          badFiles.size() == 0);
+
+      // Now deliberately remove blocks from all files
+      File baseDir = new File(System.getProperty("test.build.data",
+      "build/test/data"),"dfs/data");
+      for (int i=0; i<8; i++) {
+        File data_dir = new File(baseDir, "data" +(i+1)+ MiniDFSCluster.FINALIZED_DIR_NAME);
+        File[] blocks = data_dir.listFiles();
+        if (blocks == null)
+          continue;
+
+        for (int idx = 0; idx < blocks.length; idx++) {
+          if (!blocks[idx].getName().startsWith("blk_")) {
+            continue;
+          }
+          assertTrue("Cannot remove file.", blocks[idx].delete());
+        }
+      }
+
+      badFiles = namenode.listCorruptFileBlocks("/srcdat2", null);
+        
+       while (badFiles.size() < maxCorruptFileBlocks) {
+        LOG.info("# of corrupt files is: " + badFiles.size());
+        Thread.sleep(10000);
+        badFiles = namenode.listCorruptFileBlocks("/srcdat2", null);
+      }
+      badFiles = namenode.listCorruptFileBlocks("/srcdat2", null); 
+      LOG.info("Namenode has bad files. " + badFiles.size());
+      assertTrue("Namenode has " + badFiles.size() + " bad files. Expecting " + 
+          maxCorruptFileBlocks + ".",
+          badFiles.size() == maxCorruptFileBlocks);
+      util.cleanup(fs, "/srcdat2");
+    } finally {
+      if (cluster != null) { cluster.shutdown(); }
+    }
+  }
+
+}

Propchange: hadoop/hdfs/trunk/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/TestListCorruptFileBlocks.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Modified: hadoop/hdfs/trunk/src/webapps/hdfs/corrupt_files.jsp
URL: http://svn.apache.org/viewvc/hadoop/hdfs/trunk/src/webapps/hdfs/corrupt_files.jsp?rev=996213&r1=996212&r2=996213&view=diff
==============================================================================
--- hadoop/hdfs/trunk/src/webapps/hdfs/corrupt_files.jsp (original)
+++ hadoop/hdfs/trunk/src/webapps/hdfs/corrupt_files.jsp Sat Sep 11 21:53:07 2010
@@ -23,6 +23,7 @@
 	import="org.apache.hadoop.fs.FileStatus"
 	import="org.apache.hadoop.fs.FileUtil"
 	import="org.apache.hadoop.fs.Path"
+	import="java.util.Collection"
 	import="java.util.Arrays" %>
 <%!//for java.io.Serializable
   private static final long serialVersionUID = 1L;%>
@@ -32,9 +33,9 @@
   String namenodeRole = nn.getRole().toString();
   String namenodeLabel = nn.getNameNodeAddress().getHostName() + ":"
       + nn.getNameNodeAddress().getPort();
-  FileStatus[] corruptFileStatuses = nn.getCorruptFiles();
-  Path[] corruptFilePaths = FileUtil.stat2Paths(corruptFileStatuses);
-  int corruptFileCount = corruptFileStatuses.length;
+  Collection<FSNamesystem.CorruptFileBlockInfo> corruptFileBlocks = 
+	nn.listCorruptFileBlocks("/", null);
+  int corruptFileCount = corruptFileBlocks.size();
 %>
 
 <html>
@@ -58,11 +59,10 @@
     Please run fsck for a thorough health analysis.
 <%
   } else {
-    Arrays.sort(corruptFilePaths);
-    for (Path corruptFilePath : corruptFilePaths) {
-      String currentPath = corruptFilePath.toString();
+    for (FSNamesystem.CorruptFileBlockInfo c : corruptFileBlocks) {
+      String currentFileBlock = c.toString();
 %>
-      <%=currentPath%><br>
+      <%=currentFileBlock%><br>
 <%
     }
 %>