Posted to hdfs-commits@hadoop.apache.org by sh...@apache.org on 2010/09/11 23:53:08 UTC
svn commit: r996213 - in /hadoop/hdfs/trunk: ./
src/java/org/apache/hadoop/hdfs/protocol/
src/java/org/apache/hadoop/hdfs/server/namenode/
src/java/org/apache/hadoop/hdfs/tools/ src/test/hdfs/org/apache/hadoop/hdfs/
src/test/hdfs/org/apache/hadoop/hdfs...
Author: shv
Date: Sat Sep 11 21:53:07 2010
New Revision: 996213
URL: http://svn.apache.org/viewvc?rev=996213&view=rev
Log:
HDFS-1111. Introduce getCorruptFileBlocks() for fsck. Contributed by Sriram Rao.
Added:
hadoop/hdfs/trunk/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/TestListCorruptFileBlocks.java (with props)
Modified:
hadoop/hdfs/trunk/CHANGES.txt
hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/protocol/ClientProtocol.java
hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/server/namenode/BlockManager.java
hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java
hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/server/namenode/NamenodeFsck.java
hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/server/namenode/UnderReplicatedBlocks.java
hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/tools/DFSck.java
hadoop/hdfs/trunk/src/test/hdfs/org/apache/hadoop/hdfs/TestFileCorruption.java
hadoop/hdfs/trunk/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/TestCorruptFilesJsp.java
hadoop/hdfs/trunk/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/TestFsck.java
hadoop/hdfs/trunk/src/webapps/hdfs/corrupt_files.jsp
Modified: hadoop/hdfs/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/hdfs/trunk/CHANGES.txt?rev=996213&r1=996212&r2=996213&view=diff
==============================================================================
--- hadoop/hdfs/trunk/CHANGES.txt (original)
+++ hadoop/hdfs/trunk/CHANGES.txt Sat Sep 11 21:53:07 2010
@@ -124,6 +124,8 @@ Trunk (unreleased changes)
HDFS-1205. FSDatasetAsyncDiskService should name its threads.
(Todd Lipcon via eli)
+ HDFS-1111. Introduce getCorruptFileBlocks() for fsck. (Sriram Rao via shv)
+
OPTIMIZATIONS
HDFS-1140. Speedup INode.getPathComponents. (Dmytro Molkov via shv)
Modified: hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/protocol/ClientProtocol.java
URL: http://svn.apache.org/viewvc/hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/protocol/ClientProtocol.java?rev=996213&r1=996212&r2=996213&view=diff
==============================================================================
--- hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/protocol/ClientProtocol.java (original)
+++ hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/protocol/ClientProtocol.java Sat Sep 11 21:53:07 2010
@@ -26,7 +26,6 @@ import org.apache.hadoop.classification.
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.fs.ContentSummary;
import org.apache.hadoop.fs.CreateFlag;
-import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FsServerDefaults;
import org.apache.hadoop.fs.Options;
import org.apache.hadoop.fs.FileAlreadyExistsException;
@@ -68,9 +67,9 @@ public interface ClientProtocol extends
* Compared to the previous version the following changes have been introduced:
* (Only the latest change is reflected.
* The log of historical changes can be retrieved from the svn).
- * 63: remove getBlockLocations optimization
+ * 64: Remove ClientProtocol changes related to -list-corruptfiles
*/
- public static final long versionID = 63L;
+ public static final long versionID = 64L;
///////////////////////////////////////
// File contents
@@ -665,14 +664,6 @@ public interface ClientProtocol extends
* @throws IOException
*/
public void metaSave(String filename) throws IOException;
-
- /**
- * @return Array of FileStatus objects referring to corrupted files.
- * The server could return all or a few of the files that are corrupt.
- * @throws AccessControlException
- * @throws IOException
- */
- FileStatus[] getCorruptFiles() throws AccessControlException, IOException;
/**
* Get the file info for a specific file or directory.
Modified: hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/server/namenode/BlockManager.java
URL: http://svn.apache.org/viewvc/hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/server/namenode/BlockManager.java?rev=996213&r1=996212&r2=996213&view=diff
==============================================================================
--- hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/server/namenode/BlockManager.java (original)
+++ hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/server/namenode/BlockManager.java Sat Sep 11 21:53:07 2010
@@ -24,7 +24,6 @@ import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.Iterator;
-import java.util.LinkedHashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
@@ -1724,27 +1723,13 @@ public class BlockManager {
Long startingBlockId) {
return corruptReplicas.getCorruptReplicaBlockIds(numExpectedBlocks,
startingBlockId);
- }
-
+ }
+
/**
- * @return inodes of files with corrupt blocks, with a maximum of
- * MAX_CORRUPT_FILES_RETURNED inodes listed in total
+ * Return an iterator over the set of blocks for which there are no replicas.
*/
- INode[] getCorruptInodes() {
- LinkedHashSet<INode> set = new LinkedHashSet<INode>();
-
- for (Block blk :
- neededReplications.getQueue(
- UnderReplicatedBlocks.QUEUE_WITH_CORRUPT_BLOCKS)){
- INode inode = blocksMap.getINode(blk);
- if (inode != null && countNodes(blk).liveReplicas() == 0) {
- set.add(inode);
- if (set.size() >= this.maxCorruptFilesReturned) {
- break;
- }
- }
- }
- return set.toArray(new INode[set.size()]);
+ BlockIterator getCorruptReplicaBlockIterator() {
+ return neededReplications
+ .iterator(UnderReplicatedBlocks.QUEUE_WITH_CORRUPT_BLOCKS);
}
-
}
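
For orientation (not part of the commit): where the old getCorruptInodes() eagerly built a bounded array of INodes, callers now pull blocks lazily from the corrupt-replica priority queue. A minimal sketch of consuming the new iterator, assuming a BlockManager reference named blockManager (name hypothetical):

    // Sketch only; FSNamesystem.listCorruptFileBlocks() below is the real consumer.
    void walkCorruptBlocks(BlockManager blockManager) {
      UnderReplicatedBlocks.BlockIterator it =
          blockManager.getCorruptReplicaBlockIterator();
      while (it.hasNext()) {
        Block blk = it.next();
        // only blocks with no live replicas left are reported as corrupt
        if (blockManager.countNodes(blk).liveReplicas() == 0) {
          INode inode = blockManager.getINode(blk);  // file owning the block
          // ... resolve the inode to a path, as listCorruptFileBlocks() does
        }
      }
    }
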
Modified: hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
URL: http://svn.apache.org/viewvc/hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java?rev=996213&r1=996212&r2=996213&view=diff
==============================================================================
--- hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java (original)
+++ hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java Sat Sep 11 21:53:07 2010
@@ -52,6 +52,7 @@ import org.apache.hadoop.net.Node;
import org.apache.hadoop.net.NodeBase;
import org.apache.hadoop.net.ScriptBasedMapping;
import org.apache.hadoop.hdfs.server.namenode.LeaseManager.Lease;
+import org.apache.hadoop.hdfs.server.namenode.UnderReplicatedBlocks.BlockIterator;
import org.apache.hadoop.hdfs.server.protocol.BlocksWithLocations;
import org.apache.hadoop.hdfs.server.protocol.DatanodeCommand;
import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration;
@@ -67,7 +68,6 @@ import org.apache.hadoop.hdfs.DFSConfigK
import org.apache.hadoop.fs.ContentSummary;
import org.apache.hadoop.fs.CreateFlag;
import org.apache.hadoop.fs.FileAlreadyExistsException;
-import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FsServerDefaults;
import org.apache.hadoop.fs.InvalidPathException;
import org.apache.hadoop.fs.ParentNotDirectoryException;
@@ -88,8 +88,8 @@ import java.io.File;
import java.io.FileWriter;
import java.io.FileNotFoundException;
import java.io.IOException;
-import java.io.PrintWriter;
import java.io.DataOutputStream;
+import java.io.PrintWriter;
import java.lang.management.ManagementFactory;
import java.net.InetAddress;
import java.net.URI;
@@ -162,6 +162,7 @@ public class FSNamesystem implements FSC
public static final Log auditLog = LogFactory.getLog(
FSNamesystem.class.getName() + ".audit");
+ static final int DEFAULT_MAX_CORRUPT_FILEBLOCKS_RETURNED = 100;
static int BLOCK_DELETION_INCREMENT = 1000;
private boolean isPermissionEnabled;
private UserGroupInformation fsOwner;
@@ -4464,37 +4465,57 @@ public class FSNamesystem implements FSC
return blockManager.getCorruptReplicaBlockIds(numExpectedBlocks,
startingBlockId);
}
-
+
+ static class CorruptFileBlockInfo {
+ String path;
+ Block block;
+
+ public CorruptFileBlockInfo(String p, Block b) {
+ path = p;
+ block = b;
+ }
+
+ public String toString() {
+ return block.getBlockName() + "\t" + path;
+ }
+ }
/**
- * @return Array of FileStatus objects representing files with
- * corrupted blocks.
+ * @param path Restrict corrupt files to this portion of namespace.
+ * @param startBlockAfter Support for continuation; the set of files we return
+ * back is ordered by blockid; startBlockAfter tells where to start from
+ * @return a list in which each entry describes a corrupt file/block
* @throws AccessControlException
* @throws IOException
*/
- synchronized FileStatus[] getCorruptFiles()
- throws AccessControlException, IOException {
-
+ synchronized Collection<CorruptFileBlockInfo> listCorruptFileBlocks(String path,
+ String startBlockAfter) throws AccessControlException, IOException {
+
checkSuperuserPrivilege();
-
- INode[] inodes = blockManager.getCorruptInodes();
- FileStatus[] ret = new FileStatus[inodes.length];
-
- int i = 0;
- for (INode inode: inodes) {
- String src = inode.getFullPathName();
- ret[i++] = new FileStatus(inode.computeContentSummary().getLength(),
- inode.isDirectory(),
- ((INodeFile)inode).getReplication(),
- ((INodeFile)inode).getPreferredBlockSize(),
- inode.getModificationTime(),
- inode.getAccessTime(),
- inode.getFsPermission(),
- inode.getUserName(),
- inode.getGroupName(),
- new Path(src));
+ long startBlockId = 0;
+ // print a limited # of corrupt files per call
+ int count = 0;
+ ArrayList<CorruptFileBlockInfo> corruptFiles = new ArrayList<CorruptFileBlockInfo>();
+
+ if (startBlockAfter != null) {
+ startBlockId = Block.filename2id(startBlockAfter);
+ }
+ BlockIterator blkIterator = blockManager.getCorruptReplicaBlockIterator();
+ while (blkIterator.hasNext()) {
+ Block blk = blkIterator.next();
+ INode inode = blockManager.getINode(blk);
+ if (inode != null && blockManager.countNodes(blk).liveReplicas() == 0) {
+ String src = FSDirectory.getFullPathName(inode);
+ if (((startBlockAfter == null) || (blk.getBlockId() > startBlockId))
+ && (src.startsWith(path))) {
+ corruptFiles.add(new CorruptFileBlockInfo(src, blk));
+ count++;
+ if (count >= DEFAULT_MAX_CORRUPT_FILEBLOCKS_RETURNED)
+ break;
+ }
+ }
}
-
- return ret;
+ LOG.info("list corrupt file blocks returned: " + count);
+ return corruptFiles;
}
public synchronized ArrayList<DatanodeDescriptor> getDecommissioningNodes() {
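
Taken together, the continuation contract is: entries come back ordered by block id, at most DEFAULT_MAX_CORRUPT_FILEBLOCKS_RETURNED per call, and the caller resumes by passing the name of the last block it saw. A sketch (not part of the commit, names hypothetical) of draining the whole list from inside the namenode:

    void drainCorruptFileBlocks(FSNamesystem fsn) throws IOException {
      Collection<FSNamesystem.CorruptFileBlockInfo> batch =
          fsn.listCorruptFileBlocks("/", null);
      while (!batch.isEmpty()) {
        FSNamesystem.CorruptFileBlockInfo last = null;
        for (FSNamesystem.CorruptFileBlockInfo info : batch) {
          System.out.println(info);            // "<blockname>\t<path>"
          last = info;
        }
        // resume strictly after the last block id returned
        batch = fsn.listCorruptFileBlocks("/", last.block.getBlockName());
      }
    }
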
Modified: hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java
URL: http://svn.apache.org/viewvc/hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java?rev=996213&r1=996212&r2=996213&view=diff
==============================================================================
--- hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java (original)
+++ hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java Sat Sep 11 21:53:07 2010
@@ -33,7 +33,6 @@ import org.apache.hadoop.classification.
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.ContentSummary;
import org.apache.hadoop.fs.CreateFlag;
-import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FsServerDefaults;
import org.apache.hadoop.fs.Options;
@@ -1118,12 +1117,20 @@ public class NameNode implements Namenod
namesystem.metaSave(filename);
}
- /** {@inheritDoc} */
- public FileStatus[] getCorruptFiles()
- throws AccessControlException, IOException {
-
- return namesystem.getCorruptFiles();
-
+ /**
+ *
+ * @param path
+ * Sub-tree used in querying corrupt files
+ * @param startBlockAfter
+ * Paging support---pass in the last block returned from the previous
+ * call and some # of corrupt blocks after that point are returned
+ * @return a list in which each entry describes a corrupt file/block
+ * @throws AccessControlException
+ * @throws IOException
+ */
+ public Collection<FSNamesystem.CorruptFileBlockInfo> listCorruptFileBlocks(String path,
+ String startBlockAfter) throws AccessControlException, IOException {
+ return namesystem.listCorruptFileBlocks(path, startBlockAfter);
}
/** {@inheritDoc} */
Modified: hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/server/namenode/NamenodeFsck.java
URL: http://svn.apache.org/viewvc/hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/server/namenode/NamenodeFsck.java?rev=996213&r1=996212&r2=996213&view=diff
==============================================================================
--- hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/server/namenode/NamenodeFsck.java (original)
+++ hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/server/namenode/NamenodeFsck.java Sat Sep 11 21:53:07 2010
@@ -24,6 +24,7 @@ import java.net.InetAddress;
import java.net.InetSocketAddress;
import java.net.Socket;
import java.util.ArrayList;
+import java.util.Collection;
import java.util.Date;
import java.util.Iterator;
import java.util.List;
@@ -35,8 +36,6 @@ import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileStatus;
-import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.BlockReader;
import org.apache.hadoop.hdfs.DFSClient;
import org.apache.hadoop.hdfs.protocol.Block;
@@ -105,9 +104,14 @@ public class NamenodeFsck {
private boolean showBlocks = false;
private boolean showLocations = false;
private boolean showRacks = false;
- private boolean showCorruptFiles = false;
+ private boolean showCorruptFileBlocks = false;
private int fixing = FIXING_NONE;
private String path = "/";
+
+ // We return back N files that are corrupt; the list of files returned is
+ // ordered by block id; to allow continuation support, pass in the last block
+ // # from previous call
+ private String startBlockAfter = null;
private final Configuration conf;
private final PrintWriter out;
@@ -145,7 +149,12 @@ public class NamenodeFsck {
else if (key.equals("locations")) { this.showLocations = true; }
else if (key.equals("racks")) { this.showRacks = true; }
else if (key.equals("openforwrite")) {this.showOpenFiles = true; }
- else if (key.equals("corruptfiles")) {this.showCorruptFiles = true; }
+ else if (key.equals("listcorruptfileblocks")) {
+ this.showCorruptFileBlocks = true;
+ }
+ else if (key.equals("startblockafter")) {
+ this.startBlockAfter = pmap.get("startblockafter")[0];
+ }
}
}
@@ -164,8 +173,8 @@ public class NamenodeFsck {
final HdfsFileStatus file = namenode.getFileInfo(path);
if (file != null) {
- if (showCorruptFiles) {
- listCorruptFiles();
+ if (showCorruptFileBlocks) {
+ listCorruptFileBlocks();
return;
}
@@ -205,53 +214,25 @@ public class NamenodeFsck {
}
}
- static String buildSummaryResultForListCorruptFiles(int corruptFilesCount,
- String pathName) {
-
- String summary = "";
-
- if (corruptFilesCount == 0) {
- summary = "Unable to locate any corrupt files under '" + pathName
- + "'.\n\nPlease run a complete fsck to confirm if '" + pathName
- + "' " + HEALTHY_STATUS;
- } else if (corruptFilesCount == 1) {
- summary = "There is at least 1 corrupt file under '" + pathName
- + "', which " + CORRUPT_STATUS;
- } else if (corruptFilesCount > 1) {
- summary = "There are at least " + corruptFilesCount
- + " corrupt files under '" + pathName + "', which " + CORRUPT_STATUS;
+ private void listCorruptFileBlocks() throws AccessControlException,
+ IOException {
+ Collection<FSNamesystem.CorruptFileBlockInfo> corruptFiles = namenode
+ .listCorruptFileBlocks(path, startBlockAfter);
+ int numCorruptFiles = corruptFiles.size();
+ String filler;
+ if (numCorruptFiles > 0) {
+ filler = Integer.toString(numCorruptFiles);
+ } else if (startBlockAfter == null) {
+ filler = "no";
} else {
- throw new IllegalArgumentException("corruptFilesCount must be positive");
+ filler = "no more";
}
-
- return summary;
- }
-
- private void listCorruptFiles() throws AccessControlException, IOException {
- int matchedCorruptFilesCount = 0;
- // directory representation of path
- String pathdir = path.endsWith(Path.SEPARATOR) ? path : path + Path.SEPARATOR;
- FileStatus[] corruptFileStatuses = namenode.getCorruptFiles();
-
- for (FileStatus fileStatus : corruptFileStatuses) {
- String currentPath = fileStatus.getPath().toString();
- if (currentPath.startsWith(pathdir) || currentPath.equals(path)) {
- matchedCorruptFilesCount++;
-
- // print the header before listing first item
- if (matchedCorruptFilesCount == 1 ) {
- out.println("Here are a few files that may be corrupted:");
- out.println("===========================================");
- }
-
- out.println(currentPath);
- }
+ for (FSNamesystem.CorruptFileBlockInfo c : corruptFiles) {
+ out.println(c.toString());
}
-
+ out.println("\n\nThe filesystem under path '" + path + "' has " + filler
+ + " CORRUPT files");
out.println();
- out.println(buildSummaryResultForListCorruptFiles(matchedCorruptFilesCount,
- path));
-
}
private void check(String parent, HdfsFileStatus file, Result res) throws IOException {
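
With this change the servlet output for listcorruptfileblocks is line-oriented: one "<blockname>\t<path>" entry per corrupt block (CorruptFileBlockInfo.toString() above), two blank lines, then a summary line whose count is either a number, "no", or "no more". Illustratively (block ids and paths made up):

    blk_1073741826	/corruptData/file1
    blk_1073741827	/corruptData/file2


    The filesystem under path '/corruptData' has 2 CORRUPT files
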
Modified: hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/server/namenode/UnderReplicatedBlocks.java
URL: http://svn.apache.org/viewvc/hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/server/namenode/UnderReplicatedBlocks.java?rev=996213&r1=996212&r2=996213&view=diff
==============================================================================
--- hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/server/namenode/UnderReplicatedBlocks.java (original)
+++ hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/server/namenode/UnderReplicatedBlocks.java Sat Sep 11 21:53:07 2010
@@ -195,52 +195,66 @@ class UnderReplicatedBlocks implements I
}
}
- /* returns an interator of all blocks in a given priority queue */
- public synchronized Iterable<Block> getQueue(int priority) {
- if (priority < 0 || priority >= LEVEL) {
- return null;
- }
- return priorityQueues.get(priority);
+ /* returns an iterator of all blocks in a given priority queue */
+ synchronized BlockIterator iterator(int level) {
+ return new BlockIterator(level);
}
-
+
/* return an iterator of all the under replication blocks */
public synchronized BlockIterator iterator() {
return new BlockIterator();
}
- class BlockIterator implements Iterator<Block> {
- private int level;
- private List<Iterator<Block>> iterators = new ArrayList<Iterator<Block>>();
- BlockIterator()
- {
- level=0;
- for(int i=0; i<LEVEL; i++) {
- iterators.add(priorityQueues.get(i).iterator());
- }
- }
-
- private void update() {
- while(level< LEVEL-1 && !iterators.get(level).hasNext()) {
- level++;
- }
- }
-
- public Block next() {
- update();
- return iterators.get(level).next();
+ class BlockIterator implements Iterator<Block> {
+ private int level;
+ private boolean isIteratorForLevel = false;
+ private List<Iterator<Block>> iterators = new ArrayList<Iterator<Block>>();
+
+ BlockIterator()
+ {
+ level=0;
+ for(int i=0; i<LEVEL; i++) {
+ iterators.add(priorityQueues.get(i).iterator());
}
-
- public boolean hasNext() {
- update();
- return iterators.get(level).hasNext();
+ }
+
+ BlockIterator(int l) {
+ level = l;
+ isIteratorForLevel = true;
+ iterators.add(priorityQueues.get(level).iterator());
+ }
+
+ private void update() {
+ if (isIteratorForLevel)
+ return;
+ while(level< LEVEL-1 && !iterators.get(level).hasNext()) {
+ level++;
}
-
- public void remove() {
+ }
+
+ public Block next() {
+ if (isIteratorForLevel)
+ return iterators.get(0).next();
+ update();
+ return iterators.get(level).next();
+ }
+
+ public boolean hasNext() {
+ if (isIteratorForLevel)
+ return iterators.get(0).hasNext();
+ update();
+ return iterators.get(level).hasNext();
+ }
+
+ public void remove() {
+ if (isIteratorForLevel)
+ iterators.get(0).remove();
+ else
iterators.get(level).remove();
- }
-
- public int getPriority() {
- return level;
+ }
+
+ public int getPriority() {
+ return level;
};
- }
+ }
}
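
The net effect (sketch, not part of the commit): UnderReplicatedBlocks now hands out two iterator flavors, the original one that walks every priority level in order and a new level-pinned one that never advances past its queue. Assuming an instance named neededReplications, as in BlockManager:

    // original form: iterates level 0, then 1, ..., up to LEVEL-1
    UnderReplicatedBlocks.BlockIterator all = neededReplications.iterator();

    // new form: stays on a single priority queue; this is what
    // BlockManager.getCorruptReplicaBlockIterator() uses
    UnderReplicatedBlocks.BlockIterator corrupt =
        neededReplications.iterator(UnderReplicatedBlocks.QUEUE_WITH_CORRUPT_BLOCKS);
    while (corrupt.hasNext()) {
      Block b = corrupt.next();  // blocks from the corrupt queue only
    }
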
Modified: hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/tools/DFSck.java
URL: http://svn.apache.org/viewvc/hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/tools/DFSck.java?rev=996213&r1=996212&r2=996213&view=diff
==============================================================================
--- hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/tools/DFSck.java (original)
+++ hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/tools/DFSck.java Sat Sep 11 21:53:07 2010
@@ -31,7 +31,6 @@ import org.apache.hadoop.conf.Configurat
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.hdfs.server.namenode.NameNode;
import org.apache.hadoop.hdfs.server.namenode.NamenodeFsck;
-import org.apache.hadoop.hdfs.DFSConfigKeys;
import org.apache.hadoop.hdfs.HdfsConfiguration;
import org.apache.hadoop.security.Krb5AndCertsSslSocketConnector;
import org.apache.hadoop.security.SecurityUtil;
@@ -85,14 +84,16 @@ public class DFSck extends Configured im
* Print fsck usage information
*/
static void printUsage() {
- System.err.println("Usage: DFSck <path> [-list-corruptfiles | [-move | -delete | -openforwrite ] [-files [-blocks [-locations | -racks]]]] ");
+ System.err.println("Usage: DFSck <path> [-list-corruptfileblocks | " +
+ "[-move | -delete | -openforwrite] " +
+ "[-files [-blocks [-locations | -racks]]]]");
System.err.println("\t<path>\tstart checking from this path");
System.err.println("\t-move\tmove corrupted files to /lost+found");
System.err.println("\t-delete\tdelete corrupted files");
System.err.println("\t-files\tprint out files being checked");
System.err.println("\t-openforwrite\tprint out files opened for write");
- System.err.println("\t-list-corruptfiles\tprint out corrupt files up to a "+
- "maximum defined by property dfs.corruptfilesreturned.max");
+ System.err.println("\t-list-corruptfileblocks\tprint out list of missing "
+ + "blocks and files they belong to");
System.err.println("\t-blocks\tprint out block report");
System.err.println("\t-locations\tprint out locations for every block");
System.err.println("\t-racks\tprint out network topology for data-node locations");
@@ -123,6 +124,67 @@ public class DFSck extends Configured im
throw new IOException(e);
}
}
+
+ /*
+ * To get the list, we need to call iteratively until the server says
+ * there is no more left.
+ */
+ private Integer listCorruptFileBlocks(String dir, String baseUrl)
+ throws IOException {
+ int errCode = -1;
+ int numCorrupt = 0;
+ String lastBlock = null;
+ final String noCorruptLine = "has no CORRUPT files";
+ final String noMoreCorruptLine = "has no more CORRUPT files";
+ boolean allDone = false;
+ while (!allDone) {
+ final StringBuffer url = new StringBuffer(baseUrl);
+ if (lastBlock != null) {
+ url.append("&startblockafter=").append(lastBlock);
+ }
+ URL path = new URL(url.toString());
+ SecurityUtil.fetchServiceTicket(path);
+ URLConnection connection = path.openConnection();
+ InputStream stream = connection.getInputStream();
+ BufferedReader input = new BufferedReader(new InputStreamReader(
+ stream, "UTF-8"));
+ try {
+ String line = null;
+ while ((line = input.readLine()) != null) {
+ if ((line.endsWith(noCorruptLine)) ||
+ (line.endsWith(noMoreCorruptLine)) ||
+ (line.endsWith(NamenodeFsck.NONEXISTENT_STATUS))) {
+ allDone = true;
+ break;
+ }
+ if ((line.isEmpty())
+ || (line.startsWith("FSCK started by"))
+ || (line.startsWith("The filesystem under path")))
+ continue;
+ numCorrupt++;
+ if (numCorrupt == 1) {
+ System.out.println("The list of corrupt files under path '"
+ + dir + "' are:");
+ }
+ System.out.println(line);
+ try {
+ // Get the block # that we need to send in next call
+ lastBlock = line.split("\t")[0];
+ } catch (Exception e) {
+ allDone = true;
+ break;
+ }
+ }
+ } finally {
+ input.close();
+ }
+ }
+ System.out.println("The filesystem under path '" + dir + "' has "
+ + numCorrupt + " CORRUPT files");
+ if (numCorrupt == 0)
+ errCode = 0;
+ return errCode;
+ }
private int doWork(final String[] args) throws IOException {
String proto = "http://";
@@ -141,15 +203,22 @@ public class DFSck extends Configured im
if (!args[idx].startsWith("-")) { dir = args[idx]; break; }
}
url.append(URLEncoder.encode(dir, "UTF-8"));
+ boolean doListCorruptFileBlocks = false;
for (int idx = 0; idx < args.length; idx++) {
if (args[idx].equals("-move")) { url.append("&move=1"); }
else if (args[idx].equals("-delete")) { url.append("&delete=1"); }
else if (args[idx].equals("-files")) { url.append("&files=1"); }
else if (args[idx].equals("-openforwrite")) { url.append("&openforwrite=1"); }
- else if (args[idx].equals("-list-corruptfiles")) { url.append("&corruptfiles=1"); }
else if (args[idx].equals("-blocks")) { url.append("&blocks=1"); }
else if (args[idx].equals("-locations")) { url.append("&locations=1"); }
else if (args[idx].equals("-racks")) { url.append("&racks=1"); }
+ else if (args[idx].equals("-list-corruptfileblocks")) {
+ url.append("&listcorruptfileblocks=1");
+ doListCorruptFileBlocks = true;
+ }
+ }
+ if (doListCorruptFileBlocks) {
+ return listCorruptFileBlocks(dir, url.toString());
}
URL path = new URL(url.toString());
SecurityUtil.fetchServiceTicket(path);
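
Each corrupt entry the servlet prints is "<blockname>\t<path>" (CorruptFileBlockInfo.toString()), which is what lets the loop above recover its continuation token with line.split("\t")[0]. A one-line illustration with a made-up entry:

    String line = "blk_1073741825\t/corruptData/file0";  // hypothetical entry
    String lastBlock = line.split("\t")[0];              // -> "blk_1073741825"
    // sent back on the next request as &startblockafter=blk_1073741825
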
Modified: hadoop/hdfs/trunk/src/test/hdfs/org/apache/hadoop/hdfs/TestFileCorruption.java
URL: http://svn.apache.org/viewvc/hadoop/hdfs/trunk/src/test/hdfs/org/apache/hadoop/hdfs/TestFileCorruption.java?rev=996213&r1=996212&r2=996213&view=diff
==============================================================================
--- hadoop/hdfs/trunk/src/test/hdfs/org/apache/hadoop/hdfs/TestFileCorruption.java (original)
+++ hadoop/hdfs/trunk/src/test/hdfs/org/apache/hadoop/hdfs/TestFileCorruption.java Sat Sep 11 21:53:07 2010
@@ -22,12 +22,7 @@ import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.File;
import java.io.FileOutputStream;
-import java.io.IOException;
-import java.io.RandomAccessFile;
-import java.nio.ByteBuffer;
-import java.nio.channels.FileChannel;
import java.util.ArrayList;
-import java.util.Random;
import junit.framework.TestCase;
@@ -35,11 +30,9 @@ import org.apache.commons.logging.Log;
import org.apache.commons.logging.impl.Log4JLogger;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.ChecksumException;
-import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.protocol.Block;
-import org.apache.hadoop.hdfs.protocol.ClientProtocol;
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
import org.apache.hadoop.hdfs.server.common.GenerationStamp;
import org.apache.hadoop.hdfs.server.datanode.DataNode;
@@ -188,172 +181,4 @@ public class TestFileCorruption extends
}
return new Block(blockId, blocks[idx].length(), blockTimeStamp);
}
-
- /** check if ClientProtocol.getCorruptFiles() returns a file that has missing blocks */
- public void testCorruptFilesMissingBlock() throws Exception {
- MiniDFSCluster cluster = null;
- try {
- Configuration conf = new HdfsConfiguration();
- conf.setInt("dfs.datanode.directoryscan.interval", 1); // datanode scans directories
- conf.setInt("dfs.blockreport.intervalMsec", 3 * 1000); // datanode sends block reports
- cluster = new MiniDFSCluster(conf, 1, true, null);
- FileSystem fs = cluster.getFileSystem();
-
- // create two files with one block each
- DFSTestUtil util = new DFSTestUtil("testCorruptFilesMissingBlock", 2, 1, 512);
- util.createFiles(fs, "/srcdat");
-
- // verify that there are no bad blocks.
- ClientProtocol namenode = DFSClient.createNamenode(conf);
- FileStatus[] badFiles = namenode.getCorruptFiles();
- assertTrue("Namenode has " + badFiles.length + " corrupt files. Expecting none.",
- badFiles.length == 0);
-
- // Now deliberately remove one block
- File data_dir = new File(System.getProperty("test.build.data"),
- "dfs/data/data1/current/finalized");
- assertTrue("data directory does not exist", data_dir.exists());
- File[] blocks = data_dir.listFiles();
- assertTrue("Blocks do not exist in data-dir", (blocks != null) && (blocks.length > 0));
- for (int idx = 0; idx < blocks.length; idx++) {
- if (!blocks[idx].getName().startsWith("blk_")) {
- continue;
- }
- LOG.info("Deliberately removing file "+blocks[idx].getName());
- assertTrue("Cannot remove file.", blocks[idx].delete());
- break;
- }
-
- badFiles = namenode.getCorruptFiles();
- while (badFiles.length == 0) {
- Thread.sleep(1000);
- badFiles = namenode.getCorruptFiles();
- }
- LOG.info("Namenode has bad files. " + badFiles.length);
- assertTrue("Namenode has " + badFiles.length + " bad files. Expecting 1.",
- badFiles.length == 1);
- util.cleanup(fs, "/srcdat");
- } finally {
- if (cluster != null) { cluster.shutdown(); }
- }
- }
-
- /** check if ClientProtocol.getCorruptFiles() returns the right limit */
- public void testMaxCorruptFiles() throws Exception {
- MiniDFSCluster cluster = null;
- try {
- Configuration conf = new HdfsConfiguration();
- conf.setInt("dfs.corruptfilesreturned.max", 2);
- conf.setInt("dfs.datanode.directoryscan.interval", 1); // datanode scans directories
- conf.setInt("dfs.blockreport.intervalMsec", 3 * 1000); // datanode sends block reports
- cluster = new MiniDFSCluster(conf, 1, true, null);
- FileSystem fs = cluster.getFileSystem();
-
- // create two files with one block each
- DFSTestUtil util = new DFSTestUtil("testMaxCorruptFiles", 4, 1, 512);
- util.createFiles(fs, "/srcdat2");
-
- // verify that there are no bad blocks.
- ClientProtocol namenode = DFSClient.createNamenode(conf);
- FileStatus[] badFiles = namenode.getCorruptFiles();
- assertTrue("Namenode has " + badFiles.length + " corrupt files. Expecting none.",
- badFiles.length == 0);
-
- // Now deliberately remove one block
- File data_dir = new File(System.getProperty("test.build.data"),
- "dfs/data/data1/current/finalized");
- assertTrue("data directory does not exist", data_dir.exists());
- File[] blocks = data_dir.listFiles();
- assertTrue("Blocks do not exist in data-dir", (blocks != null) && (blocks.length > 0));
- for (int idx = 0; idx < blocks.length; idx++) {
- if (!blocks[idx].getName().startsWith("blk_")) {
- continue;
- }
- LOG.info("Deliberately removing file "+blocks[idx].getName());
- assertTrue("Cannot remove file.", blocks[idx].delete());
- }
-
- badFiles = namenode.getCorruptFiles();
- while (badFiles.length < 2) {
- badFiles = namenode.getCorruptFiles();
- Thread.sleep(10000);
- }
- badFiles = namenode.getCorruptFiles(); // once more since time has passed
- LOG.info("Namenode has bad files. " + badFiles.length);
- assertTrue("Namenode has " + badFiles.length + " bad files. Expecting 2.",
- badFiles.length == 2);
- util.cleanup(fs, "/srcdat2");
- } finally {
- if (cluster != null) { cluster.shutdown(); }
- }
- }
-
- /** check if ClientProtocol.getCorruptFiles() returns a file that has corrupted blocks */
- public void testCorruptFilesCorruptedBlock() throws Exception {
- MiniDFSCluster cluster = null;
- Random random = new Random();
-
- try {
- Configuration conf = new HdfsConfiguration();
- conf.setInt("dfs.datanode.directoryscan.interval", 1); // datanode scans directories
- conf.setInt("dfs.blockreport.intervalMsec", 3 * 1000); // datanode sends block reports
- cluster = new MiniDFSCluster(conf, 1, true, null);
- FileSystem fs = cluster.getFileSystem();
-
- // create two files with one block each
- DFSTestUtil util = new DFSTestUtil("testCorruptFilesCorruptedBlock", 2, 1, 512);
- util.createFiles(fs, "/srcdat10");
-
- // fetch bad file list from namenode. There should be none.
- ClientProtocol namenode = DFSClient.createNamenode(conf);
- FileStatus[] badFiles = namenode.getCorruptFiles();
- assertTrue("Namenode has " + badFiles.length + " corrupt files. Expecting None.",
- badFiles.length == 0);
-
- // Now deliberately corrupt one block
- File data_dir = new File(System.getProperty("test.build.data"),
- "dfs/data/data1/current/finalized");
- assertTrue("data directory does not exist", data_dir.exists());
- File[] blocks = data_dir.listFiles();
- assertTrue("Blocks do not exist in data-dir", (blocks != null) && (blocks.length > 0));
- for (int idx = 0; idx < blocks.length; idx++) {
- if (blocks[idx].getName().startsWith("blk_") &&
- blocks[idx].getName().endsWith(".meta")) {
- //
- // shorten .meta file
- //
- RandomAccessFile file = new RandomAccessFile(blocks[idx], "rw");
- FileChannel channel = file.getChannel();
- long position = channel.size() - 2;
- int length = 2;
- byte[] buffer = new byte[length];
- random.nextBytes(buffer);
- channel.write(ByteBuffer.wrap(buffer), position);
- file.close();
- LOG.info("Deliberately corrupting file " + blocks[idx].getName() +
- " at offset " + position + " length " + length);
-
- // read all files to trigger detection of corrupted replica
- try {
- util.checkFiles(fs, "/srcdat10");
- } catch (BlockMissingException e) {
- System.out.println("Received BlockMissingException as expected.");
- } catch (IOException e) {
- assertTrue("Corrupted replicas not handled properly. Expecting BlockMissingException " +
- " but received IOException " + e, false);
- }
- break;
- }
- }
-
- // fetch bad file list from namenode. There should be one file.
- badFiles = namenode.getCorruptFiles();
- LOG.info("Namenode has bad files. " + badFiles.length);
- assertTrue("Namenode has " + badFiles.length + " bad files. Expecting 1.",
- badFiles.length == 1);
- util.cleanup(fs, "/srcdat10");
- } finally {
- if (cluster != null) { cluster.shutdown(); }
- }
- }
}
Modified: hadoop/hdfs/trunk/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/TestCorruptFilesJsp.java
URL: http://svn.apache.org/viewvc/hadoop/hdfs/trunk/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/TestCorruptFilesJsp.java?rev=996213&r1=996212&r2=996213&view=diff
==============================================================================
--- hadoop/hdfs/trunk/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/TestCorruptFilesJsp.java (original)
+++ hadoop/hdfs/trunk/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/TestCorruptFilesJsp.java Sat Sep 11 21:53:07 2010
@@ -20,20 +20,18 @@ package org.apache.hadoop.hdfs.server.na
import static org.junit.Assert.assertTrue;
import java.net.URL;
+import java.util.Collection;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.ChecksumException;
import org.apache.hadoop.fs.FSDataInputStream;
-import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hdfs.DFSClient;
import org.apache.hadoop.hdfs.DFSConfigKeys;
import org.apache.hadoop.hdfs.DFSTestUtil;
import org.apache.hadoop.hdfs.HdfsConfiguration;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.hdfs.TestDatanodeBlockScanner;
-import org.apache.hadoop.hdfs.protocol.ClientProtocol;
import org.junit.Test;
/** A JUnit test for corrupt_files.jsp */
@@ -66,10 +64,11 @@ public class TestCorruptFilesJsp {
}
// verify there are no corrupt files
- ClientProtocol namenode = DFSClient.createNamenode(conf);
- FileStatus[] badFiles = namenode.getCorruptFiles();
- assertTrue("There are " + badFiles.length
- + " corrupt files, but expecting none", badFiles.length == 0);
+ final NameNode namenode = cluster.getNameNode();
+ Collection<FSNamesystem.CorruptFileBlockInfo> badFiles = namenode
+ .listCorruptFileBlocks("/", null);
+ assertTrue("There are " + badFiles.size()
+ + " corrupt files, but expecting none", badFiles.size() == 0);
// Check if webui agrees
URL url = new URL("http://"
@@ -95,9 +94,9 @@ public class TestCorruptFilesJsp {
}
// verify if all corrupt files were reported to NN
- badFiles = namenode.getCorruptFiles();
- assertTrue("Expecting 3 corrupt files, but got " + badFiles.length,
- badFiles.length == 3);
+ badFiles = namenode.listCorruptFileBlocks("/", null);
+ assertTrue("Expecting 3 corrupt files, but got " + badFiles.size(),
+ badFiles.size() == 3);
// Check if webui agrees
url = new URL("http://"
Modified: hadoop/hdfs/trunk/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/TestFsck.java
URL: http://svn.apache.org/viewvc/hadoop/hdfs/trunk/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/TestFsck.java?rev=996213&r1=996212&r2=996213&view=diff
==============================================================================
--- hadoop/hdfs/trunk/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/TestFsck.java (original)
+++ hadoop/hdfs/trunk/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/TestFsck.java Sat Sep 11 21:53:07 2010
@@ -28,6 +28,7 @@ import java.io.RandomAccessFile;
import java.net.InetSocketAddress;
import java.nio.channels.FileChannel;
import java.security.PrivilegedExceptionAction;
+import java.util.Collection;
import java.util.Random;
import java.util.regex.Pattern;
@@ -35,10 +36,7 @@ import junit.framework.TestCase;
import org.apache.commons.logging.impl.Log4JLogger;
import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.ChecksumException;
-import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
-import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsPermission;
@@ -47,8 +45,6 @@ import org.apache.hadoop.hdfs.DFSConfigK
import org.apache.hadoop.hdfs.DFSTestUtil;
import org.apache.hadoop.hdfs.HdfsConfiguration;
import org.apache.hadoop.hdfs.MiniDFSCluster;
-import org.apache.hadoop.hdfs.TestDatanodeBlockScanner;
-import org.apache.hadoop.hdfs.protocol.ClientProtocol;
import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
import org.apache.hadoop.hdfs.tools.DFSck;
import org.apache.hadoop.io.IOUtils;
@@ -455,121 +451,68 @@ public class TestFsck extends TestCase {
}
}
- /**
- * Check if NamenodeFsck.buildSummaryResultForListCorruptFiles constructs the
- * proper string according to the number of corrupt files
- */
- public void testbuildResultForListCorruptFile() {
- assertEquals("Verifying result for zero corrupt files",
- "Unable to locate any corrupt files under '/'.\n\n"
- + "Please run a complete fsck to confirm if '/' "
- + NamenodeFsck.HEALTHY_STATUS, NamenodeFsck
- .buildSummaryResultForListCorruptFiles(0, "/"));
-
- assertEquals("Verifying result for one corrupt file",
- "There is at least 1 corrupt file under '/', which "
- + NamenodeFsck.CORRUPT_STATUS, NamenodeFsck
- .buildSummaryResultForListCorruptFiles(1, "/"));
-
- assertEquals("Verifying result for than one corrupt file",
- "There are at least 100 corrupt files under '/', which "
- + NamenodeFsck.CORRUPT_STATUS, NamenodeFsck
- .buildSummaryResultForListCorruptFiles(100, "/"));
-
- try {
- NamenodeFsck.buildSummaryResultForListCorruptFiles(-1, "/");
- fail("NamenodeFsck.buildSummaryResultForListCorruptFiles should "
- + "have thrown IllegalArgumentException for non-positive argument");
- } catch (IllegalArgumentException e) {
- // expected result
- }
- }
-
/** check if option -list-corruptfiles of fsck command works properly */
- public void testCorruptFilesOption() throws Exception {
+ public void testFsckListCorruptFilesBlocks() throws Exception {
+ Configuration conf = new Configuration();
+ conf.setLong("dfs.blockreport.intervalMsec", 1000);
+ conf.setInt("dfs.datanode.directoryscan.interval", 1);
+ FileSystem fs = null;
+
MiniDFSCluster cluster = null;
try {
-
- final int FILE_SIZE = 512;
- // the files and directories are intentionally prefixes of each other in
- // order to verify if fsck can distinguish correctly whether the path
- // supplied by user is a file or a directory
- Path[] filepaths = { new Path("/audiobook"), new Path("/audio/audio1"),
- new Path("/audio/audio2"), new Path("/audio/audio") };
-
- Configuration conf = new HdfsConfiguration();
- conf.setInt("dfs.datanode.directoryscan.interval", 1); // datanode scans
- // directories
- conf.setInt("dfs.blockreport.intervalMsec", 3 * 1000); // datanode sends
- // block reports
cluster = new MiniDFSCluster(conf, 1, true, null);
- FileSystem fs = cluster.getFileSystem();
-
- // create files
- for (Path filepath : filepaths) {
- DFSTestUtil.createFile(fs, filepath, FILE_SIZE, (short) 1, 0L);
- DFSTestUtil.waitReplication(fs, filepath, (short) 1);
- }
-
- // verify there are not corrupt files
- ClientProtocol namenode = DFSClient.createNamenode(conf);
- FileStatus[] badFiles = namenode.getCorruptFiles();
- assertTrue("There are " + badFiles.length
- + " corrupt files, but expecting none", badFiles.length == 0);
-
- // Check if fsck -list-corruptfiles agree
- String outstr = runFsck(conf, 0, true, "/", "-list-corruptfiles");
- assertTrue(outstr.contains(NamenodeFsck
- .buildSummaryResultForListCorruptFiles(0, "/")));
-
- // Now corrupt all the files except for the last one
- for (int idx = 0; idx < filepaths.length - 1; idx++) {
- String blockName = DFSTestUtil.getFirstBlock(fs, filepaths[idx])
- .getBlockName();
- TestDatanodeBlockScanner.corruptReplica(blockName, 0);
-
- // read the file so that the corrupt block is reported to NN
- FSDataInputStream in = fs.open(filepaths[idx]);
- try {
- in.readFully(new byte[FILE_SIZE]);
- } catch (ChecksumException ignored) { // checksum error is expected.
+ cluster.waitActive();
+ fs = cluster.getFileSystem();
+ DFSTestUtil util = new DFSTestUtil("testGetCorruptFiles", 3, 1, 1024);
+ util.createFiles(fs, "/corruptData", (short) 1);
+ util.waitReplication(fs, "/corruptData", (short) 1);
+
+ // String outStr = runFsck(conf, 0, true, "/corruptData", "-list-corruptfileblocks");
+ String outStr = runFsck(conf, 0, false, "/corruptData", "-list-corruptfileblocks");
+ System.out.println("1. good fsck out: " + outStr);
+ assertTrue(outStr.contains("has 0 CORRUPT files"));
+ // delete the blocks
+ File baseDir = new File(System.getProperty("test.build.data",
+ "build/test/data"),"dfs/data");
+ for (int i=0; i<8; i++) {
+ File data_dir = new File(baseDir, "data" +(i+1)+ MiniDFSCluster.FINALIZED_DIR_NAME);
+ File[] blocks = data_dir.listFiles();
+ if (blocks == null)
+ continue;
+
+ for (int idx = 0; idx < blocks.length; idx++) {
+ if (!blocks[idx].getName().startsWith("blk_")) {
+ continue;
+ }
+ assertTrue("Cannot remove file.", blocks[idx].delete());
}
- in.close();
}
- // verify if all corrupt files were reported to NN
- badFiles = namenode.getCorruptFiles();
- assertTrue("Expecting 3 corrupt files, but got " + badFiles.length,
- badFiles.length == 3);
-
- // check the corrupt file
- String corruptFile = "/audiobook";
- outstr = runFsck(conf, 1, true, corruptFile, "-list-corruptfiles");
- assertTrue(outstr.contains(NamenodeFsck
- .buildSummaryResultForListCorruptFiles(1, corruptFile)));
-
- // check corrupt dir
- String corruptDir = "/audio";
- outstr = runFsck(conf, 1, true, corruptDir, "-list-corruptfiles");
- assertTrue(outstr.contains("/audio/audio1"));
- assertTrue(outstr.contains("/audio/audio2"));
- assertTrue(outstr.contains(NamenodeFsck
- .buildSummaryResultForListCorruptFiles(2, corruptDir)));
-
- // check healthy file
- String healthyFile = "/audio/audio";
- outstr = runFsck(conf, 0, true, healthyFile, "-list-corruptfiles");
- assertTrue(outstr.contains(NamenodeFsck
- .buildSummaryResultForListCorruptFiles(0, healthyFile)));
-
- // clean up
- for (Path filepath : filepaths) {
- fs.delete(filepath, false);
+ // wait for the namenode to see the corruption
+ final NameNode namenode = cluster.getNameNode();
+ Collection<FSNamesystem.CorruptFileBlockInfo> corruptFileBlocks = namenode
+ .listCorruptFileBlocks("/corruptData", null);
+ int numCorrupt = corruptFileBlocks.size();
+ while (numCorrupt == 0) {
+ Thread.sleep(1000);
+ corruptFileBlocks = namenode
+ .listCorruptFileBlocks("/corruptData", null);
+ numCorrupt = corruptFileBlocks.size();
}
+ outStr = runFsck(conf, -1, true, "/corruptData", "-list-corruptfileblocks");
+ System.out.println("2. bad fsck out: " + outStr);
+ assertTrue(outStr.contains("has 3 CORRUPT files"));
+
+ // Do a listing on a dir which doesn't have any corrupt blocks and validate
+ util.createFiles(fs, "/goodData");
+ outStr = runFsck(conf, 0, true, "/goodData", "-list-corruptfileblocks");
+ System.out.println("3. good fsck out: " + outStr);
+ assertTrue(outStr.contains("has 0 CORRUPT files"));
+ util.cleanup(fs,"/corruptData");
+ util.cleanup(fs, "/goodData");
} finally {
- if (cluster != null) {
- cluster.shutdown();
- }
+ if (cluster != null) {cluster.shutdown();}
}
}
+
}
Added: hadoop/hdfs/trunk/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/TestListCorruptFileBlocks.java
URL: http://svn.apache.org/viewvc/hadoop/hdfs/trunk/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/TestListCorruptFileBlocks.java?rev=996213&view=auto
==============================================================================
--- hadoop/hdfs/trunk/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/TestListCorruptFileBlocks.java (added)
+++ hadoop/hdfs/trunk/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/TestListCorruptFileBlocks.java Sat Sep 11 21:53:07 2010
@@ -0,0 +1,269 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.server.namenode;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.RandomAccessFile;
+import java.nio.ByteBuffer;
+import java.nio.channels.FileChannel;
+import java.util.Collection;
+import java.util.Random;
+
+import junit.framework.TestCase;
+
+import org.apache.commons.logging.Log;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.hdfs.BlockMissingException;
+import org.apache.hadoop.hdfs.DFSTestUtil;
+import org.apache.hadoop.hdfs.HdfsConfiguration;
+import org.apache.hadoop.hdfs.MiniDFSCluster;
+
+/**
+ * This class tests the listCorruptFileBlocks API.
+ * We create 3 files; intentionally delete their blocks
+ * Use listCorruptFileBlocks to validate that we get the list of corrupt
+ * files/blocks; also test the "paging" support by calling the API
+ * with a block # from a previous call and validate that the subsequent
+ * blocks/files are also returned.
+ */
+public class TestListCorruptFileBlocks extends TestCase {
+ static Log LOG = NameNode.stateChangeLog;
+
+ /** check if nn.listCorruptFileBlocks() returns a file that has corrupted blocks */
+ public void testListCorruptFilesCorruptedBlock() throws Exception {
+ MiniDFSCluster cluster = null;
+ Random random = new Random();
+
+ try {
+ Configuration conf = new HdfsConfiguration();
+ conf.setInt("dfs.datanode.directoryscan.interval", 1); // datanode scans directories
+ conf.setInt("dfs.blockreport.intervalMsec", 3 * 1000); // datanode sends block reports
+ cluster = new MiniDFSCluster(conf, 1, true, null);
+ FileSystem fs = cluster.getFileSystem();
+
+ // create two files with one block each
+ DFSTestUtil util = new DFSTestUtil("testCorruptFilesCorruptedBlock", 2, 1, 512);
+ util.createFiles(fs, "/srcdat10");
+
+ // fetch bad file list from namenode. There should be none.
+ final NameNode namenode = cluster.getNameNode();
+ Collection<FSNamesystem.CorruptFileBlockInfo> badFiles = namenode
+ .listCorruptFileBlocks("/", null);
+ assertTrue("Namenode has " + badFiles.size()
+ + " corrupt files. Expecting None.", badFiles.size() == 0);
+
+ // Now deliberately corrupt one block
+ File data_dir = new File(System.getProperty("test.build.data"),
+ "dfs/data/data1/current/finalized");
+ assertTrue("data directory does not exist", data_dir.exists());
+ File[] blocks = data_dir.listFiles();
+ assertTrue("Blocks do not exist in data-dir", (blocks != null) && (blocks.length > 0));
+ for (int idx = 0; idx < blocks.length; idx++) {
+ if (blocks[idx].getName().startsWith("blk_") &&
+ blocks[idx].getName().endsWith(".meta")) {
+ //
+ // shorten .meta file
+ //
+ RandomAccessFile file = new RandomAccessFile(blocks[idx], "rw");
+ FileChannel channel = file.getChannel();
+ long position = channel.size() - 2;
+ int length = 2;
+ byte[] buffer = new byte[length];
+ random.nextBytes(buffer);
+ channel.write(ByteBuffer.wrap(buffer), position);
+ file.close();
+ LOG.info("Deliberately corrupting file " + blocks[idx].getName() +
+ " at offset " + position + " length " + length);
+
+ // read all files to trigger detection of corrupted replica
+ try {
+ util.checkFiles(fs, "/srcdat10");
+ } catch (BlockMissingException e) {
+ System.out.println("Received BlockMissingException as expected.");
+ } catch (IOException e) {
+ assertTrue("Corrupted replicas not handled properly. Expecting BlockMissingException " +
+ " but received IOException " + e, false);
+ }
+ break;
+ }
+ }
+
+ // fetch bad file list from namenode. There should be one file.
+ badFiles = namenode.listCorruptFileBlocks("/", null);
+ LOG.info("Namenode has bad files. " + badFiles.size());
+ assertTrue("Namenode has " + badFiles.size() + " bad files. Expecting 1.",
+ badFiles.size() == 1);
+ util.cleanup(fs, "/srcdat10");
+ } finally {
+ if (cluster != null) { cluster.shutdown(); }
+ }
+ }
+
+ // deliberately remove blocks from a file and validate the list-corrupt-file-blocks API
+ public void testlistCorruptFileBlocks() throws Exception {
+ Configuration conf = new Configuration();
+ conf.setLong("dfs.blockreport.intervalMsec", 1000);
+ conf.setInt("dfs.datanode.directoryscan.interval", 1); // datanode scans
+ // directories
+ FileSystem fs = null;
+
+ MiniDFSCluster cluster = null;
+ try {
+ cluster = new MiniDFSCluster(conf, 1, true, null);
+ cluster.waitActive();
+ fs = cluster.getFileSystem();
+ DFSTestUtil util = new DFSTestUtil("testGetCorruptFiles", 3, 1, 1024);
+ util.createFiles(fs, "/corruptData");
+
+ final NameNode namenode = cluster.getNameNode();
+ Collection<FSNamesystem.CorruptFileBlockInfo> corruptFileBlocks = namenode
+ .listCorruptFileBlocks("/corruptData", null);
+ int numCorrupt = corruptFileBlocks.size();
+ assertTrue(numCorrupt == 0);
+ // delete the blocks
+ File baseDir = new File(System.getProperty("test.build.data",
+ "build/test/data"), "dfs/data");
+ for (int i = 0; i < 8; i++) {
+ File data_dir = new File(baseDir, "data" + (i + 1)
+ + MiniDFSCluster.FINALIZED_DIR_NAME);
+ File[] blocks = data_dir.listFiles();
+ if (blocks == null)
+ continue;
+ // assertTrue("Blocks do not exist in data-dir", (blocks != null) &&
+ // (blocks.length > 0));
+ for (int idx = 0; idx < blocks.length; idx++) {
+ if (!blocks[idx].getName().startsWith("blk_")) {
+ continue;
+ }
+ LOG.info("Deliberately removing file " + blocks[idx].getName());
+ assertTrue("Cannot remove file.", blocks[idx].delete());
+ // break;
+ }
+ }
+
+ int count = 0;
+ corruptFileBlocks = namenode.listCorruptFileBlocks("/corruptData", null);
+ numCorrupt = corruptFileBlocks.size();
+ while (numCorrupt < 3) {
+ Thread.sleep(1000);
+ corruptFileBlocks = namenode
+ .listCorruptFileBlocks("/corruptData", null);
+ numCorrupt = corruptFileBlocks.size();
+ count++;
+ if (count > 30)
+ break;
+ }
+ // Validate we get all the corrupt files
+ LOG.info("Namenode has bad files. " + numCorrupt);
+ assertTrue(numCorrupt == 3);
+ // test the paging here
+
+ FSNamesystem.CorruptFileBlockInfo[] cfb = corruptFileBlocks
+ .toArray(new FSNamesystem.CorruptFileBlockInfo[0]);
+ // now get the 2nd and 3rd file that is corrupt
+ Collection<FSNamesystem.CorruptFileBlockInfo> nextCorruptFileBlocks = namenode
+ .listCorruptFileBlocks("/corruptData", cfb[0].block.getBlockName());
+ FSNamesystem.CorruptFileBlockInfo[] ncfb = nextCorruptFileBlocks
+ .toArray(new FSNamesystem.CorruptFileBlockInfo[0]);
+ numCorrupt = nextCorruptFileBlocks.size();
+ assertTrue(numCorrupt == 2);
+ assertTrue(ncfb[0].block.getBlockName()
+ .equalsIgnoreCase(cfb[1].block.getBlockName()));
+
+ corruptFileBlocks = namenode.listCorruptFileBlocks("/corruptData",
+ ncfb[1].block.getBlockName());
+ numCorrupt = corruptFileBlocks.size();
+ assertTrue(numCorrupt == 0);
+ // Do a listing on a dir which doesn't have any corrupt blocks and
+ // validate
+ util.createFiles(fs, "/goodData");
+ corruptFileBlocks = namenode.listCorruptFileBlocks("/goodData", null);
+ numCorrupt = corruptFileBlocks.size();
+ assertTrue(numCorrupt == 0);
+ util.cleanup(fs, "/corruptData");
+ util.cleanup(fs, "/goodData");
+ } finally {
+ if (cluster != null) {
+ cluster.shutdown();
+ }
+ }
+ }
+
+ /** check if NN.listCorruptFileBlocks() returns the right limit */
+ public void testMaxCorruptFiles() throws Exception {
+ MiniDFSCluster cluster = null;
+ try {
+ Configuration conf = new HdfsConfiguration();
+ conf.setInt("dfs.datanode.directoryscan.interval", 15); // datanode scans directories
+ conf.setInt("dfs.blockreport.intervalMsec", 3 * 1000); // datanode sends block reports
+ cluster = new MiniDFSCluster(conf, 1, true, null);
+ FileSystem fs = cluster.getFileSystem();
+ final int maxCorruptFileBlocks =
+ FSNamesystem.DEFAULT_MAX_CORRUPT_FILEBLOCKS_RETURNED;
+
+ // create 110 files with one block each
+ DFSTestUtil util = new DFSTestUtil("testMaxCorruptFiles",
+ maxCorruptFileBlocks * 3, 1, 512);
+ util.createFiles(fs, "/srcdat2", (short) 1);
+ util.waitReplication(fs, "/srcdat2", (short) 1);
+
+ // verify that there are no bad blocks.
+ final NameNode namenode = cluster.getNameNode();
+ Collection<FSNamesystem.CorruptFileBlockInfo> badFiles = namenode
+ .listCorruptFileBlocks("/srcdat2", null);
+ assertTrue("Namenode has " + badFiles.size() + " corrupt files. Expecting none.",
+ badFiles.size() == 0);
+
+ // Now deliberately remove blocks from all files
+ File baseDir = new File(System.getProperty("test.build.data",
+ "build/test/data"),"dfs/data");
+ for (int i=0; i<8; i++) {
+ File data_dir = new File(baseDir, "data" +(i+1)+ MiniDFSCluster.FINALIZED_DIR_NAME);
+ File[] blocks = data_dir.listFiles();
+ if (blocks == null)
+ continue;
+
+ for (int idx = 0; idx < blocks.length; idx++) {
+ if (!blocks[idx].getName().startsWith("blk_")) {
+ continue;
+ }
+ assertTrue("Cannot remove file.", blocks[idx].delete());
+ }
+ }
+
+ badFiles = namenode.listCorruptFileBlocks("/srcdat2", null);
+
+ while (badFiles.size() < maxCorruptFileBlocks) {
+ LOG.info("# of corrupt files is: " + badFiles.size());
+ Thread.sleep(10000);
+ badFiles = namenode.listCorruptFileBlocks("/srcdat2", null);
+ }
+ badFiles = namenode.listCorruptFileBlocks("/srcdat2", null);
+ LOG.info("Namenode has bad files. " + badFiles.size());
+ assertTrue("Namenode has " + badFiles.size() + " bad files. Expecting " +
+ maxCorruptFileBlocks + ".",
+ badFiles.size() == maxCorruptFileBlocks);
+ util.cleanup(fs, "/srcdat2");
+ } finally {
+ if (cluster != null) { cluster.shutdown(); }
+ }
+ }
+
+}
Propchange: hadoop/hdfs/trunk/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/TestListCorruptFileBlocks.java
------------------------------------------------------------------------------
svn:mime-type = text/plain
Modified: hadoop/hdfs/trunk/src/webapps/hdfs/corrupt_files.jsp
URL: http://svn.apache.org/viewvc/hadoop/hdfs/trunk/src/webapps/hdfs/corrupt_files.jsp?rev=996213&r1=996212&r2=996213&view=diff
==============================================================================
--- hadoop/hdfs/trunk/src/webapps/hdfs/corrupt_files.jsp (original)
+++ hadoop/hdfs/trunk/src/webapps/hdfs/corrupt_files.jsp Sat Sep 11 21:53:07 2010
@@ -23,6 +23,7 @@
import="org.apache.hadoop.fs.FileStatus"
import="org.apache.hadoop.fs.FileUtil"
import="org.apache.hadoop.fs.Path"
+ import="java.util.Collection"
import="java.util.Arrays" %>
<%!//for java.io.Serializable
private static final long serialVersionUID = 1L;%>
@@ -32,9 +33,9 @@
String namenodeRole = nn.getRole().toString();
String namenodeLabel = nn.getNameNodeAddress().getHostName() + ":"
+ nn.getNameNodeAddress().getPort();
- FileStatus[] corruptFileStatuses = nn.getCorruptFiles();
- Path[] corruptFilePaths = FileUtil.stat2Paths(corruptFileStatuses);
- int corruptFileCount = corruptFileStatuses.length;
+ Collection<FSNamesystem.CorruptFileBlockInfo> corruptFileBlocks =
+ nn.listCorruptFileBlocks("/", null);
+ int corruptFileCount = corruptFileBlocks.size();
%>
<html>
@@ -58,11 +59,10 @@
Please run fsck for a thorough health analysis.
<%
} else {
- Arrays.sort(corruptFilePaths);
- for (Path corruptFilePath : corruptFilePaths) {
- String currentPath = corruptFilePath.toString();
+ for (FSNamesystem.CorruptFileBlockInfo c : corruptFileBlocks) {
+ String currentFileBlock = c.toString();
%>
- <%=currentPath%><br>
+ <%=currentFileBlock%><br>
<%
}
%>