Posted to hdfs-commits@hadoop.apache.org by at...@apache.org on 2012/06/02 02:42:10 UTC

svn commit: r1345410 - in /hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs: CHANGES.txt src/main/java/org/apache/hadoop/hdfs/server/namenode/NamenodeFsck.java src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFsck.java

Author: atm
Date: Sat Jun  2 00:42:09 2012
New Revision: 1345410

URL: http://svn.apache.org/viewvc?rev=1345410&view=rev
Log:
HDFS-3442. Incorrect count for Missing Replicas in FSCK report. Contributed by Andrew Wang.
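
The gist of the fix: fsck computed the "Missing replicas" percentage against
the number of replicas it actually found (totalReplicas) rather than the
number it expected to find. A minimal standalone sketch of the before/after
arithmetic, using the same numbers as the new test below (3 blocks, target
replication 2, 1 datanode); the variable names mirror the patch, but the
snippet itself is illustrative and not part of the commit:

    long missingReplicas     = 3;  // replicas held by no datanode
    long totalReplicas       = 3;  // replicas actually found (1 per block)
    long numExpectedReplicas = 6;  // sum of target replication over all blocks (3 * 2)

    // Before HDFS-3442: 3 missing out of 3 found reads as 100.0 %
    float before = (float) (missingReplicas * 100) / (float) totalReplicas;
    // After HDFS-3442: 3 missing out of 6 expected reads as 50.0 %
    float after  = (float) (missingReplicas * 100) / (float) numExpectedReplicas;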

Modified:
    hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
    hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NamenodeFsck.java
    hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFsck.java

Modified: hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt?rev=1345410&r1=1345409&r2=1345410&view=diff
==============================================================================
--- hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt (original)
+++ hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt Sat Jun  2 00:42:09 2012
@@ -144,6 +144,8 @@ Release 2.0.1-alpha - UNRELEASED
     HDFS-3487. offlineimageviewer should give byte offset information
     when it encounters an exception. (Colin Patrick McCabe via eli)
 
+    HDFS-3442. Incorrect count for Missing Replicas in FSCK report. (Andrew Wang via atm)
+
 Release 2.0.0-alpha - UNRELEASED
 
   INCOMPATIBLE CHANGES

Modified: hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NamenodeFsck.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NamenodeFsck.java?rev=1345410&r1=1345409&r2=1345410&view=diff
==============================================================================
--- hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NamenodeFsck.java (original)
+++ hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NamenodeFsck.java Sat Jun  2 00:42:09 2012
@@ -53,6 +53,8 @@ import org.apache.hadoop.net.NetworkTopo
 import org.apache.hadoop.net.NodeBase;
 import org.apache.hadoop.security.UserGroupInformation;
 
+import com.google.common.annotations.VisibleForTesting;
+
 /**
  * This class provides rudimentary checking of DFS volumes for errors and
  * sub-optimal conditions.
@@ -244,7 +246,8 @@ public class NamenodeFsck {
     out.println();
   }
   
-  private void check(String parent, HdfsFileStatus file, Result res) throws IOException {
+  @VisibleForTesting
+  void check(String parent, HdfsFileStatus file, Result res) throws IOException {
     String path = file.getFullName(parent);
     boolean isOpen = false;
 
@@ -313,6 +316,7 @@ public class NamenodeFsck {
       DatanodeInfo[] locs = lBlk.getLocations();
       res.totalReplicas += locs.length;
       short targetFileReplication = file.getReplication();
+      res.numExpectedReplicas += targetFileReplication;
       if (locs.length > targetFileReplication) {
         res.excessiveReplicas += (locs.length - targetFileReplication);
         res.numOverReplicatedBlocks += 1;
@@ -608,29 +612,31 @@ public class NamenodeFsck {
   /**
    * FsckResult of checking, plus overall DFS statistics.
    */
-  private static class Result {
-    private List<String> missingIds = new ArrayList<String>();
-    private long missingSize = 0L;
-    private long corruptFiles = 0L;
-    private long corruptBlocks = 0L;
-    private long excessiveReplicas = 0L;
-    private long missingReplicas = 0L;
-    private long numOverReplicatedBlocks = 0L;
-    private long numUnderReplicatedBlocks = 0L;
-    private long numMisReplicatedBlocks = 0L;  // blocks that do not satisfy block placement policy
-    private long numMinReplicatedBlocks = 0L;  // minimally replicatedblocks
-    private long totalBlocks = 0L;
-    private long totalOpenFilesBlocks = 0L;
-    private long totalFiles = 0L;
-    private long totalOpenFiles = 0L;
-    private long totalDirs = 0L;
-    private long totalSize = 0L;
-    private long totalOpenFilesSize = 0L;
-    private long totalReplicas = 0L;
+  @VisibleForTesting
+  static class Result {
+    List<String> missingIds = new ArrayList<String>();
+    long missingSize = 0L;
+    long corruptFiles = 0L;
+    long corruptBlocks = 0L;
+    long excessiveReplicas = 0L;
+    long missingReplicas = 0L;
+    long numOverReplicatedBlocks = 0L;
+    long numUnderReplicatedBlocks = 0L;
+    long numMisReplicatedBlocks = 0L;  // blocks that do not satisfy block placement policy
+    long numMinReplicatedBlocks = 0L;  // minimally replicated blocks
+    long totalBlocks = 0L;
+    long numExpectedReplicas = 0L;
+    long totalOpenFilesBlocks = 0L;
+    long totalFiles = 0L;
+    long totalOpenFiles = 0L;
+    long totalDirs = 0L;
+    long totalSize = 0L;
+    long totalOpenFilesSize = 0L;
+    long totalReplicas = 0L;
 
     final short replication;
     
-    private Result(Configuration conf) {
+    Result(Configuration conf) {
       this.replication = (short)conf.getInt(DFSConfigKeys.DFS_REPLICATION_KEY, 
                                             DFSConfigKeys.DFS_REPLICATION_DEFAULT);
     }
@@ -726,7 +732,7 @@ public class NamenodeFsck {
               missingReplicas);
       if (totalReplicas > 0) {
         res.append(" (").append(
-            ((float) (missingReplicas * 100) / (float) totalReplicas)).append(
+            ((float) (missingReplicas * 100) / (float) numExpectedReplicas)).append(
             " %)");
       }
       return res.toString();

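A note on the visibility changes in this file: Guava's @VisibleForTesting has
no effect at runtime; it only documents that check() and Result were widened
from private to package-private so that TestFsck, which lives in the same
package (org.apache.hadoop.hdfs.server.namenode), can call them directly. A
minimal sketch of the pattern, with hypothetical names:

    import com.google.common.annotations.VisibleForTesting;

    public class Example {
      private long counter = 0;

      public void increment() {
        counter++;
      }

      @VisibleForTesting      // documentation only: widened for same-package tests
      long getCounter() {     // package-private rather than private
        return counter;
      }
    }
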
Modified: hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFsck.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFsck.java?rev=1345410&r1=1345409&r2=1345410&view=diff
==============================================================================
--- hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFsck.java (original)
+++ hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFsck.java Sat Jun  2 00:42:09 2012
@@ -18,21 +18,27 @@
 
 package org.apache.hadoop.hdfs.server.namenode;
 
+import static org.junit.Assert.*;
+
 import java.io.BufferedReader;
 import java.io.ByteArrayOutputStream;
 import java.io.File;
 import java.io.FileReader;
 import java.io.IOException;
 import java.io.PrintStream;
+import java.io.PrintWriter;
 import java.io.RandomAccessFile;
+import java.io.StringWriter;
+import java.io.Writer;
+import java.net.InetAddress;
 import java.net.InetSocketAddress;
 import java.nio.channels.FileChannel;
 import java.security.PrivilegedExceptionAction;
+import java.util.HashMap;
+import java.util.Map;
 import java.util.Random;
 import java.util.regex.Pattern;
 
-import junit.framework.TestCase;
-
 import org.apache.commons.logging.impl.Log4JLogger;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FSDataOutputStream;
@@ -42,25 +48,30 @@ import org.apache.hadoop.fs.permission.F
 import org.apache.hadoop.hdfs.DFSClient;
 import org.apache.hadoop.hdfs.DFSConfigKeys;
 import org.apache.hadoop.hdfs.DFSTestUtil;
+import org.apache.hadoop.hdfs.DistributedFileSystem;
 import org.apache.hadoop.hdfs.HdfsConfiguration;
 import org.apache.hadoop.hdfs.MiniDFSCluster;
-import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
 import org.apache.hadoop.hdfs.protocol.CorruptFileBlocks;
+import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
+import org.apache.hadoop.hdfs.protocol.HdfsFileStatus;
 import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
+import org.apache.hadoop.hdfs.server.namenode.NamenodeFsck.Result;
 import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocols;
 import org.apache.hadoop.hdfs.tools.DFSck;
 import org.apache.hadoop.io.IOUtils;
+import org.apache.hadoop.net.NetworkTopology;
 import org.apache.hadoop.security.UserGroupInformation;
 import org.apache.hadoop.util.ToolRunner;
 import org.apache.log4j.Level;
 import org.apache.log4j.Logger;
 import org.apache.log4j.PatternLayout;
 import org.apache.log4j.RollingFileAppender;
+import org.junit.Test;
 
 /**
  * A JUnit test for doing fsck
  */
-public class TestFsck extends TestCase {
+public class TestFsck {
   static final String auditLogFile = System.getProperty("test.build.dir",
       "build/test") + "/audit.log";
   
@@ -79,13 +90,15 @@ public class TestFsck extends TestCase {
     PrintStream out = new PrintStream(bStream, true);
     ((Log4JLogger)FSPermissionChecker.LOG).getLogger().setLevel(Level.ALL);
     int errCode = ToolRunner.run(new DFSck(conf, out), path);
-    if (checkErrorCode)
+    if (checkErrorCode) {
       assertEquals(expectedErrCode, errCode);
+    }
     ((Log4JLogger)FSPermissionChecker.LOG).getLogger().setLevel(Level.INFO);
     return bStream.toString();
   }
 
   /** do fsck */
+  @Test
   public void testFsck() throws Exception {
     DFSTestUtil util = new DFSTestUtil("TestFsck", 20, 3, 8*1024);
     MiniDFSCluster cluster = null;
@@ -158,6 +171,7 @@ public class TestFsck extends TestCase {
     assertNull("Unexpected event in audit log", reader.readLine());
   }
   
+  @Test
   public void testFsckNonExistent() throws Exception {
     DFSTestUtil util = new DFSTestUtil("TestFsck", 20, 3, 8*1024);
     MiniDFSCluster cluster = null;
@@ -180,6 +194,7 @@ public class TestFsck extends TestCase {
   }
 
   /** Test fsck with permission set on inodes */
+  @Test
   public void testFsckPermission() throws Exception {
     final DFSTestUtil util = new DFSTestUtil(getClass().getSimpleName(), 20, 3, 8*1024);
     final Configuration conf = new HdfsConfiguration();
@@ -227,6 +242,7 @@ public class TestFsck extends TestCase {
     }
   }
 
+  @Test
   public void testFsckMoveAndDelete() throws Exception {
     final int MAX_MOVE_TRIES = 5;
     DFSTestUtil util = new DFSTestUtil("TestFsck", 5, 3, 8*1024);
@@ -300,6 +316,7 @@ public class TestFsck extends TestCase {
     }
   }
   
+  @Test
   public void testFsckOpenFiles() throws Exception {
     DFSTestUtil util = new DFSTestUtil("TestFsck", 4, 3, 8*1024); 
     MiniDFSCluster cluster = null;
@@ -350,6 +367,7 @@ public class TestFsck extends TestCase {
     }
   }
 
+  @Test
   public void testCorruptBlock() throws Exception {
     Configuration conf = new HdfsConfiguration();
     conf.setLong(DFSConfigKeys.DFS_BLOCKREPORT_INTERVAL_MSEC_KEY, 1000);
@@ -426,6 +444,7 @@ public class TestFsck extends TestCase {
    * 
    * @throws Exception
    */
+  @Test
   public void testFsckError() throws Exception {
     MiniDFSCluster cluster = null;
     try {
@@ -460,6 +479,7 @@ public class TestFsck extends TestCase {
   }
   
   /** check if option -list-corruptfiles of fsck command works properly */
+  @Test
   public void testFsckListCorruptFilesBlocks() throws Exception {
     Configuration conf = new Configuration();
     conf.setLong(DFSConfigKeys.DFS_BLOCKREPORT_INTERVAL_MSEC_KEY, 1000);
@@ -529,6 +549,7 @@ public class TestFsck extends TestCase {
    * Test for checking fsck command on illegal arguments should print the proper
    * usage.
    */
+  @Test
   public void testToCheckTheFsckCommandOnIllegalArguments() throws Exception {
     MiniDFSCluster cluster = null;
     try {
@@ -560,4 +581,73 @@ public class TestFsck extends TestCase {
       }
     }
   }
+  
+  /**
+   * Tests that the counts of missing block replicas and expected replicas are correct
+   * @throws IOException
+   */
+  @Test
+  public void testFsckMissingReplicas() throws IOException {
+    // Desired replication factor
+    // Set this higher than NUM_REPLICAS so it's under-replicated
+    final short REPL_FACTOR = 2;
+    // Number of replicas to actually start
+    final short NUM_REPLICAS = 1;
+    // Number of blocks to write
+    final short NUM_BLOCKS = 3;
+    // Set a small-ish blocksize
+    final long blockSize = 512;
+    
+    Configuration conf = new Configuration();
+    conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, blockSize);
+    
+    MiniDFSCluster cluster = null;
+    DistributedFileSystem dfs = null;
+    
+    try {
+      // Startup a minicluster
+      cluster = 
+          new MiniDFSCluster.Builder(conf).numDataNodes(NUM_REPLICAS).build();
+      assertNotNull("Failed Cluster Creation", cluster);
+      cluster.waitClusterUp();
+      dfs = (DistributedFileSystem) cluster.getFileSystem();
+      assertNotNull("Failed to get FileSystem", dfs);
+      
+      // Create a file that will be intentionally under-replicated
+      final String pathString = "/testfile";
+      final Path path = new Path(pathString);
+      long fileLen = blockSize * NUM_BLOCKS;
+      DFSTestUtil.createFile(dfs, path, fileLen, REPL_FACTOR, 1);
+      
+      // Create an under-replicated file
+      NameNode namenode = cluster.getNameNode();
+      NetworkTopology nettop = cluster.getNamesystem().getBlockManager()
+          .getDatanodeManager().getNetworkTopology();
+      Map<String,String[]> pmap = new HashMap<String, String[]>();
+      Writer result = new StringWriter();
+      PrintWriter out = new PrintWriter(result, true);
+      InetAddress remoteAddress = InetAddress.getLocalHost();
+      NamenodeFsck fsck = new NamenodeFsck(conf, namenode, nettop, pmap, out, 
+          NUM_REPLICAS, (short)1, remoteAddress);
+      
+      // Run the fsck and check the Result
+      final HdfsFileStatus file = 
+          namenode.getRpcServer().getFileInfo(pathString);
+      assertNotNull(file);
+      Result res = new Result(conf);
+      fsck.check(pathString, file, res);
+      // Also print the output from the fsck, for ex post facto sanity checks
+      System.out.println(result.toString());
+      assertEquals(res.missingReplicas, 
+          (NUM_BLOCKS*REPL_FACTOR) - (NUM_BLOCKS*NUM_REPLICAS));
+      assertEquals(res.numExpectedReplicas, NUM_BLOCKS*REPL_FACTOR);
+    } finally {
+      if (dfs != null) {
+        dfs.close();
+      }
+      if (cluster != null) {
+        cluster.shutdown();
+      }
+    }
+  }
 }
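
For reference, the new test can be run on its own from
hadoop-hdfs-project/hadoop-hdfs; the method-level selector below assumes a
Maven Surefire version recent enough to support it (2.7.3 or later):

    mvn test -Dtest=TestFsck#testFsckMissingReplicas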