Posted to common-commits@hadoop.apache.org by su...@apache.org on 2012/11/21 19:10:21 UTC

svn commit: r1412218 - in /hadoop/common/branches/branch-1: CHANGES.txt src/hdfs/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java src/test/org/apache/hadoop/hdfs/server/namenode/TestStartup.java

Author: suresh
Date: Wed Nov 21 18:10:19 2012
New Revision: 1412218

URL: http://svn.apache.org/viewvc?rev=1412218&view=rev
Log:
HDFS-4208. NameNode could be stuck in SafeMode due to never-created blocks. Contributed by Brandon Li.

Modified:
    hadoop/common/branches/branch-1/CHANGES.txt
    hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
    hadoop/common/branches/branch-1/src/test/org/apache/hadoop/hdfs/server/namenode/TestStartup.java

Modified: hadoop/common/branches/branch-1/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-1/CHANGES.txt?rev=1412218&r1=1412217&r2=1412218&view=diff
==============================================================================
--- hadoop/common/branches/branch-1/CHANGES.txt (original)
+++ hadoop/common/branches/branch-1/CHANGES.txt Wed Nov 21 18:10:19 2012
@@ -375,6 +375,9 @@ Release 1.1.1 - Unreleased
     HADOOP-8745. Incorrect version numbers in hadoop-core POM.
     (Matthias Friedrich via eli)
 
+    HDFS-4208. NameNode could be stuck in SafeMode due to never-created
+    blocks. (Brandon Li via suresh)
+
 Release 1.1.0 - 2012.09.28
 
   INCOMPATIBLE CHANGES

Modified: hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java?rev=1412218&r1=1412217&r2=1412218&view=diff
==============================================================================
--- hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java (original)
+++ hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java Wed Nov 21 18:10:19 2012
@@ -5339,7 +5339,7 @@ public class FSNamesystem implements FSC
   void setBlockTotal() {
     if (safeMode == null)
       return;
-    safeMode.setBlockTotal(blocksMap.size());
+    safeMode.setBlockTotal((int)getSafeBlockCount());
   }
 
   /**
@@ -5350,6 +5350,50 @@ public class FSNamesystem implements FSC
   }
 
   /**
+   * There are times when a client allocates a block but never writes to it.
+   * This can happen when the response to the block allocation request never
+   * reaches the client, or when the client fails right after the allocation.
+   * In such a case, the NameNode might get stuck in safemode waiting for such
+   * blocks to be reported. To handle this, such blocks should not be counted
+   * toward the total number of blocks needed to exit safemode.
+   * <br>
+   * This method returns the total number of blocks, excluding the zero-length
+   * last blocks of files under construction.
+   */
+  long getSafeBlockCount() {
+    // Calculate number of blocks excluded by SafeMode
+    long numExcludedBlocks = 0;
+    for (Lease lease : leaseManager.getSortedLeases()) {
+      for (String path : lease.getPaths()) {
+        INode node = dir.getFileINode(path);
+        if (node == null) {
+          LOG.error("Found a lease for a non-existent file: " + path);
+          continue;
+        }
+        if (!node.isUnderConstruction()) {
+          LOG.error("Found a lease for a file that is not under construction: "
+              + path);
+          continue;
+        }
+        INodeFileUnderConstruction cons = (INodeFileUnderConstruction) node;
+        BlockInfo[] blocks = cons.getBlocks();
+        if (blocks == null || blocks.length == 0) {
+          continue;
+        }
+        // Exclude the zero-length last block of a file under construction
+        if (blocks[blocks.length - 1].getNumBytes() == 0) {
+          numExcludedBlocks++;
+        }
+      }
+    }
+    LOG.info("Number of blocks excluded by SafeMode: " + numExcludedBlocks
+        + ", total blocks: " + getBlocksTotal() + ", safe block count: "
+        + (getBlocksTotal() - numExcludedBlocks));
+
+    return getBlocksTotal() - numExcludedBlocks;
+  }
+  
+  /**
    * Enter safe mode manually.
    * @throws IOException
    */

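For context, the block total set above feeds the safemode exit check. The following is a minimal sketch of that interaction, not the branch-1 SafeModeInfo implementation; the class, field, and method names are illustrative only:

    // Illustrative sketch: a block counted in the total but never reported by any
    // DataNode keeps the threshold check below from ever passing.
    public class SafeModeSketch {
      private final double threshold;  // e.g. a safemode threshold of 1.0
      private long blockTotal;         // analogous to what setBlockTotal() supplies
      private long blockSafe;          // incremented as DataNodes report blocks

      public SafeModeSketch(double threshold) {
        this.threshold = threshold;
      }

      public void setBlockTotal(long total) {
        this.blockTotal = total;
      }

      public void reportBlock() {
        blockSafe++;
      }

      // Safemode can be left only once the reported fraction reaches the threshold.
      public boolean canLeaveSafeMode() {
        return blockSafe >= (long) (threshold * blockTotal);
      }
    }

With a threshold of 1.0, including a never-written block in blockTotal means blockSafe can never reach the required count; getSafeBlockCount() avoids exactly this by excluding zero-length last blocks of files under construction.
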
Modified: hadoop/common/branches/branch-1/src/test/org/apache/hadoop/hdfs/server/namenode/TestStartup.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-1/src/test/org/apache/hadoop/hdfs/server/namenode/TestStartup.java?rev=1412218&r1=1412217&r2=1412218&view=diff
==============================================================================
--- hadoop/common/branches/branch-1/src/test/org/apache/hadoop/hdfs/server/namenode/TestStartup.java (original)
+++ hadoop/common/branches/branch-1/src/test/org/apache/hadoop/hdfs/server/namenode/TestStartup.java Wed Nov 21 18:10:19 2012
@@ -15,12 +15,17 @@ import org.apache.hadoop.fs.FSDataOutput
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.FileUtil;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.permission.FsPermission;
+import org.apache.hadoop.hdfs.DFSClient;
+import org.apache.hadoop.hdfs.DFSConfigKeys;
 import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
 import org.apache.hadoop.hdfs.server.common.HdfsConstants.StartupOption;
 import org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory;
 import org.apache.hadoop.hdfs.server.namenode.FSImage.NameNodeDirType;
 import org.apache.hadoop.hdfs.server.namenode.FSImage.NameNodeFile;
 import org.apache.hadoop.util.StringUtils;
+import org.junit.Test;
 
 /**
  * Startup and checkpoint tests
@@ -303,4 +308,28 @@ public class TestStartup extends TestCas
         cluster.shutdown();
     }
   }
+  
+  /** Test that SafeMode counts only complete blocks. */
+  @Test(timeout=60000)
+  public void testGetBlocks() throws Exception {
+    final Configuration CONF = new Configuration();
+
+    CONF.set(DFSConfigKeys.DFS_NAMENODE_SAFEMODE_THRESHOLD_PCT_KEY, "1.0f");
+    MiniDFSCluster cluster = new MiniDFSCluster(CONF, 2, true, null);
+    try {
+      cluster.waitActive();
+
+      // Create a file and allocate one block, but never write it to a DataNode
+      DFSClient client = new DFSClient(CONF);
+      client.namenode.create("/tmp1.txt", new FsPermission("755"),
+          "clientName", false, (short) 2, 1024);
+      client.namenode.addBlock("/tmp1.txt", "clientName", new DatanodeInfo[0]);
+
+      // The restart should succeed; the NameNode must not get stuck in safemode
+      cluster.restartNameNode();
+
+    } finally {
+      cluster.shutdown();
+    }
+  }
 }
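
To make the scenario in the test concrete, here is a small standalone worked example with hypothetical numbers (10 fully written blocks plus 1 block allocated by a client but never written, and the 1.0 threshold the test configures); it is an illustration, not part of the commit:

    public class SafeModeArithmetic {
      public static void main(String[] args) {
        double threshold = 1.0;   // same threshold the test configures
        long writtenBlocks = 10;  // blocks the DataNodes can actually report
        long neverWritten = 1;    // block allocated by a client but never written

        // Before HDFS-4208: the never-written block is part of the total,
        // so 10 >= 1.0 * 11 never holds and the NameNode stays in safemode.
        long oldTotal = writtenBlocks + neverWritten;
        System.out.println("old check: "
            + (writtenBlocks >= (long) (threshold * oldTotal)));   // false

        // After HDFS-4208: the zero-length last block is excluded from the total,
        // so 10 >= 1.0 * 10 holds once all written blocks have been reported.
        long newTotal = writtenBlocks;
        System.out.println("new check: "
            + (writtenBlocks >= (long) (threshold * newTotal)));   // true
      }
    }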