Posted to common-commits@hadoop.apache.org by ma...@apache.org on 2012/11/19 08:19:47 UTC

svn commit: r1411087 - in /hadoop/common/branches/branch-1.1: CHANGES.txt src/hdfs/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java src/test/org/apache/hadoop/hdfs/MiniDFSCluster.java src/test/org/apache/hadoop/hdfs/TestDFSClientRetries.java

Author: mattf
Date: Mon Nov 19 07:19:46 2012
New Revision: 1411087

URL: http://svn.apache.org/viewvc?rev=1411087&view=rev
Log:
merged r1372708 from branch-1: HDFS-3658. Fix bugs in TestDFSClientRetries and add more tests. Contributed by Tsz Wo Sze.

Modified:
    hadoop/common/branches/branch-1.1/CHANGES.txt
    hadoop/common/branches/branch-1.1/src/hdfs/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
    hadoop/common/branches/branch-1.1/src/test/org/apache/hadoop/hdfs/MiniDFSCluster.java
    hadoop/common/branches/branch-1.1/src/test/org/apache/hadoop/hdfs/TestDFSClientRetries.java

Modified: hadoop/common/branches/branch-1.1/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-1.1/CHANGES.txt?rev=1411087&r1=1411086&r2=1411087&view=diff
==============================================================================
--- hadoop/common/branches/branch-1.1/CHANGES.txt (original)
+++ hadoop/common/branches/branch-1.1/CHANGES.txt Mon Nov 19 07:19:46 2012
@@ -59,6 +59,8 @@ Release 1.1.1 - 2012.11.18
     MAPREDUCE-4792. Unit Test TestJobTrackerRestartWithLostTracker fails 
     with ant-1.8.4. (Amir Sanjar via mattf)
 
+    HDFS-3658. Fix bugs in TestDFSClientRetries and add more tests.  (szetszwo)
+
 Release 1.1.0 - 2012.09.28
 
   INCOMPATIBLE CHANGES

Modified: hadoop/common/branches/branch-1.1/src/hdfs/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-1.1/src/hdfs/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java?rev=1411087&r1=1411086&r2=1411087&view=diff
==============================================================================
--- hadoop/common/branches/branch-1.1/src/hdfs/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java (original)
+++ hadoop/common/branches/branch-1.1/src/hdfs/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java Mon Nov 19 07:19:46 2012
@@ -1610,6 +1610,9 @@ public class FSNamesystem implements FSC
                                   +src+" for "+clientName);
 
     synchronized (this) {
+      if (isInSafeMode()) { //check safe mode first to fail fast
+        throw new SafeModeException("Cannot add block to " + src, safeMode);
+      }
       // have we exceeded the configured limit of fs objects.
       checkFsObjectLimit();
 
@@ -1627,7 +1630,7 @@ public class FSNamesystem implements FSC
       replication = (int)pendingFile.getReplication();
     }
 
-    // choose targets for the new block tobe allocated.
+    // choose targets for the new block to be allocated.
     DatanodeDescriptor targets[] = replicator.chooseTarget(replication,
                                                            clientNode,
                                                            excludedNodes,
@@ -1640,7 +1643,7 @@ public class FSNamesystem implements FSC
 
     // Allocate a new block and record it in the INode. 
     synchronized (this) {
-      if (isInSafeMode()) {
+      if (isInSafeMode()) { //re-check safe mode: the lock was released above
         throw new SafeModeException("Cannot add block to " + src, safeMode);
       }
       INode[] pathINodes = dir.getExistingPathINodes(src);
@@ -4879,6 +4882,10 @@ public class FSNamesystem implements FSC
       this.safeReplication = conf.getInt("dfs.replication.min", 1);
       this.blockTotal = 0; 
       this.blockSafe = 0;
+
+      LOG.info("dfs.safemode.threshold.pct          = " + threshold);
+      LOG.info("dfs.namenode.safemode.min.datanodes = " + datanodeThreshold);
+      LOG.info("dfs.safemode.extension              = " + extension);
     }
 
     /**

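The two FSNamesystem hunks above implement a check/re-check pattern: getAdditionalBlock() takes the namesystem lock in two separate critical sections, and replica targets are chosen in between without holding the lock, so safe mode has to be tested again in the second section. Below is a minimal, self-contained sketch of that pattern; the class and member names (Allocator, inSafeMode, chooseTargets) are hypothetical stand-ins, not the actual FSNamesystem code:

    // Sketch of the fail-fast / re-check locking pattern from the hunks above.
    public class Allocator {
      private volatile boolean inSafeMode = true;

      public String allocateBlock(String src) {
        synchronized (this) {
          // Fail fast: reject the request before doing any further work.
          if (inSafeMode) {
            throw new IllegalStateException("Cannot add block to " + src);
          }
          // ... validate the lease and read the replication factor ...
        }

        // The lock is released here: target selection runs unlocked, so
        // safe mode may be re-entered concurrently (e.g. namenode restart).
        String targets = chooseTargets();

        synchronized (this) {
          // Re-check: the state may have changed while the lock was dropped.
          if (inSafeMode) {
            throw new IllegalStateException("Cannot add block to " + src);
          }
          return "block for " + src + " -> " + targets;
        }
      }

      private String chooseTargets() { return "dn1,dn2,dn3"; }
    }
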
Modified: hadoop/common/branches/branch-1.1/src/test/org/apache/hadoop/hdfs/MiniDFSCluster.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-1.1/src/test/org/apache/hadoop/hdfs/MiniDFSCluster.java?rev=1411087&r1=1411086&r2=1411087&view=diff
==============================================================================
--- hadoop/common/branches/branch-1.1/src/test/org/apache/hadoop/hdfs/MiniDFSCluster.java (original)
+++ hadoop/common/branches/branch-1.1/src/test/org/apache/hadoop/hdfs/MiniDFSCluster.java Mon Nov 19 07:19:46 2012
@@ -252,7 +252,8 @@ public class MiniDFSCluster {
     
     int replication = conf.getInt("dfs.replication", 3);
     conf.setInt("dfs.replication", Math.min(replication, numDataNodes));
-    conf.setInt("dfs.safemode.extension", 0);
+    int safemodeExtension = conf.getInt("dfs.safemode.extension.testing", 0);
+    conf.setInt("dfs.safemode.extension", safemodeExtension);
     conf.setInt("dfs.namenode.decommission.interval", 3); // 3 second
 
     // Set a small delay on blockReceived in the minicluster to approximate

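The MiniDFSCluster change above replaces the hard-coded dfs.safemode.extension = 0 with a lookup of dfs.safemode.extension.testing, so an individual test can keep the namenode in safe mode for a while after the block threshold is reached. A minimal usage sketch, assuming the branch-1 MiniDFSCluster(conf, numDataNodes, format, racks) constructor:

    // Opt in to a 5-second safe-mode extension for this test only.
    Configuration conf = new Configuration();
    // MiniDFSCluster copies this value into dfs.safemode.extension.
    conf.setInt("dfs.safemode.extension.testing", 5000);
    MiniDFSCluster cluster = new MiniDFSCluster(conf, 3, true, null);
    try {
      // ... exercise restart and safe-mode behavior here ...
    } finally {
      cluster.shutdown();
    }
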
Modified: hadoop/common/branches/branch-1.1/src/test/org/apache/hadoop/hdfs/TestDFSClientRetries.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-1.1/src/test/org/apache/hadoop/hdfs/TestDFSClientRetries.java?rev=1411087&r1=1411086&r2=1411087&view=diff
==============================================================================
--- hadoop/common/branches/branch-1.1/src/test/org/apache/hadoop/hdfs/TestDFSClientRetries.java (original)
+++ hadoop/common/branches/branch-1.1/src/test/org/apache/hadoop/hdfs/TestDFSClientRetries.java Mon Nov 19 07:19:46 2012
@@ -31,6 +31,7 @@ import java.net.SocketTimeoutException;
 import java.net.URI;
 import java.util.ArrayList;
 import java.util.List;
+import java.util.Random;
 import java.util.concurrent.TimeUnit;
 
 import junit.framework.Assert;
@@ -42,6 +43,7 @@ import org.apache.commons.logging.impl.L
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.ContentSummary;
 import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FSDataOutputStream;
 import org.apache.hadoop.fs.FileChecksum;
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
@@ -78,6 +80,7 @@ import org.apache.hadoop.security.Access
 import org.apache.hadoop.security.UserGroupInformation;
 import org.apache.hadoop.security.token.SecretManager.InvalidToken;
 import org.apache.hadoop.security.token.Token;
+import org.apache.hadoop.util.StringUtils;
 import org.apache.log4j.Level;
 import org.mockito.invocation.InvocationOnMock;
 import org.mockito.stubbing.Answer;
@@ -544,6 +547,8 @@ public class TestDFSClientRetries extend
     final Path dir = new Path("/testNamenodeRestart");
 
     conf.setBoolean(DFSConfigKeys.DFS_CLIENT_RETRY_POLICY_ENABLED_KEY, true);
+    conf.setInt(DFSConfigKeys.DFS_NAMENODE_SAFEMODE_MIN_DATANODES_KEY, 1);
+    conf.setInt("dfs.safemode.extension.testing", 5000);
 
     final short numDatanodes = 3;
     final MiniDFSCluster cluster = new MiniDFSCluster(
@@ -565,11 +570,38 @@ public class TestDFSClientRetries extend
       final FileStatus s1 = fs.getFileStatus(file1);
       assertEquals(length, s1.getLen());
 
+      //create file4 and write some data, but do not close it yet
+      final Path file4 = new Path(dir, "file4"); 
+      final FSDataOutputStream out4 = fs.create(file4, false, 4096,
+          fs.getDefaultReplication(file4), 1024L, null);
+      final byte[] bytes = new byte[1000];
+      new Random().nextBytes(bytes);
+      out4.write(bytes);
+      out4.write(bytes);
+      out4.sync();
+
       //shutdown namenode
       assertTrue(DistributedFileSystem.isHealthy(uri));
       cluster.shutdownNameNode();
       assertFalse(DistributedFileSystem.isHealthy(uri));
 
+      //namenode is down, continue writing file4 in a thread
+      final Thread file4thread = new Thread(new Runnable() {
+        @Override
+        public void run() {
+          try {
+            //write some more data and then close the file
+            out4.write(bytes);
+            out4.write(bytes);
+            out4.write(bytes);
+            out4.close();
+          } catch (Exception e) {
+            exceptions.add(e);
+          }
+        }
+      });
+      file4thread.start();
+
       //namenode is down, read the file in a thread
       final Thread reader = new Thread(new Runnable() {
         @Override
@@ -628,10 +660,26 @@ public class TestDFSClientRetries extend
 
       //check file1 and file3
       thread.join();
+      assertEmpty(exceptions);
       assertEquals(s1.getLen(), fs.getFileStatus(file3).getLen());
       assertEquals(fs.getFileChecksum(file1), fs.getFileChecksum(file3));
 
       reader.join();
+      assertEmpty(exceptions);
+
+      //check file4
+      file4thread.join();
+      assertEmpty(exceptions);
+      {
+        final FSDataInputStream in = fs.open(file4);
+        int count = 0;
+        for(int r; (r = in.read()) != -1; count++) {
+          Assert.assertEquals(String.format("count=%d", count),
+              bytes[count % bytes.length], (byte)r);
+        }
+        Assert.assertEquals(5 * bytes.length, count);
+        in.close();
+      }
 
       //enter safe mode
       assertTrue(DistributedFileSystem.isHealthy(uri));
@@ -671,19 +719,28 @@ public class TestDFSClientRetries extend
         LOG.info("GOOD!", fnfe);
       }
 
-      if (!exceptions.isEmpty()) {
-        LOG.error("There are " + exceptions.size() + " exception(s):");
-        for(int i = 0; i < exceptions.size(); i++) {
-          LOG.error("Exception " + i, exceptions.get(i));
-        }
-        fail();
-      }
+      assertEmpty(exceptions);
     } finally {
       cluster.shutdown();
     }
   }
 
-  public static FileSystem createFsWithDifferentUsername(
+  static void assertEmpty(final List<Exception> exceptions) {
+    if (!exceptions.isEmpty()) {
+      final StringBuilder b = new StringBuilder("There are ")
+        .append(exceptions.size())
+        .append(" exception(s):");
+      for(int i = 0; i < exceptions.size(); i++) {
+        b.append("\n  Exception ")
+         .append(i)
+         .append(": ")
+         .append(StringUtils.stringifyException(exceptions.get(i)));
+      }
+      fail(b.toString());
+    }
+  }
+
+  private static FileSystem createFsWithDifferentUsername(
       final Configuration conf, final boolean isWebHDFS
       ) throws IOException, InterruptedException {
     String username = UserGroupInformation.getCurrentUser().getShortUserName()+"_XXX";
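
The assertEmpty() helper introduced above replaces the inline log-then-fail() sequence with a single fail(message) that embeds every collected exception via StringUtils.stringifyException, so the JUnit failure report carries the full stack traces. It pairs with the idiom the test uses throughout: worker threads catch their own exceptions into a shared list, and the main thread asserts the list is empty at each join point. A minimal sketch of that idiom with hypothetical names; note the list should be thread-safe, since several threads may append to it:

    import java.util.ArrayList;
    import java.util.Collections;
    import java.util.List;

    public class CollectAndAssert {
      public static void main(String[] args) throws InterruptedException {
        // Thread-safe list: worker threads add, the main thread checks.
        final List<Exception> exceptions =
            Collections.synchronizedList(new ArrayList<Exception>());

        final Thread worker = new Thread(new Runnable() {
          @Override
          public void run() {
            try {
              // ... I/O that may fail while the namenode is down ...
            } catch (Exception e) {
              exceptions.add(e); // record the failure instead of swallowing it
            }
          }
        });
        worker.start();
        // ... restart the namenode, do other work ...
        worker.join();
        // In the test this is assertEmpty(exceptions), which fails with
        // every stringified exception in the assertion message.
        if (!exceptions.isEmpty()) {
          throw new AssertionError(exceptions.size() + " exception(s): " + exceptions);
        }
      }
    }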