You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by su...@apache.org on 2011/09/03 00:03:59 UTC

svn commit: r1164736 - in /hadoop/common/branches/branch-0.20-security: CHANGES.txt src/hdfs/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java src/test/org/apache/hadoop/hdfs/TestFileAppend4.java

Author: suresh
Date: Fri Sep  2 22:03:59 2011
New Revision: 1164736

URL: http://svn.apache.org/viewvc?rev=1164736&view=rev
Log:
Port from 0.20-append - HDFS-1141. completeFile does not check lease ownership. Contributed by Todd Lipcon.

Modified:
    hadoop/common/branches/branch-0.20-security/CHANGES.txt
    hadoop/common/branches/branch-0.20-security/src/hdfs/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
    hadoop/common/branches/branch-0.20-security/src/test/org/apache/hadoop/hdfs/TestFileAppend4.java

Modified: hadoop/common/branches/branch-0.20-security/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20-security/CHANGES.txt?rev=1164736&r1=1164735&r2=1164736&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.20-security/CHANGES.txt (original)
+++ hadoop/common/branches/branch-0.20-security/CHANGES.txt Fri Sep  2 22:03:59 2011
@@ -69,6 +69,9 @@ Release 0.20.205.0 - unreleased
     HDFS-1207. FSNamesystem.stallReplicationWork should be volatile.
     (Todd Lipcon via dhruba)
 
+    HDFS-1141. completeFile does not check lease ownership.
+    (Todd Lipcon via dhruba)
+
   IMPROVEMENTS
 
     MAPREDUCE-2187. Reporter sends progress during sort/merge. (Anupam Seth via

Modified: hadoop/common/branches/branch-0.20-security/src/hdfs/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20-security/src/hdfs/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java?rev=1164736&r1=1164735&r2=1164736&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.20-security/src/hdfs/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java (original)
+++ hadoop/common/branches/branch-0.20-security/src/hdfs/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java Fri Sep  2 22:03:59 2011
@@ -1581,24 +1581,19 @@ public class FSNamesystem implements FSC
     NameNode.stateChangeLog.debug("DIR* NameSystem.completeFile: " + src + " for " + holder);
     if (isInSafeMode())
       throw new SafeModeException("Cannot complete file " + src, safeMode);
-    INode iFile = dir.getFileINode(src);
-    INodeFileUnderConstruction pendingFile = null;
-    Block[] fileBlocks = null;
-
-    if (iFile != null && iFile.isUnderConstruction()) {
-      pendingFile = (INodeFileUnderConstruction) iFile;
-      fileBlocks =  dir.getFileBlocks(src);
-    }
-    if (fileBlocks == null ) {    
+
+    INodeFileUnderConstruction pendingFile  = checkLease(src, holder);
+    Block[] fileBlocks =  dir.getFileBlocks(src);
+
+    if (fileBlocks == null ) {
       NameNode.stateChangeLog.warn("DIR* NameSystem.completeFile: "
                                    + "failed to complete " + src
-                                   + " because dir.getFileBlocks() is null " + 
-                                   " and pendingFile is " + 
-                                   ((pendingFile == null) ? "null" : 
-                                     ("from " + pendingFile.getClientMachine()))
-                                  );                      
+                                   + " because dir.getFileBlocks() is null,"
+                                   + " pending from " + pendingFile.getClientMachine());
       return CompleteFileStatus.OPERATION_FAILED;
-    } else if (!checkFileProgress(pendingFile, true)) {
+    }
+
+    if (!checkFileProgress(pendingFile, true)) {
       return CompleteFileStatus.STILL_WAITING;
     }
 

Modified: hadoop/common/branches/branch-0.20-security/src/test/org/apache/hadoop/hdfs/TestFileAppend4.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20-security/src/test/org/apache/hadoop/hdfs/TestFileAppend4.java?rev=1164736&r1=1164735&r2=1164736&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.20-security/src/test/org/apache/hadoop/hdfs/TestFileAppend4.java (original)
+++ hadoop/common/branches/branch-0.20-security/src/test/org/apache/hadoop/hdfs/TestFileAppend4.java Fri Sep  2 22:03:59 2011
@@ -691,6 +691,85 @@ public class TestFileAppend4 extends Tes
     }
   }
 
+
+  /**
+   * Test case that stops a writer after finalizing a block but
+   * before calling completeFile, recovers a file from another writer,
+   * starts writing from that writer, and then has the old lease holder
+   * call completeFile
+   */
+  public void testCompleteOtherLeaseHoldersFile() throws Throwable {
+    cluster = new MiniDFSCluster(conf, 3, true, null);
+
+    try {
+      cluster.waitActive();
+      NameNode preSpyNN = cluster.getNameNode();
+      NameNode spyNN = spy(preSpyNN);
+
+      // Delay completeFile
+      DelayAnswer delayer = new DelayAnswer();
+      doAnswer(delayer).when(spyNN).complete(anyString(), anyString());
+
+      DFSClient client = new DFSClient(null, spyNN, conf, null);
+      file1 = new Path("/testRecoverFinalized");
+      final OutputStream stm = client.create("/testRecoverFinalized", true);
+
+      // write 1/2 block
+      AppendTestUtil.write(stm, 0, 4096);
+      final AtomicReference<Throwable> err = new AtomicReference<Throwable>();
+      Thread t = new Thread() { 
+          public void run() {
+            try {
+              stm.close();
+            } catch (Throwable t) {
+              err.set(t);
+            }
+          }};
+      t.start();
+      LOG.info("Waiting for close to get to latch...");
+      delayer.waitForCall();
+
+      // At this point, the block is finalized on the DNs, but the file
+      // has not been completed in the NN.
+      // Lose the leases
+      LOG.info("Killing lease checker");
+      client.leasechecker.interruptAndJoin();
+
+      FileSystem fs1 = cluster.getFileSystem();
+      FileSystem fs2 = AppendTestUtil.createHdfsWithDifferentUsername(
+        fs1.getConf());
+
+      LOG.info("Recovering file");
+      recoverFile(fs2);
+
+      LOG.info("Opening file for append from new fs");
+      FSDataOutputStream appenderStream = fs2.append(file1);
+      
+      LOG.info("Writing some data from new appender");
+      AppendTestUtil.write(appenderStream, 0, 4096);
+      
+      LOG.info("Telling old close to proceed.");
+      delayer.proceed();
+      LOG.info("Waiting for close to finish.");
+      t.join();
+      LOG.info("Close finished.");
+
+      // We expect that close will get a "Lease mismatch"
+      // error.
+      Throwable thrownByClose = err.get();
+      assertNotNull(thrownByClose);
+      assertTrue(thrownByClose instanceof IOException);
+      if (!thrownByClose.getMessage().contains(
+            "Lease mismatch"))
+        throw thrownByClose;
+      
+      // The appender should be able to close properly
+      appenderStream.close();
+    } finally {
+      cluster.shutdown();
+    }
+  }  
+  
   /**
    * Test for an intermittent failure of commitBlockSynchronization.
    * This could happen if the DN crashed between calling updateBlocks
@@ -727,6 +806,7 @@ public class TestFileAppend4 extends Tes
     LOG.info("STOP");
   }
 
+  
   /**
    * Test that when a DN starts up with bbws from a file that got
    * removed or finalized when it was down, the block gets deleted.