You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by om...@apache.org on 2011/03/04 05:08:24 UTC

svn commit: r1077372 - in /hadoop/common/branches/branch-0.20-security-patches: ./ src/hdfs/org/apache/hadoop/hdfs/server/common/ src/hdfs/org/apache/hadoop/hdfs/server/namenode/ src/test/org/apache/hadoop/hdfs/ src/test/org/apache/hadoop/hdfs/server/n...

Author: omalley
Date: Fri Mar  4 04:08:24 2011
New Revision: 1077372

URL: http://svn.apache.org/viewvc?rev=1077372&view=rev
Log:
commit d18a5b878fae3b80dc781f8dcce37b913b2d7a61
Author: Suresh Srinivas <su...@yahoo-inc.com>
Date:   Tue Apr 6 17:26:10 2010 -0700

    HDFS-955 from https://issues.apache.org/jira/secure/attachment/12440925/saveNamespace-0.20.patch

Added:
    hadoop/common/branches/branch-0.20-security-patches/src/test/org/apache/hadoop/hdfs/server/namenode/TestSaveNamespace.java
Modified:
    hadoop/common/branches/branch-0.20-security-patches/CHANGES.txt
    hadoop/common/branches/branch-0.20-security-patches/src/hdfs/org/apache/hadoop/hdfs/server/common/Storage.java
    hadoop/common/branches/branch-0.20-security-patches/src/hdfs/org/apache/hadoop/hdfs/server/namenode/FSDirectory.java
    hadoop/common/branches/branch-0.20-security-patches/src/hdfs/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java
    hadoop/common/branches/branch-0.20-security-patches/src/hdfs/org/apache/hadoop/hdfs/server/namenode/FSImage.java
    hadoop/common/branches/branch-0.20-security-patches/src/hdfs/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
    hadoop/common/branches/branch-0.20-security-patches/src/hdfs/org/apache/hadoop/hdfs/server/namenode/SecondaryNameNode.java
    hadoop/common/branches/branch-0.20-security-patches/src/test/org/apache/hadoop/hdfs/TestDFSStorageStateRecovery.java
    hadoop/common/branches/branch-0.20-security-patches/src/test/org/apache/hadoop/hdfs/UpgradeUtilities.java

Modified: hadoop/common/branches/branch-0.20-security-patches/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20-security-patches/CHANGES.txt?rev=1077372&r1=1077371&r2=1077372&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.20-security-patches/CHANGES.txt (original)
+++ hadoop/common/branches/branch-0.20-security-patches/CHANGES.txt Fri Mar  4 04:08:24 2011
@@ -1,5 +1,14 @@
 Hadoop Change Log
 
+Release 0.20.3 - Unreleased
+
+  IMPROVEMENTS
+
+  BUG FIXES
+
+    HDFS-955. New implementation of saveNamespace() to avoid loss of edits 
+    when name-node fails during saving. (shv)
+
 Release 0.20.2 - Unreleased
 
   BUG FIXES

Modified: hadoop/common/branches/branch-0.20-security-patches/src/hdfs/org/apache/hadoop/hdfs/server/common/Storage.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20-security-patches/src/hdfs/org/apache/hadoop/hdfs/server/common/Storage.java?rev=1077372&r1=1077371&r2=1077372&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.20-security-patches/src/hdfs/org/apache/hadoop/hdfs/server/common/Storage.java (original)
+++ hadoop/common/branches/branch-0.20-security-patches/src/hdfs/org/apache/hadoop/hdfs/server/common/Storage.java Fri Mar  4 04:08:24 2011
@@ -35,7 +35,6 @@ import org.apache.hadoop.hdfs.protocol.F
 import org.apache.hadoop.hdfs.server.common.HdfsConstants.NodeType;
 import org.apache.hadoop.hdfs.server.common.HdfsConstants.StartupOption;
 import org.apache.hadoop.fs.FileUtil;
-import org.apache.hadoop.util.StringUtils;
 import org.apache.hadoop.util.VersionInfo;
 
 
@@ -295,30 +294,119 @@ public abstract class Storage extends St
         throw new IOException("Cannot create directory " + curDir);
     }
 
+    /**
+     * Directory {@code current} contains latest files defining
+     * the file system meta-data.
+     * 
+     * @return the directory path
+     */
     public File getCurrentDir() {
       return new File(root, STORAGE_DIR_CURRENT);
     }
+
+    /**
+     * File {@code VERSION} contains the following fields:
+     * <ol>
+     * <li>node type</li>
+     * <li>layout version</li>
+     * <li>namespaceID</li>
+     * <li>fs state creation time</li>
+     * <li>other fields specific for this node type</li>
+     * </ol>
+     * The version file is always written last during storage directory updates.
+     * The existence of the version file indicates that all other files have
+     * been successfully written in the storage directory, the storage is valid
+     * and does not need to be recovered.
+     * 
+     * @return the version file path
+     */
     public File getVersionFile() {
       return new File(new File(root, STORAGE_DIR_CURRENT), STORAGE_FILE_VERSION);
     }
+
+    /**
+     * File {@code VERSION} from the {@code previous} directory.
+     * 
+     * @return the previous version file path
+     */
     public File getPreviousVersionFile() {
       return new File(new File(root, STORAGE_DIR_PREVIOUS), STORAGE_FILE_VERSION);
     }
+
+    /**
+     * Directory {@code previous} contains the previous file system state,
+     * which the system can be rolled back to.
+     * 
+     * @return the directory path
+     */
     public File getPreviousDir() {
       return new File(root, STORAGE_DIR_PREVIOUS);
     }
+
+    /**
+     * {@code previous.tmp} is a transient directory, which holds
+     * current file system state while the new state is saved into the new
+     * {@code current} during upgrade.
+     * If the saving succeeds {@code previous.tmp} will be moved to
+     * {@code previous}, otherwise it will be renamed back to 
+     * {@code current} by the recovery procedure during startup.
+     * 
+     * @return the directory path
+     */
     public File getPreviousTmp() {
       return new File(root, STORAGE_TMP_PREVIOUS);
     }
+
+    /**
+     * {@code removed.tmp} is a transient directory, which holds
+     * current file system state while the previous state is moved into
+     * {@code current} during rollback.
+     * If the moving succeeds {@code removed.tmp} will be removed,
+     * otherwise it will be renamed back to 
+     * {@code current} by the recovery procedure during startup.
+     * 
+     * @return the directory path
+     */
     public File getRemovedTmp() {
       return new File(root, STORAGE_TMP_REMOVED);
     }
+
+    /**
+     * {@code finalized.tmp} is a transient directory, which holds
+     * the {@code previous} file system state while it is being removed
+     * in response to the finalize request.
+     * Finalize operation will remove {@code finalized.tmp} when completed,
+     * otherwise the removal will resume upon the system startup.
+     * 
+     * @return the directory path
+     */
     public File getFinalizedTmp() {
       return new File(root, STORAGE_TMP_FINALIZED);
     }
+
+    /**
+     * {@code lastcheckpoint.tmp} is a transient directory, which holds
+     * current file system state while the new state is saved into the new
+     * {@code current} during regular namespace updates.
+     * If the saving succeeds {@code lastcheckpoint.tmp} will be moved to
+     * {@code previous.checkpoint}, otherwise it will be renamed back to 
+     * {@code current} by the recovery procedure during startup.
+     * 
+     * @return the directory path
+     */
     public File getLastCheckpointTmp() {
       return new File(root, STORAGE_TMP_LAST_CKPT);
     }
+
+    /**
+     * {@code previous.checkpoint} is a directory, which holds the previous
+     * (before the last save) state of the storage directory.
+     * The directory is created as a reference only, it does not play role
+     * in state recovery procedures, and is recycled automatically, 
+     * but it may be useful for manual recovery of a stale state of the system.
+     * 
+     * @return the directory path
+     */
     public File getPreviousCheckpoint() {
       return new File(root, STORAGE_PREVIOUS_CKPT);
     }
@@ -329,8 +417,9 @@ public abstract class Storage extends St
      * @param startOpt a startup option.
      *  
      * @return state {@link StorageState} of the storage directory 
-     * @throws {@link InconsistentFSStateException} if directory state is not 
-     * consistent and cannot be recovered 
+     * @throws InconsistentFSStateException if directory state is not 
+     * consistent and cannot be recovered.
+     * @throws IOException
      */
     public StorageState analyzeStorage(StartupOption startOpt) throws IOException {
       assert root != null : "root is null";
@@ -529,7 +618,7 @@ public abstract class Storage extends St
         file.close();
         return null;
       } catch(IOException e) {
-        LOG.info(StringUtils.stringifyException(e));
+        LOG.error("Cannot create lock on " + lockF, e);
         file.close();
         throw e;
       }

Modified: hadoop/common/branches/branch-0.20-security-patches/src/hdfs/org/apache/hadoop/hdfs/server/namenode/FSDirectory.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20-security-patches/src/hdfs/org/apache/hadoop/hdfs/server/namenode/FSDirectory.java?rev=1077372&r1=1077371&r2=1077372&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.20-security-patches/src/hdfs/org/apache/hadoop/hdfs/server/namenode/FSDirectory.java (original)
+++ hadoop/common/branches/branch-0.20-security-patches/src/hdfs/org/apache/hadoop/hdfs/server/namenode/FSDirectory.java Fri Mar  4 04:08:24 2011
@@ -92,7 +92,7 @@ class FSDirectory implements FSConstants
     }
     try {
       if (fsImage.recoverTransitionRead(dataDirs, editsDirs, startOpt)) {
-        fsImage.saveFSImage();
+        fsImage.saveNamespace(true);
       }
       FSEditLog editLog = fsImage.getEditLog();
       assert editLog != null : "editLog must be initialized";

Modified: hadoop/common/branches/branch-0.20-security-patches/src/hdfs/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20-security-patches/src/hdfs/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java?rev=1077372&r1=1077371&r2=1077372&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.20-security-patches/src/hdfs/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java (original)
+++ hadoop/common/branches/branch-0.20-security-patches/src/hdfs/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java Fri Mar  4 04:08:24 2011
@@ -351,18 +351,6 @@ public class FSEditLog {
   }
 
   /**
-   * Create edits.new if non existent.
-   */
-  synchronized void createNewIfMissing() throws IOException {
- for (Iterator<StorageDirectory> it = 
-       fsimage.dirIterator(NameNodeDirType.EDITS); it.hasNext();) {
-      File newFile = getEditNewFile(it.next());
-      if (!newFile.exists())
-        createEditLogFile(newFile);
-    }
-  }
-  
-  /**
    * Shutdown the file store.
    */
   public synchronized void close() throws IOException {

Modified: hadoop/common/branches/branch-0.20-security-patches/src/hdfs/org/apache/hadoop/hdfs/server/namenode/FSImage.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20-security-patches/src/hdfs/org/apache/hadoop/hdfs/server/namenode/FSImage.java?rev=1077372&r1=1077371&r2=1077372&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.20-security-patches/src/hdfs/org/apache/hadoop/hdfs/server/namenode/FSImage.java (original)
+++ hadoop/common/branches/branch-0.20-security-patches/src/hdfs/org/apache/hadoop/hdfs/server/namenode/FSImage.java Fri Mar  4 04:08:24 2011
@@ -410,12 +410,7 @@ public class FSImage extends Storage {
       // rename current to tmp
       rename(curDir, tmpDir);
       // save new image
-      if (!curDir.mkdir())
-        throw new IOException("Cannot create directory " + curDir);
-      saveFSImage(getImageFile(sd, NameNodeFile.IMAGE));
-      editLog.createEditLogFile(getImageFile(sd, NameNodeFile.EDITS));
-      // write version and time files
-      sd.write();
+      saveCurrent(sd);
       // rename tmp to previous
       rename(tmpDir, prevDir);
       isUpgradeFinalized = false;
@@ -523,7 +518,7 @@ public class FSImage extends Storage {
     realImage.setStorageInfo(ckptImage);
     fsNamesys.dir.fsImage = realImage;
     // and save it
-    saveFSImage();
+    saveNamespace(false);
   }
 
   void finalizeUpgrade() throws IOException {
@@ -791,9 +786,24 @@ public class FSImage extends Storage {
       throw new IOException("Edits file is not found in " + editsDirs);
 
     // Make sure we are loading image and edits from same checkpoint
-    if (latestNameCheckpointTime != latestEditsCheckpointTime)
-      throw new IOException("Inconsitent storage detected, " +
-                            "name and edits storage do not match");
+    if (latestNameCheckpointTime > latestEditsCheckpointTime
+        && latestNameSD != latestEditsSD
+        && latestNameSD.getStorageDirType() == NameNodeDirType.IMAGE
+        && latestEditsSD.getStorageDirType() == NameNodeDirType.EDITS) {
+      // This is a rare failure when NN has image-only and edits-only
+      // storage directories, and fails right after saving images,
+      // in some of the storage directories, but before purging edits.
+      // See -NOTE- in saveNamespace().
+      LOG.error("This is a rare failure scenario!!!");
+      LOG.error("Image checkpoint time " + latestNameCheckpointTime +
+                " > edits checkpoint time " + latestEditsCheckpointTime);
+      LOG.error("Name-node will treat the image as the latest state of " +
+                "the namespace. Old edits will be discarded.");
+    } else if (latestNameCheckpointTime != latestEditsCheckpointTime)
+      throw new IOException("Inconsistent storage detected, " +
+                      "image and edits checkpoint times do not match. " +
+                      "image checkpoint time = " + latestNameCheckpointTime +
+                      "edits checkpoint time = " + latestEditsCheckpointTime);
     
     // Recover from previous interrrupted checkpoint if any
     needToSave |= recoverInterruptedCheckpoint(latestNameSD, latestEditsSD);
@@ -810,7 +820,11 @@ public class FSImage extends Storage {
         + (FSNamesystem.now() - startTime)/1000 + " seconds.");
     
     // Load latest edits
-    needToSave |= (loadFSEdits(latestEditsSD) > 0);
+    if (latestNameCheckpointTime > latestEditsCheckpointTime)
+      // the image is already current, discard edits
+      needToSave |= true;
+    else // latestNameCheckpointTime == latestEditsCheckpointTime
+      needToSave |= (loadFSEdits(latestEditsSD) > 0);
     
     return needToSave;
   }
@@ -1041,26 +1055,141 @@ public class FSImage extends Storage {
   }
 
   /**
-   * Save the contents of the FS image
-   * and create empty edits.
+   * Save the contents of the FS image and create empty edits.
+   * 
+   * In order to minimize the recovery effort in case of failure during
+   * saveNamespace the algorithm reduces discrepancy between directory states
+   * by performing updates in the following order:
+   * <ol>
+   * <li> rename current to lastcheckpoint.tmp for all of them,</li>
+   * <li> save image and recreate edits for all of them,</li>
+   * <li> rename lastcheckpoint.tmp to previous.checkpoint.</li>
+   * </ol>
+   * On stage (2) we first save all images, then recreate edits.
+   * Otherwise the name-node may purge all edits and fail,
+   * in which case the journal will be lost.
    */
-  public void saveFSImage() throws IOException {
-    editLog.createNewIfMissing();
-    for (Iterator<StorageDirectory> it = 
-                           dirIterator(); it.hasNext();) {
+  void saveNamespace(boolean renewCheckpointTime) throws IOException {
+    editLog.close();
+    if(renewCheckpointTime)
+      this.checkpointTime = FSNamesystem.now();
+
+    // mv current -> lastcheckpoint.tmp
+    for (Iterator<StorageDirectory> it = dirIterator(); it.hasNext();) {
+      StorageDirectory sd = it.next();
+      try {
+        moveCurrent(sd);
+      } catch(IOException ie) {
+        LOG.error("Unable to move current for " + sd.getRoot(), ie);
+        processIOError(sd.getRoot());
+      }
+    }
+
+    // save images into current
+    for (Iterator<StorageDirectory> it = dirIterator(NameNodeDirType.IMAGE);
+                                                              it.hasNext();) {
+      StorageDirectory sd = it.next();
+      try {
+        saveCurrent(sd);
+      } catch(IOException ie) {
+        LOG.error("Unable to save image for " + sd.getRoot(), ie);
+        processIOError(sd.getRoot());
+      }
+    }
+
+    // -NOTE-
+    // If NN has image-only and edits-only storage directories and fails here 
+    // the image will have the latest namespace state.
+    // During startup the image-only directories will recover by discarding
+    // lastcheckpoint.tmp, while
+    // the edits-only directories will recover by falling back
+    // to the old state contained in their lastcheckpoint.tmp.
+    // The edits directories should be discarded during startup because their
+    // checkpointTime is older than that of image directories.
+
+    // recreate edits in current
+    for (Iterator<StorageDirectory> it = dirIterator(NameNodeDirType.EDITS);
+                                                              it.hasNext();) {
       StorageDirectory sd = it.next();
-      NameNodeDirType dirType = (NameNodeDirType)sd.getStorageDirType();
-      if (dirType.isOfType(NameNodeDirType.IMAGE))
-        saveFSImage(getImageFile(sd, NameNodeFile.IMAGE_NEW));
-      if (dirType.isOfType(NameNodeDirType.EDITS)) {    
-        editLog.createEditLogFile(getImageFile(sd, NameNodeFile.EDITS));
-        File editsNew = getImageFile(sd, NameNodeFile.EDITS_NEW);
-        if (editsNew.exists()) 
-          editLog.createEditLogFile(editsNew);
+      try {
+        saveCurrent(sd);
+      } catch(IOException ie) {
+        LOG.error("Unable to save edits for " + sd.getRoot(), ie);
+        processIOError(sd.getRoot());
       }
     }
+    // mv lastcheckpoint.tmp -> previous.checkpoint
+    for (Iterator<StorageDirectory> it = dirIterator(); it.hasNext();) {
+      StorageDirectory sd = it.next();
+      try {
+        moveLastCheckpoint(sd);
+      } catch(IOException ie) {
+        LOG.error("Unable to move last checkpoint for " + sd.getRoot(), ie);
+        processIOError(sd.getRoot());
+      }
+    }
+    if(!editLog.isOpen()) editLog.open();
     ckptState = CheckpointStates.UPLOAD_DONE;
-    rollFSImage();
+  }
+
+  /**
+   * Save current image and empty journal into {@code current} directory.
+   */
+  protected void saveCurrent(StorageDirectory sd) throws IOException {
+    File curDir = sd.getCurrentDir();
+    NameNodeDirType dirType = (NameNodeDirType)sd.getStorageDirType();
+    // save new image or new edits
+    if (!curDir.exists() && !curDir.mkdir())
+      throw new IOException("Cannot create directory " + curDir);
+    if (dirType.isOfType(NameNodeDirType.IMAGE))
+      saveFSImage(getImageFile(sd, NameNodeFile.IMAGE));
+    if (dirType.isOfType(NameNodeDirType.EDITS))
+      editLog.createEditLogFile(getImageFile(sd, NameNodeFile.EDITS));
+    // write version and time files
+    sd.write();
+  }
+
+  /**
+   * Move {@code current} to {@code lastcheckpoint.tmp} and
+   * recreate empty {@code current}.
+   * {@code current} is moved only if it is well formatted,
+   * that is contains VERSION file.
+   * 
+   * @see org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory#getLastCheckpointTmp()
+   * @see org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory#getPreviousCheckpoint()
+   */
+  protected void moveCurrent(StorageDirectory sd)
+  throws IOException {
+    File curDir = sd.getCurrentDir();
+    File tmpCkptDir = sd.getLastCheckpointTmp();
+    // mv current -> lastcheckpoint.tmp
+    // only if current is formatted - has VERSION file
+    if(sd.getVersionFile().exists()) {
+      assert curDir.exists() : curDir + " directory must exist.";
+      assert !tmpCkptDir.exists() : tmpCkptDir + " directory must not exist.";
+      rename(curDir, tmpCkptDir);
+    }
+    // recreate current
+    if(!curDir.exists() && !curDir.mkdir())
+      throw new IOException("Cannot create directory " + curDir);
+  }
+
+  /**
+   * Move {@code lastcheckpoint.tmp} to {@code previous.checkpoint}
+   * 
+   * @see org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory#getPreviousCheckpoint()
+   * @see org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory#getLastCheckpointTmp()
+   */
+  protected void moveLastCheckpoint(StorageDirectory sd)
+  throws IOException {
+    File tmpCkptDir = sd.getLastCheckpointTmp();
+    File prevCkptDir = sd.getPreviousCheckpoint();
+    // remove previous.checkpoint
+    if (prevCkptDir.exists())
+      deleteDir(prevCkptDir);
+    // rename lastcheckpoint.tmp -> previous.checkpoint
+    if(tmpCkptDir.exists())
+      rename(tmpCkptDir, prevCkptDir);
   }
 
   /**
@@ -1090,12 +1219,7 @@ public class FSImage extends Storage {
     sd.clearDirectory(); // create currrent dir
     sd.lock();
     try {
-      NameNodeDirType dirType = (NameNodeDirType)sd.getStorageDirType();
-      if (dirType.isOfType(NameNodeDirType.IMAGE))
-        saveFSImage(getImageFile(sd, NameNodeFile.IMAGE));
-      if (dirType.isOfType(NameNodeDirType.EDITS))
-        editLog.createEditLogFile(getImageFile(sd, NameNodeFile.EDITS));
-      sd.write();
+      saveCurrent(sd);
     } finally {
       sd.unlock();
     }

Modified: hadoop/common/branches/branch-0.20-security-patches/src/hdfs/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20-security-patches/src/hdfs/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java?rev=1077372&r1=1077371&r2=1077372&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.20-security-patches/src/hdfs/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java (original)
+++ hadoop/common/branches/branch-0.20-security-patches/src/hdfs/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java Fri Mar  4 04:08:24 2011
@@ -3707,7 +3707,7 @@ public class FSNamesystem implements FSC
       throw new IOException("Safe mode should be turned ON " +
                             "in order to create namespace image.");
     }
-    getFSImage().saveFSImage();
+    getFSImage().saveNamespace(true);
     LOG.info("New namespace image has been created.");
   }
 

Modified: hadoop/common/branches/branch-0.20-security-patches/src/hdfs/org/apache/hadoop/hdfs/server/namenode/SecondaryNameNode.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20-security-patches/src/hdfs/org/apache/hadoop/hdfs/server/namenode/SecondaryNameNode.java?rev=1077372&r1=1077371&r2=1077372&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.20-security-patches/src/hdfs/org/apache/hadoop/hdfs/server/namenode/SecondaryNameNode.java (original)
+++ hadoop/common/branches/branch-0.20-security-patches/src/hdfs/org/apache/hadoop/hdfs/server/namenode/SecondaryNameNode.java Fri Mar  4 04:08:24 2011
@@ -650,29 +650,13 @@ public class SecondaryNameNode implement
      */
     void startCheckpoint() throws IOException {
       for(StorageDirectory sd : storageDirs) {
-        File curDir = sd.getCurrentDir();
-        File tmpCkptDir = sd.getLastCheckpointTmp();
-        assert !tmpCkptDir.exists() : 
-          tmpCkptDir.getName() + " directory must not exist.";
-        if(curDir.exists()) {
-          // rename current to tmp
-          rename(curDir, tmpCkptDir);
-        }
-        if (!curDir.mkdir())
-          throw new IOException("Cannot create directory " + curDir);
+        moveCurrent(sd);
       }
     }
 
     void endCheckpoint() throws IOException {
       for(StorageDirectory sd : storageDirs) {
-        File tmpCkptDir = sd.getLastCheckpointTmp();
-        File prevCkptDir = sd.getPreviousCheckpoint();
-        // delete previous dir
-        if (prevCkptDir.exists())
-          deleteDir(prevCkptDir);
-        // rename tmp to previous
-        if (tmpCkptDir.exists())
-          rename(tmpCkptDir, prevCkptDir);
+        moveLastCheckpoint(sd);
       }
     }
 
@@ -695,7 +679,7 @@ public class SecondaryNameNode implement
       loadFSImage(FSImage.getImageFile(sdName, NameNodeFile.IMAGE));
       loadFSEdits(sdEdits);
       sig.validateStorageInfo(this);
-      saveFSImage();
+      saveNamespace(false);
     }
   }
 }

Modified: hadoop/common/branches/branch-0.20-security-patches/src/test/org/apache/hadoop/hdfs/TestDFSStorageStateRecovery.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20-security-patches/src/test/org/apache/hadoop/hdfs/TestDFSStorageStateRecovery.java?rev=1077372&r1=1077371&r2=1077372&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.20-security-patches/src/test/org/apache/hadoop/hdfs/TestDFSStorageStateRecovery.java (original)
+++ hadoop/common/branches/branch-0.20-security-patches/src/test/org/apache/hadoop/hdfs/TestDFSStorageStateRecovery.java Fri Mar  4 04:08:24 2011
@@ -52,31 +52,42 @@ public class TestDFSStorageStateRecovery
    *  1) previous directory exists
    *  2) previous.tmp directory exists
    *  3) removed.tmp directory exists
-   *  4) node should recover and startup
-   *  5) current directory should exist after recovery but before startup
-   *  6) previous directory should exist after recovery but before startup
+   *  4) lastcheckpoint.tmp directory exists
+   *  5) node should recover and startup
+   *  6) current directory should exist after recovery but before startup
+   *  7) previous directory should exist after recovery but before startup
    */
   static boolean[][] testCases = new boolean[][] {
-    new boolean[] {true,  false, false, false, true,  true,  false}, // 1
-    new boolean[] {true,  true,  false, false, true,  true,  true }, // 2
-    new boolean[] {true,  false, true,  false, true,  true,  true }, // 3
-    new boolean[] {true,  true,  true,  true,  false, false, false }, // 4
-    new boolean[] {true,  true,  true,  false, false, false, false }, // 4
-    new boolean[] {false, true,  true,  true,  false, false, false }, // 4
-    new boolean[] {false, true,  true,  false, false, false, false }, // 4
-    new boolean[] {false, false, false, false, false, false, false }, // 5
-    new boolean[] {false, true,  false, false, false, false, false }, // 6
-    new boolean[] {false, false, true,  false, true,  true,  false}, // 7
-    new boolean[] {true,  false, false, true,  true,  true,  false}, // 8
-    new boolean[] {true,  true,  false, true,  false, false, false }, // 9
-    new boolean[] {true,  true,  true,  true,  false, false, false }, // 10
-    new boolean[] {true,  false, true,  true,  false, false, false }, // 10
-    new boolean[] {false, true,  true,  true,  false, false, false }, // 10
-    new boolean[] {false, false, true,  true,  false, false, false }, // 10
-    new boolean[] {false, false, false, true,  false, false, false }, // 11
-    new boolean[] {false, true,  false, true,  true,  true,  true }, // 12
+    new boolean[] {true,  false, false, false, false, true,  true,  false}, // 1
+    new boolean[] {true,  true,  false, false, false, true,  true,  true }, // 2
+    new boolean[] {true,  false, true,  false, false, true,  true,  true }, // 3
+    new boolean[] {true,  true,  true,  true,  false, false, false, false}, // 4
+    new boolean[] {true,  true,  true,  false, false, false, false, false}, // 4
+    new boolean[] {false, true,  true,  true,  false, false, false, false}, // 4
+    new boolean[] {false, true,  true,  false, false, false, false, false}, // 4
+    new boolean[] {false, false, false, false, false, false, false, false}, // 5
+    new boolean[] {false, true,  false, false, false, false, false, false}, // 6
+    new boolean[] {false, false, true,  false, false, true,  true,  false}, // 7
+    new boolean[] {true,  false, false, true,  false, true,  true,  false}, // 8
+    new boolean[] {true,  true,  false, true,  false, false, false, false}, // 9
+    new boolean[] {true,  true,  true,  true,  false, false, false, false}, // 10
+    new boolean[] {true,  false, true,  true,  false, false, false, false}, // 10
+    new boolean[] {false, true,  true,  true,  false, false, false, false}, // 10
+    new boolean[] {false, false, true,  true,  false, false, false, false}, // 10
+    new boolean[] {false, false, false, true,  false, false, false, false}, // 11
+    new boolean[] {false, true,  false, true,  false, true,  true,  true }, // 12
+    // name-node specific cases
+    new boolean[] {true,  false, false, false, true,  true,  true,  false}, // 13
+    new boolean[] {true,  true,  false, false, true,  true,  true,  false}, // 13
+    new boolean[] {false, false, false, false, true,  true,  true,  false}, // 14
+    new boolean[] {false, true,  false, false, true,  true,  true,  false}, // 14
+    new boolean[] {true,  false, true,  false, true,  false, false, false}, // 15
+    new boolean[] {true,  true,  false, true,  true,  false, false, false}  // 16
   };
-  
+
+  private static final int NUM_NN_TEST_CASES = testCases.length;
+  private static final int NUM_DN_TEST_CASES = 18;
+
   /**
    * Writes an INFO log message containing the parameters. Only
    * the first 4 elements of the state array are included in the message.
@@ -90,7 +101,8 @@ public class TestDFSStorageStateRecovery
              + " current="+state[0]
              + " previous="+state[1]
              + " previous.tmp="+state[2]
-             + " removed.tmp="+state[3]);
+             + " removed.tmp="+state[3]
+             + " lastcheckpoint.tmp="+state[4]);
   }
   
   /**
@@ -122,6 +134,8 @@ public class TestDFSStorageStateRecovery
       UpgradeUtilities.createStorageDirs(nodeType, baseDirs, "previous.tmp");
     if (state[3])  // removed.tmp
       UpgradeUtilities.createStorageDirs(nodeType, baseDirs, "removed.tmp");
+    if (state[4])  // lastcheckpoint.tmp
+      UpgradeUtilities.createStorageDirs(nodeType, baseDirs, "lastcheckpoint.tmp");
     return baseDirs;
   }
  
@@ -172,21 +186,20 @@ public class TestDFSStorageStateRecovery
  
   /**
    * This test iterates over the testCases table and attempts
-   * to startup the NameNode and DataNode normally.
+   * to startup the NameNode normally.
    */
-  public void testStorageStates() throws Exception {
+  public void testNNStorageStates() throws Exception {
     String[] baseDirs;
-    UpgradeUtilities.initialize();
 
     for (int numDirs = 1; numDirs <= 2; numDirs++) {
       conf = new Configuration();
       conf.setInt("dfs.datanode.scan.period.hours", -1);      
       conf = UpgradeUtilities.initializeStorageStateConf(numDirs, conf);
-      for (int i = 0; i < testCases.length; i++) {
+      for (int i = 0; i < NUM_NN_TEST_CASES; i++) {
         boolean[] testCase = testCases[i];
-        boolean shouldRecover = testCase[4];
-        boolean curAfterRecover = testCase[5];
-        boolean prevAfterRecover = testCase[6];
+        boolean shouldRecover = testCase[5];
+        boolean curAfterRecover = testCase[6];
+        boolean prevAfterRecover = testCase[7];
 
         log("NAME_NODE recovery", numDirs, i, testCase);
         baseDirs = createStorageState(NAME_NODE, testCase);
@@ -203,15 +216,37 @@ public class TestDFSStorageStateRecovery
             // check that the message says "not formatted" 
             // when storage directory is empty (case #5)
             if(!testCases[i][0] && !testCases[i][2] 
-                      && !testCases[i][1] && !testCases[i][3]) {
+                  && !testCases[i][1] && !testCases[i][3] && !testCases[i][4]) {
               assertTrue(expected.getLocalizedMessage().contains(
                   "NameNode is not formatted"));
             }
           }
         }
-        
+        cluster.shutdown();
+      } // end testCases loop
+    } // end numDirs loop
+  }
+
+  /**
+   * This test iterates over the testCases table and attempts
+   * to startup the DataNode normally.
+   */
+  public void testDNStorageStates() throws Exception {
+    String[] baseDirs;
+
+    for (int numDirs = 1; numDirs <= 2; numDirs++) {
+      conf = new Configuration();
+      conf.setInt("dfs.datanode.scan.period.hours", -1);      
+      conf = UpgradeUtilities.initializeStorageStateConf(numDirs, conf);
+      for (int i = 0; i < NUM_DN_TEST_CASES; i++) {
+        boolean[] testCase = testCases[i];
+        boolean shouldRecover = testCase[5];
+        boolean curAfterRecover = testCase[6];
+        boolean prevAfterRecover = testCase[7];
+
         log("DATA_NODE recovery", numDirs, i, testCase);
-        createStorageState(NAME_NODE, new boolean[] {true, true, false, false});
+        createStorageState(NAME_NODE,
+                           new boolean[] {true, true, false, false, false});
         cluster = new MiniDFSCluster(conf, 0, StartupOption.REGULAR);
         baseDirs = createStorageState(DATA_NODE, testCase);
         if (!testCase[0] && !testCase[1] && !testCase[2] && !testCase[3]) {
@@ -234,14 +269,21 @@ public class TestDFSStorageStateRecovery
       } // end testCases loop
     } // end numDirs loop
   }
- 
+
+  protected void setUp() throws Exception {
+    LOG.info("Setting up the directory structures.");
+    UpgradeUtilities.initialize();
+  }
+
   protected void tearDown() throws Exception {
     LOG.info("Shutting down MiniDFSCluster");
     if (cluster != null) cluster.shutdown();
   }
   
   public static void main(String[] args) throws Exception {
-    new TestDFSStorageStateRecovery().testStorageStates();
+    TestDFSStorageStateRecovery test = new TestDFSStorageStateRecovery();
+    test.testNNStorageStates();
+    test.testDNStorageStates();
   }
   
 }

Modified: hadoop/common/branches/branch-0.20-security-patches/src/test/org/apache/hadoop/hdfs/UpgradeUtilities.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20-security-patches/src/test/org/apache/hadoop/hdfs/UpgradeUtilities.java?rev=1077372&r1=1077371&r2=1077372&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.20-security-patches/src/test/org/apache/hadoop/hdfs/UpgradeUtilities.java (original)
+++ hadoop/common/branches/branch-0.20-security-patches/src/test/org/apache/hadoop/hdfs/UpgradeUtilities.java Fri Mar  4 04:08:24 2011
@@ -34,13 +34,13 @@ import org.apache.hadoop.fs.FileUtil;
 import org.apache.hadoop.fs.LocalFileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hdfs.protocol.FSConstants;
+import org.apache.hadoop.hdfs.protocol.FSConstants.SafeModeAction;
 import org.apache.hadoop.hdfs.server.common.HdfsConstants.NodeType;
 import org.apache.hadoop.hdfs.server.common.HdfsConstants.StartupOption;
 
 import static org.apache.hadoop.hdfs.server.common.HdfsConstants.NodeType.NAME_NODE;
 import static org.apache.hadoop.hdfs.server.common.HdfsConstants.NodeType.DATA_NODE;
 
-import org.apache.hadoop.hdfs.server.common.HdfsConstants;
 import org.apache.hadoop.hdfs.server.common.Storage;
 import org.apache.hadoop.hdfs.server.common.StorageInfo;
 import org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory;
@@ -116,8 +116,9 @@ public class UpgradeUtilities {
       writeFile(fs, new Path(baseDir, "file2"), buffer, bufferSize);
       
       // save image
-      namenode.getFSImage().saveFSImage();
-      namenode.getFSImage().getEditLog().open();
+      namenode.setSafeMode(SafeModeAction.SAFEMODE_ENTER);
+      namenode.saveNamespace();
+      namenode.setSafeMode(SafeModeAction.SAFEMODE_LEAVE);
       
       // write more files
       writeFile(fs, new Path(baseDir, "file3"), buffer, bufferSize);

Added: hadoop/common/branches/branch-0.20-security-patches/src/test/org/apache/hadoop/hdfs/server/namenode/TestSaveNamespace.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20-security-patches/src/test/org/apache/hadoop/hdfs/server/namenode/TestSaveNamespace.java?rev=1077372&view=auto
==============================================================================
--- hadoop/common/branches/branch-0.20-security-patches/src/test/org/apache/hadoop/hdfs/server/namenode/TestSaveNamespace.java (added)
+++ hadoop/common/branches/branch-0.20-security-patches/src/test/org/apache/hadoop/hdfs/server/namenode/TestSaveNamespace.java Fri Mar  4 04:08:24 2011
@@ -0,0 +1,227 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.server.namenode;
+
+import static org.mockito.Matchers.anyObject;
+import static org.mockito.Mockito.doAnswer;
+import static org.mockito.Mockito.doThrow;
+import static org.mockito.Mockito.spy;
+
+import java.io.File;
+import java.io.IOException;
+
+import junit.framework.TestCase;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.permission.FsPermission;
+import org.apache.hadoop.fs.permission.PermissionStatus;
+import org.apache.hadoop.hdfs.protocol.FSConstants.SafeModeAction;
+import org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory;
+import org.apache.hadoop.hdfs.server.namenode.metrics.NameNodeMetrics;
+import org.mockito.invocation.InvocationOnMock;
+import org.mockito.stubbing.Answer;
+
+/**
+ * Test various failure scenarios during saveNamespace() operation.
+ * Cases covered:
+ * <ol>
+ * <li>Recover from failure while saving into the second storage directory</li>
+ * <li>Recover from failure while moving current into lastcheckpoint.tmp</li>
+ * <li>Recover from failure while moving lastcheckpoint.tmp into
+ * previous.checkpoint</li>
+ * <li>Recover from failure while rolling edits file</li>
+ * </ol>
+ */
+public class TestSaveNamespace extends TestCase {
+  private static final Log LOG = LogFactory.getLog(TestSaveNamespace.class);
+
+  private static class FaultySaveImage implements Answer<Void> {
+    int count = 0;
+    FSImage origImage;
+
+    public FaultySaveImage(FSImage origImage) {
+      this.origImage = origImage;
+    }
+
+    public Void answer(InvocationOnMock invocation) throws Exception {
+      Object[] args = invocation.getArguments();
+      File f = (File)args[0];
+
+      if (count++ == 1) {
+        LOG.info("Injecting fault for file: " + f);
+        throw new RuntimeException("Injected fault: saveFSImage second time");
+      }
+      LOG.info("Not injecting fault for file: " + f);
+      origImage.saveFSImage(f);
+      return null;
+    }
+  }
+
+  private enum Fault {
+    SAVE_FSIMAGE,
+    MOVE_CURRENT,
+    MOVE_LAST_CHECKPOINT
+  };
+
+  private void saveNamespaceWithInjectedFault(Fault fault) throws IOException {
+    Configuration conf = getConf();
+    NameNode.myMetrics = new NameNodeMetrics(conf, null);
+    NameNode.format(conf);
+    NameNode nn = new NameNode(conf);
+    FSNamesystem fsn = nn.getNamesystem();
+
+    // Replace the FSImage with a spy
+    FSImage originalImage = fsn.dir.fsImage;
+    FSImage spyImage = spy(originalImage);
+    fsn.dir.fsImage = spyImage;
+
+    // inject fault
+    switch(fault) {
+    case SAVE_FSIMAGE:
+      // The spy throws a RuntimeException when writing to the second directory
+      doAnswer(new FaultySaveImage(originalImage)).
+        when(spyImage).saveFSImage((File)anyObject());
+      break;
+    case MOVE_CURRENT:
+      // The spy throws a RuntimeException when calling moveCurrent()
+      doThrow(new RuntimeException("Injected fault: moveCurrent")).
+        when(spyImage).moveCurrent((StorageDirectory)anyObject());
+      break;
+    case MOVE_LAST_CHECKPOINT:
+      // The spy throws a RuntimeException when calling moveLastCheckpoint()
+      doThrow(new RuntimeException("Injected fault: moveLastCheckpoint")).
+        when(spyImage).moveLastCheckpoint((StorageDirectory)anyObject());
+      break;
+    }
+
+    try {
+      doAnEdit(fsn, 1);
+
+      // Save namespace - this will fail because we inject a fault.
+      fsn.setSafeMode(SafeModeAction.SAFEMODE_ENTER);
+      try {
+        fsn.saveNamespace();
+      } catch (Exception e) {
+        LOG.info("Test caught expected exception", e);
+      }
+
+      // Now shut down and restart the namesystem
+      nn.stop();
+      nn = null;
+
+      // Start a new namesystem, which should be able to recover
+      // the namespace from the previous incarnation.
+      nn = new NameNode(conf);
+      fsn = nn.getNamesystem();
+
+      // Make sure the image loaded including our edit.
+      checkEditExists(fsn, 1);
+    } finally {
+      if (nn != null) {
+        nn.stop();
+      }
+    }
+  }
+
+  // @Test
+  public void testCrashWhileSavingSecondImage() throws Exception {
+    saveNamespaceWithInjectedFault(Fault.SAVE_FSIMAGE);
+  }
+
+  // @Test
+  public void testCrashWhileMoveCurrent() throws Exception {
+    saveNamespaceWithInjectedFault(Fault.MOVE_CURRENT);
+  }
+
+  // @Test
+  public void testCrashWhileMoveLastCheckpoint() throws Exception {
+    saveNamespaceWithInjectedFault(Fault.MOVE_LAST_CHECKPOINT);
+  }
+
+  // @Test
+  public void testSaveWhileEditsRolled() throws Exception {
+    Configuration conf = getConf();
+    NameNode.myMetrics = new NameNodeMetrics(conf, null);
+    NameNode.format(conf);
+    NameNode nn = new NameNode(conf);
+    FSNamesystem fsn = nn.getNamesystem();
+
+    // Replace the FSImage with a spy
+    final FSImage originalImage = fsn.dir.fsImage;
+    FSImage spyImage = spy(originalImage);
+    fsn.dir.fsImage = spyImage;
+
+    try {
+      doAnEdit(fsn, 1);
+      CheckpointSignature sig = fsn.rollEditLog();
+      LOG.warn("Checkpoint signature: " + sig);
+      // Do another edit
+      doAnEdit(fsn, 2);
+
+      // Save namespace
+      fsn.setSafeMode(SafeModeAction.SAFEMODE_ENTER);
+      fsn.saveNamespace();
+
+      // Now shut down and restart the NN
+      nn.stop();
+      nn = null;
+
+      // Start a new namesystem, which should be able to recover
+      // the namespace from the previous incarnation.
+      nn = new NameNode(conf);
+      fsn = nn.getNamesystem();
+
+      // Make sure the image loaded including our edits.
+      checkEditExists(fsn, 1);
+      checkEditExists(fsn, 2);
+    } finally {
+      if (nn != null) {
+        nn.stop();
+      }
+    }
+  }
+
+  private void doAnEdit(FSNamesystem fsn, int id) throws IOException {
+    // Make an edit
+    fsn.mkdirs(
+      "/test" + id,
+      new PermissionStatus("test", "Test",
+          new FsPermission((short)0777)));
+  }
+
+  private void checkEditExists(FSNamesystem fsn, int id) throws IOException {
+    // Make sure the image loaded including our edit.
+    assertNotNull(fsn.getFileInfo("/test" + id));
+  }
+
+  private Configuration getConf() throws IOException {
+    String baseDir = System.getProperty("test.build.data", "build/test/data/dfs/");
+    String nameDirs = baseDir + "name1" + "," + baseDir + "name2";
+    Configuration conf = new Configuration();
+    FileSystem.setDefaultUri(conf, "hdfs://localhost:0");
+    conf.set("dfs.http.address", "0.0.0.0:0");
+    conf.set("dfs.name.dir", nameDirs);
+    conf.set("dfs.name.edits.dir", nameDirs);
+    conf.set("dfs.secondary.http.address", "0.0.0.0:0");
+    conf.setBoolean("dfs.permissions", false); 
+    return conf;
+  }
+}