Posted to common-commits@hadoop.apache.org by sh...@apache.org on 2008/10/20 23:29:04 UTC

svn commit: r706417 - in /hadoop/core/trunk: CHANGES.txt src/hdfs/org/apache/hadoop/hdfs/server/namenode/FSImage.java src/test/org/apache/hadoop/hdfs/server/namenode/TestNameEditsConfigs.java

Author: shv
Date: Mon Oct 20 14:29:04 2008
New Revision: 706417

URL: http://svn.apache.org/viewvc?rev=706417&view=rev
Log:
HADOOP-4404. saveFSImage() removes files from a storage directory that do not correspond to its type. Contributed by Konstantin Shvachko.
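
In short: before this change, saveFSImage() could leave a stale fsimage in an edits-only storage directory, or a stale edits file in an image-only one. The patch deletes checkpoint files that do not match a directory's configured type, and makes startup pick the latest image and edits directories only among those where the file actually exists. A minimal, self-contained sketch of the cleanup idea (the enum values and "current/..." paths mirror the real NameNodeDirType and NameNodeFile constants, but the class itself is illustrative, not the committed code, and it is stricter than the patch, which ignores delete()'s result):

    import java.io.File;
    import java.io.IOException;

    class StorageCleanupSketch {
      enum DirType { IMAGE, EDITS, IMAGE_AND_EDITS }

      // Remove checkpoint files that do not correspond to the
      // directory's configured type.
      static void cleanTypedDir(File root, DirType type) throws IOException {
        File image = new File(root, "current/fsimage");
        File edits = new File(root, "current/edits");
        // An edits-only directory must not keep an old fsimage around.
        if (type == DirType.EDITS && image.exists() && !image.delete())
          throw new IOException("Cannot delete stale " + image);
        // An image-only directory must not keep an old edits log around.
        if (type == DirType.IMAGE && edits.exists() && !edits.delete())
          throw new IOException("Cannot delete stale " + edits);
      }
    }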

Modified:
    hadoop/core/trunk/CHANGES.txt
    hadoop/core/trunk/src/hdfs/org/apache/hadoop/hdfs/server/namenode/FSImage.java
    hadoop/core/trunk/src/test/org/apache/hadoop/hdfs/server/namenode/TestNameEditsConfigs.java

Modified: hadoop/core/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/CHANGES.txt?rev=706417&r1=706416&r2=706417&view=diff
==============================================================================
--- hadoop/core/trunk/CHANGES.txt (original)
+++ hadoop/core/trunk/CHANGES.txt Mon Oct 20 14:29:04 2008
@@ -986,6 +986,9 @@
     HADOOP-4464. Separate out TestFileCreationClient from TestFileCreation.
     (Tsz Wo (Nicholas), SZE via cdouglas)
 
+    HADOOP-4404. saveFSImage() removes files from a storage directory that do 
+    not correspond to its type. (shv)
+
 Release 0.18.2 - Unreleased
 
   BUG FIXES

Modified: hadoop/core/trunk/src/hdfs/org/apache/hadoop/hdfs/server/namenode/FSImage.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/hdfs/org/apache/hadoop/hdfs/server/namenode/FSImage.java?rev=706417&r1=706416&r2=706417&view=diff
==============================================================================
--- hadoop/core/trunk/src/hdfs/org/apache/hadoop/hdfs/server/namenode/FSImage.java (original)
+++ hadoop/core/trunk/src/hdfs/org/apache/hadoop/hdfs/server/namenode/FSImage.java Mon Oct 20 14:29:04 2008
@@ -54,7 +54,6 @@
 import org.apache.hadoop.hdfs.server.namenode.NameNode;
 import org.apache.hadoop.hdfs.server.namenode.BlocksMap.BlockInfo;
 import org.apache.hadoop.hdfs.server.namenode.FSEditLog.EditLogFileInputStream;
-import org.apache.hadoop.hdfs.server.common.HdfsConstants;
 import org.apache.hadoop.hdfs.server.common.InconsistentFSStateException;
 import org.apache.hadoop.hdfs.server.common.Storage;
 import org.apache.hadoop.hdfs.server.common.StorageInfo;
@@ -723,18 +722,23 @@
     StorageDirectory latestEditsSD = null;
     boolean needToSave = false;
     isUpgradeFinalized = true;
+    Collection<String> imageDirs = new ArrayList<String>();
+    Collection<String> editsDirs = new ArrayList<String>();
     for (Iterator<StorageDirectory> it = dirIterator(); it.hasNext();) {
       StorageDirectory sd = it.next();
       if (!sd.getVersionFile().exists()) {
         needToSave |= true;
         continue; // some of them might have just been formatted
       }
-      if (sd.getStorageDirType().isOfType(NameNodeDirType.IMAGE))
-        assert getImageFile(sd, NameNodeFile.IMAGE).exists() :
-          "Image file must exist.";
-      if (sd.getStorageDirType().isOfType(NameNodeDirType.EDITS))
-        assert getImageFile(sd, NameNodeFile.EDITS).exists() :
-          "Edits file must exist.";
+      boolean imageExists = false, editsExists = false;
+      if (sd.getStorageDirType().isOfType(NameNodeDirType.IMAGE)) {
+        imageExists = getImageFile(sd, NameNodeFile.IMAGE).exists();
+        imageDirs.add(sd.getRoot().getCanonicalPath());
+      }
+      if (sd.getStorageDirType().isOfType(NameNodeDirType.EDITS)) {
+        editsExists = getImageFile(sd, NameNodeFile.EDITS).exists();
+        editsDirs.add(sd.getRoot().getCanonicalPath());
+      }
       
       checkpointTime = readCheckpointTime(sd);
       if ((checkpointTime != Long.MIN_VALUE) && 
@@ -745,12 +749,12 @@
         needToSave |= true;
       }
       if (sd.getStorageDirType().isOfType(NameNodeDirType.IMAGE) && 
-         (latestNameCheckpointTime < checkpointTime)) {
+         (latestNameCheckpointTime < checkpointTime) && imageExists) {
         latestNameCheckpointTime = checkpointTime;
         latestNameSD = sd;
       }
       if (sd.getStorageDirType().isOfType(NameNodeDirType.EDITS) && 
-           (latestEditsCheckpointTime < checkpointTime)) {
+           (latestEditsCheckpointTime < checkpointTime) && editsExists) {
         latestEditsCheckpointTime = checkpointTime;
         latestEditsSD = sd;
       }
@@ -759,11 +763,13 @@
       // set finalized flag
       isUpgradeFinalized = isUpgradeFinalized && !sd.getPreviousDir().exists();
     }
-    assert latestNameSD != null : "Latest image storage directory was " +
-                                  "not determined.";
-    assert latestEditsSD != null : "Latest edits storage directory was " +
-                                   "not determined.";
-    
+
+    // We should have at least one image and one edits dir
+    if (latestNameSD == null)
+      throw new IOException("Image file is not found in " + imageDirs);
+    if (latestEditsSD == null)
+      throw new IOException("Edits file is not found in " + editsDirs);
+
     // Make sure we are loading image and edits from same checkpoint
     if (latestNameCheckpointTime != latestEditsCheckpointTime)
       throw new IOException("Inconsitent storage detected, " +
@@ -1312,6 +1318,16 @@
     for (Iterator<StorageDirectory> it = 
                            dirIterator(); it.hasNext();) {
       StorageDirectory sd = it.next();
+      // delete old edits if sd is an image-only directory
+      if (!sd.getStorageDirType().isOfType(NameNodeDirType.EDITS)) {
+        File editsFile = getImageFile(sd, NameNodeFile.EDITS);
+        editsFile.delete();
+      }
+      // delete old fsimage if sd is an edits-only directory
+      if (!sd.getStorageDirType().isOfType(NameNodeDirType.IMAGE)) {
+        File imageFile = getImageFile(sd, NameNodeFile.IMAGE);
+        imageFile.delete();
+      }
       try {
         sd.write();
       } catch (IOException e) {
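
The hunks above also replace the old asserts with explicit IOExceptions, and a directory now only counts as holding the latest checkpoint when its image or edits file actually exists. A self-contained sketch of that selection pattern, using hypothetical names (Dir, pickLatest) that do not appear in the commit:

    import java.io.IOException;
    import java.util.ArrayList;
    import java.util.List;

    class LatestDirSelectionSketch {
      static class Dir {
        final String path;
        final long checkpointTime;
        final boolean fileExists;
        Dir(String path, long checkpointTime, boolean fileExists) {
          this.path = path;
          this.checkpointTime = checkpointTime;
          this.fileExists = fileExists;
        }
      }

      // Pick the directory with the newest checkpoint whose file
      // actually exists; report every candidate path if none
      // qualifies, as the commit's new IOExceptions do.
      static Dir pickLatest(List<Dir> dirs, String what) throws IOException {
        List<String> candidates = new ArrayList<String>();
        Dir latest = null;
        for (Dir d : dirs) {
          candidates.add(d.path);
          if (d.fileExists
              && (latest == null || latest.checkpointTime < d.checkpointTime))
            latest = d;
        }
        if (latest == null)
          throw new IOException(what + " file is not found in " + candidates);
        return latest;
      }
    }

The real code tracks latestNameSD and latestEditsSD separately with the same existence test; the sketch folds both passes into one helper.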

Modified: hadoop/core/trunk/src/test/org/apache/hadoop/hdfs/server/namenode/TestNameEditsConfigs.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/test/org/apache/hadoop/hdfs/server/namenode/TestNameEditsConfigs.java?rev=706417&r1=706416&r2=706417&view=diff
==============================================================================
--- hadoop/core/trunk/src/test/org/apache/hadoop/hdfs/server/namenode/TestNameEditsConfigs.java (original)
+++ hadoop/core/trunk/src/test/org/apache/hadoop/hdfs/server/namenode/TestNameEditsConfigs.java Mon Oct 20 14:29:04 2008
@@ -19,17 +19,9 @@
 
 import junit.framework.TestCase;
 import java.io.*;
-import java.util.Collection;
-import java.util.List;
-import java.util.Iterator;
 import java.util.Random;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hdfs.MiniDFSCluster;
-import org.apache.hadoop.hdfs.server.common.Storage;
-import org.apache.hadoop.hdfs.server.namenode.FSImage.NameNodeFile;
-import org.apache.hadoop.hdfs.server.namenode.SecondaryNameNode.ErrorSimulator;
-import org.apache.hadoop.hdfs.server.common.HdfsConstants.StartupOption;
-import org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory;
 import org.apache.hadoop.fs.FSDataOutputStream;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.FileUtil;
@@ -40,34 +32,55 @@
  * and dfs.name.edits.dir configurations.
  */
 public class TestNameEditsConfigs extends TestCase {
-  static final long seed = 0xDEADBEEFL;
-  static final int blockSize = 4096;
-  static final int fileSize = 8192;
-  static final int numDatanodes = 3;
+  static final long SEED = 0xDEADBEEFL;
+  static final int BLOCK_SIZE = 4096;
+  static final int FILE_SIZE = 8192;
+  static final int NUM_DATA_NODES = 3;
+  static final String FILE_IMAGE = "current/fsimage";
+  static final String FILE_EDITS = "current/edits";
+
   short replication = 3;
+  private File base_dir = new File(
+      System.getProperty("test.build.data", "build/test/data"), "dfs/");
+
+  protected void setUp() throws java.lang.Exception {
+    if(base_dir.exists())
+      tearDown();
+  }
+
+  protected void tearDown() throws java.lang.Exception {
+    if (!FileUtil.fullyDelete(base_dir)) 
+      throw new IOException("Cannot remove directory " + base_dir);
+  }
 
   private void writeFile(FileSystem fileSys, Path name, int repl)
     throws IOException {
     FSDataOutputStream stm = fileSys.create(name, true,
                                             fileSys.getConf().getInt("io.file.buffer.size", 4096),
-                                            (short)repl, (long)blockSize);
-    byte[] buffer = new byte[fileSize];
-    Random rand = new Random(seed);
+                                            (short)repl, (long)BLOCK_SIZE);
+    byte[] buffer = new byte[FILE_SIZE];
+    Random rand = new Random(SEED);
     rand.nextBytes(buffer);
     stm.write(buffer);
     stm.close();
   }
-  
-  
+
+  void checkImageAndEditsFilesExistence(File dir, 
+                                        boolean imageMustExist,
+                                        boolean editsMustExist) {
+    assertTrue(imageMustExist == new File(dir, FILE_IMAGE).exists());
+    assertTrue(editsMustExist == new File(dir, FILE_EDITS).exists());
+  }
+
   private void checkFile(FileSystem fileSys, Path name, int repl)
     throws IOException {
     assertTrue(fileSys.exists(name));
     int replication = fileSys.getFileStatus(name).getReplication();
     assertEquals("replication for " + name, repl, replication);
     long size = fileSys.getContentSummary(name).getLength();
-    assertEquals("file size for " + name, size, (long)fileSize);
+    assertEquals("file size for " + name, size, (long)FILE_SIZE);
   }
-  
+
   private void cleanupFile(FileSystem fileSys, Path name)
     throws IOException {
     assertTrue(fileSys.exists(name));
@@ -101,7 +114,6 @@
     SecondaryNameNode secondary = null;
     Configuration conf = null;
     FileSystem fileSys = null;
-    File base_dir = new File(System.getProperty("test.build.data"), "dfs/");
     File newNameDir = new File(base_dir, "name");
     File newEditsDir = new File(base_dir, "edits");
     File nameAndEdits = new File(base_dir, "name_and_edits");
@@ -117,7 +129,7 @@
     conf.set("fs.checkpoint.edits.dir", checkpointNameAndEdits.getPath());
     replication = (short)conf.getInt("dfs.replication", 3);
     // Manage our own dfs directories
-    cluster = new MiniDFSCluster(0, conf, numDatanodes, true, false, true, null,
+    cluster = new MiniDFSCluster(0, conf, NUM_DATA_NODES, true, false, true, null,
                                   null, null, null);
     cluster.waitActive();
     secondary = startSecondaryNameNode(conf);
@@ -149,7 +161,7 @@
              "," + checkpointNameAndEdits.getPath());
     replication = (short)conf.getInt("dfs.replication", 3);
     // Manage our own dfs directories. Do not format.
-    cluster = new MiniDFSCluster(0, conf, numDatanodes, false, false, true, 
+    cluster = new MiniDFSCluster(0, conf, NUM_DATA_NODES, false, false, true, 
                                   null, null, null, null);
     cluster.waitActive();
     secondary = startSecondaryNameNode(conf);
@@ -167,16 +179,31 @@
       cluster.shutdown();
       secondary.shutdown();
     }
-    
+
+    checkImageAndEditsFilesExistence(nameAndEdits, true, true);
+    checkImageAndEditsFilesExistence(newNameDir, true, false);
+    checkImageAndEditsFilesExistence(newEditsDir, false, true);
+    checkImageAndEditsFilesExistence(checkpointNameAndEdits, true, true);
+    checkImageAndEditsFilesExistence(checkpointNameDir, true, false);
+    checkImageAndEditsFilesExistence(checkpointEditsDir, false, true);
+
     // Now remove common directory both have and start namenode with 
     // separate name and edits dirs
+    new File(nameAndEdits, FILE_EDITS).renameTo(
+        new File(newNameDir, FILE_EDITS));
+    new File(nameAndEdits, FILE_IMAGE).renameTo(
+        new File(newEditsDir, FILE_IMAGE));
+    new File(checkpointNameAndEdits, FILE_EDITS).renameTo(
+        new File(checkpointNameDir, FILE_EDITS));
+    new File(checkpointNameAndEdits, FILE_IMAGE).renameTo(
+        new File(checkpointEditsDir, FILE_IMAGE));
     conf =  new Configuration();
     conf.set("dfs.name.dir", newNameDir.getPath());
     conf.set("dfs.name.edits.dir", newEditsDir.getPath());
     conf.set("fs.checkpoint.dir", checkpointNameDir.getPath());
     conf.set("fs.checkpoint.edits.dir", checkpointEditsDir.getPath());
     replication = (short)conf.getInt("dfs.replication", 3);
-    cluster = new MiniDFSCluster(0, conf, numDatanodes, false, false, true,
+    cluster = new MiniDFSCluster(0, conf, NUM_DATA_NODES, false, false, true,
                                   null, null, null, null);
     cluster.waitActive();
     secondary = startSecondaryNameNode(conf);
@@ -195,18 +222,30 @@
       cluster.shutdown();
       secondary.shutdown();
     }
-    
+
+    checkImageAndEditsFilesExistence(newNameDir, true, false);
+    checkImageAndEditsFilesExistence(newEditsDir, false, true);
+    checkImageAndEditsFilesExistence(checkpointNameDir, true, false);
+    checkImageAndEditsFilesExistence(checkpointEditsDir, false, true);
+
     // Add old name_and_edits dir. File system should not read image or edits
     // from old dir
+    assertTrue(FileUtil.fullyDelete(new File(nameAndEdits, "current")));
+    assertTrue(FileUtil.fullyDelete(new File(checkpointNameAndEdits, "current")));
     conf = new Configuration();
     conf.set("dfs.name.dir", nameAndEdits.getPath() +
               "," + newNameDir.getPath());
     conf.set("dfs.name.edits.dir", nameAndEdits +
               "," + newEditsDir.getPath());
+    conf.set("fs.checkpoint.dir", checkpointNameDir.getPath() +
+        "," + checkpointNameAndEdits.getPath());
+    conf.set("fs.checkpoint.edits.dir", checkpointEditsDir.getPath() +
+        "," + checkpointNameAndEdits.getPath());
     replication = (short)conf.getInt("dfs.replication", 3);
-    cluster = new MiniDFSCluster(0, conf, numDatanodes, false, false, true,
+    cluster = new MiniDFSCluster(0, conf, NUM_DATA_NODES, false, false, true,
                                   null, null, null, null);
     cluster.waitActive();
+    secondary = startSecondaryNameNode(conf);
     fileSys = cluster.getFileSystem();
 
     try {
@@ -214,25 +253,14 @@
       assertTrue(!fileSys.exists(file2));
       assertTrue(fileSys.exists(file3));
       checkFile(fileSys, file3, replication);
+      secondary.doCheckpoint();
     } finally {
       fileSys.close();
       cluster.shutdown();
+      secondary.shutdown();
     }
-
-    // Cleanup
-    if (!FileUtil.fullyDelete(newNameDir)) 
-      throw new IOException("Cannot remove directory " + newNameDir);
-    if (!FileUtil.fullyDelete(newEditsDir)) 
-      throw new IOException("Cannot remove directory " + newEditsDir);
-    if (!FileUtil.fullyDelete(nameAndEdits))
-      throw new IOException("Cannot remove directory " + nameAndEdits);
-    if (!FileUtil.fullyDelete(checkpointNameDir))
-      throw new IOException("Cannot remove directory " + checkpointNameDir);
-    if (!FileUtil.fullyDelete(checkpointEditsDir))
-      throw new IOException("Cannot remove directory " + checkpointEditsDir);
-    if (!FileUtil.fullyDelete(checkpointNameAndEdits))
-      throw new IOException("Cannot remove directory " + 
-                             checkpointNameAndEdits);
+    checkImageAndEditsFilesExistence(nameAndEdits, true, true);
+    checkImageAndEditsFilesExistence(checkpointNameAndEdits, true, true);
   }
 
   /**
@@ -253,7 +281,6 @@
     MiniDFSCluster cluster = null;
     Configuration conf = null;
     FileSystem fileSys = null;
-    File base_dir = new File(System.getProperty("test.build.data"), "dfs/");
     File newNameDir = new File(base_dir, "name");
     File newEditsDir = new File(base_dir, "edits");
     File nameAndEdits = new File(base_dir, "name_and_edits");
@@ -264,7 +291,7 @@
     conf.set("dfs.name.edits.dir", nameAndEdits.getPath());
     replication = (short)conf.getInt("dfs.replication", 3);
     // Manage our own dfs directories
-    cluster = new MiniDFSCluster(0, conf, numDatanodes, true, false, true, null,
+    cluster = new MiniDFSCluster(0, conf, NUM_DATA_NODES, true, false, true, null,
                                   null, null, null);
     cluster.waitActive();
     fileSys = cluster.getFileSystem();
@@ -289,7 +316,7 @@
               "," + newEditsDir.getPath());
     replication = (short)conf.getInt("dfs.replication", 3);
     // Manage our own dfs directories. Do not format.
-    cluster = new MiniDFSCluster(0, conf, numDatanodes, false, false, true, 
+    cluster = new MiniDFSCluster(0, conf, NUM_DATA_NODES, false, false, true, 
                                   null, null, null, null);
     cluster.waitActive();
     fileSys = cluster.getFileSystem();
@@ -311,7 +338,7 @@
     conf.set("dfs.name.dir", newNameDir.getPath());
     conf.set("dfs.name.edits.dir", newEditsDir.getPath());
     replication = (short)conf.getInt("dfs.replication", 3);
-    cluster = new MiniDFSCluster(0, conf, numDatanodes, false, false, true,
+    cluster = new MiniDFSCluster(0, conf, NUM_DATA_NODES, false, false, true,
                                   null, null, null, null);
     cluster.waitActive();
     fileSys = cluster.getFileSystem();
@@ -335,7 +362,7 @@
     conf.set("dfs.name.edits.dir", nameAndEdits.getPath());
     replication = (short)conf.getInt("dfs.replication", 3);
     try {
-      cluster = new MiniDFSCluster(0, conf, numDatanodes, false, false, true,
+      cluster = new MiniDFSCluster(0, conf, NUM_DATA_NODES, false, false, true,
                                   null, null, null, null);
       assertTrue(false);
     } catch (IOException e) { // expect to fail
@@ -352,7 +379,7 @@
              "," + nameAndEdits.getPath());
     replication = (short)conf.getInt("dfs.replication", 3);
     try {
-      cluster = new MiniDFSCluster(0, conf, numDatanodes, false, false, true,
+      cluster = new MiniDFSCluster(0, conf, NUM_DATA_NODES, false, false, true,
                                    null, null, null, null);
       assertTrue(false);
     } catch (IOException e) { // expect to fail
@@ -360,13 +387,5 @@
     } finally {
       cluster = null;
     }
-
-    // Cleanup
-    if (!FileUtil.fullyDelete(newNameDir)) 
-      throw new IOException("Cannot remove directory " + newNameDir);
-    if (!FileUtil.fullyDelete(newEditsDir)) 
-      throw new IOException("Cannot remove directory " + newEditsDir);
-    if (!FileUtil.fullyDelete(nameAndEdits))
-      throw new IOException("Cannot remove directory " + nameAndEdits);
   }
 }
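
A side note on the first test above: the new renameTo(...) calls that shuffle fsimage and edits files between directories discard renameTo's boolean result, so a failed move would only surface later as a confusing assertion failure. A hypothetical helper (not part of this commit) that fails loudly instead:

    import java.io.File;
    import java.io.IOException;

    class MoveSketch {
      // Move src to dst, surfacing failure instead of discarding
      // File.renameTo's boolean result.
      static void move(File src, File dst) throws IOException {
        if (!src.renameTo(dst))
          throw new IOException("Cannot rename " + src + " to " + dst);
      }
    }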