Posted to common-commits@hadoop.apache.org by sh...@apache.org on 2008/10/20 23:29:04 UTC
svn commit: r706417 - in /hadoop/core/trunk: CHANGES.txt
    src/hdfs/org/apache/hadoop/hdfs/server/namenode/FSImage.java
    src/test/org/apache/hadoop/hdfs/server/namenode/TestNameEditsConfigs.java
Author: shv
Date: Mon Oct 20 14:29:04 2008
New Revision: 706417
URL: http://svn.apache.org/viewvc?rev=706417&view=rev
Log:
HADOOP-4404. saveFSImage() removes files from a storage directory that do not correspond to its type. Contributed by Konstantin Shvachko.
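
Background: before this change, loadFSImage() merely asserted that an fsimage existed in every IMAGE-typed storage directory and an edits file in every EDITS-typed one, so a stale file left behind in a directory of the wrong type could win the checkpoint-time comparison (or trip an assert under -ea). The patch makes the load path throw a descriptive IOException instead, counts a directory as a candidate only if the expected file actually exists, and has saveFSImage() delete files that do not correspond to a directory's type. As a minimal, self-contained sketch of that cleanup rule, with hypothetical stand-ins for the real Storage/NameNodeDirType classes:

import java.io.File;
import java.util.List;

class SaveImageCleanupSketch {
  enum DirType { IMAGE, EDITS, IMAGE_AND_EDITS }

  static class StorageDir {
    final File root;
    final DirType type;
    StorageDir(File root, DirType type) { this.root = root; this.type = type; }
    boolean isOfType(DirType t) {
      // A combined directory counts as both IMAGE and EDITS.
      return type == t || type == DirType.IMAGE_AND_EDITS;
    }
  }

  // After writing a new checkpoint, remove any file that does not
  // correspond to the directory's configured type, so a later load
  // can never pick up a stale fsimage/edits from the wrong place.
  static void cleanUp(List<StorageDir> dirs) {
    for (StorageDir sd : dirs) {
      if (!sd.isOfType(DirType.EDITS))                 // image-only dir:
        new File(sd.root, "current/edits").delete();   //   drop old edits
      if (!sd.isOfType(DirType.IMAGE))                 // edits-only dir:
        new File(sd.root, "current/fsimage").delete(); //   drop old image
    }
  }
}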
Modified:
hadoop/core/trunk/CHANGES.txt
hadoop/core/trunk/src/hdfs/org/apache/hadoop/hdfs/server/namenode/FSImage.java
hadoop/core/trunk/src/test/org/apache/hadoop/hdfs/server/namenode/TestNameEditsConfigs.java
Modified: hadoop/core/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/CHANGES.txt?rev=706417&r1=706416&r2=706417&view=diff
==============================================================================
--- hadoop/core/trunk/CHANGES.txt (original)
+++ hadoop/core/trunk/CHANGES.txt Mon Oct 20 14:29:04 2008
@@ -986,6 +986,9 @@
HADOOP-4464. Separate out TestFileCreationClient from TestFileCreation.
(Tsz Wo (Nicholas), SZE via cdouglas)
+ HADOOP-4404. saveFSImage() removes files from a storage directory that do
+ not correspond to its type. (shv)
+
Release 0.18.2 - Unreleased
BUG FIXES
Modified: hadoop/core/trunk/src/hdfs/org/apache/hadoop/hdfs/server/namenode/FSImage.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/hdfs/org/apache/hadoop/hdfs/server/namenode/FSImage.java?rev=706417&r1=706416&r2=706417&view=diff
==============================================================================
--- hadoop/core/trunk/src/hdfs/org/apache/hadoop/hdfs/server/namenode/FSImage.java (original)
+++ hadoop/core/trunk/src/hdfs/org/apache/hadoop/hdfs/server/namenode/FSImage.java Mon Oct 20 14:29:04 2008
@@ -54,7 +54,6 @@
import org.apache.hadoop.hdfs.server.namenode.NameNode;
import org.apache.hadoop.hdfs.server.namenode.BlocksMap.BlockInfo;
import org.apache.hadoop.hdfs.server.namenode.FSEditLog.EditLogFileInputStream;
-import org.apache.hadoop.hdfs.server.common.HdfsConstants;
import org.apache.hadoop.hdfs.server.common.InconsistentFSStateException;
import org.apache.hadoop.hdfs.server.common.Storage;
import org.apache.hadoop.hdfs.server.common.StorageInfo;
@@ -723,18 +722,23 @@
StorageDirectory latestEditsSD = null;
boolean needToSave = false;
isUpgradeFinalized = true;
+ Collection<String> imageDirs = new ArrayList<String>();
+ Collection<String> editsDirs = new ArrayList<String>();
for (Iterator<StorageDirectory> it = dirIterator(); it.hasNext();) {
StorageDirectory sd = it.next();
if (!sd.getVersionFile().exists()) {
needToSave |= true;
continue; // some of them might have just been formatted
}
- if (sd.getStorageDirType().isOfType(NameNodeDirType.IMAGE))
- assert getImageFile(sd, NameNodeFile.IMAGE).exists() :
- "Image file must exist.";
- if (sd.getStorageDirType().isOfType(NameNodeDirType.EDITS))
- assert getImageFile(sd, NameNodeFile.EDITS).exists() :
- "Edits file must exist.";
+ boolean imageExists = false, editsExists = false;
+ if (sd.getStorageDirType().isOfType(NameNodeDirType.IMAGE)) {
+ imageExists = getImageFile(sd, NameNodeFile.IMAGE).exists();
+ imageDirs.add(sd.getRoot().getCanonicalPath());
+ }
+ if (sd.getStorageDirType().isOfType(NameNodeDirType.EDITS)) {
+ editsExists = getImageFile(sd, NameNodeFile.EDITS).exists();
+ editsDirs.add(sd.getRoot().getCanonicalPath());
+ }
checkpointTime = readCheckpointTime(sd);
if ((checkpointTime != Long.MIN_VALUE) &&
@@ -745,12 +749,12 @@
needToSave |= true;
}
if (sd.getStorageDirType().isOfType(NameNodeDirType.IMAGE) &&
- (latestNameCheckpointTime < checkpointTime)) {
+ (latestNameCheckpointTime < checkpointTime) && imageExists) {
latestNameCheckpointTime = checkpointTime;
latestNameSD = sd;
}
if (sd.getStorageDirType().isOfType(NameNodeDirType.EDITS) &&
- (latestEditsCheckpointTime < checkpointTime)) {
+ (latestEditsCheckpointTime < checkpointTime) && editsExists) {
latestEditsCheckpointTime = checkpointTime;
latestEditsSD = sd;
}
@@ -759,11 +763,13 @@
// set finalized flag
isUpgradeFinalized = isUpgradeFinalized && !sd.getPreviousDir().exists();
}
- assert latestNameSD != null : "Latest image storage directory was " +
- "not determined.";
- assert latestEditsSD != null : "Latest edits storage directory was " +
- "not determined.";
-
+
+ // We should have at least one image and one edits directory
+ if (latestNameSD == null)
+ throw new IOException("Image file is not found in " + imageDirs);
+ if (latestEditsSD == null)
+ throw new IOException("Edits file is not found in " + editsDirs);
+
// Make sure we are loading image and edits from same checkpoint
if (latestNameCheckpointTime != latestEditsCheckpointTime)
throw new IOException("Inconsitent storage detected, " +
@@ -1312,6 +1318,16 @@
for (Iterator<StorageDirectory> it =
dirIterator(); it.hasNext();) {
StorageDirectory sd = it.next();
+ // delete old edits if sd is an image-only directory
+ if (!sd.getStorageDirType().isOfType(NameNodeDirType.EDITS)) {
+ File editsFile = getImageFile(sd, NameNodeFile.EDITS);
+ editsFile.delete();
+ }
+ // delete old fsimage if sd is an edits-only directory
+ if (!sd.getStorageDirType().isOfType(NameNodeDirType.IMAGE)) {
+ File imageFile = getImageFile(sd, NameNodeFile.IMAGE);
+ imageFile.delete();
+ }
try {
sd.write();
} catch (IOException e) {
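
The load-side hunks above boil down to: a storage directory may only be chosen as the latest image (or edits) source if the expected file is physically present, and if no directory qualifies, the namenode now fails fast with the list of directories it searched instead of tripping an assert. A compact sketch of that selection, with readCheckpointTime() as a hypothetical stand-in for FSImage's fstime reader:

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;

class LatestCheckpointSketch {
  static File pickLatest(List<File> dirs, String fileName) throws IOException {
    File latest = null;
    long latestTime = Long.MIN_VALUE;
    Collection<String> searched = new ArrayList<String>();
    for (File dir : dirs) {
      searched.add(dir.getPath());
      boolean exists = new File(dir, fileName).exists();
      long time = readCheckpointTime(dir);
      // Only a directory whose file actually exists may become latest.
      if (exists && time > latestTime) {
        latestTime = time;
        latest = dir;
      }
    }
    if (latest == null) // mirrors the new IOException in loadFSImage()
      throw new IOException(fileName + " is not found in " + searched);
    return latest;
  }

  // Hypothetical stand-in: the real code reads the stored checkpoint time.
  static long readCheckpointTime(File dir) {
    File fstime = new File(dir, "current/fstime");
    return fstime.exists() ? fstime.lastModified() : Long.MIN_VALUE;
  }
}

loadFSImage() then cross-checks that the chosen image and edits directories carry the same checkpoint time, throwing the "Inconsistent storage detected" IOException otherwise, as shown in the diff.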
Modified: hadoop/core/trunk/src/test/org/apache/hadoop/hdfs/server/namenode/TestNameEditsConfigs.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/test/org/apache/hadoop/hdfs/server/namenode/TestNameEditsConfigs.java?rev=706417&r1=706416&r2=706417&view=diff
==============================================================================
--- hadoop/core/trunk/src/test/org/apache/hadoop/hdfs/server/namenode/TestNameEditsConfigs.java (original)
+++ hadoop/core/trunk/src/test/org/apache/hadoop/hdfs/server/namenode/TestNameEditsConfigs.java Mon Oct 20 14:29:04 2008
@@ -19,17 +19,9 @@
import junit.framework.TestCase;
import java.io.*;
-import java.util.Collection;
-import java.util.List;
-import java.util.Iterator;
import java.util.Random;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.MiniDFSCluster;
-import org.apache.hadoop.hdfs.server.common.Storage;
-import org.apache.hadoop.hdfs.server.namenode.FSImage.NameNodeFile;
-import org.apache.hadoop.hdfs.server.namenode.SecondaryNameNode.ErrorSimulator;
-import org.apache.hadoop.hdfs.server.common.HdfsConstants.StartupOption;
-import org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
@@ -40,34 +32,55 @@
* and dfs.name.edits.dir configurations.
*/
public class TestNameEditsConfigs extends TestCase {
- static final long seed = 0xDEADBEEFL;
- static final int blockSize = 4096;
- static final int fileSize = 8192;
- static final int numDatanodes = 3;
+ static final long SEED = 0xDEADBEEFL;
+ static final int BLOCK_SIZE = 4096;
+ static final int FILE_SIZE = 8192;
+ static final int NUM_DATA_NODES = 3;
+ static final String FILE_IMAGE = "current/fsimage";
+ static final String FILE_EDITS = "current/edits";
+
short replication = 3;
+ private File base_dir = new File(
+ System.getProperty("test.build.data", "build/test/data"), "dfs/");
+
+ protected void setUp() throws java.lang.Exception {
+ if(base_dir.exists())
+ tearDown();
+ }
+
+ protected void tearDown() throws java.lang.Exception {
+ if (!FileUtil.fullyDelete(base_dir))
+ throw new IOException("Cannot remove directory " + base_dir);
+ }
private void writeFile(FileSystem fileSys, Path name, int repl)
throws IOException {
FSDataOutputStream stm = fileSys.create(name, true,
fileSys.getConf().getInt("io.file.buffer.size", 4096),
- (short)repl, (long)blockSize);
- byte[] buffer = new byte[fileSize];
- Random rand = new Random(seed);
+ (short)repl, (long)BLOCK_SIZE);
+ byte[] buffer = new byte[FILE_SIZE];
+ Random rand = new Random(SEED);
rand.nextBytes(buffer);
stm.write(buffer);
stm.close();
}
-
-
+
+ void checkImageAndEditsFilesExistence(File dir,
+ boolean imageMustExist,
+ boolean editsMustExist) {
+ assertTrue(imageMustExist == new File(dir, FILE_IMAGE).exists());
+ assertTrue(editsMustExist == new File(dir, FILE_EDITS).exists());
+ }
+
private void checkFile(FileSystem fileSys, Path name, int repl)
throws IOException {
assertTrue(fileSys.exists(name));
int replication = fileSys.getFileStatus(name).getReplication();
assertEquals("replication for " + name, repl, replication);
long size = fileSys.getContentSummary(name).getLength();
- assertEquals("file size for " + name, size, (long)fileSize);
+ assertEquals("file size for " + name, size, (long)FILE_SIZE);
}
-
+
private void cleanupFile(FileSystem fileSys, Path name)
throws IOException {
assertTrue(fileSys.exists(name));
@@ -101,7 +114,6 @@
SecondaryNameNode secondary = null;
Configuration conf = null;
FileSystem fileSys = null;
- File base_dir = new File(System.getProperty("test.build.data"), "dfs/");
File newNameDir = new File(base_dir, "name");
File newEditsDir = new File(base_dir, "edits");
File nameAndEdits = new File(base_dir, "name_and_edits");
@@ -117,7 +129,7 @@
conf.set("fs.checkpoint.edits.dir", checkpointNameAndEdits.getPath());
replication = (short)conf.getInt("dfs.replication", 3);
// Manage our own dfs directories
- cluster = new MiniDFSCluster(0, conf, numDatanodes, true, false, true, null,
+ cluster = new MiniDFSCluster(0, conf, NUM_DATA_NODES, true, false, true, null,
null, null, null);
cluster.waitActive();
secondary = startSecondaryNameNode(conf);
@@ -149,7 +161,7 @@
"," + checkpointNameAndEdits.getPath());
replication = (short)conf.getInt("dfs.replication", 3);
// Manage our own dfs directories. Do not format.
- cluster = new MiniDFSCluster(0, conf, numDatanodes, false, false, true,
+ cluster = new MiniDFSCluster(0, conf, NUM_DATA_NODES, false, false, true,
null, null, null, null);
cluster.waitActive();
secondary = startSecondaryNameNode(conf);
@@ -167,16 +179,31 @@
cluster.shutdown();
secondary.shutdown();
}
-
+
+ checkImageAndEditsFilesExistence(nameAndEdits, true, true);
+ checkImageAndEditsFilesExistence(newNameDir, true, false);
+ checkImageAndEditsFilesExistence(newEditsDir, false, true);
+ checkImageAndEditsFilesExistence(checkpointNameAndEdits, true, true);
+ checkImageAndEditsFilesExistence(checkpointNameDir, true, false);
+ checkImageAndEditsFilesExistence(checkpointEditsDir, false, true);
+
// Now remove the common directory used for both image and edits, and
// start the namenode with separate name and edits dirs
+ new File(nameAndEdits, FILE_EDITS).renameTo(
+ new File(newNameDir, FILE_EDITS));
+ new File(nameAndEdits, FILE_IMAGE).renameTo(
+ new File(newEditsDir, FILE_IMAGE));
+ new File(checkpointNameAndEdits, FILE_EDITS).renameTo(
+ new File(checkpointNameDir, FILE_EDITS));
+ new File(checkpointNameAndEdits, FILE_IMAGE).renameTo(
+ new File(checkpointEditsDir, FILE_IMAGE));
conf = new Configuration();
conf.set("dfs.name.dir", newNameDir.getPath());
conf.set("dfs.name.edits.dir", newEditsDir.getPath());
conf.set("fs.checkpoint.dir", checkpointNameDir.getPath());
conf.set("fs.checkpoint.edits.dir", checkpointEditsDir.getPath());
replication = (short)conf.getInt("dfs.replication", 3);
- cluster = new MiniDFSCluster(0, conf, numDatanodes, false, false, true,
+ cluster = new MiniDFSCluster(0, conf, NUM_DATA_NODES, false, false, true,
null, null, null, null);
cluster.waitActive();
secondary = startSecondaryNameNode(conf);
@@ -195,18 +222,30 @@
cluster.shutdown();
secondary.shutdown();
}
-
+
+ checkImageAndEditsFilesExistence(newNameDir, true, false);
+ checkImageAndEditsFilesExistence(newEditsDir, false, true);
+ checkImageAndEditsFilesExistence(checkpointNameDir, true, false);
+ checkImageAndEditsFilesExistence(checkpointEditsDir, false, true);
+
// Add old name_and_edits dir. File system should not read image or edits
// from old dir
+ assertTrue(FileUtil.fullyDelete(new File(nameAndEdits, "current")));
+ assertTrue(FileUtil.fullyDelete(new File(checkpointNameAndEdits, "current")));
conf = new Configuration();
conf.set("dfs.name.dir", nameAndEdits.getPath() +
"," + newNameDir.getPath());
conf.set("dfs.name.edits.dir", nameAndEdits +
"," + newEditsDir.getPath());
+ conf.set("fs.checkpoint.dir", checkpointNameDir.getPath() +
+ "," + checkpointNameAndEdits.getPath());
+ conf.set("fs.checkpoint.edits.dir", checkpointEditsDir.getPath() +
+ "," + checkpointNameAndEdits.getPath());
replication = (short)conf.getInt("dfs.replication", 3);
- cluster = new MiniDFSCluster(0, conf, numDatanodes, false, false, true,
+ cluster = new MiniDFSCluster(0, conf, NUM_DATA_NODES, false, false, true,
null, null, null, null);
cluster.waitActive();
+ secondary = startSecondaryNameNode(conf);
fileSys = cluster.getFileSystem();
try {
@@ -214,25 +253,14 @@
assertTrue(!fileSys.exists(file2));
assertTrue(fileSys.exists(file3));
checkFile(fileSys, file3, replication);
+ secondary.doCheckpoint();
} finally {
fileSys.close();
cluster.shutdown();
+ secondary.shutdown();
}
-
- // Cleanup
- if (!FileUtil.fullyDelete(newNameDir))
- throw new IOException("Cannot remove directory " + newNameDir);
- if (!FileUtil.fullyDelete(newEditsDir))
- throw new IOException("Cannot remove directory " + newEditsDir);
- if (!FileUtil.fullyDelete(nameAndEdits))
- throw new IOException("Cannot remove directory " + nameAndEdits);
- if (!FileUtil.fullyDelete(checkpointNameDir))
- throw new IOException("Cannot remove directory " + checkpointNameDir);
- if (!FileUtil.fullyDelete(checkpointEditsDir))
- throw new IOException("Cannot remove directory " + checkpointEditsDir);
- if (!FileUtil.fullyDelete(checkpointNameAndEdits))
- throw new IOException("Cannot remove directory " +
- checkpointNameAndEdits);
+ checkImageAndEditsFilesExistence(nameAndEdits, true, true);
+ checkImageAndEditsFilesExistence(checkpointNameAndEdits, true, true);
}
/**
@@ -253,7 +281,6 @@
MiniDFSCluster cluster = null;
Configuration conf = null;
FileSystem fileSys = null;
- File base_dir = new File(System.getProperty("test.build.data"), "dfs/");
File newNameDir = new File(base_dir, "name");
File newEditsDir = new File(base_dir, "edits");
File nameAndEdits = new File(base_dir, "name_and_edits");
@@ -264,7 +291,7 @@
conf.set("dfs.name.edits.dir", nameAndEdits.getPath());
replication = (short)conf.getInt("dfs.replication", 3);
// Manage our own dfs directories
- cluster = new MiniDFSCluster(0, conf, numDatanodes, true, false, true, null,
+ cluster = new MiniDFSCluster(0, conf, NUM_DATA_NODES, true, false, true, null,
null, null, null);
cluster.waitActive();
fileSys = cluster.getFileSystem();
@@ -289,7 +316,7 @@
"," + newEditsDir.getPath());
replication = (short)conf.getInt("dfs.replication", 3);
// Manage our own dfs directories. Do not format.
- cluster = new MiniDFSCluster(0, conf, numDatanodes, false, false, true,
+ cluster = new MiniDFSCluster(0, conf, NUM_DATA_NODES, false, false, true,
null, null, null, null);
cluster.waitActive();
fileSys = cluster.getFileSystem();
@@ -311,7 +338,7 @@
conf.set("dfs.name.dir", newNameDir.getPath());
conf.set("dfs.name.edits.dir", newEditsDir.getPath());
replication = (short)conf.getInt("dfs.replication", 3);
- cluster = new MiniDFSCluster(0, conf, numDatanodes, false, false, true,
+ cluster = new MiniDFSCluster(0, conf, NUM_DATA_NODES, false, false, true,
null, null, null, null);
cluster.waitActive();
fileSys = cluster.getFileSystem();
@@ -335,7 +362,7 @@
conf.set("dfs.name.edits.dir", nameAndEdits.getPath());
replication = (short)conf.getInt("dfs.replication", 3);
try {
- cluster = new MiniDFSCluster(0, conf, numDatanodes, false, false, true,
+ cluster = new MiniDFSCluster(0, conf, NUM_DATA_NODES, false, false, true,
null, null, null, null);
assertTrue(false);
} catch (IOException e) { // expect to fail
@@ -352,7 +379,7 @@
"," + nameAndEdits.getPath());
replication = (short)conf.getInt("dfs.replication", 3);
try {
- cluster = new MiniDFSCluster(0, conf, numDatanodes, false, false, true,
+ cluster = new MiniDFSCluster(0, conf, NUM_DATA_NODES, false, false, true,
null, null, null, null);
assertTrue(false);
} catch (IOException e) { // expect to fail
@@ -360,13 +387,5 @@
} finally {
cluster = null;
}
-
- // Cleanup
- if (!FileUtil.fullyDelete(newNameDir))
- throw new IOException("Cannot remove directory " + newNameDir);
- if (!FileUtil.fullyDelete(newEditsDir))
- throw new IOException("Cannot remove directory " + newEditsDir);
- if (!FileUtil.fullyDelete(nameAndEdits))
- throw new IOException("Cannot remove directory " + nameAndEdits);
}
}
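
For reference, the split-directory layout the test exercises corresponds to a configuration like the following (the key names are the ones set in the test; the paths are illustrative examples only):

import org.apache.hadoop.conf.Configuration;

public class SplitDirsConfigExample {
  public static Configuration build() {
    Configuration conf = new Configuration();
    conf.set("dfs.name.dir", "/data/dfs/name");             // fsimage only
    conf.set("dfs.name.edits.dir", "/data/dfs/edits");      // edits only
    conf.set("fs.checkpoint.dir", "/data/dfs/ckpt_name");   // checkpoint image
    conf.set("fs.checkpoint.edits.dir", "/data/dfs/ckpt_edits"); // checkpoint edits
    return conf;
  }
}

The test itself should run under the usual ant harness, e.g. ant test -Dtestcase=TestNameEditsConfigs, assuming the standard Hadoop 0.x build.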