You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by at...@apache.org on 2011/08/08 23:54:54 UTC
svn commit: r1155121 - in /hadoop/common/branches/branch-0.20-security: ./
src/hdfs/org/apache/hadoop/hdfs/server/namenode/
src/hdfs/org/apache/hadoop/hdfs/util/
src/test/org/apache/hadoop/hdfs/server/namenode/
Author: atm
Date: Mon Aug 8 21:54:54 2011
New Revision: 1155121
URL: http://svn.apache.org/viewvc?rev=1155121&view=rev
Log:
HDFS-2190. NN fails to start if it encounters an empty or malformed fstime file. Contributed by Aaron T. Myers.
Added:
hadoop/common/branches/branch-0.20-security/src/hdfs/org/apache/hadoop/hdfs/util/AtomicFileOutputStream.java
hadoop/common/branches/branch-0.20-security/src/test/org/apache/hadoop/hdfs/server/namenode/TestNameNodeCorruptionRecovery.java
Modified:
hadoop/common/branches/branch-0.20-security/CHANGES.txt
hadoop/common/branches/branch-0.20-security/src/hdfs/org/apache/hadoop/hdfs/server/namenode/FSImage.java
Modified: hadoop/common/branches/branch-0.20-security/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20-security/CHANGES.txt?rev=1155121&r1=1155120&r2=1155121&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.20-security/CHANGES.txt (original)
+++ hadoop/common/branches/branch-0.20-security/CHANGES.txt Mon Aug 8 21:54:54 2011
@@ -33,6 +33,9 @@ Release 0.20.205.0 - unreleased
MAPREDUCE-2489. Jobsplits with random hostnames can make the
queue unusable. (Jeffrey Naisbitt via mahadev)
+ HDFS-2190. NN fails to start if it encounters an empty or malformed fstime
+ file. (atm)
+
IMPROVEMENTS
MAPREDUCE-7343. Make the number of warnings accepted by test-patch
Modified: hadoop/common/branches/branch-0.20-security/src/hdfs/org/apache/hadoop/hdfs/server/namenode/FSImage.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20-security/src/hdfs/org/apache/hadoop/hdfs/server/namenode/FSImage.java?rev=1155121&r1=1155120&r2=1155121&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.20-security/src/hdfs/org/apache/hadoop/hdfs/server/namenode/FSImage.java (original)
+++ hadoop/common/branches/branch-0.20-security/src/hdfs/org/apache/hadoop/hdfs/server/namenode/FSImage.java Mon Aug 8 21:54:54 2011
@@ -60,6 +60,7 @@ import org.apache.hadoop.hdfs.server.com
import org.apache.hadoop.hdfs.server.common.Storage;
import org.apache.hadoop.hdfs.server.common.StorageInfo;
import org.apache.hadoop.hdfs.server.common.UpgradeManager;
+import org.apache.hadoop.hdfs.util.AtomicFileOutputStream;
/**
* FSImage handles checkpointing and logging of the namespace edits.
@@ -235,10 +236,6 @@ public class FSImage extends Storage {
return getFileNames(NameNodeFile.EDITS, NameNodeDirType.EDITS);
}
- File[] getTimeFiles() {
- return getFileNames(NameNodeFile.TIME, null);
- }
-
/**
* Analyze storage directories.
* Recover from previous transitions if required.
@@ -555,6 +552,8 @@ public class FSImage extends Storage {
DataInputStream in = new DataInputStream(new FileInputStream(timeFile));
try {
timeStamp = in.readLong();
+ } catch (IOException e) {
+ LOG.info("Could not read fstime file in storage directory " + sd, e);
} finally {
in.close();
}
@@ -595,9 +594,8 @@ public class FSImage extends Storage {
if (checkpointTime < 0L)
return; // do not write negative time
File timeFile = getImageFile(sd, NameNodeFile.TIME);
- if (timeFile.exists()) { timeFile.delete(); }
DataOutputStream out = new DataOutputStream(
- new FileOutputStream(timeFile));
+ new AtomicFileOutputStream(timeFile));
try {
out.writeLong(checkpointTime);
} finally {
@@ -622,12 +620,13 @@ public class FSImage extends Storage {
writeCheckpointTime(sd);
} catch(IOException e) {
// Close any edits stream associated with this dir and remove directory
- if (sd.getStorageDirType().isOfType(NameNodeDirType.EDITS))
- editLog.processIOError(sd);
-
- //add storage to the removed list
- removedStorageDirs.add(sd);
- it.remove();
+ if (sd.getStorageDirType().isOfType(NameNodeDirType.EDITS)) {
+ editLog.processIOError(sd);
+ }
+
+ //add storage to the removed list
+ removedStorageDirs.add(sd);
+ it.remove();
}
}
}
Added: hadoop/common/branches/branch-0.20-security/src/hdfs/org/apache/hadoop/hdfs/util/AtomicFileOutputStream.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20-security/src/hdfs/org/apache/hadoop/hdfs/util/AtomicFileOutputStream.java?rev=1155121&view=auto
==============================================================================
--- hadoop/common/branches/branch-0.20-security/src/hdfs/org/apache/hadoop/hdfs/util/AtomicFileOutputStream.java (added)
+++ hadoop/common/branches/branch-0.20-security/src/hdfs/org/apache/hadoop/hdfs/util/AtomicFileOutputStream.java Mon Aug 8 21:54:54 2011
@@ -0,0 +1,94 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.util;
+
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.FileOutputStream;
+import java.io.FilterOutputStream;
+import java.io.IOException;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.io.IOUtils;
+
+/**
+ * An output stream, backed by a FileOutputStream, whose contents only
+ * show up at the destination once they have been entirely written and
+ * flushed to disk. While being written, the data lives in a .tmp file.
+ *
+ * When the output stream is closed, it is flushed, fsynced, and
+ * will be moved into place, overwriting any file that already
+ * exists at that location. If closing fails, the .tmp file is deleted.
+ *
+ * <b>NOTE</b>: if the rename fails (as on Windows, where renameTo does
+ * not replace an existing file), the target file is deleted first and
+ * the rename retried, so replacement is not atomic in that case.
+ */
+public class AtomicFileOutputStream extends FilterOutputStream {
+ // Suffix appended to the target's file name to form the temporary file.
+ private static final String TMP_EXTENSION = ".tmp";
+
+ private final static Log LOG = LogFactory.getLog(
+ AtomicFileOutputStream.class);
+ // origFile is the final destination; tmpFile receives every write until close().
+ private final File origFile;
+ private final File tmpFile;
+ /** Opens f's name + ".tmp" in f's parent directory for writing; f itself is only replaced in close(). */
+ public AtomicFileOutputStream(File f) throws FileNotFoundException {
+ // Code unfortunately must be duplicated below since we can't assign anything
+ // before calling super
+ super(new FileOutputStream(new File(f.getParentFile(), f.getName() + TMP_EXTENSION)));
+ origFile = f.getAbsoluteFile();
+ tmpFile = new File(f.getParentFile(), f.getName() + TMP_EXTENSION).getAbsoluteFile();
+ }
+ /** Flushes and fsyncs the temp file, closes it, then renames it onto the target; throws IOException if any step fails. */
+ @Override
+ public void close() throws IOException { // NOTE(review): a second close() appears to fail in force() on the already-closed channel - confirm callers close exactly once
+ boolean triedToClose = false, success = false;
+ try {
+ flush();
+ ((FileOutputStream)out).getChannel().force(true); // force(true): sync file content and metadata before the rename
+ // Set before super.close() so the finally block does not close 'out' a second time.
+ triedToClose = true;
+ super.close();
+ success = true;
+ } finally {
+ if (success) { // stream closed cleanly: move the temp file into place
+ boolean renamed = tmpFile.renameTo(origFile);
+ if (!renamed) {
+ // On windows, renameTo does not replace.
+ if (!origFile.delete() || !tmpFile.renameTo(origFile)) {
+ throw new IOException("Could not rename temporary file " +
+ tmpFile + " to " + origFile);
+ }
+ }
+ } else {
+ if (!triedToClose) {
+ // If we failed when flushing, try to close it to not leak an FD
+ IOUtils.closeStream(out);
+ }
+ // close wasn't successful, try to delete the tmp file
+ if (!tmpFile.delete()) {
+ LOG.warn("Unable to delete tmp file " + tmpFile);
+ }
+ }
+ }
+ }
+
+}
Added: hadoop/common/branches/branch-0.20-security/src/test/org/apache/hadoop/hdfs/server/namenode/TestNameNodeCorruptionRecovery.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20-security/src/test/org/apache/hadoop/hdfs/server/namenode/TestNameNodeCorruptionRecovery.java?rev=1155121&view=auto
==============================================================================
--- hadoop/common/branches/branch-0.20-security/src/test/org/apache/hadoop/hdfs/server/namenode/TestNameNodeCorruptionRecovery.java (added)
+++ hadoop/common/branches/branch-0.20-security/src/test/org/apache/hadoop/hdfs/server/namenode/TestNameNodeCorruptionRecovery.java Mon Aug 8 21:54:54 2011
@@ -0,0 +1,73 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.server.namenode;
+
+import static org.junit.Assert.*;
+
+import java.io.File;
+import java.io.IOException;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.apache.hadoop.hdfs.server.namenode.FSImage.NameNodeFile;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+/**
+ * Test the name node's ability to recover from partially corrupted storage
+ * directories.
+ */
+public class TestNameNodeCorruptionRecovery {
+ // Cluster under test; created in setUpCluster() and shut down in tearDownCluster().
+ private MiniDFSCluster cluster;
+ /** Creates the MiniDFSCluster for each test and calls waitActive() on it. */
+ @Before
+ public void setUpCluster() throws IOException {
+ cluster = new MiniDFSCluster(new Configuration(), 0, true, null); // NOTE(review): presumably 0 datanodes, format=true, no racks - confirm against MiniDFSCluster
+ cluster.waitActive();
+ }
+ /** Shuts the cluster down after each test. */
+ @After
+ public void tearDownCluster() {
+ cluster.shutdown(); // NOTE(review): cluster is still null here if setUpCluster() threw - confirm whether a null guard is wanted
+ }
+
+ /**
+ * Test that a corrupted fstime file in a single storage directory does not
+ * prevent the NN from starting up.
+ */
+ @Test
+ public void testFsTimeFileCorrupt() throws IOException, InterruptedException {
+ assertEquals(cluster.getNameDirs().size(), 2); // NOTE(review): JUnit's assertEquals is (expected, actual); the swapped order only affects the failure message
+ // Replace the fstime file in storage directory 0 with an empty file.
+ truncateStorageDirFile(cluster, NameNodeFile.TIME, 0);
+ // Make sure we can start up despite the fact the fstime file is corrupted.
+ cluster.restartNameNode();
+ }
+ /** Replaces file f in the current dir of the given storage directory with a new, empty file (delete + createNewFile, not an in-place truncate). */
+ private static void truncateStorageDirFile(MiniDFSCluster cluster,
+ NameNodeFile f, int storageDirIndex) throws IOException {
+ File currentDir = cluster.getNameNode().getFSImage()
+ .getStorageDir(storageDirIndex).getCurrentDir();
+ File nameNodeFile = new File(currentDir, f.getName());
+ assertTrue(nameNodeFile.isFile());
+ assertTrue(nameNodeFile.delete());
+ assertTrue(nameNodeFile.createNewFile());
+ }
+}
\ No newline at end of file