You are viewing a plain text version of this content. The canonical link for it is here.
Posted to hdfs-commits@hadoop.apache.org by at...@apache.org on 2013/09/05 18:21:26 UTC
svn commit: r1520363 - in
/hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs: CHANGES.txt
src/main/java/org/apache/hadoop/hdfs/server/namenode/SecondaryNameNode.java
src/test/java/org/apache/hadoop/hdfs/server/namenode/TestCheckpoint.java
Author: atm
Date: Thu Sep 5 16:21:26 2013
New Revision: 1520363
URL: http://svn.apache.org/r1520363
Log:
HDFS-5159. Secondary NameNode fails to checkpoint if error occurs downloading edits on first checkpoint. Contributed by Aaron T. Myers.
Modified:
hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/SecondaryNameNode.java
hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestCheckpoint.java
Modified: hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt?rev=1520363&r1=1520362&r2=1520363&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt (original)
+++ hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt Thu Sep 5 16:21:26 2013
@@ -415,6 +415,9 @@ Release 2.1.1-beta - UNRELEASED
HDFS-5140. Too many safemode monitor threads being created in the standby
namenode causing it to fail with out of memory error. (jing9)
+ HDFS-5159. Secondary NameNode fails to checkpoint if error occurs
+ downloading edits on first checkpoint. (atm)
+
Release 2.1.0-beta - 2013-08-22
INCOMPATIBLE CHANGES
Modified: hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/SecondaryNameNode.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/SecondaryNameNode.java?rev=1520363&r1=1520362&r2=1520363&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/SecondaryNameNode.java (original)
+++ hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/SecondaryNameNode.java Thu Sep 5 16:21:26 2013
@@ -429,10 +429,8 @@ public class SecondaryNameNode implement
dstImage.getStorage().cTime = sig.cTime;
// get fsimage
- boolean downloadImage = true;
if (sig.mostRecentCheckpointTxId ==
dstImage.getStorage().getMostRecentCheckpointTxId()) {
- downloadImage = false;
LOG.info("Image has not changed. Will not download image.");
} else {
LOG.info("Image has changed. Downloading updated image from NN.");
@@ -448,7 +446,9 @@ public class SecondaryNameNode implement
nnHostPort, log, dstImage.getStorage());
}
- return Boolean.valueOf(downloadImage);
+ // true if we haven't loaded all the transactions represented by the
+ // downloaded fsimage.
+ return dstImage.getLastAppliedTxId() < sig.mostRecentCheckpointTxId;
}
});
return b.booleanValue();
Modified: hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestCheckpoint.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestCheckpoint.java?rev=1520363&r1=1520362&r2=1520363&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestCheckpoint.java (original)
+++ hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestCheckpoint.java Thu Sep 5 16:21:26 2013
@@ -39,7 +39,6 @@ import java.util.Collection;
import java.util.List;
import org.apache.commons.cli.ParseException;
-import org.apache.commons.io.filefilter.FileFilterUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.commons.logging.impl.Log4JLogger;
@@ -1224,7 +1223,6 @@ public class TestCheckpoint {
}
/* Test case to test CheckpointSignature */
- @SuppressWarnings("deprecation")
@Test
public void testCheckpointSignature() throws IOException {
@@ -1562,12 +1560,65 @@ public class TestCheckpoint {
Mockito.reset(faultInjector);
}
}
+
+ /**
+ * Test that a fault while downloading edits the first time after the 2NN
+ * starts up does not prevent future checkpointing.
+ */
+ @Test(timeout = 30000)
+ public void testEditFailureOnFirstCheckpoint() throws IOException {
+ Configuration conf = new HdfsConfiguration();
+ SecondaryNameNode secondary = null;
+ MiniDFSCluster cluster = null;
+ FileSystem fs = null;
+ try {
+ cluster = new MiniDFSCluster.Builder(conf).numDataNodes(numDatanodes)
+ .build();
+ cluster.waitActive();
+ fs = cluster.getFileSystem();
+ fs.mkdirs(new Path("test-file-1"));
+
+ // Make sure the on-disk fsimage on the NN has txid > 0.
+ FSNamesystem fsns = cluster.getNamesystem();
+ fsns.enterSafeMode(false);
+ fsns.saveNamespace();
+ fsns.leaveSafeMode();
+
+ secondary = startSecondaryNameNode(conf);
+
+ // Cause edit rename to fail during next checkpoint
+ Mockito.doThrow(new IOException("Injecting failure before edit rename"))
+ .when(faultInjector).beforeEditsRename();
+
+ try {
+ secondary.doCheckpoint();
+ fail("Fault injection failed.");
+ } catch (IOException ioe) {
+ GenericTestUtils.assertExceptionContains(
+ "Injecting failure before edit rename", ioe);
+ }
+ Mockito.reset(faultInjector);
+
+ // Next checkpoint should succeed
+ secondary.doCheckpoint();
+ } finally {
+ if (secondary != null) {
+ secondary.shutdown();
+ }
+ if (fs != null) {
+ fs.close();
+ }
+ if (cluster != null) {
+ cluster.shutdown();
+ }
+ Mockito.reset(faultInjector);
+ }
+ }
/**
* Test that the secondary namenode correctly deletes temporary edits
* on startup.
*/
-
@Test(timeout = 30000)
public void testDeleteTemporaryEditsOnStartup() throws IOException {
Configuration conf = new HdfsConfiguration();
@@ -1943,7 +1994,6 @@ public class TestCheckpoint {
* Test that, if a storage directory is failed when a checkpoint occurs,
* the non-failed storage directory receives the checkpoint.
*/
- @SuppressWarnings("deprecation")
@Test
public void testCheckpointWithFailedStorageDir() throws Exception {
MiniDFSCluster cluster = null;
@@ -2006,7 +2056,6 @@ public class TestCheckpoint {
* should function correctly.
* @throws Exception
*/
- @SuppressWarnings("deprecation")
@Test
public void testCheckpointWithSeparateDirsAfterNameFails() throws Exception {
MiniDFSCluster cluster = null;