You are viewing a plain text version of this content. The canonical link for it is here.
Posted to hdfs-commits@hadoop.apache.org by at...@apache.org on 2012/02/09 23:23:48 UTC
svn commit: r1242564 - in
/hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs: ./
src/main/java/org/apache/hadoop/hdfs/server/namenode/
src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/
Author: atm
Date: Thu Feb 9 22:23:47 2012
New Revision: 1242564
URL: http://svn.apache.org/viewvc?rev=1242564&view=rev
Log:
HDFS-2912. Namenode not shutting down when shared edits dir is inaccessible. Contributed by Bikas Saha.
Modified:
hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt
hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java
hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/JournalSet.java
hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestFailureOfSharedDir.java
Modified: hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt?rev=1242564&r1=1242563&r2=1242564&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt (original)
+++ hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt Thu Feb 9 22:23:47 2012
@@ -191,3 +191,4 @@ HDFS-2924. Standby checkpointing fails t
HDFS-2915. HA: TestFailureOfSharedDir.testFailureOfSharedDir() has race condition. (Bikas Saha via jitendra)
+HDFS-2912. Namenode not shutting down when shared edits dir is inaccessible. (Bikas Saha via atm)
Modified: hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java?rev=1242564&r1=1242563&r2=1242564&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java (original)
+++ hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java Thu Feb 9 22:23:47 2012
@@ -806,6 +806,14 @@ public class FSEditLog {
}
/**
+ * Used only by tests.
+ */
+ @VisibleForTesting
+ public JournalSet getJournalSet() {
+ return journalSet;
+ }
+
+ /**
* Used only by unit tests.
*/
@VisibleForTesting
Modified: hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/JournalSet.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/JournalSet.java?rev=1242564&r1=1242563&r2=1242564&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/JournalSet.java (original)
+++ hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/JournalSet.java Thu Feb 9 22:23:47 2012
@@ -25,8 +25,10 @@ import java.util.SortedSet;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.hdfs.server.protocol.RemoteEditLog;
import org.apache.hadoop.hdfs.server.protocol.RemoteEditLogManifest;
+
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableList;
@@ -35,8 +37,6 @@ import com.google.common.collect.Lists;
import com.google.common.collect.Multimaps;
import com.google.common.collect.Sets;
-import org.apache.hadoop.classification.InterfaceAudience;
-
/**
* Manages a collection of Journals. None of the methods are synchronized, it is
* assumed that FSEditLog methods, that use this class, use proper
@@ -148,11 +148,17 @@ public class JournalSet implements Journ
private List<JournalAndStream> journals = Lists.newArrayList();
final int minimumRedundantJournals;
+ private volatile Runtime runtime = Runtime.getRuntime();
JournalSet(int minimumRedundantResources) {
this.minimumRedundantJournals = minimumRedundantResources;
}
+ @VisibleForTesting
+ public void setRuntimeForTesting(Runtime runtime) {
+ this.runtime = runtime;
+ }
+
@Override
public EditLogOutputStream startLogSegment(final long txId) throws IOException {
mapJournalsAndReportErrors(new JournalClosure() {
@@ -323,6 +329,12 @@ public class JournalSet implements Journ
// continue on any of the other journals. Abort them to ensure that
// retry behavior doesn't allow them to keep going in any way.
abortAllJournals();
+ // The current policy is to shut down the NN on errors to the shared edits
+ // dir. There are many code paths to shared edits failures - syncs,
+ // rolls of edits, etc. All of them go through this common function
+ // where the isRequired() check is made. The exit policy is applied here
+ // to catch all code paths.
+ runtime.exit(1);
throw new IOException(msg);
} else {
LOG.error("Error: " + status + " failed for (journal " + jas + ")", t);
Modified: hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestFailureOfSharedDir.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestFailureOfSharedDir.java?rev=1242564&r1=1242563&r2=1242564&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestFailureOfSharedDir.java (original)
+++ hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestFailureOfSharedDir.java Thu Feb 9 22:23:47 2012
@@ -40,6 +40,7 @@ import org.apache.hadoop.hdfs.server.nam
import org.apache.hadoop.hdfs.server.namenode.NameNode;
import org.apache.hadoop.test.GenericTestUtils;
import org.junit.Test;
+import org.mockito.Mockito;
import com.google.common.base.Joiner;
@@ -129,7 +130,6 @@ public class TestFailureOfSharedDir {
// The shared edits dir will automatically be marked required.
MiniDFSCluster cluster = null;
- int chmodSucceeded = -1;
File sharedEditsDir = null;
try {
cluster = new MiniDFSCluster.Builder(conf)
@@ -145,16 +145,15 @@ public class TestFailureOfSharedDir {
assertTrue(fs.mkdirs(new Path("/test1")));
// Blow away the shared edits dir.
+ Runtime mockRuntime = Mockito.mock(Runtime.class);
URI sharedEditsUri = cluster.getSharedEditsDir(0, 1);
sharedEditsDir = new File(sharedEditsUri);
- chmodSucceeded = FileUtil.chmod(sharedEditsDir.getAbsolutePath(), "-w",
- true);
- if (chmodSucceeded != 0) {
- LOG.error("Failed to remove write permissions on shared edits dir:"
- + sharedEditsDir.getAbsolutePath());
- }
+ assertEquals(0, FileUtil.chmod(sharedEditsDir.getAbsolutePath(), "-w",
+ true));
NameNode nn0 = cluster.getNameNode(0);
+ nn0.getNamesystem().getFSImage().getEditLog().getJournalSet()
+ .setRuntimeForTesting(mockRuntime);
try {
// Make sure that subsequent operations on the NN fail.
nn0.getRpcServer().rollEditLog();
@@ -163,6 +162,12 @@ public class TestFailureOfSharedDir {
GenericTestUtils.assertExceptionContains(
"Unable to start log segment 4: too few journals successfully started",
ioe);
+ // By current policy the NN should exit upon this error.
+ // exit() should be called once, but since it is mocked, exit gets
+ // called once during FSEditLog.endCurrentLogSegment() and then after
+ // that during FSEditLog.startLogSegment(). So the check is atLeastOnce().
+ Mockito.verify(mockRuntime, Mockito.atLeastOnce()).exit(
+ Mockito.anyInt());
LOG.info("Got expected exception", ioe);
}
@@ -179,7 +184,7 @@ public class TestFailureOfSharedDir {
NNStorage.getInProgressEditsFileName(1));
}
} finally {
- if (chmodSucceeded == 0) {
+ if (sharedEditsDir != null) {
// without this test cleanup will fail
FileUtil.chmod(sharedEditsDir.getAbsolutePath(), "+w", true);
}