You are viewing a plain text version of this content. The canonical link for it is here.
Posted to hdfs-commits@hadoop.apache.org by at...@apache.org on 2012/02/09 23:23:48 UTC

svn commit: r1242564 - in /hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs: ./ src/main/java/org/apache/hadoop/hdfs/server/namenode/ src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/

Author: atm
Date: Thu Feb  9 22:23:47 2012
New Revision: 1242564

URL: http://svn.apache.org/viewvc?rev=1242564&view=rev
Log:
HDFS-2912. Namenode not shutting down when shared edits dir is inaccessible. Contributed by Bikas Saha.

Modified:
    hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt
    hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java
    hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/JournalSet.java
    hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestFailureOfSharedDir.java

Modified: hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt?rev=1242564&r1=1242563&r2=1242564&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt (original)
+++ hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt Thu Feb  9 22:23:47 2012
@@ -191,3 +191,4 @@ HDFS-2924. Standby checkpointing fails t
 
 HDFS-2915. HA: TestFailureOfSharedDir.testFailureOfSharedDir() has race condition. (Bikas Saha via jitendra)
 
+HDFS-2912. Namenode not shutting down when shared edits dir is inaccessible. (Bikas Saha via atm)

Modified: hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java?rev=1242564&r1=1242563&r2=1242564&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java (original)
+++ hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java Thu Feb  9 22:23:47 2012
@@ -806,6 +806,14 @@ public class FSEditLog  {
   }
   
   /**
+   * Used only by tests.
+   */
+  @VisibleForTesting
+  public JournalSet getJournalSet() {
+    return journalSet;
+  }
+  
+  /**
    * Used only by unit tests.
    */
   @VisibleForTesting

Modified: hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/JournalSet.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/JournalSet.java?rev=1242564&r1=1242563&r2=1242564&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/JournalSet.java (original)
+++ hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/JournalSet.java Thu Feb  9 22:23:47 2012
@@ -25,8 +25,10 @@ import java.util.SortedSet;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.hdfs.server.protocol.RemoteEditLog;
 import org.apache.hadoop.hdfs.server.protocol.RemoteEditLogManifest;
+
 import com.google.common.annotations.VisibleForTesting;
 import com.google.common.base.Preconditions;
 import com.google.common.collect.ImmutableList;
@@ -35,8 +37,6 @@ import com.google.common.collect.Lists;
 import com.google.common.collect.Multimaps;
 import com.google.common.collect.Sets;
 
-import org.apache.hadoop.classification.InterfaceAudience;
-
 /**
  * Manages a collection of Journals. None of the methods are synchronized, it is
  * assumed that FSEditLog methods, that use this class, use proper
@@ -148,11 +148,17 @@ public class JournalSet implements Journ
   
   private List<JournalAndStream> journals = Lists.newArrayList();
   final int minimumRedundantJournals;
+  private volatile Runtime runtime = Runtime.getRuntime();
   
   JournalSet(int minimumRedundantResources) {
     this.minimumRedundantJournals = minimumRedundantResources;
   }
   
+  @VisibleForTesting
+  public void setRuntimeForTesting(Runtime runtime) {
+    this.runtime = runtime;
+  }
+  
   @Override
   public EditLogOutputStream startLogSegment(final long txId) throws IOException {
     mapJournalsAndReportErrors(new JournalClosure() {
@@ -323,6 +329,12 @@ public class JournalSet implements Journ
           // continue on any of the other journals. Abort them to ensure that
           // retry behavior doesn't allow them to keep going in any way.
           abortAllJournals();
+          // The current policy is to shut down the NN on errors to the shared
+          // edits dir. There are many code paths to shared edits failures -
+          // syncs, roll of edits etc. All of them go through this common
+          // function where the isRequired() check is made. Apply the exit
+          // policy here to catch all code paths.
+          runtime.exit(1);
           throw new IOException(msg);
         } else {
           LOG.error("Error: " + status + " failed for (journal " + jas + ")", t);

Modified: hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestFailureOfSharedDir.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestFailureOfSharedDir.java?rev=1242564&r1=1242563&r2=1242564&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestFailureOfSharedDir.java (original)
+++ hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestFailureOfSharedDir.java Thu Feb  9 22:23:47 2012
@@ -40,6 +40,7 @@ import org.apache.hadoop.hdfs.server.nam
 import org.apache.hadoop.hdfs.server.namenode.NameNode;
 import org.apache.hadoop.test.GenericTestUtils;
 import org.junit.Test;
+import org.mockito.Mockito;
 
 import com.google.common.base.Joiner;
 
@@ -129,7 +130,6 @@ public class TestFailureOfSharedDir {
     
     // The shared edits dir will automatically be marked required.
     MiniDFSCluster cluster = null;
-    int chmodSucceeded = -1;
     File sharedEditsDir = null;
     try {
       cluster = new MiniDFSCluster.Builder(conf)
@@ -145,16 +145,15 @@ public class TestFailureOfSharedDir {
       assertTrue(fs.mkdirs(new Path("/test1")));
       
       // Blow away the shared edits dir.
+      Runtime mockRuntime = Mockito.mock(Runtime.class);
       URI sharedEditsUri = cluster.getSharedEditsDir(0, 1);
       sharedEditsDir = new File(sharedEditsUri);
-      chmodSucceeded = FileUtil.chmod(sharedEditsDir.getAbsolutePath(), "-w",
-          true);
-      if (chmodSucceeded != 0) {
-        LOG.error("Failed to remove write permissions on shared edits dir:"
-            + sharedEditsDir.getAbsolutePath());
-      }
+      assertEquals(0, FileUtil.chmod(sharedEditsDir.getAbsolutePath(), "-w",
+          true));
 
       NameNode nn0 = cluster.getNameNode(0);
+      nn0.getNamesystem().getFSImage().getEditLog().getJournalSet()
+          .setRuntimeForTesting(mockRuntime);
       try {
         // Make sure that subsequent operations on the NN fail.
         nn0.getRpcServer().rollEditLog();
@@ -163,6 +162,12 @@ public class TestFailureOfSharedDir {
         GenericTestUtils.assertExceptionContains(
             "Unable to start log segment 4: too few journals successfully started",
             ioe);
+        // By current policy the NN should exit upon this error.
+        // exit() should be called once, but since it is mocked, exit gets
+        // called once during FSEditLog.endCurrentLogSegment() and then again
+        // during FSEditLog.startLogSegment(). So the check is atLeastOnce().
+        Mockito.verify(mockRuntime, Mockito.atLeastOnce()).exit(
+            Mockito.anyInt());
         LOG.info("Got expected exception", ioe);
       }
       
@@ -179,7 +184,7 @@ public class TestFailureOfSharedDir {
             NNStorage.getInProgressEditsFileName(1));
       }
     } finally {
-      if (chmodSucceeded == 0) {
+      if (sharedEditsDir != null) {
         // without this test cleanup will fail
         FileUtil.chmod(sharedEditsDir.getAbsolutePath(), "+w", true);
       }