You are viewing a plain text version of this content. The canonical link for it is here.
Posted to hdfs-commits@hadoop.apache.org by at...@apache.org on 2012/04/12 23:24:32 UTC

svn commit: r1325528 - in /hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs: ./ src/main/java/org/apache/hadoop/hdfs/server/namenode/ src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/

Author: atm
Date: Thu Apr 12 21:24:31 2012
New Revision: 1325528

URL: http://svn.apache.org/viewvc?rev=1325528&view=rev
Log:
HDFS-3259. NameNode#initializeSharedEdits should populate shared edits dir with edit log segments. Contributed by Aaron T. Myers.

Modified:
    hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
    hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java
    hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FileJournalManager.java
    hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java
    hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestInitializeSharedEdits.java

Modified: hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt?rev=1325528&r1=1325527&r2=1325528&view=diff
==============================================================================
--- hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt (original)
+++ hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt Thu Apr 12 21:24:31 2012
@@ -246,6 +246,9 @@ Release 2.0.0 - UNRELEASED
 
     HDFS-2983. Relax the build version check to permit rolling upgrades within a release. (atm)
 
+    HDFS-3259. NameNode#initializeSharedEdits should populate shared edits dir
+    with edit log segments. (atm)
+
   OPTIMIZATIONS
 
     HDFS-2477. Optimize computing the diff between a block report and the

Modified: hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java?rev=1325528&r1=1325527&r2=1325528&view=diff
==============================================================================
--- hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java (original)
+++ hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java Thu Apr 12 21:24:31 2012
@@ -312,10 +312,12 @@ public class FSEditLog  {
       endCurrentLogSegment(true);
     }
     
-    try {
-      journalSet.close();
-    } catch (IOException ioe) {
-      LOG.warn("Error closing journalSet", ioe);
+    if (!journalSet.isEmpty()) {
+      try {
+        journalSet.close();
+      } catch (IOException ioe) {
+        LOG.warn("Error closing journalSet", ioe);
+      }
     }
 
     state = State.CLOSED;
@@ -814,9 +816,8 @@ public class FSEditLog  {
   }
   
   /**
-   * Used only by unit tests.
+   * Get all the journals this edit log is currently operating on.
    */
-  @VisibleForTesting
   synchronized List<JournalAndStream> getJournals() {
     return journalSet.getAllJournalStreams();
   }

Modified: hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FileJournalManager.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FileJournalManager.java?rev=1325528&r1=1325527&r2=1325528&view=diff
==============================================================================
--- hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FileJournalManager.java (original)
+++ hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FileJournalManager.java Thu Apr 12 21:24:31 2012
@@ -344,7 +344,7 @@ class FileJournalManager implements Jour
     }
   }
 
-  private List<EditLogFile> getLogFiles(long fromTxId) throws IOException {
+  List<EditLogFile> getLogFiles(long fromTxId) throws IOException {
     File currentDir = sd.getCurrentDir();
     List<EditLogFile> allLogFiles = matchEditLogs(currentDir);
     List<EditLogFile> logFiles = Lists.newArrayList();

Modified: hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java?rev=1325528&r1=1325527&r2=1325528&view=diff
==============================================================================
--- hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java (original)
+++ hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java Thu Apr 12 21:24:31 2012
@@ -18,14 +18,17 @@
 package org.apache.hadoop.hdfs.server.namenode;
 
 import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
 import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
 import java.net.InetSocketAddress;
 import java.net.URI;
 import java.util.Arrays;
 import java.util.Collection;
 import java.util.Iterator;
 import java.util.List;
-
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.HadoopIllegalArgumentException;
@@ -41,7 +44,6 @@ import org.apache.hadoop.fs.FileUtil;
 import org.apache.hadoop.fs.Trash;
 import static org.apache.hadoop.hdfs.DFSConfigKeys.*;
 
-import org.apache.hadoop.hdfs.DFSConfigKeys;
 import org.apache.hadoop.hdfs.DFSUtil;
 import org.apache.hadoop.hdfs.HAUtil;
 import org.apache.hadoop.hdfs.HdfsConfiguration;
@@ -49,6 +51,9 @@ import org.apache.hadoop.hdfs.protocol.C
 import org.apache.hadoop.hdfs.protocol.HdfsConstants;
 import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.NamenodeRole;
 import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.StartupOption;
+import org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory;
+import org.apache.hadoop.hdfs.server.namenode.FileJournalManager.EditLogFile;
+import org.apache.hadoop.hdfs.server.namenode.JournalSet.JournalAndStream;
 import org.apache.hadoop.hdfs.server.namenode.ha.ActiveState;
 import org.apache.hadoop.hdfs.server.namenode.ha.BootstrapStandby;
 import org.apache.hadoop.hdfs.server.namenode.ha.HAContext;
@@ -61,6 +66,8 @@ import org.apache.hadoop.hdfs.server.pro
 import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocols;
 import org.apache.hadoop.hdfs.server.protocol.NamenodeRegistration;
 import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo;
+import org.apache.hadoop.hdfs.util.AtomicFileOutputStream;
+import org.apache.hadoop.io.IOUtils;
 import org.apache.hadoop.ipc.StandbyException;
 import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
 import org.apache.hadoop.net.NetUtils;
@@ -749,9 +756,10 @@ public class NameNode {
       boolean force) {
     return initializeSharedEdits(conf, force, false);
   }
-  
+
   /**
-   * Format a new shared edits dir.
+   * Format a new shared edits dir and copy in enough edit log segments so that
+   * the standby NN can start up.
    * 
    * @param conf configuration
    * @param force format regardless of whether or not the shared edits dir exists
@@ -785,8 +793,19 @@ public class NameNode {
           existingStorage.getBlockPoolID(),
           existingStorage.getCTime(),
           existingStorage.getDistributedUpgradeVersion()));
-    } catch (Exception e) {
-      LOG.error("Could not format shared edits dir", e);
+      
+      // Need to make sure the edit log segments are in good shape to initialize
+      // the shared edits dir.
+      fsns.getFSImage().getEditLog().close();
+      fsns.getFSImage().getEditLog().initJournalsForWrite();
+      fsns.getFSImage().getEditLog().recoverUnclosedStreams();
+      
+      if (copyEditLogSegmentsToSharedDir(fsns, sharedEditsDirs,
+          newSharedStorage, conf)) {
+        return true; // aborted
+      }
+    } catch (IOException ioe) {
+      LOG.error("Could not initialize shared edits dir", ioe);
       return true; // aborted
     } finally {
       // Have to unlock storage explicitly for the case when we're running in a
@@ -802,6 +821,44 @@ public class NameNode {
     }
     return false; // did not abort
   }
+  
+  private static boolean copyEditLogSegmentsToSharedDir(FSNamesystem fsns,
+      Collection<URI> sharedEditsDirs, NNStorage newSharedStorage,
+      Configuration conf) throws FileNotFoundException, IOException {
+    // Copy edit log segments into the new shared edits dir.
+    for (JournalAndStream jas : fsns.getFSImage().getEditLog().getJournals()) {
+      FileJournalManager fjm = null;
+      if (!(jas.getManager() instanceof FileJournalManager)) {
+        LOG.error("Cannot populate shared edits dir from non-file " +
+            "journal manager: " + jas.getManager());
+        return true; // aborted
+      } else {
+        fjm = (FileJournalManager) jas.getManager();
+      }
+      for (EditLogFile elf : fjm.getLogFiles(fsns.getFSImage()
+          .getMostRecentCheckpointTxId())) {
+        File editLogSegment = elf.getFile();
+        for (URI sharedEditsUri : sharedEditsDirs) {
+          StorageDirectory sharedEditsDir = newSharedStorage
+              .getStorageDirectory(sharedEditsUri);
+          File targetFile = new File(sharedEditsDir.getCurrentDir(),
+              editLogSegment.getName());
+          if (!targetFile.exists()) {
+            InputStream in = null;
+            OutputStream out = null;
+            try {
+              in = new FileInputStream(editLogSegment);
+              out = new AtomicFileOutputStream(targetFile);
+              IOUtils.copyBytes(in, out, conf);
+            } finally {
+              IOUtils.cleanup(LOG, in, out);
+            }
+          }
+        }
+      }
+    }
+    return false; // did not abort
+  }
 
   private static boolean finalize(Configuration conf,
                                boolean isConfirmationNeeded

Modified: hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestInitializeSharedEdits.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestInitializeSharedEdits.java?rev=1325528&r1=1325527&r2=1325528&view=diff
==============================================================================
--- hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestInitializeSharedEdits.java (original)
+++ hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestInitializeSharedEdits.java Thu Apr 12 21:24:31 2012
@@ -19,17 +19,22 @@ package org.apache.hadoop.hdfs.server.na
 
 import java.io.File;
 import java.io.IOException;
+import java.net.URISyntaxException;
+
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.FileUtil;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.ha.ServiceFailedException;
 import org.apache.hadoop.hdfs.DFSConfigKeys;
 import org.apache.hadoop.hdfs.DFSUtil;
+import org.apache.hadoop.hdfs.HAUtil;
 import org.apache.hadoop.hdfs.MiniDFSCluster;
 import org.apache.hadoop.hdfs.MiniDFSNNTopology;
 import org.apache.hadoop.hdfs.server.namenode.NameNode;
+import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter;
 import org.apache.hadoop.test.GenericTestUtils;
 import org.junit.After;
 import org.junit.Before;
@@ -48,7 +53,10 @@ public class TestInitializeSharedEdits {
   @Before
   public void setupCluster() throws IOException {
     conf = new Configuration();
-
+    conf.setInt(DFSConfigKeys.DFS_HA_LOGROLL_PERIOD_KEY, 1);
+    conf.setInt(DFSConfigKeys.DFS_HA_TAILEDITS_PERIOD_KEY, 1);
+    HAUtil.setAllowStandbyReads(conf, true);
+    
     MiniDFSNNTopology topology = MiniDFSNNTopology.simpleHATopology();
     
     cluster = new MiniDFSCluster.Builder(conf)
@@ -56,11 +64,8 @@ public class TestInitializeSharedEdits {
       .numDataNodes(0)
       .build();
     cluster.waitActive();
-  
-    cluster.shutdownNameNode(0);
-    cluster.shutdownNameNode(1);
-    File sharedEditsDir = new File(cluster.getSharedEditsDir(0, 1));
-    assertTrue(FileUtil.fullyDelete(sharedEditsDir));
+
+    shutdownClusterAndRemoveSharedEditsDir();
   }
   
   @After
@@ -70,8 +75,14 @@ public class TestInitializeSharedEdits {
     }
   }
   
-  @Test
-  public void testInitializeSharedEdits() throws Exception {
+  private void shutdownClusterAndRemoveSharedEditsDir() throws IOException {
+    cluster.shutdownNameNode(0);
+    cluster.shutdownNameNode(1);
+    File sharedEditsDir = new File(cluster.getSharedEditsDir(0, 1));
+    assertTrue(FileUtil.fullyDelete(sharedEditsDir));
+  }
+  
+  private void assertCannotStartNameNodes() {
     // Make sure we can't currently start either NN.
     try {
       cluster.restartNameNode(0, false);
@@ -89,24 +100,27 @@ public class TestInitializeSharedEdits {
       GenericTestUtils.assertExceptionContains(
           "Cannot start an HA namenode with name dirs that need recovery", ioe);
     }
-    
-    // Initialize the shared edits dir.
-    assertFalse(NameNode.initializeSharedEdits(conf));
-    
+  }
+  
+  private void assertCanStartHaNameNodes(String pathSuffix)
+      throws ServiceFailedException, IOException, URISyntaxException,
+      InterruptedException {
     // Now should be able to start both NNs. Pass "false" here so that we don't
     // try to waitActive on all NNs, since the second NN doesn't exist yet.
     cluster.restartNameNode(0, false);
     cluster.restartNameNode(1, true);
     
     // Make sure HA is working.
-    cluster.transitionToActive(0);
+    cluster.getNameNode(0).getRpcServer().transitionToActive();
     FileSystem fs = null;
     try {
+      Path newPath = new Path(TEST_PATH, pathSuffix);
       fs = HATestUtil.configureFailoverFs(cluster, conf);
-      assertTrue(fs.mkdirs(TEST_PATH));
-      cluster.transitionToStandby(0);
-      cluster.transitionToActive(1);
-      assertTrue(fs.isDirectory(TEST_PATH));
+      assertTrue(fs.mkdirs(newPath));
+      HATestUtil.waitForStandbyToCatchUp(cluster.getNameNode(0),
+          cluster.getNameNode(1));
+      assertTrue(NameNodeAdapter.getFileInfo(cluster.getNameNode(1),
+          newPath.toString(), false).isDir());
     } finally {
       if (fs != null) {
         fs.close();
@@ -115,6 +129,29 @@ public class TestInitializeSharedEdits {
   }
   
   @Test
+  public void testInitializeSharedEdits() throws Exception {
+    assertCannotStartNameNodes();
+    
+    // Initialize the shared edits dir.
+    assertFalse(NameNode.initializeSharedEdits(cluster.getConfiguration(0)));
+    
+    assertCanStartHaNameNodes("1");
+    
+    // Now that we've done a metadata operation, make sure that deleting and
+    // re-initializing the shared edits dir will let the standby still start.
+    
+    shutdownClusterAndRemoveSharedEditsDir();
+    
+    assertCannotStartNameNodes();
+    
+    // Re-initialize the shared edits dir.
+    assertFalse(NameNode.initializeSharedEdits(cluster.getConfiguration(0)));
+    
+    // Should *still* be able to start both NNs
+    assertCanStartHaNameNodes("2");
+  }
+  
+  @Test
   public void testDontOverWriteExistingDir() {
     assertFalse(NameNode.initializeSharedEdits(conf, false));
     assertTrue(NameNode.initializeSharedEdits(conf, false));