You are viewing a plain text version of this content. The canonical link for it is here.
Posted to hdfs-commits@hadoop.apache.org by to...@apache.org on 2011/06/24 23:57:50 UTC

svn commit: r1139453 - in /hadoop/common/branches/HDFS-1073/hdfs: ./ src/java/org/apache/hadoop/hdfs/server/namenode/ src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/ src/test/hdfs/org/apache/hadoop/test/

Author: todd
Date: Fri Jun 24 21:57:49 2011
New Revision: 1139453

URL: http://svn.apache.org/viewvc?rev=1139453&view=rev
Log:
HDFS-2078. NameNode should not clear directory when restoring removed storage. Contributed by Todd Lipcon.

Modified:
    hadoop/common/branches/HDFS-1073/hdfs/CHANGES.HDFS-1073.txt
    hadoop/common/branches/HDFS-1073/hdfs/src/java/org/apache/hadoop/hdfs/server/namenode/NNStorage.java
    hadoop/common/branches/HDFS-1073/hdfs/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/TestCheckpoint.java
    hadoop/common/branches/HDFS-1073/hdfs/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/TestNNStorageArchivalManager.java
    hadoop/common/branches/HDFS-1073/hdfs/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/TestSaveNamespace.java
    hadoop/common/branches/HDFS-1073/hdfs/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/TestStorageRestore.java
    hadoop/common/branches/HDFS-1073/hdfs/src/test/hdfs/org/apache/hadoop/test/GenericTestUtils.java

Modified: hadoop/common/branches/HDFS-1073/hdfs/CHANGES.HDFS-1073.txt
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-1073/hdfs/CHANGES.HDFS-1073.txt?rev=1139453&r1=1139452&r2=1139453&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-1073/hdfs/CHANGES.HDFS-1073.txt (original)
+++ hadoop/common/branches/HDFS-1073/hdfs/CHANGES.HDFS-1073.txt Fri Jun 24 21:57:49 2011
@@ -57,3 +57,6 @@ HDFS-2026. SecondaryNameNode should prop
            NameNode is reformatted. (todd)
 HDFS-2077. Address checkpoint upload when one of the storage dirs is failed
            (todd)
+HDFS-2078. NameNode should not clear directory when restoring removed storage.
+           (todd)
+           

Modified: hadoop/common/branches/HDFS-1073/hdfs/src/java/org/apache/hadoop/hdfs/server/namenode/NNStorage.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-1073/hdfs/src/java/org/apache/hadoop/hdfs/server/namenode/NNStorage.java?rev=1139453&r1=1139452&r2=1139453&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-1073/hdfs/src/java/org/apache/hadoop/hdfs/server/namenode/NNStorage.java (original)
+++ hadoop/common/branches/HDFS-1073/hdfs/src/java/org/apache/hadoop/hdfs/server/namenode/NNStorage.java Fri Jun 24 21:57:49 2011
@@ -242,20 +242,10 @@ public class NNStorage extends Storage i
         LOG.info("currently disabled dir " + root.getAbsolutePath() +
                  "; type="+sd.getStorageDirType() 
                  + ";canwrite="+root.canWrite());
-        try {
-          
-          if(root.exists() && root.canWrite()) {
-            // when we try to restore we just need to remove all the data
-            // without saving current in-memory state (which could've changed).
-            // TODO does this still make sense with 1073?
-            sd.clearDirectory();
-            
-            LOG.info("restoring dir " + sd.getRoot().getAbsolutePath());
-            this.addStorageDir(sd); // restore
-            this.removedStorageDirs.remove(sd);
-          }
-        } catch(IOException e) {
-          LOG.warn("failed to restore " + sd.getRoot().getAbsolutePath(), e);
+        if(root.exists() && root.canWrite()) {
+          LOG.info("restoring dir " + sd.getRoot().getAbsolutePath());
+          this.addStorageDir(sd); // restore
+          this.removedStorageDirs.remove(sd);
         }
       }
     }

Modified: hadoop/common/branches/HDFS-1073/hdfs/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/TestCheckpoint.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-1073/hdfs/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/TestCheckpoint.java?rev=1139453&r1=1139452&r2=1139453&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-1073/hdfs/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/TestCheckpoint.java (original)
+++ hadoop/common/branches/HDFS-1073/hdfs/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/TestCheckpoint.java Fri Jun 24 21:57:49 2011
@@ -17,6 +17,7 @@
  */
 package org.apache.hadoop.hdfs.server.namenode;
 
+import static org.apache.hadoop.hdfs.server.common.Util.fileAsURI;
 import junit.framework.TestCase;
 import java.net.InetSocketAddress;
 import java.io.File;
@@ -45,6 +46,7 @@ import org.apache.hadoop.hdfs.MiniDFSClu
 import org.apache.hadoop.hdfs.protocol.FSConstants.SafeModeAction;
 import org.apache.hadoop.hdfs.server.common.HdfsConstants.StartupOption;
 import org.apache.hadoop.hdfs.server.common.Storage;
+import org.apache.hadoop.hdfs.server.common.Storage.StorageDirType;
 import org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory;
 import org.apache.hadoop.hdfs.server.common.StorageInfo;
 import org.apache.hadoop.hdfs.server.namenode.NNStorage.NameNodeDirType;
@@ -1440,6 +1442,83 @@ public class TestCheckpoint extends Test
       }
     }
   }
+  
+  /**
+   * Test case where the NN is configured with a name-only and an edits-only
+   * dir, with storage-restore turned on. In this case, if the name-only dir
+   * disappears and comes back, a new checkpoint after it has been restored
+   * should function correctly.
+   * @throws Exception
+   */
+  @SuppressWarnings("deprecation")
+  public void testCheckpointWithSeparateDirsAfterNameFails() throws Exception {
+    MiniDFSCluster cluster = null;
+    SecondaryNameNode secondary = null;
+    File currentDir = null;
+    
+    Configuration conf = new HdfsConfiguration();
+
+    File base_dir = new File(MiniDFSCluster.getBaseDirectory());
+    conf.setBoolean(DFSConfigKeys.DFS_NAMENODE_NAME_DIR_RESTORE_KEY, true);
+    conf.set(DFSConfigKeys.DFS_NAMENODE_NAME_DIR_KEY,
+        MiniDFSCluster.getBaseDirectory() + "/name-only");
+    conf.set(DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_KEY,
+        MiniDFSCluster.getBaseDirectory() + "/edits-only");
+    conf.set(DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_DIR_KEY,
+        fileAsURI(new File(base_dir, "namesecondary1")).toString());
+
+    try {
+      cluster = new MiniDFSCluster.Builder(conf).numDataNodes(0)
+          .format(true)
+          .manageNameDfsDirs(false)
+          .build();
+  
+      secondary = startSecondaryNameNode(conf);
+
+      // Checkpoint once
+      secondary.doCheckpoint();
+
+      // Now primary NN experiences failure of its only name dir -- fake by
+      // setting its current dir to a-x permissions
+      NameNode nn = cluster.getNameNode();
+      NNStorage storage = nn.getFSImage().getStorage();
+      StorageDirectory sd0 = storage.getStorageDir(0);
+      assertEquals(NameNodeDirType.IMAGE, sd0.getStorageDirType());
+      currentDir = sd0.getCurrentDir();
+      currentDir.setExecutable(false);
+
+      // Try to upload checkpoint -- this should fail since there are no
+      // valid storage dirs
+      try {
+        secondary.doCheckpoint();
+        fail("Did not fail to checkpoint when there are no valid storage dirs");
+      } catch (IOException ioe) {
+        GenericTestUtils.assertExceptionContains(
+            "Unable to download to any storage dir", ioe);
+      }
+      
+      // Restore the good dir
+      currentDir.setExecutable(true);
+      nn.restoreFailedStorage("true");
+      nn.rollEditLog();
+
+      // Checkpoint again -- this should upload to the restored name dir
+      secondary.doCheckpoint();
+      
+      assertNNHasCheckpoints(cluster, ImmutableList.of(8));
+      assertParallelFilesInvariant(cluster, ImmutableList.of(secondary));
+    } finally {
+      if (currentDir != null) {
+        currentDir.setExecutable(true);
+      }
+      if (secondary != null) {
+        secondary.shutdown();
+      }
+      if (cluster != null) {
+        cluster.shutdown();
+      }
+    }
+  }
 
   @SuppressWarnings("deprecation")
   private void cleanup(SecondaryNameNode snn) {

Modified: hadoop/common/branches/HDFS-1073/hdfs/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/TestNNStorageArchivalManager.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-1073/hdfs/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/TestNNStorageArchivalManager.java?rev=1139453&r1=1139452&r2=1139453&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-1073/hdfs/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/TestNNStorageArchivalManager.java (original)
+++ hadoop/common/branches/HDFS-1073/hdfs/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/TestNNStorageArchivalManager.java Fri Jun 24 21:57:49 2011
@@ -18,6 +18,8 @@
 package org.apache.hadoop.hdfs.server.namenode;
 
 import java.io.IOException;
+import java.util.List;
+import java.util.Map;
 import java.util.Set;
 
 import org.apache.hadoop.conf.Configuration;
@@ -34,6 +36,8 @@ import org.mockito.invocation.Invocation
 import org.mockito.stubbing.Answer;
 
 import com.google.common.base.Joiner;
+import com.google.common.collect.Lists;
+import com.google.common.collect.Maps;
 import com.google.common.collect.Sets;
 
 
@@ -46,6 +50,7 @@ public class TestNNStorageArchivalManage
   @Test
   public void testArchiveEasyCase() throws IOException {
     TestCaseDescription tc = new TestCaseDescription();
+    tc.addRoot("/foo1", NameNodeDirType.IMAGE_AND_EDITS);
     tc.addImage("/foo1/current/fsimage_100", true);
     tc.addImage("/foo1/current/fsimage_200", true);
     tc.addImage("/foo1/current/fsimage_300", false);
@@ -66,6 +71,8 @@ public class TestNNStorageArchivalManage
   @Test
   public void testArchiveMultipleDirs() throws IOException {
     TestCaseDescription tc = new TestCaseDescription();
+    tc.addRoot("/foo1", NameNodeDirType.IMAGE_AND_EDITS);
+    tc.addRoot("/foo2", NameNodeDirType.IMAGE_AND_EDITS);
     tc.addImage("/foo1/current/fsimage_100", true);
     tc.addImage("/foo1/current/fsimage_200", true);
     tc.addImage("/foo2/current/fsimage_200", true);
@@ -87,6 +94,7 @@ public class TestNNStorageArchivalManage
   @Test
   public void testArchiveLessThanRetention() throws IOException {
     TestCaseDescription tc = new TestCaseDescription();
+    tc.addRoot("/foo1", NameNodeDirType.IMAGE_AND_EDITS);
     tc.addImage("/foo1/current/fsimage_100", false);
     tc.addLog("/foo1/current/edits_101-200", false);
     tc.addLog("/foo1/current/edits_201-300", false);
@@ -101,6 +109,7 @@ public class TestNNStorageArchivalManage
   @Test
   public void testNoLogs() throws IOException {
     TestCaseDescription tc = new TestCaseDescription();
+    tc.addRoot("/foo1", NameNodeDirType.IMAGE_AND_EDITS);
     tc.addImage("/foo1/current/fsimage_100", true);
     tc.addImage("/foo1/current/fsimage_200", true);
     tc.addImage("/foo1/current/fsimage_300", false);
@@ -114,6 +123,7 @@ public class TestNNStorageArchivalManage
   @Test
   public void testEmptyDir() throws IOException {
     TestCaseDescription tc = new TestCaseDescription();
+    tc.addRoot("/foo1", NameNodeDirType.IMAGE_AND_EDITS);
     runTest(tc);
   }
 
@@ -123,6 +133,7 @@ public class TestNNStorageArchivalManage
   @Test
   public void testOldInProgress() throws IOException {
     TestCaseDescription tc = new TestCaseDescription();
+    tc.addRoot("/foo1", NameNodeDirType.IMAGE_AND_EDITS);
     tc.addImage("/foo1/current/fsimage_100", true);
     tc.addImage("/foo1/current/fsimage_200", true);
     tc.addImage("/foo1/current/fsimage_300", false);
@@ -130,7 +141,23 @@ public class TestNNStorageArchivalManage
     tc.addLog("/foo1/current/edits_inprogress_101", true);
     runTest(tc);
   }
-    
+
+  @Test
+  public void testSeparateEditDirs() throws IOException {
+    TestCaseDescription tc = new TestCaseDescription();
+    tc.addRoot("/foo1", NameNodeDirType.IMAGE);
+    tc.addRoot("/foo2", NameNodeDirType.EDITS);
+    tc.addImage("/foo1/current/fsimage_100", true);
+    tc.addImage("/foo1/current/fsimage_200", true);
+    tc.addImage("/foo1/current/fsimage_300", false);
+    tc.addImage("/foo1/current/fsimage_400", false);
+    tc.addLog("/foo2/current/edits_101-200", true);
+    tc.addLog("/foo2/current/edits_201-300", true);
+    tc.addLog("/foo2/current/edits_301-400", false);
+    tc.addLog("/foo2/current/edits_inprogress_401", false);
+    runTest(tc);    
+  }
+  
   private void runTest(TestCaseDescription tc) throws IOException {
     Configuration conf = new Configuration();
 
@@ -169,33 +196,55 @@ public class TestNNStorageArchivalManage
   }
   
   private static class TestCaseDescription {
-    private Set<String> files = Sets.newHashSet();
+    private Map<String, FakeRoot> dirRoots = Maps.newHashMap();
     private Set<String> expectedArchivedLogs = Sets.newHashSet();
     private Set<String> expectedArchivedImages = Sets.newHashSet();
+    
+    private static class FakeRoot {
+      NameNodeDirType type;
+      List<String> files;
+      
+      FakeRoot(NameNodeDirType type) {
+        this.type = type;
+        files = Lists.newArrayList();
+      }
+    }
 
+    void addRoot(String root, NameNodeDirType dir) {
+      dirRoots.put(root, new FakeRoot(dir));
+    }
+
+    private void addFile(String path) {
+      for (Map.Entry<String, FakeRoot> entry : dirRoots.entrySet()) {
+        if (path.startsWith(entry.getKey())) {
+          entry.getValue().files.add(path);
+        }
+      }
+    }
+    
     void addLog(String path, boolean expectArchive) {
-      files.add(path);
+      addFile(path);
       if (expectArchive) {
         expectedArchivedLogs.add(path);
       }
     }
     
-    private String[] getPaths() {
-      return files.toArray(new String[0]);
-    }
-    
     void addImage(String path, boolean expectArchive) {
-      files.add(path);
+      addFile(path);
       if (expectArchive) {
         expectedArchivedImages.add(path);
       }
     }
     
     NNStorage mockStorage() throws IOException {
-      String[] paths = getPaths();
-      StorageDirectory mockDir = TestFSImageStorageInspector.mockDirectory(
-            NameNodeDirType.IMAGE_AND_EDITS, false, paths);
-      return mockStorageForDirs(mockDir);
+      List<StorageDirectory> sds = Lists.newArrayList();
+      for (FakeRoot root : dirRoots.values()) {
+        StorageDirectory mockDir = TestFSImageStorageInspector.mockDirectory(
+            root.type, false,
+            root.files.toArray(new String[0]));
+        sds.add(mockDir);
+      }
+      return mockStorageForDirs(sds.toArray(new StorageDirectory[0]));
     }
   }
 

Modified: hadoop/common/branches/HDFS-1073/hdfs/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/TestSaveNamespace.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-1073/hdfs/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/TestSaveNamespace.java?rev=1139453&r1=1139452&r2=1139453&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-1073/hdfs/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/TestSaveNamespace.java (original)
+++ hadoop/common/branches/HDFS-1073/hdfs/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/TestSaveNamespace.java Fri Jun 24 21:57:49 2011
@@ -219,9 +219,10 @@ public class TestSaveNamespace {
     FSImage spyImage = spy(originalImage);
     fsn.dir.fsImage = spyImage;
     
-    File currentDir = storage.getStorageDir(0).getCurrentDir();
-    currentDir.setExecutable(false);
-    currentDir.setReadable(false);
+    File rootDir = storage.getStorageDir(0).getRoot();
+    rootDir.setExecutable(false);
+    rootDir.setWritable(false);
+    rootDir.setReadable(false);
 
     try {
       doAnEdit(fsn, 1);
@@ -238,8 +239,9 @@ public class TestSaveNamespace {
                  " bad directories.", 
                    storage.getRemovedStorageDirs().size() == 1);
 
-      currentDir.setExecutable(true);
-      currentDir.setReadable(true);
+      rootDir.setExecutable(true);
+      rootDir.setWritable(true);
+      rootDir.setReadable(true);
 
       // The next call to savenamespace should try inserting the
       // erroneous directory back to fs.name.dir. This command should
@@ -269,9 +271,10 @@ public class TestSaveNamespace {
       checkEditExists(fsn, 1);
       LOG.info("Reloaded image is good.");
     } finally {
-      if (currentDir.exists()) {
-        currentDir.setExecutable(true);
-        currentDir.setReadable(true);
+      if (rootDir.exists()) {
+        rootDir.setExecutable(true);
+        rootDir.setWritable(true);
+        rootDir.setReadable(true);
       }
 
       if (fsn != null) {

Modified: hadoop/common/branches/HDFS-1073/hdfs/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/TestStorageRestore.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-1073/hdfs/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/TestStorageRestore.java?rev=1139453&r1=1139452&r2=1139453&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-1073/hdfs/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/TestStorageRestore.java (original)
+++ hadoop/common/branches/HDFS-1073/hdfs/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/TestStorageRestore.java Fri Jun 24 21:57:49 2011
@@ -226,6 +226,13 @@ public class TestStorageRestore extends 
     String md5BeforeEdit = FSImageTestUtil.getFileMD5(
         new File(path1, "current/edits_inprogress_5"));
     
+    // The original image should still be the previously failed image
+    // directory after it got restored, since it's still useful for
+    // a recovery!
+    FSImageTestUtil.assertFileContentsSame(
+            new File(path1, "current/fsimage_0"),
+            new File(path2, "current/fsimage_0"));
+    
     // Do another edit to verify that all the logs are active.
     path = new Path("/", "test2");
     assertTrue(fs.mkdirs(path));

Modified: hadoop/common/branches/HDFS-1073/hdfs/src/test/hdfs/org/apache/hadoop/test/GenericTestUtils.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-1073/hdfs/src/test/hdfs/org/apache/hadoop/test/GenericTestUtils.java?rev=1139453&r1=1139452&r2=1139453&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-1073/hdfs/src/test/hdfs/org/apache/hadoop/test/GenericTestUtils.java (original)
+++ hadoop/common/branches/HDFS-1073/hdfs/src/test/hdfs/org/apache/hadoop/test/GenericTestUtils.java Fri Jun 24 21:57:49 2011
@@ -23,6 +23,7 @@ import java.util.concurrent.CountDownLat
 
 import org.apache.commons.logging.Log;
 import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocol;
+import org.apache.hadoop.util.StringUtils;
 import org.junit.Assert;
 import org.mockito.Mockito;
 import org.mockito.invocation.InvocationOnMock;
@@ -47,7 +48,15 @@ public abstract class GenericTestUtils {
   public static void assertExists(File f) {
     Assert.assertTrue("File " + f + " should exist", f.exists());
   }
-  
+
+  public static void assertExceptionContains(String string, IOException ioe) {
+    String msg = ioe.getMessage();
+    Assert.assertTrue(
+        "Unexpected exception:" + StringUtils.stringifyException(ioe),
+        msg.contains(string));
+    
+  }  
+
   /**
    * Mockito answer helper that triggers one latch as soon as the
    * method is called, then waits on another before continuing.
@@ -122,6 +131,6 @@ public abstract class GenericTestUtils {
       return invocation.getMethod().invoke(
           delegate, invocation.getArguments());
     }
-  };
-  
+  }
+
 }