You are viewing a plain text version of this content. The canonical link for it is here.
Posted to hdfs-commits@hadoop.apache.org by to...@apache.org on 2011/06/24 23:57:50 UTC
svn commit: r1139453 - in /hadoop/common/branches/HDFS-1073/hdfs: ./
src/java/org/apache/hadoop/hdfs/server/namenode/
src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/
src/test/hdfs/org/apache/hadoop/test/
Author: todd
Date: Fri Jun 24 21:57:49 2011
New Revision: 1139453
URL: http://svn.apache.org/viewvc?rev=1139453&view=rev
Log:
HDFS-2078. NameNode should not clear directory when restoring removed storage. Contributed by Todd Lipcon.
Modified:
hadoop/common/branches/HDFS-1073/hdfs/CHANGES.HDFS-1073.txt
hadoop/common/branches/HDFS-1073/hdfs/src/java/org/apache/hadoop/hdfs/server/namenode/NNStorage.java
hadoop/common/branches/HDFS-1073/hdfs/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/TestCheckpoint.java
hadoop/common/branches/HDFS-1073/hdfs/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/TestNNStorageArchivalManager.java
hadoop/common/branches/HDFS-1073/hdfs/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/TestSaveNamespace.java
hadoop/common/branches/HDFS-1073/hdfs/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/TestStorageRestore.java
hadoop/common/branches/HDFS-1073/hdfs/src/test/hdfs/org/apache/hadoop/test/GenericTestUtils.java
Modified: hadoop/common/branches/HDFS-1073/hdfs/CHANGES.HDFS-1073.txt
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-1073/hdfs/CHANGES.HDFS-1073.txt?rev=1139453&r1=1139452&r2=1139453&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-1073/hdfs/CHANGES.HDFS-1073.txt (original)
+++ hadoop/common/branches/HDFS-1073/hdfs/CHANGES.HDFS-1073.txt Fri Jun 24 21:57:49 2011
@@ -57,3 +57,6 @@ HDFS-2026. SecondaryNameNode should prop
NameNode is reformatted. (todd)
HDFS-2077. Address checkpoint upload when one of the storage dirs is failed
(todd)
+HDFS-2078. NameNode should not clear directory when restoring removed storage.
+ (todd)
+
Modified: hadoop/common/branches/HDFS-1073/hdfs/src/java/org/apache/hadoop/hdfs/server/namenode/NNStorage.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-1073/hdfs/src/java/org/apache/hadoop/hdfs/server/namenode/NNStorage.java?rev=1139453&r1=1139452&r2=1139453&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-1073/hdfs/src/java/org/apache/hadoop/hdfs/server/namenode/NNStorage.java (original)
+++ hadoop/common/branches/HDFS-1073/hdfs/src/java/org/apache/hadoop/hdfs/server/namenode/NNStorage.java Fri Jun 24 21:57:49 2011
@@ -242,20 +242,10 @@ public class NNStorage extends Storage i
LOG.info("currently disabled dir " + root.getAbsolutePath() +
"; type="+sd.getStorageDirType()
+ ";canwrite="+root.canWrite());
- try {
-
- if(root.exists() && root.canWrite()) {
- // when we try to restore we just need to remove all the data
- // without saving current in-memory state (which could've changed).
- // TODO does this still make sense with 1073?
- sd.clearDirectory();
-
- LOG.info("restoring dir " + sd.getRoot().getAbsolutePath());
- this.addStorageDir(sd); // restore
- this.removedStorageDirs.remove(sd);
- }
- } catch(IOException e) {
- LOG.warn("failed to restore " + sd.getRoot().getAbsolutePath(), e);
+ if(root.exists() && root.canWrite()) {
+ LOG.info("restoring dir " + sd.getRoot().getAbsolutePath());
+ this.addStorageDir(sd); // restore
+ this.removedStorageDirs.remove(sd);
}
}
}
Modified: hadoop/common/branches/HDFS-1073/hdfs/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/TestCheckpoint.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-1073/hdfs/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/TestCheckpoint.java?rev=1139453&r1=1139452&r2=1139453&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-1073/hdfs/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/TestCheckpoint.java (original)
+++ hadoop/common/branches/HDFS-1073/hdfs/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/TestCheckpoint.java Fri Jun 24 21:57:49 2011
@@ -17,6 +17,7 @@
*/
package org.apache.hadoop.hdfs.server.namenode;
+import static org.apache.hadoop.hdfs.server.common.Util.fileAsURI;
import junit.framework.TestCase;
import java.net.InetSocketAddress;
import java.io.File;
@@ -45,6 +46,7 @@ import org.apache.hadoop.hdfs.MiniDFSClu
import org.apache.hadoop.hdfs.protocol.FSConstants.SafeModeAction;
import org.apache.hadoop.hdfs.server.common.HdfsConstants.StartupOption;
import org.apache.hadoop.hdfs.server.common.Storage;
+import org.apache.hadoop.hdfs.server.common.Storage.StorageDirType;
import org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory;
import org.apache.hadoop.hdfs.server.common.StorageInfo;
import org.apache.hadoop.hdfs.server.namenode.NNStorage.NameNodeDirType;
@@ -1440,6 +1442,83 @@ public class TestCheckpoint extends Test
}
}
}
+
+ /**
+ * Test case where the NN is configured with a name-only and an edits-only
+ * dir, with storage-restore turned on. In this case, if the name-only dir
+ * disappears and comes back, a new checkpoint after it has been restored
+ * should function correctly.
+ * @throws Exception
+ */
+ @SuppressWarnings("deprecation")
+ public void testCheckpointWithSeparateDirsAfterNameFails() throws Exception {
+ MiniDFSCluster cluster = null;
+ SecondaryNameNode secondary = null;
+ File currentDir = null;
+
+ Configuration conf = new HdfsConfiguration();
+
+ File base_dir = new File(MiniDFSCluster.getBaseDirectory());
+ conf.setBoolean(DFSConfigKeys.DFS_NAMENODE_NAME_DIR_RESTORE_KEY, true);
+ conf.set(DFSConfigKeys.DFS_NAMENODE_NAME_DIR_KEY,
+ MiniDFSCluster.getBaseDirectory() + "/name-only");
+ conf.set(DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_KEY,
+ MiniDFSCluster.getBaseDirectory() + "/edits-only");
+ conf.set(DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_DIR_KEY,
+ fileAsURI(new File(base_dir, "namesecondary1")).toString());
+
+ try {
+ cluster = new MiniDFSCluster.Builder(conf).numDataNodes(0)
+ .format(true)
+ .manageNameDfsDirs(false)
+ .build();
+
+ secondary = startSecondaryNameNode(conf);
+
+ // Checkpoint once
+ secondary.doCheckpoint();
+
+ // Now primary NN experiences failure of its only name dir -- fake by
+ // setting its current dir to a-x permissions
+ NameNode nn = cluster.getNameNode();
+ NNStorage storage = nn.getFSImage().getStorage();
+ StorageDirectory sd0 = storage.getStorageDir(0);
+ assertEquals(NameNodeDirType.IMAGE, sd0.getStorageDirType());
+ currentDir = sd0.getCurrentDir();
+ currentDir.setExecutable(false);
+
+ // Try to upload checkpoint -- this should fail since there are no
+ // valid storage dirs
+ try {
+ secondary.doCheckpoint();
+ fail("Did not fail to checkpoint when there are no valid storage dirs");
+ } catch (IOException ioe) {
+ GenericTestUtils.assertExceptionContains(
+ "Unable to download to any storage dir", ioe);
+ }
+
+ // Restore the good dir
+ currentDir.setExecutable(true);
+ nn.restoreFailedStorage("true");
+ nn.rollEditLog();
+
+ // Checkpoint again -- this should upload to the restored name dir
+ secondary.doCheckpoint();
+
+ assertNNHasCheckpoints(cluster, ImmutableList.of(8));
+ assertParallelFilesInvariant(cluster, ImmutableList.of(secondary));
+ } finally {
+ if (currentDir != null) {
+ currentDir.setExecutable(true);
+ }
+ if (secondary != null) {
+ secondary.shutdown();
+ }
+ if (cluster != null) {
+ cluster.shutdown();
+ }
+ }
+ }
@SuppressWarnings("deprecation")
private void cleanup(SecondaryNameNode snn) {
Modified: hadoop/common/branches/HDFS-1073/hdfs/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/TestNNStorageArchivalManager.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-1073/hdfs/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/TestNNStorageArchivalManager.java?rev=1139453&r1=1139452&r2=1139453&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-1073/hdfs/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/TestNNStorageArchivalManager.java (original)
+++ hadoop/common/branches/HDFS-1073/hdfs/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/TestNNStorageArchivalManager.java Fri Jun 24 21:57:49 2011
@@ -18,6 +18,8 @@
package org.apache.hadoop.hdfs.server.namenode;
import java.io.IOException;
+import java.util.List;
+import java.util.Map;
import java.util.Set;
import org.apache.hadoop.conf.Configuration;
@@ -34,6 +36,8 @@ import org.mockito.invocation.Invocation
import org.mockito.stubbing.Answer;
import com.google.common.base.Joiner;
+import com.google.common.collect.Lists;
+import com.google.common.collect.Maps;
import com.google.common.collect.Sets;
@@ -46,6 +50,7 @@ public class TestNNStorageArchivalManage
@Test
public void testArchiveEasyCase() throws IOException {
TestCaseDescription tc = new TestCaseDescription();
+ tc.addRoot("/foo1", NameNodeDirType.IMAGE_AND_EDITS);
tc.addImage("/foo1/current/fsimage_100", true);
tc.addImage("/foo1/current/fsimage_200", true);
tc.addImage("/foo1/current/fsimage_300", false);
@@ -66,6 +71,8 @@ public class TestNNStorageArchivalManage
@Test
public void testArchiveMultipleDirs() throws IOException {
TestCaseDescription tc = new TestCaseDescription();
+ tc.addRoot("/foo1", NameNodeDirType.IMAGE_AND_EDITS);
+ tc.addRoot("/foo2", NameNodeDirType.IMAGE_AND_EDITS);
tc.addImage("/foo1/current/fsimage_100", true);
tc.addImage("/foo1/current/fsimage_200", true);
tc.addImage("/foo2/current/fsimage_200", true);
@@ -87,6 +94,7 @@ public class TestNNStorageArchivalManage
@Test
public void testArchiveLessThanRetention() throws IOException {
TestCaseDescription tc = new TestCaseDescription();
+ tc.addRoot("/foo1", NameNodeDirType.IMAGE_AND_EDITS);
tc.addImage("/foo1/current/fsimage_100", false);
tc.addLog("/foo1/current/edits_101-200", false);
tc.addLog("/foo1/current/edits_201-300", false);
@@ -101,6 +109,7 @@ public class TestNNStorageArchivalManage
@Test
public void testNoLogs() throws IOException {
TestCaseDescription tc = new TestCaseDescription();
+ tc.addRoot("/foo1", NameNodeDirType.IMAGE_AND_EDITS);
tc.addImage("/foo1/current/fsimage_100", true);
tc.addImage("/foo1/current/fsimage_200", true);
tc.addImage("/foo1/current/fsimage_300", false);
@@ -114,6 +123,7 @@ public class TestNNStorageArchivalManage
@Test
public void testEmptyDir() throws IOException {
TestCaseDescription tc = new TestCaseDescription();
+ tc.addRoot("/foo1", NameNodeDirType.IMAGE_AND_EDITS);
runTest(tc);
}
@@ -123,6 +133,7 @@ public class TestNNStorageArchivalManage
@Test
public void testOldInProgress() throws IOException {
TestCaseDescription tc = new TestCaseDescription();
+ tc.addRoot("/foo1", NameNodeDirType.IMAGE_AND_EDITS);
tc.addImage("/foo1/current/fsimage_100", true);
tc.addImage("/foo1/current/fsimage_200", true);
tc.addImage("/foo1/current/fsimage_300", false);
@@ -130,7 +141,23 @@ public class TestNNStorageArchivalManage
tc.addLog("/foo1/current/edits_inprogress_101", true);
runTest(tc);
}
-
+
+ @Test
+ public void testSeparateEditDirs() throws IOException {
+ TestCaseDescription tc = new TestCaseDescription();
+ tc.addRoot("/foo1", NameNodeDirType.IMAGE);
+ tc.addRoot("/foo2", NameNodeDirType.EDITS);
+ tc.addImage("/foo1/current/fsimage_100", true);
+ tc.addImage("/foo1/current/fsimage_200", true);
+ tc.addImage("/foo1/current/fsimage_300", false);
+ tc.addImage("/foo1/current/fsimage_400", false);
+ tc.addLog("/foo2/current/edits_101-200", true);
+ tc.addLog("/foo2/current/edits_201-300", true);
+ tc.addLog("/foo2/current/edits_301-400", false);
+ tc.addLog("/foo2/current/edits_inprogress_401", false);
+ runTest(tc);
+ }
+
private void runTest(TestCaseDescription tc) throws IOException {
Configuration conf = new Configuration();
@@ -169,33 +196,55 @@ public class TestNNStorageArchivalManage
}
private static class TestCaseDescription {
- private Set<String> files = Sets.newHashSet();
+ private Map<String, FakeRoot> dirRoots = Maps.newHashMap();
private Set<String> expectedArchivedLogs = Sets.newHashSet();
private Set<String> expectedArchivedImages = Sets.newHashSet();
+
+ private static class FakeRoot {
+ NameNodeDirType type;
+ List<String> files;
+
+ FakeRoot(NameNodeDirType type) {
+ this.type = type;
+ files = Lists.newArrayList();
+ }
+ }
+ void addRoot(String root, NameNodeDirType dir) {
+ dirRoots.put(root, new FakeRoot(dir));
+ }
+
+ private void addFile(String path) {
+ for (Map.Entry<String, FakeRoot> entry : dirRoots.entrySet()) {
+ if (path.startsWith(entry.getKey())) {
+ entry.getValue().files.add(path);
+ }
+ }
+ }
+
void addLog(String path, boolean expectArchive) {
- files.add(path);
+ addFile(path);
if (expectArchive) {
expectedArchivedLogs.add(path);
}
}
- private String[] getPaths() {
- return files.toArray(new String[0]);
- }
-
void addImage(String path, boolean expectArchive) {
- files.add(path);
+ addFile(path);
if (expectArchive) {
expectedArchivedImages.add(path);
}
}
NNStorage mockStorage() throws IOException {
- String[] paths = getPaths();
- StorageDirectory mockDir = TestFSImageStorageInspector.mockDirectory(
- NameNodeDirType.IMAGE_AND_EDITS, false, paths);
- return mockStorageForDirs(mockDir);
+ List<StorageDirectory> sds = Lists.newArrayList();
+ for (FakeRoot root : dirRoots.values()) {
+ StorageDirectory mockDir = TestFSImageStorageInspector.mockDirectory(
+ root.type, false,
+ root.files.toArray(new String[0]));
+ sds.add(mockDir);
+ }
+ return mockStorageForDirs(sds.toArray(new StorageDirectory[0]));
}
}
Modified: hadoop/common/branches/HDFS-1073/hdfs/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/TestSaveNamespace.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-1073/hdfs/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/TestSaveNamespace.java?rev=1139453&r1=1139452&r2=1139453&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-1073/hdfs/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/TestSaveNamespace.java (original)
+++ hadoop/common/branches/HDFS-1073/hdfs/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/TestSaveNamespace.java Fri Jun 24 21:57:49 2011
@@ -219,9 +219,10 @@ public class TestSaveNamespace {
FSImage spyImage = spy(originalImage);
fsn.dir.fsImage = spyImage;
- File currentDir = storage.getStorageDir(0).getCurrentDir();
- currentDir.setExecutable(false);
- currentDir.setReadable(false);
+ File rootDir = storage.getStorageDir(0).getRoot();
+ rootDir.setExecutable(false);
+ rootDir.setWritable(false);
+ rootDir.setReadable(false);
try {
doAnEdit(fsn, 1);
@@ -238,8 +239,9 @@ public class TestSaveNamespace {
" bad directories.",
storage.getRemovedStorageDirs().size() == 1);
- currentDir.setExecutable(true);
- currentDir.setReadable(true);
+ rootDir.setExecutable(true);
+ rootDir.setWritable(true);
+ rootDir.setReadable(true);
// The next call to savenamespace should try inserting the
// erroneous directory back to fs.name.dir. This command should
@@ -269,9 +271,10 @@ public class TestSaveNamespace {
checkEditExists(fsn, 1);
LOG.info("Reloaded image is good.");
} finally {
- if (currentDir.exists()) {
- currentDir.setExecutable(true);
- currentDir.setReadable(true);
+ if (rootDir.exists()) {
+ rootDir.setExecutable(true);
+ rootDir.setWritable(true);
+ rootDir.setReadable(true);
}
if (fsn != null) {
Modified: hadoop/common/branches/HDFS-1073/hdfs/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/TestStorageRestore.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-1073/hdfs/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/TestStorageRestore.java?rev=1139453&r1=1139452&r2=1139453&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-1073/hdfs/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/TestStorageRestore.java (original)
+++ hadoop/common/branches/HDFS-1073/hdfs/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/TestStorageRestore.java Fri Jun 24 21:57:49 2011
@@ -226,6 +226,13 @@ public class TestStorageRestore extends
String md5BeforeEdit = FSImageTestUtil.getFileMD5(
new File(path1, "current/edits_inprogress_5"));
+ // The original image should still be the previously failed image
+ // directory after it got restored, since it's still useful for
+ // a recovery!
+ FSImageTestUtil.assertFileContentsSame(
+ new File(path1, "current/fsimage_0"),
+ new File(path2, "current/fsimage_0"));
+
// Do another edit to verify that all the logs are active.
path = new Path("/", "test2");
assertTrue(fs.mkdirs(path));
Modified: hadoop/common/branches/HDFS-1073/hdfs/src/test/hdfs/org/apache/hadoop/test/GenericTestUtils.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-1073/hdfs/src/test/hdfs/org/apache/hadoop/test/GenericTestUtils.java?rev=1139453&r1=1139452&r2=1139453&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-1073/hdfs/src/test/hdfs/org/apache/hadoop/test/GenericTestUtils.java (original)
+++ hadoop/common/branches/HDFS-1073/hdfs/src/test/hdfs/org/apache/hadoop/test/GenericTestUtils.java Fri Jun 24 21:57:49 2011
@@ -23,6 +23,7 @@ import java.util.concurrent.CountDownLat
import org.apache.commons.logging.Log;
import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocol;
+import org.apache.hadoop.util.StringUtils;
import org.junit.Assert;
import org.mockito.Mockito;
import org.mockito.invocation.InvocationOnMock;
@@ -47,7 +48,15 @@ public abstract class GenericTestUtils {
public static void assertExists(File f) {
Assert.assertTrue("File " + f + " should exist", f.exists());
}
-
+
+ public static void assertExceptionContains(String string, IOException ioe) {
+ String msg = ioe.getMessage();
+ Assert.assertTrue(
+ "Unexpected exception:" + StringUtils.stringifyException(ioe),
+ msg.contains(string));
+
+ }
+
/**
* Mockito answer helper that triggers one latch as soon as the
* method is called, then waits on another before continuing.
@@ -122,6 +131,6 @@ public abstract class GenericTestUtils {
return invocation.getMethod().invoke(
delegate, invocation.getArguments());
}
- };
-
+ }
+
}