You are viewing a plain text version of this content. The canonical link for it is here.
Posted to hdfs-commits@hadoop.apache.org by to...@apache.org on 2011/06/10 00:50:45 UTC

svn commit: r1134123 - in /hadoop/hdfs/branches/HDFS-1073: ./ src/java/org/apache/hadoop/hdfs/server/namenode/ src/test/hdfs/org/apache/hadoop/hdfs/server/common/ src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/

Author: todd
Date: Thu Jun  9 22:50:45 2011
New Revision: 1134123

URL: http://svn.apache.org/viewvc?rev=1134123&view=rev
Log:
HDFS-2047. Improve TestNamespace and TestEditLog in HDFS-1073 branch. Contributed by Todd Lipcon.

Added:
    hadoop/hdfs/branches/HDFS-1073/src/test/hdfs/org/apache/hadoop/hdfs/server/common/StorageAdapter.java
Modified:
    hadoop/hdfs/branches/HDFS-1073/CHANGES.HDFS-1073.txt
    hadoop/hdfs/branches/HDFS-1073/src/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java
    hadoop/hdfs/branches/HDFS-1073/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/FSImageTestUtil.java
    hadoop/hdfs/branches/HDFS-1073/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/TestEditLog.java
    hadoop/hdfs/branches/HDFS-1073/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/TestSaveNamespace.java

Modified: hadoop/hdfs/branches/HDFS-1073/CHANGES.HDFS-1073.txt
URL: http://svn.apache.org/viewvc/hadoop/hdfs/branches/HDFS-1073/CHANGES.HDFS-1073.txt?rev=1134123&r1=1134122&r2=1134123&view=diff
==============================================================================
--- hadoop/hdfs/branches/HDFS-1073/CHANGES.HDFS-1073.txt (original)
+++ hadoop/hdfs/branches/HDFS-1073/CHANGES.HDFS-1073.txt Thu Jun  9 22:50:45 2011
@@ -45,3 +45,4 @@ HDFS-2001. Remove use of previous.checkp
 HDFS-2015. Remove checkpointTxId from VERSION file. (todd)
 HDFS-2016. Add infrastructure to remove or archive old and unneeded storage
            files within the name directories. (todd)
+HDFS-2047. Improve TestNamespace and TestEditLog in HDFS-1073 branch. (todd)

Modified: hadoop/hdfs/branches/HDFS-1073/src/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java
URL: http://svn.apache.org/viewvc/hadoop/hdfs/branches/HDFS-1073/src/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java?rev=1134123&r1=1134122&r2=1134123&view=diff
==============================================================================
--- hadoop/hdfs/branches/HDFS-1073/src/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java (original)
+++ hadoop/hdfs/branches/HDFS-1073/src/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java Thu Jun  9 22:50:45 2011
@@ -785,6 +785,10 @@ public class FSImage implements Closeabl
   }
   
   protected void saveFSImageInAllDirs(long txid) throws IOException {
+    if (storage.getNumStorageDirs(NameNodeDirType.IMAGE) == 0) {
+      throw new IOException("No image directories available!");
+    }
+    
     List<StorageDirectory> errorSDs =
       Collections.synchronizedList(new ArrayList<StorageDirectory>());
 

Added: hadoop/hdfs/branches/HDFS-1073/src/test/hdfs/org/apache/hadoop/hdfs/server/common/StorageAdapter.java
URL: http://svn.apache.org/viewvc/hadoop/hdfs/branches/HDFS-1073/src/test/hdfs/org/apache/hadoop/hdfs/server/common/StorageAdapter.java?rev=1134123&view=auto
==============================================================================
--- hadoop/hdfs/branches/HDFS-1073/src/test/hdfs/org/apache/hadoop/hdfs/server/common/StorageAdapter.java (added)
+++ hadoop/hdfs/branches/HDFS-1073/src/test/hdfs/org/apache/hadoop/hdfs/server/common/StorageAdapter.java Thu Jun  9 22:50:45 2011
@@ -0,0 +1,39 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.server.common;
+
+import org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory;
+import org.mockito.Mockito;
+
+/**
+ * Test methods that need to access package-private parts of
+ * Storage
+ */
+public abstract class StorageAdapter {
+
+  /**
+   * Inject and return a spy on a storage directory
+   */
+  public static StorageDirectory spyOnStorageDirectory(
+      Storage s, int idx) {
+
+    StorageDirectory dir = Mockito.spy(s.getStorageDir(idx));
+    s.storageDirs.set(idx, dir);
+    return dir;
+  }
+}

Modified: hadoop/hdfs/branches/HDFS-1073/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/FSImageTestUtil.java
URL: http://svn.apache.org/viewvc/hadoop/hdfs/branches/HDFS-1073/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/FSImageTestUtil.java?rev=1134123&r1=1134122&r2=1134123&view=diff
==============================================================================
--- hadoop/hdfs/branches/HDFS-1073/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/FSImageTestUtil.java (original)
+++ hadoop/hdfs/branches/HDFS-1073/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/FSImageTestUtil.java Thu Jun  9 22:50:45 2011
@@ -228,7 +228,8 @@ public abstract class FSImageTestUtil {
       new FSImageTransactionalStorageInspector();
     inspector.inspectDirectory(sd);
 
-    return inspector.getLatestImage().getFile();
+    FoundFSImage latestImage = inspector.getLatestImage();
+    return (latestImage == null) ? null : latestImage.getFile();
   }
 
 

Modified: hadoop/hdfs/branches/HDFS-1073/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/TestEditLog.java
URL: http://svn.apache.org/viewvc/hadoop/hdfs/branches/HDFS-1073/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/TestEditLog.java?rev=1134123&r1=1134122&r2=1134123&view=diff
==============================================================================
--- hadoop/hdfs/branches/HDFS-1073/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/TestEditLog.java (original)
+++ hadoop/hdfs/branches/HDFS-1073/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/TestEditLog.java Thu Jun  9 22:50:45 2011
@@ -558,11 +558,20 @@ public class TestEditLog extends TestCas
         // Now restore the backup
         FileUtil.deleteContents(dfsDir);
         backupDir.renameTo(dfsDir);
+        
+        // Directory layout looks like:
+        // test/data/dfs/nameN/current/{fsimage_N,edits_...}
+        File currentDir = new File(nameDir, "current");
 
         // We should see the file as in-progress
-        File editsFile = new File(nameDir, "current/edits_inprogress_1");
+        File editsFile = new File(currentDir, "edits_inprogress_1");
         assertTrue("Edits file " + editsFile + " should exist", editsFile.exists());        
         
+        File imageFile = FSImageTestUtil.findNewestImageFile(
+            currentDir.getAbsolutePath());
+        assertNotNull("No image found in " + nameDir, imageFile);
+        assertEquals("fsimage_0", imageFile.getName());
+        
         // Try to start a new cluster
         LOG.info("\n===========================================\n" +
         "Starting same cluster after simulated crash");
@@ -577,6 +586,14 @@ public class TestEditLog extends TestCas
         for (int i = 0; i < numTransactions; i++) {
           assertTrue(fs.exists(new Path("/test" + i)));
         }
+
+        // It should have saved a checkpoint on startup since there
+        // were unfinalized edits
+        long expectedTxId = numTransactions + 1;
+        imageFile = FSImageTestUtil.findNewestImageFile(
+            currentDir.getAbsolutePath());
+        assertNotNull("No image found in " + nameDir, imageFile);
+        assertEquals("fsimage_" + expectedTxId, imageFile.getName());
         
         // Started successfully
         cluster.shutdown();    

Modified: hadoop/hdfs/branches/HDFS-1073/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/TestSaveNamespace.java
URL: http://svn.apache.org/viewvc/hadoop/hdfs/branches/HDFS-1073/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/TestSaveNamespace.java?rev=1134123&r1=1134122&r2=1134123&view=diff
==============================================================================
--- hadoop/hdfs/branches/HDFS-1073/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/TestSaveNamespace.java (original)
+++ hadoop/hdfs/branches/HDFS-1073/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/TestSaveNamespace.java Thu Jun  9 22:50:45 2011
@@ -20,9 +20,7 @@ package org.apache.hadoop.hdfs.server.na
 import static org.apache.hadoop.hdfs.server.common.Util.fileAsURI;
 
 import static org.junit.Assert.*;
-import static org.junit.Assert.assertNotNull;
-import static org.junit.Assert.assertTrue;
-import static org.junit.Assert.fail;
+
 import static org.mockito.Matchers.anyObject;
 import static org.mockito.Matchers.anyLong;
 import static org.mockito.Mockito.doAnswer;
@@ -44,6 +42,7 @@ import org.apache.hadoop.hdfs.DFSTestUti
 import org.apache.hadoop.hdfs.HdfsConfiguration;
 import org.apache.hadoop.hdfs.MiniDFSCluster;
 import org.apache.hadoop.hdfs.protocol.FSConstants.SafeModeAction;
+import org.apache.hadoop.hdfs.server.common.StorageAdapter;
 import org.apache.hadoop.hdfs.server.common.HdfsConstants.NamenodeRole;
 import org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory;
 import org.apache.log4j.Level;
@@ -72,16 +71,11 @@ public class TestSaveNamespace {
 
   private static class FaultySaveImage implements Answer<Void> {
     int count = 0;
-    boolean exceptionType = true;
-
-    // generate a RuntimeException
-    public FaultySaveImage() {
-      this.exceptionType = true;
-    }
+    boolean throwRTE = true;
 
     // generate either a RuntimeException or IOException
-    public FaultySaveImage(boolean etype) {
-      this.exceptionType = etype;
+    public FaultySaveImage(boolean throwRTE) {
+      this.throwRTE = throwRTE;
     }
 
     public Void answer(InvocationOnMock invocation) throws Throwable {
@@ -90,7 +84,7 @@ public class TestSaveNamespace {
 
       if (count++ == 1) {
         LOG.info("Injecting fault for sd: " + sd);
-        if (exceptionType) {
+        if (throwRTE) {
           throw new RuntimeException("Injected fault: saveFSImage second time");
         } else {
           throw new IOException("Injected fault: saveFSImage second time");
@@ -102,12 +96,14 @@ public class TestSaveNamespace {
   }
 
   private enum Fault {
-    SAVE_FSIMAGE,
-    MOVE_CURRENT,
-    MOVE_LAST_CHECKPOINT
+    SAVE_SECOND_FSIMAGE_RTE,
+    SAVE_SECOND_FSIMAGE_IOE,
+    SAVE_ALL_FSIMAGES,
+    WRITE_STORAGE_ALL,
+    WRITE_STORAGE_ONE
   };
 
-  private void saveNamespaceWithInjectedFault(Fault fault) throws IOException {
+  private void saveNamespaceWithInjectedFault(Fault fault) throws Exception {
     Configuration conf = getConf();
     NameNode.initMetrics(conf, NamenodeRole.ACTIVE);
     DFSTestUtil.formatNameNode(conf);
@@ -116,7 +112,6 @@ public class TestSaveNamespace {
     // Replace the FSImage with a spy
     FSImage originalImage = fsn.dir.fsImage;
     NNStorage storage = originalImage.getStorage();
-    storage.close(); // unlock any directories that FSNamesystem's initialization may have locked
 
     NNStorage spyStorage = spy(storage);
     originalImage.storage = spyStorage;
@@ -124,41 +119,64 @@ public class TestSaveNamespace {
     FSImage spyImage = spy(originalImage);
     fsn.dir.fsImage = spyImage;
 
+    boolean shouldFail = false; // should we expect the save operation to fail
     // inject fault
     switch(fault) {
-    case SAVE_FSIMAGE:
+    case SAVE_SECOND_FSIMAGE_RTE:
       // The spy throws a RuntimeException when writing to the second directory
-      doAnswer(new FaultySaveImage()).
+      doAnswer(new FaultySaveImage(true)).
         when(spyImage).saveFSImage((StorageDirectory)anyObject(), anyLong());
+      shouldFail = false;
       break;
-
-    /*
-     TODO: these two cases no longer make sense for HDFS-1073. Need to go
-     through saveNamespace path and find other good points to inject errors
-     for this test.
-    case MOVE_CURRENT:
-      // The spy throws a RuntimeException when calling moveCurrent()
-      doThrow(new RuntimeException("Injected fault: moveCurrent")).
-        when(spyStorage).moveCurrent((StorageDirectory)anyObject());
+    case SAVE_SECOND_FSIMAGE_IOE:
+      // The spy throws an IOException when writing to the second directory
+      doAnswer(new FaultySaveImage(false)).
+        when(spyImage).saveFSImage((StorageDirectory)anyObject(), anyLong());
+      shouldFail = false;
       break;
-    case MOVE_LAST_CHECKPOINT:
-      // The spy throws a RuntimeException when calling moveLastCheckpoint()
-      doThrow(new RuntimeException("Injected fault: moveLastCheckpoint")).
-        when(spyStorage).moveLastCheckpoint((StorageDirectory)anyObject());
+    case SAVE_ALL_FSIMAGES:
+      // The spy throws IOException in all directories
+      doThrow(new RuntimeException("Injected")).
+        when(spyImage).saveFSImage((StorageDirectory)anyObject(), anyLong());
+      shouldFail = true;
+      break;
+    case WRITE_STORAGE_ALL:
+      // The spy throws an exception before writing any VERSION files
+      doThrow(new RuntimeException("Injected"))
+        .when(spyStorage).writeAll();
+      shouldFail = true;
+      break;
+    case WRITE_STORAGE_ONE:
+      // The spy throws on exception on one particular storage directory
+      StorageDirectory dir = StorageAdapter.spyOnStorageDirectory(
+          storage, 1);
+      doThrow(new RuntimeException("Injected"))
+        .when(dir).write();
+      shouldFail = true; // TODO: unfortunately this fails -- should be improved
       break;
-      */
     }
 
     try {
       doAnEdit(fsn, 1);
 
-      // Save namespace - this will fail because we inject a fault.
+      // Save namespace - this may fail, depending on fault injected
       fsn.setSafeMode(SafeModeAction.SAFEMODE_ENTER);
       try {
         fsn.saveNamespace();
+        if (shouldFail) {
+          fail("Did not fail!");
+        }
       } catch (Exception e) {
-        LOG.info("Test caught expected exception", e);
+        if (! shouldFail) {
+          throw e;
+        } else {
+          LOG.info("Test caught expected exception", e);
+        }
       }
+      
+      fsn.setSafeMode(SafeModeAction.SAFEMODE_LEAVE);
+      // Should still be able to perform edits
+      doAnEdit(fsn, 2);
 
       // Now shut down and restart the namesystem
       originalImage.close();
@@ -169,8 +187,9 @@ public class TestSaveNamespace {
       // the namespace from the previous incarnation.
       fsn = new FSNamesystem(conf);
 
-      // Make sure the image loaded including our edit.
+      // Make sure the image loaded including our edits.
       checkEditExists(fsn, 1);
+      checkEditExists(fsn, 2);
     } finally {
       if (fsn != null) {
         fsn.close();
@@ -266,26 +285,36 @@ public class TestSaveNamespace {
   }
 
   @Test
-  public void testCrashWhileSavingSecondImage() throws Exception {
-    saveNamespaceWithInjectedFault(Fault.SAVE_FSIMAGE);
+  public void testRTEWhileSavingSecondImage() throws Exception {
+    saveNamespaceWithInjectedFault(Fault.SAVE_SECOND_FSIMAGE_RTE);
   }
 
   @Test
-  public void testCrashWhileMoveCurrent() throws Exception {
-    saveNamespaceWithInjectedFault(Fault.MOVE_CURRENT);
+  public void testIOEWhileSavingSecondImage() throws Exception {
+    saveNamespaceWithInjectedFault(Fault.SAVE_SECOND_FSIMAGE_IOE);
   }
 
   @Test
-  public void testCrashWhileMoveLastCheckpoint() throws Exception {
-    saveNamespaceWithInjectedFault(Fault.MOVE_LAST_CHECKPOINT);
+  public void testCrashInAllImageDirs() throws Exception {
+    saveNamespaceWithInjectedFault(Fault.SAVE_ALL_FSIMAGES);
+  }
+  
+  @Test
+  public void testCrashWhenWritingVersionFiles() throws Exception {
+    saveNamespaceWithInjectedFault(Fault.WRITE_STORAGE_ALL);
+  }
+  
+  @Test
+  public void testCrashWhenWritingVersionFileInOneDir() throws Exception {
+    saveNamespaceWithInjectedFault(Fault.WRITE_STORAGE_ONE);
   }
  
 
   /**
    * Test case where savenamespace fails in all directories
    * and then the NN shuts down. Here we should recover from the
-   * failed checkpoint by moving the directories back on next
-   * NN start. This is a regression test for HDFS-1921.
+   * failed checkpoint since it only affected ".ckpt" files, not
+   * valid image files
    */
   @Test
   public void testFailedSaveNamespace() throws Exception {