Posted to hdfs-commits@hadoop.apache.org by el...@apache.org on 2012/04/09 21:39:59 UTC

svn commit: r1311394 [2/2] - in /hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs: ./ dev-support/ src/contrib/bkjournal/src/main/java/org/apache/hadoop/contrib/bkjournal/ src/contrib/bkjournal/src/test/java/org/apache/hadoop/hdfs/server/namenode/ s...

Modified: hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java?rev=1311394&r1=1311393&r2=1311394&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java (original)
+++ hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java Mon Apr  9 19:39:58 2012
@@ -514,6 +514,8 @@ public class NameNode {
    * <li>{@link StartupOption#CHECKPOINT CHECKPOINT} - start checkpoint node</li>
    * <li>{@link StartupOption#UPGRADE UPGRADE} - start the cluster  
    * upgrade and create a snapshot of the current file system state</li> 
+   * <li>{@link StartupOption#RECOVERY RECOVERY} - recover name node
+   * metadata</li>
    * <li>{@link StartupOption#ROLLBACK ROLLBACK} - roll the  
    *            cluster back to the previous state</li>
    * <li>{@link StartupOption#FINALIZE FINALIZE} - finalize 
@@ -832,7 +834,10 @@ public class NameNode {
       StartupOption.FINALIZE.getName() + "] | [" +
       StartupOption.IMPORT.getName() + "] | [" +
       StartupOption.BOOTSTRAPSTANDBY.getName() + "] | [" +
-      StartupOption.INITIALIZESHAREDEDITS.getName() + "]");
+      StartupOption.INITIALIZESHAREDEDITS.getName() + "] | [" +
+      StartupOption.RECOVER.getName() + " [ " +
+        StartupOption.FORCE.getName() + " ] ]");
   }
 
   private static StartupOption parseArguments(String args[]) {
@@ -876,6 +881,21 @@ public class NameNode {
       } else if (StartupOption.INITIALIZESHAREDEDITS.getName().equalsIgnoreCase(cmd)) {
         startOpt = StartupOption.INITIALIZESHAREDEDITS;
         return startOpt;
+      } else if (StartupOption.RECOVER.getName().equalsIgnoreCase(cmd)) {
+        if (startOpt != StartupOption.REGULAR) {
+          throw new RuntimeException("Can't combine -recover with " +
+              "other startup options.");
+        }
+        startOpt = StartupOption.RECOVER;
+        while (++i < argsLen) {
+          if (args[i].equalsIgnoreCase(
+                StartupOption.FORCE.getName())) {
+            startOpt.setForce(MetaRecoveryContext.FORCE_FIRST_CHOICE);
+          } else {
+            throw new RuntimeException("Error parsing recovery options: " + 
+              "can't understand option \"" + args[i] + "\"");
+          }
+        }
       } else {
         return null;
       }
@@ -892,6 +912,39 @@ public class NameNode {
                                           StartupOption.REGULAR.toString()));
   }
 
+  private static void doRecovery(StartupOption startOpt, Configuration conf)
+      throws IOException {
+    if (startOpt.getForce() < MetaRecoveryContext.FORCE_ALL) {
+      if (!confirmPrompt("You have selected Metadata Recovery mode.  " +
+          "This mode is intended to recover lost metadata on a corrupt " +
+          "filesystem.  Metadata recovery mode often permanently deletes " +
+          "data from your HDFS filesystem.  Please back up your edit log " +
+          "and fsimage before trying this!\n\n" +
+          "Are you ready to proceed? (Y/N)\n")) {
+        System.err.println("Recovery aborted at user request.\n");
+        return;
+      }
+    }
+    MetaRecoveryContext.LOG.info("starting recovery...");
+    UserGroupInformation.setConfiguration(conf);
+    NameNode.initMetrics(conf, startOpt.toNodeRole());
+    FSNamesystem fsn = null;
+    try {
+      fsn = FSNamesystem.loadFromDisk(conf);
+      fsn.saveNamespace();
+      MetaRecoveryContext.LOG.info("RECOVERY COMPLETE");
+    } catch (IOException e) {
+      MetaRecoveryContext.LOG.error("RECOVERY FAILED: caught exception", e);
+      throw e;
+    } catch (RuntimeException e) {
+      MetaRecoveryContext.LOG.error("RECOVERY FAILED: caught exception", e);
+      throw e;
+    } finally {
+      if (fsn != null)
+        fsn.close();
+    }
+  }
+
   /**
    * Print out a prompt to the user, and return true if the user
    * responds with "Y" or "yes".
@@ -973,6 +1026,10 @@ public class NameNode {
         DefaultMetricsSystem.initialize(role.toString().replace(" ", ""));
         return new BackupNode(conf, role);
       }
+      case RECOVER: {
+        NameNode.doRecovery(startOpt, conf);
+        return null;
+      }
       default:
         DefaultMetricsSystem.initialize("NameNode");
         return new NameNode(conf);
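
For reference, the new mode rides on the existing NameNode entry point: parseArguments() maps the "-recover [ -force ]" form shown in printUsage() to StartupOption.RECOVER, and createNameNode() then runs doRecovery() and returns null instead of a live NameNode. A minimal driver sketch under that assumption (hypothetical class; the Configuration must point at real name directories, and without the FORCE option doRecovery() still prompts for confirmation):

    import java.io.IOException;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hdfs.HdfsConfiguration;
    import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.StartupOption;
    import org.apache.hadoop.hdfs.server.namenode.NameNode;

    public class RecoverDriver {
      public static void main(String[] args) throws IOException {
        Configuration conf = new HdfsConfiguration();
        // Same effect as the command-line form in printUsage():
        //   namenode [-recover [ -force ] ]
        NameNode nn = NameNode.createNameNode(
            new String[] { StartupOption.RECOVER.getName() }, conf);
        assert nn == null;  // RECOVER runs to completion; no daemon is returned
      }
    }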

Modified: hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/EditLogTailer.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/EditLogTailer.java?rev=1311394&r1=1311393&r2=1311394&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/EditLogTailer.java (original)
+++ hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/EditLogTailer.java Mon Apr  9 19:39:58 2012
@@ -219,7 +219,7 @@ public class EditLogTailer {
       // disk are ignored.
       long editsLoaded = 0;
       try {
-        editsLoaded = image.loadEdits(streams, namesystem);
+        editsLoaded = image.loadEdits(streams, namesystem, null);
       } catch (EditLogInputException elie) {
         editsLoaded = elie.getNumEditsLoaded();
         throw elie;
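
The new third argument is the MetaRecoveryContext parameter this patch threads through the edit log loaders; passing null keeps the tailer on the old strict path, where any unreadable op aborts loading. Sketched side by side ("recovery" is an assumed local standing in for a context built in recovery mode, not code from this file):

    editsLoaded = image.loadEdits(streams, namesystem, null);      // strict: errors propagate
    editsLoaded = image.loadEdits(streams, namesystem, recovery);  // recovery: may skip bad ops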

Modified: hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineEditsViewer/OfflineEditsXmlLoader.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineEditsViewer/OfflineEditsXmlLoader.java?rev=1311394&r1=1311393&r2=1311394&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineEditsViewer/OfflineEditsXmlLoader.java (original)
+++ hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineEditsViewer/OfflineEditsXmlLoader.java Mon Apr  9 19:39:58 2012
@@ -28,6 +28,7 @@ import org.apache.hadoop.classification.
 import org.apache.hadoop.hdfs.util.XMLUtils.InvalidXmlException;
 import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp;
 import org.apache.hadoop.hdfs.server.namenode.FSEditLogOpCodes;
+import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.OpInstanceCache;
 
 import org.apache.hadoop.hdfs.util.XMLUtils.Stanza;
 import org.xml.sax.Attributes;
@@ -54,6 +55,7 @@ class OfflineEditsXmlLoader 
   private FSEditLogOpCodes opCode;
   private StringBuffer cbuf;
   private long nextTxId;
+  private final OpInstanceCache opCache = new OpInstanceCache();
   
   static enum ParseState {
     EXPECT_EDITS_TAG,
@@ -207,7 +209,7 @@ class OfflineEditsXmlLoader 
           throw new InvalidXmlException("expected </DATA>");
         }
         state = ParseState.EXPECT_RECORD;
-        FSEditLogOp op = FSEditLogOp.getOpInstance(opCode);
+        FSEditLogOp op = opCache.get(opCode);
         opCode = null;
         try {
           op.decodeXml(stanza);
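
The static factory call gives way to a per-loader cache, so each loader reuses its own mutable op objects rather than going through the shared static factory. The two forms, for contrast (the first line is the shape this hunk removes):

    FSEditLogOp op = FSEditLogOp.getOpInstance(opCode);  // before: static factory
    FSEditLogOp op = opCache.get(opCode);                // after: instances owned by this loader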

Modified: hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java?rev=1311394&r1=1311393&r2=1311394&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java (original)
+++ hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java Mon Apr  9 19:39:58 2012
@@ -581,6 +581,10 @@ public class MiniDFSCluster {
       }
     }
     
+    if (operation == StartupOption.RECOVER) {
+      return;
+    }
+
     // Start the DataNodes
     startDataNodes(conf, numDataNodes, manageDataDfsDirs, operation, racks,
         hosts, simulatedCapacities, setupHostsFile);
@@ -781,6 +785,9 @@ public class MiniDFSCluster {
                      operation == StartupOption.REGULAR) ?
       new String[] {} : new String[] {operation.getName()};
     NameNode nn =  NameNode.createNameNode(args, conf);
+    if (operation == StartupOption.RECOVER) {
+      return;
+    }
     
     // After the NN has started, set back the bound ports into
     // the conf
@@ -956,6 +963,9 @@ public class MiniDFSCluster {
                              long[] simulatedCapacities,
                              boolean setupHostsFile,
                              boolean checkDataNodeAddrConfig) throws IOException {
+    if (operation == StartupOption.RECOVER) {
+      return;
+    }
     conf.set(DFS_DATANODE_HOST_NAME_KEY, "127.0.0.1");
 
     int curDatanodesNum = dataNodes.size();
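
Each early return above reflects that a RECOVER startup never leaves daemons running: recovery completes inside NameNode.createNameNode() and hands back null, so there is nothing to wait for and no DataNodes to start. A minimal usage sketch in the style of TestNameNodeRecovery below (format(false) assumes name directories left by a previous run):

    StartupOption recover = StartupOption.RECOVER;
    recover.setForce(MetaRecoveryContext.FORCE_ALL);    // suppress the Y/N prompt
    new MiniDFSCluster.Builder(conf).numDataNodes(0)
        .format(false).startupOption(recover).build();  // runs recovery, then returns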

Modified: hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestEditLog.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestEditLog.java?rev=1311394&r1=1311393&r2=1311394&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestEditLog.java (original)
+++ hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestEditLog.java Mon Apr  9 19:39:58 2012
@@ -179,8 +179,8 @@ public class TestEditLog extends TestCas
   }
   
   private long testLoad(byte[] data, FSNamesystem namesys) throws IOException {
-    FSEditLogLoader loader = new FSEditLogLoader(namesys);
-    return loader.loadFSEdits(new EditLogByteInputStream(data), 1);
+    FSEditLogLoader loader = new FSEditLogLoader(namesys, 0);
+    return loader.loadFSEdits(new EditLogByteInputStream(data), 1, null);
   }
 
   /**
@@ -315,7 +315,7 @@ public class TestEditLog extends TestCas
       //
       for (Iterator<StorageDirectory> it = 
               fsimage.getStorage().dirIterator(NameNodeDirType.EDITS); it.hasNext();) {
-        FSEditLogLoader loader = new FSEditLogLoader(namesystem);
+        FSEditLogLoader loader = new FSEditLogLoader(namesystem, 0);
         
         File editFile = NNStorage.getFinalizedEditsFile(it.next(), 3,
             3 + expectedTxns - 1);
@@ -323,7 +323,7 @@ public class TestEditLog extends TestCas
         
         System.out.println("Verifying file: " + editFile);
         long numEdits = loader.loadFSEdits(
-            new EditLogFileInputStream(editFile), 3);
+            new EditLogFileInputStream(editFile), 3, null);
         int numLeases = namesystem.leaseManager.countLease();
         System.out.println("Number of outstanding leases " + numLeases);
         assertEquals(0, numLeases);
@@ -774,8 +774,8 @@ public class TestEditLog extends TestCas
     }
 
     @Override
-    public FSEditLogOp readOp() throws IOException {
-      return reader.readOp();
+    protected FSEditLogOp nextOp() throws IOException {
+      return reader.readOp(false);
     }
 
     @Override
@@ -788,16 +788,11 @@ public class TestEditLog extends TestCas
       input.close();
     }
 
-    @Override // JournalStream
+    @Override
     public String getName() {
       return "AnonEditLogByteInputStream";
     }
 
-    @Override // JournalStream
-    public JournalType getType() {
-      return JournalType.FILE;
-    }
-
     @Override
     public boolean isInProgress() {
       return true;

Modified: hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestEditLogRace.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestEditLogRace.java?rev=1311394&r1=1311393&r2=1311394&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestEditLogRace.java (original)
+++ hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestEditLogRace.java Mon Apr  9 19:39:58 2012
@@ -236,9 +236,9 @@ public class TestEditLogRace {
       File editFile = new File(sd.getCurrentDir(), logFileName);
         
       System.out.println("Verifying file: " + editFile);
-      FSEditLogLoader loader = new FSEditLogLoader(namesystem);
+      FSEditLogLoader loader = new FSEditLogLoader(namesystem, startTxId);
       long numEditsThisLog = loader.loadFSEdits(new EditLogFileInputStream(editFile), 
-          startTxId);
+          startTxId, null);
       
       System.out.println("Number of edits: " + numEditsThisLog);
       assertTrue(numEdits == -1 || numEditsThisLog == numEdits);

Modified: hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSEditLogLoader.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSEditLogLoader.java?rev=1311394&r1=1311393&r2=1311394&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSEditLogLoader.java (original)
+++ hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSEditLogLoader.java Mon Apr  9 19:39:58 2012
@@ -92,8 +92,8 @@ public class TestFSEditLogLoader {
     rwf.close();
     
     StringBuilder bld = new StringBuilder();
-    bld.append("^Error replaying edit log at offset \\d+");
-    bld.append(" on transaction ID \\d+\n");
+    bld.append("^Error replaying edit log at offset \\d+.  ");
+    bld.append("Expected transaction ID was \\d+\n");
     bld.append("Recent opcode offsets: (\\d+\\s*){4}$");
     try {
       cluster = new MiniDFSCluster.Builder(conf).numDataNodes(NUM_DATA_NODES)

Added: hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameNodeRecovery.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameNodeRecovery.java?rev=1311394&view=auto
==============================================================================
--- hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameNodeRecovery.java (added)
+++ hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameNodeRecovery.java Mon Apr  9 19:39:58 2012
@@ -0,0 +1,305 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hdfs.server.namenode;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.RandomAccessFile;
+import java.util.HashSet;
+import java.util.Set;
+
+import static org.junit.Assert.*;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hdfs.HdfsConfiguration;
+import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.StartupOption;
+import org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory;
+import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.OpInstanceCache;
+import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.DeleteOp;
+import org.apache.hadoop.hdfs.server.namenode.NNStorage.NameNodeDirType;
+import org.apache.hadoop.io.IOUtils;
+import org.apache.hadoop.util.StringUtils;
+import org.junit.Test;
+
+import com.google.common.collect.Sets;
+
+/**
+ * This tests data recovery mode for the NameNode.
+ */
+public class TestNameNodeRecovery {
+  private static final Log LOG = LogFactory.getLog(TestNameNodeRecovery.class);
+  private static StartupOption recoverStartOpt = StartupOption.RECOVER;
+
+  static {
+    recoverStartOpt.setForce(MetaRecoveryContext.FORCE_ALL);
+  }
+
+  static void runEditLogTest(EditLogTestSetup elts) throws IOException {
+    final String TEST_LOG_NAME = "test_edit_log";
+    final OpInstanceCache cache = new OpInstanceCache();
+    
+    EditLogFileOutputStream elfos = null;
+    File file = null;
+    EditLogFileInputStream elfis = null;
+    try {
+      file = new File(TEST_LOG_NAME);
+      elfos = new EditLogFileOutputStream(file, 0);
+      elfos.create();
+
+      elts.addTransactionsToLog(elfos, cache);
+      elfos.setReadyToFlush();
+      elfos.flushAndSync();
+      elfos.close();
+      elfos = null;
+      elfis = new EditLogFileInputStream(file);
+      
+      // reading through normally will get you an exception
+      Set<Long> validTxIds = elts.getValidTxIds();
+      FSEditLogOp op = null;
+      long prevTxId = 0;
+      try {
+        while (true) {
+          op = elfis.nextOp();
+          if (op == null) {
+            break;
+          }
+          LOG.debug("read txid " + op.txid);
+          if (!validTxIds.contains(op.getTransactionId())) {
+            fail("read txid " + op.getTransactionId() +
+                ", which we did not expect to find.");
+          }
+          validTxIds.remove(op.getTransactionId());
+          prevTxId = op.getTransactionId();
+        }
+        if (elts.getLastValidTxId() != -1) {
+          fail("failed to throw IoException as expected");
+        }
+      } catch (IOException e) {
+        if (elts.getLastValidTxId() == -1) {
+          fail("expected all transactions to be valid, but got exception " +
+              "on txid " + prevTxId);
+        } else {
+          assertEquals(elts.getLastValidTxId(), prevTxId);
+        }
+      }
+      
+      if (elts.getLastValidTxId() != -1) {
+        // let's skip over the bad transaction
+        op = null;
+        prevTxId = 0;
+        try {
+          while (true) {
+            op = elfis.nextValidOp();
+            if (op == null) {
+              break;
+            }
+            prevTxId = op.getTransactionId();
+            assertTrue(validTxIds.remove(op.getTransactionId()));
+          }
+        } catch (Throwable e) {
+          fail("caught IOException while trying to skip over bad " +
+              "transaction.  message was " + e.getMessage() + 
+              "\nstack trace\n" + StringUtils.stringifyException(e));
+        }
+      }
+      // We should have read every valid transaction.
+      assertTrue(validTxIds.isEmpty());
+    } finally {
+      IOUtils.cleanup(LOG, elfos, elfis);
+    }
+  }
+
+  private interface EditLogTestSetup {
+    /** 
+     * Set up the edit log.
+     */
+    abstract public void addTransactionsToLog(EditLogOutputStream elos,
+        OpInstanceCache cache) throws IOException;
+
+    /**
+     * Get the transaction ID right before the transaction which causes the
+     * normal edit log loading process to bail out, or -1 if the first
+     * transaction should be bad.
+     */
+    abstract public long getLastValidTxId();
+
+    /**
+     * Get the transaction IDs which should exist and be valid in this
+     * edit log.
+     */
+    abstract public Set<Long> getValidTxIds();
+  }
+  
+  private class EltsTestEmptyLog implements EditLogTestSetup {
+    public void addTransactionsToLog(EditLogOutputStream elos,
+        OpInstanceCache cache) throws IOException {
+      // do nothing
+    }
+
+    public long getLastValidTxId() {
+      return -1;
+    }
+
+    public Set<Long> getValidTxIds() {
+      return new HashSet<Long>();
+    } 
+  }
+  
+  /** Test an empty edit log */
+  @Test(timeout=180000)
+  public void testEmptyLog() throws IOException {
+    runEditLogTest(new EltsTestEmptyLog());
+  }
+  
+  private class EltsTestGarbageInEditLog implements EditLogTestSetup {
+    final private long BAD_TXID = 4;
+    final private long MAX_TXID = 10;
+    
+    public void addTransactionsToLog(EditLogOutputStream elos,
+        OpInstanceCache cache) throws IOException {
+      for (long txid = 1; txid <= MAX_TXID; txid++) {
+        if (txid == BAD_TXID) {
+          byte garbage[] = { 0x1, 0x2, 0x3 };
+          elos.writeRaw(garbage, 0, garbage.length);
+        }
+        else {
+          DeleteOp op;
+          op = DeleteOp.getInstance(cache);
+          op.setTransactionId(txid);
+          op.setPath("/foo." + txid);
+          op.setTimestamp(txid);
+          elos.write(op);
+        }
+      }
+    }
+
+    public long getLastValidTxId() {
+      return BAD_TXID - 1;
+    }
+
+    public Set<Long> getValidTxIds() {
+      return Sets.newHashSet(1L, 2L, 3L, 5L, 6L, 7L, 8L, 9L, 10L);
+    }
+  }
+  
+  /** Test that we can successfully recover from a situation where there is
+   * garbage in the middle of the edit log file. */
+  @Test(timeout=180000)
+  public void testSkipEdit() throws IOException {
+    runEditLogTest(new EltsTestGarbageInEditLog());
+  }
+  
+  /** Test that we can successfully recover from a situation where the last
+   * entry in the edit log has been truncated. */
+  @Test(timeout=180000)
+  public void testRecoverTruncatedEditLog() throws IOException {
+    final String TEST_PATH = "/test/path/dir";
+    final int NUM_TEST_MKDIRS = 10;
+    
+    // start a cluster 
+    Configuration conf = new HdfsConfiguration();
+    MiniDFSCluster cluster = null;
+    FileSystem fileSys = null;
+    StorageDirectory sd = null;
+    try {
+      cluster = new MiniDFSCluster.Builder(conf).numDataNodes(0)
+          .build();
+      cluster.waitActive();
+      fileSys = cluster.getFileSystem();
+      final FSNamesystem namesystem = cluster.getNamesystem();
+      FSImage fsimage = namesystem.getFSImage();
+      for (int i = 0; i < NUM_TEST_MKDIRS; i++) {
+        fileSys.mkdirs(new Path(TEST_PATH));
+      }
+      sd = fsimage.getStorage().dirIterator(NameNodeDirType.EDITS).next();
+    } finally {
+      if (cluster != null) {
+        cluster.shutdown();
+      }
+    }
+
+    File editFile = FSImageTestUtil.findLatestEditsLog(sd).getFile();
+    assertTrue("Should exist: " + editFile, editFile.exists());
+
+    // Corrupt the last edit
+    long fileLen = editFile.length();
+    RandomAccessFile rwf = new RandomAccessFile(editFile, "rw");
+    rwf.setLength(fileLen - 1);
+    rwf.close();
+    
+    // Make sure that we can't start the cluster normally before recovery
+    cluster = null;
+    try {
+      LOG.debug("trying to start normally (this should fail)...");
+      cluster = new MiniDFSCluster.Builder(conf).numDataNodes(0)
+          .format(false).build();
+      cluster.waitActive();
+      cluster.shutdown();
+      fail("expected the truncated edit log to prevent normal startup");
+    } catch (IOException e) {
+      // success
+    } finally {
+      if (cluster != null) {
+        cluster.shutdown();
+      }
+    }
+    
+    // Perform recovery
+    cluster = null;
+    try {
+      LOG.debug("running recovery...");
+      cluster = new MiniDFSCluster.Builder(conf).numDataNodes(0)
+          .format(false).startupOption(recoverStartOpt).build();
+    } catch (IOException e) {
+      fail("caught IOException while trying to recover. " +
+          "message was " + e.getMessage() + 
+          "\nstack trace\n" + StringUtils.stringifyException(e));
+    } finally {
+      if (cluster != null) {
+        cluster.shutdown();
+      }
+    }
+    
+    // Make sure that we can start the cluster normally after recovery
+    cluster = null;
+    try {
+      LOG.debug("starting cluster normally after recovery...");
+      cluster = new MiniDFSCluster.Builder(conf).numDataNodes(0)
+          .format(false).build();
+      LOG.debug("testRecoverTruncatedEditLog: successfully recovered the " +
+          "truncated edit log");
+      assertTrue(cluster.getFileSystem().exists(new Path(TEST_PATH)));
+    } catch (IOException e) {
+      fail("failed to recover.  Error message: " + e.getMessage());
+    } finally {
+      if (cluster != null) {
+        cluster.shutdown();
+      }
+    }
+  }
+}

Modified: hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestSecurityTokenEditLog.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestSecurityTokenEditLog.java?rev=1311394&r1=1311393&r2=1311394&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestSecurityTokenEditLog.java (original)
+++ hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestSecurityTokenEditLog.java Mon Apr  9 19:39:58 2012
@@ -143,9 +143,9 @@ public class TestSecurityTokenEditLog ex
         File editFile = NNStorage.getFinalizedEditsFile(sd, 1, 1 + expectedTransactions - 1);
         System.out.println("Verifying file: " + editFile);
         
-        FSEditLogLoader loader = new FSEditLogLoader(namesystem);        
+        FSEditLogLoader loader = new FSEditLogLoader(namesystem, 0);        
         long numEdits = loader.loadFSEdits(
-            new EditLogFileInputStream(editFile), 1);
+            new EditLogFileInputStream(editFile), 1, null);
         assertEquals("Verification for " + editFile, expectedTransactions, numEdits);
       }
     } finally {