You are viewing a plain text version of this content. The canonical link for it is here.
Posted to hdfs-commits@hadoop.apache.org by el...@apache.org on 2012/04/09 21:39:59 UTC
svn commit: r1311394 [2/2] - in
/hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs: ./ dev-support/
src/contrib/bkjournal/src/main/java/org/apache/hadoop/contrib/bkjournal/
src/contrib/bkjournal/src/test/java/org/apache/hadoop/hdfs/server/namenode/
s...
Modified: hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java?rev=1311394&r1=1311393&r2=1311394&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java (original)
+++ hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java Mon Apr 9 19:39:58 2012
@@ -514,6 +514,8 @@ public class NameNode {
* <li>{@link StartupOption#CHECKPOINT CHECKPOINT} - start checkpoint node</li>
* <li>{@link StartupOption#UPGRADE UPGRADE} - start the cluster
* upgrade and create a snapshot of the current file system state</li>
+ * <li>{@link StartupOption#RECOVERY RECOVERY} - recover name node
+ * metadata</li>
* <li>{@link StartupOption#ROLLBACK ROLLBACK} - roll the
* cluster back to the previous state</li>
* <li>{@link StartupOption#FINALIZE FINALIZE} - finalize
@@ -832,7 +834,10 @@ public class NameNode {
StartupOption.FINALIZE.getName() + "] | [" +
StartupOption.IMPORT.getName() + "] | [" +
StartupOption.BOOTSTRAPSTANDBY.getName() + "] | [" +
- StartupOption.INITIALIZESHAREDEDITS.getName() + "]");
+ StartupOption.INITIALIZESHAREDEDITS.getName() + "] | [" +
+ StartupOption.BOOTSTRAPSTANDBY.getName() + "] | [" +
+ StartupOption.RECOVER.getName() + " [ " +
+ StartupOption.FORCE.getName() + " ] ]");
}
private static StartupOption parseArguments(String args[]) {
@@ -876,6 +881,21 @@ public class NameNode {
} else if (StartupOption.INITIALIZESHAREDEDITS.getName().equalsIgnoreCase(cmd)) {
startOpt = StartupOption.INITIALIZESHAREDEDITS;
return startOpt;
+ } else if (StartupOption.RECOVER.getName().equalsIgnoreCase(cmd)) {
+ if (startOpt != StartupOption.REGULAR) {
+ throw new RuntimeException("Can't combine -recover with " +
+ "other startup options.");
+ }
+ startOpt = StartupOption.RECOVER;
+ while (++i < argsLen) {
+ if (args[i].equalsIgnoreCase(
+ StartupOption.FORCE.getName())) {
+ startOpt.setForce(MetaRecoveryContext.FORCE_FIRST_CHOICE);
+ } else {
+ throw new RuntimeException("Error parsing recovery options: " +
+ "can't understand option \"" + args[i] + "\"");
+ }
+ }
} else {
return null;
}
@@ -892,6 +912,39 @@ public class NameNode {
StartupOption.REGULAR.toString()));
}
+ private static void doRecovery(StartupOption startOpt, Configuration conf)
+ throws IOException {
+ if (startOpt.getForce() < MetaRecoveryContext.FORCE_ALL) {
+ if (!confirmPrompt("You have selected Metadata Recovery mode. " +
+ "This mode is intended to recover lost metadata on a corrupt " +
+ "filesystem. Metadata recovery mode often permanently deletes " +
+ "data from your HDFS filesystem. Please back up your edit log " +
+ "and fsimage before trying this!\n\n" +
+ "Are you ready to proceed? (Y/N)\n")) {
+ System.err.println("Recovery aborted at user request.\n");
+ return;
+ }
+ }
+ MetaRecoveryContext.LOG.info("starting recovery...");
+ UserGroupInformation.setConfiguration(conf);
+ NameNode.initMetrics(conf, startOpt.toNodeRole());
+ FSNamesystem fsn = null;
+ try {
+ fsn = FSNamesystem.loadFromDisk(conf);
+ fsn.saveNamespace();
+ MetaRecoveryContext.LOG.info("RECOVERY COMPLETE");
+ } catch (IOException e) {
+ MetaRecoveryContext.LOG.info("RECOVERY FAILED: caught exception", e);
+ throw e;
+ } catch (RuntimeException e) {
+ MetaRecoveryContext.LOG.info("RECOVERY FAILED: caught exception", e);
+ throw e;
+ } finally {
+ if (fsn != null)
+ fsn.close();
+ }
+ }
+
/**
* Print out a prompt to the user, and return true if the user
* responds with "Y" or "yes".
@@ -973,6 +1026,10 @@ public class NameNode {
DefaultMetricsSystem.initialize(role.toString().replace(" ", ""));
return new BackupNode(conf, role);
}
+ case RECOVER: {
+ NameNode.doRecovery(startOpt, conf);
+ return null;
+ }
default:
DefaultMetricsSystem.initialize("NameNode");
return new NameNode(conf);
Modified: hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/EditLogTailer.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/EditLogTailer.java?rev=1311394&r1=1311393&r2=1311394&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/EditLogTailer.java (original)
+++ hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/EditLogTailer.java Mon Apr 9 19:39:58 2012
@@ -219,7 +219,7 @@ public class EditLogTailer {
// disk are ignored.
long editsLoaded = 0;
try {
- editsLoaded = image.loadEdits(streams, namesystem);
+ editsLoaded = image.loadEdits(streams, namesystem, null);
} catch (EditLogInputException elie) {
editsLoaded = elie.getNumEditsLoaded();
throw elie;
Modified: hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineEditsViewer/OfflineEditsXmlLoader.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineEditsViewer/OfflineEditsXmlLoader.java?rev=1311394&r1=1311393&r2=1311394&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineEditsViewer/OfflineEditsXmlLoader.java (original)
+++ hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineEditsViewer/OfflineEditsXmlLoader.java Mon Apr 9 19:39:58 2012
@@ -28,6 +28,7 @@ import org.apache.hadoop.classification.
import org.apache.hadoop.hdfs.util.XMLUtils.InvalidXmlException;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOpCodes;
+import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.OpInstanceCache;
import org.apache.hadoop.hdfs.util.XMLUtils.Stanza;
import org.xml.sax.Attributes;
@@ -54,6 +55,7 @@ class OfflineEditsXmlLoader
private FSEditLogOpCodes opCode;
private StringBuffer cbuf;
private long nextTxId;
+ private final OpInstanceCache opCache = new OpInstanceCache();
static enum ParseState {
EXPECT_EDITS_TAG,
@@ -207,7 +209,7 @@ class OfflineEditsXmlLoader
throw new InvalidXmlException("expected </DATA>");
}
state = ParseState.EXPECT_RECORD;
- FSEditLogOp op = FSEditLogOp.getOpInstance(opCode);
+ FSEditLogOp op = opCache.get(opCode);
opCode = null;
try {
op.decodeXml(stanza);
Modified: hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java?rev=1311394&r1=1311393&r2=1311394&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java (original)
+++ hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java Mon Apr 9 19:39:58 2012
@@ -581,6 +581,10 @@ public class MiniDFSCluster {
}
}
+ if (operation == StartupOption.RECOVER) {
+ return;
+ }
+
// Start the DataNodes
startDataNodes(conf, numDataNodes, manageDataDfsDirs, operation, racks,
hosts, simulatedCapacities, setupHostsFile);
@@ -781,6 +785,9 @@ public class MiniDFSCluster {
operation == StartupOption.REGULAR) ?
new String[] {} : new String[] {operation.getName()};
NameNode nn = NameNode.createNameNode(args, conf);
+ if (operation == StartupOption.RECOVER) {
+ return;
+ }
// After the NN has started, set back the bound ports into
// the conf
@@ -956,6 +963,9 @@ public class MiniDFSCluster {
long[] simulatedCapacities,
boolean setupHostsFile,
boolean checkDataNodeAddrConfig) throws IOException {
+ if (operation == StartupOption.RECOVER) {
+ return;
+ }
conf.set(DFS_DATANODE_HOST_NAME_KEY, "127.0.0.1");
int curDatanodesNum = dataNodes.size();
Modified: hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestEditLog.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestEditLog.java?rev=1311394&r1=1311393&r2=1311394&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestEditLog.java (original)
+++ hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestEditLog.java Mon Apr 9 19:39:58 2012
@@ -179,8 +179,8 @@ public class TestEditLog extends TestCas
}
private long testLoad(byte[] data, FSNamesystem namesys) throws IOException {
- FSEditLogLoader loader = new FSEditLogLoader(namesys);
- return loader.loadFSEdits(new EditLogByteInputStream(data), 1);
+ FSEditLogLoader loader = new FSEditLogLoader(namesys, 0);
+ return loader.loadFSEdits(new EditLogByteInputStream(data), 1, null);
}
/**
@@ -315,7 +315,7 @@ public class TestEditLog extends TestCas
//
for (Iterator<StorageDirectory> it =
fsimage.getStorage().dirIterator(NameNodeDirType.EDITS); it.hasNext();) {
- FSEditLogLoader loader = new FSEditLogLoader(namesystem);
+ FSEditLogLoader loader = new FSEditLogLoader(namesystem, 0);
File editFile = NNStorage.getFinalizedEditsFile(it.next(), 3,
3 + expectedTxns - 1);
@@ -323,7 +323,7 @@ public class TestEditLog extends TestCas
System.out.println("Verifying file: " + editFile);
long numEdits = loader.loadFSEdits(
- new EditLogFileInputStream(editFile), 3);
+ new EditLogFileInputStream(editFile), 3, null);
int numLeases = namesystem.leaseManager.countLease();
System.out.println("Number of outstanding leases " + numLeases);
assertEquals(0, numLeases);
@@ -774,8 +774,8 @@ public class TestEditLog extends TestCas
}
@Override
- public FSEditLogOp readOp() throws IOException {
- return reader.readOp();
+ protected FSEditLogOp nextOp() throws IOException {
+ return reader.readOp(false);
}
@Override
@@ -788,16 +788,11 @@ public class TestEditLog extends TestCas
input.close();
}
- @Override // JournalStream
+ @Override
public String getName() {
return "AnonEditLogByteInputStream";
}
- @Override // JournalStream
- public JournalType getType() {
- return JournalType.FILE;
- }
-
@Override
public boolean isInProgress() {
return true;
Modified: hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestEditLogRace.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestEditLogRace.java?rev=1311394&r1=1311393&r2=1311394&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestEditLogRace.java (original)
+++ hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestEditLogRace.java Mon Apr 9 19:39:58 2012
@@ -236,9 +236,9 @@ public class TestEditLogRace {
File editFile = new File(sd.getCurrentDir(), logFileName);
System.out.println("Verifying file: " + editFile);
- FSEditLogLoader loader = new FSEditLogLoader(namesystem);
+ FSEditLogLoader loader = new FSEditLogLoader(namesystem, startTxId);
long numEditsThisLog = loader.loadFSEdits(new EditLogFileInputStream(editFile),
- startTxId);
+ startTxId, null);
System.out.println("Number of edits: " + numEditsThisLog);
assertTrue(numEdits == -1 || numEditsThisLog == numEdits);
Modified: hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSEditLogLoader.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSEditLogLoader.java?rev=1311394&r1=1311393&r2=1311394&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSEditLogLoader.java (original)
+++ hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSEditLogLoader.java Mon Apr 9 19:39:58 2012
@@ -92,8 +92,8 @@ public class TestFSEditLogLoader {
rwf.close();
StringBuilder bld = new StringBuilder();
- bld.append("^Error replaying edit log at offset \\d+");
- bld.append(" on transaction ID \\d+\n");
+ bld.append("^Error replaying edit log at offset \\d+. ");
+ bld.append("Expected transaction ID was \\d+\n");
bld.append("Recent opcode offsets: (\\d+\\s*){4}$");
try {
cluster = new MiniDFSCluster.Builder(conf).numDataNodes(NUM_DATA_NODES)
Added: hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameNodeRecovery.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameNodeRecovery.java?rev=1311394&view=auto
==============================================================================
--- hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameNodeRecovery.java (added)
+++ hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameNodeRecovery.java Mon Apr 9 19:39:58 2012
@@ -0,0 +1,305 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hdfs.server.namenode;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.RandomAccessFile;
+import java.util.HashSet;
+import java.util.Set;
+
+import static org.junit.Assert.*;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hdfs.HdfsConfiguration;
+import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.StartupOption;
+import org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory;
+import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.OpInstanceCache;
+import org.apache.hadoop.hdfs.server.namenode.FSImage;
+import org.apache.hadoop.hdfs.server.namenode.FSImageTestUtil;
+import org.apache.hadoop.hdfs.server.namenode.FSNamesystem;
+import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.DeleteOp;
+import org.apache.hadoop.hdfs.server.namenode.NNStorage.NameNodeDirType;
+import org.apache.hadoop.io.IOUtils;
+import org.apache.hadoop.util.StringUtils;
+import org.junit.Test;
+
+import com.google.common.collect.Sets;
+
+/**
+ * This tests data recovery mode for the NameNode.
+ */
+public class TestNameNodeRecovery {
+ private static final Log LOG = LogFactory.getLog(TestNameNodeRecovery.class);
+ private static StartupOption recoverStartOpt = StartupOption.RECOVER;
+
+ static {
+ recoverStartOpt.setForce(MetaRecoveryContext.FORCE_ALL);
+ }
+
+ static void runEditLogTest(EditLogTestSetup elts) throws IOException {
+ final String TEST_LOG_NAME = "test_edit_log";
+ final OpInstanceCache cache = new OpInstanceCache();
+
+ EditLogFileOutputStream elfos = null;
+ File file = null;
+ EditLogFileInputStream elfis = null;
+ try {
+ file = new File(TEST_LOG_NAME);
+ elfos = new EditLogFileOutputStream(file, 0);
+ elfos.create();
+
+ elts.addTransactionsToLog(elfos, cache);
+ elfos.setReadyToFlush();
+ elfos.flushAndSync();
+ elfos.close();
+ elfos = null;
+ file = new File(TEST_LOG_NAME);
+ elfis = new EditLogFileInputStream(file);
+
+ // reading through normally will get you an exception
+ Set<Long> validTxIds = elts.getValidTxIds();
+ FSEditLogOp op = null;
+ long prevTxId = 0;
+ try {
+ while (true) {
+ op = elfis.nextOp();
+ if (op == null) {
+ break;
+ }
+ LOG.debug("read txid " + op.txid);
+ if (!validTxIds.contains(op.getTransactionId())) {
+ fail("read txid " + op.getTransactionId() +
+ ", which we did not expect to find.");
+ }
+ validTxIds.remove(op.getTransactionId());
+ prevTxId = op.getTransactionId();
+ }
+ if (elts.getLastValidTxId() != -1) {
+ fail("failed to throw IOException as expected");
+ }
+ } catch (IOException e) {
+ if (elts.getLastValidTxId() == -1) {
+ fail("expected all transactions to be valid, but got exception " +
+ "on txid " + prevTxId);
+ } else {
+ assertEquals(prevTxId, elts.getLastValidTxId());
+ }
+ }
+
+ if (elts.getLastValidTxId() != -1) {
+ // let's skip over the bad transaction
+ op = null;
+ prevTxId = 0;
+ try {
+ while (true) {
+ op = elfis.nextValidOp();
+ if (op == null) {
+ break;
+ }
+ prevTxId = op.getTransactionId();
+ assertTrue(validTxIds.remove(op.getTransactionId()));
+ }
+ } catch (Throwable e) {
+ fail("caught IOException while trying to skip over bad " +
+ "transaction. message was " + e.getMessage() +
+ "\nstack trace\n" + StringUtils.stringifyException(e));
+ }
+ }
+ // We should have read every valid transaction.
+ assertTrue(validTxIds.isEmpty());
+ } finally {
+ IOUtils.cleanup(LOG, elfos, elfis);
+ }
+ }
+
+ private interface EditLogTestSetup {
+ /**
+ * Set up the edit log.
+ */
+ abstract public void addTransactionsToLog(EditLogOutputStream elos,
+ OpInstanceCache cache) throws IOException;
+
+ /**
+ * Get the transaction ID right before the transaction which causes the
+ * normal edit log loading process to bail out-- or -1 if the first
+ * transaction should be bad.
+ */
+ abstract public long getLastValidTxId();
+
+ /**
+ * Get the transaction IDs which should exist and be valid in this
+ * edit log.
+ **/
+ abstract public Set<Long> getValidTxIds();
+ }
+
+ private class EltsTestEmptyLog implements EditLogTestSetup {
+ public void addTransactionsToLog(EditLogOutputStream elos,
+ OpInstanceCache cache) throws IOException {
+ // do nothing
+ }
+
+ public long getLastValidTxId() {
+ return -1;
+ }
+
+ public Set<Long> getValidTxIds() {
+ return new HashSet<Long>();
+ }
+ }
+
+ /** Test an empty edit log */
+ @Test(timeout=180000)
+ public void testEmptyLog() throws IOException {
+ runEditLogTest(new EltsTestEmptyLog());
+ }
+
+ private class EltsTestGarbageInEditLog implements EditLogTestSetup {
+ final private long BAD_TXID = 4;
+ final private long MAX_TXID = 10;
+
+ public void addTransactionsToLog(EditLogOutputStream elos,
+ OpInstanceCache cache) throws IOException {
+ for (long txid = 1; txid <= MAX_TXID; txid++) {
+ if (txid == BAD_TXID) {
+ byte garbage[] = { 0x1, 0x2, 0x3 };
+ elos.writeRaw(garbage, 0, garbage.length);
+ }
+ else {
+ DeleteOp op;
+ op = DeleteOp.getInstance(cache);
+ op.setTransactionId(txid);
+ op.setPath("/foo." + txid);
+ op.setTimestamp(txid);
+ elos.write(op);
+ }
+ }
+ }
+
+ public long getLastValidTxId() {
+ return BAD_TXID - 1;
+ }
+
+ public Set<Long> getValidTxIds() {
+ return Sets.newHashSet(1L, 2L, 3L, 5L, 6L, 7L, 8L, 9L, 10L);
+ }
+ }
+
+ /** Test that we can successfully recover from a situation where there is
+ * garbage in the middle of the edit log file output stream. */
+ @Test(timeout=180000)
+ public void testSkipEdit() throws IOException {
+ runEditLogTest(new EltsTestGarbageInEditLog());
+ }
+
+ /** Test that we can successfully recover from a situation where the last
+ * entry in the edit log has been truncated. */
+ @Test(timeout=180000)
+ public void testRecoverTruncatedEditLog() throws IOException {
+ final String TEST_PATH = "/test/path/dir";
+ final int NUM_TEST_MKDIRS = 10;
+
+ // start a cluster
+ Configuration conf = new HdfsConfiguration();
+ MiniDFSCluster cluster = null;
+ FileSystem fileSys = null;
+ StorageDirectory sd = null;
+ try {
+ cluster = new MiniDFSCluster.Builder(conf).numDataNodes(0)
+ .build();
+ cluster.waitActive();
+ fileSys = cluster.getFileSystem();
+ final FSNamesystem namesystem = cluster.getNamesystem();
+ FSImage fsimage = namesystem.getFSImage();
+ for (int i = 0; i < NUM_TEST_MKDIRS; i++) {
+ fileSys.mkdirs(new Path(TEST_PATH));
+ }
+ sd = fsimage.getStorage().dirIterator(NameNodeDirType.EDITS).next();
+ } finally {
+ if (cluster != null) {
+ cluster.shutdown();
+ }
+ }
+
+ File editFile = FSImageTestUtil.findLatestEditsLog(sd).getFile();
+ assertTrue("Should exist: " + editFile, editFile.exists());
+
+ // Corrupt the last edit
+ long fileLen = editFile.length();
+ RandomAccessFile rwf = new RandomAccessFile(editFile, "rw");
+ rwf.setLength(fileLen - 1);
+ rwf.close();
+
+ // Make sure that we can't start the cluster normally before recovery
+ cluster = null;
+ try {
+ LOG.debug("trying to start normally (this should fail)...");
+ cluster = new MiniDFSCluster.Builder(conf).numDataNodes(0)
+ .format(false).build();
+ cluster.waitActive();
+ cluster.shutdown();
+ fail("expected the truncated edit log to prevent normal startup");
+ } catch (IOException e) {
+ // success
+ } finally {
+ if (cluster != null) {
+ cluster.shutdown();
+ }
+ }
+
+ // Perform recovery
+ cluster = null;
+ try {
+ LOG.debug("running recovery...");
+ cluster = new MiniDFSCluster.Builder(conf).numDataNodes(0)
+ .format(false).startupOption(recoverStartOpt).build();
+ } catch (IOException e) {
+ fail("caught IOException while trying to recover. " +
+ "message was " + e.getMessage() +
+ "\nstack trace\n" + StringUtils.stringifyException(e));
+ } finally {
+ if (cluster != null) {
+ cluster.shutdown();
+ }
+ }
+
+ // Make sure that we can start the cluster normally after recovery
+ cluster = null;
+ try {
+ LOG.debug("starting cluster normally after recovery...");
+ cluster = new MiniDFSCluster.Builder(conf).numDataNodes(0)
+ .format(false).build();
+ LOG.debug("testRecoverTruncatedEditLog: successfully recovered the " +
+ "truncated edit log");
+ assertTrue(cluster.getFileSystem().exists(new Path(TEST_PATH)));
+ } catch (IOException e) {
+ fail("failed to recover. Error message: " + e.getMessage());
+ } finally {
+ if (cluster != null) {
+ cluster.shutdown();
+ }
+ }
+ }
+}
Modified: hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestSecurityTokenEditLog.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestSecurityTokenEditLog.java?rev=1311394&r1=1311393&r2=1311394&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestSecurityTokenEditLog.java (original)
+++ hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestSecurityTokenEditLog.java Mon Apr 9 19:39:58 2012
@@ -143,9 +143,9 @@ public class TestSecurityTokenEditLog ex
File editFile = NNStorage.getFinalizedEditsFile(sd, 1, 1 + expectedTransactions - 1);
System.out.println("Verifying file: " + editFile);
- FSEditLogLoader loader = new FSEditLogLoader(namesystem);
+ FSEditLogLoader loader = new FSEditLogLoader(namesystem, 0);
long numEdits = loader.loadFSEdits(
- new EditLogFileInputStream(editFile), 1);
+ new EditLogFileInputStream(editFile), 1, null);
assertEquals("Verification for " + editFile, expectedTransactions, numEdits);
}
} finally {