You are viewing a plain text version of this content. The canonical link for it is here.
Posted to hdfs-commits@hadoop.apache.org by ma...@apache.org on 2011/06/30 20:38:01 UTC
svn commit: r1141658 - in /hadoop/common/trunk/hdfs: ./
src/java/org/apache/hadoop/hdfs/server/namenode/
src/test/hdfs/org/apache/hadoop/hdfs/
Author: mattf
Date: Thu Jun 30 18:38:01 2011
New Revision: 1141658
URL: http://svn.apache.org/viewvc?rev=1141658&view=rev
Log:
HDFS-1955. FSImage.doUpgrade() was made too fault-tolerant by HDFS-1826. Contributed by Matt Foley.
Modified:
hadoop/common/trunk/hdfs/CHANGES.txt
hadoop/common/trunk/hdfs/src/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java
hadoop/common/trunk/hdfs/src/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java
hadoop/common/trunk/hdfs/src/java/org/apache/hadoop/hdfs/server/namenode/NNStorage.java
hadoop/common/trunk/hdfs/src/test/hdfs/org/apache/hadoop/hdfs/TestDFSUpgrade.java
Modified: hadoop/common/trunk/hdfs/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hdfs/CHANGES.txt?rev=1141658&r1=1141657&r2=1141658&view=diff
==============================================================================
--- hadoop/common/trunk/hdfs/CHANGES.txt (original)
+++ hadoop/common/trunk/hdfs/CHANGES.txt Thu Jun 30 18:38:01 2011
@@ -554,6 +554,9 @@ Trunk (unreleased changes)
BUG FIXES
+ HDFS-1955. FSImage.doUpgrade() was made too fault-tolerant by HDFS-1826.
+ (mattf)
+
HDFS-2061. Two minor bugs in BlockManager block report processing. (mattf)
HDFS-1449. Fix test failures - ExtendedBlock must return
Modified: hadoop/common/trunk/hdfs/src/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hdfs/src/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java?rev=1141658&r1=1141657&r2=1141658&view=diff
==============================================================================
--- hadoop/common/trunk/hdfs/src/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java (original)
+++ hadoop/common/trunk/hdfs/src/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java Thu Jun 30 18:38:01 2011
@@ -1228,6 +1228,11 @@ public class FSEditLog implements NNStor
@Override // NNStorageListener
public synchronized void errorOccurred(StorageDirectory sd)
throws IOException {
+ if (editStreams == null) {
+ //errors can occur on storage directories
+ //before edit streams have been set up
+ return;
+ }
ArrayList<EditLogOutputStream> errorStreams
= new ArrayList<EditLogOutputStream>();
Modified: hadoop/common/trunk/hdfs/src/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hdfs/src/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java?rev=1141658&r1=1141657&r2=1141658&view=diff
==============================================================================
--- hadoop/common/trunk/hdfs/src/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java (original)
+++ hadoop/common/trunk/hdfs/src/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java Thu Jun 30 18:38:01 2011
@@ -397,7 +397,12 @@ public class FSImage implements NNStorag
LOG.info("Upgrade of " + sd.getRoot() + " is complete.");
}
isUpgradeFinalized = false;
- storage.reportErrorsOnDirectories(errorSDs);
+ if (!errorSDs.isEmpty()) {
+ storage.reportErrorsOnDirectories(errorSDs);
+ //during upgrade, it's a fatal error to fail any storage directory
+ throw new IOException("Upgrade failed in " + errorSDs.size()
+ + " storage directory(ies), previously logged.");
+ }
storage.initializeDistributedUpgrade();
editLog.open();
}
Modified: hadoop/common/trunk/hdfs/src/java/org/apache/hadoop/hdfs/server/namenode/NNStorage.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hdfs/src/java/org/apache/hadoop/hdfs/server/namenode/NNStorage.java?rev=1141658&r1=1141657&r2=1141658&view=diff
==============================================================================
--- hadoop/common/trunk/hdfs/src/java/org/apache/hadoop/hdfs/server/namenode/NNStorage.java (original)
+++ hadoop/common/trunk/hdfs/src/java/org/apache/hadoop/hdfs/server/namenode/NNStorage.java Thu Jun 30 18:38:01 2011
@@ -905,7 +905,7 @@ public class NNStorage extends Storage i
*/
void reportErrorsOnDirectory(StorageDirectory sd)
throws IOException {
- LOG.warn("Error reported on storage directory " + sd);
+ LOG.error("Error reported on storage directory " + sd);
String lsd = listStorageDirectories();
LOG.debug("current list of storage dirs:" + lsd);
@@ -914,12 +914,12 @@ public class NNStorage extends Storage i
listener.errorOccurred(sd);
}
- LOG.info("About to remove corresponding storage: "
+ LOG.warn("About to remove corresponding storage: "
+ sd.getRoot().getAbsolutePath());
try {
sd.unlock();
} catch (Exception e) {
- LOG.info("Unable to unlock bad storage directory: "
+ LOG.warn("Unable to unlock bad storage directory: "
+ sd.getRoot().getPath(), e);
}
Modified: hadoop/common/trunk/hdfs/src/test/hdfs/org/apache/hadoop/hdfs/TestDFSUpgrade.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hdfs/src/test/hdfs/org/apache/hadoop/hdfs/TestDFSUpgrade.java?rev=1141658&r1=1141657&r2=1141658&view=diff
==============================================================================
--- hadoop/common/trunk/hdfs/src/test/hdfs/org/apache/hadoop/hdfs/TestDFSUpgrade.java (original)
+++ hadoop/common/trunk/hdfs/src/test/hdfs/org/apache/hadoop/hdfs/TestDFSUpgrade.java Thu Jun 30 18:38:01 2011
@@ -22,18 +22,19 @@ import static org.apache.hadoop.hdfs.ser
import java.io.File;
import java.io.IOException;
-
-import junit.framework.TestCase;
+import java.util.regex.Pattern;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileUtil;
-import org.apache.hadoop.hdfs.protocol.FSConstants;
import org.apache.hadoop.hdfs.server.common.Storage;
import org.apache.hadoop.hdfs.server.common.StorageInfo;
import org.apache.hadoop.hdfs.server.common.HdfsConstants.StartupOption;
import org.apache.hadoop.hdfs.server.namenode.TestParallelImageWrite;
+import org.apache.hadoop.util.StringUtils;
+import org.junit.BeforeClass;
+import org.junit.Ignore;
import org.junit.Test;
import static org.junit.Assert.*;
@@ -44,8 +45,7 @@ import static org.junit.Assert.*;
*/
public class TestDFSUpgrade {
- private static final Log LOG = LogFactory.getLog(
- "org.apache.hadoop.hdfs.TestDFSUpgrade");
+ private static final Log LOG = LogFactory.getLog(TestDFSUpgrade.class.getName());
private Configuration conf;
private int testCounter = 0;
private MiniDFSCluster cluster = null;
@@ -111,11 +111,27 @@ public class TestDFSUpgrade {
}
}
+
/**
* Attempts to start a NameNode with the given operation. Starting
* the NameNode should throw an exception.
*/
void startNameNodeShouldFail(StartupOption operation) {
+ startNameNodeShouldFail(operation, null, null);
+ }
+
+ /**
+ * Attempts to start a NameNode with the given operation. Starting
+ * the NameNode should throw an exception.
+ * @param operation - NameNode startup operation
+ * @param exceptionClass - if non-null, will check that the caught exception
+ * is assignment-compatible with exceptionClass
+ * @param messagePattern - if non-null, will check that a substring of the
+ * message from the caught exception matches this pattern, via the
+ * {@link Matcher#find()} method.
+ */
+ void startNameNodeShouldFail(StartupOption operation,
+ Class<? extends Exception> exceptionClass, Pattern messagePattern) {
try {
cluster = new MiniDFSCluster.Builder(conf).numDataNodes(0)
.startupOption(operation)
@@ -123,9 +139,23 @@ public class TestDFSUpgrade {
.manageDataDfsDirs(false)
.manageNameDfsDirs(false)
.build(); // should fail
- throw new AssertionError("NameNode should have failed to start");
- } catch (Exception expected) {
- // expected
+ fail("NameNode should have failed to start");
+
+ } catch (Exception e) {
+ // expect exception
+ if (exceptionClass != null) {
+ assertTrue("Caught exception is not of expected class "
+ + exceptionClass.getSimpleName() + ": "
+ + StringUtils.stringifyException(e),
+ exceptionClass.isInstance(e));
+ }
+ if (messagePattern != null) {
+ assertTrue("Caught exception message string does not match expected pattern \""
+ + messagePattern.pattern() + "\" : "
+ + StringUtils.stringifyException(e),
+ messagePattern.matcher(e.getMessage()).find());
+ }
+ LOG.info("Successfully detected expected NameNode startup failure.");
}
}
@@ -155,6 +185,11 @@ public class TestDFSUpgrade {
.build();
}
+ @BeforeClass
+ public static void initialize() throws Exception {
+ UpgradeUtilities.initialize();
+ }
+
/**
* This test attempts to upgrade the NameNode and DataNode under
* a number of valid and invalid conditions.
@@ -162,8 +197,6 @@ public class TestDFSUpgrade {
@Test
public void testUpgrade() throws Exception {
File[] baseDirs;
- UpgradeUtilities.initialize();
-
StorageInfo storageInfo = null;
for (int numDirs = 1; numDirs <= 2; numDirs++) {
conf = new HdfsConfiguration();
@@ -311,6 +344,30 @@ public class TestDFSUpgrade {
UpgradeUtilities.createEmptyDirs(nameNodeDirs);
}
}
+
+ /*
+ * Stand-alone test to detect failure of one SD during parallel upgrade.
+ * At this time, can only be done with manual hack of {@link FSImage.doUpgrade()}
+ */
+ @Ignore
+ public void testUpgrade4() throws Exception {
+ int numDirs = 4;
+ conf = new HdfsConfiguration();
+ conf.setInt(DFSConfigKeys.DFS_DATANODE_SCAN_PERIOD_HOURS_KEY, -1);
+ conf = UpgradeUtilities.initializeStorageStateConf(numDirs, conf);
+ String[] nameNodeDirs = conf.getStrings(DFSConfigKeys.DFS_NAMENODE_NAME_DIR_KEY);
+
+ log("NameNode upgrade with one bad storage dir", numDirs);
+ UpgradeUtilities.createNameNodeStorageDirs(nameNodeDirs, "current");
+ try {
+ // assert("storage dir has been prepared for failure before reaching this point");
+ startNameNodeShouldFail(StartupOption.UPGRADE, IOException.class,
+ Pattern.compile("failed in 1 storage"));
+ } finally {
+ // assert("storage dir shall be returned to normal state before exiting");
+ UpgradeUtilities.createEmptyDirs(nameNodeDirs);
+ }
+ }
@Test(expected=IOException.class)
public void testUpgradeFromPreUpgradeLVFails() throws IOException {
@@ -320,6 +377,7 @@ public class TestDFSUpgrade {
fail("Expected IOException is not thrown");
}
+ @Ignore
public void test203LayoutVersion() {
for (int lv : Storage.LAYOUT_VERSIONS_203) {
assertTrue(Storage.is203LayoutVersion(lv));
@@ -327,7 +385,9 @@ public class TestDFSUpgrade {
}
public static void main(String[] args) throws Exception {
- new TestDFSUpgrade().testUpgrade();
+ TestDFSUpgrade t = new TestDFSUpgrade();
+ TestDFSUpgrade.initialize();
+ t.testUpgrade();
}
}