You are viewing a plain text version of this content. The canonical link for it is here.
Posted to hdfs-commits@hadoop.apache.org by to...@apache.org on 2012/04/11 07:16:07 UTC
svn commit: r1324558 - in
/hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs: CHANGES.txt
src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/BootstrapStandby.java
src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestBootstrapStandby.java
Author: todd
Date: Wed Apr 11 05:16:06 2012
New Revision: 1324558
URL: http://svn.apache.org/viewvc?rev=1324558&view=rev
Log:
HDFS-3247. Improve bootstrapStandby behavior when original NN is not active. Contributed by Todd Lipcon.
Modified:
hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/BootstrapStandby.java
hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestBootstrapStandby.java
Modified: hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt?rev=1324558&r1=1324557&r2=1324558&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt (original)
+++ hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt Wed Apr 11 05:16:06 2012
@@ -362,6 +362,9 @@ Release 2.0.0 - UNRELEASED
HDFS-3244. Remove dead writable code from hdfs/protocol. (eli)
+ HDFS-3247. Improve bootstrapStandby behavior when original NN is not active
+ (todd)
+
OPTIMIZATIONS
HDFS-3024. Improve performance of stringification in addStoredBlock (todd)
Modified: hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/BootstrapStandby.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/BootstrapStandby.java?rev=1324558&r1=1324557&r2=1324558&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/BootstrapStandby.java (original)
+++ hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/BootstrapStandby.java Wed Apr 11 05:16:06 2012
@@ -33,10 +33,14 @@ import org.apache.hadoop.HadoopIllegalAr
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.conf.Configurable;
import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.ha.HAServiceProtocol;
+import org.apache.hadoop.ha.HAServiceStatus;
+import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState;
+import org.apache.hadoop.ha.ServiceFailedException;
import org.apache.hadoop.hdfs.DFSUtil;
import org.apache.hadoop.hdfs.HAUtil;
+import org.apache.hadoop.hdfs.HdfsConfiguration;
import org.apache.hadoop.hdfs.NameNodeProxies;
-import org.apache.hadoop.hdfs.NameNodeProxies.ProxyAndInfo;
import org.apache.hadoop.hdfs.protocol.HdfsConstants;
import org.apache.hadoop.hdfs.server.namenode.CheckpointSignature;
import org.apache.hadoop.hdfs.server.namenode.EditLogInputStream;
@@ -47,8 +51,10 @@ import org.apache.hadoop.hdfs.server.nam
import org.apache.hadoop.hdfs.server.namenode.TransferFsImage;
import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocol;
import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo;
+import org.apache.hadoop.hdfs.tools.NNHAServiceTarget;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.MD5Hash;
+import org.apache.hadoop.security.AccessControlException;
import org.apache.hadoop.security.SecurityUtil;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.util.Tool;
@@ -65,7 +71,7 @@ import com.google.common.collect.Sets;
*/
@InterfaceAudience.Private
public class BootstrapStandby implements Tool, Configurable {
- private static final Log LOG = LogFactory.getLog(BootstrapStandby.class);
+ private static final Log LOG = LogFactory.getLog(BootstrapStandby.class);
private String nsId;
private String nnId;
private String otherNNId;
@@ -79,7 +85,13 @@ public class BootstrapStandby implements
private boolean force = false;
private boolean interactive = true;
-
+
+ // Exit/return codes.
+ static final int ERR_CODE_FAILED_CONNECT = 2;
+ static final int ERR_CODE_INVALID_VERSION = 3;
+ static final int ERR_CODE_OTHER_NN_NOT_ACTIVE = 4;
+ static final int ERR_CODE_ALREADY_FORMATTED = 5;
+ static final int ERR_CODE_LOGS_UNAVAILABLE = 6;
public int run(String[] args) throws Exception {
SecurityUtil.initKrb5CipherSuites();
@@ -121,24 +133,43 @@ public class BootstrapStandby implements
System.err.println("Usage: " + this.getClass().getSimpleName() +
"[-force] [-nonInteractive]");
}
+
+ private NamenodeProtocol createNNProtocolProxy()
+ throws IOException {
+ return NameNodeProxies.createNonHAProxy(getConf(),
+ otherIpcAddr, NamenodeProtocol.class,
+ UserGroupInformation.getLoginUser(), true)
+ .getProxy();
+ }
+
+ private HAServiceProtocol createHAProtocolProxy()
+ throws IOException {
+ return new NNHAServiceTarget(new HdfsConfiguration(conf),
+ nsId, otherNNId).getProxy(conf, 15000);
+ }
private int doRun() throws IOException {
- ProxyAndInfo<NamenodeProtocol> proxyAndInfo = NameNodeProxies.createNonHAProxy(getConf(),
- otherIpcAddr, NamenodeProtocol.class,
- UserGroupInformation.getLoginUser(), true);
- NamenodeProtocol proxy = proxyAndInfo.getProxy();
+
+ NamenodeProtocol proxy = createNNProtocolProxy();
NamespaceInfo nsInfo;
try {
nsInfo = proxy.versionRequest();
- checkLayoutVersion(nsInfo);
} catch (IOException ioe) {
LOG.fatal("Unable to fetch namespace information from active NN at " +
otherIpcAddr + ": " + ioe.getMessage());
if (LOG.isDebugEnabled()) {
LOG.debug("Full exception trace", ioe);
}
- return 1;
+ return ERR_CODE_FAILED_CONNECT;
}
+
+ if (!checkLayoutVersion(nsInfo)) {
+ LOG.fatal("Layout version on remote node (" +
+ nsInfo.getLayoutVersion() + ") does not match " +
+ "this node's layout version (" + HdfsConstants.LAYOUT_VERSION + ")");
+ return ERR_CODE_INVALID_VERSION;
+ }
+
System.out.println(
"=====================================================\n" +
@@ -153,12 +184,35 @@ public class BootstrapStandby implements
" Layout version: " + nsInfo.getLayoutVersion() + "\n" +
"=====================================================");
+ // Ensure the other NN is active - we can't force it to roll edit logs
+ // below if it's not active.
+ if (!isOtherNNActive()) {
+ String err = "NameNode " + nsId + "." + nnId + " at " + otherIpcAddr +
+ " is not currently in ACTIVE state.";
+ if (!interactive) {
+ LOG.fatal(err + " Please transition it to " +
+ "active before attempting to bootstrap a standby node.");
+ return ERR_CODE_OTHER_NN_NOT_ACTIVE;
+ }
+
+ System.err.println(err);
+ if (ToolRunner.confirmPrompt(
+ "Do you want to automatically transition it to active now?")) {
+ transitionOtherNNActive();
+ } else {
+ LOG.fatal("User aborted. Exiting without bootstrapping standby.");
+ return ERR_CODE_OTHER_NN_NOT_ACTIVE;
+ }
+ }
+
+
+
// Check with the user before blowing away data.
if (!NameNode.confirmFormat(
Sets.union(Sets.newHashSet(dirsToFormat),
Sets.newHashSet(editUrisToFormat)),
force, interactive)) {
- return 1;
+ return ERR_CODE_ALREADY_FORMATTED;
}
// Force the active to roll its log
@@ -180,7 +234,7 @@ public class BootstrapStandby implements
// Ensure that we have enough edits already in the shared directory to
// start up from the last checkpoint on the active.
if (!checkLogsAvailableForRead(image, imageTxId, rollTxId)) {
- return 1;
+ return ERR_CODE_LOGS_UNAVAILABLE;
}
image.getStorage().writeTransactionIdFileToStorage(rollTxId);
@@ -193,6 +247,14 @@ public class BootstrapStandby implements
return 0;
}
+
+ private void transitionOtherNNActive()
+ throws AccessControlException, ServiceFailedException, IOException {
+ LOG.info("Transitioning the running namenode to active...");
+ createHAProtocolProxy().transitionToActive();
+ LOG.info("Successful");
+ }
+
private boolean checkLogsAvailableForRead(FSImage image, long imageTxId,
long rollTxId) {
@@ -225,12 +287,14 @@ public class BootstrapStandby implements
}
}
- private void checkLayoutVersion(NamespaceInfo nsInfo) throws IOException {
- if (nsInfo.getLayoutVersion() != HdfsConstants.LAYOUT_VERSION) {
- throw new IOException("Layout version on remote node (" +
- nsInfo.getLayoutVersion() + ") does not match " +
- "this node's layout version (" + HdfsConstants.LAYOUT_VERSION + ")");
- }
+ private boolean checkLayoutVersion(NamespaceInfo nsInfo) throws IOException {
+ return (nsInfo.getLayoutVersion() == HdfsConstants.LAYOUT_VERSION);
+ }
+
+ private boolean isOtherNNActive()
+ throws AccessControlException, IOException {
+ HAServiceStatus status = createHAProtocolProxy().getServiceStatus();
+ return status.getState() == HAServiceState.ACTIVE;
}
private void parseConfAndFindOtherNN() throws IOException {
Modified: hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestBootstrapStandby.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestBootstrapStandby.java?rev=1324558&r1=1324557&r2=1324558&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestBootstrapStandby.java (original)
+++ hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestBootstrapStandby.java Wed Apr 11 05:16:06 2012
@@ -17,6 +17,7 @@
*/
package org.apache.hadoop.hdfs.server.namenode.ha;
+import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.IOException;
import java.net.URI;
@@ -40,6 +41,7 @@ import org.junit.After;
import org.junit.Before;
import org.junit.Test;
+import com.google.common.base.Suppliers;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Lists;
@@ -170,7 +172,7 @@ public class TestBootstrapStandby {
int rc = BootstrapStandby.run(
new String[]{"-force"},
cluster.getConfiguration(1));
- assertEquals(1, rc);
+ assertEquals(BootstrapStandby.ERR_CODE_LOGS_UNAVAILABLE, rc);
} finally {
logs.stopCapturing();
}
@@ -184,7 +186,7 @@ public class TestBootstrapStandby {
int rc = BootstrapStandby.run(
new String[]{"-nonInteractive"},
cluster.getConfiguration(1));
- assertEquals(1, rc);
+ assertEquals(BootstrapStandby.ERR_CODE_ALREADY_FORMATTED, rc);
// Should pass with -force
rc = BootstrapStandby.run(
@@ -192,6 +194,24 @@ public class TestBootstrapStandby {
cluster.getConfiguration(1));
assertEquals(0, rc);
}
+
+ @Test(timeout=30000)
+ public void testOtherNodeNotActive() throws Exception {
+ cluster.transitionToStandby(0);
+ int rc = BootstrapStandby.run(
+ new String[]{"-nonInteractive"},
+ cluster.getConfiguration(1));
+ assertEquals(BootstrapStandby.ERR_CODE_OTHER_NN_NOT_ACTIVE, rc);
+
+ // Answer "yes" to the prompt about transition to active
+ System.setIn(new ByteArrayInputStream("yes\n".getBytes()));
+ rc = BootstrapStandby.run(
+ new String[]{"-force"},
+ cluster.getConfiguration(1));
+ assertEquals(0, rc);
+
+ assertFalse(nn0.getNamesystem().isInStandbyState());
+ }
private void assertNNFilesMatch() throws Exception {
List<File> curDirs = Lists.newArrayList();