You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by vi...@apache.org on 2015/09/02 20:11:40 UTC
hadoop git commit: HDFS-8127. NameNode Failover during HA upgrade can
cause DataNode to finalize upgrade. Contributed by Jing Zhao.
Repository: hadoop
Updated Branches:
refs/heads/branch-2.6.1 dcc0d4658 -> b7e789641
HDFS-8127. NameNode Failover during HA upgrade can cause DataNode to finalize upgrade. Contributed by Jing Zhao.
(cherry picked from commit fddd55279d0bdd08b3b40aba6fe2ded1d2e0d846)
(cherry picked from commit 38b031d6bab8527698bd186887d301bd6a63cf01)
Conflicts:
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NNUpgradeUtil.java
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/BootstrapStandby.java
(cherry picked from commit b4e227e65d3a81150f46aac5e987d759defc1452)
Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/b7e78964
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/b7e78964
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/b7e78964
Branch: refs/heads/branch-2.6.1
Commit: b7e7896410bdd8aac5a8b9f9327227542e730ece
Parents: dcc0d46
Author: Jing Zhao <ji...@apache.org>
Authored: Tue Apr 14 15:05:23 2015 -0700
Committer: Vinod Kumar Vavilapalli <vi...@apache.org>
Committed: Wed Sep 2 11:06:04 2015 -0700
----------------------------------------------------------------------
hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 3 +
.../NamenodeProtocolServerSideTranslatorPB.java | 17 +++
.../NamenodeProtocolTranslatorPB.java | 16 +++
.../hadoop/hdfs/server/namenode/FSImage.java | 15 +-
.../hdfs/server/namenode/FSNamesystem.java | 4 +-
.../hadoop/hdfs/server/namenode/NNStorage.java | 2 +-
.../hdfs/server/namenode/NNUpgradeUtil.java | 52 ++++---
.../hdfs/server/namenode/NameNodeRpcServer.java | 7 +
.../server/namenode/ha/BootstrapStandby.java | 141 ++++++++++++++++---
.../hdfs/server/protocol/NamenodeProtocol.java | 7 +
.../src/main/proto/NamenodeProtocol.proto | 16 +++
.../ha/TestBootstrapStandbyWithQJM.java | 89 +++++++++++-
.../namenode/ha/TestDFSUpgradeWithHA.java | 5 +-
13 files changed, 322 insertions(+), 52 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hadoop/blob/b7e78964/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
index 231cc8e..c642f28 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
+++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
@@ -120,6 +120,9 @@ Release 2.6.1 - UNRELEASED
HDFS-8072. Reserved RBW space is not released if client terminates while
writing block. (Arpit Agarwal)
+ HDFS-8127. NameNode Failover during HA upgrade can cause DataNode to
+ finalize upgrade. (jing9)
+
Release 2.6.0 - 2014-11-18
INCOMPATIBLE CHANGES
http://git-wip-us.apache.org/repos/asf/hadoop/blob/b7e78964/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/NamenodeProtocolServerSideTranslatorPB.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/NamenodeProtocolServerSideTranslatorPB.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/NamenodeProtocolServerSideTranslatorPB.java
index 284808e..dbacded 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/NamenodeProtocolServerSideTranslatorPB.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/NamenodeProtocolServerSideTranslatorPB.java
@@ -22,6 +22,7 @@ import java.io.IOException;
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
import org.apache.hadoop.hdfs.protocol.proto.HdfsProtos.VersionRequestProto;
import org.apache.hadoop.hdfs.protocol.proto.HdfsProtos.VersionResponseProto;
+import org.apache.hadoop.hdfs.protocol.proto.NamenodeProtocolProtos;
import org.apache.hadoop.hdfs.protocol.proto.NamenodeProtocolProtos.EndCheckpointRequestProto;
import org.apache.hadoop.hdfs.protocol.proto.NamenodeProtocolProtos.EndCheckpointResponseProto;
import org.apache.hadoop.hdfs.protocol.proto.NamenodeProtocolProtos.ErrorReportRequestProto;
@@ -36,6 +37,8 @@ import org.apache.hadoop.hdfs.protocol.proto.NamenodeProtocolProtos.GetMostRecen
import org.apache.hadoop.hdfs.protocol.proto.NamenodeProtocolProtos.GetMostRecentCheckpointTxIdResponseProto;
import org.apache.hadoop.hdfs.protocol.proto.NamenodeProtocolProtos.GetTransactionIdRequestProto;
import org.apache.hadoop.hdfs.protocol.proto.NamenodeProtocolProtos.GetTransactionIdResponseProto;
+import org.apache.hadoop.hdfs.protocol.proto.NamenodeProtocolProtos.IsUpgradeFinalizedRequestProto;
+import org.apache.hadoop.hdfs.protocol.proto.NamenodeProtocolProtos.IsUpgradeFinalizedResponseProto;
import org.apache.hadoop.hdfs.protocol.proto.NamenodeProtocolProtos.RegisterRequestProto;
import org.apache.hadoop.hdfs.protocol.proto.NamenodeProtocolProtos.RegisterResponseProto;
import org.apache.hadoop.hdfs.protocol.proto.NamenodeProtocolProtos.RollEditLogRequestProto;
@@ -222,4 +225,18 @@ public class NamenodeProtocolServerSideTranslatorPB implements
return VersionResponseProto.newBuilder()
.setInfo(PBHelper.convert(info)).build();
}
+
+ @Override
+ public IsUpgradeFinalizedResponseProto isUpgradeFinalized(
+ RpcController controller, IsUpgradeFinalizedRequestProto request)
+ throws ServiceException {
+ boolean isUpgradeFinalized;
+ try {
+ isUpgradeFinalized = impl.isUpgradeFinalized();
+ } catch (IOException e) {
+ throw new ServiceException(e);
+ }
+ return IsUpgradeFinalizedResponseProto.newBuilder()
+ .setIsUpgradeFinalized(isUpgradeFinalized).build();
+ }
}
http://git-wip-us.apache.org/repos/asf/hadoop/blob/b7e78964/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/NamenodeProtocolTranslatorPB.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/NamenodeProtocolTranslatorPB.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/NamenodeProtocolTranslatorPB.java
index 98e99f0..82c5c4c 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/NamenodeProtocolTranslatorPB.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/NamenodeProtocolTranslatorPB.java
@@ -26,6 +26,7 @@ import org.apache.hadoop.hdfs.protocol.DatanodeID;
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
import org.apache.hadoop.hdfs.protocol.proto.HdfsProtos.NamenodeCommandProto;
import org.apache.hadoop.hdfs.protocol.proto.HdfsProtos.VersionRequestProto;
+import org.apache.hadoop.hdfs.protocol.proto.NamenodeProtocolProtos;
import org.apache.hadoop.hdfs.protocol.proto.NamenodeProtocolProtos.EndCheckpointRequestProto;
import org.apache.hadoop.hdfs.protocol.proto.NamenodeProtocolProtos.ErrorReportRequestProto;
import org.apache.hadoop.hdfs.protocol.proto.NamenodeProtocolProtos.GetBlockKeysRequestProto;
@@ -34,6 +35,8 @@ import org.apache.hadoop.hdfs.protocol.proto.NamenodeProtocolProtos.GetBlocksReq
import org.apache.hadoop.hdfs.protocol.proto.NamenodeProtocolProtos.GetEditLogManifestRequestProto;
import org.apache.hadoop.hdfs.protocol.proto.NamenodeProtocolProtos.GetMostRecentCheckpointTxIdRequestProto;
import org.apache.hadoop.hdfs.protocol.proto.NamenodeProtocolProtos.GetTransactionIdRequestProto;
+import org.apache.hadoop.hdfs.protocol.proto.NamenodeProtocolProtos.IsUpgradeFinalizedRequestProto;
+import org.apache.hadoop.hdfs.protocol.proto.NamenodeProtocolProtos.IsUpgradeFinalizedResponseProto;
import org.apache.hadoop.hdfs.protocol.proto.NamenodeProtocolProtos.RegisterRequestProto;
import org.apache.hadoop.hdfs.protocol.proto.NamenodeProtocolProtos.RollEditLogRequestProto;
import org.apache.hadoop.hdfs.protocol.proto.NamenodeProtocolProtos.StartCheckpointRequestProto;
@@ -232,4 +235,17 @@ public class NamenodeProtocolTranslatorPB implements NamenodeProtocol,
RPC.RpcKind.RPC_PROTOCOL_BUFFER,
RPC.getProtocolVersion(NamenodeProtocolPB.class), methodName);
}
+
+ @Override
+ public boolean isUpgradeFinalized() throws IOException {
+ IsUpgradeFinalizedRequestProto req = IsUpgradeFinalizedRequestProto
+ .newBuilder().build();
+ try {
+ IsUpgradeFinalizedResponseProto response = rpcProxy.isUpgradeFinalized(
+ NULL_CONTROLLER, req);
+ return response.getIsUpgradeFinalized();
+ } catch (ServiceException e) {
+ throw ProtobufHelper.getRemoteException(e);
+ }
+ }
}
http://git-wip-us.apache.org/repos/asf/hadoop/blob/b7e78964/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java
index 51efb51..d464e99 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java
@@ -199,7 +199,7 @@ public class FSImage implements Closeable {
// check whether all is consistent before transitioning.
Map<StorageDirectory, StorageState> dataDirStates =
new HashMap<StorageDirectory, StorageState>();
- boolean isFormatted = recoverStorageDirs(startOpt, dataDirStates);
+ boolean isFormatted = recoverStorageDirs(startOpt, storage, dataDirStates);
if (LOG.isTraceEnabled()) {
LOG.trace("Data dir states:\n " +
@@ -288,8 +288,9 @@ public class FSImage implements Closeable {
* @param dataDirStates output of storage directory states
* @return true if there is at least one valid formatted storage directory
*/
- private boolean recoverStorageDirs(StartupOption startOpt,
- Map<StorageDirectory, StorageState> dataDirStates) throws IOException {
+ public static boolean recoverStorageDirs(StartupOption startOpt,
+ NNStorage storage, Map<StorageDirectory, StorageState> dataDirStates)
+ throws IOException {
boolean isFormatted = false;
// This loop needs to be over all storage dirs, even shared dirs, to make
// sure that we properly examine their state, but we make sure we don't
@@ -339,7 +340,7 @@ public class FSImage implements Closeable {
}
/** Check if upgrade is in progress. */
- void checkUpgrade(FSNamesystem target) throws IOException {
+ public static void checkUpgrade(NNStorage storage) throws IOException {
// Upgrade or rolling upgrade is allowed only if there are
// no previous fs states in any of the directories
for (Iterator<StorageDirectory> it = storage.dirIterator(false); it.hasNext();) {
@@ -351,6 +352,10 @@ public class FSImage implements Closeable {
}
}
+ void checkUpgrade() throws IOException {
+ checkUpgrade(storage);
+ }
+
/**
* @return true if there is rollback fsimage (for rolling upgrade) in NameNode
* directory.
@@ -368,7 +373,7 @@ public class FSImage implements Closeable {
}
void doUpgrade(FSNamesystem target) throws IOException {
- checkUpgrade(target);
+ checkUpgrade();
// load the latest image
this.loadFSImage(target, StartupOption.UPGRADE, null);
http://git-wip-us.apache.org/repos/asf/hadoop/blob/b7e78964/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
index fa52981..5f396f7 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
@@ -8365,7 +8365,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
void startRollingUpgradeInternal(long startTime)
throws IOException {
checkRollingUpgrade("start rolling upgrade");
- getFSImage().checkUpgrade(this);
+ getFSImage().checkUpgrade();
setRollingUpgradeInfo(false, startTime);
}
@@ -8382,7 +8382,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
+ "in order to create namespace image.");
}
checkRollingUpgrade("start rolling upgrade");
- getFSImage().checkUpgrade(this);
+ getFSImage().checkUpgrade();
// in non-HA setup, we do an extra checkpoint to generate a rollback image
getFSImage().saveNamespace(this, NameNodeFile.IMAGE_ROLLBACK, null);
LOG.info("Successfully saved namespace for preparing rolling upgrade.");
http://git-wip-us.apache.org/repos/asf/hadoop/blob/b7e78964/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NNStorage.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NNStorage.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NNStorage.java
index cb5252e..dbb2c50 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NNStorage.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NNStorage.java
@@ -975,7 +975,7 @@ public class NNStorage extends Storage implements Closeable,
}
/** Validate and set block pool ID */
- void setBlockPoolID(String bpid) {
+ public void setBlockPoolID(String bpid) {
blockpoolID = bpid;
}
http://git-wip-us.apache.org/repos/asf/hadoop/blob/b7e78964/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NNUpgradeUtil.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NNUpgradeUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NNUpgradeUtil.java
index c63da20..c01b11d 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NNUpgradeUtil.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NNUpgradeUtil.java
@@ -31,7 +31,7 @@ import org.apache.hadoop.hdfs.server.common.StorageInfo;
import com.google.common.base.Preconditions;
-abstract class NNUpgradeUtil {
+public abstract class NNUpgradeUtil {
private static final Log LOG = LogFactory.getLog(NNUpgradeUtil.class);
@@ -110,25 +110,12 @@ abstract class NNUpgradeUtil {
static void doPreUpgrade(Configuration conf, StorageDirectory sd)
throws IOException {
LOG.info("Starting upgrade of storage directory " + sd.getRoot());
- File curDir = sd.getCurrentDir();
- File prevDir = sd.getPreviousDir();
- final File tmpDir = sd.getPreviousTmp();
-
- Preconditions.checkState(curDir.exists(),
- "Current directory must exist for preupgrade.");
- Preconditions.checkState(!prevDir.exists(),
- "Previous directory must not exist for preupgrade.");
- Preconditions.checkState(!tmpDir.exists(),
- "Previous.tmp directory must not exist for preupgrade."
- + "Consider restarting for recovery.");
// rename current to tmp
- NNStorage.rename(curDir, tmpDir);
-
- if (!curDir.mkdir()) {
- throw new IOException("Cannot create directory " + curDir);
- }
+ renameCurToTmp(sd);
+ final File curDir = sd.getCurrentDir();
+ final File tmpDir = sd.getPreviousTmp();
String[] fileNameList = tmpDir.list(new FilenameFilter() {
@Override
public boolean accept(File dir, String name) {
@@ -162,6 +149,31 @@ abstract class NNUpgradeUtil {
in.close();
}
}
+
+ /**
+ * Rename the existing current dir to previous.tmp, and create a new empty
+ * current dir.
+ */
+ public static void renameCurToTmp(StorageDirectory sd) throws IOException {
+ File curDir = sd.getCurrentDir();
+ File prevDir = sd.getPreviousDir();
+ final File tmpDir = sd.getPreviousTmp();
+
+ Preconditions.checkState(curDir.exists(),
+ "Current directory must exist for preupgrade.");
+ Preconditions.checkState(!prevDir.exists(),
+ "Previous directory must not exist for preupgrade.");
+ Preconditions.checkState(!tmpDir.exists(),
+ "Previous.tmp directory must not exist for preupgrade."
+ + "Consider restarting for recovery.");
+
+ // rename current to tmp
+ NNStorage.rename(curDir, tmpDir);
+
+ if (!curDir.mkdir()) {
+ throw new IOException("Cannot create directory " + curDir);
+ }
+ }
/**
* Perform the upgrade of the storage dir to the given storage info. The new
@@ -172,14 +184,14 @@ abstract class NNUpgradeUtil {
* @param storage info about the new upgraded versions.
* @throws IOException in the event of error
*/
- static void doUpgrade(StorageDirectory sd, Storage storage) throws
- IOException {
+ public static void doUpgrade(StorageDirectory sd, Storage storage)
+ throws IOException {
LOG.info("Performing upgrade of storage directory " + sd.getRoot());
try {
// Write the version file, since saveFsImage only makes the
// fsimage_<txid>, and the directory is otherwise empty.
storage.writeProperties(sd);
-
+
File prevDir = sd.getPreviousDir();
File tmpDir = sd.getPreviousTmp();
Preconditions.checkState(!prevDir.exists(),
http://git-wip-us.apache.org/repos/asf/hadoop/blob/b7e78964/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java
index 2d40696..d7d65db 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java
@@ -991,6 +991,13 @@ class NameNodeRpcServer implements NamenodeProtocols {
namesystem.checkSuperuserPrivilege();
return namesystem.getEditLog().getEditLogManifest(sinceTxId);
}
+
+ @Override // NamenodeProtocol
+ public boolean isUpgradeFinalized() throws IOException {
+ checkNNStartup();
+ namesystem.checkSuperuserPrivilege();
+ return namesystem.isUpgradeFinalized();
+ }
@Override // ClientProtocol
public void finalizeUpgrade() throws IOException {
http://git-wip-us.apache.org/repos/asf/hadoop/blob/b7e78964/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/BootstrapStandby.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/BootstrapStandby.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/BootstrapStandby.java
index 43b8fce..0134d2e 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/BootstrapStandby.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/BootstrapStandby.java
@@ -26,7 +26,10 @@ import java.net.URI;
import java.net.URL;
import java.security.PrivilegedAction;
import java.util.Collection;
+import java.util.HashMap;
+import java.util.Iterator;
import java.util.List;
+import java.util.Map;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
@@ -38,13 +41,17 @@ import org.apache.hadoop.hdfs.DFSUtil;
import org.apache.hadoop.hdfs.HAUtil;
import org.apache.hadoop.hdfs.NameNodeProxies;
import org.apache.hadoop.hdfs.protocol.HdfsConstants;
-import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.StartupOption;
+import org.apache.hadoop.hdfs.server.common.InconsistentFSStateException;
import org.apache.hadoop.hdfs.server.common.Storage;
+import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.StartupOption;
+import org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory;
+import org.apache.hadoop.hdfs.server.common.Storage.StorageState;
import org.apache.hadoop.hdfs.server.namenode.EditLogInputStream;
import org.apache.hadoop.hdfs.server.namenode.FSImage;
import org.apache.hadoop.hdfs.server.namenode.FSNamesystem;
import org.apache.hadoop.hdfs.server.namenode.NNStorage;
import org.apache.hadoop.hdfs.server.namenode.NNStorage.NameNodeFile;
+import org.apache.hadoop.hdfs.server.namenode.NNUpgradeUtil;
import org.apache.hadoop.hdfs.server.namenode.NameNode;
import org.apache.hadoop.hdfs.server.namenode.TransferFsImage;
import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocol;
@@ -142,11 +149,12 @@ public class BootstrapStandby implements Tool, Configurable {
}
private int doRun() throws IOException {
-
NamenodeProtocol proxy = createNNProtocolProxy();
NamespaceInfo nsInfo;
+ boolean isUpgradeFinalized;
try {
nsInfo = proxy.versionRequest();
+ isUpgradeFinalized = proxy.isUpgradeFinalized();
} catch (IOException ioe) {
LOG.fatal("Unable to fetch namespace information from active NN at " +
otherIpcAddr + ": " + ioe.getMessage());
@@ -163,7 +171,6 @@ public class BootstrapStandby implements Tool, Configurable {
return ERR_CODE_INVALID_VERSION;
}
-
System.out.println(
"=====================================================\n" +
"About to bootstrap Standby ID " + nnId + " from:\n" +
@@ -175,35 +182,133 @@ public class BootstrapStandby implements Tool, Configurable {
" Block pool ID: " + nsInfo.getBlockPoolID() + "\n" +
" Cluster ID: " + nsInfo.getClusterID() + "\n" +
" Layout version: " + nsInfo.getLayoutVersion() + "\n" +
+ " isUpgradeFinalized: " + isUpgradeFinalized + "\n" +
"=====================================================");
-
- long imageTxId = proxy.getMostRecentCheckpointTxId();
- long curTxId = proxy.getTransactionID();
NNStorage storage = new NNStorage(conf, dirsToFormat, editUrisToFormat);
-
+
+ if (!isUpgradeFinalized) {
+ // the remote NameNode is in upgrade state, this NameNode should also
+ // create the previous directory. First prepare the upgrade and rename
+ // the current dir to previous.tmp.
+ LOG.info("The active NameNode is in Upgrade. " +
+ "Prepare the upgrade for the standby NameNode as well.");
+ if (!doPreUpgrade(storage, nsInfo)) {
+ return ERR_CODE_ALREADY_FORMATTED;
+ }
+ } else if (!format(storage, nsInfo)) { // prompt the user to format storage
+ return ERR_CODE_ALREADY_FORMATTED;
+ }
+
+ // download the fsimage from active namenode
+ int download = downloadImage(storage, proxy);
+ if (download != 0) {
+ return download;
+ }
+
+ // finish the upgrade: rename previous.tmp to previous
+ if (!isUpgradeFinalized) {
+ doUpgrade(storage);
+ }
+ return 0;
+ }
+
+ /**
+ * Iterate over all the storage directories, checking if it should be
+ * formatted. Format the storage if necessary and allowed by the user.
+ * @return True if formatting is processed
+ */
+ private boolean format(NNStorage storage, NamespaceInfo nsInfo)
+ throws IOException {
// Check with the user before blowing away data.
- if (!Storage.confirmFormat(storage.dirIterable(null),
- force, interactive)) {
+ if (!Storage.confirmFormat(storage.dirIterable(null), force, interactive)) {
storage.close();
- return ERR_CODE_ALREADY_FORMATTED;
+ return false;
+ } else {
+ // Format the storage (writes VERSION file)
+ storage.format(nsInfo);
+ return true;
+ }
+ }
+
+ /**
+ * This is called when using bootstrapStandby for HA upgrade. The SBN should
+ * also create previous directory so that later when it starts, it understands
+ * that the cluster is in the upgrade state. This function renames the old
+ * current directory to previous.tmp.
+ */
+ private boolean doPreUpgrade(NNStorage storage, NamespaceInfo nsInfo)
+ throws IOException {
+ boolean isFormatted = false;
+ Map<StorageDirectory, StorageState> dataDirStates =
+ new HashMap<StorageDirectory, StorageState>();
+ try {
+ isFormatted = FSImage.recoverStorageDirs(StartupOption.UPGRADE, storage,
+ dataDirStates);
+ if (dataDirStates.values().contains(StorageState.NOT_FORMATTED)) {
+ // recoverStorageDirs returns true if there is a formatted directory
+ isFormatted = false;
+ System.err.println("The original storage directory is not formatted.");
+ }
+ } catch (InconsistentFSStateException e) {
+ // if the storage is in a bad state,
+ LOG.warn("The storage directory is in an inconsistent state", e);
+ } finally {
+ storage.unlockAll();
}
-
- // Format the storage (writes VERSION file)
- storage.format(nsInfo);
- // Load the newly formatted image, using all of the directories (including shared
- // edits)
+ // if there is InconsistentFSStateException or the storage is not formatted,
+ // format the storage. Although this format is done through the new
+ // software, since in HA setup the SBN is rolled back through
+ // "-bootstrapStandby", we should still be fine.
+ if (!isFormatted && !format(storage, nsInfo)) {
+ return false;
+ }
+
+ // make sure there is no previous directory
+ FSImage.checkUpgrade(storage);
+ // Do preUpgrade for each directory
+ for (Iterator<StorageDirectory> it = storage.dirIterator(false);
+ it.hasNext();) {
+ StorageDirectory sd = it.next();
+ try {
+ NNUpgradeUtil.renameCurToTmp(sd);
+ } catch (IOException e) {
+ LOG.error("Failed to move aside pre-upgrade storage " +
+ "in image directory " + sd.getRoot(), e);
+ throw e;
+ }
+ }
+ storage.setStorageInfo(nsInfo);
+ storage.setBlockPoolID(nsInfo.getBlockPoolID());
+ return true;
+ }
+
+ private void doUpgrade(NNStorage storage) throws IOException {
+ for (Iterator<StorageDirectory> it = storage.dirIterator(false);
+ it.hasNext();) {
+ StorageDirectory sd = it.next();
+ NNUpgradeUtil.doUpgrade(sd, storage);
+ }
+ }
+
+ private int downloadImage(NNStorage storage, NamenodeProtocol proxy)
+ throws IOException {
+ // Load the newly formatted image, using all of the directories
+ // (including shared edits)
+ final long imageTxId = proxy.getMostRecentCheckpointTxId();
+ final long curTxId = proxy.getTransactionID();
FSImage image = new FSImage(conf);
try {
image.getStorage().setStorageInfo(storage);
image.initEditLog(StartupOption.REGULAR);
assert image.getEditLog().isOpenForRead() :
- "Expected edit log to be open for read";
+ "Expected edit log to be open for read";
// Ensure that we have enough edits already in the shared directory to
// start up from the last checkpoint on the active.
- if (!skipSharedEditsCheck && !checkLogsAvailableForRead(image, imageTxId, curTxId)) {
+ if (!skipSharedEditsCheck &&
+ !checkLogsAvailableForRead(image, imageTxId, curTxId)) {
return ERR_CODE_LOGS_UNAVAILABLE;
}
@@ -211,7 +316,7 @@ public class BootstrapStandby implements Tool, Configurable {
// Download that checkpoint into our storage directories.
MD5Hash hash = TransferFsImage.downloadImageToStorage(
- otherHttpAddr, imageTxId, storage, true);
+ otherHttpAddr, imageTxId, storage, true);
image.saveDigestAndRenameCheckpointImage(NameNodeFile.IMAGE, imageTxId,
hash);
} catch (IOException ioe) {
http://git-wip-us.apache.org/repos/asf/hadoop/blob/b7e78964/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/NamenodeProtocol.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/NamenodeProtocol.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/NamenodeProtocol.java
index 91f1e77..4048372 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/NamenodeProtocol.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/NamenodeProtocol.java
@@ -186,5 +186,12 @@ public interface NamenodeProtocol {
@Idempotent
public RemoteEditLogManifest getEditLogManifest(long sinceTxId)
throws IOException;
+
+ /**
+ * @return Whether the NameNode is in upgrade state (false) or not (true)
+ */
+ @Idempotent
+ public boolean isUpgradeFinalized() throws IOException;
+
}
http://git-wip-us.apache.org/repos/asf/hadoop/blob/b7e78964/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/NamenodeProtocol.proto
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/NamenodeProtocol.proto b/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/NamenodeProtocol.proto
index 4bcbeed..f7c1312 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/NamenodeProtocol.proto
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/NamenodeProtocol.proto
@@ -182,6 +182,16 @@ message GetEditLogManifestResponseProto {
}
/**
+ * void request
+ */
+message IsUpgradeFinalizedRequestProto {
+}
+
+message IsUpgradeFinalizedResponseProto {
+ required bool isUpgradeFinalized = 1;
+}
+
+/**
* Protocol used by the sub-ordinate namenode to send requests
* the active/primary namenode.
*
@@ -250,4 +260,10 @@ service NamenodeProtocolService {
*/
rpc getEditLogManifest(GetEditLogManifestRequestProto)
returns(GetEditLogManifestResponseProto);
+
+ /**
+ * Return whether the NameNode is in upgrade state (false) or not (true)
+ */
+ rpc isUpgradeFinalized(IsUpgradeFinalizedRequestProto)
+ returns (IsUpgradeFinalizedResponseProto);
}
http://git-wip-us.apache.org/repos/asf/hadoop/blob/b7e78964/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestBootstrapStandbyWithQJM.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestBootstrapStandbyWithQJM.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestBootstrapStandbyWithQJM.java
index c7756ca..ca8f563 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestBootstrapStandbyWithQJM.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestBootstrapStandbyWithQJM.java
@@ -18,7 +18,9 @@
package org.apache.hadoop.hdfs.server.namenode.ha;
import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import java.io.File;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
@@ -28,18 +30,26 @@ import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.hdfs.qjournal.MiniJournalCluster;
import org.apache.hadoop.hdfs.qjournal.MiniQJMHACluster;
+import org.apache.hadoop.hdfs.server.namenode.FSImage;
import org.apache.hadoop.hdfs.server.namenode.FSImageTestUtil;
+import org.apache.hadoop.hdfs.server.namenode.NNStorage;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import com.google.common.collect.ImmutableList;
+import org.mockito.internal.util.reflection.Whitebox;
/**
* Test BootstrapStandby when QJM is used for shared edits.
*/
-public class TestBootstrapStandbyWithQJM {
- private MiniQJMHACluster miniQjmHaCluster;
+public class TestBootstrapStandbyWithQJM {
+ enum UpgradeState {
+ NORMAL,
+ RECOVER,
+ FORMAT
+ }
+
private MiniDFSCluster cluster;
private MiniJournalCluster jCluster;
@@ -52,7 +62,7 @@ public class TestBootstrapStandbyWithQJM {
CommonConfigurationKeysPublic.IPC_CLIENT_CONNECTION_MAXIDLETIME_KEY,
0);
- miniQjmHaCluster = new MiniQJMHACluster.Builder(conf).build();
+ MiniQJMHACluster miniQjmHaCluster = new MiniQJMHACluster.Builder(conf).build();
cluster = miniQjmHaCluster.getDfsCluster();
jCluster = miniQjmHaCluster.getJournalCluster();
@@ -112,4 +122,77 @@ public class TestBootstrapStandbyWithQJM {
ImmutableList.of(0));
FSImageTestUtil.assertNNFilesMatch(cluster);
}
+
+ /**
+ * Test the bootstrapstandby while the other namenode is in upgrade state.
+ * Make sure a previous directory can be created.
+ */
+ @Test
+ public void testUpgrade() throws Exception {
+ testUpgrade(UpgradeState.NORMAL);
+ }
+
+ /**
+ * Similar with testUpgrade, but rename nn1's current directory to
+ * previous.tmp before bootstrapStandby, and make sure the nn1 is recovered
+ * first then converted into upgrade state.
+ */
+ @Test
+ public void testUpgradeWithRecover() throws Exception {
+ testUpgrade(UpgradeState.RECOVER);
+ }
+
+ /**
+ * Similar with testUpgrade, but rename nn1's current directory to a random
+ * name so that it's not formatted. Make sure the nn1 is formatted and then
+ * converted into upgrade state.
+ */
+ @Test
+ public void testUpgradeWithFormat() throws Exception {
+ testUpgrade(UpgradeState.FORMAT);
+ }
+
+ private void testUpgrade(UpgradeState state) throws Exception {
+ cluster.transitionToActive(0);
+ final Configuration confNN1 = cluster.getConfiguration(1);
+
+ final File current = cluster.getNameNode(1).getFSImage().getStorage()
+ .getStorageDir(0).getCurrentDir();
+ final File tmp = cluster.getNameNode(1).getFSImage().getStorage()
+ .getStorageDir(0).getPreviousTmp();
+ // shut down nn1
+ cluster.shutdownNameNode(1);
+
+ // make NN0 in upgrade state
+ FSImage fsImage0 = cluster.getNameNode(0).getNamesystem().getFSImage();
+ Whitebox.setInternalState(fsImage0, "isUpgradeFinalized", false);
+
+ switch (state) {
+ case RECOVER:
+ // rename the current directory to previous.tmp in nn1
+ NNStorage.rename(current, tmp);
+ break;
+ case FORMAT:
+ // rename the current directory to a random name so it's not formatted
+ final File wrongPath = new File(current.getParentFile(), "wrong");
+ NNStorage.rename(current, wrongPath);
+ break;
+ default:
+ break;
+ }
+
+ int rc = BootstrapStandby.run(new String[] { "-force" }, confNN1);
+ assertEquals(0, rc);
+
+ // Should have copied over the namespace from the standby
+ FSImageTestUtil.assertNNHasCheckpoints(cluster, 1,
+ ImmutableList.of(0));
+ FSImageTestUtil.assertNNFilesMatch(cluster);
+
+ // make sure the NN1 is in upgrade state, i.e., the previous directory has
+ // been successfully created
+ cluster.restartNameNode(1);
+ assertFalse(cluster.getNameNode(1).getNamesystem().isUpgradeFinalized());
+ }
+
}
http://git-wip-us.apache.org/repos/asf/hadoop/blob/b7e78964/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestDFSUpgradeWithHA.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestDFSUpgradeWithHA.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestDFSUpgradeWithHA.java
index c749431..a77b435 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestDFSUpgradeWithHA.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestDFSUpgradeWithHA.java
@@ -42,7 +42,6 @@ import org.apache.hadoop.hdfs.protocol.HdfsConstants;
import org.apache.hadoop.hdfs.qjournal.MiniQJMHACluster;
import org.apache.hadoop.hdfs.qjournal.MiniQJMHACluster.Builder;
import org.apache.hadoop.hdfs.qjournal.server.Journal;
-import org.apache.hadoop.hdfs.qjournal.server.JournalNode;
import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.StartupOption;
import org.apache.hadoop.hdfs.server.common.Storage;
import org.apache.hadoop.hdfs.server.namenode.NameNode;
@@ -594,7 +593,7 @@ public class TestDFSUpgradeWithHA {
cluster.restartNameNode(1);
checkNnPreviousDirExistence(cluster, 0, true);
- checkNnPreviousDirExistence(cluster, 1, false);
+ checkNnPreviousDirExistence(cluster, 1, true);
checkPreviousDirExistence(sharedDir, true);
assertCTimesEqual(cluster);
@@ -671,7 +670,7 @@ public class TestDFSUpgradeWithHA {
cluster.restartNameNode(1);
checkNnPreviousDirExistence(cluster, 0, true);
- checkNnPreviousDirExistence(cluster, 1, false);
+ checkNnPreviousDirExistence(cluster, 1, true);
checkJnPreviousDirExistence(qjCluster, true);
assertCTimesEqual(cluster);