You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by dh...@apache.org on 2007/09/18 08:39:51 UTC
svn commit: r576729 - in /lucene/hadoop/trunk: CHANGES.txt
src/java/org/apache/hadoop/dfs/DataNode.java
src/java/org/apache/hadoop/dfs/FSEditLog.java
src/java/org/apache/hadoop/dfs/FSImage.java
src/java/org/apache/hadoop/dfs/FSNamesystem.java
Author: dhruba
Date: Mon Sep 17 23:39:50 2007
New Revision: 576729
URL: http://svn.apache.org/viewvc?rev=576729&view=rev
Log:
HADOOP-1762. The Namenode fsimage does not contain the list of
Datanodes. (Raghu Angadi via dhruba)
Modified:
lucene/hadoop/trunk/CHANGES.txt
lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DataNode.java
lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSEditLog.java
lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSImage.java
lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSNamesystem.java
Modified: lucene/hadoop/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/CHANGES.txt?rev=576729&r1=576728&r2=576729&view=diff
==============================================================================
--- lucene/hadoop/trunk/CHANGES.txt (original)
+++ lucene/hadoop/trunk/CHANGES.txt Mon Sep 17 23:39:50 2007
@@ -83,6 +83,9 @@
BUG FIXES
+ HADOOP-1762. The Namenode fsimage does not contain a list of
+ Datanodes. (Raghu Angadi via dhruba)
+
HADOOP-1890. Removed debugging prints introduced by HADOOP-1774.
(Raghu Angadi via dhruba)
Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DataNode.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DataNode.java?rev=576729&r1=576728&r2=576729&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DataNode.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DataNode.java Mon Sep 17 23:39:50 2007
@@ -36,6 +36,8 @@
import java.io.*;
import java.net.*;
import java.util.*;
+import java.security.NoSuchAlgorithmException;
+import java.security.SecureRandom;
import org.apache.hadoop.metrics.MetricsContext;
import org.apache.hadoop.metrics.MetricsRecord;
import org.apache.hadoop.metrics.Updater;
@@ -337,6 +339,36 @@
return "<namenode>";
}
+ private void setNewStorageID(DatanodeRegistration dnReg) {
+ /* Sets dnReg.storageID to
+ * "DS-randInt-ipaddr-port-currentTimeMillis"
+ * It is considered extremely rare for all these numbers to match
+ * on a different machine accidentally, for the following reasons:
+ * a) SecureRandom(INT_MAX) is pretty much random (1 in 2 billion), and
+ * b) Good chance ip address would be different, and
+ * c) Even on the same machine, Datanode is designed to use different ports.
+ * d) Good chance that these are started at different times.
+ * For a conflict to occur, all 4 of the above have to match.
+ * The format of this string can be changed anytime in future without
+ * affecting its functionality.
+ */
+ String ip = "unknownIP";
+ try {
+ ip = DNS.getDefaultIP("default");
+ } catch (UnknownHostException ignored) {
+ LOG.warn("Could not find ip address of \"default\" inteface.");
+ }
+
+ int rand = 0;
+ try {
+ rand = SecureRandom.getInstance("SHA1PRNG").nextInt(Integer.MAX_VALUE);
+ } catch (NoSuchAlgorithmException e) {
+ LOG.warn("Could not use SecureRandom");
+ rand = (new Random()).nextInt(Integer.MAX_VALUE);
+ }
+ dnReg.storageID = "DS-" + rand + "-"+ ip + "-" + dnReg.getPort() + "-" +
+ System.currentTimeMillis();
+ }
/**
* Register datanode
* <p>
@@ -349,6 +381,9 @@
* @throws IOException
*/
private void register() throws IOException {
+ if (dnRegistration.getStorageID().equals("")) {
+ setNewStorageID(dnRegistration);
+ }
while(shouldRun) {
try {
// reset name to machineName. Mainly for web interface.
Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSEditLog.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSEditLog.java?rev=576729&r1=576728&r2=576729&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSEditLog.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSEditLog.java Mon Sep 17 23:39:50 2007
@@ -43,8 +43,9 @@
private static final byte OP_DELETE = 2;
private static final byte OP_MKDIR = 3;
private static final byte OP_SET_REPLICATION = 4;
- private static final byte OP_DATANODE_ADD = 5;
- private static final byte OP_DATANODE_REMOVE = 6;
+ //the following two are used only for backward compatibility:
+ @Deprecated private static final byte OP_DATANODE_ADD = 5;
+ @Deprecated private static final byte OP_DATANODE_REMOVE = 6;
private ArrayList<EditLogOutputStream> editStreams = null;
private FSImage fsimage = null;
@@ -383,8 +384,7 @@
+ " for version " + logVersion);
FSImage.DatanodeImage nodeimage = new FSImage.DatanodeImage();
nodeimage.readFields(in);
- DatanodeDescriptor node = nodeimage.getDatanodeDescriptor();
- fsNamesys.unprotectedAddDatanode(node);
+ //Datanodes are not persistent any more.
break;
}
case OP_DATANODE_REMOVE: {
@@ -394,11 +394,7 @@
DatanodeID nodeID = new DatanodeID();
nodeID.readFields(in);
DatanodeDescriptor node = fsNamesys.getDatanode(nodeID);
- if (node != null) {
- fsNamesys.unprotectedRemoveDatanode(node);
- // physically remove node from datanodeMap
- fsNamesys.wipeDatanode(nodeID);
- }
+ //Datanodes are not persistent any more.
break;
}
default: {
@@ -550,22 +546,6 @@
new UTF8(src),
FSEditLog.toLogLong(timestamp)};
logEdit(OP_DELETE, new ArrayWritable(UTF8.class, info), null);
- }
-
- /**
- * Creates a record in edit log corresponding to a new data node
- * registration event.
- */
- void logAddDatanode(DatanodeDescriptor node) {
- logEdit(OP_DATANODE_ADD, new FSImage.DatanodeImage(node), null);
- }
-
- /**
- * Creates a record in edit log corresponding to a data node
- * removal event.
- */
- void logRemoveDatanode(DatanodeID nodeID) {
- logEdit(OP_DATANODE_REMOVE, new DatanodeID(nodeID), null);
}
static UTF8 toLogReplication(short replication) {
Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSImage.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSImage.java?rev=576729&r1=576728&r2=576729&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSImage.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSImage.java Mon Sep 17 23:39:50 2007
@@ -850,32 +850,25 @@
}
/**
- * Save list of datanodes contained in {@link FSNamesystem#datanodeMap}.
- * Only the {@link DatanodeInfo} part is stored.
- * The {@link DatanodeDescriptor#blocks} is transient.
+ * Earlier versions used to store all the known datanodes.
+ * DFS doesn't store datanodes anymore.
*
* @param out output stream
* @throws IOException
*/
void saveDatanodes(DataOutputStream out) throws IOException {
- Map datanodeMap = FSNamesystem.getFSNamesystem().datanodeMap;
- int size = datanodeMap.size();
- out.writeInt(size);
- for(Iterator it = datanodeMap.values().iterator(); it.hasNext();) {
- DatanodeImage nodeImage = new DatanodeImage((DatanodeDescriptor) it.next());
- nodeImage.write(out);
- }
+ // we don't store datanodes anymore.
+ out.writeInt(0);
}
void loadDatanodes(int version, DataInputStream in) throws IOException {
if (version > -3) // pre datanode image version
return;
- FSNamesystem fsNamesys = FSNamesystem.getFSNamesystem();
int size = in.readInt();
for(int i = 0; i < size; i++) {
DatanodeImage nodeImage = new DatanodeImage();
nodeImage.readFields(in);
- fsNamesys.unprotectedAddDatanode(nodeImage.getDatanodeDescriptor());
+ // We don't need to add these descriptors any more.
}
}
Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSNamesystem.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSNamesystem.java?rev=576729&r1=576728&r2=576729&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSNamesystem.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSNamesystem.java Mon Sep 17 23:39:50 2007
@@ -1557,15 +1557,7 @@
*
* @see DataNode#register()
*/
- public void registerDatanode(DatanodeRegistration nodeReg,
- String networkLocation
- ) throws IOException {
- registerDatanodeInternal(nodeReg, networkLocation);
- getEditLog().logSync();
- }
-
- private synchronized void registerDatanodeInternal(
- DatanodeRegistration nodeReg,
+ public synchronized void registerDatanode(DatanodeRegistration nodeReg,
String networkLocation
) throws IOException {
@@ -1604,8 +1596,6 @@
removeDatanode(nodeN);
// physically remove node from datanodeMap
wipeDatanode(nodeN);
- // and log removal
- getEditLog().logRemoveDatanode(nodeN);
nodeN = null;
}
@@ -1618,13 +1608,19 @@
+ "node restarted.");
} else {
// nodeS is found
- // The registering datanode is a replacement node for the existing
- // data storage, which from now on will be served by a new node.
- NameNode.stateChangeLog.debug(
- "BLOCK* NameSystem.registerDatanode: "
+ /* The registering datanode is a replacement node for the existing
+ data storage, which from now on will be served by a new node.
+ If this message repeats, both nodes might have same storageID
+ by (insanely rare) random chance. User needs to restart one of the
+ nodes with its data cleared (or user can just remove the StorageID
+ value in "VERSION" file under the data directory of the datanode,
+ but this might not work if the VERSION file format has changed).
+ */
+ NameNode.stateChangeLog.info( "BLOCK* NameSystem.registerDatanode: "
+ "node " + nodeS.getName()
- + " is replaced by " + nodeReg.getName() + ".");
- getEditLog().logRemoveDatanode(nodeS);
+ + " is replaced by " + nodeReg.getName() +
+ " with the same storageID " +
+ nodeReg.getStorageID());
}
// update cluster map
clusterMap.remove(nodeS);
@@ -1632,9 +1628,6 @@
nodeS.setNetworkLocation(networkLocation);
clusterMap.add(nodeS);
nodeS.setHostName(hostName);
- if ( nodeS != nodeN ) {
- getEditLog().logAddDatanode( nodeS );
- }
// also treat the registration message as a heartbeat
synchronized(heartbeats) {
@@ -1662,7 +1655,6 @@
= new DatanodeDescriptor(nodeReg, networkLocation, hostName);
unprotectedAddDatanode(nodeDescr);
clusterMap.add(nodeDescr);
- getEditLog().logAddDatanode(nodeDescr);
// also treat the registration message as a heartbeat
synchronized(heartbeats) {