You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by dh...@apache.org on 2007/09/18 08:39:51 UTC

svn commit: r576729 - in /lucene/hadoop/trunk: CHANGES.txt src/java/org/apache/hadoop/dfs/DataNode.java src/java/org/apache/hadoop/dfs/FSEditLog.java src/java/org/apache/hadoop/dfs/FSImage.java src/java/org/apache/hadoop/dfs/FSNamesystem.java

Author: dhruba
Date: Mon Sep 17 23:39:50 2007
New Revision: 576729

URL: http://svn.apache.org/viewvc?rev=576729&view=rev
Log:
HADOOP-1762. The Namenode fsimage does not contain the list of
Datanodes.  (Raghu Angadi via dhruba)


Modified:
    lucene/hadoop/trunk/CHANGES.txt
    lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DataNode.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSEditLog.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSImage.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSNamesystem.java

Modified: lucene/hadoop/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/CHANGES.txt?rev=576729&r1=576728&r2=576729&view=diff
==============================================================================
--- lucene/hadoop/trunk/CHANGES.txt (original)
+++ lucene/hadoop/trunk/CHANGES.txt Mon Sep 17 23:39:50 2007
@@ -83,6 +83,9 @@
 
   BUG FIXES
 
+    HADOOP-1762. The Namenode fsimage does not contain a list of
+    Datanodes.  (Raghu Angadi via dhruba)
+
     HADOOP-1890. Removed debugging prints introduced by HADOOP-1774.
     (Raghu Angadi via dhruba)
 

Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DataNode.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DataNode.java?rev=576729&r1=576728&r2=576729&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DataNode.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DataNode.java Mon Sep 17 23:39:50 2007
@@ -36,6 +36,8 @@
 import java.io.*;
 import java.net.*;
 import java.util.*;
+import java.security.NoSuchAlgorithmException;
+import java.security.SecureRandom;
 import org.apache.hadoop.metrics.MetricsContext;
 import org.apache.hadoop.metrics.MetricsRecord;
 import org.apache.hadoop.metrics.Updater;
@@ -337,6 +339,36 @@
     return "<namenode>";
   }
 
+  private void setNewStorageID(DatanodeRegistration dnReg) {
+    /* Sets dnReg.storageID to a string of the form
+     * "DS-randInt-ipaddr-port-currentTimeMillis".
+     * It is considered extremely rare for all these numbers to match
+     * on a different machine accidentally for the following reasons:
+     * a) SecureRandom(INT_MAX) is pretty much random (1 in 2 billion), and
+     * b) Good chance ip address would be different, and
+     * c) Even on the same machine, Datanode is designed to use different ports.
+     * d) Good chance that these are started at different times.
+     * For a conflict to occur all the 4 above have to match!
+     * The format of this string can be changed anytime in future without
+     * affecting its functionality.
+     */
+    String ip = "unknownIP";
+    try {
+      ip = DNS.getDefaultIP("default");
+    } catch (UnknownHostException ignored) {
+      LOG.warn("Could not find ip address of \"default\" inteface.");
+    }
+    
+    int rand = 0;
+    try {
+      rand = SecureRandom.getInstance("SHA1PRNG").nextInt(Integer.MAX_VALUE);
+    } catch (NoSuchAlgorithmException e) {
+      LOG.warn("Could not use SecureRandom");
+      rand = (new Random()).nextInt(Integer.MAX_VALUE);
+    }
+    dnReg.storageID = "DS-" + rand + "-"+ ip + "-" + dnReg.getPort() + "-" + 
+                      System.currentTimeMillis();
+  }
   /**
    * Register datanode
    * <p>
@@ -349,6 +381,9 @@
    * @throws IOException
    */
   private void register() throws IOException {
+    if (dnRegistration.getStorageID().equals("")) {
+      setNewStorageID(dnRegistration);
+    }
     while(shouldRun) {
       try {
         // reset name to machineName. Mainly for web interface.

Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSEditLog.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSEditLog.java?rev=576729&r1=576728&r2=576729&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSEditLog.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSEditLog.java Mon Sep 17 23:39:50 2007
@@ -43,8 +43,9 @@
   private static final byte OP_DELETE = 2;
   private static final byte OP_MKDIR = 3;
   private static final byte OP_SET_REPLICATION = 4;
-  private static final byte OP_DATANODE_ADD = 5;
-  private static final byte OP_DATANODE_REMOVE = 6;
+  //the following two are used only for backward compatibility:
+  @Deprecated private static final byte OP_DATANODE_ADD = 5;
+  @Deprecated private static final byte OP_DATANODE_REMOVE = 6;
 
   private ArrayList<EditLogOutputStream> editStreams = null;
   private FSImage fsimage = null;
@@ -383,8 +384,7 @@
                                     + " for version " + logVersion);
             FSImage.DatanodeImage nodeimage = new FSImage.DatanodeImage();
             nodeimage.readFields(in);
-            DatanodeDescriptor node = nodeimage.getDatanodeDescriptor();
-            fsNamesys.unprotectedAddDatanode(node);
+            //Datanodes are not persistent any more.
             break;
           }
           case OP_DATANODE_REMOVE: {
@@ -394,11 +394,7 @@
             DatanodeID nodeID = new DatanodeID();
             nodeID.readFields(in);
             DatanodeDescriptor node = fsNamesys.getDatanode(nodeID);
-            if (node != null) {
-              fsNamesys.unprotectedRemoveDatanode(node);
-              // physically remove node from datanodeMap
-              fsNamesys.wipeDatanode(nodeID);
-            }
+            //Datanodes are not persistent any more.
             break;
           }
           default: {
@@ -550,22 +546,6 @@
       new UTF8(src),
       FSEditLog.toLogLong(timestamp)};
     logEdit(OP_DELETE, new ArrayWritable(UTF8.class, info), null);
-  }
-  
-  /** 
-   * Creates a record in edit log corresponding to a new data node
-   * registration event.
-   */
-  void logAddDatanode(DatanodeDescriptor node) {
-    logEdit(OP_DATANODE_ADD, new FSImage.DatanodeImage(node), null);
-  }
-  
-  /** 
-   * Creates a record in edit log corresponding to a data node
-   * removal event.
-   */
-  void logRemoveDatanode(DatanodeID nodeID) {
-    logEdit(OP_DATANODE_REMOVE, new DatanodeID(nodeID), null);
   }
   
   static UTF8 toLogReplication(short replication) {

Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSImage.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSImage.java?rev=576729&r1=576728&r2=576729&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSImage.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSImage.java Mon Sep 17 23:39:50 2007
@@ -850,32 +850,25 @@
   }
 
   /**
-   * Save list of datanodes contained in {@link FSNamesystem#datanodeMap}.
-   * Only the {@link DatanodeInfo} part is stored.
-   * The {@link DatanodeDescriptor#blocks} is transient.
+   * Earlier versions used to store all the known datanodes.
+   * DFS doesn't store datanodes anymore.
    * 
    * @param out output stream
    * @throws IOException
    */
   void saveDatanodes(DataOutputStream out) throws IOException {
-    Map datanodeMap = FSNamesystem.getFSNamesystem().datanodeMap;
-    int size = datanodeMap.size();
-    out.writeInt(size);
-    for(Iterator it = datanodeMap.values().iterator(); it.hasNext();) {
-      DatanodeImage nodeImage = new DatanodeImage((DatanodeDescriptor) it.next());
-      nodeImage.write(out);
-    }
+    // we don't store datanodes anymore.
+    out.writeInt(0);    
   }
 
   void loadDatanodes(int version, DataInputStream in) throws IOException {
     if (version > -3) // pre datanode image version
       return;
-    FSNamesystem fsNamesys = FSNamesystem.getFSNamesystem();
     int size = in.readInt();
     for(int i = 0; i < size; i++) {
       DatanodeImage nodeImage = new DatanodeImage();
       nodeImage.readFields(in);
-      fsNamesys.unprotectedAddDatanode(nodeImage.getDatanodeDescriptor());
+      // We don't need to add these descriptors any more.
     }
   }
 

Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSNamesystem.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSNamesystem.java?rev=576729&r1=576728&r2=576729&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSNamesystem.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSNamesystem.java Mon Sep 17 23:39:50 2007
@@ -1557,15 +1557,7 @@
    * 
    * @see DataNode#register()
    */
-  public void registerDatanode(DatanodeRegistration nodeReg,
-                                            String networkLocation
-                                            ) throws IOException {
-    registerDatanodeInternal(nodeReg, networkLocation);
-    getEditLog().logSync();
-  }
-
-  private synchronized void registerDatanodeInternal(
-                                            DatanodeRegistration nodeReg,
+  public synchronized void registerDatanode(DatanodeRegistration nodeReg,
                                             String networkLocation
                                             ) throws IOException {
 
@@ -1604,8 +1596,6 @@
       removeDatanode(nodeN);
       // physically remove node from datanodeMap
       wipeDatanode(nodeN);
-      // and log removal
-      getEditLog().logRemoveDatanode(nodeN);
       nodeN = null;
     }
 
@@ -1618,13 +1608,19 @@
                                       + "node restarted.");
       } else {
         // nodeS is found
-        // The registering datanode is a replacement node for the existing 
-        // data storage, which from now on will be served by a new node.
-        NameNode.stateChangeLog.debug(
-                                      "BLOCK* NameSystem.registerDatanode: "
+        /* The registering datanode is a replacement node for the existing 
+          data storage, which from now on will be served by a new node.
+          If this message repeats, both nodes might have the same storageID 
+          by (insanely rare) random chance. User needs to restart one of the
+          nodes with its data cleared (or user can just remove the StorageID
+          value in "VERSION" file under the data directory of the datanode,
+          but this might not work if VERSION file format has changed).
+       */        
+        NameNode.stateChangeLog.info( "BLOCK* NameSystem.registerDatanode: "
                                       + "node " + nodeS.getName()
-                                      + " is replaced by " + nodeReg.getName() + ".");
-        getEditLog().logRemoveDatanode(nodeS);
+                                      + " is replaced by " + nodeReg.getName() + 
+                                      " with the same storageID " +
+                                      nodeReg.getStorageID());
       }
       // update cluster map
       clusterMap.remove(nodeS);
@@ -1632,9 +1628,6 @@
       nodeS.setNetworkLocation(networkLocation);
       clusterMap.add(nodeS);
       nodeS.setHostName(hostName);
-      if ( nodeS != nodeN ) {
-        getEditLog().logAddDatanode( nodeS );
-      }
         
       // also treat the registration message as a heartbeat
       synchronized(heartbeats) {
@@ -1662,7 +1655,6 @@
       = new DatanodeDescriptor(nodeReg, networkLocation, hostName);
     unprotectedAddDatanode(nodeDescr);
     clusterMap.add(nodeDescr);
-    getEditLog().logAddDatanode(nodeDescr);
       
     // also treat the registration message as a heartbeat
     synchronized(heartbeats) {