Posted to common-commits@hadoop.apache.org by cu...@apache.org on 2007/09/07 22:55:27 UTC

svn commit: r573698 - in /lucene/hadoop/trunk: ./ src/java/org/apache/hadoop/dfs/

Author: cutting
Date: Fri Sep  7 13:55:26 2007
New Revision: 573698

URL: http://svn.apache.org/viewvc?rev=573698&view=rev
Log:
HADOOP-1687.  Save memory on namenode by optimizing BlockMap representation.  Contributed by Konstantin.

Modified:
    lucene/hadoop/trunk/CHANGES.txt
    lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/BlocksMap.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DatanodeDescriptor.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSDirectory.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSNamesystem.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/INode.java

Modified: lucene/hadoop/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/CHANGES.txt?rev=573698&r1=573697&r2=573698&view=diff
==============================================================================
--- lucene/hadoop/trunk/CHANGES.txt (original)
+++ lucene/hadoop/trunk/CHANGES.txt Fri Sep  7 13:55:26 2007
@@ -56,6 +56,9 @@
     Block, and replace many uses of Block with BlockInfo.
     (Konstantin Shvachko via cutting)
 
+    HADOOP-1687.  Save memory in namenode by optimizing BlockMap
+    representation.  (Konstantin Shvachko via cutting)
+
   BUG FIXES
 
     HADOOP-1763. Too many lost task trackers on large clusters due to

Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/BlocksMap.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/BlocksMap.java?rev=573698&r1=573697&r2=573698&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/BlocksMap.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/BlocksMap.java Fri Sep  7 13:55:26 2007
@@ -29,165 +29,342 @@
   /**
    * Internal class for block metadata.
    */
-  private static class BlockInfo extends Block {
+  static class BlockInfo extends Block {
     private INodeFile          inode;
-      
-    /** nodes could contain some null entries at the end, so 
-     *  nodes.legth >= number of datanodes. 
-     *  if nodes != null then nodes[0] != null.
+
+    /**
+     * This array contains triplets of references.
+     * For the i-th data-node the block belongs to,
+     * triplets[3*i] is the reference to the DatanodeDescriptor,
+     * and triplets[3*i+1] and triplets[3*i+2] are references
+     * to the previous and the next blocks, respectively, in the
+     * list of blocks belonging to this data-node.
      */
-    private DatanodeDescriptor[]           nodes;
+    private Object[] triplets;
 
-    BlockInfo(Block blk) {
+    BlockInfo(Block blk, int replication) {
       super(blk);
+      this.triplets = new Object[3*replication];
+      this.inode = null;
+    }
+
+    INodeFile getINode() {
+      return inode;
+    }
+
+    DatanodeDescriptor getDatanode(int index) {
+      assert this.triplets != null : "BlockInfo is not initialized";
+      assert index >= 0 && index*3 < triplets.length : "Index is out of bound";
+      DatanodeDescriptor node = (DatanodeDescriptor)triplets[index*3];
+      assert node == null || 
+          DatanodeDescriptor.class.getName().equals(node.getClass().getName()) : 
+                "DatanodeDescriptor is expected at " + index*3;
+      return node;
+    }
+
+    BlockInfo getPrevious(int index) {
+      assert this.triplets != null : "BlockInfo is not initialized";
+      assert index >= 0 && index*3+1 < triplets.length : "Index is out of bound";
+      BlockInfo info = (BlockInfo)triplets[index*3+1];
+      assert info == null || 
+          BlockInfo.class.getName().equals(info.getClass().getName()) : 
+                "BlockInfo is expected at " + index*3;
+      return info;
+    }
+
+    BlockInfo getNext(int index) {
+      assert this.triplets != null : "BlockInfo is not initialized";
+      assert index >= 0 && index*3+2 < triplets.length : "Index is out of bound";
+      BlockInfo info = (BlockInfo)triplets[index*3+2];
+      assert info == null || 
+          BlockInfo.class.getName().equals(info.getClass().getName()) : 
+                "BlockInfo is expected at " + index*3;
+      return info;
+    }
+
+    void setDatanode(int index, DatanodeDescriptor node) {
+      assert this.triplets != null : "BlockInfo is not initialized";
+      assert index >= 0 && index*3 < triplets.length : "Index is out of bound";
+      triplets[index*3] = node;
+    }
+
+    void setPrevious(int index, BlockInfo to) {
+      assert this.triplets != null : "BlockInfo is not initialized";
+      assert index >= 0 && index*3+1 < triplets.length : "Index is out of bound";
+      triplets[index*3+1] = to;
+    }
+
+    void setNext(int index, BlockInfo to) {
+      assert this.triplets != null : "BlockInfo is not initialized";
+      assert index >= 0 && index*3+2 < triplets.length : "Index is out of bound";
+      triplets[index*3+2] = to;
+    }
+
+    private int getCapacity() {
+      assert this.triplets != null : "BlockInfo is not initialized";
+      assert triplets.length % 3 == 0 : "Malformed BlockInfo";
+      return triplets.length / 3;
+    }
+
+    /**
+     * Ensure that there is enough space to include num more triplets.
+     * @return the first free triplet index.
+     */
+    private int ensureCapacity(int num) {
+      assert this.triplets != null : "BlockInfo is not initialized";
+      int last = numNodes();
+      if(triplets.length >= (last+num)*3)
+        return last;
+      /* Not enough space left. Create a new array. Should normally 
+       * happen only when replication is manually increased by the user. */
+      Object[] old = triplets;
+      triplets = new Object[(last+num)*3];
+      for(int i=0; i < last*3; i++) {
+        triplets[i] = old[i];
+      }
+      return last;
+    }
+
+    /**
+     * Count the number of data-nodes the block belongs to.
+     */
+    int numNodes() {
+      assert this.triplets != null : "BlockInfo is not initialized";
+      assert triplets.length % 3 == 0 : "Malformed BlockInfo";
+      for(int idx = getCapacity()-1; idx >= 0; idx--) {
+        if(getDatanode(idx) != null)
+          return idx+1;
+      }
+      return 0;
+    }
+
+    /**
+     * Add a data-node to which this block belongs.
+     */
+    boolean addNode(DatanodeDescriptor node) {
+      int dnIndex = this.findDatanode(node);
+      if(dnIndex >= 0) // the node is already there
+        return false;
+      // find the first free triplet slot
+      int lastNode = ensureCapacity(1);
+      setDatanode(lastNode, node);
+      setNext(lastNode, null); 
+      setPrevious(lastNode, null); 
+      return true;
+    }
+
+    /**
+     * Remove data-node from the block.
+     */
+    boolean removeNode(DatanodeDescriptor node) {
+      int dnIndex = this.findDatanode(node);
+      if(dnIndex < 0) // the node is not found
+        return false;
+      // find the last non-null node
+      int lastNode = numNodes()-1; 
+      // replace current node triplet by the lastNode one 
+      setDatanode(dnIndex, getDatanode(lastNode));
+      setNext(dnIndex, getNext(lastNode)); 
+      setPrevious(dnIndex, getPrevious(lastNode)); 
+      // set the last triplet to null
+      setDatanode(lastNode, null);
+      setNext(lastNode, null); 
+      setPrevious(lastNode, null); 
+      return true;
+    }
+
+    /**
+     * Find specified DatanodeDescriptor.
+     * @param dn the data-node to look for
+     * @return index or -1 if not found.
+     */
+    int findDatanode(DatanodeDescriptor dn) {
+      int len = getCapacity();
+      for(int idx = 0; idx < len; idx++) {
+        DatanodeDescriptor cur = getDatanode(idx);
+        if(cur == dn)
+          return idx;
+        if(cur == null)
+          break;
+      }
+      return -1;
+    }
+
+    /**
+     * Insert this block into the head of the list of blocks 
+     * related to the specified DatanodeDescriptor.
+     * If the head is null then form a new list.
+     * @return current block as the new head of the list.
+     */
+    BlockInfo listInsert(BlockInfo head, DatanodeDescriptor dn) {
+      int dnIndex = this.findDatanode(dn);
+      assert dnIndex >= 0 : "Data node is not found: current";
+      this.setPrevious(dnIndex, null);
+      this.setNext(dnIndex, head);
+      if(head != null) {
+        int headDNIndex = head.findDatanode(dn);
+        assert headDNIndex >= 0 : "Data node is not found: head";
+        head.setPrevious(headDNIndex, this);
+      }
+      return this;
+    }
+
+    /**
+     * Remove this block from the list of blocks 
+     * related to the specified DatanodeDescriptor.
+     * If this block is the head of the list then return the next block as 
+     * the new head.
+     * @return the new head of the list or null if the list becomes
+     * empty after deletion.
+     */
+    BlockInfo listRemove(BlockInfo head, DatanodeDescriptor dn) {
+      if(head == null)
+        return null;
+      int dnIndex = this.findDatanode(dn);
+      if(dnIndex < 0) // this block is not on the data-node list
+        return head;
+
+      BlockInfo next = this.getNext(dnIndex);
+      BlockInfo prev = this.getPrevious(dnIndex);
+      this.setNext(dnIndex, null);
+      this.setPrevious(dnIndex, null);
+      if(prev != null) {
+        int prevDNIndex = prev.findDatanode(dn);
+        assert prevDNIndex >= 0 : "Data node is not found: previous";
+        prev.setNext(prevDNIndex, next);
+      }
+      if(next != null) {
+        int nextDNIndex = next.findDatanode(dn);
+        assert nextDNIndex >= 0 : "Data node is not found: next";
+        next.setPrevious(nextDNIndex, prev);
+      }
+      if(this == head)  // removing the head
+        head = next;
+      return head;
+    }
+
+    int listCount(DatanodeDescriptor dn) {
+      int count = 0;
+      for(BlockInfo cur = this; cur != null;
+            cur = cur.getNext(cur.findDatanode(dn)))
+        count++;
+      return count;
     }
   }
-      
+
   private static class NodeIterator implements Iterator<DatanodeDescriptor> {
-    NodeIterator(DatanodeDescriptor[] nodes) {
-      arr = nodes;
-    }
-    private DatanodeDescriptor[] arr;
+    private BlockInfo blockInfo;
     private int nextIdx = 0;
       
+    NodeIterator(BlockInfo blkInfo) {
+      this.blockInfo = blkInfo;
+    }
+
     public boolean hasNext() {
-      return arr != null && nextIdx < arr.length && arr[nextIdx] != null;
+      return blockInfo != null && nextIdx < blockInfo.getCapacity()
+              && blockInfo.getDatanode(nextIdx) != null;
     }
-      
+
     public DatanodeDescriptor next() {
-      return arr[nextIdx++];
+      return blockInfo.getDatanode(nextIdx++);
     }
-      
+
     public void remove()  {
       throw new UnsupportedOperationException("Sorry. can't remove.");
     }
   }
-      
+
   private Map<Block, BlockInfo> map = new HashMap<Block, BlockInfo>();
-      
-  /** add BlockInfo if mapping does not exist. */
-  private BlockInfo checkBlockInfo(Block b) {
+
+  /**
+   * Add BlockInfo if mapping does not exist.
+   */
+  private BlockInfo checkBlockInfo(Block b, int replication) {
     BlockInfo info = map.get(b);
     if (info == null) {
-      info = new BlockInfo(b);
+      info = new BlockInfo(b, replication);
       map.put(b, info);
     }
     return info;
   }
-      
-  public INodeFile getINode(Block b) {
+
+  INodeFile getINode(Block b) {
     BlockInfo info = map.get(b);
     return (info != null) ? info.inode : null;
   }
-          
-  public void addINode(Block b, INodeFile iNode) {
-    BlockInfo info = checkBlockInfo(b);
+
+  /**
+   * Add block b belonging to the specified file inode to the map.
+   */
+  BlockInfo addINode(Block b, INodeFile iNode) {
+    BlockInfo info = checkBlockInfo(b, iNode.getReplication());
     info.inode = iNode;
+    return info;
   }
-    
+
+  /**
+   * Remove INode reference from block b.
+   * Remove the block from the block map
+   * only if it no longer belongs to any file or data-node.
+   */
   public void removeINode(Block b) {
     BlockInfo info = map.get(b);
     if (info != null) {
       info.inode = null;
-      if (info.nodes == null) {
-        map.remove(b);
+      if (info.getDatanode(0) == null) {  // no datanodes left
+        map.remove(b);  // remove block from the map
       }
     }
   }
-      
+
   /** Returns the block object if it exists in the map. */
-  public Block getStoredBlock(Block b) {
-    BlockInfo info = map.get(b);
-    return (info != null) ? info : null;
+  BlockInfo getStoredBlock(Block b) {
+    return map.get(b);
   }
-    
+
   /** Returned Iterator does not support remove(). */
-  public Iterator<DatanodeDescriptor> nodeIterator(Block b) {
-    BlockInfo info = map.get(b);
-    return new NodeIterator((info != null) ? info.nodes : null);
+  Iterator<DatanodeDescriptor> nodeIterator(Block b) {
+    return new NodeIterator(map.get(b));
   }
-    
+
   /** Counts the number of containing nodes. Better than using the iterator. */
-  public int numNodes(Block b) {
-    int count = 0;
+  int numNodes(Block b) {
     BlockInfo info = map.get(b);
-    if (info != null && info.nodes != null) {
-      count = info.nodes.length;
-      while (info.nodes[ count-1 ] == null) {// mostly false
-        count--;
-      }
-    }
-    return count;
+    return info == null ? 0 : info.numNodes();
   }
-      
+
   /** Returns true if the node does not already exist and is added;
    * false if the node already exists. */
-  public boolean addNode(Block b, 
-                         DatanodeDescriptor node,
-                         int replicationHint) {
-    BlockInfo info = checkBlockInfo(b);
-    if (info.nodes == null) {
-      info.nodes = new DatanodeDescriptor[ replicationHint ];
-    }
-      
-    DatanodeDescriptor[] arr = info.nodes;
-    for(int i=0; i < arr.length; i++) {
-      if (arr[i] == null) {
-        arr[i] = node;
-        return true;
-      }
-      if (arr[i] == node) {
-        return false;
-      }
-    }
-
-    /* Not enough space left. Create a new array. Should normally 
-     * happen only when replication is manually increased by the user. */
-    info.nodes = new DatanodeDescriptor[ arr.length + 1 ];
-    for(int i=0; i < arr.length; i++) {
-      info.nodes[i] = arr[i];
-    }
-    info.nodes[ arr.length ] = node;
-    return true;
+  boolean addNode(Block b, DatanodeDescriptor node, int replication) {
+    // insert into the map if not there yet
+    BlockInfo info = checkBlockInfo(b, replication);
+    // add node to the block info
+    boolean added = info.addNode(node);
+    // add to the data-node list
+    node.addBlock(info);
+    return added;
   }
-    
-  public boolean removeNode(Block b, DatanodeDescriptor node) {
+
+  /**
+   * Remove data-node reference from the block.
+   * Remove the block from the block map
+   * only if it no longer belongs to any file or data-node.
+   */
+  boolean removeNode(Block b, DatanodeDescriptor node) {
     BlockInfo info = map.get(b);
-    if (info == null || info.nodes == null) {
+    if (info == null)
       return false;
-    }
-
-    boolean removed = false;
-    // swap lastNode and node's location. set lastNode to null.
-    DatanodeDescriptor[] arr = info.nodes;
-    int lastNode = -1;
-    for(int i=arr.length-1; i >= 0; i--) {
-      if (lastNode < 0 && arr[i] != null) {
-        lastNode = i;
-      }
-      if (arr[i] == node) {
-        arr[i] = arr[ lastNode ];
-        arr[ lastNode ] = null;
-        removed = true;
-        break;
-      }
-    }
-        
-    /*
-     * if ((lastNode + 1) < arr.length/4) {
-     *    we could trim the array.
-     * } 
-     */
-    if (arr[0] == null) { // no datanodes left.
-      info.nodes = null;
-      if (info.inode == null) {
-        map.remove(b);
-      }
+    // first remove block from the data-node list
+    node.removeBlock(info);
+    // remove node from the block info
+    boolean removed = info.removeNode(node);
+    if (info.getDatanode(0) == null     // no datanodes left
+              && info.inode == null) {  // does not belong to a file
+      map.remove(b);  // remove block from the map
     }
     return removed;
   }
 
-  public int size() {
+  int size() {
     return map.size();
   }
 }
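
The triplets layout above is easier to see in isolation. Below is a minimal,
self-contained Java sketch of the idea, using simplified stand-in names (Node,
Blk, addTo) rather than the committed Hadoop classes: the same Object[] that
records a block's locations also threads the block into each data-node's
doubly linked list of blocks.

    import java.util.ArrayList;
    import java.util.List;

    class TripletsSketch {
      static class Node {
        final String name;
        Blk head;                          // head of this node's block list
        Node(String name) { this.name = name; }
      }

      static class Blk {
        final long id;
        final Object[] triplets;           // {node, prev, next} per replica
        Blk(long id, int replication) {
          this.id = id;
          this.triplets = new Object[3 * replication];
        }
        // Index of n's triplet (find(null) yields the first free slot).
        int find(Node n) {
          for (int i = 0; 3 * i < triplets.length; i++)
            if (triplets[3 * i] == n) return i;
          return -1;
        }
        // Attach this block to node n and push it onto the head of n's list.
        void addTo(Node n) {
          int i = find(null);              // first free triplet
          triplets[3 * i] = n;
          triplets[3 * i + 1] = null;      // prev: the new head has none
          triplets[3 * i + 2] = n.head;    // next: the old head
          if (n.head != null) {
            Blk old = n.head;
            old.triplets[3 * old.find(n) + 1] = this;
          }
          n.head = this;
        }
      }

      public static void main(String[] args) {
        Node dn = new Node("dn1");
        Blk b1 = new Blk(1, 3), b2 = new Blk(2, 3);
        b1.addTo(dn);
        b2.addTo(dn);                      // dn's list is now b2 -> b1
        List<Long> ids = new ArrayList<Long>();
        for (Blk b = dn.head; b != null;
             b = (Blk) b.triplets[3 * b.find(dn) + 2])
          ids.add(b.id);
        System.out.println(ids);           // prints [2, 1]
      }
    }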

Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DatanodeDescriptor.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DatanodeDescriptor.java?rev=573698&r1=573697&r2=573698&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DatanodeDescriptor.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DatanodeDescriptor.java Fri Sep  7 13:55:26 2007
@@ -19,6 +19,8 @@
 
 import java.util.*;
 
+import org.apache.hadoop.dfs.BlocksMap.BlockInfo;
+
 /**************************************************
  * DatanodeDescriptor tracks stats on a given DataNode,
  * such as available storage capacity, last update time, etc.,
@@ -32,7 +34,7 @@
  **************************************************/
 public class DatanodeDescriptor extends DatanodeInfo {
 
-  private volatile SortedMap<Block, Block> blocks = new TreeMap<Block, Block>();
+  private volatile BlockInfo blockList = null;
   // isAlive == heartbeats.contains(this)
   // This is an optimization, because contains takes O(n) time on Arraylist
   protected boolean isAlive = false;
@@ -130,26 +132,30 @@
   }
 
   /**
+   * Add block to the head of the list of blocks belonging to the data-node.
    */
-  void addBlock(Block b) {
-    blocks.put(b, b);
+  void addBlock(BlockInfo b) {
+    blockList = b.listInsert(blockList, this);
   }
   
-  void removeBlock(Block b) {
-    blocks.remove(b);
+  /**
+   * Remove block from the list of blocks belonging to the data-node.
+   */
+  void removeBlock(BlockInfo b) {
+    blockList = b.listRemove(blockList, this);
   }
 
   void resetBlocks() {
     this.capacity = 0;
     this.remaining = 0;
     this.xceiverCount = 0;
-    this.blocks.clear();
+    this.blockList = null;
   }
 
   int numBlocks() {
-    return blocks.size();
+    return blockList == null ? 0 : blockList.listCount(this);
   }
-  
+
   /**
    */
   void updateHeartbeat(long capacity, long dfsUsed, long remaining,
@@ -160,13 +166,33 @@
     this.lastUpdate = System.currentTimeMillis();
     this.xceiverCount = xceiverCount;
   }
-  
-  Block[] getBlocks() {
-    return (Block[]) blocks.keySet().toArray(new Block[blocks.size()]);
+
+  static private class BlockIterator implements Iterator<Block> {
+    private BlockInfo current;
+    private DatanodeDescriptor node;
+      
+    BlockIterator(BlockInfo head, DatanodeDescriptor dn) {
+      this.current = head;
+      this.node = dn;
+    }
+
+    public boolean hasNext() {
+      return current != null;
+    }
+
+    public BlockInfo next() {
+      BlockInfo res = current;
+      current = current.getNext(current.findDatanode(node));
+      return res;
+    }
+
+    public void remove()  {
+      throw new UnsupportedOperationException("Sorry. can't remove.");
+    }
   }
 
   Iterator<Block> getBlockIterator() {
-    return blocks.keySet().iterator();
+    return new BlockIterator(this.blockList, this);
   }
   
   /*
@@ -271,5 +297,32 @@
       assert(blocklist.length > 0);
       xferResults[0] = blocklist;
     }
+  }
+
+  void reportDiff(BlocksMap blocksMap,
+                  Block[] newReport,
+                  Collection<Block> toAdd,
+                  Collection<Block> toRemove) {
+    BlockInfo delimiter = new BlockInfo(new Block(), 1);
+    delimiter.addNode(this);
+    this.addBlock(delimiter); // add to the head of the list
+    if(newReport == null)
+      newReport = new Block[0];
+    for(Block blk : newReport) {
+      BlockInfo storedBlock = blocksMap.getStoredBlock(blk);
+      if(storedBlock == null || storedBlock.findDatanode(this) < 0) {
+        toAdd.add(blk);
+        continue;
+      }
+      // move block to the head of the list
+      this.removeBlock(storedBlock);
+      this.addBlock(storedBlock);
+    }
+    // collect blocks that have not been reported
+    // they are all next to the delimiter
+    Iterator<Block> it = new BlockIterator(delimiter.getNext(0), this);
+    while(it.hasNext())
+      toRemove.add(it.next());
+    this.removeBlock(delimiter);
   }
 }
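
The delimiter trick in reportDiff() above is the payoff of the per-node block
lists: a block report is processed in a single pass with no sorting. Below is
a minimal sketch of the same idea on a plain java.util.LinkedList, with
illustrative names (unreported, delim) rather than Hadoop's: push a delimiter
onto the head, move each reported element back to the head as it is matched,
and whatever remains behind the delimiter afterwards was never reported. (The
real code moves a block in O(1) via its own prev/next links; the
LinkedList.remove() used here is O(n).)

    import java.util.ArrayList;
    import java.util.Arrays;
    import java.util.HashSet;
    import java.util.LinkedList;
    import java.util.List;
    import java.util.Set;

    class ReportDiffSketch {
      // Return the stored elements missing from the report; the "toAdd"
      // half of reportDiff() is omitted for brevity.
      static <T> List<T> unreported(LinkedList<T> stored, Set<T> report,
                                    T delim) {
        stored.addFirst(delim);            // cf. the delimiter BlockInfo
        for (T t : report) {
          if (stored.remove(t))            // known element: move it to the
            stored.addFirst(t);            // head, in front of the delimiter
        }
        // everything still behind the delimiter was never reported
        List<T> stale = new ArrayList<T>(
            stored.subList(stored.indexOf(delim) + 1, stored.size()));
        stored.remove(delim);
        return stale;
      }

      public static void main(String[] args) {
        LinkedList<String> stored =
            new LinkedList<String>(Arrays.asList("b1", "b2", "b3"));
        Set<String> report = new HashSet<String>(Arrays.asList("b1", "b3"));
        System.out.println(unreported(stored, report, "DELIM"));  // [b2]
      }
    }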

Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSDirectory.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSDirectory.java?rev=573698&r1=573697&r2=573698&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSDirectory.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSDirectory.java Fri Sep  7 13:55:26 2007
@@ -150,16 +150,17 @@
     if (blocks == null)
       newNode = new INodeDirectory(modificationTime);
     else
-      newNode = new INodeFile(blocks, replication, modificationTime,
+      newNode = new INodeFile(blocks.length, replication, modificationTime,
                               preferredBlockSize);
     synchronized (rootDir) {
       try {
         newNode = rootDir.addNode(path, newNode);
-        if(newNode != null) {
-          int nrBlocks = (blocks == null) ? 0 : blocks.length;
+        if(newNode != null && blocks != null) {
+          int nrBlocks = blocks.length;
           // Add file->block mapping
+          INodeFile newF = (INodeFile)newNode;
           for (int i = 0; i < nrBlocks; i++) {
-            namesystem.blocksMap.addINode(blocks[i], (INodeFile)newNode);
+            newF.setBlock(i, namesystem.blocksMap.addINode(blocks[i], newF));
           }
         }
       } catch (FileNotFoundException e) {
@@ -187,9 +188,10 @@
       }
 
       // associate the new list of blocks with this file
-      fileNode.setBlocks(blocks);
+      fileNode.allocateBlocks(blocks.length);
       for (int i = 0; i < blocks.length; i++) {
-        namesystem.blocksMap.addINode(blocks[i], fileNode);
+        fileNode.setBlock(i, 
+            namesystem.blocksMap.addINode(blocks[i], fileNode));
       }
 
       // create two transactions. The first one deletes the empty
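
The FSDirectory changes above complete the pattern: blocksMap.addINode() now
returns the canonical BlockInfo, and the inode stores that same object via
setBlock(), so each block is represented by one shared object instead of one
copy in the map and another in the inode. A minimal sketch of this interning
pattern, with hypothetical names (Info, intern) rather than the committed API:

    import java.util.HashMap;
    import java.util.Map;

    class CanonicalMapSketch {
      static class Info {
        final long id;
        Info(long id) { this.id = id; }
      }

      private final Map<Long, Info> map = new HashMap<Long, Info>();

      // Return the single Info instance for a block id, creating it on
      // first use -- cf. checkBlockInfo()/addINode() above.
      Info intern(long blockId) {
        Info info = map.get(blockId);
        if (info == null) {
          info = new Info(blockId);
          map.put(blockId, info);
        }
        return info;
      }

      public static void main(String[] args) {
        CanonicalMapSketch m = new CanonicalMapSketch();
        Info[] fileBlocks = new Info[1];
        fileBlocks[0] = m.intern(42L);     // cf. INodeFile.setBlock(i, ...)
        System.out.println(fileBlocks[0] == m.intern(42L));  // true
      }
    }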

Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSNamesystem.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSNamesystem.java?rev=573698&r1=573697&r2=573698&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSNamesystem.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSNamesystem.java Fri Sep  7 13:55:26 2007
@@ -711,7 +711,7 @@
    * of machines.  The first on this list should be where the client 
    * writes data.  Subsequent items in the list must be provided in
    * the connection to the first datanode.
-   * @return Return an array that consists of the block, plus a set
+   * Return an array that consists of the block, plus a set
    * of machines
    * @throws IOException if the filename is invalid
    *         {@link FSDirectory#isValidToCreate(String)}.
@@ -986,7 +986,7 @@
         
     Collection<Block> blocks = pendingFile.getBlocks();
     int nrBlocks = blocks.size();
-    Block pendingBlocks[] = blocks.toArray(new Block[nrBlocks]);
+    Block pendingBlocks[] = new Block[nrBlocks];
 
     //
     // We have the pending blocks, but they won't have
@@ -994,12 +994,12 @@
     // data-write took place). Find the block stored in
     // node descriptor.
     //
-    for (int i = 0; i < nrBlocks; i++) {
-      Block b = pendingBlocks[i];
+    int idx = 0;
+    for (Block b : blocks) {
       Block storedBlock = blocksMap.getStoredBlock(b);
-      if (storedBlock != null) {
-        pendingBlocks[i] = storedBlock;
-      }
+      // according to checkFileProgress() every block is present & replicated
+      assert storedBlock != null : "Missing block " + b.getBlockName();
+      pendingBlocks[idx++] = storedBlock;
     }
         
     //
@@ -1974,8 +1974,6 @@
   }
 
   void unprotectedRemoveDatanode(DatanodeDescriptor nodeDescr) {
-    // datanodeMap.remove(nodeDescr.getStorageID());
-    // deaddatanodeMap.put(nodeDescr.getName(), nodeDescr);
     nodeDescr.resetBlocks();
     NameNode.stateChangeLog.debug(
                                   "BLOCK* NameSystem.unprotectedRemoveDatanode: "
@@ -1996,7 +1994,6 @@
                                   + "node " + nodeDescr.getName() + " is added to datanodeMap.");
   }
 
-    
   /**
    * Physically remove node from datanodeMap.
    * 
@@ -2096,49 +2093,15 @@
     // Modify the (block-->datanode) map, according to the difference
     // between the old and new block report.
     //
-    int newPos = 0;
-    Iterator<Block> iter = node.getBlockIterator();
-    Block oldblk = iter.hasNext() ? iter.next() : null;
-    Block newblk = (newReport != null && newReport.length > 0) ? 
-      newReport[0]	: null;
-
-    // common case is that most of the blocks from the datanode
-    // matches blocks in datanode descriptor.                
-    Collection<Block> toRemove = new LinkedList<Block>();
     Collection<Block> toAdd = new LinkedList<Block>();
+    Collection<Block> toRemove = new LinkedList<Block>();
+    node.reportDiff(blocksMap, newReport, toAdd, toRemove);
         
-    while (oldblk != null || newblk != null) {
-           
-      int cmp = (oldblk == null) ? 1 : 
-        ((newblk == null) ? -1 : oldblk.compareTo(newblk));
-
-      if (cmp == 0) {
-        // Do nothing, blocks are the same
-        newPos++;
-        oldblk = iter.hasNext() ? iter.next() : null;
-        newblk = (newPos < newReport.length)
-          ? newReport[newPos] : null;
-      } else if (cmp < 0) {
-        // The old report has a block the new one does not
-        toRemove.add(oldblk);
-        oldblk = iter.hasNext() ? iter.next() : null;
-      } else {
-        // The new report has a block the old one does not
-        toAdd.add(newblk);
-        newPos++;
-        newblk = (newPos < newReport.length)
-          ? newReport[newPos] : null;
-      }
-    }
-        
-    for (Iterator<Block> i = toRemove.iterator(); i.hasNext();) {
-      Block b = i.next();
+    for (Block b : toRemove) {
       removeStoredBlock(b, node);
-      node.removeBlock(b);
     }
-    for (Iterator<Block> i = toAdd.iterator(); i.hasNext();) {
-      Block b = i.next();
-      node.addBlock(addStoredBlock(b, node));
+    for (Block b : toAdd) {
+      addStoredBlock(b, node);
     }
         
     //
@@ -2444,7 +2407,7 @@
     //
     // Modify the blocks->datanode map and node's map.
     // 
-    node.addBlock(addStoredBlock(block, node));
+    addStoredBlock(block, node);
     pendingReplications.remove(block);
   }
 
@@ -2534,9 +2497,10 @@
       //
       // all the blocks that reside on this node have to be 
       // replicated.
-      Block decommissionBlocks[] = node.getBlocks();
-      for (int j = 0; j < decommissionBlocks.length; j++) {
-        updateNeededReplications(decommissionBlocks[j], -1, 0);
+      Iterator<Block> decommissionBlocks = node.getBlockIterator();
+      while(decommissionBlocks.hasNext()) {
+        Block block = decommissionBlocks.next();
+        updateNeededReplications(block, -1, 0);
       }
     }
   }
@@ -2714,10 +2678,10 @@
    * yet reached their replication factor. Otherwise returns false.
    */
   private boolean isReplicationInProgress(DatanodeDescriptor srcNode) {
-    Block decommissionBlocks[] = srcNode.getBlocks();
     boolean status = false;
-    for (int i = 0; i < decommissionBlocks.length; i++) {
-      Block block = decommissionBlocks[i];
+    Iterator<Block> decommissionBlocks = srcNode.getBlockIterator();
+    while(decommissionBlocks.hasNext()) {
+      Block block = decommissionBlocks.next();
       INode fileINode = blocksMap.getINode(block);
 
       if (fileINode != null) {

Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/INode.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/INode.java?rev=573698&r1=573697&r2=573698&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/INode.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/INode.java Fri Sep  7 13:55:26 2007
@@ -25,6 +25,7 @@
 import java.util.List;
 
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.dfs.BlocksMap.BlockInfo;
 
 /**
  * We keep an in-memory representation of the file/block hierarchy.
@@ -291,21 +292,21 @@
    * following path components: ["","c1","c2","c3"],
    * 
    * <p>
-   * {@link #getExistingPathINodes(["","c1","c2"], [?])} should fill the
+   * <code>getExistingPathINodes(["","c1","c2"], [?])</code> should fill the
    * array with [c2] <br>
-   * {@link #getExistingPathINodes(["","c1","c2","c3"], [?])} should fill the
+   * <code>getExistingPathINodes(["","c1","c2","c3"], [?])</code> should fill the
    * array with [null]
    * 
    * <p>
-   * {@link #getExistingPathINodes(["","c1","c2"], [?,?])} should fill the
+   * <code>getExistingPathINodes(["","c1","c2"], [?,?])</code> should fill the
    * array with [c1,c2] <br>
-   * {@link #getExistingPathINodes(["","c1","c2","c3"], [?,?])} should fill
+   * <code>getExistingPathINodes(["","c1","c2","c3"], [?,?])</code> should fill
    * the array with [c2,null]
    * 
    * <p>
-   * {@link #getExistingPathINodes(["","c1","c2"], [?,?,?,?])} should fill
+   * <code>getExistingPathINodes(["","c1","c2"], [?,?,?,?])</code> should fill
    * the array with [rootINode,c1,c2,null], <br>
-   * {@link #getExistingPathINodes(["","c1","c2","c3"], [?,?,?,?])} should
+   * <code>getExistingPathINodes(["","c1","c2","c3"], [?,?,?,?])</code> should
    * fill the array with [rootINode,c1,c2,null]
    * @param components array of path component name
    * @param existing INode array to fill with existing INodes
@@ -325,7 +326,7 @@
         existing[index] = curNode;
       if (!curNode.isDirectory() || (count == components.length - 1))
         break; // no more child, stop here
-      INodeDirectory parentDir = (INodeDirectory) curNode;
+      INodeDirectory parentDir = (INodeDirectory)curNode;
       curNode = parentDir.getChildINode(components[count + 1]);
       count += 1;
       index += 1;
@@ -457,18 +458,18 @@
 }
 
 class INodeFile extends INode {
-  private Block blocks[] = null;
+  private BlockInfo blocks[] = null;
   protected short blockReplication;
   protected long preferredBlockSize;
 
   /**
    */
-  INodeFile(Block blocks[], short replication, long modificationTime,
+  INodeFile(int nrBlocks, short replication, long modificationTime,
             long preferredBlockSize) {
     super(modificationTime);
-    this.blocks = blocks;
     this.blockReplication = replication;
     this.preferredBlockSize = preferredBlockSize;
+    allocateBlocks(nrBlocks);
   }
 
   boolean isDirectory() {
@@ -496,10 +497,18 @@
   }
 
   /**
-   * Set file blocks 
+   * Allocate space for blocks.
+   * @param nrBlocks number of blocks
    */
-  void setBlocks(Block[] blockList) {
-    this.blocks = blockList;
+  void allocateBlocks(int nrBlocks) {
+    this.blocks = new BlockInfo[nrBlocks];
+  }
+
+  /**
+   * Set file block
+   */
+  void setBlock(int idx, BlockInfo blk) {
+    this.blocks[idx] = blk;
   }
 
   /**