Posted to common-commits@hadoop.apache.org by ha...@apache.org on 2008/10/13 21:06:25 UTC

svn commit: r704206 - in /hadoop/core/branches/branch-0.18: ./ src/hdfs/org/apache/hadoop/dfs/ src/test/org/apache/hadoop/dfs/

Author: hairong
Date: Mon Oct 13 12:06:24 2008
New Revision: 704206

URL: http://svn.apache.org/viewvc?rev=704206&view=rev
Log:
HADOOP-4351. FSNamesystem.getBlockLocationsInternal throws ArrayIndexOutOfBoundsException. Contributed by Hairong Kuang.
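
For context on the failure mode (a condensed reading of the FSNamesystem
hunks below; the array allocation is paraphrased, since it lies outside the
diff context): getBlockLocationsInternal sized its result array using the
corrupt-replica count from the corruptReplicas map, but filled it by
iterating the nodes recorded in blocksMap. When the two maps disagreed,
for example when a datanode's block scanner reported a corrupt replica
before the block report that would have added that replica to blocksMap,
the array came out too small and the fill loop ran past its end:

    // Pre-patch logic, condensed from the removed lines in the first
    // FSNamesystem hunk below (the machineSet allocation is paraphrased).
    int numNodes = blocksMap.numNodes(blocks[curBlk]);       // from blocksMap
    Collection<DatanodeDescriptor> nodesCorrupt =
        corruptReplicas.getNodes(blocks[curBlk]);            // from corruptReplicas
    int numCorruptNodes = (nodesCorrupt == null) ? 0 : nodesCorrupt.size();
    boolean blockCorrupt = (numCorruptNodes == numNodes);
    int numMachineSet = blockCorrupt ? numNodes :
                          (numNodes - numCorruptNodes);
    DatanodeDescriptor[] machineSet = new DatanodeDescriptor[numMachineSet];
    // ... later, machineSet[numNodes++] = dn overruns the array whenever
    // corruptReplicas counts a replica that blocksMap does not know about.

The patch keeps the two maps consistent (per-replica removal in
CorruptReplicasMap, a replica-existence check in markBlockAsCorrupt) and
logs a warning whenever the counts still disagree.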

Modified:
    hadoop/core/branches/branch-0.18/CHANGES.txt
    hadoop/core/branches/branch-0.18/src/hdfs/org/apache/hadoop/dfs/BlocksMap.java
    hadoop/core/branches/branch-0.18/src/hdfs/org/apache/hadoop/dfs/CorruptReplicasMap.java
    hadoop/core/branches/branch-0.18/src/hdfs/org/apache/hadoop/dfs/FSNamesystem.java
    hadoop/core/branches/branch-0.18/src/test/org/apache/hadoop/dfs/TestFileCorruption.java

Modified: hadoop/core/branches/branch-0.18/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/core/branches/branch-0.18/CHANGES.txt?rev=704206&r1=704205&r2=704206&view=diff
==============================================================================
--- hadoop/core/branches/branch-0.18/CHANGES.txt (original)
+++ hadoop/core/branches/branch-0.18/CHANGES.txt Mon Oct 13 12:06:24 2008
@@ -18,6 +18,9 @@
     HADOOP-4395. The FSEditLog loading is incorrect for the case OP_SET_OWNER.
     (szetszwo)
 
+    HADOOP-4351. FSNamesystem.getBlockLocationsInternal throws
+    ArrayIndexOutOfBoundsException. (hairong)
+
   NEW FEATURES
 
     HADOOP-2421.  Add jdiff output to documentation, listing all API

Modified: hadoop/core/branches/branch-0.18/src/hdfs/org/apache/hadoop/dfs/BlocksMap.java
URL: http://svn.apache.org/viewvc/hadoop/core/branches/branch-0.18/src/hdfs/org/apache/hadoop/dfs/BlocksMap.java?rev=704206&r1=704205&r2=704206&view=diff
==============================================================================
--- hadoop/core/branches/branch-0.18/src/hdfs/org/apache/hadoop/dfs/BlocksMap.java (original)
+++ hadoop/core/branches/branch-0.18/src/hdfs/org/apache/hadoop/dfs/BlocksMap.java Mon Oct 13 12:06:24 2008
@@ -405,4 +405,18 @@
   boolean contains(Block block) {
     return map.containsKey(block);
   }
+  
+  /**
+   * Check if the replica at the given datanode exists in the map
+   */
+  boolean contains(Block block, DatanodeDescriptor datanode) {
+    BlockInfo info = map.get(block);
+    if (info == null)
+      return false;
+    
+    if (-1 == info.findDatanode(datanode))
+      return false;
+    
+    return true;
+  }
 }

Modified: hadoop/core/branches/branch-0.18/src/hdfs/org/apache/hadoop/dfs/CorruptReplicasMap.java
URL: http://svn.apache.org/viewvc/hadoop/core/branches/branch-0.18/src/hdfs/org/apache/hadoop/dfs/CorruptReplicasMap.java?rev=704206&r1=704205&r2=704206&view=diff
==============================================================================
--- hadoop/core/branches/branch-0.18/src/hdfs/org/apache/hadoop/dfs/CorruptReplicasMap.java (original)
+++ hadoop/core/branches/branch-0.18/src/hdfs/org/apache/hadoop/dfs/CorruptReplicasMap.java Mon Oct 13 12:06:24 2008
@@ -82,6 +82,28 @@
   }
 
   /**
+   * Remove the replica at the given datanode from the corrupt replicas map
+   * @param blk block whose replica is to be removed
+   * @param datanode datanode where the replica is located
+   * @return true if the removal is successful;
+   *         false if the replica is not in the map
+   */ 
+  boolean removeFromCorruptReplicasMap(Block blk, DatanodeDescriptor datanode) {
+    Collection<DatanodeDescriptor> datanodes = corruptReplicasMap.get(blk);
+    if (datanodes==null)
+      return false;
+    if (datanodes.remove(datanode)) { // remove the replica
+      if (datanodes.isEmpty()) {
+        // remove the block if there are no more corrupt replicas
+        corruptReplicasMap.remove(blk);
+      }
+      }
+      return true;
+    }
+    return false;
+  }
+    
+
+  /**
    * Get Nodes which have corrupt replicas of Block
    * 
    * @param blk Block for which nodes are requested

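For clarity, a small behavioral sketch of the new per-replica removal
(hypothetical blk, dnA, and dnB values; corruptReplicasMap maps a Block to
the collection of datanodes known to hold corrupt replicas of it, as the
method body above implies):

    // blk currently has corrupt replicas recorded on dnA and dnB
    removeFromCorruptReplicasMap(blk, dnA); // true: dnB still keeps blk mapped
    removeFromCorruptReplicasMap(blk, dnA); // false: dnA was already removed
    removeFromCorruptReplicasMap(blk, dnB); // true: blk's entry is removed entirely
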
Modified: hadoop/core/branches/branch-0.18/src/hdfs/org/apache/hadoop/dfs/FSNamesystem.java
URL: http://svn.apache.org/viewvc/hadoop/core/branches/branch-0.18/src/hdfs/org/apache/hadoop/dfs/FSNamesystem.java?rev=704206&r1=704205&r2=704206&view=diff
==============================================================================
--- hadoop/core/branches/branch-0.18/src/hdfs/org/apache/hadoop/dfs/FSNamesystem.java (original)
+++ hadoop/core/branches/branch-0.18/src/hdfs/org/apache/hadoop/dfs/FSNamesystem.java Mon Oct 13 12:06:24 2008
@@ -771,9 +771,13 @@
     do {
       // get block locations
       int numNodes = blocksMap.numNodes(blocks[curBlk]);
-      Collection<DatanodeDescriptor> nodesCorrupt = corruptReplicas.getNodes(
-                                                      blocks[curBlk]);
-      int numCorruptNodes = (nodesCorrupt == null) ? 0 : nodesCorrupt.size();
+      int numCorruptNodes = countNodes(blocks[curBlk]).corruptReplicas();
+      int numCorruptReplicas = corruptReplicas.numCorruptReplicas(blocks[curBlk]); 
+      if (numCorruptNodes != numCorruptReplicas) {
+        LOG.warn("Inconsistent number of corrupt replicas for " +
+            blocks[curBlk] + ". blockMap has " + numCorruptNodes +
+            " but corrupt replicas map has " + numCorruptReplicas);
+      }
       boolean blockCorrupt = (numCorruptNodes == numNodes);
       int numMachineSet = blockCorrupt ? numNodes : 
                             (numNodes - numCorruptNodes);
@@ -783,8 +787,7 @@
         for(Iterator<DatanodeDescriptor> it = 
             blocksMap.nodeIterator(blocks[curBlk]); it.hasNext();) {
           DatanodeDescriptor dn = it.next();
-          boolean replicaCorrupt = ((nodesCorrupt != null) &&
-                                    nodesCorrupt.contains(dn));
+          boolean replicaCorrupt = corruptReplicas.isReplicaCorrupt(blocks[curBlk], dn);
           if (blockCorrupt || (!blockCorrupt && !replicaCorrupt))
             machineSet[numNodes++] = dn;
         }
@@ -1354,20 +1357,28 @@
                             " as corrupt because datanode " + dn.getName() +
                             " does not exist. ");
     }
-    // Check if the block is associated with an inode, if not 
-    // ignore the request for now. This could happen when BlockScanner
-    // thread of Datanode reports bad block before Block reports are sent
-    // by the Datanode on startup
-    if (blocksMap.getINode(blk) == null) 
+    
+    if (!blocksMap.contains(blk, node)) {
+      // Check if the replica is in the blockMap, if not 
+      // ignore the request for now. This could happen when BlockScanner
+      // thread of Datanode reports bad block before Block reports are sent
+      // by the Datanode on startup
       NameNode.stateChangeLog.info("BLOCK NameSystem.markBlockAsCorrupt: " +
                                    "block " + blk + " could not be marked " +
                                    "as corrupt as it does not exists in " +
                                    "blocksMap");
-    else {
-      // Add this replica to corruptReplicas Map and 
-      // add the block to neededReplication 
-      corruptReplicas.addToCorruptReplicasMap(blk, node);
-      updateNeededReplications(blk, 0, 1);
+    } else {
+      INodeFile inode = blocksMap.getINode(blk);
+      assert inode != null : (blk + " in blocksMap must belong to a file.");
+      if (countNodes(blk).liveReplicas() > inode.getReplication()) {
+        // the block is over-replicated so invalidate the replicas immediately
+        invalidateBlock(blk, node);
+      } else {
+        // Add this replica to corruptReplicas Map and 
+        // add the block to neededReplication 
+        corruptReplicas.addToCorruptReplicasMap(blk, node);
+        updateNeededReplications(blk, -1, 0);
+      }
     }
   }
 
@@ -2835,8 +2846,14 @@
     }
     // If the file replication has reached desired value
     // we can remove any corrupt replicas the block may have
-    int corruptReplicasCount = num.corruptReplicas();
-    if ((corruptReplicasCount > 0) && (numLiveReplicas == fileReplication)) 
+    int corruptReplicasCount = corruptReplicas.numCorruptReplicas(block); 
+    int numCorruptNodes = num.corruptReplicas();
+    if (numCorruptNodes != corruptReplicasCount) {
+      LOG.warn("Inconsistent number of corrupt replicas for " +
+          block + ". blockMap has " + numCorruptNodes +
+          " but corrupt replicas map has " + corruptReplicasCount);
+    }
+    if ((corruptReplicasCount > 0) && (numLiveReplicas >= fileReplication)) 
       invalidateCorruptReplicas(block);
     return block;
   }
@@ -3101,9 +3118,8 @@
         excessReplicateMap.remove(node.getStorageID());
       }
     }
-    // If block is removed from blocksMap, remove it from corruptReplicas
-    if (fileINode == null)
-      corruptReplicas.removeFromCorruptReplicasMap(block);
+    // Remove the replica from corruptReplicas
+    corruptReplicas.removeFromCorruptReplicasMap(block, node);
   }
 
   /**

Modified: hadoop/core/branches/branch-0.18/src/test/org/apache/hadoop/dfs/TestFileCorruption.java
URL: http://svn.apache.org/viewvc/hadoop/core/branches/branch-0.18/src/test/org/apache/hadoop/dfs/TestFileCorruption.java?rev=704206&r1=704205&r2=704206&view=diff
==============================================================================
--- hadoop/core/branches/branch-0.18/src/test/org/apache/hadoop/dfs/TestFileCorruption.java (original)
+++ hadoop/core/branches/branch-0.18/src/test/org/apache/hadoop/dfs/TestFileCorruption.java Mon Oct 13 12:06:24 2008
@@ -19,12 +19,19 @@
 package org.apache.hadoop.dfs;
 
 import java.io.*;
+import java.util.ArrayList;
+
 import junit.framework.*;
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.BlockLocation;
+import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.LocalFileSystem;
 import org.apache.hadoop.fs.ChecksumException;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.dfs.Block;
+import org.apache.hadoop.dfs.DatanodeInfo;
+import org.apache.hadoop.dfs.GenerationStamp;
+import org.apache.hadoop.dfs.DataNode;
 
 /**
  * A JUnit test for corrupted file handling.
@@ -93,4 +100,84 @@
     }
     fs.delete(file, true);
   }
+  
+  /** Test the case where a replica is reported corrupt while it is not
+   * in blocksMap. Make sure that an ArrayIndexOutOfBoundsException is
+   * not thrown. See HADOOP-4351.
+   */
+  public void testArrayOutOfBoundsException() throws Exception {
+    MiniDFSCluster cluster = null;
+    try {
+      Configuration conf = new Configuration();
+      cluster = new MiniDFSCluster(conf, 2, true, null);
+      cluster.waitActive();
+      
+      FileSystem fs = cluster.getFileSystem();
+      final Path FILE_PATH = new Path("/tmp.txt");
+      final long FILE_LEN = 1L;
+      DFSTestUtil.createFile(fs, FILE_PATH, FILE_LEN, (short)2, 1L);
+      
+      // get the block
+      File dataDir = new File(System.getProperty("test.build.data"),
+          "dfs/data/data1/current");
+
+      Block blk = getBlock(dataDir);
+      if (blk == null) {
+        blk = getBlock(new File(System.getProperty("test.build.data"),
+          "dfs/data/data2/current"));
+      }
+      assertNotNull(blk);
+
+      // start a third datanode
+      cluster.startDataNodes(conf, 1, true, null, null);
+      ArrayList<DataNode> datanodes = cluster.getDataNodes();
+      assertEquals(3, datanodes.size());
+      DataNode dataNode = datanodes.get(2);
+      
+      // report corrupted block by the third datanode
+      cluster.getNameNode().namesystem.markBlockAsCorrupt(blk, 
+          new DatanodeInfo(dataNode.dnRegistration));
+      
+      // open the file
+      fs.open(FILE_PATH);
+      
+      //clean up
+      fs.delete(FILE_PATH, false);
+    } finally {
+      if (cluster != null) { cluster.shutdown(); }
+    }
+    
+  }
+  
+  private Block getBlock(File dataDir) {
+    assertTrue("data directory does not exist", dataDir.exists());
+    File[] blocks = dataDir.listFiles();
+    assertTrue("Blocks do not exist in dataDir", (blocks != null) && (blocks.length > 0));
+
+    File blockFile = null;
+    for (int idx = 0; idx < blocks.length; idx++) {
+      String fileName = blocks[idx].getName();
+      if (fileName.startsWith("blk_") && !fileName.endsWith(".meta")) {
+        blockFile = blocks[idx];
+        break;
+      }
+    }
+    if (blockFile == null) {
+      return null;
+    }
+    String blockFileName = blockFile.getName();
+    long blockId = Long.parseLong(blockFileName.substring("blk_".length()));
+    long blockTimeStamp = GenerationStamp.WILDCARD_STAMP;
+    // the metadata file is named <blockFileName>_<generationStamp>.meta
+    for (int idx = 0; idx < blocks.length; idx++) {
+      String fileName = blocks[idx].getName();
+      if (fileName.startsWith(blockFileName) && fileName.endsWith(".meta")) {
+        int startIndex = blockFileName.length() + 1;
+        int endIndex = fileName.length() - ".meta".length();
+        blockTimeStamp = Long.parseLong(fileName.substring(startIndex, endIndex));
+        break;
+      }
+    }
+    return new Block(blockId, blockFile.length(), blockTimeStamp);
+  }
 }
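
To run just this regression test against branch-0.18 (hedged: this assumes
the stock Ant build of that era and its usual single-test selector):

    ant test-core -Dtestcase=TestFileCorruption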