You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by ha...@apache.org on 2008/10/13 20:54:48 UTC
svn commit: r704203 - in /hadoop/core/trunk: ./
src/hdfs/org/apache/hadoop/hdfs/server/namenode/
src/test/org/apache/hadoop/hdfs/
Author: hairong
Date: Mon Oct 13 11:54:48 2008
New Revision: 704203
URL: http://svn.apache.org/viewvc?rev=704203&view=rev
Log:
HADOOP-4351. FSNamesystem.getBlockLocationsInternal throws ArrayIndexOutOfBoundsException. Contributed by Hairong Kuang.
Modified:
hadoop/core/trunk/CHANGES.txt
hadoop/core/trunk/src/hdfs/org/apache/hadoop/hdfs/server/namenode/BlocksMap.java
hadoop/core/trunk/src/hdfs/org/apache/hadoop/hdfs/server/namenode/CorruptReplicasMap.java
hadoop/core/trunk/src/hdfs/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
hadoop/core/trunk/src/test/org/apache/hadoop/hdfs/TestFileCorruption.java
Modified: hadoop/core/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/CHANGES.txt?rev=704203&r1=704202&r2=704203&view=diff
==============================================================================
--- hadoop/core/trunk/CHANGES.txt (original)
+++ hadoop/core/trunk/CHANGES.txt Mon Oct 13 11:54:48 2008
@@ -907,6 +907,9 @@
HADOOP-4395. The FSEditLog loading is incorrect for the case OP_SET_OWNER.
(szetszwo)
+ HADOOP-4351. FSNamesystem.getBlockLocationsInternal throws
+ ArrayIndexOutOfBoundsException. (hairong)
+
Release 0.18.1 - 2008-09-17
IMPROVEMENTS
Modified: hadoop/core/trunk/src/hdfs/org/apache/hadoop/hdfs/server/namenode/BlocksMap.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/hdfs/org/apache/hadoop/hdfs/server/namenode/BlocksMap.java?rev=704203&r1=704202&r2=704203&view=diff
==============================================================================
--- hadoop/core/trunk/src/hdfs/org/apache/hadoop/hdfs/server/namenode/BlocksMap.java (original)
+++ hadoop/core/trunk/src/hdfs/org/apache/hadoop/hdfs/server/namenode/BlocksMap.java Mon Oct 13 11:54:48 2008
@@ -407,4 +407,18 @@
boolean contains(Block block) {
return map.containsKey(block);
}
+
+ /**
+ * Check if the replica at the given datanode exists in map
+ */
+ boolean contains(Block block, DatanodeDescriptor datanode) {
+ BlockInfo info = map.get(block);
+ if (info == null)
+ return false;
+
+ if (-1 == info.findDatanode(datanode))
+ return false;
+
+ return true;
+ }
}
Modified: hadoop/core/trunk/src/hdfs/org/apache/hadoop/hdfs/server/namenode/CorruptReplicasMap.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/hdfs/org/apache/hadoop/hdfs/server/namenode/CorruptReplicasMap.java?rev=704203&r1=704202&r2=704203&view=diff
==============================================================================
--- hadoop/core/trunk/src/hdfs/org/apache/hadoop/hdfs/server/namenode/CorruptReplicasMap.java (original)
+++ hadoop/core/trunk/src/hdfs/org/apache/hadoop/hdfs/server/namenode/CorruptReplicasMap.java Mon Oct 13 11:54:48 2008
@@ -83,6 +83,28 @@
}
/**
+ * Remove the block at the given datanode from CorruptBlockMap
+ * @param blk block to be removed
+ * @param datanode datanode where the block is located
+ * @return true if the removal is successful;
+ false if the replica is not in the map
+ */
+ boolean removeFromCorruptReplicasMap(Block blk, DatanodeDescriptor datanode) {
+ Collection<DatanodeDescriptor> datanodes = corruptReplicasMap.get(blk);
+ if (datanodes==null)
+ return false;
+ if (datanodes.remove(datanode)) { // remove the replicas
+ if (datanodes.isEmpty()) {
+ // remove the block if there is no more corrupted replicas
+ corruptReplicasMap.remove(blk);
+ }
+ return true;
+ }
+ return false;
+ }
+
+
+ /**
* Get Nodes which have corrupt replicas of Block
*
* @param blk Block for which nodes are requested
Modified: hadoop/core/trunk/src/hdfs/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/hdfs/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java?rev=704203&r1=704202&r2=704203&view=diff
==============================================================================
--- hadoop/core/trunk/src/hdfs/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java (original)
+++ hadoop/core/trunk/src/hdfs/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java Mon Oct 13 11:54:48 2008
@@ -830,9 +830,13 @@
do {
// get block locations
int numNodes = blocksMap.numNodes(blocks[curBlk]);
- Collection<DatanodeDescriptor> nodesCorrupt = corruptReplicas.getNodes(
- blocks[curBlk]);
- int numCorruptNodes = (nodesCorrupt == null) ? 0 : nodesCorrupt.size();
+ int numCorruptNodes = countNodes(blocks[curBlk]).corruptReplicas();
+ int numCorruptReplicas = corruptReplicas.numCorruptReplicas(blocks[curBlk]);
+ if (numCorruptNodes != numCorruptReplicas) {
+ LOG.warn("Inconsistent number of corrupt replicas for " +
+ blocks[curBlk] + "blockMap has " + numCorruptNodes +
+ " but corrupt replicas map has " + numCorruptReplicas);
+ }
boolean blockCorrupt = (numCorruptNodes == numNodes);
int numMachineSet = blockCorrupt ? numNodes :
(numNodes - numCorruptNodes);
@@ -842,8 +846,7 @@
for(Iterator<DatanodeDescriptor> it =
blocksMap.nodeIterator(blocks[curBlk]); it.hasNext();) {
DatanodeDescriptor dn = it.next();
- boolean replicaCorrupt = ((nodesCorrupt != null) &&
- nodesCorrupt.contains(dn));
+ boolean replicaCorrupt = corruptReplicas.isReplicaCorrupt(blocks[curBlk], dn);
if (blockCorrupt || (!blockCorrupt && !replicaCorrupt))
machineSet[numNodes++] = dn;
}
@@ -1526,20 +1529,28 @@
" as corrupt because datanode " + dn.getName() +
" does not exist. ");
}
- // Check if the block is associated with an inode, if not
- // ignore the request for now. This could happen when BlockScanner
- // thread of Datanode reports bad block before Block reports are sent
- // by the Datanode on startup
- if (blocksMap.getINode(blk) == null)
+
+ if (!blocksMap.contains(blk, node)) {
+ // Check if the replica is in the blockMap, if not
+ // ignore the request for now. This could happen when BlockScanner
+ // thread of Datanode reports bad block before Block reports are sent
+ // by the Datanode on startup
NameNode.stateChangeLog.info("BLOCK NameSystem.markBlockAsCorrupt: " +
"block " + blk + " could not be marked " +
"as corrupt as it does not exists in " +
"blocksMap");
- else {
- // Add this replica to corruptReplicas Map and
- // add the block to neededReplication
- corruptReplicas.addToCorruptReplicasMap(blk, node);
- updateNeededReplications(blk, 0, 1);
+ } else {
+ INodeFile inode = blocksMap.getINode(blk);
+ assert inode!=null : (blk + " in blocksMap must belongs to a file.");
+ if (countNodes(blk).liveReplicas()>inode.getReplication()) {
+ // the block is over-replicated so invalidate the replicas immediately
+ invalidateBlock(blk, node);
+ } else {
+ // Add this replica to corruptReplicas Map and
+ // add the block to neededReplication
+ corruptReplicas.addToCorruptReplicasMap(blk, node);
+ updateNeededReplications(blk, -1, 0);
+ }
}
}
@@ -2921,8 +2932,14 @@
}
// If the file replication has reached desired value
// we can remove any corrupt replicas the block may have
- int corruptReplicasCount = num.corruptReplicas();
- if ((corruptReplicasCount > 0) && (numLiveReplicas == fileReplication))
+ int corruptReplicasCount = corruptReplicas.numCorruptReplicas(block);
+ int numCorruptNodes = num.corruptReplicas();
+ if ( numCorruptNodes != corruptReplicasCount) {
+ LOG.warn("Inconsistent number of corrupt replicas for " +
+ block + "blockMap has " + numCorruptNodes +
+ " but corrupt replicas map has " + corruptReplicasCount);
+ }
+ if ((corruptReplicasCount > 0) && (numLiveReplicas >= fileReplication))
invalidateCorruptReplicas(block);
return block;
}
@@ -3187,9 +3204,8 @@
excessReplicateMap.remove(node.getStorageID());
}
}
- // If block is removed from blocksMap, remove it from corruptReplicas
- if (fileINode == null)
- corruptReplicas.removeFromCorruptReplicasMap(block);
+ // Remove the replica from corruptReplicas
+ corruptReplicas.removeFromCorruptReplicasMap(block, node);
}
/**
Modified: hadoop/core/trunk/src/test/org/apache/hadoop/hdfs/TestFileCorruption.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/test/org/apache/hadoop/hdfs/TestFileCorruption.java?rev=704203&r1=704202&r2=704203&view=diff
==============================================================================
--- hadoop/core/trunk/src/test/org/apache/hadoop/hdfs/TestFileCorruption.java (original)
+++ hadoop/core/trunk/src/test/org/apache/hadoop/hdfs/TestFileCorruption.java Mon Oct 13 11:54:48 2008
@@ -19,12 +19,19 @@
package org.apache.hadoop.hdfs;
import java.io.*;
+import java.util.ArrayList;
+
import junit.framework.*;
import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.BlockLocation;
+import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.LocalFileSystem;
import org.apache.hadoop.fs.ChecksumException;
import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hdfs.protocol.Block;
+import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
+import org.apache.hadoop.hdfs.server.common.GenerationStamp;
+import org.apache.hadoop.hdfs.server.datanode.DataNode;
/**
* A JUnit test for corrupted file handling.
@@ -93,4 +100,81 @@
}
fs.delete(file, true);
}
+
+ /** Test the case that a replica is reported corrupt while it is not
+ * in blocksMap. Make sure that ArrayIndexOutOfBounds does not thrown.
+ * See Hadoop-4351.
+ */
+ public void testArrayOutOfBoundsException() throws Exception {
+ MiniDFSCluster cluster = null;
+ try {
+ Configuration conf = new Configuration();
+ cluster = new MiniDFSCluster(conf, 2, true, null);
+ cluster.waitActive();
+
+ FileSystem fs = cluster.getFileSystem();
+ final Path FILE_PATH = new Path("/tmp.txt");
+ final long FILE_LEN = 1L;
+ DFSTestUtil.createFile(fs, FILE_PATH, FILE_LEN, (short)2, 1L);
+
+ // get the block
+ File dataDir = new File(cluster.getDataDirectory(),
+ "data1/current");
+ Block blk = getBlock(dataDir);
+ if (blk == null) {
+ blk = getBlock(new File(cluster.getDataDirectory(),
+ "dfs/data/data2/current"));
+ }
+ assertFalse(blk==null);
+
+ // start a third datanode
+ cluster.startDataNodes(conf, 1, true, null, null);
+ ArrayList<DataNode> datanodes = cluster.getDataNodes();
+ assertEquals(datanodes.size(), 3);
+ DataNode dataNode = datanodes.get(2);
+
+ // report corrupted block by the third datanode
+ cluster.getNameNode().namesystem.markBlockAsCorrupt(blk,
+ new DatanodeInfo(dataNode.dnRegistration ));
+
+ // open the file
+ fs.open(FILE_PATH);
+
+ //clean up
+ fs.delete(FILE_PATH, false);
+ } finally {
+ if (cluster != null) { cluster.shutdown(); }
+ }
+
+ }
+
+ private Block getBlock(File dataDir) {
+ assertTrue("data directory does not exist", dataDir.exists());
+ File[] blocks = dataDir.listFiles();
+ assertTrue("Blocks do not exist in dataDir", (blocks != null) && (blocks.length > 0));
+
+ int idx = 0;
+ String blockFileName = null;
+ for (; idx < blocks.length; idx++) {
+ blockFileName = blocks[idx].getName();
+ if (blockFileName.startsWith("blk_") && !blockFileName.endsWith(".meta")) {
+ break;
+ }
+ }
+ if (blockFileName == null) {
+ return null;
+ }
+ long blockId = Long.parseLong(blockFileName.substring("blk_".length()));
+ long blockTimeStamp = GenerationStamp.WILDCARD_STAMP;
+ for (idx=0; idx < blocks.length; idx++) {
+ String fileName = blocks[idx].getName();
+ if (fileName.startsWith(blockFileName) && fileName.endsWith(".meta")) {
+ int startIndex = blockFileName.length()+1;
+ int endIndex = fileName.length() - ".meta".length();
+ blockTimeStamp = Long.parseLong(fileName.substring(startIndex, endIndex));
+ break;
+ }
+ }
+ return new Block(blockId, blocks[idx].length(), blockTimeStamp);
+ }
}