You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by sz...@apache.org on 2008/10/17 20:14:05 UTC
svn commit: r705691 - in /hadoop/core/trunk: CHANGES.txt
src/hdfs/org/apache/hadoop/hdfs/DFSClient.java
Author: szetszwo
Date: Fri Oct 17 11:14:05 2008
New Revision: 705691
URL: http://svn.apache.org/viewvc?rev=705691&view=rev
Log:
HADOOP-4278. If the primary datanode fails in DFSClient, remove it from the pipeline. (dhruba via szetszwo)
Modified:
hadoop/core/trunk/CHANGES.txt
hadoop/core/trunk/src/hdfs/org/apache/hadoop/hdfs/DFSClient.java
Modified: hadoop/core/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/CHANGES.txt?rev=705691&r1=705690&r2=705691&view=diff
==============================================================================
--- hadoop/core/trunk/CHANGES.txt (original)
+++ hadoop/core/trunk/CHANGES.txt Fri Oct 17 11:14:05 2008
@@ -953,6 +953,9 @@
HADOOP-4427. Adds the new queue/job commands to the manual.
(Sreekanth Ramakrishnan via ddas)
+ HADOOP-4278. If the primary datanode fails in DFSClient, remove it from
+ the pipeline. (dhruba via szetszwo)
+
Release 0.18.2 - Unreleased
BUG FIXES
Modified: hadoop/core/trunk/src/hdfs/org/apache/hadoop/hdfs/DFSClient.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/hdfs/org/apache/hadoop/hdfs/DFSClient.java?rev=705691&r1=705690&r2=705691&view=diff
==============================================================================
--- hadoop/core/trunk/src/hdfs/org/apache/hadoop/hdfs/DFSClient.java (original)
+++ hadoop/core/trunk/src/hdfs/org/apache/hadoop/hdfs/DFSClient.java Fri Oct 17 11:14:05 2008
@@ -2448,17 +2448,34 @@
//
LocatedBlock newBlock = null;
ClientDatanodeProtocol primary = null;
+ DatanodeInfo primaryNode = null;
try {
// Pick the "least" datanode as the primary datanode to avoid deadlock.
- primary = createClientDatanodeProtocolProxy(
- Collections.min(Arrays.asList(newnodes)), conf);
+ primaryNode = Collections.min(Arrays.asList(newnodes));
+ primary = createClientDatanodeProtocolProxy(primaryNode, conf);
newBlock = primary.recoverBlock(block, newnodes);
} catch (IOException e) {
recoveryErrorCount++;
if (recoveryErrorCount > maxRecoveryErrorCount) {
+ if (nodes.length > 1) {
+ // if the primary datanode failed, remove it from the list.
+ // The original bad datanode is left in the list because it is
+ // conservative to remove only one datanode in one iteration.
+ for (int j = 0; j < nodes.length; j++) {
+ if (nodes[j] == primaryNode) {
+ errorIndex = j; // forget original bad node.
+ }
+ }
+ LOG.warn("Error Recovery for block " + block + " failed " +
+ " because recovery from primary datanode " +
+ primaryNode + " failed " + recoveryErrorCount +
+ " times. Marking primary datanode as bad.");
+ recoveryErrorCount = 0;
+ return true; // sleep when we return from here
+ }
String emsg = "Error Recovery for block " + block + " failed " +
" because recovery from primary datanode " +
- newnodes[0] + " failed " + recoveryErrorCount +
+ primaryNode + " failed " + recoveryErrorCount +
" times. Aborting...";
LOG.warn(emsg);
lastException = new IOException(emsg);
@@ -2468,7 +2485,7 @@
}
LOG.warn("Error Recovery for block " + block + " failed " +
" because recovery from primary datanode " +
- newnodes[0] + " failed " + recoveryErrorCount +
+ primaryNode + " failed " + recoveryErrorCount +
" times. Will retry...");
return true; // sleep when we return from here
} finally {