You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by cu...@apache.org on 2006/10/06 23:21:01 UTC
svn commit: r453776 - in /lucene/hadoop/trunk: CHANGES.txt
src/java/org/apache/hadoop/dfs/DataNode.java
Author: cutting
Date: Fri Oct 6 14:21:00 2006
New Revision: 453776
URL: http://svn.apache.org/viewvc?view=rev&rev=453776
Log:
HADOOP-581. Fix datanode to not reset itself on communications errors with namenode. Contributed by Owen.
Modified:
lucene/hadoop/trunk/CHANGES.txt
lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DataNode.java
Modified: lucene/hadoop/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/CHANGES.txt?view=diff&rev=453776&r1=453775&r2=453776
==============================================================================
--- lucene/hadoop/trunk/CHANGES.txt (original)
+++ lucene/hadoop/trunk/CHANGES.txt Fri Oct 6 14:21:00 2006
@@ -150,6 +150,12 @@
This should improve some DFS namenode failure modes.
(omalley via cutting)
+36. HADOOP-581. Fix datanode to not reset itself on communications
+ errors with the namenode. If a request to the namenode fails, the
+ datanode should retry, not restart. This reduces the load on the
+ namenode, since restarts cause a resend of the block report.
+ (omalley via cutting)
+
Release 0.6.2 - 2006-09-18
Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DataNode.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DataNode.java?view=diff&rev=453776&r1=453775&r2=453776
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DataNode.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DataNode.java Fri Oct 6 14:21:00 2006
@@ -323,8 +323,8 @@
// Now loop for a long time....
//
- try {
- while (shouldRun) {
+ while (shouldRun) {
+ try {
long now = System.currentTimeMillis();
//
@@ -411,11 +411,15 @@
// Send newly-received blockids to namenode
//
blockArray = (Block[]) receivedBlockList.toArray(new Block[receivedBlockList.size()]);
- receivedBlockList.removeAllElements();
}
}
if( blockArray != null ) {
namenode.blockReceived( dnRegistration, blockArray );
+ synchronized (receivedBlockList) {
+ for(Block b: blockArray) {
+ receivedBlockList.remove(b);
+ }
+ }
}
//
@@ -431,19 +435,22 @@
}
}
} // synchronized
- } // while (shouldRun)
- } catch(DiskErrorException e) {
- handleDiskError(e.getLocalizedMessage());
- } catch( RemoteException re ) {
- String reClass = re.getClassName();
- if( UnregisteredDatanodeException.class.getName().equals( reClass )) {
- LOG.warn( "DataNode is shutting down: " +
- StringUtils.stringifyException(re));
- shutdown();
+ } catch(DiskErrorException e) {
+ handleDiskError(e.getLocalizedMessage());
return;
+ } catch( RemoteException re ) {
+ String reClass = re.getClassName();
+ if( UnregisteredDatanodeException.class.getName().equals( reClass )) {
+ LOG.warn( "DataNode is shutting down: " +
+ StringUtils.stringifyException(re));
+ shutdown();
+ return;
+ }
+ LOG.warn(StringUtils.stringifyException(re));
+ } catch (IOException e) {
+ LOG.warn(StringUtils.stringifyException(e));
}
- throw re;
- }
+ } // while (shouldRun)
} // offerService
@@ -968,9 +975,8 @@
try {
offerService();
} catch (Exception ex) {
- LOG.info("Exception: " + ex);
+ LOG.error("Exception: " + StringUtils.stringifyException(ex));
if (shouldRun) {
- LOG.info("Lost connection to namenode. Retrying...");
try {
Thread.sleep(5000);
} catch (InterruptedException ie) {