You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by cu...@apache.org on 2006/10/06 23:21:01 UTC

svn commit: r453776 - in /lucene/hadoop/trunk: CHANGES.txt src/java/org/apache/hadoop/dfs/DataNode.java

Author: cutting
Date: Fri Oct  6 14:21:00 2006
New Revision: 453776

URL: http://svn.apache.org/viewvc?view=rev&rev=453776
Log:
HADOOP-581.  Fix datanode to not reset itself on communications errors with namenode.  Contributed by Owen.

Modified:
    lucene/hadoop/trunk/CHANGES.txt
    lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DataNode.java

Modified: lucene/hadoop/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/CHANGES.txt?view=diff&rev=453776&r1=453775&r2=453776
==============================================================================
--- lucene/hadoop/trunk/CHANGES.txt (original)
+++ lucene/hadoop/trunk/CHANGES.txt Fri Oct  6 14:21:00 2006
@@ -150,6 +150,12 @@
     This should improve some DFS namenode failure modes.
     (omalley via cutting)
 
+36. HADOOP-581.  Fix datanode to not reset itself on communications
+    errors with the namenode.  If a request to the namenode fails, the
+    datanode should retry, not restart.  This reduces the load on the
+    namenode, since restarts cause a resend of the block report.
+    (omalley via cutting)
+
 
 Release 0.6.2 - 2006-09-18
 

Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DataNode.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DataNode.java?view=diff&rev=453776&r1=453775&r2=453776
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DataNode.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DataNode.java Fri Oct  6 14:21:00 2006
@@ -323,8 +323,8 @@
       // Now loop for a long time....
       //
 
-      try {
-        while (shouldRun) {
+      while (shouldRun) {
+        try {
           long now = System.currentTimeMillis();
 
           //
@@ -411,11 +411,15 @@
               // Send newly-received blockids to namenode
               //
               blockArray = (Block[]) receivedBlockList.toArray(new Block[receivedBlockList.size()]);
-              receivedBlockList.removeAllElements();
             }
           }
           if( blockArray != null ) {
             namenode.blockReceived( dnRegistration, blockArray );
+            synchronized (receivedBlockList) {
+              for(Block b: blockArray) {
+                receivedBlockList.remove(b);
+              }
+            }
           }
             
           //
@@ -431,19 +435,22 @@
               }
             }
           } // synchronized
-        } // while (shouldRun)
-      } catch(DiskErrorException e) {
-        handleDiskError(e.getLocalizedMessage());
-      } catch( RemoteException re ) {
-        String reClass = re.getClassName();
-        if( UnregisteredDatanodeException.class.getName().equals( reClass )) {
-          LOG.warn( "DataNode is shutting down: " + 
-                    StringUtils.stringifyException(re));
-          shutdown();
+        } catch(DiskErrorException e) {
+          handleDiskError(e.getLocalizedMessage());
           return;
+        } catch( RemoteException re ) {
+          String reClass = re.getClassName();
+          if( UnregisteredDatanodeException.class.getName().equals( reClass )) {
+            LOG.warn( "DataNode is shutting down: " + 
+                      StringUtils.stringifyException(re));
+            shutdown();
+            return;
+          }
+          LOG.warn(StringUtils.stringifyException(re));
+        } catch (IOException e) {
+          LOG.warn(StringUtils.stringifyException(e));
         }
-        throw re;
-      }
+      } // while (shouldRun)
     } // offerService
 
     
@@ -968,9 +975,8 @@
             try {
                 offerService();
             } catch (Exception ex) {
-                LOG.info("Exception: " + ex);
+              LOG.error("Exception: " + StringUtils.stringifyException(ex));
               if (shouldRun) {
-                LOG.info("Lost connection to namenode.  Retrying...");
                 try {
                   Thread.sleep(5000);
                 } catch (InterruptedException ie) {