You are viewing a plain text version of this content. The canonical link to the original was a hyperlink that is not preserved in this plain-text copy.
Posted to common-commits@hadoop.apache.org by cu...@apache.org on 2007/05/02 23:37:22 UTC

svn commit: r534624 - in /lucene/hadoop/trunk: CHANGES.txt src/java/org/apache/hadoop/dfs/FSNamesystem.java

Author: cutting
Date: Wed May  2 14:37:21 2007
New Revision: 534624

URL: http://svn.apache.org/viewvc?view=rev&rev=534624
Log:
HADOOP-1312.  Fix a ConcurrentModificationException in NameNode that killed the heartbeat monitoring thread.  Contributed by Dhruba.

Modified:
    lucene/hadoop/trunk/CHANGES.txt
    lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSNamesystem.java

Modified: lucene/hadoop/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/CHANGES.txt?view=diff&rev=534624&r1=534623&r2=534624
==============================================================================
--- lucene/hadoop/trunk/CHANGES.txt (original)
+++ lucene/hadoop/trunk/CHANGES.txt Wed May  2 14:37:21 2007
@@ -312,6 +312,10 @@
 92. HADOOP-1308.  Use generics to restrict types when classes are
     passed as parameters to JobConf methods. (Michael Bieniosek via cutting)
 
+93. HADOOP-1312.  Fix a ConcurrentModificationException in NameNode
+    that killed the heartbeat monitoring thread.
+    (Dhruba Borthakur via cutting)
+
 
 Release 0.12.3 - 2007-04-06
 

Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSNamesystem.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSNamesystem.java?view=diff&rev=534624&r1=534623&r2=534624
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSNamesystem.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSNamesystem.java Wed May  2 14:37:21 2007
@@ -1253,29 +1253,33 @@
    ******************************************************/
   class LeaseMonitor implements Runnable {
     public void run() {
-      while (fsRunning) {
-        synchronized (FSNamesystem.this) {
-          synchronized (leases) {
-            Lease top;
-            while ((sortedLeases.size() > 0) &&
-                   ((top = sortedLeases.first()) != null)) {
-              if (top.expiredHardLimit()) {
-                top.releaseLocks();
-                leases.remove(top.holder);
-                LOG.info("Removing lease " + top + ", leases remaining: " + sortedLeases.size());
-                if (!sortedLeases.remove(top)) {
-                  LOG.info("Unknown failure trying to remove " + top + " from lease set.");
+      try {
+        while (fsRunning) {
+          synchronized (FSNamesystem.this) {
+            synchronized (leases) {
+              Lease top;
+              while ((sortedLeases.size() > 0) &&
+                     ((top = sortedLeases.first()) != null)) {
+                if (top.expiredHardLimit()) {
+                  top.releaseLocks();
+                  leases.remove(top.holder);
+                  LOG.info("Removing lease " + top + ", leases remaining: " + sortedLeases.size());
+                  if (!sortedLeases.remove(top)) {
+                    LOG.info("Unknown failure trying to remove " + top + " from lease set.");
+                  }
+                } else {
+                  break;
                 }
-              } else {
-                break;
               }
             }
           }
+          try {
+            Thread.sleep(2000);
+          } catch (InterruptedException ie) {
+          }
         }
-        try {
-          Thread.sleep(2000);
-        } catch (InterruptedException ie) {
-        }
+      } catch (Exception e) {
+        FSNamesystem.LOG.error(StringUtils.stringifyException(e));
       }
     }
   }
@@ -1636,7 +1640,11 @@
      */
     public void run() {
       while (fsRunning) {
-        heartbeatCheck();
+        try {
+          heartbeatCheck();
+        } catch (Exception e) {
+          FSNamesystem.LOG.error(StringUtils.stringifyException(e));
+        }
         try {
           Thread.sleep(heartbeatRecheckInterval);
         } catch (InterruptedException ie) {
@@ -1809,10 +1817,12 @@
    * @author hairong
    */
   private void removeDatanode(DatanodeDescriptor nodeInfo) {
-    if (nodeInfo.isAlive) {
-      updateStats(nodeInfo, false);
-      heartbeats.remove(nodeInfo);
-      nodeInfo.isAlive = false;
+    synchronized (heartbeats) {
+      if (nodeInfo.isAlive) {
+        updateStats(nodeInfo, false);
+        heartbeats.remove(nodeInfo);
+        nodeInfo.isAlive = false;
+      }
     }
 
     for (Iterator<Block> it = nodeInfo.getBlockIterator(); it.hasNext();) {