You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by cu...@apache.org on 2007/05/02 23:37:22 UTC
svn commit: r534624 - in /lucene/hadoop/trunk: CHANGES.txt src/java/org/apache/hadoop/dfs/FSNamesystem.java
Author: cutting
Date: Wed May 2 14:37:21 2007
New Revision: 534624
URL: http://svn.apache.org/viewvc?view=rev&rev=534624
Log:
HADOOP-1312. Fix a ConcurrentModificationException in NameNode that killed the heartbeat monitoring thread. Contributed by Dhruba.
Modified:
lucene/hadoop/trunk/CHANGES.txt
lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSNamesystem.java
Modified: lucene/hadoop/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/CHANGES.txt?view=diff&rev=534624&r1=534623&r2=534624
==============================================================================
--- lucene/hadoop/trunk/CHANGES.txt (original)
+++ lucene/hadoop/trunk/CHANGES.txt Wed May 2 14:37:21 2007
@@ -312,6 +312,10 @@
92. HADOOP-1308. Use generics to restrict types when classes are
passed as parameters to JobConf methods. (Michael Bieniosek via cutting)
+93. HADOOP-1312. Fix a ConcurrentModificationException in NameNode
+ that killed the heartbeat monitoring thread.
+ (Dhruba Borthakur via cutting)
+
Release 0.12.3 - 2007-04-06
Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSNamesystem.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSNamesystem.java?view=diff&rev=534624&r1=534623&r2=534624
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSNamesystem.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSNamesystem.java Wed May 2 14:37:21 2007
@@ -1253,29 +1253,33 @@
******************************************************/
class LeaseMonitor implements Runnable {
public void run() {
- while (fsRunning) {
- synchronized (FSNamesystem.this) {
- synchronized (leases) {
- Lease top;
- while ((sortedLeases.size() > 0) &&
- ((top = sortedLeases.first()) != null)) {
- if (top.expiredHardLimit()) {
- top.releaseLocks();
- leases.remove(top.holder);
- LOG.info("Removing lease " + top + ", leases remaining: " + sortedLeases.size());
- if (!sortedLeases.remove(top)) {
- LOG.info("Unknown failure trying to remove " + top + " from lease set.");
+ try {
+ while (fsRunning) {
+ synchronized (FSNamesystem.this) {
+ synchronized (leases) {
+ Lease top;
+ while ((sortedLeases.size() > 0) &&
+ ((top = sortedLeases.first()) != null)) {
+ if (top.expiredHardLimit()) {
+ top.releaseLocks();
+ leases.remove(top.holder);
+ LOG.info("Removing lease " + top + ", leases remaining: " + sortedLeases.size());
+ if (!sortedLeases.remove(top)) {
+ LOG.info("Unknown failure trying to remove " + top + " from lease set.");
+ }
+ } else {
+ break;
}
- } else {
- break;
}
}
}
+ try {
+ Thread.sleep(2000);
+ } catch (InterruptedException ie) {
+ }
}
- try {
- Thread.sleep(2000);
- } catch (InterruptedException ie) {
- }
+ } catch (Exception e) {
+ FSNamesystem.LOG.error(StringUtils.stringifyException(e));
}
}
}
@@ -1636,7 +1640,11 @@
*/
public void run() {
while (fsRunning) {
- heartbeatCheck();
+ try {
+ heartbeatCheck();
+ } catch (Exception e) {
+ FSNamesystem.LOG.error(StringUtils.stringifyException(e));
+ }
try {
Thread.sleep(heartbeatRecheckInterval);
} catch (InterruptedException ie) {
@@ -1809,10 +1817,12 @@
* @author hairong
*/
private void removeDatanode(DatanodeDescriptor nodeInfo) {
- if (nodeInfo.isAlive) {
- updateStats(nodeInfo, false);
- heartbeats.remove(nodeInfo);
- nodeInfo.isAlive = false;
+ synchronized (heartbeats) {
+ if (nodeInfo.isAlive) {
+ updateStats(nodeInfo, false);
+ heartbeats.remove(nodeInfo);
+ nodeInfo.isAlive = false;
+ }
}
for (Iterator<Block> it = nodeInfo.getBlockIterator(); it.hasNext();) {