You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by cd...@apache.org on 2011/05/25 04:23:21 UTC
svn commit: r1127362 - in /hadoop/common/branches/branch-0.20-security:
CHANGES.txt
src/mapred/org/apache/hadoop/filecache/TrackerDistributedCacheManager.java
src/test/org/apache/hadoop/filecache/TestTrackerDistributedCacheManager.java
Author: cdouglas
Date: Wed May 25 02:23:20 2011
New Revision: 1127362
URL: http://svn.apache.org/viewvc?rev=1127362&view=rev
Log:
MAPREDUCE-2495. exit() the TaskTracker when the distributed cache cleanup
thread dies. Contributed by Robert Joseph Evans
Modified:
hadoop/common/branches/branch-0.20-security/CHANGES.txt
hadoop/common/branches/branch-0.20-security/src/mapred/org/apache/hadoop/filecache/TrackerDistributedCacheManager.java
hadoop/common/branches/branch-0.20-security/src/test/org/apache/hadoop/filecache/TestTrackerDistributedCacheManager.java
Modified: hadoop/common/branches/branch-0.20-security/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20-security/CHANGES.txt?rev=1127362&r1=1127361&r2=1127362&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.20-security/CHANGES.txt (original)
+++ hadoop/common/branches/branch-0.20-security/CHANGES.txt Wed May 25 02:23:20 2011
@@ -35,6 +35,9 @@ Release 0.20.205.0 - unreleased
MAPREDUCE-2490. Add logging to graylist and blacklist activity to aid
diagnosis of related issues. (Jonathan Eagles via cdouglas)
+ MAPREDUCE-2495. exit() the TaskTracker when the distributed cache cleanup
+ thread dies. (Robert Joseph Evans via cdouglas)
+
Release 0.20.204.0 - unreleased
BUG FIXES
Modified: hadoop/common/branches/branch-0.20-security/src/mapred/org/apache/hadoop/filecache/TrackerDistributedCacheManager.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20-security/src/mapred/org/apache/hadoop/filecache/TrackerDistributedCacheManager.java?rev=1127362&r1=1127361&r2=1127362&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.20-security/src/mapred/org/apache/hadoop/filecache/TrackerDistributedCacheManager.java (original)
+++ hadoop/common/branches/branch-0.20-security/src/mapred/org/apache/hadoop/filecache/TrackerDistributedCacheManager.java Wed May 25 02:23:20 2011
@@ -93,8 +93,8 @@ public class TrackerDistributedCacheMana
private static final Random random = new Random();
- BaseDirManager baseDirManager = new BaseDirManager();
- CleanupThread cleanupThread;
+ protected BaseDirManager baseDirManager = new BaseDirManager();
+ protected CleanupThread cleanupThread;
public TrackerDistributedCacheManager(Configuration conf,
TaskController controller
@@ -874,7 +874,7 @@ public class TrackerDistributedCacheMana
/**
* A thread to check and cleanup the unused files periodically
*/
- private class CleanupThread extends Thread {
+ protected class CleanupThread extends Thread {
// How often do we check if we need to clean up cache files?
private long cleanUpCheckPeriod = 60000L; // 1 minute
public CleanupThread(Configuration conf) {
@@ -882,6 +882,7 @@ public class TrackerDistributedCacheMana
conf.getLong("mapreduce.tasktracker.distributedcache.checkperiod",
cleanUpCheckPeriod);
}
+
private volatile boolean running = true;
public void stopRunning() {
@@ -894,19 +895,33 @@ public class TrackerDistributedCacheMana
try {
Thread.sleep(cleanUpCheckPeriod);
baseDirManager.checkAndCleanup();
- } catch (Exception e) {
+ } catch (IOException e) {
LOG.error("Exception in DistributedCache CleanupThread.", e);
- // This thread should keep running and never crash.
+ } catch(InterruptedException e) {
+ LOG.info("Cleanup...",e);
+ //To force us to exit cleanly
+ running = false;
+ } catch (Throwable t) {
+ exitTaskTracker(t);
}
}
}
+
+ /**
+ * Exit the task tracker because of a fatal error.
+ */
+ protected void exitTaskTracker(Throwable t) {
+ LOG.fatal("Distributed Cache cleanup thread received runtime exception." +
+ " Exiting the TaskTracker", t);
+ Runtime.getRuntime().exit(-1);
+ }
}
/**
* This class holds properties of each base directories and is responsible
* for clean up unused cache files in base directories.
*/
- private class BaseDirManager {
+ protected class BaseDirManager {
// For holding the properties of each cache directory
private class CacheDir {
Modified: hadoop/common/branches/branch-0.20-security/src/test/org/apache/hadoop/filecache/TestTrackerDistributedCacheManager.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20-security/src/test/org/apache/hadoop/filecache/TestTrackerDistributedCacheManager.java?rev=1127362&r1=1127361&r2=1127362&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.20-security/src/test/org/apache/hadoop/filecache/TestTrackerDistributedCacheManager.java (original)
+++ hadoop/common/branches/branch-0.20-security/src/test/org/apache/hadoop/filecache/TestTrackerDistributedCacheManager.java Wed May 25 02:23:20 2011
@@ -25,6 +25,7 @@ import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.Random;
+import java.util.concurrent.CountDownLatch;
import java.util.concurrent.TimeUnit;
import javax.security.auth.login.LoginException;
@@ -547,6 +548,62 @@ public class TestTrackerDistributedCache
}
}
+ public static class MyTrackerDistributedCacheManager
+ extends TrackerDistributedCacheManager {
+
+ public Throwable caught = null;
+ public CountDownLatch done = new CountDownLatch(1);
+
+
+ public MyTrackerDistributedCacheManager(Configuration conf,
+ TaskController controller) throws IOException {
+ super(conf, controller);
+ this.baseDirManager = new TrackerDistributedCacheManager.BaseDirManager() {
+
+ @Override
+ void checkAndCleanup() throws IOException {
+ throw new RuntimeException("This is a test!!!!");
+ }
+ };
+
+ this.cleanupThread = new TestCleanupThread(conf);
+ }
+
+ class TestCleanupThread extends TrackerDistributedCacheManager.CleanupThread {
+
+ public TestCleanupThread(Configuration conf) {
+ super(conf);
+ }
+
+ @Override
+ protected void exitTaskTracker(Throwable t) {
+ caught = t;
+ this.stopRunning();
+ done.countDown();
+ }
+ }
+ }
+
+ public void testRuntimeExceptionInCleanup() throws Exception {
+ if(!canRun()) {
+ return;
+ }
+
+ Configuration conf2 = new Configuration(conf);
+ conf2.set("mapred.local.dir", ROOT_MAPRED_LOCAL_DIR.toString());
+ conf2.setLong("local.cache.size", LOCAL_CACHE_LIMIT);
+ conf2.setLong("mapreduce.tasktracker.distributedcache.checkperiod", 0); // 0 ms (Don't sleep)
+
+ refreshConf(conf2);
+ MyTrackerDistributedCacheManager manager =
+ new MyTrackerDistributedCacheManager(conf2, taskController);
+ manager.startCleanupThread();
+
+ assertTrue(manager.done.await(200l, TimeUnit.MILLISECONDS));
+ assertNotNull(manager.caught);
+ assertTrue(manager.caught instanceof RuntimeException);
+ }
+
protected String getJobOwnerName() throws IOException {
return UserGroupInformation.getLoginUser().getUserName();
}