You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by cd...@apache.org on 2011/05/25 04:23:21 UTC

svn commit: r1127362 - in /hadoop/common/branches/branch-0.20-security: CHANGES.txt src/mapred/org/apache/hadoop/filecache/TrackerDistributedCacheManager.java src/test/org/apache/hadoop/filecache/TestTrackerDistributedCacheManager.java

Author: cdouglas
Date: Wed May 25 02:23:20 2011
New Revision: 1127362

URL: http://svn.apache.org/viewvc?rev=1127362&view=rev
Log:
MAPREDUCE-2495. exit() the TaskTracker when the distributed cache cleanup
thread dies. Contributed by Robert Joseph Evans

Modified:
    hadoop/common/branches/branch-0.20-security/CHANGES.txt
    hadoop/common/branches/branch-0.20-security/src/mapred/org/apache/hadoop/filecache/TrackerDistributedCacheManager.java
    hadoop/common/branches/branch-0.20-security/src/test/org/apache/hadoop/filecache/TestTrackerDistributedCacheManager.java

Modified: hadoop/common/branches/branch-0.20-security/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20-security/CHANGES.txt?rev=1127362&r1=1127361&r2=1127362&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.20-security/CHANGES.txt (original)
+++ hadoop/common/branches/branch-0.20-security/CHANGES.txt Wed May 25 02:23:20 2011
@@ -35,6 +35,9 @@ Release 0.20.205.0 - unreleased
     MAPREDUCE-2490. Add logging to graylist and blacklist activity to aid
     diagnosis of related issues. (Jonathan Eagles via cdouglas)
 
+    MAPREDUCE-2495. exit() the TaskTracker when the distributed cache cleanup
+    thread dies. (Robert Joseph Evans via cdouglas)
+
 Release 0.20.204.0 - unreleased
 
   BUG FIXES

Modified: hadoop/common/branches/branch-0.20-security/src/mapred/org/apache/hadoop/filecache/TrackerDistributedCacheManager.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20-security/src/mapred/org/apache/hadoop/filecache/TrackerDistributedCacheManager.java?rev=1127362&r1=1127361&r2=1127362&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.20-security/src/mapred/org/apache/hadoop/filecache/TrackerDistributedCacheManager.java (original)
+++ hadoop/common/branches/branch-0.20-security/src/mapred/org/apache/hadoop/filecache/TrackerDistributedCacheManager.java Wed May 25 02:23:20 2011
@@ -93,8 +93,8 @@ public class TrackerDistributedCacheMana
   
   private static final Random random = new Random();
   
-  BaseDirManager baseDirManager = new BaseDirManager();
-  CleanupThread cleanupThread;
+  protected BaseDirManager baseDirManager = new BaseDirManager();
+  protected CleanupThread cleanupThread;
 
   public TrackerDistributedCacheManager(Configuration conf,
                                         TaskController controller
@@ -874,7 +874,7 @@ public class TrackerDistributedCacheMana
   /**
    * A thread to check and cleanup the unused files periodically
    */
-  private class CleanupThread extends Thread {
+  protected class CleanupThread extends Thread {
     // How often do we check if we need to clean up cache files?
     private long cleanUpCheckPeriod = 60000L; // 1 minute
     public CleanupThread(Configuration conf) {
@@ -882,6 +882,7 @@ public class TrackerDistributedCacheMana
         conf.getLong("mapreduce.tasktracker.distributedcache.checkperiod",
             cleanUpCheckPeriod);
     }
+
     private volatile boolean running = true;
     
     public void stopRunning() {
@@ -894,19 +895,33 @@ public class TrackerDistributedCacheMana
         try {
           Thread.sleep(cleanUpCheckPeriod);
           baseDirManager.checkAndCleanup();
-        } catch (Exception e) {
+        } catch (IOException e) {
           LOG.error("Exception in DistributedCache CleanupThread.", e);
-          // This thread should keep running and never crash.
+        } catch(InterruptedException e) {
+          LOG.info("Cleanup...",e); 
+          //To force us to exit cleanly
+          running = false;
+        } catch (Throwable t) {
+          exitTaskTracker(t);
         }
       }
     }
+    
+    /** Exit the task tracker because of a fatal error.
+     *  Logs {@code t} at fatal level, then calls Runtime.exit(-1) to end the JVM.
+     */
+    protected void exitTaskTracker(Throwable t) {
+      LOG.fatal("Distributed Cache cleanup thread received runtime exception." +
+      		" Exiting the TaskTracker", t);
+      Runtime.getRuntime().exit(-1);
+    }
   }
 
   /**
    * This class holds properties of each base directories and is responsible
    * for clean up unused cache files in base directories.
    */
-  private class BaseDirManager {
+  protected class BaseDirManager {
 
     // For holding the properties of each cache directory
     private class CacheDir {

Modified: hadoop/common/branches/branch-0.20-security/src/test/org/apache/hadoop/filecache/TestTrackerDistributedCacheManager.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20-security/src/test/org/apache/hadoop/filecache/TestTrackerDistributedCacheManager.java?rev=1127362&r1=1127361&r2=1127362&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.20-security/src/test/org/apache/hadoop/filecache/TestTrackerDistributedCacheManager.java (original)
+++ hadoop/common/branches/branch-0.20-security/src/test/org/apache/hadoop/filecache/TestTrackerDistributedCacheManager.java Wed May 25 02:23:20 2011
@@ -25,6 +25,7 @@ import java.io.IOException;
 import java.net.URI;
 import java.net.URISyntaxException;
 import java.util.Random;
+import java.util.concurrent.CountDownLatch;
 import java.util.concurrent.TimeUnit;
 
 import javax.security.auth.login.LoginException;
@@ -547,6 +548,62 @@ public class TestTrackerDistributedCache
     }
   }
   
+  public static class MyTrackerDistributedCacheManager 
+      extends TrackerDistributedCacheManager {
+    // Written by TestCleanupThread.exitTaskTracker(): the Throwable it was given, then the latch.
+    public Throwable caught = null;
+    public CountDownLatch done = new CountDownLatch(1);
+    // After the normal super() setup, swaps in a base-dir manager whose checkAndCleanup()
+    // always throws, and a cleanup thread that records the failure (see TestCleanupThread).
+    public MyTrackerDistributedCacheManager(Configuration conf,
+        TaskController controller) throws IOException {
+      super(conf, controller);
+      this.baseDirManager = new TrackerDistributedCacheManager.BaseDirManager() {
+        // Unconditionally fail with an unchecked exception.
+        @Override
+        void checkAndCleanup() throws IOException {
+          throw new RuntimeException("This is a test!!!!");
+        }
+      };
+      
+      this.cleanupThread = new TestCleanupThread(conf);
+    }
+      
+    class TestCleanupThread extends TrackerDistributedCacheManager.CleanupThread {
+      
+      public TestCleanupThread(Configuration conf) {
+        super(conf);
+      }
+      // Instead of exiting the JVM: remember t, stop the loop, and release the waiting test.
+      @Override
+      protected void exitTaskTracker(Throwable t) {
+        caught = t;
+        this.stopRunning();
+        done.countDown();
+      }        
+    }
+  }
+  
+  public void testRuntimeExceptionInCleanup() throws Exception {
+    if (!canRun()) {
+      return;
+    }
+    // Checks that a RuntimeException thrown by checkAndCleanup() is passed to exitTaskTracker().
+    Configuration conf2 = new Configuration(conf);
+    conf2.set("mapred.local.dir", ROOT_MAPRED_LOCAL_DIR.toString());
+    conf2.setLong("local.cache.size", LOCAL_CACHE_LIMIT);
+    conf2.setLong("mapreduce.tasktracker.distributedcache.checkperiod", 0); // 0 ms (Don't sleep)
+    
+    refreshConf(conf2);
+    MyTrackerDistributedCacheManager manager = 
+        new MyTrackerDistributedCacheManager(conf2, taskController);
+    manager.startCleanupThread();
+    // The overriding exitTaskTracker() counts the latch down; give it up to 200 ms to do so.
+    assertTrue(manager.done.await(200L, TimeUnit.MILLISECONDS));
+    assertNotNull(manager.caught);
+    assertTrue(manager.caught instanceof RuntimeException);
+  }
+  
   protected String getJobOwnerName() throws IOException {
     return UserGroupInformation.getLoginUser().getUserName();
   }