You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by iv...@apache.org on 2013/10/11 07:45:57 UTC

svn commit: r1531200 - in /hadoop/common/branches/branch-1: ./ src/core/org/apache/hadoop/http/ src/mapred/org/apache/hadoop/filecache/ src/mapred/org/apache/hadoop/mapred/ src/test/org/apache/hadoop/mapred/

Author: ivanmi
Date: Fri Oct 11 05:45:56 2013
New Revision: 1531200

URL: http://svn.apache.org/r1531200
Log:
MAPREDUCE-5512. TaskTracker hung after failed reconnect to the JobTracker. Contributed by Ivan Mitic.

Added:
    hadoop/common/branches/branch-1/src/test/org/apache/hadoop/mapred/TestMiniMRDaemonThreads.java
Modified:
    hadoop/common/branches/branch-1/CHANGES.txt
    hadoop/common/branches/branch-1/src/core/org/apache/hadoop/http/HttpServer.java
    hadoop/common/branches/branch-1/src/mapred/org/apache/hadoop/filecache/TrackerDistributedCacheManager.java
    hadoop/common/branches/branch-1/src/mapred/org/apache/hadoop/mapred/JobEndNotifier.java
    hadoop/common/branches/branch-1/src/test/org/apache/hadoop/mapred/MiniMRCluster.java

Modified: hadoop/common/branches/branch-1/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-1/CHANGES.txt?rev=1531200&r1=1531199&r2=1531200&view=diff
==============================================================================
--- hadoop/common/branches/branch-1/CHANGES.txt (original)
+++ hadoop/common/branches/branch-1/CHANGES.txt Fri Oct 11 05:45:56 2013
@@ -151,6 +151,9 @@ Release 1.3.0 - unreleased
     MAPREDUCE-5569. FloatSplitter is not generating correct splits (Nathan
     Roberts via jlowe)
 
+    MAPREDUCE-5512. TaskTracker hung after failed reconnect to the JobTracker.
+    (ivanmi)
+
 Release 1.2.2 - unreleased
 
   INCOMPATIBLE CHANGES

Modified: hadoop/common/branches/branch-1/src/core/org/apache/hadoop/http/HttpServer.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-1/src/core/org/apache/hadoop/http/HttpServer.java?rev=1531200&r1=1531199&r2=1531200&view=diff
==============================================================================
--- hadoop/common/branches/branch-1/src/core/org/apache/hadoop/http/HttpServer.java (original)
+++ hadoop/common/branches/branch-1/src/core/org/apache/hadoop/http/HttpServer.java Fri Oct 11 05:45:56 2013
@@ -160,7 +160,9 @@ public class HttpServer implements Filte
     
     webServer.addConnector(listener);
 
-    webServer.setThreadPool(new QueuedThreadPool());
+    QueuedThreadPool threadPool = new QueuedThreadPool();
+    threadPool.setName("httpServerThreadPool");
+    webServer.setThreadPool(threadPool);
 
     final String appDir = getWebAppsPath();
     ContextHandlerCollection contexts = new ContextHandlerCollection();

Modified: hadoop/common/branches/branch-1/src/mapred/org/apache/hadoop/filecache/TrackerDistributedCacheManager.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-1/src/mapred/org/apache/hadoop/filecache/TrackerDistributedCacheManager.java?rev=1531200&r1=1531199&r2=1531200&view=diff
==============================================================================
--- hadoop/common/branches/branch-1/src/mapred/org/apache/hadoop/filecache/TrackerDistributedCacheManager.java (original)
+++ hadoop/common/branches/branch-1/src/mapred/org/apache/hadoop/filecache/TrackerDistributedCacheManager.java Fri Oct 11 05:45:56 2013
@@ -125,6 +125,8 @@ public class TrackerDistributedCacheMana
 
     this.taskController = controller;
     this.cleanupThread = new CleanupThread(conf);
+    this.cleanupThread.setName("distCacheManagerCleanupThread");
+    this.cleanupThread.setDaemon(true);
   }
 
   /**

Modified: hadoop/common/branches/branch-1/src/mapred/org/apache/hadoop/mapred/JobEndNotifier.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-1/src/mapred/org/apache/hadoop/mapred/JobEndNotifier.java?rev=1531200&r1=1531199&r2=1531200&view=diff
==============================================================================
--- hadoop/common/branches/branch-1/src/mapred/org/apache/hadoop/mapred/JobEndNotifier.java (original)
+++ hadoop/common/branches/branch-1/src/mapred/org/apache/hadoop/mapred/JobEndNotifier.java Fri Oct 11 05:45:56 2013
@@ -83,7 +83,7 @@ public class JobEndNotifier {
               LOG.error("Notification failure [" + notification + "]", ex);
             }
           }
-        });
+        }, "jobEndNotifier");
     thread.start();
   }
 

Modified: hadoop/common/branches/branch-1/src/test/org/apache/hadoop/mapred/MiniMRCluster.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-1/src/test/org/apache/hadoop/mapred/MiniMRCluster.java?rev=1531200&r1=1531199&r2=1531200&view=diff
==============================================================================
--- hadoop/common/branches/branch-1/src/test/org/apache/hadoop/mapred/MiniMRCluster.java (original)
+++ hadoop/common/branches/branch-1/src/test/org/apache/hadoop/mapred/MiniMRCluster.java Fri Oct 11 05:45:56 2013
@@ -617,7 +617,7 @@ public class MiniMRCluster {
   void startJobTracker(boolean wait) {
     //  Create the JobTracker
     jobTracker = new JobTrackerRunner(conf);
-    jobTrackerThread = new Thread(jobTracker);
+    jobTrackerThread = new Thread(jobTracker, "jobTrackerMain");
         
     jobTrackerThread.start();
     
@@ -712,7 +712,7 @@ public class MiniMRCluster {
    * Add a tasktracker to the Mini-MR cluster.
    */
   void addTaskTracker(TaskTrackerRunner taskTracker) {
-    Thread taskTrackerThread = new Thread(taskTracker);
+    Thread taskTrackerThread = new Thread(taskTracker, "taskTrackerMain");
     taskTrackerList.add(taskTracker);
     taskTrackerThreadList.add(taskTrackerThread);
     taskTrackerThread.start();

Added: hadoop/common/branches/branch-1/src/test/org/apache/hadoop/mapred/TestMiniMRDaemonThreads.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-1/src/test/org/apache/hadoop/mapred/TestMiniMRDaemonThreads.java?rev=1531200&view=auto
==============================================================================
--- hadoop/common/branches/branch-1/src/test/org/apache/hadoop/mapred/TestMiniMRDaemonThreads.java (added)
+++ hadoop/common/branches/branch-1/src/test/org/apache/hadoop/mapred/TestMiniMRDaemonThreads.java Fri Oct 11 05:45:56 2013
@@ -0,0 +1,79 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.mapred;
+
+import java.io.IOException;
+import java.util.Set;
+import org.junit.Assert;
+import org.junit.Test;
+
+/**
+ * Validates the non-daemon threads alive in the JVM after a MiniMRCluster
+ * (JobTracker and TaskTracker) is started. Any non-daemon thread, other than
+ * the test thread, whose name contains none of the whitelisted keys fails the
+ * test. Each non-daemon thread is a potential cause of a service hang, so
+ * validate all failure codepaths of a new non-daemon thread before adding it.
+ */
+public class TestMiniMRDaemonThreads {
+  // Despite the name, these are name fragments of the allowed NON-daemon threads.
+  static String[] whitelistedDaemonKeys = {
+    "jobTrackerMain",
+    "httpServerThreadPool",
+    "taskTrackerMain",
+    "expireTrackers",
+    "jobEndNotifier",
+    "ReaderThread",
+    "Socket Reader",
+    "expireLaunchingTasks",
+    "retireJobs"
+    };
+
+  @Test
+  public void testValidateDaemonThreads() throws IOException, InterruptedException {
+    MiniMRCluster mr = null;
+    try {
+      JobConf jtConf = new JobConf();
+      mr = new MiniMRCluster(1, "file:///", 1, null, null, jtConf);
+      // Fixed one-second wait so that the cluster's threads have started
+      Thread.sleep(1000);
+
+      Set<Thread> threadSet = Thread.getAllStackTraces().keySet();
+      for (Thread t : threadSet) {
+        if (!t.isDaemon() && !Thread.currentThread().equals(t)) { // skip daemons and this test thread
+          boolean found = false;
+          for(String key : whitelistedDaemonKeys) {
+            if (t.getName().contains(key)) { // substring match, not equality
+              found = true;
+              break;
+            }
+          }
+          if (!found) { // a non-daemon thread that no whitelist key covers
+            Assert.fail(
+                String.format("Thread '%s' is not a whitelisted daemon thread",
+                    t.getName()));
+          }
+        }
+      }
+
+    } finally {
+      if (mr != null) {
+        mr.shutdown(); // always stop the mini cluster, even when the check fails
+      }
+    }
+  }
+}