You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by om...@apache.org on 2011/06/17 00:05:26 UTC

svn commit: r1136704 - in /hadoop/common/branches/branch-0.20-security-204: ./ src/mapred/org/apache/hadoop/mapred/ src/test/org/apache/hadoop/mapred/

Author: omalley
Date: Thu Jun 16 22:05:25 2011
New Revision: 1136704

URL: http://svn.apache.org/viewvc?rev=1136704&view=rev
Log:
MAPREDUCE-2529. Add support for regex-based shuffle metric counting
exceptions. (Thomas Graves via cdouglas)

Added:
    hadoop/common/branches/branch-0.20-security-204/src/test/org/apache/hadoop/mapred/TestShuffleExceptionCount.java
      - copied unchanged from r1131737, hadoop/common/branches/branch-0.20-security/src/test/org/apache/hadoop/mapred/TestShuffleExceptionCount.java
Modified:
    hadoop/common/branches/branch-0.20-security-204/CHANGES.txt   (contents, props changed)
    hadoop/common/branches/branch-0.20-security-204/src/mapred/org/apache/hadoop/mapred/ShuffleServerInstrumentation.java
    hadoop/common/branches/branch-0.20-security-204/src/mapred/org/apache/hadoop/mapred/TaskTracker.java

Modified: hadoop/common/branches/branch-0.20-security-204/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20-security-204/CHANGES.txt?rev=1136704&r1=1136703&r2=1136704&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.20-security-204/CHANGES.txt (original)
+++ hadoop/common/branches/branch-0.20-security-204/CHANGES.txt Thu Jun 16 22:05:25 2011
@@ -86,6 +86,9 @@ Release 0.20.204.0 - unreleased
 
   IMPROVEMENTS
 
+    MAPREDUCE-2529. Add support for regex-based shuffle metric counting
+    exceptions. (Thomas Graves via cdouglas)
+
     HADOOP-7398. Suppress warnings about use of HADOOP_HOME. (omalley)
 
     MAPREDUCE-2415. Distribute the user task logs on to multiple disks.

Propchange: hadoop/common/branches/branch-0.20-security-204/CHANGES.txt
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Thu Jun 16 22:05:25 2011
@@ -1,5 +1,5 @@
 /hadoop/common/branches/branch-0.20/CHANGES.txt:826138,826568,829987,831184,833001,880632,898713,909245,909723,960946,1044225
-/hadoop/common/branches/branch-0.20-security/CHANGES.txt:1097202,1098837,1100336,1134140
+/hadoop/common/branches/branch-0.20-security/CHANGES.txt:1097202,1098837,1100336,1131737,1134140
 /hadoop/common/branches/branch-0.20-security-203/CHANGES.txt:1096071,1097012-1099333,1102071,1128115
 /hadoop/common/branches/branch-0.20-security-205/CHANGES.txt:1133133,1133274,1133282
 /hadoop/core/branches/branch-0.18/CHANGES.txt:727226

Modified: hadoop/common/branches/branch-0.20-security-204/src/mapred/org/apache/hadoop/mapred/ShuffleServerInstrumentation.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20-security-204/src/mapred/org/apache/hadoop/mapred/ShuffleServerInstrumentation.java?rev=1136704&r1=1136703&r2=1136704&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.20-security-204/src/mapred/org/apache/hadoop/mapred/ShuffleServerInstrumentation.java (original)
+++ hadoop/common/branches/branch-0.20-security-204/src/mapred/org/apache/hadoop/mapred/ShuffleServerInstrumentation.java Thu Jun 16 22:05:25 2011
@@ -37,6 +37,8 @@ class ShuffleServerInstrumentation imple
       registry.newCounter("shuffle_failed_outputs", "", 0);
   final MetricMutableCounterInt successOutputs =
       registry.newCounter("shuffle_success_outputs", "", 0);
+  final MetricMutableCounterInt exceptionsCaught =
+    registry.newCounter("shuffle_exceptions_caught", "", 0);
 
   ShuffleServerInstrumentation(TaskTracker tt) {
     ttWorkerThreads = tt.workerThreads;
@@ -69,6 +71,12 @@ class ShuffleServerInstrumentation imple
     successOutputs.incr();
   }
 
+  //@Override
+  void exceptionsCaught() {
+    exceptionsCaught.incr(); // counter registered as "shuffle_exceptions_caught"
+  }
+
+
   @Override
   public void getMetrics(MetricsBuilder builder, boolean all) {
     MetricsRecordBuilder rb = builder.addRecord(registry.name());

Modified: hadoop/common/branches/branch-0.20-security-204/src/mapred/org/apache/hadoop/mapred/TaskTracker.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20-security-204/src/mapred/org/apache/hadoop/mapred/TaskTracker.java?rev=1136704&r1=1136703&r2=1136704&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.20-security-204/src/mapred/org/apache/hadoop/mapred/TaskTracker.java (original)
+++ hadoop/common/branches/branch-0.20-security-204/src/mapred/org/apache/hadoop/mapred/TaskTracker.java Thu Jun 16 22:05:25 2011
@@ -1466,6 +1466,15 @@ public class TaskTracker implements MRCo
     server.setAttribute("log", LOG);
     server.setAttribute("localDirAllocator", localDirAllocator);
     server.setAttribute("shuffleServerMetrics", shuffleServerMetrics);
+
+    String exceptionStackRegex =
+      conf.get("mapreduce.reduce.shuffle.catch.exception.stack.regex");
+    String exceptionMsgRegex =
+      conf.get("mapreduce.reduce.shuffle.catch.exception.message.regex");
+
+    server.setAttribute("exceptionStackRegex", exceptionStackRegex);
+    server.setAttribute("exceptionMsgRegex", exceptionMsgRegex);
+
     server.addInternalServlet("mapOutput", "/mapOutput", MapOutputServlet.class);
     server.addServlet("taskLog", "/tasklog", TaskLogServlet.class);
     server.start();
@@ -3673,6 +3682,10 @@ public class TaskTracker implements MRCo
         (ShuffleServerInstrumentation) context.getAttribute("shuffleServerMetrics");
       TaskTracker tracker = 
         (TaskTracker) context.getAttribute("task.tracker");
+      String exceptionStackRegex =
+        (String) context.getAttribute("exceptionStackRegex");
+      String exceptionMsgRegex =
+        (String) context.getAttribute("exceptionMsgRegex");
 
       verifyRequest(request, response, tracker, jobId);
 
@@ -3778,12 +3791,14 @@ public class TaskTracker implements MRCo
                  " from map: " + mapId + " given " + info.partLength + "/" + 
                  info.rawLength);
         }
+
       } catch (IOException ie) {
         Log log = (Log) context.getAttribute("log");
         String errorMsg = ("getMapOutput(" + mapId + "," + reduceId + 
                            ") failed :\n"+
                            StringUtils.stringifyException(ie));
         log.warn(errorMsg);
+        checkException(ie, exceptionMsgRegex, exceptionStackRegex, shuffleMetrics);
         if (isInputException) {
           tracker.mapOutputLost(TaskAttemptID.forName(mapId), errorMsg);
         }
@@ -3807,6 +3822,38 @@ public class TaskTracker implements MRCo
       shuffleMetrics.successOutput();
     }
     
+    protected void checkException(IOException ie, String exceptionMsgRegex,
+        String exceptionStackRegex, ShuffleServerInstrumentation shuffleMetrics) {
+      // Count the exception only if it satisfies every configured regex: the
+      // message regex must match the whole message and the stack regex must
+      // match a whole stack frame. With neither set, every exception counts.
+      if (exceptionMsgRegex != null) {
+        String msg = ie.getMessage();
+        if (msg == null || !msg.matches(exceptionMsgRegex)) {
+          return;
+        }
+      }
+      if (exceptionStackRegex != null
+          && !checkStackException(ie, exceptionStackRegex)) {
+        return;
+      }
+      shuffleMetrics.exceptionsCaught();
+    }
+
+    private boolean checkStackException(IOException ie,
+        String exceptionStackRegex) {
+      StackTraceElement[] stack = ie.getStackTrace();
+
+      for (StackTraceElement elem : stack) {
+        String stacktrace = elem.toString();
+        if (stacktrace.matches(exceptionStackRegex)) { // whole-frame match
+          return true;
+        }
+      }
+      return false; // no frame matched the regex
+    }
+
+
     /**
      * verify that request has correct HASH for the url
      * and also add a field to reply header with hash of the HASH