You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by om...@apache.org on 2011/06/17 00:05:26 UTC
svn commit: r1136704 - in /hadoop/common/branches/branch-0.20-security-204:
./ src/mapred/org/apache/hadoop/mapred/ src/test/org/apache/hadoop/mapred/
Author: omalley
Date: Thu Jun 16 22:05:25 2011
New Revision: 1136704
URL: http://svn.apache.org/viewvc?rev=1136704&view=rev
Log:
MAPREDUCE-2529. Add support for regex-based shuffle metric counting
exceptions. (Thomas Graves via cdouglas)
Added:
hadoop/common/branches/branch-0.20-security-204/src/test/org/apache/hadoop/mapred/TestShuffleExceptionCount.java
- copied unchanged from r1131737, hadoop/common/branches/branch-0.20-security/src/test/org/apache/hadoop/mapred/TestShuffleExceptionCount.java
Modified:
hadoop/common/branches/branch-0.20-security-204/CHANGES.txt (contents, props changed)
hadoop/common/branches/branch-0.20-security-204/src/mapred/org/apache/hadoop/mapred/ShuffleServerInstrumentation.java
hadoop/common/branches/branch-0.20-security-204/src/mapred/org/apache/hadoop/mapred/TaskTracker.java
Modified: hadoop/common/branches/branch-0.20-security-204/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20-security-204/CHANGES.txt?rev=1136704&r1=1136703&r2=1136704&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.20-security-204/CHANGES.txt (original)
+++ hadoop/common/branches/branch-0.20-security-204/CHANGES.txt Thu Jun 16 22:05:25 2011
@@ -86,6 +86,9 @@ Release 0.20.204.0 - unreleased
IMPROVEMENTS
+ MAPREDUCE-2529. Add support for regex-based shuffle metric counting
+ exceptions. (Thomas Graves via cdouglas)
+
HADOOP-7398. Suppress warnings about use of HADOOP_HOME. (omalley)
MAPREDUCE-2415. Distribute the user task logs on to multiple disks.
Propchange: hadoop/common/branches/branch-0.20-security-204/CHANGES.txt
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Thu Jun 16 22:05:25 2011
@@ -1,5 +1,5 @@
/hadoop/common/branches/branch-0.20/CHANGES.txt:826138,826568,829987,831184,833001,880632,898713,909245,909723,960946,1044225
-/hadoop/common/branches/branch-0.20-security/CHANGES.txt:1097202,1098837,1100336,1134140
+/hadoop/common/branches/branch-0.20-security/CHANGES.txt:1097202,1098837,1100336,1131737,1134140
/hadoop/common/branches/branch-0.20-security-203/CHANGES.txt:1096071,1097012-1099333,1102071,1128115
/hadoop/common/branches/branch-0.20-security-205/CHANGES.txt:1133133,1133274,1133282
/hadoop/core/branches/branch-0.18/CHANGES.txt:727226
Modified: hadoop/common/branches/branch-0.20-security-204/src/mapred/org/apache/hadoop/mapred/ShuffleServerInstrumentation.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20-security-204/src/mapred/org/apache/hadoop/mapred/ShuffleServerInstrumentation.java?rev=1136704&r1=1136703&r2=1136704&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.20-security-204/src/mapred/org/apache/hadoop/mapred/ShuffleServerInstrumentation.java (original)
+++ hadoop/common/branches/branch-0.20-security-204/src/mapred/org/apache/hadoop/mapred/ShuffleServerInstrumentation.java Thu Jun 16 22:05:25 2011
@@ -37,6 +37,8 @@ class ShuffleServerInstrumentation imple
registry.newCounter("shuffle_failed_outputs", "", 0);
final MetricMutableCounterInt successOutputs =
registry.newCounter("shuffle_success_outputs", "", 0);
+ final MetricMutableCounterInt exceptionsCaught =
+ registry.newCounter("shuffle_exceptions_caught", "", 0);
ShuffleServerInstrumentation(TaskTracker tt) {
ttWorkerThreads = tt.workerThreads;
@@ -69,6 +71,12 @@ class ShuffleServerInstrumentation imple
successOutputs.incr();
}
+ //@Override
+ void exceptionsCaught() {
+ exceptionsCaught.incr();
+ }
+
+
@Override
public void getMetrics(MetricsBuilder builder, boolean all) {
MetricsRecordBuilder rb = builder.addRecord(registry.name());
Modified: hadoop/common/branches/branch-0.20-security-204/src/mapred/org/apache/hadoop/mapred/TaskTracker.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20-security-204/src/mapred/org/apache/hadoop/mapred/TaskTracker.java?rev=1136704&r1=1136703&r2=1136704&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.20-security-204/src/mapred/org/apache/hadoop/mapred/TaskTracker.java (original)
+++ hadoop/common/branches/branch-0.20-security-204/src/mapred/org/apache/hadoop/mapred/TaskTracker.java Thu Jun 16 22:05:25 2011
@@ -1466,6 +1466,15 @@ public class TaskTracker implements MRCo
server.setAttribute("log", LOG);
server.setAttribute("localDirAllocator", localDirAllocator);
server.setAttribute("shuffleServerMetrics", shuffleServerMetrics);
+
+ String exceptionStackRegex =
+ conf.get("mapreduce.reduce.shuffle.catch.exception.stack.regex");
+ String exceptionMsgRegex =
+ conf.get("mapreduce.reduce.shuffle.catch.exception.message.regex");
+
+ server.setAttribute("exceptionStackRegex", exceptionStackRegex);
+ server.setAttribute("exceptionMsgRegex", exceptionMsgRegex);
+
server.addInternalServlet("mapOutput", "/mapOutput", MapOutputServlet.class);
server.addServlet("taskLog", "/tasklog", TaskLogServlet.class);
server.start();
@@ -3673,6 +3682,10 @@ public class TaskTracker implements MRCo
(ShuffleServerInstrumentation) context.getAttribute("shuffleServerMetrics");
TaskTracker tracker =
(TaskTracker) context.getAttribute("task.tracker");
+ String exceptionStackRegex =
+ (String) context.getAttribute("exceptionStackRegex");
+ String exceptionMsgRegex =
+ (String) context.getAttribute("exceptionMsgRegex");
verifyRequest(request, response, tracker, jobId);
@@ -3778,12 +3791,14 @@ public class TaskTracker implements MRCo
" from map: " + mapId + " given " + info.partLength + "/" +
info.rawLength);
}
+
} catch (IOException ie) {
Log log = (Log) context.getAttribute("log");
String errorMsg = ("getMapOutput(" + mapId + "," + reduceId +
") failed :\n"+
StringUtils.stringifyException(ie));
log.warn(errorMsg);
+ checkException(ie, exceptionMsgRegex, exceptionStackRegex, shuffleMetrics);
if (isInputException) {
tracker.mapOutputLost(TaskAttemptID.forName(mapId), errorMsg);
}
@@ -3807,6 +3822,38 @@ public class TaskTracker implements MRCo
shuffleMetrics.successOutput();
}
+ protected void checkException(IOException ie, String exceptionMsgRegex,
+ String exceptionStackRegex, ShuffleServerInstrumentation shuffleMetrics) {
+ // Check whether the exception matches the configured regular expressions.
+ // If both msgRegex and stackRegex are set, both must match; otherwise only
+ // the one that is set has to match.
+ if (exceptionMsgRegex != null) {
+ String msg = ie.getMessage();
+ if (msg == null || !msg.matches(exceptionMsgRegex)) {
+ return;
+ }
+ }
+ if (exceptionStackRegex != null
+ && !checkStackException(ie, exceptionStackRegex)) {
+ return;
+ }
+ shuffleMetrics.exceptionsCaught();
+ }
+
+ private boolean checkStackException(IOException ie,
+ String exceptionStackRegex) {
+ StackTraceElement[] stack = ie.getStackTrace();
+
+ for (StackTraceElement elem : stack) {
+ String stacktrace = elem.toString();
+ if (stacktrace.matches(exceptionStackRegex)) {
+ return true;
+ }
+ }
+ return false;
+ }
+
+
/**
* verify that request has correct HASH for the url
* and also add a field to reply header with hash of the HASH