You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@eagle.apache.org by ha...@apache.org on 2016/12/13 10:59:32 UTC
incubator-eagle git commit: [EAGLE-839] add job diagnostics
Repository: incubator-eagle
Updated Branches:
refs/heads/master 87feb883c -> 84ceeb150
[EAGLE-839] add job diagnostics
Author: wujinhu <wu...@126.com>
Closes #735 from wujinhu/EAGLE-835.
Project: http://git-wip-us.apache.org/repos/asf/incubator-eagle/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-eagle/commit/84ceeb15
Tree: http://git-wip-us.apache.org/repos/asf/incubator-eagle/tree/84ceeb15
Diff: http://git-wip-us.apache.org/repos/asf/incubator-eagle/diff/84ceeb15
Branch: refs/heads/master
Commit: 84ceeb150a2cdbc9a44be001fb5a86c4079644e1
Parents: 87feb88
Author: wujinhu <wu...@126.com>
Authored: Tue Dec 13 18:59:19 2016 +0800
Committer: Hao Chen <ha...@apache.org>
Committed: Tue Dec 13 18:59:19 2016 +0800
----------------------------------------------------------------------
.../mr/historyentity/JobExecutionAPIEntity.java | 12 +++----
.../mr/history/parser/JHFEventReaderBase.java | 36 +++++++++++++-------
.../mr/history/parser/JHFMRVer2EventReader.java | 3 ++
3 files changed, 33 insertions(+), 18 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-eagle/blob/84ceeb15/eagle-jpm/eagle-jpm-entity/src/main/java/org/apache/eagle/jpm/mr/historyentity/JobExecutionAPIEntity.java
----------------------------------------------------------------------
diff --git a/eagle-jpm/eagle-jpm-entity/src/main/java/org/apache/eagle/jpm/mr/historyentity/JobExecutionAPIEntity.java b/eagle-jpm/eagle-jpm-entity/src/main/java/org/apache/eagle/jpm/mr/historyentity/JobExecutionAPIEntity.java
index 0e40099..55233aa 100644
--- a/eagle-jpm/eagle-jpm-entity/src/main/java/org/apache/eagle/jpm/mr/historyentity/JobExecutionAPIEntity.java
+++ b/eagle-jpm/eagle-jpm-entity/src/main/java/org/apache/eagle/jpm/mr/historyentity/JobExecutionAPIEntity.java
@@ -94,7 +94,7 @@ public class JobExecutionAPIEntity extends JobBaseAPIEntity {
@Column("ad")
private String trackingUrl;
@Column("ae")
- private Map<String, Map<String, String>> failedTasks;
+ private String diagnostics;
public String getTrackingUrl() {
return trackingUrl;
@@ -348,12 +348,12 @@ public class JobExecutionAPIEntity extends JobBaseAPIEntity {
valueChanged("failedReduceAttempts");
}
- public Map<String, Map<String, String>> getFailedTasks() {
- return failedTasks;
+ public String getDiagnostics() {
+ return diagnostics;
}
- public void setFailedTasks(Map<String, Map<String, String>> failedTasks) {
- this.failedTasks = failedTasks;
- valueChanged("failedTasks");
+ public void setDiagnostics(String diagnostics) {
+ this.diagnostics = diagnostics;
+ valueChanged("diagnostics");
}
}
http://git-wip-us.apache.org/repos/asf/incubator-eagle/blob/84ceeb15/eagle-jpm/eagle-jpm-mr-history/src/main/java/org/apache/eagle/jpm/mr/history/parser/JHFEventReaderBase.java
----------------------------------------------------------------------
diff --git a/eagle-jpm/eagle-jpm-mr-history/src/main/java/org/apache/eagle/jpm/mr/history/parser/JHFEventReaderBase.java b/eagle-jpm/eagle-jpm-mr-history/src/main/java/org/apache/eagle/jpm/mr/history/parser/JHFEventReaderBase.java
index 80cdb1c..3a9e147 100644
--- a/eagle-jpm/eagle-jpm-mr-history/src/main/java/org/apache/eagle/jpm/mr/history/parser/JHFEventReaderBase.java
+++ b/eagle-jpm/eagle-jpm-mr-history/src/main/java/org/apache/eagle/jpm/mr/history/parser/JHFEventReaderBase.java
@@ -18,6 +18,7 @@
package org.apache.eagle.jpm.mr.history.parser;
+import org.apache.commons.lang3.tuple.Pair;
import org.apache.eagle.jpm.mr.history.MRHistoryJobConfig;
import org.apache.eagle.jpm.mr.history.crawler.JobHistoryContentFilter;
import org.apache.eagle.jpm.mr.history.metrics.JobCounterMetricsGenerator;
@@ -60,6 +61,8 @@ public abstract class JHFEventReaderBase extends JobEntityCreationPublisher impl
protected Map<String, String> taskRunningHosts;
// hostname to rack mapping
protected Map<String, String> host2RackMapping;
+ // task attempt ID -> (task ID, error message) pair
+ protected Map<String, Pair<String, String>> attempt2ErrorMsg;
protected String jobId;
protected String jobName;
@@ -105,11 +108,11 @@ public abstract class JHFEventReaderBase extends JobEntityCreationPublisher impl
jobExecutionEntity.setTags(new HashMap<>(baseTags));
jobExecutionEntity.setNumFailedMaps(0);
jobExecutionEntity.setNumFailedReduces(0);
- jobExecutionEntity.setFailedTasks(new HashMap<>());
taskRunningHosts = new HashMap<>();
host2RackMapping = new HashMap<>();
+ attempt2ErrorMsg = new HashMap<>();
taskStartTime = new HashMap<>();
taskAttemptStartTime = new HashMap<>();
@@ -295,10 +298,28 @@ public abstract class JHFEventReaderBase extends JobEntityCreationPublisher impl
jobExecutionEntity.setAvgReduceTaskDuration(this.sumReduceTaskDuration * 1.0 / numTotalReduces);
}
this.jobCounterMetricsGenerator.setBaseTags(jobExecutionEntity.getTags());
+
+ formatDiagnostics(values.get(Keys.DIAGNOSTICS));
+
entityCreated(jobExecutionEntity);
}
}
+ private void formatDiagnostics(String diagnostics) { // Stores on jobExecutionEntity the error of one recorded attempt whose task ID occurs in the given job-level diagnostics text.
+ String formatDiagnostics = ""; // value stored when diagnostics is null or no recorded task ID occurs in it
+ if (diagnostics != null) {
+ for (String attemptId : attempt2ErrorMsg.keySet()) {
+ String taskId = attempt2ErrorMsg.get(attemptId).getLeft(); // left element of the pair: the task ID recorded for this attempt
+ String error = attempt2ErrorMsg.get(attemptId).getRight(); // right element of the pair: the error recorded for this attempt
+ if (diagnostics.contains(taskId)) { // plain substring match of the task ID against the diagnostics text
+ formatDiagnostics = error; // NOTE(review): the recorded error may itself be null — confirm setDiagnostics tolerates that
+ break; // first match wins; the map is created as a HashMap, so which matching attempt is seen first is not ordered
+ }
+ }
+ }
+ jobExecutionEntity.setDiagnostics(formatDiagnostics); // always called, so the field is set even when nothing matched
+ }
+
private void entityCreated(JobBaseAPIEntity entity) throws Exception {
for (HistoryJobEntityLifecycleListener lifecycleListener : this.jobEntityLifecycleListeners) {
lifecycleListener.jobEntityCreated(entity);
@@ -440,16 +461,7 @@ public abstract class JHFEventReaderBase extends JobEntityCreationPublisher impl
}
entityCreated(entity);
- if (entity.getTags().get(MRJobTagName.ERROR_CATEGORY.toString()) != null) {
- jobExecutionEntity.getFailedTasks().put(taskID,
- new HashMap<String, String>() {
- {
- put(entity.getTags().get(MRJobTagName.ERROR_CATEGORY.toString()),
- entity.getTags().get(MRJobTagName.ERROR_CATEGORY.toString()));//decide later
- }
- }
- );
- }
+ attempt2ErrorMsg.put(taskAttemptID, Pair.of(taskID, entity.getError()));
taskAttemptStartTime.remove(taskAttemptID);
} else {
// silently ignore
@@ -544,7 +556,7 @@ public abstract class JHFEventReaderBase extends JobEntityCreationPublisher impl
ERROR, TASK_ATTEMPT_ID, TASK_STATUS, COPY_PHASE, SORT_PHASE, REDUCE_PHASE,
SHUFFLE_FINISHED, SORT_FINISHED, COUNTERS, SPLITS, JOB_PRIORITY, HTTP_PORT,
TRACKER_NAME, STATE_STRING, VERSION, MAP_COUNTERS, REDUCE_COUNTERS,
- VIEW_JOB, MODIFY_JOB, JOB_QUEUE, RACK,
+ VIEW_JOB, MODIFY_JOB, JOB_QUEUE, RACK, DIAGNOSTICS,
UBERISED, SPLIT_LOCATIONS, FAILED_DUE_TO_ATTEMPT, MAP_FINISH_TIME, PORT, RACK_NAME,
http://git-wip-us.apache.org/repos/asf/incubator-eagle/blob/84ceeb15/eagle-jpm/eagle-jpm-mr-history/src/main/java/org/apache/eagle/jpm/mr/history/parser/JHFMRVer2EventReader.java
----------------------------------------------------------------------
diff --git a/eagle-jpm/eagle-jpm-mr-history/src/main/java/org/apache/eagle/jpm/mr/history/parser/JHFMRVer2EventReader.java b/eagle-jpm/eagle-jpm-mr-history/src/main/java/org/apache/eagle/jpm/mr/history/parser/JHFMRVer2EventReader.java
index 6e0e3aa..8184f90 100644
--- a/eagle-jpm/eagle-jpm-mr-history/src/main/java/org/apache/eagle/jpm/mr/history/parser/JHFMRVer2EventReader.java
+++ b/eagle-jpm/eagle-jpm-mr-history/src/main/java/org/apache/eagle/jpm/mr/history/parser/JHFMRVer2EventReader.java
@@ -255,6 +255,9 @@ public class JHFMRVer2EventReader extends JHFEventReaderBase {
if (js.getJobStatus() != null) {
values.put(Keys.JOB_STATUS, js.getJobStatus().toString());
}
+ if (js.getDiagnostics() != null) {
+ values.put(Keys.DIAGNOSTICS, js.getDiagnostics().toString());
+ }
handleJob(wrapper.getType(), values, null);
}