You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@eagle.apache.org by ha...@apache.org on 2016/12/13 10:59:32 UTC

incubator-eagle git commit: [EAGLE-839] add job diagnostics

Repository: incubator-eagle
Updated Branches:
  refs/heads/master 87feb883c -> 84ceeb150


[EAGLE-839] add job diagnostics

Author: wujinhu <wu...@126.com>

Closes #735 from wujinhu/EAGLE-835.


Project: http://git-wip-us.apache.org/repos/asf/incubator-eagle/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-eagle/commit/84ceeb15
Tree: http://git-wip-us.apache.org/repos/asf/incubator-eagle/tree/84ceeb15
Diff: http://git-wip-us.apache.org/repos/asf/incubator-eagle/diff/84ceeb15

Branch: refs/heads/master
Commit: 84ceeb150a2cdbc9a44be001fb5a86c4079644e1
Parents: 87feb88
Author: wujinhu <wu...@126.com>
Authored: Tue Dec 13 18:59:19 2016 +0800
Committer: Hao Chen <ha...@apache.org>
Committed: Tue Dec 13 18:59:19 2016 +0800

----------------------------------------------------------------------
 .../mr/historyentity/JobExecutionAPIEntity.java | 12 +++----
 .../mr/history/parser/JHFEventReaderBase.java   | 36 +++++++++++++-------
 .../mr/history/parser/JHFMRVer2EventReader.java |  3 ++
 3 files changed, 33 insertions(+), 18 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-eagle/blob/84ceeb15/eagle-jpm/eagle-jpm-entity/src/main/java/org/apache/eagle/jpm/mr/historyentity/JobExecutionAPIEntity.java
----------------------------------------------------------------------
diff --git a/eagle-jpm/eagle-jpm-entity/src/main/java/org/apache/eagle/jpm/mr/historyentity/JobExecutionAPIEntity.java b/eagle-jpm/eagle-jpm-entity/src/main/java/org/apache/eagle/jpm/mr/historyentity/JobExecutionAPIEntity.java
index 0e40099..55233aa 100644
--- a/eagle-jpm/eagle-jpm-entity/src/main/java/org/apache/eagle/jpm/mr/historyentity/JobExecutionAPIEntity.java
+++ b/eagle-jpm/eagle-jpm-entity/src/main/java/org/apache/eagle/jpm/mr/historyentity/JobExecutionAPIEntity.java
@@ -94,7 +94,7 @@ public class JobExecutionAPIEntity extends JobBaseAPIEntity {
     @Column("ad")
     private String trackingUrl;
     @Column("ae")
-    private Map<String, Map<String, String>> failedTasks;
+    private String diagnostics;
 
     public String getTrackingUrl() {
         return trackingUrl;
@@ -348,12 +348,12 @@ public class JobExecutionAPIEntity extends JobBaseAPIEntity {
         valueChanged("failedReduceAttempts");
     }
 
-    public Map<String, Map<String, String>> getFailedTasks() {
-        return failedTasks;
+    public String getDiagnostics() {
+        return diagnostics;
     }
 
-    public void setFailedTasks(Map<String, Map<String, String>> failedTasks) {
-        this.failedTasks = failedTasks;
-        valueChanged("failedTasks");
+    public void setDiagnostics(String diagnostics) {
+        this.diagnostics = diagnostics;
+        valueChanged("diagnostics");
     }
 }

http://git-wip-us.apache.org/repos/asf/incubator-eagle/blob/84ceeb15/eagle-jpm/eagle-jpm-mr-history/src/main/java/org/apache/eagle/jpm/mr/history/parser/JHFEventReaderBase.java
----------------------------------------------------------------------
diff --git a/eagle-jpm/eagle-jpm-mr-history/src/main/java/org/apache/eagle/jpm/mr/history/parser/JHFEventReaderBase.java b/eagle-jpm/eagle-jpm-mr-history/src/main/java/org/apache/eagle/jpm/mr/history/parser/JHFEventReaderBase.java
index 80cdb1c..3a9e147 100644
--- a/eagle-jpm/eagle-jpm-mr-history/src/main/java/org/apache/eagle/jpm/mr/history/parser/JHFEventReaderBase.java
+++ b/eagle-jpm/eagle-jpm-mr-history/src/main/java/org/apache/eagle/jpm/mr/history/parser/JHFEventReaderBase.java
@@ -18,6 +18,7 @@
 
 package org.apache.eagle.jpm.mr.history.parser;
 
+import org.apache.commons.lang3.tuple.Pair;
 import org.apache.eagle.jpm.mr.history.MRHistoryJobConfig;
 import org.apache.eagle.jpm.mr.history.crawler.JobHistoryContentFilter;
 import org.apache.eagle.jpm.mr.history.metrics.JobCounterMetricsGenerator;
@@ -60,6 +61,8 @@ public abstract class JHFEventReaderBase extends JobEntityCreationPublisher impl
     protected Map<String, String> taskRunningHosts;
     // hostname to rack mapping
     protected Map<String, String> host2RackMapping;
+    // task attempt id -> (task id, error message of that attempt)
+    protected Map<String, Pair<String, String>> attempt2ErrorMsg;
 
     protected String jobId;
     protected String jobName;
@@ -105,11 +108,11 @@ public abstract class JHFEventReaderBase extends JobEntityCreationPublisher impl
         jobExecutionEntity.setTags(new HashMap<>(baseTags));
         jobExecutionEntity.setNumFailedMaps(0);
         jobExecutionEntity.setNumFailedReduces(0);
-        jobExecutionEntity.setFailedTasks(new HashMap<>());
 
         taskRunningHosts = new HashMap<>();
 
         host2RackMapping = new HashMap<>();
+        attempt2ErrorMsg = new HashMap<>();
 
         taskStartTime = new HashMap<>();
         taskAttemptStartTime = new HashMap<>();
@@ -295,10 +298,28 @@ public abstract class JHFEventReaderBase extends JobEntityCreationPublisher impl
                 jobExecutionEntity.setAvgReduceTaskDuration(this.sumReduceTaskDuration * 1.0 / numTotalReduces);
             }
             this.jobCounterMetricsGenerator.setBaseTags(jobExecutionEntity.getTags());
+
+            formatDiagnostics(values.get(Keys.DIAGNOSTICS));
+
             entityCreated(jobExecutionEntity);
         }
     }
 
+    // Sets the job entity's diagnostics to the error of the first recorded attempt (in map
+    // iteration order) whose task id occurs in the given text; "" if text is null or no match.
+    private void formatDiagnostics(String diagnostics) {
+        String matchedError = "";
+        if (diagnostics != null) {
+            for (Pair<String, String> taskAndError : attempt2ErrorMsg.values()) {
+                if (diagnostics.contains(taskAndError.getLeft())) {
+                    matchedError = taskAndError.getRight();
+                    break;
+                }
+            }
+        }
+        jobExecutionEntity.setDiagnostics(matchedError);
+    }
+
     private void entityCreated(JobBaseAPIEntity entity) throws Exception {
         for (HistoryJobEntityLifecycleListener lifecycleListener : this.jobEntityLifecycleListeners) {
             lifecycleListener.jobEntityCreated(entity);
@@ -440,16 +461,7 @@ public abstract class JHFEventReaderBase extends JobEntityCreationPublisher impl
             }
 
             entityCreated(entity);
-            if (entity.getTags().get(MRJobTagName.ERROR_CATEGORY.toString()) != null) {
-                jobExecutionEntity.getFailedTasks().put(taskID,
-                    new HashMap<String, String>() {
-                        {
-                            put(entity.getTags().get(MRJobTagName.ERROR_CATEGORY.toString()),
-                                entity.getTags().get(MRJobTagName.ERROR_CATEGORY.toString()));//decide later
-                        }
-                    }
-                );
-            }
+            attempt2ErrorMsg.put(taskAttemptID, Pair.of(taskID, entity.getError()));
             taskAttemptStartTime.remove(taskAttemptID);
         } else {
             // silently ignore
@@ -544,7 +556,7 @@ public abstract class JHFEventReaderBase extends JobEntityCreationPublisher impl
         ERROR, TASK_ATTEMPT_ID, TASK_STATUS, COPY_PHASE, SORT_PHASE, REDUCE_PHASE,
         SHUFFLE_FINISHED, SORT_FINISHED, COUNTERS, SPLITS, JOB_PRIORITY, HTTP_PORT,
         TRACKER_NAME, STATE_STRING, VERSION, MAP_COUNTERS, REDUCE_COUNTERS,
-        VIEW_JOB, MODIFY_JOB, JOB_QUEUE, RACK,
+        VIEW_JOB, MODIFY_JOB, JOB_QUEUE, RACK, DIAGNOSTICS,
 
         UBERISED, SPLIT_LOCATIONS, FAILED_DUE_TO_ATTEMPT, MAP_FINISH_TIME, PORT, RACK_NAME,
 

http://git-wip-us.apache.org/repos/asf/incubator-eagle/blob/84ceeb15/eagle-jpm/eagle-jpm-mr-history/src/main/java/org/apache/eagle/jpm/mr/history/parser/JHFMRVer2EventReader.java
----------------------------------------------------------------------
diff --git a/eagle-jpm/eagle-jpm-mr-history/src/main/java/org/apache/eagle/jpm/mr/history/parser/JHFMRVer2EventReader.java b/eagle-jpm/eagle-jpm-mr-history/src/main/java/org/apache/eagle/jpm/mr/history/parser/JHFMRVer2EventReader.java
index 6e0e3aa..8184f90 100644
--- a/eagle-jpm/eagle-jpm-mr-history/src/main/java/org/apache/eagle/jpm/mr/history/parser/JHFMRVer2EventReader.java
+++ b/eagle-jpm/eagle-jpm-mr-history/src/main/java/org/apache/eagle/jpm/mr/history/parser/JHFMRVer2EventReader.java
@@ -255,6 +255,9 @@ public class JHFMRVer2EventReader extends JHFEventReaderBase {
         if (js.getJobStatus() != null) {
             values.put(Keys.JOB_STATUS, js.getJobStatus().toString());
         }
+        if (js.getDiagnostics() != null) {
+            values.put(Keys.DIAGNOSTICS, js.getDiagnostics().toString());
+        }
         handleJob(wrapper.getType(), values, null);
     }