You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@samza.apache.org by na...@apache.org on 2015/10/19 23:07:22 UTC

samza git commit: SAMZA-368 - AM UI should show failed container information

Repository: samza
Updated Branches:
  refs/heads/master 91de984ab -> f7f237e93


SAMZA-368 - AM UI should show failed container information


Project: http://git-wip-us.apache.org/repos/asf/samza/repo
Commit: http://git-wip-us.apache.org/repos/asf/samza/commit/f7f237e9
Tree: http://git-wip-us.apache.org/repos/asf/samza/tree/f7f237e9
Diff: http://git-wip-us.apache.org/repos/asf/samza/diff/f7f237e9

Branch: refs/heads/master
Commit: f7f237e939f0dabe0cd5130768096083a2b4a543
Parents: 91de984
Author: Aleksandar Bircakovic <a....@levi9.com>
Authored: Mon Oct 19 13:51:57 2015 -0700
Committer: Navina <na...@gmail.com>
Committed: Mon Oct 19 13:51:57 2015 -0700

----------------------------------------------------------------------
 .../org/apache/samza/job/yarn/SamzaAppState.java    |  6 ++++++
 .../org/apache/samza/job/yarn/SamzaTaskManager.java |  1 +
 .../resources/scalate/WEB-INF/views/index.scaml     | 16 ++++++++++++++++
 3 files changed, 23 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/samza/blob/f7f237e9/samza-yarn/src/main/java/org/apache/samza/job/yarn/SamzaAppState.java
----------------------------------------------------------------------
diff --git a/samza-yarn/src/main/java/org/apache/samza/job/yarn/SamzaAppState.java b/samza-yarn/src/main/java/org/apache/samza/job/yarn/SamzaAppState.java
index d5be36e..3df927e 100644
--- a/samza-yarn/src/main/java/org/apache/samza/job/yarn/SamzaAppState.java
+++ b/samza-yarn/src/main/java/org/apache/samza/job/yarn/SamzaAppState.java
@@ -23,6 +23,7 @@ import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
 import org.apache.hadoop.yarn.api.records.ContainerId;
 import org.apache.hadoop.yarn.api.records.FinalApplicationStatus;
 import org.apache.samza.coordinator.JobCoordinator;
+import org.apache.hadoop.yarn.api.records.ContainerStatus;
 
 import java.net.URL;
 import java.util.HashSet;
@@ -119,6 +120,11 @@ public class SamzaAppState {
   public AtomicInteger releasedContainers = new AtomicInteger(0);
 
   /**
+   * ContainerStatus of each failed container, keyed by its container id string.
+   */
+  public ConcurrentMap<String, ContainerStatus> failedContainersStatus = new ConcurrentHashMap<String, ContainerStatus>();
+
+  /**
    * Number of containers configured for the job
    */
   public int containerCount = 0;

http://git-wip-us.apache.org/repos/asf/samza/blob/f7f237e9/samza-yarn/src/main/java/org/apache/samza/job/yarn/SamzaTaskManager.java
----------------------------------------------------------------------
diff --git a/samza-yarn/src/main/java/org/apache/samza/job/yarn/SamzaTaskManager.java b/samza-yarn/src/main/java/org/apache/samza/job/yarn/SamzaTaskManager.java
index d17ffe0..a3562a1 100644
--- a/samza-yarn/src/main/java/org/apache/samza/job/yarn/SamzaTaskManager.java
+++ b/samza-yarn/src/main/java/org/apache/samza/job/yarn/SamzaTaskManager.java
@@ -204,6 +204,7 @@ class SamzaTaskManager implements YarnAppMasterListener {
         log.info("Container " + containerIdStr + " failed with exit code " + exitStatus + " - " + containerStatus.getDiagnostics());
 
         state.failedContainers.incrementAndGet();
+        state.failedContainersStatus.put(containerIdStr, containerStatus);
         state.jobHealthy.set(false);
 
         if(containerId != -1) {

http://git-wip-us.apache.org/repos/asf/samza/blob/f7f237e9/samza-yarn/src/main/resources/scalate/WEB-INF/views/index.scaml
----------------------------------------------------------------------
diff --git a/samza-yarn/src/main/resources/scalate/WEB-INF/views/index.scaml b/samza-yarn/src/main/resources/scalate/WEB-INF/views/index.scaml
index 2d16fe0..93660c7 100644
--- a/samza-yarn/src/main/resources/scalate/WEB-INF/views/index.scaml
+++ b/samza-yarn/src/main/resources/scalate/WEB-INF/views/index.scaml
@@ -139,6 +139,22 @@
                 Ordinary: #{state.jobCoordinator.jobModel.getContainerToHostValue(containerId, org.apache.samza.coordinator.stream.messages.SetContainerHostMapping.JMX_URL_KEY)}
                 Tunneling: #{state.jobCoordinator.jobModel.getContainerToHostValue(containerId, org.apache.samza.coordinator.stream.messages.SetContainerHostMapping.JMX_TUNNELING_URL_KEY)}
 
+      %h2 Failed Containers
+      %table.table.table-striped.table-bordered.tablesorter#containers-table
+        %thead
+          %tr
+            %th Container
+            %th Exit code
+            %th Message
+        %tbody
+          - for((containerId, containerStatus) <- state.failedContainersStatus)
+            %tr
+              %td
+                #{containerId}
+              %td
+                Exit code: #{containerStatus.getExitStatus}
+              %td
+                %div.value= containerStatus.getDiagnostics
 
     %div.tab-pane#task-groups
       %h2 Task Groups