You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@samza.apache.org by na...@apache.org on 2015/10/19 23:07:22 UTC
samza git commit: SAMZA-368 - AM UI should show failed container information
Repository: samza
Updated Branches:
refs/heads/master 91de984ab -> f7f237e93
SAMZA-368 - AM UI should show failed container information
Project: http://git-wip-us.apache.org/repos/asf/samza/repo
Commit: http://git-wip-us.apache.org/repos/asf/samza/commit/f7f237e9
Tree: http://git-wip-us.apache.org/repos/asf/samza/tree/f7f237e9
Diff: http://git-wip-us.apache.org/repos/asf/samza/diff/f7f237e9
Branch: refs/heads/master
Commit: f7f237e939f0dabe0cd5130768096083a2b4a543
Parents: 91de984
Author: Aleksandar Bircakovic <a....@levi9.com>
Authored: Mon Oct 19 13:51:57 2015 -0700
Committer: Navina <na...@gmail.com>
Committed: Mon Oct 19 13:51:57 2015 -0700
----------------------------------------------------------------------
.../org/apache/samza/job/yarn/SamzaAppState.java | 6 ++++++
.../org/apache/samza/job/yarn/SamzaTaskManager.java | 1 +
.../resources/scalate/WEB-INF/views/index.scaml | 16 ++++++++++++++++
3 files changed, 23 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/samza/blob/f7f237e9/samza-yarn/src/main/java/org/apache/samza/job/yarn/SamzaAppState.java
----------------------------------------------------------------------
diff --git a/samza-yarn/src/main/java/org/apache/samza/job/yarn/SamzaAppState.java b/samza-yarn/src/main/java/org/apache/samza/job/yarn/SamzaAppState.java
index d5be36e..3df927e 100644
--- a/samza-yarn/src/main/java/org/apache/samza/job/yarn/SamzaAppState.java
+++ b/samza-yarn/src/main/java/org/apache/samza/job/yarn/SamzaAppState.java
@@ -23,6 +23,7 @@ import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
import org.apache.hadoop.yarn.api.records.ContainerId;
import org.apache.hadoop.yarn.api.records.FinalApplicationStatus;
import org.apache.samza.coordinator.JobCoordinator;
+import org.apache.hadoop.yarn.api.records.ContainerStatus;
import java.net.URL;
import java.util.HashSet;
@@ -119,6 +120,11 @@ public class SamzaAppState {
public AtomicInteger releasedContainers = new AtomicInteger(0);
/**
+ * ContainerStatus of failed containers.
+ */
+ public ConcurrentMap<String, ContainerStatus> failedContainersStatus = new ConcurrentHashMap<String, ContainerStatus>();
+
+ /**
* Number of containers configured for the job
*/
public int containerCount = 0;
http://git-wip-us.apache.org/repos/asf/samza/blob/f7f237e9/samza-yarn/src/main/java/org/apache/samza/job/yarn/SamzaTaskManager.java
----------------------------------------------------------------------
diff --git a/samza-yarn/src/main/java/org/apache/samza/job/yarn/SamzaTaskManager.java b/samza-yarn/src/main/java/org/apache/samza/job/yarn/SamzaTaskManager.java
index d17ffe0..a3562a1 100644
--- a/samza-yarn/src/main/java/org/apache/samza/job/yarn/SamzaTaskManager.java
+++ b/samza-yarn/src/main/java/org/apache/samza/job/yarn/SamzaTaskManager.java
@@ -204,6 +204,7 @@ class SamzaTaskManager implements YarnAppMasterListener {
log.info("Container " + containerIdStr + " failed with exit code " + exitStatus + " - " + containerStatus.getDiagnostics());
state.failedContainers.incrementAndGet();
+ state.failedContainersStatus.put(containerIdStr, containerStatus);
state.jobHealthy.set(false);
if(containerId != -1) {
http://git-wip-us.apache.org/repos/asf/samza/blob/f7f237e9/samza-yarn/src/main/resources/scalate/WEB-INF/views/index.scaml
----------------------------------------------------------------------
diff --git a/samza-yarn/src/main/resources/scalate/WEB-INF/views/index.scaml b/samza-yarn/src/main/resources/scalate/WEB-INF/views/index.scaml
index 2d16fe0..93660c7 100644
--- a/samza-yarn/src/main/resources/scalate/WEB-INF/views/index.scaml
+++ b/samza-yarn/src/main/resources/scalate/WEB-INF/views/index.scaml
@@ -139,6 +139,22 @@
Ordinary: #{state.jobCoordinator.jobModel.getContainerToHostValue(containerId, org.apache.samza.coordinator.stream.messages.SetContainerHostMapping.JMX_URL_KEY)}
Tunneling: #{state.jobCoordinator.jobModel.getContainerToHostValue(containerId, org.apache.samza.coordinator.stream.messages.SetContainerHostMapping.JMX_TUNNELING_URL_KEY)}
+ %h2 Failed Containers
+ %table.table.table-striped.table-bordered.tablesorter#containers-table
+ %thead
+ %tr
+ %th Container
+ %th Exit code
+ %th Message
+ %tbody
+ - for((containerId, containerStatus) <- state.failedContainersStatus)
+ %tr
+ %td
+ #{containerId}
+ %td
+ Exit code: #{containerStatus.getExitStatus}
+ %td
+ %div.value= containerStatus.getDiagnostics
%div.tab-pane#task-groups
%h2 Task Groups