You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tez.apache.org by je...@apache.org on 2014/11/17 20:22:44 UTC

[35/50] [abbrv] tez git commit: TEZ-1758. TezClient should provide YARN diagnostics when the AM crashes (bikas)

TEZ-1758. TezClient should provide YARN diagnostics when the AM crashes (bikas)


Project: http://git-wip-us.apache.org/repos/asf/tez/repo
Commit: http://git-wip-us.apache.org/repos/asf/tez/commit/a2d5768b
Tree: http://git-wip-us.apache.org/repos/asf/tez/tree/a2d5768b
Diff: http://git-wip-us.apache.org/repos/asf/tez/diff/a2d5768b

Branch: refs/heads/TEZ-8
Commit: a2d5768bc274684719dff64b46967264f3d18c0e
Parents: c6c08c1
Author: Bikas Saha <bi...@apache.org>
Authored: Sat Nov 8 11:59:14 2014 -0800
Committer: Bikas Saha <bi...@apache.org>
Committed: Sat Nov 8 11:59:14 2014 -0800

----------------------------------------------------------------------
 CHANGES.txt                                     |  1 +
 .../java/org/apache/tez/client/TezClient.java   | 16 ++++-
 .../org/apache/tez/client/TezClientUtils.java   |  9 ++-
 .../tez/dag/api/client/DAGClientImpl.java       | 12 ----
 .../org/apache/tez/client/TestTezClient.java    | 65 +++++++++++++++++++-
 5 files changed, 85 insertions(+), 18 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tez/blob/a2d5768b/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index da457df..34ffc84 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -13,6 +13,7 @@ ALL CHANGES:
 Release 0.5.3: Unreleased
 
 ALL CHANGES:
+  TEZ-1758. TezClient should provide YARN diagnostics when the AM crashes
   TEZ-1742. Improve response time of internal preemption
   TEZ-1745. TestATSHistoryLoggingService::testATSHistoryLoggingServiceShutdown can be flaky.
   TEZ-1747. Increase test timeout for TestSecureShuffle.

http://git-wip-us.apache.org/repos/asf/tez/blob/a2d5768b/tez-api/src/main/java/org/apache/tez/client/TezClient.java
----------------------------------------------------------------------
diff --git a/tez-api/src/main/java/org/apache/tez/client/TezClient.java b/tez-api/src/main/java/org/apache/tez/client/TezClient.java
index 049fee3..fc70b48 100644
--- a/tez-api/src/main/java/org/apache/tez/client/TezClient.java
+++ b/tez-api/src/main/java/org/apache/tez/client/TezClient.java
@@ -84,12 +84,16 @@ import com.google.protobuf.ServiceException;
 public class TezClient {
 
   private static final Log LOG = LogFactory.getLog(TezClient.class);
+  
+  @VisibleForTesting
+  static final String NO_CLUSTER_DIAGNOSTICS_MSG = "No cluster diagnostics found.";
 
   private final String clientName;
   private ApplicationId sessionAppId;
   private ApplicationId lastSubmittedAppId;
   private AMConfiguration amConfig;
   private FrameworkClient frameworkClient;
+  private String diagnostics;
   private boolean isSession;
   private boolean sessionStarted = false;
   private boolean sessionStopped = false;
@@ -510,9 +514,14 @@ public class TezClient {
       case ACCEPTED:
       case SUBMITTED:
         return TezAppMasterStatus.INITIALIZING;
-      case FINISHED:
       case FAILED:
       case KILLED:
+        diagnostics = appReport.getDiagnostics();
+        LOG.info("App did not succeed. Diagnostics: "
+            + (appReport.getDiagnostics() != null ? appReport.getDiagnostics()
+                : NO_CLUSTER_DIAGNOSTICS_MSG));
+        return TezAppMasterStatus.SHUTDOWN;
+      case FINISHED:
         return TezAppMasterStatus.SHUTDOWN;
       case RUNNING:
         if (!isSession) {
@@ -600,7 +609,8 @@ public class TezClient {
     while (true) {
       TezAppMasterStatus status = getAppMasterStatus();
       if (status.equals(TezAppMasterStatus.SHUTDOWN)) {
-        throw new SessionNotRunning("TezSession has already shutdown");
+        throw new SessionNotRunning("TezSession has already shutdown. "
+            + ((diagnostics != null) ? diagnostics : NO_CLUSTER_DIAGNOSTICS_MSG));
       }
       if (status.equals(TezAppMasterStatus.READY)) {
         return;
@@ -647,7 +657,7 @@ public class TezClient {
     if (!sessionStarted) {
       throw new SessionNotRunning("Session not started");
     } else if (sessionStopped) {
-      throw new SessionNotRunning("Session stopped");
+      throw new SessionNotRunning("Session stopped by user");
     }
   }
   

http://git-wip-us.apache.org/repos/asf/tez/blob/a2d5768b/tez-api/src/main/java/org/apache/tez/client/TezClientUtils.java
----------------------------------------------------------------------
diff --git a/tez-api/src/main/java/org/apache/tez/client/TezClientUtils.java b/tez-api/src/main/java/org/apache/tez/client/TezClientUtils.java
index 7be8013..de4bdd0 100644
--- a/tez-api/src/main/java/org/apache/tez/client/TezClientUtils.java
+++ b/tez-api/src/main/java/org/apache/tez/client/TezClientUtils.java
@@ -779,12 +779,17 @@ public class TezClientUtils {
         if (appState == YarnApplicationState.FINISHED
             || appState == YarnApplicationState.KILLED
             || appState == YarnApplicationState.FAILED) {
-          throw new SessionNotRunning("Application not running"
+          String msg = "Application not running"
               + ", applicationId=" + applicationId
               + ", yarnApplicationState=" + appReport.getYarnApplicationState()
               + ", finalApplicationStatus="
               + appReport.getFinalApplicationStatus()
-              + ", trackingUrl=" + appReport.getTrackingUrl());
+              + ", trackingUrl=" + appReport.getTrackingUrl()
+              + ", diagnostics="
+              + (appReport.getDiagnostics() != null ? appReport.getDiagnostics()
+                  : TezClient.NO_CLUSTER_DIAGNOSTICS_MSG);
+          LOG.info(msg);
+          throw new SessionNotRunning(msg);
         }
         return null;
       }

http://git-wip-us.apache.org/repos/asf/tez/blob/a2d5768b/tez-api/src/main/java/org/apache/tez/dag/api/client/DAGClientImpl.java
----------------------------------------------------------------------
diff --git a/tez-api/src/main/java/org/apache/tez/dag/api/client/DAGClientImpl.java b/tez-api/src/main/java/org/apache/tez/dag/api/client/DAGClientImpl.java
index 0c8ef1a..87e64cd 100644
--- a/tez-api/src/main/java/org/apache/tez/dag/api/client/DAGClientImpl.java
+++ b/tez-api/src/main/java/org/apache/tez/dag/api/client/DAGClientImpl.java
@@ -441,18 +441,6 @@ public class DAGClientImpl extends DAGClient {
     }
   }
 
-  private ApplicationReport getApplicationReport() {
-    ApplicationReport appReport = null;
-    try {
-      appReport = frameworkClient.getApplicationReport(appId);
-    } catch (YarnException e) {
-      // do nothing
-    } catch (IOException e) {
-      // do nothing
-    }
-    return appReport;
-  }
-
   private void switchToTimelineClient() throws IOException, TezException {
     realClient.close();
     realClient = new DAGClientTimelineImpl(appId, dagId, conf, frameworkClient);

http://git-wip-us.apache.org/repos/asf/tez/blob/a2d5768b/tez-api/src/test/java/org/apache/tez/client/TestTezClient.java
----------------------------------------------------------------------
diff --git a/tez-api/src/test/java/org/apache/tez/client/TestTezClient.java b/tez-api/src/test/java/org/apache/tez/client/TestTezClient.java
index 0ce6dff..f4d4112 100644
--- a/tez-api/src/test/java/org/apache/tez/client/TestTezClient.java
+++ b/tez-api/src/test/java/org/apache/tez/client/TestTezClient.java
@@ -48,6 +48,7 @@ import org.apache.hadoop.yarn.exceptions.YarnException;
 import org.apache.tez.dag.api.DAG;
 import org.apache.tez.dag.api.PreWarmVertex;
 import org.apache.tez.dag.api.ProcessorDescriptor;
+import org.apache.tez.dag.api.SessionNotRunning;
 import org.apache.tez.dag.api.TezConfiguration;
 import org.apache.tez.dag.api.TezConstants;
 import org.apache.tez.dag.api.TezException;
@@ -74,6 +75,7 @@ public class TestTezClient {
     DAGClientAMProtocolBlockingPB sessionAmProxy;
     YarnClient mockYarnClient;
     ApplicationId mockAppId;
+    boolean callRealGetSessionAMProxy;
 
     public TezClientForTest(String name, TezConfiguration tezConf,
         @Nullable Map<String, LocalResource> localResources,
@@ -89,7 +91,10 @@ public class TestTezClient {
     @Override
     protected DAGClientAMProtocolBlockingPB getSessionAMProxy(ApplicationId appId) 
         throws TezException, IOException {
-      return sessionAmProxy;
+      if (!callRealGetSessionAMProxy) {
+        return sessionAmProxy;
+      }
+      return super.getSessionAMProxy(appId);
     }
   }
   
@@ -317,5 +322,63 @@ public class TestTezClient {
     Assert.assertThat(exceptionReference.get(),CoreMatchers. instanceOf(InterruptedException.class));
     client.stop();
   }
+  
+  @Test(timeout = 5000)
+  public void testWaitTillReadyAppFailed() throws Exception {
+    final TezClientForTest client = configure();
+    client.start();
+    String msg = "Application Test Failed";
+    when(client.mockYarnClient.getApplicationReport(client.mockAppId).getYarnApplicationState())
+        .thenReturn(YarnApplicationState.NEW).thenReturn(YarnApplicationState.FAILED);
+    when(client.mockYarnClient.getApplicationReport(client.mockAppId).getDiagnostics()).thenReturn(
+        msg);
+    try {
+      client.waitTillReady();
+      Assert.fail();
+    } catch (SessionNotRunning e) {
+      Assert.assertTrue(e.getMessage().contains(msg));
+    }
+    client.stop();
+  }
+  
+  @Test(timeout = 5000)
+  public void testWaitTillReadyAppFailedNoDiagnostics() throws Exception {
+    final TezClientForTest client = configure();
+    client.start();
+    when(client.mockYarnClient.getApplicationReport(client.mockAppId).getYarnApplicationState())
+        .thenReturn(YarnApplicationState.NEW).thenReturn(YarnApplicationState.FAILED);
+    try {
+      client.waitTillReady();
+      Assert.fail();
+    } catch (SessionNotRunning e) {
+      Assert.assertTrue(e.getMessage().contains(TezClient.NO_CLUSTER_DIAGNOSTICS_MSG));
+    }
+    client.stop();
+  }
+  
+  @Test(timeout = 5000)
+  public void testSubmitDAGAppFailed() throws Exception {
+    final TezClientForTest client = configure();
+    client.start();
+    
+    client.callRealGetSessionAMProxy = true;
+    String msg = "Application Test Failed";
+    when(client.mockYarnClient.getApplicationReport(client.mockAppId).getYarnApplicationState())
+        .thenReturn(YarnApplicationState.KILLED);
+    when(client.mockYarnClient.getApplicationReport(client.mockAppId).getDiagnostics()).thenReturn(
+        msg);
+
+    Vertex vertex = Vertex.create("Vertex", ProcessorDescriptor.create("P"), 1,
+        Resource.newInstance(1, 1));
+    DAG dag = DAG.create("DAG").addVertex(vertex);
+    
+    try {
+      client.submitDAG(dag);
+      Assert.fail();
+    } catch (SessionNotRunning e) {
+      Assert.assertTrue(e.getMessage().contains(msg));
+    }
+    client.stop();
+  }
 
 }