You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tez.apache.org by je...@apache.org on 2014/11/17 20:22:44 UTC
[35/50] [abbrv] tez git commit: TEZ-1758. TezClient should provide YARN diagnostics when the AM crashes (bikas)
TEZ-1758. TezClient should provide YARN diagnostics when the AM crashes (bikas)
Project: http://git-wip-us.apache.org/repos/asf/tez/repo
Commit: http://git-wip-us.apache.org/repos/asf/tez/commit/a2d5768b
Tree: http://git-wip-us.apache.org/repos/asf/tez/tree/a2d5768b
Diff: http://git-wip-us.apache.org/repos/asf/tez/diff/a2d5768b
Branch: refs/heads/TEZ-8
Commit: a2d5768bc274684719dff64b46967264f3d18c0e
Parents: c6c08c1
Author: Bikas Saha <bi...@apache.org>
Authored: Sat Nov 8 11:59:14 2014 -0800
Committer: Bikas Saha <bi...@apache.org>
Committed: Sat Nov 8 11:59:14 2014 -0800
----------------------------------------------------------------------
CHANGES.txt | 1 +
.../java/org/apache/tez/client/TezClient.java | 16 ++++-
.../org/apache/tez/client/TezClientUtils.java | 9 ++-
.../tez/dag/api/client/DAGClientImpl.java | 12 ----
.../org/apache/tez/client/TestTezClient.java | 65 +++++++++++++++++++-
5 files changed, 85 insertions(+), 18 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/tez/blob/a2d5768b/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index da457df..34ffc84 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -13,6 +13,7 @@ ALL CHANGES:
Release 0.5.3: Unreleased
ALL CHANGES:
+ TEZ-1758. TezClient should provide YARN diagnostics when the AM crashes
TEZ-1742. Improve response time of internal preemption
TEZ-1745. TestATSHistoryLoggingService::testATSHistoryLoggingServiceShutdown can be flaky.
TEZ-1747. Increase test timeout for TestSecureShuffle.
http://git-wip-us.apache.org/repos/asf/tez/blob/a2d5768b/tez-api/src/main/java/org/apache/tez/client/TezClient.java
----------------------------------------------------------------------
diff --git a/tez-api/src/main/java/org/apache/tez/client/TezClient.java b/tez-api/src/main/java/org/apache/tez/client/TezClient.java
index 049fee3..fc70b48 100644
--- a/tez-api/src/main/java/org/apache/tez/client/TezClient.java
+++ b/tez-api/src/main/java/org/apache/tez/client/TezClient.java
@@ -84,12 +84,16 @@ import com.google.protobuf.ServiceException;
public class TezClient {
private static final Log LOG = LogFactory.getLog(TezClient.class);
+
+ @VisibleForTesting
+ static final String NO_CLUSTER_DIAGNOSTICS_MSG = "No cluster diagnostics found.";
private final String clientName;
private ApplicationId sessionAppId;
private ApplicationId lastSubmittedAppId;
private AMConfiguration amConfig;
private FrameworkClient frameworkClient;
+ private String diagnostics;
private boolean isSession;
private boolean sessionStarted = false;
private boolean sessionStopped = false;
@@ -510,9 +514,14 @@ public class TezClient {
case ACCEPTED:
case SUBMITTED:
return TezAppMasterStatus.INITIALIZING;
- case FINISHED:
case FAILED:
case KILLED:
+ diagnostics = appReport.getDiagnostics();
+ LOG.info("App did not succeed. Diagnostics: "
+ + (appReport.getDiagnostics() != null ? appReport.getDiagnostics()
+ : NO_CLUSTER_DIAGNOSTICS_MSG));
+ return TezAppMasterStatus.SHUTDOWN;
+ case FINISHED:
return TezAppMasterStatus.SHUTDOWN;
case RUNNING:
if (!isSession) {
@@ -600,7 +609,8 @@ public class TezClient {
while (true) {
TezAppMasterStatus status = getAppMasterStatus();
if (status.equals(TezAppMasterStatus.SHUTDOWN)) {
- throw new SessionNotRunning("TezSession has already shutdown");
+ throw new SessionNotRunning("TezSession has already shutdown. "
+ + ((diagnostics != null) ? diagnostics : NO_CLUSTER_DIAGNOSTICS_MSG));
}
if (status.equals(TezAppMasterStatus.READY)) {
return;
@@ -647,7 +657,7 @@ public class TezClient {
if (!sessionStarted) {
throw new SessionNotRunning("Session not started");
} else if (sessionStopped) {
- throw new SessionNotRunning("Session stopped");
+ throw new SessionNotRunning("Session stopped by user");
}
}
http://git-wip-us.apache.org/repos/asf/tez/blob/a2d5768b/tez-api/src/main/java/org/apache/tez/client/TezClientUtils.java
----------------------------------------------------------------------
diff --git a/tez-api/src/main/java/org/apache/tez/client/TezClientUtils.java b/tez-api/src/main/java/org/apache/tez/client/TezClientUtils.java
index 7be8013..de4bdd0 100644
--- a/tez-api/src/main/java/org/apache/tez/client/TezClientUtils.java
+++ b/tez-api/src/main/java/org/apache/tez/client/TezClientUtils.java
@@ -779,12 +779,17 @@ public class TezClientUtils {
if (appState == YarnApplicationState.FINISHED
|| appState == YarnApplicationState.KILLED
|| appState == YarnApplicationState.FAILED) {
- throw new SessionNotRunning("Application not running"
+ String msg = "Application not running"
+ ", applicationId=" + applicationId
+ ", yarnApplicationState=" + appReport.getYarnApplicationState()
+ ", finalApplicationStatus="
+ appReport.getFinalApplicationStatus()
- + ", trackingUrl=" + appReport.getTrackingUrl());
+ + ", trackingUrl=" + appReport.getTrackingUrl()
+ + ", diagnostics="
+ + (appReport.getDiagnostics() != null ? appReport.getDiagnostics()
+ : TezClient.NO_CLUSTER_DIAGNOSTICS_MSG);
+ LOG.info(msg);
+ throw new SessionNotRunning(msg);
}
return null;
}
http://git-wip-us.apache.org/repos/asf/tez/blob/a2d5768b/tez-api/src/main/java/org/apache/tez/dag/api/client/DAGClientImpl.java
----------------------------------------------------------------------
diff --git a/tez-api/src/main/java/org/apache/tez/dag/api/client/DAGClientImpl.java b/tez-api/src/main/java/org/apache/tez/dag/api/client/DAGClientImpl.java
index 0c8ef1a..87e64cd 100644
--- a/tez-api/src/main/java/org/apache/tez/dag/api/client/DAGClientImpl.java
+++ b/tez-api/src/main/java/org/apache/tez/dag/api/client/DAGClientImpl.java
@@ -441,18 +441,6 @@ public class DAGClientImpl extends DAGClient {
}
}
- private ApplicationReport getApplicationReport() {
- ApplicationReport appReport = null;
- try {
- appReport = frameworkClient.getApplicationReport(appId);
- } catch (YarnException e) {
- // do nothing
- } catch (IOException e) {
- // do nothing
- }
- return appReport;
- }
-
private void switchToTimelineClient() throws IOException, TezException {
realClient.close();
realClient = new DAGClientTimelineImpl(appId, dagId, conf, frameworkClient);
http://git-wip-us.apache.org/repos/asf/tez/blob/a2d5768b/tez-api/src/test/java/org/apache/tez/client/TestTezClient.java
----------------------------------------------------------------------
diff --git a/tez-api/src/test/java/org/apache/tez/client/TestTezClient.java b/tez-api/src/test/java/org/apache/tez/client/TestTezClient.java
index 0ce6dff..f4d4112 100644
--- a/tez-api/src/test/java/org/apache/tez/client/TestTezClient.java
+++ b/tez-api/src/test/java/org/apache/tez/client/TestTezClient.java
@@ -48,6 +48,7 @@ import org.apache.hadoop.yarn.exceptions.YarnException;
import org.apache.tez.dag.api.DAG;
import org.apache.tez.dag.api.PreWarmVertex;
import org.apache.tez.dag.api.ProcessorDescriptor;
+import org.apache.tez.dag.api.SessionNotRunning;
import org.apache.tez.dag.api.TezConfiguration;
import org.apache.tez.dag.api.TezConstants;
import org.apache.tez.dag.api.TezException;
@@ -74,6 +75,7 @@ public class TestTezClient {
DAGClientAMProtocolBlockingPB sessionAmProxy;
YarnClient mockYarnClient;
ApplicationId mockAppId;
+ boolean callRealGetSessionAMProxy;
public TezClientForTest(String name, TezConfiguration tezConf,
@Nullable Map<String, LocalResource> localResources,
@@ -89,7 +91,10 @@ public class TestTezClient {
@Override
protected DAGClientAMProtocolBlockingPB getSessionAMProxy(ApplicationId appId)
throws TezException, IOException {
- return sessionAmProxy;
+ if (!callRealGetSessionAMProxy) {
+ return sessionAmProxy;
+ }
+ return super.getSessionAMProxy(appId);
}
}
@@ -317,5 +322,63 @@ public class TestTezClient {
Assert.assertThat(exceptionReference.get(),CoreMatchers. instanceOf(InterruptedException.class));
client.stop();
}
+
+ @Test(timeout = 5000)
+ public void testWaitTillReadyAppFailed() throws Exception {
+ final TezClientForTest client = configure();
+ client.start();
+ String msg = "Application Test Failed";
+ when(client.mockYarnClient.getApplicationReport(client.mockAppId).getYarnApplicationState())
+ .thenReturn(YarnApplicationState.NEW).thenReturn(YarnApplicationState.FAILED);
+ when(client.mockYarnClient.getApplicationReport(client.mockAppId).getDiagnostics()).thenReturn(
+ msg);
+ try {
+ client.waitTillReady();
+ Assert.fail();
+ } catch (SessionNotRunning e) {
+ Assert.assertTrue(e.getMessage().contains(msg));
+ }
+ client.stop();
+ }
+
+ @Test(timeout = 5000)
+ public void testWaitTillReadyAppFailedNoDiagnostics() throws Exception {
+ final TezClientForTest client = configure();
+ client.start();
+ when(client.mockYarnClient.getApplicationReport(client.mockAppId).getYarnApplicationState())
+ .thenReturn(YarnApplicationState.NEW).thenReturn(YarnApplicationState.FAILED);
+ try {
+ client.waitTillReady();
+ Assert.fail();
+ } catch (SessionNotRunning e) {
+ Assert.assertTrue(e.getMessage().contains(TezClient.NO_CLUSTER_DIAGNOSTICS_MSG));
+ }
+ client.stop();
+ }
+
+ @Test(timeout = 5000)
+ public void testSubmitDAGAppFailed() throws Exception {
+ final TezClientForTest client = configure();
+ client.start();
+
+ client.callRealGetSessionAMProxy = true;
+ String msg = "Application Test Failed";
+ when(client.mockYarnClient.getApplicationReport(client.mockAppId).getYarnApplicationState())
+ .thenReturn(YarnApplicationState.KILLED);
+ when(client.mockYarnClient.getApplicationReport(client.mockAppId).getDiagnostics()).thenReturn(
+ msg);
+
+ Vertex vertex = Vertex.create("Vertex", ProcessorDescriptor.create("P"), 1,
+ Resource.newInstance(1, 1));
+ DAG dag = DAG.create("DAG").addVertex(vertex);
+
+ try {
+ client.submitDAG(dag);
+ Assert.fail();
+ } catch (SessionNotRunning e) {
+ Assert.assertTrue(e.getMessage().contains(msg));
+ }
+ client.stop();
+ }
}