You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tez.apache.org by zj...@apache.org on 2015/05/20 04:13:58 UTC

tez git commit: TEZ-2460. Temporary solution for issue due to YARN-2560 (zjffdu)

Repository: tez
Updated Branches:
  refs/heads/master 1f927a55e -> a9048bb52


TEZ-2460. Temporary solution for issue due to YARN-2560 (zjffdu)


Project: http://git-wip-us.apache.org/repos/asf/tez/repo
Commit: http://git-wip-us.apache.org/repos/asf/tez/commit/a9048bb5
Tree: http://git-wip-us.apache.org/repos/asf/tez/tree/a9048bb5
Diff: http://git-wip-us.apache.org/repos/asf/tez/diff/a9048bb5

Branch: refs/heads/master
Commit: a9048bb52c8efc194fca62f8764d97b6d3143147
Parents: 1f927a5
Author: Jeff Zhang <zj...@apache.org>
Authored: Wed May 20 10:13:46 2015 +0800
Committer: Jeff Zhang <zj...@apache.org>
Committed: Wed May 20 10:13:46 2015 +0800

----------------------------------------------------------------------
 CHANGES.txt                                     |  1 +
 .../apache/tez/dag/api/TezConfiguration.java    | 11 +++++++++
 .../tez/dag/api/client/DAGClientImpl.java       | 24 ++++++++++++++++++--
 3 files changed, 34 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tez/blob/a9048bb5/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index 0b9bfa6..7781a9c 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -22,6 +22,7 @@ Release 0.7.1: Unreleased
 INCOMPATIBLE CHANGES
 
 ALL CHANGES:
+  TEZ-2460. Temporary solution for issue due to YARN-2560
   TEZ-2455. Tez UI: Dag view caching, error handling and minor layout changes
   TEZ-2453. Tez UI: show the dagInfo is the application has set the same.
   TEZ-2447. Tez UI: Generic changes based on feedbacks.

http://git-wip-us.apache.org/repos/asf/tez/blob/a9048bb5/tez-api/src/main/java/org/apache/tez/dag/api/TezConfiguration.java
----------------------------------------------------------------------
diff --git a/tez-api/src/main/java/org/apache/tez/dag/api/TezConfiguration.java b/tez-api/src/main/java/org/apache/tez/dag/api/TezConfiguration.java
index e4170e9..99c7c9d 100644
--- a/tez-api/src/main/java/org/apache/tez/dag/api/TezConfiguration.java
+++ b/tez-api/src/main/java/org/apache/tez/dag/api/TezConfiguration.java
@@ -1206,4 +1206,15 @@ public class TezConfiguration extends Configuration {
       TEZ_PREFIX + "test.minicluster.app.wait.on.shutdown.secs";
   public static final long TEZ_TEST_MINI_CLUSTER_APP_WAIT_ON_SHUTDOWN_SECS_DEFAULT = 30;
 
+  /**
+   * Long value
+   * Time to wait (in milliseconds) for the YARN app's diagnostics to become available
+   * Workaround for YARN-2560
+   */
+  @Private
+  @ConfigurationScope(Scope.CLIENT)
+  public static final String TEZ_CLIENT_DIAGNOSTICS_WAIT_TIMEOUT_MS =
+      TEZ_PREFIX + "client.diagnostics.wait.timeout-ms";
+  @Private
+  public static final long TEZ_CLIENT_DIAGNOSTICS_WAIT_TIMEOUT_MS_DEFAULT = 3*1000;
 }

http://git-wip-us.apache.org/repos/asf/tez/blob/a9048bb5/tez-api/src/main/java/org/apache/tez/dag/api/client/DAGClientImpl.java
----------------------------------------------------------------------
diff --git a/tez-api/src/main/java/org/apache/tez/dag/api/client/DAGClientImpl.java b/tez-api/src/main/java/org/apache/tez/dag/api/client/DAGClientImpl.java
index de6ede6..baacdb9 100644
--- a/tez-api/src/main/java/org/apache/tez/dag/api/client/DAGClientImpl.java
+++ b/tez-api/src/main/java/org/apache/tez/dag/api/client/DAGClientImpl.java
@@ -36,6 +36,7 @@ import org.apache.hadoop.classification.InterfaceAudience.Private;
 import org.apache.hadoop.security.UserGroupInformation;
 import org.apache.hadoop.yarn.api.records.ApplicationId;
 import org.apache.hadoop.yarn.api.records.ApplicationReport;
+import org.apache.hadoop.yarn.api.records.FinalApplicationStatus;
 import org.apache.hadoop.yarn.api.records.YarnApplicationState;
 import org.apache.hadoop.yarn.conf.YarnConfiguration;
 import org.apache.hadoop.yarn.exceptions.YarnException;
@@ -72,6 +73,7 @@ public class DAGClientImpl extends DAGClient {
       VertexStatus.State.SUCCEEDED, VertexStatus.State.FAILED, VertexStatus.State.KILLED,
       VertexStatus.State.ERROR);
   private long statusPollInterval;
+  private long diagnoticsWaitTimeout;
 
   public DAGClientImpl(ApplicationId appId, String dagId, TezConfiguration conf,
                        @Nullable FrameworkClient frameworkClient) {
@@ -106,6 +108,9 @@ public class DAGClientImpl extends DAGClient {
       LOG.error("DAG Status poll interval cannot be negative and setting to default value.");
       statusPollInterval = TezConfiguration.TEZ_DAG_STATUS_POLLINTERVAL_MS_DEFAULT;
     }
+    this.diagnoticsWaitTimeout = conf.getLong(
+        TezConfiguration.TEZ_CLIENT_DIAGNOSTICS_WAIT_TIMEOUT_MS,
+        TezConfiguration.TEZ_CLIENT_DIAGNOSTICS_WAIT_TIMEOUT_MS_DEFAULT);
   }
 
   @Override
@@ -445,10 +450,25 @@ public class DAGClientImpl extends DAGClient {
     }
 
     builder.setState(dagState);
-    if(appReport.getDiagnostics() != null) {
-      builder.addAllDiagnostics(Collections.singleton(appReport.getDiagnostics()));
+    // workaround before YARN-2560 is fixed
+    if (appReport.getFinalApplicationStatus() == FinalApplicationStatus.FAILED
+        || appReport.getFinalApplicationStatus() == FinalApplicationStatus.KILLED) {
+      long startTime = System.currentTimeMillis();
+      while((appReport.getDiagnostics() == null
+          || appReport.getDiagnostics().isEmpty())
+          && (System.currentTimeMillis() - startTime) < diagnoticsWaitTimeout) {
+        try {
+          Thread.sleep(100);
+          appReport = frameworkClient.getApplicationReport(appId);
+        } catch (YarnException e) {
+          throw new TezException(e);
+        } catch (InterruptedException e) {
+          throw new TezException(e);
+        }
+      }
     }
 
+    builder.addAllDiagnostics(Collections.singleton(appReport.getDiagnostics()));
     return dagStatus;
   }