You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tez.apache.org by sr...@apache.org on 2016/02/18 10:57:55 UTC

[01/24] tez git commit: TEZ-3053. Containers timeout if they do not receive a task within the container timeout interval. (sseth)

Repository: tez
Updated Branches:
  refs/heads/TEZ-2980 d83904453 -> 478a5349d


TEZ-3053. Containers timeout if they do not receive a task within the
container timeout interval. (sseth)


Project: http://git-wip-us.apache.org/repos/asf/tez/repo
Commit: http://git-wip-us.apache.org/repos/asf/tez/commit/e171fddc
Tree: http://git-wip-us.apache.org/repos/asf/tez/tree/e171fddc
Diff: http://git-wip-us.apache.org/repos/asf/tez/diff/e171fddc

Branch: refs/heads/TEZ-2980
Commit: e171fddce3ae657dcc2baaf6b50913ef06a1d70c
Parents: cc06400
Author: Siddharth Seth <ss...@apache.org>
Authored: Wed Jan 20 10:30:41 2016 -0800
Committer: Siddharth Seth <ss...@apache.org>
Committed: Wed Jan 20 10:30:41 2016 -0800

----------------------------------------------------------------------
 CHANGES.txt                                     |  1 +
 .../tez/dag/app/TezTaskCommunicatorImpl.java    |  3 +-
 .../dag/app/TestTezTaskCommunicatorManager.java | 72 ++++++++++++++++++++
 3 files changed, 75 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tez/blob/e171fddc/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index f1cc292..3b8b016 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -6,6 +6,7 @@ Release 0.8.3: Unreleased
 INCOMPATIBLE CHANGES
 
 ALL CHANGES:
+  TEZ-3053. Containers timeout if they do not receive a task within the container timeout interval.
   TEZ-2898. tez tools : swimlanes.py is broken.
   TEZ-2937. Can Processor.close() be called after closing inputs and outputs?
   TEZ-3037. History URL should be set regardless of which history logging service is enabled.

http://git-wip-us.apache.org/repos/asf/tez/blob/e171fddc/tez-dag/src/main/java/org/apache/tez/dag/app/TezTaskCommunicatorImpl.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/TezTaskCommunicatorImpl.java b/tez-dag/src/main/java/org/apache/tez/dag/app/TezTaskCommunicatorImpl.java
index 0bbe97a..b879f07 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/TezTaskCommunicatorImpl.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/TezTaskCommunicatorImpl.java
@@ -410,7 +410,7 @@ public class TezTaskCommunicatorImpl extends TaskCommunicator {
 
   private ContainerTask getContainerTask(ContainerId containerId) throws IOException {
     ContainerInfo containerInfo = registeredContainers.get(containerId);
-    ContainerTask task = null;
+    ContainerTask task;
     if (containerInfo == null) {
       if (getContext().isKnownContainer(containerId)) {
         LOG.info("Container with id: " + containerId
@@ -422,6 +422,7 @@ public class TezTaskCommunicatorImpl extends TaskCommunicator {
       task = TASK_FOR_INVALID_JVM;
     } else {
       synchronized (containerInfo) {
+        getContext().containerAlive(containerId);
         if (containerInfo.taskSpec != null) {
           if (!containerInfo.taskPulled) {
             containerInfo.taskPulled = true;

http://git-wip-us.apache.org/repos/asf/tez/blob/e171fddc/tez-dag/src/test/java/org/apache/tez/dag/app/dag/app/TestTezTaskCommunicatorManager.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/test/java/org/apache/tez/dag/app/dag/app/TestTezTaskCommunicatorManager.java b/tez-dag/src/test/java/org/apache/tez/dag/app/dag/app/TestTezTaskCommunicatorManager.java
new file mode 100644
index 0000000..65f43a8
--- /dev/null
+++ b/tez-dag/src/test/java/org/apache/tez/dag/app/dag/app/TestTezTaskCommunicatorManager.java
@@ -0,0 +1,72 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tez.dag.app.dag.app;
+
+import static org.junit.Assert.assertNull;
+import static org.mockito.Mockito.doReturn;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.verify;
+
+import java.io.IOException;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.security.Credentials;
+import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
+import org.apache.hadoop.yarn.api.records.ApplicationId;
+import org.apache.hadoop.yarn.api.records.ContainerId;
+import org.apache.tez.common.ContainerContext;
+import org.apache.tez.common.ContainerTask;
+import org.apache.tez.common.TezUtils;
+import org.apache.tez.dag.api.UserPayload;
+import org.apache.tez.dag.app.TezTaskCommunicatorImpl;
+import org.apache.tez.serviceplugins.api.TaskCommunicatorContext;
+import org.junit.Test;
+
+public class TestTezTaskCommunicatorManager {
+
+  @Test (timeout = 5000)
+  public void testContainerAliveOnGetTask() throws IOException {
+
+    TaskCommunicatorContext context = mock(TaskCommunicatorContext.class);
+    Configuration conf = new Configuration(false);
+    UserPayload userPayload = TezUtils.createUserPayloadFromConf(conf);
+
+
+
+
+    ApplicationId appId = ApplicationId.newInstance(1000, 1);
+    ApplicationAttemptId appAttemptId = ApplicationAttemptId.newInstance(appId, 1);
+    ContainerId containerId = createContainerId(appId, 1);
+
+    doReturn(appAttemptId).when(context).getApplicationAttemptId();
+    doReturn(userPayload).when(context).getInitialUserPayload();
+    doReturn(new Credentials()).when(context).getCredentials();
+
+    TezTaskCommunicatorImpl taskComm = new TezTaskCommunicatorImpl(context);
+
+    ContainerContext containerContext = new ContainerContext(containerId.toString());
+    taskComm.registerRunningContainer(containerId, "fakehost", 0);
+    ContainerTask containerTask = taskComm.getUmbilical().getTask(containerContext);
+    assertNull(containerTask);
+
+    verify(context).containerAlive(containerId);
+  }
+
+  @SuppressWarnings("deprecation")
+  private ContainerId createContainerId(ApplicationId applicationId, int containerIdx) {
+    ApplicationAttemptId appAttemptId = ApplicationAttemptId.newInstance(applicationId, 1);
+    return ContainerId.newInstance(appAttemptId, containerIdx);
+  }
+}


[24/24] tez git commit: Merge branch 'TEZ-2980' of https://git-wip-us.apache.org/repos/asf/tez into TEZ-2980

Posted by sr...@apache.org.
Merge branch 'TEZ-2980' of https://git-wip-us.apache.org/repos/asf/tez into TEZ-2980


Project: http://git-wip-us.apache.org/repos/asf/tez/repo
Commit: http://git-wip-us.apache.org/repos/asf/tez/commit/478a5349
Tree: http://git-wip-us.apache.org/repos/asf/tez/tree/478a5349
Diff: http://git-wip-us.apache.org/repos/asf/tez/diff/478a5349

Branch: refs/heads/TEZ-2980
Commit: 478a5349de7849a200e7c25f21cfd962f9a190a5
Parents: de3a074 d839044
Author: Sreenath Somarajapuram <sr...@apache.org>
Authored: Thu Feb 18 15:24:22 2016 +0530
Committer: Sreenath Somarajapuram <sr...@apache.org>
Committed: Thu Feb 18 15:24:22 2016 +0530

----------------------------------------------------------------------
 TEZ-2980-CHANGES.txt                            |  40 ++
 pom.xml                                         |   1 +
 tez-ui2/README.md                               |  87 ++++
 tez-ui2/findbugs-exclude.xml                    |  16 +
 tez-ui2/pom.xml                                 | 153 +++++++
 tez-ui2/src/main/resources/META-INF/LICENSE.txt | 395 ++++++++++++++++++
 tez-ui2/src/main/resources/META-INF/NOTICE.txt  |  10 +
 tez-ui2/src/main/webapp/.bowerrc                |   4 +
 tez-ui2/src/main/webapp/.editorconfig           |  34 ++
 tez-ui2/src/main/webapp/.ember-cli              |   9 +
 tez-ui2/src/main/webapp/.gitignore              |  18 +
 tez-ui2/src/main/webapp/.jshintrc               |  32 ++
 tez-ui2/src/main/webapp/.travis.yml             |  23 ++
 tez-ui2/src/main/webapp/.watchmanconfig         |   3 +
 tez-ui2/src/main/webapp/README.md               |  54 +++
 tez-ui2/src/main/webapp/WEB-INF/web.xml         |  25 ++
 .../src/main/webapp/app/adapters/abstract.js    |  70 ++++
 tez-ui2/src/main/webapp/app/adapters/ahs-app.js |  27 ++
 tez-ui2/src/main/webapp/app/adapters/am.js      |  28 ++
 tez-ui2/src/main/webapp/app/adapters/app-rm.js  |  22 +
 tez-ui2/src/main/webapp/app/adapters/app.js     |  22 +
 .../src/main/webapp/app/adapters/attempt-am.js  |  22 +
 tez-ui2/src/main/webapp/app/adapters/attempt.js |  22 +
 tez-ui2/src/main/webapp/app/adapters/dag-am.js  |  22 +
 tez-ui2/src/main/webapp/app/adapters/dag.js     |  22 +
 tez-ui2/src/main/webapp/app/adapters/loader.js  |  58 +++
 tez-ui2/src/main/webapp/app/adapters/rm.js      |  26 ++
 tez-ui2/src/main/webapp/app/adapters/task-am.js |  22 +
 tez-ui2/src/main/webapp/app/adapters/task.js    |  22 +
 .../src/main/webapp/app/adapters/timeline.js    | 106 +++++
 .../src/main/webapp/app/adapters/vertex-am.js   |  22 +
 tez-ui2/src/main/webapp/app/adapters/vertex.js  |  22 +
 tez-ui2/src/main/webapp/app/app.js              |  36 ++
 .../main/webapp/app/components/caller-info.js   |  78 ++++
 .../webapp/app/components/column-selector.js    | 104 +++++
 .../webapp/app/components/dags-page-search.js   |  44 ++
 .../webapp/app/components/dags-pagination-ui.js |  92 +++++
 .../webapp/app/components/date-formatter.js     |  30 ++
 .../app/components/em-table-status-cell.js      |  63 +++
 .../src/main/webapp/app/components/error-bar.js | 109 +++++
 .../main/webapp/app/components/stats-link.js    |  33 ++
 .../main/webapp/app/components/tab-n-refresh.js |  57 +++
 .../webapp/app/components/table-controls.js     |  29 ++
 .../webapp/app/components/zip-download-modal.js |  43 ++
 .../src/main/webapp/app/controllers/abstract.js |  50 +++
 tez-ui2/src/main/webapp/app/controllers/app.js  |  44 ++
 .../main/webapp/app/controllers/app/configs.js  |  60 +++
 .../src/main/webapp/app/controllers/app/dags.js |  98 +++++
 .../main/webapp/app/controllers/app/index.js    |  33 ++
 .../main/webapp/app/controllers/application.js  |  40 ++
 .../src/main/webapp/app/controllers/attempt.js  |  56 +++
 .../webapp/app/controllers/attempt/counters.js  |  26 ++
 .../webapp/app/controllers/attempt/index.js     |  22 +
 .../webapp/app/controllers/counters-table.js    |  74 ++++
 tez-ui2/src/main/webapp/app/controllers/dag.js  |  53 +++
 .../main/webapp/app/controllers/dag/attempts.js | 102 +++++
 .../main/webapp/app/controllers/dag/counters.js |  26 ++
 .../webapp/app/controllers/dag/graphical.js     | 174 ++++++++
 .../main/webapp/app/controllers/dag/index.js    |  22 +
 .../webapp/app/controllers/dag/index/index.js   | 129 ++++++
 .../main/webapp/app/controllers/dag/tasks.js    |  82 ++++
 .../main/webapp/app/controllers/dag/vertices.js | 122 ++++++
 tez-ui2/src/main/webapp/app/controllers/dags.js | 147 +++++++
 .../main/webapp/app/controllers/multi-table.js  |  37 ++
 tez-ui2/src/main/webapp/app/controllers/page.js |  23 ++
 .../src/main/webapp/app/controllers/parent.js   |  30 ++
 .../src/main/webapp/app/controllers/table.js    | 166 ++++++++
 tez-ui2/src/main/webapp/app/controllers/task.js |  54 +++
 .../webapp/app/controllers/task/attempts.js     |  80 ++++
 .../webapp/app/controllers/task/counters.js     |  26 ++
 .../main/webapp/app/controllers/task/index.js   |  22 +
 .../src/main/webapp/app/controllers/vertex.js   |  52 +++
 .../webapp/app/controllers/vertex/attempts.js   |  92 +++++
 .../webapp/app/controllers/vertex/counters.js   |  26 ++
 .../main/webapp/app/controllers/vertex/index.js |  50 +++
 .../main/webapp/app/controllers/vertex/tasks.js |  72 ++++
 tez-ui2/src/main/webapp/app/entities/am.js      |  58 +++
 .../src/main/webapp/app/entities/attempt-am.js  |  23 ++
 tez-ui2/src/main/webapp/app/entities/entity.js  | 181 +++++++++
 tez-ui2/src/main/webapp/app/entities/task-am.js |  23 ++
 .../src/main/webapp/app/entities/vertex-am.js   |  23 ++
 .../main/webapp/app/errors/unlinked-promise.js  |  34 ++
 tez-ui2/src/main/webapp/app/index.html          |  47 +++
 .../main/webapp/app/initializers/entities.js    |  29 ++
 tez-ui2/src/main/webapp/app/initializers/env.js |  28 ++
 .../src/main/webapp/app/initializers/hosts.js   |  28 ++
 .../src/main/webapp/app/initializers/jquery.js  |  39 ++
 .../src/main/webapp/app/initializers/loader.js  |  27 ++
 .../webapp/app/initializers/local-storage.js    |  26 ++
 .../webapp/app/mixins/auto-counter-column.js    |  69 ++++
 tez-ui2/src/main/webapp/app/mixins/name.js      |  30 ++
 tez-ui2/src/main/webapp/app/models/abstract.js  |  60 +++
 tez-ui2/src/main/webapp/app/models/ahs-app.js   |  43 ++
 .../src/main/webapp/app/models/am-timeline.js   |  46 +++
 tez-ui2/src/main/webapp/app/models/am.js        |  31 ++
 tez-ui2/src/main/webapp/app/models/app-rm.js    |  22 +
 tez-ui2/src/main/webapp/app/models/app.js       |  46 +++
 .../src/main/webapp/app/models/attempt-am.js    |  22 +
 tez-ui2/src/main/webapp/app/models/attempt.js   |  74 ++++
 tez-ui2/src/main/webapp/app/models/dag-am.js    |  22 +
 tez-ui2/src/main/webapp/app/models/dag.js       |  70 ++++
 tez-ui2/src/main/webapp/app/models/rm.js        |  26 ++
 tez-ui2/src/main/webapp/app/models/task-am.js   |  22 +
 tez-ui2/src/main/webapp/app/models/task.js      |  66 +++
 tez-ui2/src/main/webapp/app/models/timeline.js  |  89 ++++
 tez-ui2/src/main/webapp/app/models/vertex-am.js |  32 ++
 tez-ui2/src/main/webapp/app/models/vertex.js    | 118 ++++++
 tez-ui2/src/main/webapp/app/router.js           |  57 +++
 tez-ui2/src/main/webapp/app/routes/abstract.js  | 189 +++++++++
 .../src/main/webapp/app/routes/am-pollster.js   |  93 +++++
 tez-ui2/src/main/webapp/app/routes/app.js       |  38 ++
 .../src/main/webapp/app/routes/app/configs.js   |  37 ++
 tez-ui2/src/main/webapp/app/routes/app/dags.js  |  37 ++
 tez-ui2/src/main/webapp/app/routes/app/index.js |  39 ++
 .../src/main/webapp/app/routes/application.js   |  80 ++++
 tez-ui2/src/main/webapp/app/routes/attempt.js   |  38 ++
 .../main/webapp/app/routes/attempt/counters.js  |  35 ++
 .../src/main/webapp/app/routes/attempt/index.js |  35 ++
 tez-ui2/src/main/webapp/app/routes/dag.js       |  38 ++
 .../src/main/webapp/app/routes/dag/attempts.js  |  37 ++
 .../src/main/webapp/app/routes/dag/counters.js  |  36 ++
 .../src/main/webapp/app/routes/dag/graphical.js |  81 ++++
 tez-ui2/src/main/webapp/app/routes/dag/index.js |  58 +++
 .../main/webapp/app/routes/dag/index/index.js   |  62 +++
 tez-ui2/src/main/webapp/app/routes/dag/tasks.js |  37 ++
 .../src/main/webapp/app/routes/dag/vertices.js  |  37 ++
 tez-ui2/src/main/webapp/app/routes/dags.js      | 110 +++++
 .../main/webapp/app/routes/multi-am-pollster.js |  35 ++
 tez-ui2/src/main/webapp/app/routes/pollster.js  |  70 ++++
 .../webapp/app/routes/single-am-pollster.js     |  34 ++
 tez-ui2/src/main/webapp/app/routes/task.js      |  38 ++
 .../src/main/webapp/app/routes/task/attempts.js |  37 ++
 .../src/main/webapp/app/routes/task/counters.js |  35 ++
 .../src/main/webapp/app/routes/task/index.js    |  35 ++
 tez-ui2/src/main/webapp/app/routes/vertex.js    |  38 ++
 .../main/webapp/app/routes/vertex/attempts.js   |  37 ++
 .../main/webapp/app/routes/vertex/counters.js   |  35 ++
 .../src/main/webapp/app/routes/vertex/index.js  |  35 ++
 .../src/main/webapp/app/routes/vertex/tasks.js  |  37 ++
 .../src/main/webapp/app/serializers/ahs-app.js  |  49 +++
 tez-ui2/src/main/webapp/app/serializers/am.js   |  41 ++
 .../src/main/webapp/app/serializers/app-rm.js   |  33 ++
 tez-ui2/src/main/webapp/app/serializers/app.js  |  32 ++
 .../main/webapp/app/serializers/attempt-am.js   |  23 ++
 .../src/main/webapp/app/serializers/attempt.js  |  30 ++
 .../src/main/webapp/app/serializers/dag-am.js   |  28 ++
 tez-ui2/src/main/webapp/app/serializers/dag.js  | 151 +++++++
 .../src/main/webapp/app/serializers/loader.js   |  93 +++++
 tez-ui2/src/main/webapp/app/serializers/rm.js   |  28 ++
 .../src/main/webapp/app/serializers/task-am.js  |  23 ++
 tez-ui2/src/main/webapp/app/serializers/task.js |  28 ++
 .../src/main/webapp/app/serializers/timeline.js |  52 +++
 .../main/webapp/app/serializers/vertex-am.js    |  30 ++
 .../src/main/webapp/app/serializers/vertex.js   |  56 +++
 tez-ui2/src/main/webapp/app/services/env.js     |  58 +++
 tez-ui2/src/main/webapp/app/services/hosts.js   |  71 ++++
 tez-ui2/src/main/webapp/app/services/loader.js  | 138 +++++++
 .../main/webapp/app/services/local-storage.js   |  39 ++
 .../src/main/webapp/app/services/pollster.js    | 110 +++++
 tez-ui2/src/main/webapp/app/styles/app.less     |  39 ++
 .../src/main/webapp/app/styles/caller-info.less |  26 ++
 tez-ui2/src/main/webapp/app/styles/colors.less  |  44 ++
 .../main/webapp/app/styles/column-selector.less |  81 ++++
 .../webapp/app/styles/dags-page-search.less     |  67 +++
 .../main/webapp/app/styles/date-formatter.less  |  21 +
 .../main/webapp/app/styles/details-page.less    |  60 +++
 .../src/main/webapp/app/styles/error-bar.less   | 102 +++++
 .../src/main/webapp/app/styles/page-layout.less | 157 +++++++
 tez-ui2/src/main/webapp/app/styles/shared.less  |  55 +++
 .../main/webapp/app/styles/tab-n-refresh.less   |  44 ++
 .../main/webapp/app/styles/table-controls.less  |  28 ++
 tez-ui2/src/main/webapp/app/styles/tooltip.less |  24 ++
 .../webapp/app/styles/zip-download-modal.less   |  30 ++
 tez-ui2/src/main/webapp/app/templates/app.hbs   |  20 +
 .../main/webapp/app/templates/app/configs.hbs   |  34 ++
 .../src/main/webapp/app/templates/app/dags.hbs  |  37 ++
 .../src/main/webapp/app/templates/app/index.hbs | 127 ++++++
 .../main/webapp/app/templates/application.hbs   |  67 +++
 .../src/main/webapp/app/templates/attempt.hbs   |  20 +
 .../webapp/app/templates/attempt/counters.hbs   |  34 ++
 .../main/webapp/app/templates/attempt/index.hbs |  79 ++++
 .../app/templates/components/caller-info.hbs    |  24 ++
 .../templates/components/column-selector.hbs    |  50 +++
 .../templates/components/dags-page-search.hbs   |  74 ++++
 .../templates/components/dags-pagination-ui.hbs |  27 ++
 .../app/templates/components/date-formatter.hbs |  19 +
 .../components/em-table-status-cell.hbs         |  23 ++
 .../app/templates/components/error-bar.hbs      |  31 ++
 .../app/templates/components/stats-link.hbs     |  25 ++
 .../app/templates/components/tab-n-refresh.hbs  |  44 ++
 .../app/templates/components/table-controls.hbs |  19 +
 .../templates/components/zip-download-modal.hbs |  36 ++
 tez-ui2/src/main/webapp/app/templates/dag.hbs   |  20 +
 .../main/webapp/app/templates/dag/attempts.hbs  |  37 ++
 .../main/webapp/app/templates/dag/counters.hbs  |  34 ++
 .../main/webapp/app/templates/dag/graphical.hbs |  14 +
 .../src/main/webapp/app/templates/dag/index.hbs | 100 +++++
 .../webapp/app/templates/dag/index/index.hbs    |  80 ++++
 .../src/main/webapp/app/templates/dag/tasks.hbs |  37 ++
 .../main/webapp/app/templates/dag/vertices.hbs  |  37 ++
 tez-ui2/src/main/webapp/app/templates/dags.hbs  |  41 ++
 .../src/main/webapp/app/templates/loading.hbs   |  24 ++
 .../main/webapp/app/templates/simple-modal.hbs  |  35 ++
 tez-ui2/src/main/webapp/app/templates/task.hbs  |  20 +
 .../main/webapp/app/templates/task/attempts.hbs |  37 ++
 .../main/webapp/app/templates/task/counters.hbs |  34 ++
 .../main/webapp/app/templates/task/index.hbs    |  84 ++++
 .../src/main/webapp/app/templates/vertex.hbs    |  20 +
 .../webapp/app/templates/vertex/attempts.hbs    |  37 ++
 .../webapp/app/templates/vertex/counters.hbs    |  34 ++
 .../main/webapp/app/templates/vertex/index.hbs  | 143 +++++++
 .../main/webapp/app/templates/vertex/tasks.hbs  |  37 ++
 .../src/main/webapp/app/transforms/object.js    |  29 ++
 .../app/utils/counter-column-definition.js      |  97 +++++
 .../main/webapp/app/utils/download-dag-zip.js   | 407 +++++++++++++++++++
 tez-ui2/src/main/webapp/app/utils/misc.js       |  23 ++
 tez-ui2/src/main/webapp/blueprints/.jshintrc    |   6 +
 .../files/tests/unit/entities/__name__-test.js  |  30 ++
 .../main/webapp/blueprints/entity-test/index.js |  35 ++
 .../entity/files/app/entities/__name__.js       |  22 +
 .../src/main/webapp/blueprints/entity/index.js  |  31 ++
 tez-ui2/src/main/webapp/bower.json              |  27 ++
 tez-ui2/src/main/webapp/config/build-info.js    |  32 ++
 tez-ui2/src/main/webapp/config/configs.env      |  31 ++
 .../src/main/webapp/config/default-app-conf.js  | 333 +++++++++++++++
 tez-ui2/src/main/webapp/config/environment.js   |  70 ++++
 tez-ui2/src/main/webapp/ember-cli-build.js      |  61 +++
 tez-ui2/src/main/webapp/package.json            |  62 +++
 .../webapp/public/assets/images/favicon.png     | Bin 0 -> 1416 bytes
 .../main/webapp/public/assets/images/logo.png   | Bin 0 -> 77160 bytes
 tez-ui2/src/main/webapp/public/crossdomain.xml  |  15 +
 tez-ui2/src/main/webapp/public/robots.txt       |   3 +
 tez-ui2/src/main/webapp/testem.json             |  12 +
 tez-ui2/src/main/webapp/tests/.jshintrc         |  52 +++
 .../main/webapp/tests/helpers/destroy-app.js    |  23 ++
 .../tests/helpers/module-for-acceptance.js      |  41 ++
 .../src/main/webapp/tests/helpers/resolver.js   |  29 ++
 .../src/main/webapp/tests/helpers/start-app.js  |  36 ++
 tez-ui2/src/main/webapp/tests/index.html        |  52 +++
 .../integration/components/caller-info-test.js  |  42 ++
 .../components/column-selector-test.js          |  87 ++++
 .../components/dags-page-search-test.js         |  45 ++
 .../components/dags-pagination-ui-test.js       |  47 +++
 .../components/date-formatter-test.js           |  40 ++
 .../components/em-table-status-cell-test.js     |  55 +++
 .../integration/components/error-bar-test.js    |  43 ++
 .../integration/components/stats-link-test.js   |  38 ++
 .../components/tab-n-refresh-test.js            |  50 +++
 .../components/table-controls-test.js           |  43 ++
 .../components/zip-download-modal-test.js       |  46 +++
 tez-ui2/src/main/webapp/tests/test-helper.js    |  24 ++
 .../webapp/tests/unit/adapters/abstract-test.js | 110 +++++
 .../webapp/tests/unit/adapters/ahs-app-test.js  |  34 ++
 .../main/webapp/tests/unit/adapters/am-test.js  |  50 +++
 .../webapp/tests/unit/adapters/app-rm-test.js   |  29 ++
 .../main/webapp/tests/unit/adapters/app-test.js |  30 ++
 .../tests/unit/adapters/attempt-am-test.js      |  29 ++
 .../webapp/tests/unit/adapters/attempt-test.js  |  30 ++
 .../webapp/tests/unit/adapters/dag-am-test.js   |  29 ++
 .../main/webapp/tests/unit/adapters/dag-test.js |  30 ++
 .../webapp/tests/unit/adapters/loader-test.js   | 137 +++++++
 .../main/webapp/tests/unit/adapters/rm-test.js  |  31 ++
 .../webapp/tests/unit/adapters/task-am-test.js  |  29 ++
 .../webapp/tests/unit/adapters/task-test.js     |  29 ++
 .../webapp/tests/unit/adapters/timeline-test.js |  84 ++++
 .../tests/unit/adapters/vertex-am-test.js       |  29 ++
 .../webapp/tests/unit/adapters/vertex-test.js   |  30 ++
 .../tests/unit/controllers/abstract-test.js     |  76 ++++
 .../webapp/tests/unit/controllers/app-test.js   |  37 ++
 .../tests/unit/controllers/app/configs-test.js  |  40 ++
 .../tests/unit/controllers/app/dags-test.js     |  40 ++
 .../tests/unit/controllers/app/index-test.js    |  36 ++
 .../tests/unit/controllers/application-test.js  |  45 ++
 .../tests/unit/controllers/attempt-test.js      |  37 ++
 .../unit/controllers/attempt/counters-test.js   |  36 ++
 .../unit/controllers/attempt/index-test.js      |  35 ++
 .../unit/controllers/counters-table-test.js     |  91 +++++
 .../webapp/tests/unit/controllers/dag-test.js   |  37 ++
 .../tests/unit/controllers/dag/attempts-test.js |  41 ++
 .../tests/unit/controllers/dag/counters-test.js |  36 ++
 .../unit/controllers/dag/graphical-test.js      |  47 +++
 .../tests/unit/controllers/dag/index-test.js    |  35 ++
 .../unit/controllers/dag/index/index-test.js    |  40 ++
 .../tests/unit/controllers/dag/tasks-test.js    |  41 ++
 .../tests/unit/controllers/dag/vertices-test.js |  84 ++++
 .../webapp/tests/unit/controllers/dags-test.js  |  48 +++
 .../tests/unit/controllers/multi-table-test.js  |  41 ++
 .../webapp/tests/unit/controllers/page-test.js  |  51 +++
 .../tests/unit/controllers/parent-test.js       |  36 ++
 .../webapp/tests/unit/controllers/table-test.js |  65 +++
 .../webapp/tests/unit/controllers/task-test.js  |  37 ++
 .../unit/controllers/task/attempts-test.js      |  41 ++
 .../unit/controllers/task/counters-test.js      |  36 ++
 .../tests/unit/controllers/task/index-test.js   |  35 ++
 .../tests/unit/controllers/vertex-test.js       |  37 ++
 .../unit/controllers/vertex/attempts-test.js    |  41 ++
 .../unit/controllers/vertex/counters-test.js    |  36 ++
 .../tests/unit/controllers/vertex/index-test.js |  35 ++
 .../tests/unit/controllers/vertex/tasks-test.js |  41 ++
 .../main/webapp/tests/unit/entities/am-test.js  |  33 ++
 .../tests/unit/entities/attempt-am-test.js      |  31 ++
 .../webapp/tests/unit/entities/entity-test.js   | 263 ++++++++++++
 .../webapp/tests/unit/entities/task-am-test.js  |  31 ++
 .../tests/unit/entities/vertex-am-test.js       |  31 ++
 .../tests/unit/initializers/entities-test.js    |  40 ++
 .../webapp/tests/unit/initializers/env-test.js  |  40 ++
 .../tests/unit/initializers/hosts-test.js       |  38 ++
 .../tests/unit/initializers/jquery-test.js      |  38 ++
 .../tests/unit/initializers/loader-test.js      |  40 ++
 .../unit/initializers/local-storage-test.js     |  39 ++
 .../unit/mixins/auto-counter-column-test.js     |  78 ++++
 .../main/webapp/tests/unit/mixins/name-test.js  |  44 ++
 .../webapp/tests/unit/models/abstract-test.js   |  63 +++
 .../webapp/tests/unit/models/ahs-app-test.js    |  31 ++
 .../main/webapp/tests/unit/models/am-test.js    |  31 ++
 .../tests/unit/models/am-timeline-test.js       |  34 ++
 .../webapp/tests/unit/models/app-rm-test.js     |  30 ++
 .../main/webapp/tests/unit/models/app-test.js   |  40 ++
 .../webapp/tests/unit/models/attempt-am-test.js |  30 ++
 .../webapp/tests/unit/models/attempt-test.js    |  76 ++++
 .../webapp/tests/unit/models/dag-am-test.js     |  30 ++
 .../main/webapp/tests/unit/models/dag-test.js   |  40 ++
 .../main/webapp/tests/unit/models/rm-test.js    |  30 ++
 .../webapp/tests/unit/models/task-am-test.js    |  30 ++
 .../main/webapp/tests/unit/models/task-test.js  |  61 +++
 .../webapp/tests/unit/models/timeline-test.js   | 124 ++++++
 .../webapp/tests/unit/models/vertex-am-test.js  |  30 ++
 .../webapp/tests/unit/models/vertex-test.js     |  61 +++
 .../webapp/tests/unit/routes/abstract-test.js   | 282 +++++++++++++
 .../tests/unit/routes/am-pollster-test.js       |  35 ++
 .../main/webapp/tests/unit/routes/app-test.js   |  29 ++
 .../tests/unit/routes/app/configs-test.js       |  46 +++
 .../webapp/tests/unit/routes/app/dags-test.js   |  46 +++
 .../webapp/tests/unit/routes/app/index-test.js  |  46 +++
 .../tests/unit/routes/application-test.js       |  65 +++
 .../webapp/tests/unit/routes/attempt-test.js    |  32 ++
 .../tests/unit/routes/attempt/counters-test.js  |  46 +++
 .../tests/unit/routes/attempt/index-test.js     |  46 +++
 .../main/webapp/tests/unit/routes/dag-test.js   |  32 ++
 .../tests/unit/routes/dag/attempts-test.js      |  46 +++
 .../tests/unit/routes/dag/counters-test.js      |  47 +++
 .../tests/unit/routes/dag/graphical-test.js     |  38 ++
 .../webapp/tests/unit/routes/dag/index-test.js  |  47 +++
 .../tests/unit/routes/dag/index/index-test.js   |  50 +++
 .../webapp/tests/unit/routes/dag/tasks-test.js  |  46 +++
 .../tests/unit/routes/dag/vertices-test.js      |  46 +++
 .../main/webapp/tests/unit/routes/dags-test.js  |  58 +++
 .../tests/unit/routes/multi-am-pollster-test.js |  32 ++
 .../webapp/tests/unit/routes/pollster-test.js   |  39 ++
 .../unit/routes/single-am-pollster-test.js      |  32 ++
 .../main/webapp/tests/unit/routes/task-test.js  |  32 ++
 .../tests/unit/routes/task/attempts-test.js     |  46 +++
 .../tests/unit/routes/task/counters-test.js     |  46 +++
 .../webapp/tests/unit/routes/task/index-test.js |  46 +++
 .../webapp/tests/unit/routes/vertex-test.js     |  32 ++
 .../tests/unit/routes/vertex/attempts-test.js   |  46 +++
 .../tests/unit/routes/vertex/counters-test.js   |  46 +++
 .../tests/unit/routes/vertex/index-test.js      |  46 +++
 .../tests/unit/routes/vertex/tasks-test.js      |  51 +++
 .../tests/unit/serializers/ahs-app-test.js      |  34 ++
 .../webapp/tests/unit/serializers/am-test.js    |  30 ++
 .../tests/unit/serializers/app-rm-test.js       |  30 ++
 .../webapp/tests/unit/serializers/app-test.js   |  31 ++
 .../tests/unit/serializers/attempt-am-test.js   |  31 ++
 .../tests/unit/serializers/attempt-test.js      |  31 ++
 .../tests/unit/serializers/dag-am-test.js       |  30 ++
 .../webapp/tests/unit/serializers/dag-test.js   | 131 ++++++
 .../tests/unit/serializers/loader-test.js       | 193 +++++++++
 .../webapp/tests/unit/serializers/rm-test.js    |  30 ++
 .../tests/unit/serializers/task-am-test.js      |  31 ++
 .../webapp/tests/unit/serializers/task-test.js  |  31 ++
 .../tests/unit/serializers/timeline-test.js     |  41 ++
 .../tests/unit/serializers/vertex-am-test.js    |  31 ++
 .../tests/unit/serializers/vertex-test.js       |  49 +++
 .../main/webapp/tests/unit/services/env-test.js |  80 ++++
 .../webapp/tests/unit/services/hosts-test.js    |  77 ++++
 .../webapp/tests/unit/services/loader-test.js   | 257 ++++++++++++
 .../tests/unit/services/local-storage-test.js   |  42 ++
 .../webapp/tests/unit/services/pollster-test.js |  29 ++
 .../webapp/tests/unit/transforms/object-test.js |  30 ++
 .../utils/counter-column-definition-test.js     | 124 ++++++
 .../tests/unit/utils/download-dag-zip-test.js   |  26 ++
 .../main/webapp/tests/unit/utils/misc-test.js   |  26 ++
 383 files changed, 19709 insertions(+)
----------------------------------------------------------------------



[18/24] tez git commit: TEZ-3103. Shuffle can hang when memory to memory merging enabled (jlowe)

Posted by sr...@apache.org.
TEZ-3103. Shuffle can hang when memory to memory merging enabled (jlowe)


Project: http://git-wip-us.apache.org/repos/asf/tez/repo
Commit: http://git-wip-us.apache.org/repos/asf/tez/commit/a2c590bc
Tree: http://git-wip-us.apache.org/repos/asf/tez/tree/a2c590bc
Diff: http://git-wip-us.apache.org/repos/asf/tez/diff/a2c590bc

Branch: refs/heads/TEZ-2980
Commit: a2c590bcb00de093e8365c4c423d5014777cfacf
Parents: 7e3d546
Author: Jason Lowe <jl...@apache.org>
Authored: Fri Feb 12 18:19:29 2016 +0000
Committer: Jason Lowe <jl...@apache.org>
Committed: Fri Feb 12 18:19:29 2016 +0000

----------------------------------------------------------------------
 CHANGES.txt                                     |  2 +
 .../shuffle/orderedgrouped/MergeManager.java    | 21 ++++++
 .../orderedgrouped/TestMergeManager.java        | 74 ++++++++++++++++++++
 3 files changed, 97 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tez/blob/a2c590bc/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index 8cb7505..5f09280 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -6,6 +6,7 @@ Release 0.8.3: Unreleased
 INCOMPATIBLE CHANGES
 
 ALL CHANGES:
+  TEZ-3103. Shuffle can hang when memory to memory merging enabled
   TEZ-3107. tez-tools: Log warn msgs in case ATS has wrong values (e.g startTime > finishTime).
   TEZ-3104. Tez fails on Bzip2 intermediate output format on hadoop 2.7.1 and earlier
   TEZ-3090. MRInput should make dagIdentifier, vertexIdentifier, etc available to the InputFormat jobConf.
@@ -331,6 +332,7 @@ INCOMPATIBLE CHANGES
   TEZ-2949. Allow duplicate dag names within session for Tez.
 
 ALL CHANGES
+  TEZ-3103. Shuffle can hang when memory to memory merging enabled
   TEZ-3104. Tez fails on Bzip2 intermediate output format on hadoop 2.7.1 and earlier
   TEZ-3093. CriticalPathAnalyzer should be accessible via zeppelin.
   TEZ-3089. TaskConcurrencyAnalyzer can return negative task count with very large jobs.

http://git-wip-us.apache.org/repos/asf/tez/blob/a2c590bc/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/MergeManager.java
----------------------------------------------------------------------
diff --git a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/MergeManager.java b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/MergeManager.java
index dfa509f..b56a9a8 100644
--- a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/MergeManager.java
+++ b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/MergeManager.java
@@ -500,6 +500,12 @@ public class MergeManager implements FetchedInputAllocatorOrderedGrouped {
     LOG.info("closeInMemoryMergedFile -> size: " + mapOutput.getSize() +
              ", inMemoryMergedMapOutputs.size() -> " + 
              inMemoryMergedMapOutputs.size());
+
+    commitMemory += mapOutput.getSize();
+
+    if (commitMemory >= mergeThreshold) {
+      startMemToDiskMerge();
+    }
   }
 
   @Override
@@ -1155,4 +1161,19 @@ public class MergeManager implements FetchedInputAllocatorOrderedGrouped {
                  comparator, progressable, spilledRecordsCounter, null,
                  additionalBytesRead, null);
   }
+
+  @VisibleForTesting
+  long getCommitMemory() {
+    return commitMemory;
+  }
+
+  @VisibleForTesting
+  long getUsedMemory() {
+    return usedMemory;
+  }
+
+  @VisibleForTesting
+  void waitForMemToMemMerge() throws InterruptedException {
+    memToMemMerger.waitForMerge();
+  }
 }

http://git-wip-us.apache.org/repos/asf/tez/blob/a2c590bc/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/TestMergeManager.java
----------------------------------------------------------------------
diff --git a/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/TestMergeManager.java b/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/TestMergeManager.java
index b8f99de..c62c116 100644
--- a/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/TestMergeManager.java
+++ b/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/TestMergeManager.java
@@ -29,12 +29,15 @@ import static org.mockito.Mockito.mock;
 import static org.mockito.Mockito.spy;
 import static org.mockito.Mockito.verify;
 
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
 import java.io.IOException;
 import java.util.LinkedList;
 import java.util.List;
 import java.util.UUID;
 
 import com.google.common.collect.Sets;
+
 import org.mockito.invocation.InvocationOnMock;
 import org.mockito.stubbing.Answer;
 import org.slf4j.Logger;
@@ -52,6 +55,7 @@ import org.apache.tez.dag.api.TezConfiguration;
 import org.apache.tez.runtime.api.InputContext;
 import org.apache.tez.runtime.library.api.TezRuntimeConfiguration;
 import org.apache.tez.runtime.library.common.InputAttemptIdentifier;
+import org.apache.tez.runtime.library.common.shuffle.ShuffleUtils;
 import org.apache.tez.runtime.library.common.sort.impl.IFile;
 import org.apache.tez.runtime.library.common.sort.impl.TezIndexRecord;
 import org.junit.After;
@@ -174,6 +178,76 @@ public class TestMergeManager {
     Assert.assertTrue(mergeManager.postMergeMemLimit == initialMemoryAvailable);
   }
 
+  @Test(timeout=20000)
+  public void testIntermediateMemoryMergeAccounting() throws Exception {
+    Configuration conf = new TezConfiguration(defaultConf);
+    conf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_COMPRESS, false);
+    conf.set(TezRuntimeConfiguration.TEZ_RUNTIME_KEY_CLASS, IntWritable.class.getName());
+    conf.set(TezRuntimeConfiguration.TEZ_RUNTIME_VALUE_CLASS, IntWritable.class.getName());
+    conf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_ENABLE_MEMTOMEM, true);
+    conf.setInt(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_MEMTOMEM_SEGMENTS, 2);
+
+    Path localDir = new Path(workDir, "local");
+    Path srcDir = new Path(workDir, "srcData");
+    localFs.mkdirs(localDir);
+    localFs.mkdirs(srcDir);
+
+    conf.setStrings(TezRuntimeFrameworkConfigs.LOCAL_DIRS, localDir.toString());
+
+    FileSystem localFs = FileSystem.getLocal(conf);
+    LocalDirAllocator localDirAllocator =
+        new LocalDirAllocator(TezRuntimeFrameworkConfigs.LOCAL_DIRS);
+    InputContext inputContext = createMockInputContext(UUID.randomUUID().toString());
+
+    ExceptionReporter exceptionReporter = mock(ExceptionReporter.class);
+
+    MergeManager mergeManager =
+        new MergeManager(conf, localFs, localDirAllocator, inputContext, null, null, null, null,
+            exceptionReporter, 2000000, null, false, -1);
+    mergeManager.configureAndStart();
+
+    assertEquals(0, mergeManager.getUsedMemory());
+    assertEquals(0, mergeManager.getCommitMemory());
+
+    byte[] data1 = generateData(conf, 10);
+    byte[] data2 = generateData(conf, 20);
+    MapOutput firstMapOutput = mergeManager.reserve(null, data1.length, data1.length, 0);
+    MapOutput secondMapOutput = mergeManager.reserve(null, data2.length, data2.length, 0);
+    assertEquals(MapOutput.Type.MEMORY, firstMapOutput.getType());
+    assertEquals(MapOutput.Type.MEMORY, secondMapOutput.getType());
+    assertEquals(0, mergeManager.getCommitMemory());
+    assertEquals(data1.length + data2.length, mergeManager.getUsedMemory());
+
+    System.arraycopy(data1, 0, firstMapOutput.getMemory(), 0, data1.length);
+    System.arraycopy(data2, 0, secondMapOutput.getMemory(), 0, data2.length);
+
+    secondMapOutput.commit();
+    assertEquals(data2.length, mergeManager.getCommitMemory());
+    assertEquals(data1.length + data2.length, mergeManager.getUsedMemory());
+    firstMapOutput.commit();
+
+    mergeManager.waitForMemToMemMerge();
+    assertEquals(data1.length + data2.length, mergeManager.getCommitMemory());
+    assertEquals(data1.length + data2.length, mergeManager.getUsedMemory());
+  }
+
+  private byte[] generateData(Configuration conf, int numEntries) throws IOException {
+    ByteArrayOutputStream baos = new ByteArrayOutputStream();
+    FSDataOutputStream fsdos = new FSDataOutputStream(baos, null);
+    IFile.Writer writer =
+        new IFile.Writer(conf, fsdos, IntWritable.class, IntWritable.class, null, null, null);
+    for (int i = 0; i < numEntries; ++i) {
+      writer.append(new IntWritable(i), new IntWritable(i));
+    }
+    writer.close();
+    int compressedLength = (int)writer.getCompressedLength();
+    int rawLength = (int)writer.getRawLength();
+    byte[] data = new byte[rawLength];
+    ShuffleUtils.shuffleToMemory(data, new ByteArrayInputStream(baos.toByteArray()),
+        rawLength, compressedLength, null, false, 0, LOG, "sometask");
+    return data;
+  }
+
   class InterruptingThread implements Runnable {
 
     MergeManager.OnDiskMerger mergeThread;


[16/24] tez git commit: TEZ-3104. Tez fails on Bzip2 intermediate output format on hadoop 2.7.1 and earlier

Posted by sr...@apache.org.
TEZ-3104. Tez fails on Bzip2 intermediate output format on hadoop 2.7.1 and earlier


Project: http://git-wip-us.apache.org/repos/asf/tez/repo
Commit: http://git-wip-us.apache.org/repos/asf/tez/commit/6f57630e
Tree: http://git-wip-us.apache.org/repos/asf/tez/tree/6f57630e
Diff: http://git-wip-us.apache.org/repos/asf/tez/diff/6f57630e

Branch: refs/heads/TEZ-2980
Commit: 6f57630ee5d5a1a02342a25e4b48a698fd237390
Parents: 99c85d3
Author: Jonathan Eagles <je...@yahoo-inc.com>
Authored: Thu Feb 11 10:33:52 2016 -0600
Committer: Jonathan Eagles <je...@yahoo-inc.com>
Committed: Thu Feb 11 10:33:52 2016 -0600

----------------------------------------------------------------------
 CHANGES.txt                                                        | 2 ++
 .../tez/runtime/library/common/shuffle/orderedgrouped/Shuffle.java | 2 ++
 2 files changed, 4 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tez/blob/6f57630e/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index c769843..61aaaa7 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -6,6 +6,7 @@ Release 0.8.3: Unreleased
 INCOMPATIBLE CHANGES
 
 ALL CHANGES:
+  TEZ-3104. Tez fails on Bzip2 intermediate output format on hadoop 2.7.1 and earlier
   TEZ-3090. MRInput should make dagIdentifier, vertexIdentifier, etc available to the InputFormat jobConf.
   TEZ-3093. CriticalPathAnalyzer should be accessible via zeppelin.
   TEZ-3089. TaskConcurrencyAnalyzer can return negative task count with very large jobs.
@@ -329,6 +330,7 @@ INCOMPATIBLE CHANGES
   TEZ-2949. Allow duplicate dag names within session for Tez.
 
 ALL CHANGES
+  TEZ-3104. Tez fails on Bzip2 intermediate output format on hadoop 2.7.1 and earlier
   TEZ-3093. CriticalPathAnalyzer should be accessible via zeppelin.
   TEZ-3089. TaskConcurrencyAnalyzer can return negative task count with very large jobs.
   TEZ-2307. Possible wrong error message when submitting new dag.

http://git-wip-us.apache.org/repos/asf/tez/blob/6f57630e/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/Shuffle.java
----------------------------------------------------------------------
diff --git a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/Shuffle.java b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/Shuffle.java
index de3b2cb..db43651 100644
--- a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/Shuffle.java
+++ b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/Shuffle.java
@@ -116,6 +116,8 @@ public class Shuffle implements ExceptionReporter {
       Class<? extends CompressionCodec> codecClass =
           ConfigUtils.getIntermediateInputCompressorClass(conf, DefaultCodec.class);
       codec = ReflectionUtils.newInstance(codecClass, conf);
+      // Workaround needed for HADOOP-12191: avoids the native initialization synchronization race
+      codec.getDecompressorType();
     } else {
       codec = null;
     }


[20/24] tez git commit: TEZ-3101. Tez UI: Task attempt log link doesn't have the correct protocol. (sree)

Posted by sr...@apache.org.
TEZ-3101. Tez UI: Task attempt log link doesn't have the correct protocol. (sree)


Project: http://git-wip-us.apache.org/repos/asf/tez/repo
Commit: http://git-wip-us.apache.org/repos/asf/tez/commit/fec46aa9
Tree: http://git-wip-us.apache.org/repos/asf/tez/tree/fec46aa9
Diff: http://git-wip-us.apache.org/repos/asf/tez/diff/fec46aa9

Branch: refs/heads/TEZ-2980
Commit: fec46aa960d8a9ba8e82af528380cb2931aa16e6
Parents: e24bf44
Author: Sreenath Somarajapuram <sr...@apache.org>
Authored: Wed Feb 17 12:48:54 2016 +0530
Committer: Sreenath Somarajapuram <sr...@apache.org>
Committed: Wed Feb 17 12:48:54 2016 +0530

----------------------------------------------------------------------
 CHANGES.txt                                     |  1 +
 tez-ui/src/main/webapp/app/scripts/configs.js   |  8 ++++++
 .../src/main/webapp/app/scripts/helpers/misc.js | 27 ++++++++++++--------
 .../components/basic-table/logs-cell.hbs        |  2 +-
 4 files changed, 26 insertions(+), 12 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tez/blob/fec46aa9/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index 8bd6f73..e2f77f6 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -26,6 +26,7 @@ ALL CHANGES:
   TEZ-2937. Can Processor.close() be called after closing inputs and outputs?
   TEZ-3037. History URL should be set regardless of which history logging service is enabled.
   TEZ-3032. DAG start time getting logged using system time instead of recorded time in startTime field.
+  TEZ-3101. Tez UI: Task attempt log link doesn't have the correct protocol.
 
 Release 0.8.2: 2016-01-19
 

http://git-wip-us.apache.org/repos/asf/tez/blob/fec46aa9/tez-ui/src/main/webapp/app/scripts/configs.js
----------------------------------------------------------------------
diff --git a/tez-ui/src/main/webapp/app/scripts/configs.js b/tez-ui/src/main/webapp/app/scripts/configs.js
index b4dafc7..49046a5 100644
--- a/tez-ui/src/main/webapp/app/scripts/configs.js
+++ b/tez-ui/src/main/webapp/app/scripts/configs.js
@@ -43,6 +43,14 @@ App.setConfigs({
      * For configuration see http://momentjs.com/timezone/docs/
      */
     //timezone: "UTC",
+
+    /*
+     * yarnProtocol:
+     * If specified, this protocol is used to construct node manager log links.
+     * Possible values: http, https
+     * Default value: If not specified, the protocol of RMWebUrl is used, else the protocol of the current page
+     */
+    //yarnProtocol: "<value>",
   },
 
   /*

http://git-wip-us.apache.org/repos/asf/tez/blob/fec46aa9/tez-ui/src/main/webapp/app/scripts/helpers/misc.js
----------------------------------------------------------------------
diff --git a/tez-ui/src/main/webapp/app/scripts/helpers/misc.js b/tez-ui/src/main/webapp/app/scripts/helpers/misc.js
index 8e19686..6f091ee 100644
--- a/tez-ui/src/main/webapp/app/scripts/helpers/misc.js
+++ b/tez-ui/src/main/webapp/app/scripts/helpers/misc.js
@@ -638,7 +638,7 @@ App.Helpers.misc = {
    * @param queryParams {Object} Params to be added
    * @return modified path
    */
-  modifyUrl: function (url, path, queryParams) {
+  modifyUrl: function (url, path, queryParams, protocol) {
     var urlParts = url.split('?'),
         params = {};
 
@@ -666,7 +666,13 @@ App.Helpers.misc = {
 
     urlParts[0] += path || '';
 
-    return urlParts[1] ? '%@?%@'.fmt(urlParts[0], urlParts[1]) : urlParts[0];
+    url = urlParts[1] ? '%@?%@'.fmt(urlParts[0], urlParts[1]) : urlParts[0];
+
+    if(url.indexOf("://") === -1 && protocol) {
+      url = "%@://%@".fmt(protocol, url);
+    }
+
+    return url;
   },
 
   constructLogLinks: function (attempt, yarnAppState, amUser) {
@@ -675,18 +681,20 @@ App.Helpers.misc = {
         logLinks = {},
         params = amUser ? {
           "user.name": amUser
-        } : {};
+        } : {},
+        RMWebUrl = App.env.RMWebUrl || "",
+        currentProtocol = location.protocol,
+        protocol = App.env.yarnProtocol ||
+            RMWebUrl.substr(0, RMWebUrl.indexOf("://")) ||
+            currentProtocol.substr(0, currentProtocol.length - 1);
 
     if(attempt) {
-      link = attempt.get('inProgressLog') || attempt.get('completedLog');
+      link = attempt.get('inProgressLog');
       if(link) {
         if(!link.match("/syslog_")) {
           path = "/syslog_" + attempt.get('id');
-          if(amUser) {
-            path += "/" + amUser;
-          }
         }
-        logLinks.viewUrl = App.Helpers.misc.modifyUrl(link, path, params);
+        logLinks.viewUrl = App.Helpers.misc.modifyUrl(link, path, params, protocol);
       }
 
       link = attempt.get('completedLog');
@@ -695,9 +703,6 @@ App.Helpers.misc = {
 
         if(!link.match("/syslog_")) {
           path = "/syslog_" + attempt.get('id');
-          if(amUser) {
-            path += "/" + amUser;
-          }
         }
 
         logLinks.downloadUrl = App.Helpers.misc.modifyUrl(link, path, params);

http://git-wip-us.apache.org/repos/asf/tez/blob/fec46aa9/tez-ui/src/main/webapp/app/templates/components/basic-table/logs-cell.hbs
----------------------------------------------------------------------
diff --git a/tez-ui/src/main/webapp/app/templates/components/basic-table/logs-cell.hbs b/tez-ui/src/main/webapp/app/templates/components/basic-table/logs-cell.hbs
index 4745d78..c46cc13 100644
--- a/tez-ui/src/main/webapp/app/templates/components/basic-table/logs-cell.hbs
+++ b/tez-ui/src/main/webapp/app/templates/components/basic-table/logs-cell.hbs
@@ -20,7 +20,7 @@
   <i class="waiting"></i>&nbsp;
 {{else}}
   {{#if view.cellContent.viewUrl}}
-    <a target="_blank" href="//{{unbound view.cellContent.viewUrl}}">View</a>
+    <a target="_blank" href="{{unbound view.cellContent.viewUrl}}">View</a>
     &nbsp;
   {{/if}}
   {{#if view.cellContent.downloadUrl}}


[04/24] tez git commit: TEZ-3036. Tez AM can hang on startup with no indication of error (jlowe)

Posted by sr...@apache.org.
TEZ-3036. Tez AM can hang on startup with no indication of error (jlowe)


Project: http://git-wip-us.apache.org/repos/asf/tez/repo
Commit: http://git-wip-us.apache.org/repos/asf/tez/commit/92def52f
Tree: http://git-wip-us.apache.org/repos/asf/tez/tree/92def52f
Diff: http://git-wip-us.apache.org/repos/asf/tez/diff/92def52f

Branch: refs/heads/TEZ-2980
Commit: 92def52ff8b02eab7aae38170ca6c9b0caf83ef7
Parents: ca447ba
Author: Jason Lowe <jl...@apache.org>
Authored: Thu Jan 21 20:28:17 2016 +0000
Committer: Jason Lowe <jl...@apache.org>
Committed: Thu Jan 21 20:28:17 2016 +0000

----------------------------------------------------------------------
 CHANGES.txt                                     |  2 +
 .../org/apache/tez/dag/app/DAGAppMaster.java    | 48 ++++++++++++++------
 2 files changed, 35 insertions(+), 15 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tez/blob/92def52f/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index bec7dd4..4ffcf13 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -6,6 +6,7 @@ Release 0.8.3: Unreleased
 INCOMPATIBLE CHANGES
 
 ALL CHANGES:
+  TEZ-3036. Tez AM can hang on startup with no indication of error
   TEZ-3052. Task internal error due to Invalid event: T_ATTEMPT_FAILED at FAILED
   TEZ-2594. Fix LICENSE for missing entries for full and minimal tarballs.
   TEZ-3053. Containers timeout if they do not receive a task within the container timeout interval.
@@ -319,6 +320,7 @@ INCOMPATIBLE CHANGES
   TEZ-2949. Allow duplicate dag names within session for Tez.
 
 ALL CHANGES
+  TEZ-3036. Tez AM can hang on startup with no indication of error
   TEZ-3052. Task internal error due to Invalid event: T_ATTEMPT_FAILED at FAILED
   TEZ-2937. Can Processor.close() be called after closing inputs and outputs?
   TEZ-3037. History URL should be set regardless of which history logging service is enabled.

http://git-wip-us.apache.org/repos/asf/tez/blob/92def52f/tez-dag/src/main/java/org/apache/tez/dag/app/DAGAppMaster.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/DAGAppMaster.java b/tez-dag/src/main/java/org/apache/tez/dag/app/DAGAppMaster.java
index 609a018..c16bdb9 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/DAGAppMaster.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/DAGAppMaster.java
@@ -1732,7 +1732,18 @@ public class DAGAppMaster extends AbstractService {
         LOG.debug("Service dependency: " + dependency.getName() + " notify" +
                   " for service: " + service.getName());
       }
-      if (dependency.isInState(Service.STATE.STARTED)) {
+      Throwable dependencyError = dependency.getFailureCause();
+      if (dependencyError != null) {
+        synchronized(this) {
+          dependenciesFailed = true;
+          if(LOG.isDebugEnabled()) {
+            LOG.debug("Service: " + service.getName() + " will fail to start"
+                + " as dependent service " + dependency.getName()
+                + " failed to start: " + dependencyError);
+          }
+          this.notifyAll();
+        }
+      } else if (dependency.isInState(Service.STATE.STARTED)) {
         if(dependenciesStarted.incrementAndGet() == dependencies.size()) {
           synchronized(this) {
             if(LOG.isDebugEnabled()) {
@@ -1742,17 +1753,6 @@ public class DAGAppMaster extends AbstractService {
             this.notifyAll();
           }
         }
-      } else if (!service.isInState(Service.STATE.STARTED)
-          && dependency.getFailureState() != null) {
-        synchronized(this) {
-          dependenciesFailed = true;
-          if(LOG.isDebugEnabled()) {
-            LOG.debug("Service: " + service.getName() + " will fail to start"
-                + " as dependent service " + dependency.getName()
-                + " failed to start");
-          }
-          this.notifyAll();
-        }
       }
     }
 
@@ -1786,9 +1786,12 @@ public class DAGAppMaster extends AbstractService {
 
   private static class ServiceThread extends Thread {
     final ServiceWithDependency serviceWithDependency;
-    Throwable error = null;
-    public ServiceThread(ServiceWithDependency serviceWithDependency) {
+    final Map<Service, ServiceWithDependency> services;
+    volatile Throwable error = null;
+    public ServiceThread(ServiceWithDependency serviceWithDependency,
+        Map<Service, ServiceWithDependency> services) {
       this.serviceWithDependency = serviceWithDependency;
+      this.services = services;
       this.setName("ServiceThread:" + serviceWithDependency.service.getName());
     }
 
@@ -1800,7 +1803,14 @@ public class DAGAppMaster extends AbstractService {
       try {
         serviceWithDependency.start();
       } catch (Throwable t) {
+        // AbstractService does not notify listeners if something throws, so
+        // notify dependent services explicitly to prevent hanging.
+        // AbstractService only records fault causes for exceptions, not
+        // errors, so dependent services will proceed thinking startup
+        // succeeded if an error is thrown. The error will be noted when the
+        // main thread joins the ServiceThread.
         error = t;
+        notifyDependentServices();
       } finally {
         if(LOG.isDebugEnabled()) {
           LOG.debug("Service: " + serviceWithDependency.service.getName() +
@@ -1812,6 +1822,14 @@ public class DAGAppMaster extends AbstractService {
             + serviceWithDependency.service.getName());
       }
     }
+
+    private void notifyDependentServices() {
+      for (ServiceWithDependency otherSvc : services.values()) {
+        if (otherSvc.dependencies.contains(serviceWithDependency.service)) {
+          otherSvc.stateChanged(serviceWithDependency.service);
+        }
+      }
+    }
   }
 
   void startServices(){
@@ -1824,7 +1842,7 @@ public class DAGAppMaster extends AbstractService {
       for(ServiceWithDependency sd : services.values()) {
         // start the service. If this fails that service
         // will be stopped and an exception raised
-        ServiceThread st = new ServiceThread(sd);
+        ServiceThread st = new ServiceThread(sd, services);
         threads.add(st);
       }
 


[05/24] tez git commit: TEZ-3066. TaskAttemptFinishedEvent ConcurrentModificationException in recovery or history logging services (zjffdu)

Posted by sr...@apache.org.
TEZ-3066. TaskAttemptFinishedEvent ConcurrentModificationException in recovery or history logging services (zjffdu)


Project: http://git-wip-us.apache.org/repos/asf/tez/repo
Commit: http://git-wip-us.apache.org/repos/asf/tez/commit/2bf27de3
Tree: http://git-wip-us.apache.org/repos/asf/tez/tree/2bf27de3
Diff: http://git-wip-us.apache.org/repos/asf/tez/diff/2bf27de3

Branch: refs/heads/TEZ-2980
Commit: 2bf27de3bfbe7a54fbf2982fa50ab6b32974d6b1
Parents: 92def52
Author: Jeff Zhang <zj...@apache.org>
Authored: Thu Jan 21 21:32:59 2016 -0800
Committer: Jeff Zhang <zj...@apache.org>
Committed: Thu Jan 21 21:38:40 2016 -0800

----------------------------------------------------------------------
 CHANGES.txt                                     |  2 ++
 .../tez/dag/app/dag/impl/TaskAttemptImpl.java   | 24 +++++++++++++-------
 2 files changed, 18 insertions(+), 8 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tez/blob/2bf27de3/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index 4ffcf13..91c86f8 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -6,6 +6,7 @@ Release 0.8.3: Unreleased
 INCOMPATIBLE CHANGES
 
 ALL CHANGES:
+  TEZ-3066. TaskAttemptFinishedEvent ConcurrentModificationException in recovery or history logging services.
   TEZ-3036. Tez AM can hang on startup with no indication of error
   TEZ-3052. Task internal error due to Invalid event: T_ATTEMPT_FAILED at FAILED
   TEZ-2594. Fix LICENSE for missing entries for full and minimal tarballs.
@@ -320,6 +321,7 @@ INCOMPATIBLE CHANGES
   TEZ-2949. Allow duplicate dag names within session for Tez.
 
 ALL CHANGES
+  TEZ-3066. TaskAttemptFinishedEvent ConcurrentModificationException in recovery or history logging services.
   TEZ-3036. Tez AM can hang on startup with no indication of error
   TEZ-3052. Task internal error due to Invalid event: T_ATTEMPT_FAILED at FAILED
   TEZ-2937. Can Processor.close() be called after closing inputs and outputs?

http://git-wip-us.apache.org/repos/asf/tez/blob/2bf27de3/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/TaskAttemptImpl.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/TaskAttemptImpl.java b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/TaskAttemptImpl.java
index dda4891..0affff2 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/TaskAttemptImpl.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/TaskAttemptImpl.java
@@ -1785,15 +1785,23 @@ public class TaskAttemptImpl implements TaskAttempt,
   public void setLastEventSent(TezEvent lastEventSent) {
     writeLock.lock();
     try {
-      DataEventDependencyInfo info = new DataEventDependencyInfo(
+      // TEZ-3066: ideally a heartbeat only arrives in FAIL_IN_PROGRESS or KILL_IN_PROGRESS;
+      // the other states are listed here just in case. TEZ-3068 was created for a more elegant solution.
+      if (!EnumSet.of(TaskAttemptStateInternal.FAIL_IN_PROGRESS,
+        TaskAttemptStateInternal.KILL_IN_PROGRESS,
+        TaskAttemptStateInternal.FAILED,
+        TaskAttemptStateInternal.KILLED,
+        TaskAttemptStateInternal.SUCCEEDED).contains(getInternalState())) {
+        DataEventDependencyInfo info = new DataEventDependencyInfo(
           lastEventSent.getEventReceivedTime(), lastEventSent.getSourceInfo().getTaskAttemptID());
-      // task attempt id may be null for input data information events
-      if (appendNextDataEvent) {
-        appendNextDataEvent = false;
-        lastDataEvents.add(info);
-      } else {
-        // over-write last event - array list makes it quick
-        lastDataEvents.set(lastDataEvents.size() - 1, info);
+        // task attempt id may be null for input data information events
+        if (appendNextDataEvent) {
+          appendNextDataEvent = false;
+          lastDataEvents.add(info);
+        } else {
+          // over-write last event - array list makes it quick
+          lastDataEvents.set(lastDataEvents.size() - 1, info);
+        }
       }
     } finally {
       writeLock.unlock();


[21/24] tez git commit: TEZ-3029. Add an onError method to service plugin contexts. (sseth)

Posted by sr...@apache.org.
http://git-wip-us.apache.org/repos/asf/tez/blob/a812c346/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestCommit.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestCommit.java b/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestCommit.java
index 28670ff..fd56495 100644
--- a/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestCommit.java
+++ b/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestCommit.java
@@ -84,6 +84,7 @@ import org.apache.tez.dag.app.dag.event.DAGAppMasterEventDAGFinished;
 import org.apache.tez.dag.app.dag.event.DAGAppMasterEventType;
 import org.apache.tez.dag.app.dag.event.DAGEvent;
 import org.apache.tez.dag.app.dag.event.DAGEventStartDag;
+import org.apache.tez.dag.app.dag.event.DAGEventTerminateDag;
 import org.apache.tez.dag.app.dag.event.DAGEventType;
 import org.apache.tez.dag.app.dag.event.TaskAttemptEvent;
 import org.apache.tez.dag.app.dag.event.TaskAttemptEventType;
@@ -711,11 +712,11 @@ public class TestCommit {
         TaskState.SUCCEEDED));
     Assert.assertEquals(VertexState.COMMITTING, v1.getState());
     // kill dag which will trigger the vertex killed event
-    dag.handle(new DAGEvent(dag.getID(), DAGEventType.DAG_KILL));
+    dag.handle(new DAGEventTerminateDag(dag.getID(), DAGTerminationCause.DAG_KILL, null));
     dispatcher.await();
     Assert.assertEquals(VertexState.KILLED, v1.getState());
     Assert.assertTrue(v1.commitFutures.isEmpty());
-    Assert.assertEquals(VertexTerminationCause.DAG_KILL,
+    Assert.assertEquals(VertexTerminationCause.DAG_TERMINATED,
         v1.getTerminationCause());
     Assert.assertEquals(DAGState.KILLED, dag.getState());
     Assert
@@ -1514,10 +1515,20 @@ public class TestCommit {
     // Assert.assertEquals(0, v3OutputCommitter.abortCounter);
   }
 
-  // Kill dag while it is in COMMITTING in the case of
-  // TEZ_AM_COMMIT_ALL_OUTPUTS_ON_DAG_SUCCESS is true
+
   @Test(timeout = 5000)
   public void testDAGKilledWhileCommitting1_OnDAGSuccess() throws Exception {
+    _testDAGTerminatedWhileCommitting1_OnDAGSuccess(DAGTerminationCause.DAG_KILL);
+  }
+
+  @Test(timeout = 5000)
+  public void testServiceErrorWhileCommitting1_OnDAGSuccess() throws Exception {
+    _testDAGTerminatedWhileCommitting1_OnDAGSuccess(DAGTerminationCause.SERVICE_PLUGIN_ERROR);
+  }
+
+  // Terminate the dag (with the given cause) while it is in COMMITTING, for the case where
+  // TEZ_AM_COMMIT_ALL_OUTPUTS_ON_DAG_SUCCESS is true
+  private void _testDAGTerminatedWhileCommitting1_OnDAGSuccess(DAGTerminationCause terminationCause) throws Exception {
     conf.setBoolean(TezConfiguration.TEZ_AM_COMMIT_ALL_OUTPUTS_ON_DAG_SUCCESS,
         true);
     setupDAG(createDAGPlan(true, true));
@@ -1534,14 +1545,14 @@ public class TestCommit {
     v3.handle(new VertexEventTaskCompleted(v3.getTask(0).getTaskId(),
         TaskState.SUCCEEDED));
     waitUntil(dag, DAGState.COMMITTING);
-    dag.handle(new DAGEvent(dag.getID(), DAGEventType.DAG_KILL));
-    waitUntil(dag, DAGState.KILLED);
+    dag.handle(new DAGEventTerminateDag(dag.getID(), terminationCause, null));
+    waitUntil(dag, terminationCause.getFinishedState());
 
     Assert.assertEquals(VertexState.SUCCEEDED, v1.getState());
     Assert.assertEquals(VertexState.SUCCEEDED, v2.getState());
     Assert.assertEquals(VertexState.SUCCEEDED, v3.getState());
     Assert
-        .assertEquals(DAGTerminationCause.DAG_KILL, dag.getTerminationCause());
+        .assertEquals(terminationCause, dag.getTerminationCause());
     Assert.assertTrue(dag.commitFutures.isEmpty());
     historyEventHandler.verifyVertexGroupCommitStartedEvent("uv12", 0);
     historyEventHandler.verifyVertexGroupCommitFinishedEvent("uv12", 0);
@@ -1569,10 +1580,20 @@ public class TestCommit {
     Assert.assertEquals(1, v3OutputCommitter.abortCounter);
   }
 
-  // Kill dag while it is in COMMITTING in the case of
-  // TEZ_AM_COMMIT_ALL_OUTPUTS_ON_DAG_SUCCESS is false
+
   @Test(timeout = 5000)
   public void testDAGKilledWhileCommitting1_OnVertexSuccess() throws Exception {
+    _testDAGTerminatedWhileCommitting1_OnVertexSuccess(DAGTerminationCause.DAG_KILL);
+  }
+
+  @Test(timeout = 5000)
+  public void testServiceErrorWhileCommitting1_OnVertexSuccess() throws Exception {
+    _testDAGTerminatedWhileCommitting1_OnVertexSuccess(DAGTerminationCause.SERVICE_PLUGIN_ERROR);
+  }
+
+  // Terminate dag (kill or service plugin error) while it is in COMMITTING in the case of
+  // TEZ_AM_COMMIT_ALL_OUTPUTS_ON_DAG_SUCCESS is false
+  private void _testDAGTerminatedWhileCommitting1_OnVertexSuccess(DAGTerminationCause terminationCause) throws Exception {
     conf.setBoolean(TezConfiguration.TEZ_AM_COMMIT_ALL_OUTPUTS_ON_DAG_SUCCESS,
         false);
     setupDAG(createDAGPlan(true, true));
@@ -1596,15 +1617,15 @@ public class TestCommit {
     v3OutputCommitter.unblockCommit();
     // dag go to COMMITTING due to the pending commit of v12Out
     waitUntil(dag, DAGState.COMMITTING);
-    dag.handle(new DAGEvent(dag.getID(), DAGEventType.DAG_KILL));
-    waitUntil(dag, DAGState.KILLED);
+    dag.handle(new DAGEventTerminateDag(dag.getID(), terminationCause, null));
+    waitUntil(dag, terminationCause.getFinishedState());
 
     Assert.assertEquals(VertexState.SUCCEEDED, v1.getState());
     Assert.assertEquals(VertexState.SUCCEEDED, v2.getState());
     Assert.assertEquals(VertexState.SUCCEEDED, v3.getState());
-    Assert.assertEquals(DAGState.KILLED, dag.getState());
+    Assert.assertEquals(terminationCause.getFinishedState(), dag.getState());
     Assert
-        .assertEquals(DAGTerminationCause.DAG_KILL, dag.getTerminationCause());
+        .assertEquals(terminationCause, dag.getTerminationCause());
     Assert.assertTrue(dag.commitFutures.isEmpty());
     historyEventHandler.verifyVertexGroupCommitStartedEvent("uv12", 1);
     historyEventHandler.verifyVertexGroupCommitFinishedEvent("uv12", 0);
@@ -1631,9 +1652,18 @@ public class TestCommit {
     Assert.assertEquals(1, v3OutputCommitter.abortCounter);
   }
 
-  // DAG killed while dag is still in RUNNING and vertex is in COMMITTING
   @Test(timeout = 5000)
   public void testDAGKilledWhileRunning_OnVertexSuccess() throws Exception {
+    _testDAGKilledWhileRunning_OnVertexSuccess(DAGTerminationCause.DAG_KILL);
+  }
+
+  @Test(timeout = 5000)
+  public void testServiceErrorWhileRunning_OnVertexSuccess() throws Exception {
+    _testDAGKilledWhileRunning_OnVertexSuccess(DAGTerminationCause.SERVICE_PLUGIN_ERROR);
+  }
+
+  // DAG terminated (kill or service plugin error) while dag is still in RUNNING and vertex is in COMMITTING
+  private void _testDAGKilledWhileRunning_OnVertexSuccess(DAGTerminationCause terminationCause) throws Exception {
     conf.setBoolean(TezConfiguration.TEZ_AM_COMMIT_ALL_OUTPUTS_ON_DAG_SUCCESS,
         false);
     setupDAG(createDAGPlan(true, true));
@@ -1652,17 +1682,17 @@ public class TestCommit {
     Assert.assertEquals(VertexState.COMMITTING, v3.getState());
     // dag is still in RUNNING because v3 has not completed
     Assert.assertEquals(DAGState.RUNNING, dag.getState());
-    dag.handle(new DAGEvent(dag.getID(), DAGEventType.DAG_KILL));
-    waitUntil(dag, DAGState.KILLED);
+    dag.handle(new DAGEventTerminateDag(dag.getID(), terminationCause, null));
+    waitUntil(dag, terminationCause.getFinishedState());
 
     Assert.assertEquals(VertexState.SUCCEEDED, v1.getState());
     Assert.assertEquals(VertexState.SUCCEEDED, v2.getState());
     Assert.assertEquals(VertexState.KILLED, v3.getState());
-    Assert.assertEquals(VertexTerminationCause.DAG_KILL, v3.getTerminationCause());
+    Assert.assertEquals(VertexTerminationCause.DAG_TERMINATED, v3.getTerminationCause());
     Assert.assertTrue(v3.commitFutures.isEmpty());
-    Assert.assertEquals(DAGState.KILLED, dag.getState());
+    Assert.assertEquals(terminationCause.getFinishedState(), dag.getState());
     Assert
-        .assertEquals(DAGTerminationCause.DAG_KILL, dag.getTerminationCause());
+        .assertEquals(terminationCause, dag.getTerminationCause());
     Assert.assertTrue(dag.commitFutures.isEmpty());
     // commit uv12 may not have started, so can't verify the VertexGroupCommitStartedEvent
     historyEventHandler.verifyVertexGroupCommitFinishedEvent("uv12", 0);
@@ -1903,10 +1933,19 @@ public class TestCommit {
     Assert.assertEquals(1, v3OutputCommitter.abortCounter);
   }
 
-  // test commit will be canceled no matter it is started or still in the threadpool
-  // ControlledThreadPoolExecutor is used for to not schedule the commits
   @Test(timeout = 5000)
   public void testCommitCanceled_OnDAGSuccess() throws Exception {
+    _testCommitCanceled_OnDAGSuccess(DAGTerminationCause.DAG_KILL);
+  }
+
+  @Test(timeout = 5000)
+  public void testCommitCanceled_OnDAGSuccess2() throws Exception {
+    _testCommitCanceled_OnDAGSuccess(DAGTerminationCause.SERVICE_PLUGIN_ERROR);
+  }
+
+  // Test that a commit is canceled whether it has already started or is still queued in the threadpool.
+  // ControlledThreadPoolExecutor is used so that the commits are not scheduled.
+  private void _testCommitCanceled_OnDAGSuccess(DAGTerminationCause terminationCause) throws Exception {
     conf.setBoolean(TezConfiguration.TEZ_AM_COMMIT_ALL_OUTPUTS_ON_DAG_SUCCESS,
         true);
     setupDAG(createDAGPlan(true, true));
@@ -1931,10 +1970,10 @@ public class TestCommit {
     // mean the commits have been submitted to ThreadPool
     Assert.assertEquals(2, dag.commitFutures.size());
 
-    dag.handle(new DAGEvent(dag.getID(), DAGEventType.DAG_KILL));
-    waitUntil(dag, DAGState.KILLED);
+    dag.handle(new DAGEventTerminateDag(dag.getID(), terminationCause, null));
+    waitUntil(dag, terminationCause.getFinishedState());
     
-    Assert.assertEquals(DAGTerminationCause.DAG_KILL, dag.getTerminationCause());
+    Assert.assertEquals(terminationCause, dag.getTerminationCause());
     // mean the commits have been canceled
     Assert.assertTrue(dag.commitFutures.isEmpty());
     historyEventHandler.verifyVertexGroupCommitStartedEvent("uv12", 0);

http://git-wip-us.apache.org/repos/asf/tez/blob/a812c346/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestDAGImpl.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestDAGImpl.java b/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestDAGImpl.java
index 2158368..480e3cf 100644
--- a/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestDAGImpl.java
+++ b/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestDAGImpl.java
@@ -21,7 +21,6 @@ package org.apache.tez.dag.app.dag.impl;
 import static org.mockito.Matchers.any;
 import static org.mockito.Mockito.doReturn;
 import static org.mockito.Mockito.mock;
-import static org.mockito.Mockito.times;
 import static org.mockito.Mockito.verify;
 import static org.mockito.Mockito.when;
 
@@ -41,6 +40,7 @@ import java.util.concurrent.locks.ReentrantLock;
 import org.apache.commons.lang.StringUtils;
 import org.apache.tez.common.counters.Limits;
 import org.apache.tez.common.counters.TezCounters;
+import org.apache.tez.dag.app.dag.event.DAGEventTerminateDag;
 import org.apache.tez.hadoop.shim.DefaultHadoopShim;
 import org.apache.tez.hadoop.shim.HadoopShim;
 import org.slf4j.Logger;
@@ -1641,8 +1641,7 @@ public class TestDAGImpl {
     startDAG(dag);
     dispatcher.await();
 
-    dispatcher.getEventHandler().handle(
-        new DAGEvent(dagId, DAGEventType.DAG_KILL));
+    dispatcher.getEventHandler().handle(new DAGEventTerminateDag(dagId, DAGTerminationCause.DAG_KILL, null));
     dispatcher.await();
 
     Assert.assertEquals(DAGState.KILLED, dag.getState());
@@ -1654,9 +1653,18 @@ public class TestDAGImpl {
 
   }
 
-  @SuppressWarnings("unchecked")
   @Test(timeout = 5000)
   public void testKillRunningDAG() {
+    _testTerminateRunningDAG(DAGTerminationCause.DAG_KILL);
+  }
+
+  @Test(timeout = 5000)
+  public void testServiceErrorRunningDAG() {
+    _testTerminateRunningDAG(DAGTerminationCause.SERVICE_PLUGIN_ERROR);
+  }
+
+  @SuppressWarnings("unchecked")
+  private void _testTerminateRunningDAG(DAGTerminationCause terminationCause) {
     initDAG(dag);
     startDAG(dag);
     dispatcher.await();
@@ -1674,7 +1682,7 @@ public class TestDAGImpl {
     Assert.assertEquals(VertexState.SUCCEEDED, v0.getState());
     Assert.assertEquals(VertexState.RUNNING, v1.getState());
 
-    dispatcher.getEventHandler().handle(new DAGEvent(dagId, DAGEventType.DAG_KILL));
+    dispatcher.getEventHandler().handle(new DAGEventTerminateDag(dagId, terminationCause, null));
     dispatcher.await();
 
     Assert.assertEquals(DAGState.TERMINATING, dag.getState());
@@ -1817,7 +1825,7 @@ public class TestDAGImpl {
       dispatcher.getEventHandler().handle(new DAGEventVertexCompleted(
           TezVertexID.getInstance(dagId, 5), VertexState.FAILED));
     } else if (testState == DAGStatus.State.KILLED) {
-      dispatcher.getEventHandler().handle(new DAGEvent(dagId, DAGEventType.DAG_KILL));
+      dispatcher.getEventHandler().handle(new DAGEventTerminateDag(dagId, DAGTerminationCause.DAG_KILL, null));
     } else if (testState == DAGStatus.State.ERROR) {
       dispatcher.getEventHandler().handle(new DAGEventStartDag(dagId, new LinkedList<URL>()));
     } else {
@@ -1871,11 +1879,21 @@ public class TestDAGImpl {
     }
   }
 
+
+  @Test(timeout = 5000)
+  public void testDAGKill() {
+    _testDAGTerminate(DAGTerminationCause.DAG_KILL);
+  }
+
+  @Test(timeout = 5000)
+  public void testDAGServiceError() {
+    _testDAGTerminate(DAGTerminationCause.SERVICE_PLUGIN_ERROR);
+  }
+
   // Couple of vertices succeed. DAG_KILLED processed, which causes the rest of the vertices to be
   // marked as KILLED.
   @SuppressWarnings("unchecked")
-  @Test(timeout = 5000)
-  public void testDAGKill() {
+  private void _testDAGTerminate(DAGTerminationCause terminationCause) {
     initDAG(dag);
     startDAG(dag);
     dispatcher.await();
@@ -1887,10 +1905,10 @@ public class TestDAGImpl {
 
     dispatcher.getEventHandler().handle(new DAGEventVertexCompleted(
         TezVertexID.getInstance(dagId, 1), VertexState.SUCCEEDED));
-    dispatcher.getEventHandler().handle(new DAGEvent(dagId, DAGEventType.DAG_KILL));
+    dispatcher.getEventHandler().handle(new DAGEventTerminateDag(dagId, terminationCause, null));
     dispatcher.await();
-    Assert.assertEquals(DAGState.KILLED, dag.getState());
-    Assert.assertEquals(DAGTerminationCause.DAG_KILL, dag.getTerminationCause());
+    Assert.assertEquals(terminationCause.getFinishedState(), dag.getState());
+    Assert.assertEquals(terminationCause, dag.getTerminationCause());
     Assert.assertEquals(2, dag.getSuccessfulVertices());
 
     int killedCount = 0;
@@ -1902,16 +1920,25 @@ public class TestDAGImpl {
     Assert.assertEquals(4, killedCount);
 
     for (Vertex v : dag.getVertices().values()) {
-      Assert.assertEquals(VertexTerminationCause.DAG_KILL, v.getTerminationCause());
+      Assert.assertEquals(VertexTerminationCause.DAG_TERMINATED, v.getTerminationCause());
     }
 
     Assert.assertEquals(1, dagFinishEventHandler.dagFinishEvents);
   }
 
+  @Test(timeout = 5000)
+  public void testDAGKillVertexSuccessAfterTerminated() {
+    _testDAGKillVertexSuccessAfterTerminated(DAGTerminationCause.DAG_KILL);
+  }
+
+  @Test(timeout = 5000)
+  public void testDAGServiceErrorVertexSuccessAfterTerminated() {
+    _testDAGKillVertexSuccessAfterTerminated(DAGTerminationCause.SERVICE_PLUGIN_ERROR);
+  }
+
   // Vertices succeed after a DAG kill has been processed. Should be ignored.
   @SuppressWarnings("unchecked")
-  @Test(timeout = 5000)
-  public void testDAGKillVertexSuccessAfterKill() {
+  private void _testDAGKillVertexSuccessAfterTerminated(DAGTerminationCause terminationCause) {
     initDAG(dag);
     startDAG(dag);
     dispatcher.await();
@@ -1923,10 +1950,10 @@ public class TestDAGImpl {
 
     dispatcher.getEventHandler().handle(new DAGEventVertexCompleted(
         TezVertexID.getInstance(dagId, 1), VertexState.SUCCEEDED));
-    dispatcher.getEventHandler().handle(new DAGEvent(dagId, DAGEventType.DAG_KILL));
+    dispatcher.getEventHandler().handle(new DAGEventTerminateDag(dagId, terminationCause, null));
     dispatcher.await();
 
-    Assert.assertEquals(DAGState.KILLED, dag.getState());
+    Assert.assertEquals(terminationCause.getFinishedState(), dag.getState());
 
     // Vertex SUCCESS gets processed after the DAG has reached the KILLED state. Should be ignored.
     for (int i = 2; i < 6; ++i) {
@@ -1943,18 +1970,27 @@ public class TestDAGImpl {
     }
     Assert.assertEquals(4, killedCount);
 
-    Assert.assertEquals(DAGTerminationCause.DAG_KILL, dag.getTerminationCause());
+    Assert.assertEquals(terminationCause, dag.getTerminationCause());
     Assert.assertEquals(2, dag.getSuccessfulVertices());
     for (Vertex v : dag.getVertices().values()) {
-      Assert.assertEquals(VertexTerminationCause.DAG_KILL, v.getTerminationCause());
+      Assert.assertEquals(VertexTerminationCause.DAG_TERMINATED, v.getTerminationCause());
     }
     Assert.assertEquals(1, dagFinishEventHandler.dagFinishEvents);
   }
 
-  // Vertex KILLED after a DAG_KILLED is issued. Termination reason should be DAG_KILLED
-  @SuppressWarnings("unchecked")
   @Test(timeout = 5000)
   public void testDAGKillPending() {
+    _testDAGKillPending(DAGTerminationCause.DAG_KILL);
+  }
+
+  @Test(timeout = 5000)
+  public void testDAGServiceErrorPending() {
+    _testDAGKillPending(DAGTerminationCause.SERVICE_PLUGIN_ERROR);
+  }
+
+  // Vertex KILLED after a DAG termination is issued. Termination reason should be DAG_TERMINATED
+  @SuppressWarnings("unchecked")
+  private void _testDAGKillPending(DAGTerminationCause terminationCause) {
     initDAG(dag);
     startDAG(dag);
     dispatcher.await();
@@ -1972,17 +2008,17 @@ public class TestDAGImpl {
           TezVertexID.getInstance(dagId, i), VertexState.SUCCEEDED));
     }
     dispatcher.await();
-    dispatcher.getEventHandler().handle(new DAGEvent(dagId, DAGEventType.DAG_KILL));
+    dispatcher.getEventHandler().handle(new DAGEventTerminateDag(dagId, terminationCause, null));
     dispatcher.await();
-    Assert.assertEquals(DAGState.KILLED, dag.getState());
+    Assert.assertEquals(terminationCause.getFinishedState(), dag.getState());
 
     dispatcher.getEventHandler().handle(new DAGEventVertexCompleted(
         TezVertexID.getInstance(dagId, 5), VertexState.KILLED));
     dispatcher.await();
-    Assert.assertEquals(DAGState.KILLED, dag.getState());
+    Assert.assertEquals(terminationCause.getFinishedState(), dag.getState());
     Assert.assertEquals(5, dag.getSuccessfulVertices());
     Assert.assertEquals(dag.getVertex(TezVertexID.getInstance(dagId, 5)).getTerminationCause(),
-        VertexTerminationCause.DAG_KILL);
+        VertexTerminationCause.DAG_TERMINATED);
     Assert.assertEquals(1, dagFinishEventHandler.dagFinishEvents);
   }
 

http://git-wip-us.apache.org/repos/asf/tez/blob/a812c346/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestVertexImpl.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestVertexImpl.java b/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestVertexImpl.java
index 986f64d..659d099 100644
--- a/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestVertexImpl.java
+++ b/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestVertexImpl.java
@@ -143,7 +143,6 @@ import org.apache.tez.dag.app.dag.event.TaskAttemptEvent;
 import org.apache.tez.dag.app.dag.event.TaskAttemptEventAttemptFailed;
 import org.apache.tez.dag.app.dag.event.TaskAttemptEventSchedule;
 import org.apache.tez.dag.app.dag.event.TaskAttemptEventStartedRemotely;
-import org.apache.tez.dag.app.dag.event.TaskAttemptEventTerminationCauseEvent;
 import org.apache.tez.dag.app.dag.event.TaskAttemptEventType;
 import org.apache.tez.dag.app.dag.event.TaskEvent;
 import org.apache.tez.dag.app.dag.event.TaskEventScheduleTask;
@@ -189,7 +188,6 @@ import org.apache.tez.runtime.api.events.InputDataInformationEvent;
 import org.apache.tez.runtime.api.events.InputFailedEvent;
 import org.apache.tez.runtime.api.events.InputInitializerEvent;
 import org.apache.tez.runtime.api.events.InputUpdatePayloadEvent;
-import org.apache.tez.runtime.api.events.TaskAttemptFailedEvent;
 import org.apache.tez.runtime.api.events.VertexManagerEvent;
 import org.apache.tez.test.EdgeManagerForTest;
 import org.apache.tez.test.VertexManagerPluginForTest;
@@ -206,7 +204,6 @@ import org.junit.Before;
 import org.junit.BeforeClass;
 import org.junit.Ignore;
 import org.junit.Test;
-import org.mockito.ArgumentCaptor;
 import org.mockito.Mockito;
 import org.mockito.internal.util.collections.Sets;
 
@@ -2515,10 +2512,10 @@ public class TestVertexImpl {
 
   private void killVertex(VertexImpl v) {
     dispatcher.getEventHandler().handle(
-        new VertexEventTermination(v.getVertexId(), VertexTerminationCause.DAG_KILL));
+        new VertexEventTermination(v.getVertexId(), VertexTerminationCause.DAG_TERMINATED));
     dispatcher.await();
     Assert.assertEquals(VertexState.KILLED, v.getState());
-    Assert.assertEquals(v.getTerminationCause(), VertexTerminationCause.DAG_KILL);
+    Assert.assertEquals(v.getTerminationCause(), VertexTerminationCause.DAG_TERMINATED);
   }
 
   private void startVertex(VertexImpl v,
@@ -3322,7 +3319,7 @@ public class TestVertexImpl {
         StringUtils.join(v3.getDiagnostics(), ",").toLowerCase(Locale.ENGLISH);
     assertTrue(diagnostics.contains(
         "vertex received kill while in running state"));
-    Assert.assertEquals(VertexTerminationCause.DAG_KILL, v3.getTerminationCause());
+    Assert.assertEquals(VertexTerminationCause.DAG_TERMINATED, v3.getTerminationCause());
     assertTrue(diagnostics.contains(v3.getTerminationCause().name().toLowerCase(Locale.ENGLISH)));
   }
 
@@ -3334,7 +3331,7 @@ public class TestVertexImpl {
     startVertex(v);
 
     dispatcher.getEventHandler().handle(
-        new VertexEventTermination(v.getVertexId(), VertexTerminationCause.DAG_KILL));
+        new VertexEventTermination(v.getVertexId(), VertexTerminationCause.DAG_TERMINATED));
     dispatcher.await();
     Assert.assertEquals(VertexState.KILLED, v.getState());
 
@@ -3359,7 +3356,7 @@ public class TestVertexImpl {
     startVertex(v);
 
     dispatcher.getEventHandler().handle(
-        new VertexEventTermination(v.getVertexId(), VertexTerminationCause.DAG_KILL));
+        new VertexEventTermination(v.getVertexId(), VertexTerminationCause.DAG_TERMINATED));
     dispatcher.await();
     Assert.assertEquals(VertexState.KILLED, v.getState());
 

http://git-wip-us.apache.org/repos/asf/tez/blob/a812c346/tez-dag/src/test/java/org/apache/tez/dag/app/launcher/TestContainerLauncherManager.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/test/java/org/apache/tez/dag/app/launcher/TestContainerLauncherManager.java b/tez-dag/src/test/java/org/apache/tez/dag/app/launcher/TestContainerLauncherManager.java
index 1f75afb..b3568eb 100644
--- a/tez-dag/src/test/java/org/apache/tez/dag/app/launcher/TestContainerLauncherManager.java
+++ b/tez-dag/src/test/java/org/apache/tez/dag/app/launcher/TestContainerLauncherManager.java
@@ -39,6 +39,7 @@ import java.util.concurrent.atomic.AtomicBoolean;
 import java.util.concurrent.atomic.AtomicInteger;
 
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.yarn.api.records.ApplicationId;
 import org.apache.hadoop.yarn.api.records.Container;
 import org.apache.hadoop.yarn.api.records.ContainerId;
 import org.apache.hadoop.yarn.api.records.ContainerLaunchContext;
@@ -52,15 +53,21 @@ import org.apache.tez.dag.api.TezException;
 import org.apache.tez.dag.api.UserPayload;
 import org.apache.tez.dag.app.AppContext;
 import org.apache.tez.dag.app.TaskCommunicatorManagerInterface;
+import org.apache.tez.dag.app.dag.DAG;
 import org.apache.tez.dag.app.dag.event.DAGAppMasterEventType;
 import org.apache.tez.dag.app.dag.event.DAGAppMasterEventUserServiceFatalError;
+import org.apache.tez.dag.app.dag.event.DAGEventTerminateDag;
 import org.apache.tez.dag.app.rm.ContainerLauncherLaunchRequestEvent;
 import org.apache.tez.dag.app.rm.ContainerLauncherStopRequestEvent;
+import org.apache.tez.dag.helpers.DagInfoImplForTest;
+import org.apache.tez.dag.records.TezDAGID;
 import org.apache.tez.serviceplugins.api.ContainerLaunchRequest;
 import org.apache.tez.serviceplugins.api.ContainerLauncher;
 import org.apache.tez.serviceplugins.api.ContainerLauncherContext;
 import org.apache.tez.serviceplugins.api.ContainerStopRequest;
+import org.apache.tez.serviceplugins.api.ServicePluginErrorDefaults;
 import org.apache.tez.serviceplugins.api.ServicePluginException;
+import org.apache.tez.serviceplugins.api.TaskCommunicatorDescriptor;
 import org.junit.After;
 import org.junit.Before;
 import org.junit.Test;
@@ -244,6 +251,75 @@ public class TestContainerLauncherManager {
 
   @SuppressWarnings("unchecked")
   @Test(timeout = 5000)
+  public void testReportFailureFromContainerLauncher() throws ServicePluginException, TezException {
+    final String dagName = DAG_NAME;
+    final int dagIndex = DAG_INDEX;
+    TezDAGID dagId = TezDAGID.getInstance(ApplicationId.newInstance(0, 0), dagIndex);
+    DAG dag = mock(DAG.class);
+    doReturn(dagName).when(dag).getName();
+    doReturn(dagId).when(dag).getID();
+    EventHandler eventHandler = mock(EventHandler.class);
+    AppContext appContext = mock(AppContext.class);
+    doReturn(eventHandler).when(appContext).getEventHandler();
+    doReturn(dag).when(appContext).getCurrentDAG();
+    doReturn("testlauncher").when(appContext).getContainerLauncherName(0);
+
+    NamedEntityDescriptor<TaskCommunicatorDescriptor> taskCommDescriptor =
+        new NamedEntityDescriptor<>("testlauncher", ContainerLauncherForTest.class.getName());
+    List<NamedEntityDescriptor> list = new LinkedList<>();
+    list.add(taskCommDescriptor);
+    ContainerLauncherManager containerLauncherManager =
+        new ContainerLauncherManager(appContext, mock(TaskCommunicatorManagerInterface.class), "",
+            list, false);
+
+    try {
+      ContainerLaunchContext clc1 = mock(ContainerLaunchContext.class);
+      Container container1 = mock(Container.class);
+      ContainerLauncherLaunchRequestEvent launchRequestEvent =
+          new ContainerLauncherLaunchRequestEvent(clc1, container1, 0, 0, 0);
+
+
+      containerLauncherManager.handle(launchRequestEvent);
+
+      ArgumentCaptor<Event> argumentCaptor = ArgumentCaptor.forClass(Event.class);
+      verify(eventHandler, times(1)).handle(argumentCaptor.capture());
+
+      Event rawEvent = argumentCaptor.getValue();
+      assertTrue(rawEvent instanceof DAGAppMasterEventUserServiceFatalError);
+      DAGAppMasterEventUserServiceFatalError event =
+          (DAGAppMasterEventUserServiceFatalError) rawEvent;
+      assertEquals(DAGAppMasterEventType.CONTAINER_LAUNCHER_SERVICE_FATAL_ERROR, event.getType());
+      assertTrue(event.getDiagnosticInfo().contains("ReportedFatalError"));
+      assertTrue(
+          event.getDiagnosticInfo().contains(ServicePluginErrorDefaults.INCONSISTENT_STATE.name()));
+      assertTrue(event.getDiagnosticInfo().contains("[0:testlauncher]"));
+
+      reset(eventHandler);
+      // stop container
+
+      ContainerId containerId2 = mock(ContainerId.class);
+      NodeId nodeId2 = mock(NodeId.class);
+      ContainerLauncherStopRequestEvent stopRequestEvent =
+          new ContainerLauncherStopRequestEvent(containerId2, nodeId2, null, 0, 0, 0);
+
+      argumentCaptor = ArgumentCaptor.forClass(Event.class);
+
+      containerLauncherManager.handle(stopRequestEvent);
+      verify(eventHandler, times(1)).handle(argumentCaptor.capture());
+      rawEvent = argumentCaptor.getValue();
+      assertTrue(rawEvent instanceof DAGEventTerminateDag);
+      DAGEventTerminateDag killEvent = (DAGEventTerminateDag) rawEvent;
+      assertTrue(killEvent.getDiagnosticInfo().contains("ReportError"));
+      assertTrue(killEvent.getDiagnosticInfo()
+          .contains(ServicePluginErrorDefaults.SERVICE_UNAVAILABLE.name()));
+      assertTrue(killEvent.getDiagnosticInfo().contains("[0:testlauncher]"));
+    } finally {
+      containerLauncherManager.stop();
+    }
+  }
+
+  @SuppressWarnings("unchecked")
+  @Test(timeout = 5000)
   public void testContainerLauncherUserError() throws ServicePluginException {
 
     ContainerLauncher containerLauncher = mock(ContainerLauncher.class);
@@ -256,7 +332,8 @@ public class TestContainerLauncherManager {
     Configuration conf = new Configuration(false);
 
     ContainerLauncherManager containerLauncherManager =
-        new ContainerLauncherManager(containerLauncher, appContext);
+        new ContainerLauncherManager(appContext);
+    containerLauncherManager.setContainerLauncher(containerLauncher);
     try {
       containerLauncherManager.init(conf);
       containerLauncherManager.start();
@@ -437,4 +514,26 @@ public class TestContainerLauncherManager {
     }
   }
 
+  private static final String DAG_NAME = "dagName";
+  private static final int DAG_INDEX = 1;
+  public static class ContainerLauncherForTest extends ContainerLauncher {
+
+    public ContainerLauncherForTest(
+        ContainerLauncherContext containerLauncherContext) {
+      super(containerLauncherContext);
+    }
+
+    @Override
+    public void launchContainer(ContainerLaunchRequest launchRequest) throws
+        ServicePluginException {
+      getContext().reportError(ServicePluginErrorDefaults.INCONSISTENT_STATE, "ReportedFatalError", null);
+    }
+
+    @Override
+    public void stopContainer(ContainerStopRequest stopRequest) throws ServicePluginException {
+      getContext()
+          .reportError(ServicePluginErrorDefaults.SERVICE_UNAVAILABLE, "ReportError", new DagInfoImplForTest(DAG_INDEX, DAG_NAME));
+    }
+  }
+
 }

http://git-wip-us.apache.org/repos/asf/tez/blob/a812c346/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestTaskScheduler.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestTaskScheduler.java b/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestTaskScheduler.java
index f69d8be..a3e5ff5 100644
--- a/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestTaskScheduler.java
+++ b/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestTaskScheduler.java
@@ -33,6 +33,7 @@ import static org.mockito.Mockito.times;
 import static org.mockito.Mockito.verify;
 import static org.mockito.Mockito.when;
 
+import java.io.IOException;
 import java.nio.ByteBuffer;
 import java.util.ArrayList;
 import java.util.Collection;
@@ -49,6 +50,7 @@ import java.util.concurrent.Executors;
 import java.util.concurrent.atomic.AtomicBoolean;
 
 import com.google.common.util.concurrent.ThreadFactoryBuilder;
+import org.apache.commons.io.IOExceptionWithCause;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.yarn.api.protocolrecords.RegisterApplicationMasterResponse;
 import org.apache.hadoop.yarn.api.records.ApplicationAccessType;
@@ -74,6 +76,7 @@ import org.apache.tez.dag.app.rm.TestTaskSchedulerHelpers.TaskSchedulerContextDr
 import org.apache.tez.dag.app.rm.TestTaskSchedulerHelpers.TaskSchedulerWithDrainableContext;
 import org.apache.tez.dag.app.rm.TestTaskSchedulerHelpers.AlwaysMatchesContainerMatcher;
 import org.apache.tez.dag.app.rm.TestTaskSchedulerHelpers.PreemptionMatcher;
+import org.apache.tez.serviceplugins.api.DagInfo;
 import org.apache.tez.serviceplugins.api.TaskSchedulerContext;
 import org.apache.tez.serviceplugins.api.TaskSchedulerContext.AppFinalStatus;
 import org.junit.After;
@@ -503,10 +506,14 @@ public class TestTaskScheduler {
     drainableAppCallback.drain();
     verify(mockApp).nodesUpdated(mockUpdatedNodes);
 
-    Exception mockException = mock(Exception.class);
+    ArgumentCaptor<String> argumentCaptor = ArgumentCaptor.forClass(String.class);
+    Exception mockException = new IOException("mockexception");
     scheduler.onError(mockException);
     drainableAppCallback.drain();
-    verify(mockApp).onError(mockException);
+    verify(mockApp)
+        .reportError(eq(YarnTaskSchedulerServiceError.RESOURCEMANAGER_ERROR), argumentCaptor.capture(),
+            any(DagInfo.class));
+    assertTrue(argumentCaptor.getValue().contains("mockexception"));
 
     scheduler.onShutdownRequest();
     drainableAppCallback.drain();
@@ -1220,10 +1227,14 @@ public class TestTaskScheduler {
     drainableAppCallback.drain();
     verify(mockApp).nodesUpdated(mockUpdatedNodes);
 
-    Exception mockException = mock(Exception.class);
+
+    ArgumentCaptor<String> argumentCaptor = ArgumentCaptor.forClass(String.class);
+    Exception mockException = new IOException("mockexception");
     scheduler.onError(mockException);
     drainableAppCallback.drain();
-    verify(mockApp).onError(mockException);
+    verify(mockApp).reportError(eq(YarnTaskSchedulerServiceError.RESOURCEMANAGER_ERROR), argumentCaptor.capture(),
+            any(DagInfo.class));
+    assertTrue(argumentCaptor.getValue().contains("mockexception"));
 
     scheduler.onShutdownRequest();
     drainableAppCallback.drain();

http://git-wip-us.apache.org/repos/asf/tez/blob/a812c346/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestTaskSchedulerHelpers.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestTaskSchedulerHelpers.java b/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestTaskSchedulerHelpers.java
index b54d024..ab85751 100644
--- a/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestTaskSchedulerHelpers.java
+++ b/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestTaskSchedulerHelpers.java
@@ -24,6 +24,8 @@ import static org.mockito.Mockito.mock;
 import static org.mockito.Mockito.spy;
 import static org.mockito.Mockito.when;
 
+import javax.annotation.Nonnull;
+import javax.annotation.Nullable;
 import java.io.IOException;
 import java.nio.ByteBuffer;
 import java.util.Collection;
@@ -68,6 +70,8 @@ import org.apache.tez.dag.api.UserPayload;
 import org.apache.tez.dag.app.AppContext;
 import org.apache.tez.dag.app.ServicePluginLifecycleAbstractService;
 import org.apache.tez.dag.app.rm.YarnTaskSchedulerService.CookieContainerRequest;
+import org.apache.tez.serviceplugins.api.DagInfo;
+import org.apache.tez.serviceplugins.api.ServicePluginError;
 import org.apache.tez.serviceplugins.api.TaskScheduler;
 import org.apache.tez.serviceplugins.api.TaskSchedulerContext;
 
@@ -283,9 +287,10 @@ class TestTaskSchedulerHelpers {
     }
 
     @Override
-    public void onError(Throwable t) {
+    public void reportError(@Nonnull ServicePluginError servicePluginError, String message,
+                            DagInfo dagInfo) {
       invocations++;
-      real.onError(t);
+      real.reportError(servicePluginError, message, dagInfo);
     }
 
     @Override
@@ -327,6 +332,12 @@ class TestTaskSchedulerHelpers {
       return real.getApplicationAttemptId();
     }
 
+    @Nullable
+    @Override
+    public DagInfo getCurrentDagInfo() {
+      return real.getCurrentDagInfo();
+    }
+
     @Override
     public String getAppHostName() {
       return real.getAppHostName();

http://git-wip-us.apache.org/repos/asf/tez/blob/a812c346/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestTaskSchedulerManager.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestTaskSchedulerManager.java b/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestTaskSchedulerManager.java
index 4d828e2..791bb7f 100644
--- a/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestTaskSchedulerManager.java
+++ b/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestTaskSchedulerManager.java
@@ -28,11 +28,13 @@ import static org.mockito.Matchers.eq;
 import static org.mockito.Mockito.RETURNS_DEEP_STUBS;
 import static org.mockito.Mockito.doReturn;
 import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.reset;
 import static org.mockito.Mockito.spy;
 import static org.mockito.Mockito.times;
 import static org.mockito.Mockito.verify;
 import static org.mockito.Mockito.when;
 
+import javax.annotation.Nullable;
 import java.io.IOException;
 import java.lang.reflect.Method;
 import java.net.InetSocketAddress;
@@ -71,9 +73,11 @@ import org.apache.tez.dag.api.client.DAGClientServer;
 import org.apache.tez.dag.app.AppContext;
 import org.apache.tez.dag.app.ContainerContext;
 import org.apache.tez.dag.app.ServicePluginLifecycleAbstractService;
+import org.apache.tez.dag.app.dag.DAG;
 import org.apache.tez.dag.app.dag.TaskAttempt;
 import org.apache.tez.dag.app.dag.event.DAGAppMasterEventType;
 import org.apache.tez.dag.app.dag.event.DAGAppMasterEventUserServiceFatalError;
+import org.apache.tez.dag.app.dag.event.DAGEventTerminateDag;
 import org.apache.tez.dag.app.dag.impl.TaskAttemptImpl;
 import org.apache.tez.dag.app.dag.impl.TaskImpl;
 import org.apache.tez.dag.app.dag.impl.VertexImpl;
@@ -84,16 +88,19 @@ import org.apache.tez.dag.app.rm.container.AMContainerEventType;
 import org.apache.tez.dag.app.rm.container.AMContainerMap;
 import org.apache.tez.dag.app.rm.container.AMContainerState;
 import org.apache.tez.dag.app.web.WebUIService;
+import org.apache.tez.dag.helpers.DagInfoImplForTest;
 import org.apache.tez.dag.records.TaskAttemptTerminationCause;
 import org.apache.tez.dag.records.TezDAGID;
 import org.apache.tez.dag.records.TezTaskAttemptID;
 import org.apache.tez.dag.records.TezTaskID;
 import org.apache.tez.dag.records.TezVertexID;
 import org.apache.tez.runtime.api.impl.TaskSpec;
+import org.apache.tez.serviceplugins.api.ServicePluginErrorDefaults;
 import org.apache.tez.serviceplugins.api.ServicePluginException;
 import org.apache.tez.serviceplugins.api.TaskAttemptEndReason;
 import org.apache.tez.serviceplugins.api.TaskScheduler;
 import org.apache.tez.serviceplugins.api.TaskSchedulerContext;
+import org.apache.tez.serviceplugins.api.TaskSchedulerDescriptor;
 import org.junit.Assert;
 import org.junit.Before;
 import org.junit.Test;
@@ -539,6 +546,81 @@ public class TestTaskSchedulerManager {
 
   @SuppressWarnings("unchecked")
   @Test(timeout = 5000)
+  public void testReportFailureFromTaskScheduler() {
+    String dagName = DAG_NAME;
+    Configuration conf = new TezConfiguration();
+    String taskSchedulerName = "testTaskScheduler";
+    String expIdentifier = "[0:" + taskSchedulerName + "]";
+    EventHandler eventHandler = mock(EventHandler.class);
+    AppContext appContext = mock(AppContext.class, RETURNS_DEEP_STUBS);
+    doReturn(taskSchedulerName).when(appContext).getTaskSchedulerName(0);
+    doReturn(eventHandler).when(appContext).getEventHandler();
+    doReturn(conf).when(appContext).getAMConf();
+    InetSocketAddress address = new InetSocketAddress("host", 55000);
+    // Stubbed client server that returns the fixed bind address created above.
+    DAGClientServer dagClientServer = mock(DAGClientServer.class);
+    doReturn(address).when(dagClientServer).getBindAddress();
+    // Mock the current DAG with the same name and index that TaskSchedulerForFailureTest reports.
+    DAG dag = mock(DAG.class);
+    TezDAGID dagId = TezDAGID.getInstance(ApplicationId.newInstance(1, 0), DAG_INDEX);
+    doReturn(dagName).when(dag).getName();
+    doReturn(dagId).when(dag).getID();
+    doReturn(dag).when(appContext).getCurrentDAG();
+    // Register TaskSchedulerForFailureTest as the single scheduler plugin.
+    NamedEntityDescriptor<TaskSchedulerDescriptor> namedEntityDescriptor =
+        new NamedEntityDescriptor<>(taskSchedulerName, TaskSchedulerForFailureTest.class.getName());
+    List<NamedEntityDescriptor> list = new LinkedList<>();
+    list.add(namedEntityDescriptor);
+
+    TaskSchedulerManager taskSchedulerManager =
+        new TaskSchedulerManager(appContext, dagClientServer, eventHandler,
+            mock(ContainerSignatureMatcher.class), mock(WebUIService.class), list, false) {
+          @Override
+          TaskSchedulerContext wrapTaskSchedulerContext(TaskSchedulerContext rawContext) {
+            // Avoid wrapping in threads
+            return rawContext;
+          }
+        };
+    try {
+      taskSchedulerManager.init(new TezConfiguration());
+      taskSchedulerManager.start();
+      // Case 1: the plugin reports SERVICE_UNAVAILABLE with a DagInfo -> expect a DAG terminate event.
+      taskSchedulerManager.getTotalResources(0);
+      ArgumentCaptor<Event> argumentCaptor = ArgumentCaptor.forClass(Event.class);
+      verify(eventHandler, times(1)).handle(argumentCaptor.capture());
+
+      Event rawEvent = argumentCaptor.getValue();
+      assertTrue(rawEvent instanceof DAGEventTerminateDag);
+      DAGEventTerminateDag killEvent = (DAGEventTerminateDag) rawEvent;
+      assertTrue(killEvent.getDiagnosticInfo().contains("ReportError"));
+      assertTrue(killEvent.getDiagnosticInfo()
+          .contains(ServicePluginErrorDefaults.SERVICE_UNAVAILABLE.name()));
+      assertTrue(killEvent.getDiagnosticInfo().contains(expIdentifier));
+
+      // Case 2: the plugin reports INCONSISTENT_STATE with no DagInfo -> expect a fatal-error event.
+      reset(eventHandler);
+      taskSchedulerManager.getAvailableResources(0);
+      argumentCaptor = ArgumentCaptor.forClass(Event.class);
+
+      verify(eventHandler, times(1)).handle(argumentCaptor.capture());
+      rawEvent = argumentCaptor.getValue();
+
+      assertTrue(rawEvent instanceof DAGAppMasterEventUserServiceFatalError);
+      DAGAppMasterEventUserServiceFatalError event =
+          (DAGAppMasterEventUserServiceFatalError) rawEvent;
+      assertEquals(DAGAppMasterEventType.TASK_SCHEDULER_SERVICE_FATAL_ERROR, event.getType());
+      assertTrue(event.getDiagnosticInfo().contains("ReportedFatalError"));
+      assertTrue(
+          event.getDiagnosticInfo().contains(ServicePluginErrorDefaults.INCONSISTENT_STATE.name()));
+      assertTrue(event.getDiagnosticInfo().contains(expIdentifier));
+
+    } finally {
+      taskSchedulerManager.stop();
+    }
+  }
+
+  @SuppressWarnings("unchecked")
+  @Test(timeout = 5000)
   public void testTaskSchedulerUserError() {
     TaskScheduler taskScheduler = mock(TaskScheduler.class, new ExceptionAnswer());
 
@@ -798,4 +880,83 @@ public class TestTaskSchedulerManager {
       return false;
     }
   }
+
+  private static final String DAG_NAME = "dagName";
+  private static final int DAG_INDEX = 1;
+  public static class TaskSchedulerForFailureTest extends TaskScheduler {
+    // Test scheduler stub: the two resource getters report errors through the plugin context.
+    public TaskSchedulerForFailureTest(TaskSchedulerContext taskSchedulerContext) {
+      super(taskSchedulerContext);
+    }
+    // Reports INCONSISTENT_STATE with a null DagInfo, then returns a fixed dummy resource.
+    @Override
+    public Resource getAvailableResources() throws ServicePluginException {
+      getContext().reportError(ServicePluginErrorDefaults.INCONSISTENT_STATE, "ReportedFatalError", null);
+      return Resource.newInstance(1024, 1);
+    }
+    // Reports SERVICE_UNAVAILABLE tagged with the test dag, then returns a fixed dummy resource.
+    @Override
+    public Resource getTotalResources() throws ServicePluginException {
+      getContext()
+          .reportError(ServicePluginErrorDefaults.SERVICE_UNAVAILABLE, "ReportError", new DagInfoImplForTest(DAG_INDEX, DAG_NAME));
+      return Resource.newInstance(1024, 1);
+    }
+
+    @Override
+    public int getClusterNodeCount() throws ServicePluginException {
+      return 0;
+    }
+
+    @Override
+    public void blacklistNode(NodeId nodeId) throws ServicePluginException {
+      // Intentionally a no-op.
+    }
+
+    @Override
+    public void unblacklistNode(NodeId nodeId) throws ServicePluginException {
+      // Intentionally a no-op.
+    }
+
+    @Override
+    public void allocateTask(Object task, Resource capability, String[] hosts, String[] racks,
+                             Priority priority, Object containerSignature,
+                             Object clientCookie) throws
+        ServicePluginException {
+      // Intentionally a no-op.
+    }
+
+    @Override
+    public void allocateTask(Object task, Resource capability, ContainerId containerId,
+                             Priority priority, Object containerSignature,
+                             Object clientCookie) throws
+        ServicePluginException {
+      // Intentionally a no-op.
+    }
+
+    @Override
+    public boolean deallocateTask(Object task, boolean taskSucceeded,
+                                  TaskAttemptEndReason endReason,
+                                  @Nullable String diagnostics) throws ServicePluginException {
+      return false;
+    }
+
+    @Override
+    public Object deallocateContainer(ContainerId containerId) throws ServicePluginException {
+      return null;
+    }
+
+    @Override
+    public void setShouldUnregister() throws ServicePluginException {
+      // Intentionally a no-op.
+    }
+
+    @Override
+    public boolean hasUnregistered() throws ServicePluginException {
+      return false;
+    }
+
+    @Override
+    public void dagComplete() throws ServicePluginException {
+    }
+  }
 }

http://git-wip-us.apache.org/repos/asf/tez/blob/a812c346/tez-dag/src/test/java/org/apache/tez/dag/helpers/DagInfoImplForTest.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/test/java/org/apache/tez/dag/helpers/DagInfoImplForTest.java b/tez-dag/src/test/java/org/apache/tez/dag/helpers/DagInfoImplForTest.java
new file mode 100644
index 0000000..f92513f
--- /dev/null
+++ b/tez-dag/src/test/java/org/apache/tez/dag/helpers/DagInfoImplForTest.java
@@ -0,0 +1,38 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tez.dag.helpers;
+
+import org.apache.tez.serviceplugins.api.DagInfo;
+
+/** Minimal immutable {@link DagInfo} for tests: holds a fixed dag index and dag name. */
+public class DagInfoImplForTest implements DagInfo {
+  private final int dagIndex;
+  private final String dagName;
+
+  public DagInfoImplForTest(int index, String name) {
+    dagIndex = index;
+    dagName = name;
+  }
+
+  @Override
+  public int getIndex() {
+    return dagIndex;
+  }
+
+  @Override
+  public String getName() {
+    return dagName;
+  }
+}

http://git-wip-us.apache.org/repos/asf/tez/blob/a812c346/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/ErrorPluginConfiguration.java
----------------------------------------------------------------------
diff --git a/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/ErrorPluginConfiguration.java b/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/ErrorPluginConfiguration.java
new file mode 100644
index 0000000..32d1fb6
--- /dev/null
+++ b/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/ErrorPluginConfiguration.java
@@ -0,0 +1,134 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tez.dag.app;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.ObjectInputStream;
+import java.io.ObjectOutputStream;
+import java.nio.ByteBuffer;
+import java.util.HashMap;
+
+import org.apache.tez.dag.api.UserPayload;
+import org.apache.tez.serviceplugins.api.ServicePluginContextBase;
+import org.apache.tez.serviceplugins.api.ServicePluginErrorDefaults;
+
+/** Test-only settings, shipped as a UserPayload, telling error-injecting plugins how to fail. */
+public class ErrorPluginConfiguration {
+
+  public static final String REPORT_FATAL_ERROR_MESSAGE = "ReportedFatalError";
+  public static final String REPORT_NONFATAL_ERROR_MESSAGE = "ReportedError";
+  public static final String THROW_ERROR_EXCEPTION_STRING = "Simulated Error";
+
+  private static final String CONF_THROW_ERROR = "throw.error";
+  private static final String CONF_REPORT_ERROR = "report.error";
+  private static final String CONF_REPORT_ERROR_FATAL = "report.error.fatal";
+  private static final String CONF_REPORT_ERROR_DAG_NAME = "report.error.dag.name";
+
+  // Kept as a concrete HashMap because the map itself is what gets Java-serialized.
+  private final HashMap<String, String> kv;
+
+  private ErrorPluginConfiguration() {
+    this.kv = new HashMap<>();
+  }
+
+  private ErrorPluginConfiguration(HashMap<String, String> map) {
+    this.kv = map;
+  }
+
+  /** Creates a configuration under which {@link #processError} throws a RuntimeException. */
+  public static ErrorPluginConfiguration createThrowErrorConf() {
+    ErrorPluginConfiguration conf = new ErrorPluginConfiguration();
+    conf.kv.put(CONF_THROW_ERROR, String.valueOf(true));
+    return conf;
+  }
+
+  /** Creates a configuration that reports a fatal error; {@code "*"} matches any dag name. */
+  public static ErrorPluginConfiguration createReportFatalErrorConf(String dagName) {
+    return createReportErrorConf(true, dagName);
+  }
+
+  /** Creates a configuration that reports a non-fatal error; {@code "*"} matches any dag name. */
+  public static ErrorPluginConfiguration createReportNonFatalErrorConf(String dagName) {
+    return createReportErrorConf(false, dagName);
+  }
+
+  private static ErrorPluginConfiguration createReportErrorConf(boolean fatal, String dagName) {
+    ErrorPluginConfiguration conf = new ErrorPluginConfiguration();
+    conf.kv.put(CONF_REPORT_ERROR, String.valueOf(true));
+    conf.kv.put(CONF_REPORT_ERROR_FATAL, String.valueOf(fatal));
+    conf.kv.put(CONF_REPORT_ERROR_DAG_NAME, dagName);
+    return conf;
+  }
+
+  /** Serializes the key/value map of {@code conf} into a UserPayload. */
+  public static UserPayload toUserPayload(ErrorPluginConfiguration conf) throws IOException {
+    ByteArrayOutputStream baos = new ByteArrayOutputStream();
+    try (ObjectOutputStream oos = new ObjectOutputStream(baos)) {
+      oos.writeObject(conf.kv);
+    }
+    return UserPayload.create(ByteBuffer.wrap(baos.toByteArray()));
+  }
+
+  /** Inverse of {@link #toUserPayload}. Java deserialization: never pass untrusted payloads. */
+  @SuppressWarnings("unchecked")
+  public static ErrorPluginConfiguration toErrorPluginConfiguration(UserPayload userPayload) throws
+      IOException, ClassNotFoundException {
+    ByteBuffer payload = userPayload.getPayload();
+    byte[] b = new byte[payload.remaining()];
+    payload.get(b);
+    try (ObjectInputStream ois = new ObjectInputStream(new ByteArrayInputStream(b))) {
+      return new ErrorPluginConfiguration((HashMap<String, String>) ois.readObject());
+    }
+  }
+
+  public boolean shouldThrowError() {
+    return Boolean.parseBoolean(kv.get(CONF_THROW_ERROR));
+  }
+
+  public boolean shouldReportFatalError(String dagName) {
+    return shouldReportError(true, dagName);
+  }
+
+  public boolean shouldReportNonFatalError(String dagName) {
+    return shouldReportError(false, dagName);
+  }
+
+  // True when reporting is enabled with the given severity and dagName is unset or matches.
+  private boolean shouldReportError(boolean fatal, String dagName) {
+    if (!Boolean.parseBoolean(kv.get(CONF_REPORT_ERROR)) ||
+        Boolean.parseBoolean(kv.get(CONF_REPORT_ERROR_FATAL)) != fatal) {
+      return false;
+    }
+    // Null-safe comparisons: a payload without a configured dag name must not NPE here.
+    String configured = kv.get(CONF_REPORT_ERROR_DAG_NAME);
+    return dagName == null || dagName.isEmpty() || "*".equals(configured) ||
+        dagName.equals(configured);
+  }
+
+  /** Throws or reports to {@code context} as dictated by {@code conf}; otherwise does nothing. */
+  public static void processError(ErrorPluginConfiguration conf, ServicePluginContextBase context) {
+    if (conf.shouldThrowError()) {
+      throw new RuntimeException(THROW_ERROR_EXCEPTION_STRING);
+    } else if (conf.shouldReportFatalError(null)) {
+      context.reportError(ServicePluginErrorDefaults.INCONSISTENT_STATE,
+          REPORT_FATAL_ERROR_MESSAGE, context.getCurrentDagInfo());
+    } else if (conf.shouldReportNonFatalError(null)) {
+      context.reportError(ServicePluginErrorDefaults.SERVICE_UNAVAILABLE,
+          REPORT_NONFATAL_ERROR_MESSAGE, context.getCurrentDagInfo());
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/tez/blob/a812c346/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/launcher/TezTestServiceContainerLauncherWithErrors.java
----------------------------------------------------------------------
diff --git a/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/launcher/TezTestServiceContainerLauncherWithErrors.java b/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/launcher/TezTestServiceContainerLauncherWithErrors.java
index d489cca..b4ea176 100644
--- a/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/launcher/TezTestServiceContainerLauncherWithErrors.java
+++ b/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/launcher/TezTestServiceContainerLauncherWithErrors.java
@@ -14,24 +14,33 @@
 
 package org.apache.tez.dag.app.launcher;
 
+import java.io.IOException;
+
+import org.apache.tez.dag.app.ErrorPluginConfiguration;
 import org.apache.tez.serviceplugins.api.ContainerLaunchRequest;
 import org.apache.tez.serviceplugins.api.ContainerLauncher;
 import org.apache.tez.serviceplugins.api.ContainerLauncherContext;
 import org.apache.tez.serviceplugins.api.ContainerStopRequest;
+import org.apache.tez.serviceplugins.api.ServicePluginErrorDefaults;
 
 public class TezTestServiceContainerLauncherWithErrors extends ContainerLauncher {
+
+  private final ErrorPluginConfiguration conf;
+
   public TezTestServiceContainerLauncherWithErrors(
-      ContainerLauncherContext containerLauncherContext) {
+      ContainerLauncherContext containerLauncherContext) throws IOException,
+      ClassNotFoundException {
     super(containerLauncherContext);
+    conf = ErrorPluginConfiguration.toErrorPluginConfiguration(containerLauncherContext.getInitialUserPayload());
   }
 
   @Override
   public void launchContainer(ContainerLaunchRequest launchRequest) {
-    throw new RuntimeException("Simulated Error");
+    ErrorPluginConfiguration.processError(conf, getContext());
   }
 
   @Override
   public void stopContainer(ContainerStopRequest stopRequest) {
-    throw new RuntimeException("Simulated Error");
+    ErrorPluginConfiguration.processError(conf, getContext());
   }
-}
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tez/blob/a812c346/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/rm/TezTestServiceTaskSchedulerServiceWithErrors.java
----------------------------------------------------------------------
diff --git a/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/rm/TezTestServiceTaskSchedulerServiceWithErrors.java b/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/rm/TezTestServiceTaskSchedulerServiceWithErrors.java
index 1705eac..13d4815 100644
--- a/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/rm/TezTestServiceTaskSchedulerServiceWithErrors.java
+++ b/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/rm/TezTestServiceTaskSchedulerServiceWithErrors.java
@@ -16,18 +16,25 @@ package org.apache.tez.dag.app.rm;
 
 import javax.annotation.Nullable;
 
+import java.io.IOException;
+
 import org.apache.hadoop.yarn.api.records.ContainerId;
 import org.apache.hadoop.yarn.api.records.NodeId;
 import org.apache.hadoop.yarn.api.records.Priority;
 import org.apache.hadoop.yarn.api.records.Resource;
+import org.apache.tez.dag.app.ErrorPluginConfiguration;
 import org.apache.tez.serviceplugins.api.TaskAttemptEndReason;
 import org.apache.tez.serviceplugins.api.TaskScheduler;
 import org.apache.tez.serviceplugins.api.TaskSchedulerContext;
 
 public class TezTestServiceTaskSchedulerServiceWithErrors extends TaskScheduler {
+
+  private final ErrorPluginConfiguration conf;
+
   public TezTestServiceTaskSchedulerServiceWithErrors(
-      TaskSchedulerContext taskSchedulerContext) {
+      TaskSchedulerContext taskSchedulerContext) throws IOException, ClassNotFoundException {
     super(taskSchedulerContext);
+    conf = ErrorPluginConfiguration.toErrorPluginConfiguration(taskSchedulerContext.getInitialUserPayload());
   }
 
   @Override
@@ -47,35 +54,37 @@ public class TezTestServiceTaskSchedulerServiceWithErrors extends TaskScheduler
 
   @Override
   public void blacklistNode(NodeId nodeId) {
-    throw new RuntimeException("Simulated Error");
+    ErrorPluginConfiguration.processError(conf, getContext());
   }
 
   @Override
   public void unblacklistNode(NodeId nodeId) {
-    throw new RuntimeException("Simulated Error");
+    ErrorPluginConfiguration.processError(conf, getContext());
   }
 
   @Override
   public void allocateTask(Object task, Resource capability, String[] hosts, String[] racks,
                            Priority priority, Object containerSignature, Object clientCookie) {
-    throw new RuntimeException("Simulated Error");
+    ErrorPluginConfiguration.processError(conf, getContext());
   }
 
   @Override
   public void allocateTask(Object task, Resource capability, ContainerId containerId,
                            Priority priority, Object containerSignature, Object clientCookie) {
-    throw new RuntimeException("Simulated Error");
+    ErrorPluginConfiguration.processError(conf, getContext());
   }
 
   @Override
   public boolean deallocateTask(Object task, boolean taskSucceeded, TaskAttemptEndReason endReason,
                                 @Nullable String diagnostics) {
-    throw new RuntimeException("Simulated Error");
+    ErrorPluginConfiguration.processError(conf, getContext());
+    return true;
   }
 
   @Override
   public Object deallocateContainer(ContainerId containerId) {
-    throw new RuntimeException("Simulated Error");
+    ErrorPluginConfiguration.processError(conf, getContext());
+    return null;
   }
 
   @Override

http://git-wip-us.apache.org/repos/asf/tez/blob/a812c346/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/taskcomm/TezTestServiceTaskCommunicatorWithErrors.java
----------------------------------------------------------------------
diff --git a/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/taskcomm/TezTestServiceTaskCommunicatorWithErrors.java b/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/taskcomm/TezTestServiceTaskCommunicatorWithErrors.java
index 90313d4..8221957 100644
--- a/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/taskcomm/TezTestServiceTaskCommunicatorWithErrors.java
+++ b/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/taskcomm/TezTestServiceTaskCommunicatorWithErrors.java
@@ -15,6 +15,7 @@
 package org.apache.tez.dag.app.taskcomm;
 
 import javax.annotation.Nullable;
+import java.io.IOException;
 import java.net.InetSocketAddress;
 import java.util.Map;
 
@@ -22,6 +23,8 @@ import org.apache.hadoop.net.NetUtils;
 import org.apache.hadoop.security.Credentials;
 import org.apache.hadoop.yarn.api.records.ContainerId;
 import org.apache.hadoop.yarn.api.records.LocalResource;
+import org.apache.tez.dag.app.ErrorPluginConfiguration;
+import org.apache.tez.serviceplugins.api.ServicePluginErrorDefaults;
 import org.apache.tez.serviceplugins.api.TaskCommunicator;
 import org.apache.tez.serviceplugins.api.TaskCommunicatorContext;
 import org.apache.tez.dag.api.event.VertexStateUpdate;
@@ -31,20 +34,24 @@ import org.apache.tez.serviceplugins.api.ContainerEndReason;
 import org.apache.tez.serviceplugins.api.TaskAttemptEndReason;
 
 public class TezTestServiceTaskCommunicatorWithErrors extends TaskCommunicator {
+
+  private final ErrorPluginConfiguration conf;
+
   public TezTestServiceTaskCommunicatorWithErrors(
-      TaskCommunicatorContext taskCommunicatorContext) {
+      TaskCommunicatorContext taskCommunicatorContext) throws IOException, ClassNotFoundException {
     super(taskCommunicatorContext);
+    conf = ErrorPluginConfiguration.toErrorPluginConfiguration(taskCommunicatorContext.getInitialUserPayload());
   }
 
   @Override
   public void registerRunningContainer(ContainerId containerId, String hostname, int port) {
-    throw new RuntimeException("Simulated Error");
+    ErrorPluginConfiguration.processError(conf, getContext());
   }
 
   @Override
   public void registerContainerEnd(ContainerId containerId, ContainerEndReason endReason,
                                    @Nullable String diagnostics) {
-    throw new RuntimeException("Simulated Error");
+    ErrorPluginConfiguration.processError(conf, getContext());
   }
 
   @Override
@@ -52,14 +59,14 @@ public class TezTestServiceTaskCommunicatorWithErrors extends TaskCommunicator {
                                          Map<String, LocalResource> additionalResources,
                                          Credentials credentials, boolean credentialsChanged,
                                          int priority) {
-    throw new RuntimeException("Simulated Error");
+    ErrorPluginConfiguration.processError(conf, getContext());
   }
 
   @Override
   public void unregisterRunningTaskAttempt(TezTaskAttemptID taskAttemptID,
                                            TaskAttemptEndReason endReason,
                                            @Nullable String diagnostics) {
-    throw new RuntimeException("Simulated Error");
+    ErrorPluginConfiguration.processError(conf, getContext());
   }
 
   @Override
@@ -69,7 +76,7 @@ public class TezTestServiceTaskCommunicatorWithErrors extends TaskCommunicator {
 
   @Override
   public void onVertexStateUpdated(VertexStateUpdate stateUpdate) {
-    throw new RuntimeException("Simulated Error");
+    ErrorPluginConfiguration.processError(conf, getContext());
   }
 
   @Override
@@ -78,6 +85,7 @@ public class TezTestServiceTaskCommunicatorWithErrors extends TaskCommunicator {
 
   @Override
   public Object getMetaInfo() {
-    throw new RuntimeException("Simulated Error");
+    ErrorPluginConfiguration.processError(conf, getContext());
+    return null;
   }
 }

http://git-wip-us.apache.org/repos/asf/tez/blob/a812c346/tez-ext-service-tests/src/test/java/org/apache/tez/tests/TestExternalTezServicesErrors.java
----------------------------------------------------------------------
diff --git a/tez-ext-service-tests/src/test/java/org/apache/tez/tests/TestExternalTezServicesErrors.java b/tez-ext-service-tests/src/test/java/org/apache/tez/tests/TestExternalTezServicesErrors.java
index bfd3ed2..ac6ebde 100644
--- a/tez-ext-service-tests/src/test/java/org/apache/tez/tests/TestExternalTezServicesErrors.java
+++ b/tez-ext-service-tests/src/test/java/org/apache/tez/tests/TestExternalTezServicesErrors.java
@@ -19,7 +19,9 @@ import static org.junit.Assert.assertTrue;
 
 import java.io.IOException;
 import java.util.EnumSet;
+import java.util.List;
 
+import com.google.common.collect.Lists;
 import com.google.common.collect.Sets;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
@@ -40,6 +42,7 @@ import org.apache.tez.dag.api.Vertex;
 import org.apache.tez.dag.api.client.DAGClient;
 import org.apache.tez.dag.api.client.DAGStatus;
 import org.apache.tez.dag.api.client.StatusGetOpts;
+import org.apache.tez.dag.app.ErrorPluginConfiguration;
 import org.apache.tez.dag.app.dag.event.DAGAppMasterEventType;
 import org.apache.tez.dag.app.launcher.TezTestServiceContainerLauncherWithErrors;
 import org.apache.tez.dag.app.launcher.TezTestServiceNoOpContainerLauncher;
@@ -49,6 +52,7 @@ import org.apache.tez.dag.app.taskcomm.TezTestServiceTaskCommunicatorImpl;
 import org.apache.tez.dag.app.taskcomm.TezTestServiceTaskCommunicatorWithErrors;
 import org.apache.tez.examples.JoinValidateConfigured;
 import org.apache.tez.serviceplugins.api.ContainerLauncherDescriptor;
+import org.apache.tez.serviceplugins.api.ServicePluginErrorDefaults;
 import org.apache.tez.serviceplugins.api.ServicePluginsDescriptor;
 import org.apache.tez.serviceplugins.api.TaskCommunicatorDescriptor;
 import org.apache.tez.serviceplugins.api.TaskSchedulerDescriptor;
@@ -63,7 +67,13 @@ public class TestExternalTezServicesErrors {
   private static final Logger LOG = LoggerFactory.getLogger(TestExternalTezServicesErrors.class);
 
   private static final String EXT_PUSH_ENTITY_NAME = "ExtServiceTestPush";
-  private static final String EXT_FAIL_ENTITY_NAME = "ExtServiceTestFail";
+  private static final String EXT_THROW_ERROR_ENTITY_NAME = "ExtServiceTestThrowErrors";
+  private static final String EXT_REPORT_NON_FATAL_ERROR_ENTITY_NAME = "ExtServiceTestReportNonFatalErrors";
+  private static final String EXT_REPORT_FATAL_ERROR_ENTITY_NAME = "ExtServiceTestReportFatalErrors";
+
+  private static final String SUFFIX_LAUNCHER = "ContainerLauncher";
+  private static final String SUFFIX_TASKCOMM = "TaskCommunicator";
+  private static final String SUFFIX_SCHEDULER = "TaskScheduler";
 
   private static ExternalTezServiceTestHelper extServiceTestHelper;
 
@@ -76,12 +86,32 @@ public class TestExternalTezServicesErrors {
   private static final Vertex.VertexExecutionContext EXECUTION_CONTEXT_EXT_SERVICE_PUSH =
       Vertex.VertexExecutionContext.create(
           EXT_PUSH_ENTITY_NAME, EXT_PUSH_ENTITY_NAME, EXT_PUSH_ENTITY_NAME);
-  private static final Vertex.VertexExecutionContext EXECUTION_CONTEXT_LAUNCHER_FAIL =
-      Vertex.VertexExecutionContext.create(EXT_PUSH_ENTITY_NAME, EXT_FAIL_ENTITY_NAME, EXT_PUSH_ENTITY_NAME);
-  private static final Vertex.VertexExecutionContext EXECUTION_CONTEXT_TASKCOMM_FAIL =
-      Vertex.VertexExecutionContext.create(EXT_PUSH_ENTITY_NAME, EXT_PUSH_ENTITY_NAME, EXT_FAIL_ENTITY_NAME);
-  private static final Vertex.VertexExecutionContext EXECUTION_CONTEXT_SCHEDULER_FAIL =
-      Vertex.VertexExecutionContext.create(EXT_FAIL_ENTITY_NAME, EXT_PUSH_ENTITY_NAME, EXT_PUSH_ENTITY_NAME);
+  // Throw error contexts
+  private static final Vertex.VertexExecutionContext EXECUTION_CONTEXT_LAUNCHER_THROW =
+      Vertex.VertexExecutionContext.create(EXT_PUSH_ENTITY_NAME, EXT_THROW_ERROR_ENTITY_NAME, EXT_PUSH_ENTITY_NAME);
+  private static final Vertex.VertexExecutionContext EXECUTION_CONTEXT_TASKCOMM_THROW =
+      Vertex.VertexExecutionContext.create(EXT_PUSH_ENTITY_NAME, EXT_PUSH_ENTITY_NAME,
+          EXT_THROW_ERROR_ENTITY_NAME);
+  private static final Vertex.VertexExecutionContext EXECUTION_CONTEXT_SCHEDULER_THROW =
+      Vertex.VertexExecutionContext.create(EXT_THROW_ERROR_ENTITY_NAME, EXT_PUSH_ENTITY_NAME, EXT_PUSH_ENTITY_NAME);
+
+  // Report-non-fatal contexts
+  private static final Vertex.VertexExecutionContext EXECUTION_CONTEXT_LAUNCHER_REPORT_NON_FATAL =
+      Vertex.VertexExecutionContext.create(EXT_PUSH_ENTITY_NAME, EXT_REPORT_NON_FATAL_ERROR_ENTITY_NAME, EXT_PUSH_ENTITY_NAME);
+  private static final Vertex.VertexExecutionContext EXECUTION_CONTEXT_TASKCOMM_REPORT_NON_FATAL =
+      Vertex.VertexExecutionContext.create(EXT_PUSH_ENTITY_NAME, EXT_PUSH_ENTITY_NAME,
+          EXT_REPORT_NON_FATAL_ERROR_ENTITY_NAME);
+  private static final Vertex.VertexExecutionContext EXECUTION_CONTEXT_SCHEDULER_REPORT_NON_FATAL =
+      Vertex.VertexExecutionContext.create(EXT_REPORT_NON_FATAL_ERROR_ENTITY_NAME, EXT_PUSH_ENTITY_NAME, EXT_PUSH_ENTITY_NAME);
+
+  // Report fatal contexts
+  private static final Vertex.VertexExecutionContext EXECUTION_CONTEXT_LAUNCHER_REPORT_FATAL =
+      Vertex.VertexExecutionContext.create(EXT_PUSH_ENTITY_NAME, EXT_REPORT_FATAL_ERROR_ENTITY_NAME, EXT_PUSH_ENTITY_NAME);
+  private static final Vertex.VertexExecutionContext EXECUTION_CONTEXT_TASKCOMM_REPORT_FATAL =
+      Vertex.VertexExecutionContext.create(EXT_PUSH_ENTITY_NAME, EXT_PUSH_ENTITY_NAME,
+          EXT_REPORT_FATAL_ERROR_ENTITY_NAME);
+  private static final Vertex.VertexExecutionContext EXECUTION_CONTEXT_SCHEDULER_REPORT_FATAL =
+      Vertex.VertexExecutionContext.create(EXT_REPORT_FATAL_ERROR_ENTITY_NAME, EXT_PUSH_ENTITY_NAME, EXT_PUSH_ENTITY_NAME);
 
 
   private static final Vertex.VertexExecutionContext EXECUTION_CONTEXT_DEFAULT = EXECUTION_CONTEXT_EXT_SERVICE_PUSH;
@@ -93,29 +123,63 @@ public class TestExternalTezServicesErrors {
   public static void setup() throws Exception {
 
     extServiceTestHelper = new ExternalTezServiceTestHelper(TEST_ROOT_DIR);
-    UserPayload userPayload = TezUtils.createUserPayloadFromConf(extServiceTestHelper.getConfForJobs());
+    UserPayload userPayload =
+        TezUtils.createUserPayloadFromConf(extServiceTestHelper.getConfForJobs());
+    UserPayload userPayloadThrowError =
+        ErrorPluginConfiguration.toUserPayload(ErrorPluginConfiguration.createThrowErrorConf());
+
+    UserPayload userPayloadReportFatalErrorLauncher = ErrorPluginConfiguration
+        .toUserPayload(ErrorPluginConfiguration.createReportFatalErrorConf(SUFFIX_LAUNCHER));
+    UserPayload userPayloadReportFatalErrorTaskComm = ErrorPluginConfiguration
+        .toUserPayload(ErrorPluginConfiguration.createReportFatalErrorConf(SUFFIX_TASKCOMM));
+    UserPayload userPayloadReportFatalErrorScheduler = ErrorPluginConfiguration
+        .toUserPayload(ErrorPluginConfiguration.createReportFatalErrorConf(SUFFIX_SCHEDULER));
+
+    UserPayload userPayloadReportNonFatalErrorLauncher = ErrorPluginConfiguration
+        .toUserPayload(ErrorPluginConfiguration.createReportNonFatalErrorConf(SUFFIX_LAUNCHER));
+    UserPayload userPayloadReportNonFatalErrorTaskComm = ErrorPluginConfiguration
+        .toUserPayload(ErrorPluginConfiguration.createReportNonFatalErrorConf(SUFFIX_TASKCOMM));
+    UserPayload userPayloadReportNonFatalErrorScheduler = ErrorPluginConfiguration
+        .toUserPayload(ErrorPluginConfiguration.createReportNonFatalErrorConf(SUFFIX_SCHEDULER));
 
     TaskSchedulerDescriptor[] taskSchedulerDescriptors = new TaskSchedulerDescriptor[]{
         TaskSchedulerDescriptor
             .create(EXT_PUSH_ENTITY_NAME, TezTestServiceTaskSchedulerService.class.getName())
             .setUserPayload(userPayload),
-        TaskSchedulerDescriptor.create(EXT_FAIL_ENTITY_NAME,
+        TaskSchedulerDescriptor.create(EXT_THROW_ERROR_ENTITY_NAME,
+            TezTestServiceTaskSchedulerServiceWithErrors.class.getName()).setUserPayload(
+            userPayloadThrowError),
+        TaskSchedulerDescriptor.create(EXT_REPORT_FATAL_ERROR_ENTITY_NAME,
+            TezTestServiceTaskSchedulerServiceWithErrors.class.getName()).setUserPayload(
+            userPayloadReportFatalErrorScheduler),
+        TaskSchedulerDescriptor.create(EXT_REPORT_NON_FATAL_ERROR_ENTITY_NAME,
             TezTestServiceTaskSchedulerServiceWithErrors.class.getName()).setUserPayload(
-            userPayload)};
+            userPayloadReportNonFatalErrorScheduler),
+    };
 
     ContainerLauncherDescriptor[] containerLauncherDescriptors = new ContainerLauncherDescriptor[]{
         ContainerLauncherDescriptor
             .create(EXT_PUSH_ENTITY_NAME, TezTestServiceNoOpContainerLauncher.class.getName())
             .setUserPayload(userPayload),
-        ContainerLauncherDescriptor.create(EXT_FAIL_ENTITY_NAME,
-            TezTestServiceContainerLauncherWithErrors.class.getName()).setUserPayload(userPayload)};
+        ContainerLauncherDescriptor.create(EXT_THROW_ERROR_ENTITY_NAME,
+            TezTestServiceContainerLauncherWithErrors.class.getName()).setUserPayload(userPayloadThrowError),
+        ContainerLauncherDescriptor.create(EXT_REPORT_FATAL_ERROR_ENTITY_NAME,
+            TezTestServiceContainerLauncherWithErrors.class.getName()).setUserPayload(userPayloadReportFatalErrorLauncher),
+        ContainerLauncherDescriptor.create(EXT_REPORT_NON_FATAL_ERROR_ENTITY_NAME,
+            TezTestServiceContainerLauncherWithErrors.class.getName()).setUserPayload(userPayloadReportNonFatalErrorLauncher)
+    };
 
     TaskCommunicatorDescriptor[] taskCommunicatorDescriptors = new TaskCommunicatorDescriptor[]{
         TaskCommunicatorDescriptor
             .create(EXT_PUSH_ENTITY_NAME, TezTestServiceTaskCommunicatorImpl.class.getName())
             .setUserPayload(userPayload),
-        TaskCommunicatorDescriptor.create(EXT_FAIL_ENTITY_NAME,
-            TezTestServiceTaskCommunicatorWithErrors.class.getName()).setUserPayload(userPayload)};
+        TaskCommunicatorDescriptor.create(EXT_THROW_ERROR_ENTITY_NAME,
+            TezTestServiceTaskCommunicatorWithErrors.class.getName()).setUserPayload(userPayloadThrowError),
+        TaskCommunicatorDescriptor.create(EXT_REPORT_FATAL_ERROR_ENTITY_NAME,
+            TezTestServiceTaskCommunicatorWithErrors.class.getName()).setUserPayload(userPayloadReportFatalErrorTaskComm),
+        TaskCommunicatorDescriptor.create(EXT_REPORT_NON_FATAL_ERROR_ENTITY_NAME,
+            TezTestServiceTaskCommunicatorWithErrors.class.getName()).setUserPayload(userPayloadReportNonFatalErrorTaskComm)
+    };
 
     servicePluginsDescriptor = ServicePluginsDescriptor.create(true, true,
         taskSchedulerDescriptors, containerLauncherDescriptors, taskCommunicatorDescriptors);
@@ -137,35 +201,86 @@ public class TestExternalTezServicesErrors {
     extServiceTestHelper.tearDownAll();
   }
 
-  @Test (timeout = 90000)
-  public void testContainerLauncherError() throws Exception {
-    testServiceError("_testContainerLauncherError_", EXECUTION_CONTEXT_LAUNCHER_FAIL,
-        DAGAppMasterEventType.CONTAINER_LAUNCHER_SERVICE_FATAL_ERROR);
+  @Test(timeout = 90000)
+  public void testContainerLauncherThrowError() throws Exception {
+    testFatalError("_testContainerLauncherError_", EXECUTION_CONTEXT_LAUNCHER_THROW,
+        SUFFIX_LAUNCHER, Lists.newArrayList("Service Error",
+            DAGAppMasterEventType.CONTAINER_LAUNCHER_SERVICE_FATAL_ERROR.name()));
+  }
+
+  @Test(timeout = 90000)
+  public void testTaskCommunicatorThrowError() throws Exception {
+    testFatalError("_testContainerLauncherError_", EXECUTION_CONTEXT_TASKCOMM_THROW,
+        SUFFIX_TASKCOMM, Lists.newArrayList("Service Error",
+            DAGAppMasterEventType.TASK_COMMUNICATOR_SERVICE_FATAL_ERROR.name()));
+  }
+
+  @Test(timeout = 90000)
+  public void testTaskSchedulerThrowError() throws Exception {
+    testFatalError("_testContainerLauncherError_", EXECUTION_CONTEXT_SCHEDULER_THROW,
+        SUFFIX_SCHEDULER, Lists.newArrayList("Service Error",
+            DAGAppMasterEventType.TASK_SCHEDULER_SERVICE_FATAL_ERROR.name()));
+  }
+
+  @Test (timeout = 150000)
+  public void testNonFatalErrors() throws IOException, TezException, InterruptedException {
+    String methodName = "testNonFatalErrors";
+    TezConfiguration tezClientConf = new TezConfiguration(extServiceTestHelper.getConfForJobs());
+    TezClient tezClient = TezClient
+        .newBuilder(TestExternalTezServicesErrors.class.getSimpleName() + methodName + "_session",
+            tezClientConf)
+        .setIsSession(true).setServicePluginDescriptor(servicePluginsDescriptor).build();
+    try {
+      tezClient.start();
+      LOG.info("TezSessionStarted for " + methodName);
+      tezClient.waitTillReady();
+      LOG.info("TezSession ready for submission for " + methodName);
+
+
+      runAndVerifyForNonFatalErrors(tezClient, SUFFIX_LAUNCHER, EXECUTION_CONTEXT_LAUNCHER_REPORT_NON_FATAL);
+      runAndVerifyForNonFatalErrors(tezClient, SUFFIX_TASKCOMM, EXECUTION_CONTEXT_TASKCOMM_REPORT_NON_FATAL);
+      runAndVerifyForNonFatalErrors(tezClient, SUFFIX_SCHEDULER, EXECUTION_CONTEXT_SCHEDULER_REPORT_NON_FATAL);
+
+    } finally {
+      tezClient.stop();
+    }
+  }
+
+  @Test(timeout = 90000)
+  public void testContainerLauncherReportFatalError() throws Exception {
+    testFatalError("_testContainerLauncherReportFatalError_",
+        EXECUTION_CONTEXT_LAUNCHER_REPORT_FATAL, SUFFIX_LAUNCHER, Lists
+            .newArrayList(ErrorPluginConfiguration.REPORT_FATAL_ERROR_MESSAGE,
+                ServicePluginErrorDefaults.INCONSISTENT_STATE.name()));
   }
 
-  @Test (timeout = 90000)
-  public void testTaskCommunicatorError() throws Exception {
-    testServiceError("_testTaskCommunicatorError_", EXECUTION_CONTEXT_TASKCOMM_FAIL,
-        DAGAppMasterEventType.TASK_COMMUNICATOR_SERVICE_FATAL_ERROR);
+  @Test(timeout = 90000)
+  public void testTaskCommReportFatalError() throws Exception {
+    testFatalError("_testTaskCommReportFatalError_", EXECUTION_CONTEXT_TASKCOMM_REPORT_FATAL,
+        SUFFIX_TASKCOMM, Lists.newArrayList(ErrorPluginConfiguration.REPORT_FATAL_ERROR_MESSAGE,
+            ServicePluginErrorDefaults.INCONSISTENT_STATE.name()));
   }
 
-  @Test (timeout = 90000)
-  public void testTaskSchedulerError() throws Exception {
-    testServiceError("_testTaskSchedulerError_", EXECUTION_CONTEXT_SCHEDULER_FAIL,
-        DAGAppMasterEventType.TASK_SCHEDULER_SERVICE_FATAL_ERROR);
+  @Test(timeout = 90000)
+  public void testTaskSchedulerReportFatalError() throws Exception {
+    testFatalError("_testTaskSchedulerReportFatalError_",
+        EXECUTION_CONTEXT_SCHEDULER_REPORT_FATAL, SUFFIX_SCHEDULER,
+        Lists.newArrayList(ErrorPluginConfiguration.REPORT_FATAL_ERROR_MESSAGE,
+            ServicePluginErrorDefaults.INCONSISTENT_STATE.name()));
   }
 
-  private void testServiceError(String methodName,
-                                Vertex.VertexExecutionContext lhsExecutionContext,
-                                DAGAppMasterEventType expectedEventType) throws
-      IOException, TezException, InterruptedException, YarnException {
+
+  private void testFatalError(String methodName,
+                              Vertex.VertexExecutionContext lhsExecutionContext,
+                              String dagNameSuffix, List<String> expectedDiagMessages) throws
+      IOException, TezException, YarnException, InterruptedException {
     TezConfiguration tezClientConf = new TezConfiguration(extServiceTestHelper.getConfForJobs());
     TezClient tezClient = TezClient
         .newBuilder(TestExternalTezServicesErrors.class.getSimpleName() + methodName + "_session",
             tezClientConf)
         .setIsSession(true).setServicePluginDescriptor(servicePluginsDescriptor).build();
 
-    ApplicationId appId;
+    ApplicationId appId= null;
     try {
       tezClient.start();
       LOG.info("TezSessionStarted for " + methodName);
@@ -175,10 +290,11 @@ public class TestExternalTezServicesErrors {
       JoinValidateConfigured joinValidate =
           new JoinValidateConfigured(EXECUTION_CONTEXT_DEFAULT, lhsExecutionContext,
               EXECUTION_CONTEXT_EXT_SERVICE_PUSH,
-              EXECUTION_CONTEXT_EXT_SERVICE_PUSH, "LauncherFailTest");
+              EXECUTION_CONTEXT_EXT_SERVICE_PUSH, dagNameSuffix);
 
       DAG dag = joinValidate
-          .createDag(new TezConfiguration(extServiceTestHelper.getConfForJobs()), HASH_JOIN_EXPECTED_RESULT_PATH,
+          .createDag(new TezConfiguration(extServiceTestHelper.getConfForJobs()),
+              HASH_JOIN_EXPECTED_RESULT_PATH,
               HASH_JOIN_OUTPUT_PATH, 3);
 
       DAGClient dagClient = tezClient.submitDAG(dag);
@@ -188,14 +304,15 @@ public class TestExternalTezServicesErrors {
       assertEquals(DAGStatus.State.ERROR, dagStatus.getState());
       boolean foundDiag = false;
       for (String diag : dagStatus.getDiagnostics()) {
-        if (diag.contains("Service Error") && diag.contains(
-            expectedEventType.toString()) &&
-            diag.contains("Simulated Error")) {
-          foundDiag = true;
+        foundDiag = checkDiag(diag, expectedDiagMessages);
+        if (foundDiag) {
+          break;
         }
       }
       appId = tezClient.getAppMasterApplicationId();
       assertTrue(foundDiag);
+    } catch (InterruptedException e) {
+      e.printStackTrace();
     } finally {
       tezClient.stop();
     }
@@ -222,14 +339,58 @@ public class TestExternalTezServicesErrors {
         String diag = appAttemptReport.getDiagnostics();
         assertEquals(FinalApplicationStatus.FAILED, appReport.getFinalApplicationStatus());
         assertEquals(YarnApplicationState.FINISHED, appReport.getYarnApplicationState());
-        assertTrue(diag.contains("Service Error") && diag.contains(
-            expectedEventType.toString()) &&
-            diag.contains("Simulated Error"));
-
+        checkDiag(diag, expectedDiagMessages);
       } finally {
         yarnClient.stop();
       }
     }
   }
 
+  private boolean checkDiag(String diag, List<String> expected) {
+    boolean found = true;
+    for (String exp : expected) {
+      if (diag.contains(exp)) {
+        found = true;
+        continue;
+      } else {
+        found = false;
+        break;
+      }
+    }
+    return found;
+  }
+
+
+  private void runAndVerifyForNonFatalErrors(TezClient tezClient, String componentName,
+                                             Vertex.VertexExecutionContext lhsContext) throws
+      TezException,
+      InterruptedException, IOException {
+    LOG.info("Running JoinValidate with componentName reportNonFatalException");
+    JoinValidateConfigured joinValidate =
+        new JoinValidateConfigured(EXECUTION_CONTEXT_DEFAULT, lhsContext,
+            EXECUTION_CONTEXT_EXT_SERVICE_PUSH,
+            EXECUTION_CONTEXT_EXT_SERVICE_PUSH, componentName);
+
+    DAG dag = joinValidate
+        .createDag(new TezConfiguration(extServiceTestHelper.getConfForJobs()),
+            HASH_JOIN_EXPECTED_RESULT_PATH,
+            HASH_JOIN_OUTPUT_PATH, 3);
+
+    DAGClient dagClient = tezClient.submitDAG(dag);
+
+    DAGStatus dagStatus =
+        dagClient.waitForCompletionWithStatusUpdates(Sets.newHashSet(StatusGetOpts.GET_COUNTERS));
+    assertEquals(DAGStatus.State.FAILED, dagStatus.getState());
+
+    boolean foundDiag = false;
+    for (String diag : dagStatus.getDiagnostics()) {
+      if (diag.contains(ErrorPluginConfiguration.REPORT_NONFATAL_ERROR_MESSAGE) &&
+          diag.contains(ServicePluginErrorDefaults.SERVICE_UNAVAILABLE.name())) {
+        foundDiag = true;
+        break;
+      }
+    }
+    assertTrue(foundDiag);
+  }
+
 }


[19/24] tez git commit: Fixing CHANGES.txt (rbalamohan)

Posted by sr...@apache.org.
Fixing CHANGES.txt (rbalamohan)


Project: http://git-wip-us.apache.org/repos/asf/tez/repo
Commit: http://git-wip-us.apache.org/repos/asf/tez/commit/e24bf448
Tree: http://git-wip-us.apache.org/repos/asf/tez/tree/e24bf448
Diff: http://git-wip-us.apache.org/repos/asf/tez/diff/e24bf448

Branch: refs/heads/TEZ-2980
Commit: e24bf44869de45d8dd18b3e724d52ddbfea7f66a
Parents: a2c590b
Author: Rajesh Balamohan <rb...@apache.org>
Authored: Fri Feb 12 10:36:52 2016 -0800
Committer: Rajesh Balamohan <rb...@apache.org>
Committed: Fri Feb 12 10:36:52 2016 -0800

----------------------------------------------------------------------
 CHANGES.txt | 1 +
 1 file changed, 1 insertion(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tez/blob/e24bf448/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index 5f09280..8bd6f73 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -333,6 +333,7 @@ INCOMPATIBLE CHANGES
 
 ALL CHANGES
   TEZ-3103. Shuffle can hang when memory to memory merging enabled
+  TEZ-3107. tez-tools: Log warn msgs in case ATS has wrong values (e.g startTime > finishTime).
   TEZ-3104. Tez fails on Bzip2 intermediate output format on hadoop 2.7.1 and earlier
   TEZ-3093. CriticalPathAnalyzer should be accessible via zeppelin.
   TEZ-3089. TaskConcurrencyAnalyzer can return negative task count with very large jobs.


[12/24] tez git commit: TEZ-3093. CriticalPathAnalyzer should be accessible via zeppelin (rbalamohan)

Posted by sr...@apache.org.
TEZ-3093. CriticalPathAnalyzer should be accessible via zeppelin (rbalamohan)


Project: http://git-wip-us.apache.org/repos/asf/tez/repo
Commit: http://git-wip-us.apache.org/repos/asf/tez/commit/12e1aae1
Tree: http://git-wip-us.apache.org/repos/asf/tez/tree/12e1aae1
Diff: http://git-wip-us.apache.org/repos/asf/tez/diff/12e1aae1

Branch: refs/heads/TEZ-2980
Commit: 12e1aae1eac834a77e3a2b5b3be1cd6d59196d48
Parents: 89bc6ab
Author: Rajesh Balamohan <rb...@apache.org>
Authored: Thu Feb 4 16:29:48 2016 -0800
Committer: Rajesh Balamohan <rb...@apache.org>
Committed: Thu Feb 4 16:29:48 2016 -0800

----------------------------------------------------------------------
 CHANGES.txt                                              |  1 +
 .../tez/analyzer/plugins/CriticalPathAnalyzer.java       | 11 ++++++++++-
 2 files changed, 11 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tez/blob/12e1aae1/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index b7bb98a..2e484eb 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -6,6 +6,7 @@ Release 0.8.3: Unreleased
 INCOMPATIBLE CHANGES
 
 ALL CHANGES:
+  TEZ-3093. CriticalPathAnalyzer should be accessible via zeppelin.
   TEZ-3089. TaskConcurrencyAnalyzer can return negative task count with very large jobs.
   TEZ-2307. Possible wrong error message when submitting new dag
   TEZ-2974. Tez tools: TFileRecordReader in tez-tools should support reading >2 GB tfiles.

http://git-wip-us.apache.org/repos/asf/tez/blob/12e1aae1/tez-tools/analyzers/job-analyzer/src/main/java/org/apache/tez/analyzer/plugins/CriticalPathAnalyzer.java
----------------------------------------------------------------------
diff --git a/tez-tools/analyzers/job-analyzer/src/main/java/org/apache/tez/analyzer/plugins/CriticalPathAnalyzer.java b/tez-tools/analyzers/job-analyzer/src/main/java/org/apache/tez/analyzer/plugins/CriticalPathAnalyzer.java
index d4efdf9..a7e37ab 100644
--- a/tez-tools/analyzers/job-analyzer/src/main/java/org/apache/tez/analyzer/plugins/CriticalPathAnalyzer.java
+++ b/tez-tools/analyzers/job-analyzer/src/main/java/org/apache/tez/analyzer/plugins/CriticalPathAnalyzer.java
@@ -62,6 +62,7 @@ public class CriticalPathAnalyzer extends TezAnalyzerBase implements Analyzer {
   }
 
   public static final String DRAW_SVG = "tez.critical-path-analyzer.draw-svg";
+  public static final String SVG_DIR = "output-dir";
 
   public static class CriticalPathStep {
     public enum EntityType {
@@ -111,6 +112,10 @@ public class CriticalPathAnalyzer extends TezAnalyzerBase implements Analyzer {
   public CriticalPathAnalyzer() {
   }
 
+  public CriticalPathAnalyzer(Configuration conf) {
+    setConf(conf);
+  }
+
   @Override 
   public void analyze(DagInfo dagInfo) throws TezException {
     // get all attempts in the dag and find the last failed/succeeded attempt.
@@ -152,7 +157,11 @@ public class CriticalPathAnalyzer extends TezAnalyzerBase implements Analyzer {
   
   private void saveCriticalPathAsSVG(DagInfo dagInfo) {
     SVGUtils svg = new SVGUtils();
-    String outputFileName = getOutputDir() + File.separator + dagInfo.getDagId() + ".svg";
+    String outputDir = getOutputDir();
+    if (outputDir == null) {
+      outputDir = getConf().get(SVG_DIR);
+    }
+    String outputFileName = outputDir + File.separator + dagInfo.getDagId() + ".svg";
     System.out.println("Writing output to: " + outputFileName);
     svg.saveCriticalPathAsSVG(dagInfo, outputFileName, criticalPath);
   }


[17/24] tez git commit: TEZ-3107. tez-tools: Log warn msgs in case ATS has wrong values (e.g startTime > finishTime) (rbalamohan)

Posted by sr...@apache.org.
TEZ-3107. tez-tools: Log warn msgs in case ATS has wrong values (e.g startTime > finishTime) (rbalamohan)


Project: http://git-wip-us.apache.org/repos/asf/tez/repo
Commit: http://git-wip-us.apache.org/repos/asf/tez/commit/7e3d5461
Tree: http://git-wip-us.apache.org/repos/asf/tez/tree/7e3d5461
Diff: http://git-wip-us.apache.org/repos/asf/tez/diff/7e3d5461

Branch: refs/heads/TEZ-2980
Commit: 7e3d5461c3b948ca1c27f386e3e9e3665b8a649e
Parents: 6f57630
Author: Rajesh Balamohan <rb...@apache.org>
Authored: Fri Feb 12 09:59:25 2016 -0800
Committer: Rajesh Balamohan <rb...@apache.org>
Committed: Fri Feb 12 09:59:25 2016 -0800

----------------------------------------------------------------------
 CHANGES.txt                                     |  1 +
 .../apache/tez/dag/app/dag/impl/DAGImpl.java    | 12 ++++++
 .../tez/history/parser/datamodel/DagInfo.java   | 33 ++++++++++++++-
 .../parser/datamodel/TaskAttemptInfo.java       | 37 +++++++++++++++-
 .../tez/history/parser/datamodel/TaskInfo.java  | 37 +++++++++++++++-
 .../history/parser/datamodel/VertexInfo.java    | 44 ++++++++++++++++++--
 .../apache/tez/history/TestHistoryParser.java   |  6 +++
 7 files changed, 161 insertions(+), 9 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tez/blob/7e3d5461/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index 61aaaa7..8cb7505 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -6,6 +6,7 @@ Release 0.8.3: Unreleased
 INCOMPATIBLE CHANGES
 
 ALL CHANGES:
+  TEZ-3107. tez-tools: Log warn msgs in case ATS has wrong values (e.g startTime > finishTime).
   TEZ-3104. Tez fails on Bzip2 intermediate output format on hadoop 2.7.1 and earlier
   TEZ-3090. MRInput should make dagIdentifier, vertexIdentifier, etc available to the InputFormat jobConf.
   TEZ-3093. CriticalPathAnalyzer should be accessible via zeppelin.

http://git-wip-us.apache.org/repos/asf/tez/blob/7e3d5461/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/DAGImpl.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/DAGImpl.java b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/DAGImpl.java
index dd6f834..88dfe27 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/DAGImpl.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/DAGImpl.java
@@ -1232,6 +1232,12 @@ public class DAGImpl implements org.apache.tez.dag.app.dag.DAG,
     if (recoveryData == null
         || recoveryData.getDAGFinishedEvent() == null) {
       Map<String, Integer> taskStats = constructTaskStats(getDAGProgress());
+      if (finishTime < startTime) {
+        LOG.warn("DAG finish time is smaller than start time. "
+            + "startTime=" + startTime
+            + ", finishTime=" + finishTime
+        );
+      }
       DAGFinishedEvent finishEvt = new DAGFinishedEvent(dagId, startTime,
           finishTime, DAGState.SUCCEEDED, "", counters,
           this.userName, this.dagName, taskStats, this.appContext.getApplicationAttemptId(),
@@ -1245,6 +1251,12 @@ public class DAGImpl implements org.apache.tez.dag.app.dag.DAG,
     if (recoveryData == null
         || recoveryData.getDAGFinishedEvent() == null) {
       Map<String, Integer> taskStats = constructTaskStats(getDAGProgress());
+      if (finishTime < startTime) {
+        LOG.warn("DAG finish time is smaller than start time. "
+            + "startTime=" + startTime
+            + ", finishTime=" + finishTime
+        );
+      }
       DAGFinishedEvent finishEvt = new DAGFinishedEvent(dagId, startTime,
           finishTime, state,
           StringUtils.join(getDiagnostics(), LINE_SEPARATOR),

http://git-wip-us.apache.org/repos/asf/tez/blob/7e3d5461/tez-plugins/tez-history-parser/src/main/java/org/apache/tez/history/parser/datamodel/DagInfo.java
----------------------------------------------------------------------
diff --git a/tez-plugins/tez-history-parser/src/main/java/org/apache/tez/history/parser/datamodel/DagInfo.java b/tez-plugins/tez-history-parser/src/main/java/org/apache/tez/history/parser/datamodel/DagInfo.java
index 5fb760c..8057be7 100644
--- a/tez-plugins/tez-history-parser/src/main/java/org/apache/tez/history/parser/datamodel/DagInfo.java
+++ b/tez-plugins/tez-history-parser/src/main/java/org/apache/tez/history/parser/datamodel/DagInfo.java
@@ -35,6 +35,7 @@ import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.util.StringInterner;
 import org.apache.tez.client.CallerContext;
 import org.apache.tez.dag.api.event.VertexState;
+import org.apache.tez.dag.history.HistoryEventType;
 import org.codehaus.jettison.json.JSONArray;
 import org.codehaus.jettison.json.JSONException;
 import org.codehaus.jettison.json.JSONObject;
@@ -98,8 +99,36 @@ public class DagInfo extends BaseInfo {
 
     //Parse additional Info
     JSONObject otherInfoNode = jsonObject.getJSONObject(Constants.OTHER_INFO);
-    startTime = otherInfoNode.optLong(Constants.START_TIME);
-    endTime = otherInfoNode.optLong(Constants.FINISH_TIME);
+
+    long sTime = otherInfoNode.optLong(Constants.START_TIME);
+    long eTime= otherInfoNode.optLong(Constants.FINISH_TIME);
+    if (eTime < sTime) {
+      LOG.warn("DAG has got wrong start/end values. "
+          + "startTime=" + sTime + ", endTime=" + eTime + ". Will check "
+          + "timestamps in DAG started/finished events");
+
+      // Check if events DAG_STARTED, DAG_FINISHED can be made use of
+      for(Event event : eventList) {
+        switch (HistoryEventType.valueOf(event.getType())) {
+        case DAG_STARTED:
+          sTime = event.getAbsoluteTime();
+          break;
+        case DAG_FINISHED:
+          eTime = event.getAbsoluteTime();
+          break;
+        default:
+          break;
+        }
+      }
+
+      if (eTime < sTime) {
+        LOG.warn("DAG has got wrong start/end values in events as well. "
+            + "startTime=" + sTime + ", endTime=" + eTime);
+      }
+    }
+    startTime = sTime;
+    endTime = eTime;
+
     //TODO: Not getting populated correctly for lots of jobs.  Verify
     submitTime = otherInfoNode.optLong(Constants.START_REQUESTED_TIME);
     diagnostics = otherInfoNode.optString(Constants.DIAGNOSTICS);

http://git-wip-us.apache.org/repos/asf/tez/blob/7e3d5461/tez-plugins/tez-history-parser/src/main/java/org/apache/tez/history/parser/datamodel/TaskAttemptInfo.java
----------------------------------------------------------------------
diff --git a/tez-plugins/tez-history-parser/src/main/java/org/apache/tez/history/parser/datamodel/TaskAttemptInfo.java b/tez-plugins/tez-history-parser/src/main/java/org/apache/tez/history/parser/datamodel/TaskAttemptInfo.java
index d373513..885d743 100644
--- a/tez-plugins/tez-history-parser/src/main/java/org/apache/tez/history/parser/datamodel/TaskAttemptInfo.java
+++ b/tez-plugins/tez-history-parser/src/main/java/org/apache/tez/history/parser/datamodel/TaskAttemptInfo.java
@@ -24,12 +24,15 @@ import com.google.common.collect.Lists;
 import com.google.common.collect.Maps;
 import com.google.common.collect.Ordering;
 
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.util.StringInterner;
 import org.apache.tez.common.ATSConstants;
 import org.apache.tez.common.counters.DAGCounter;
 import org.apache.tez.common.counters.TaskCounter;
 import org.apache.tez.common.counters.TezCounter;
 import org.apache.tez.dag.api.oldrecords.TaskAttemptState;
+import org.apache.tez.dag.history.HistoryEventType;
 import org.apache.tez.history.parser.utils.Utils;
 import org.codehaus.jettison.json.JSONException;
 import org.codehaus.jettison.json.JSONObject;
@@ -46,6 +49,8 @@ import static org.apache.hadoop.classification.InterfaceAudience.Public;
 @Evolving
 public class TaskAttemptInfo extends BaseInfo {
 
+  private static final Log LOG = LogFactory.getLog(TaskAttemptInfo.class);
+
   private static final String SUCCEEDED = StringInterner.weakIntern(TaskAttemptState.SUCCEEDED.name());
 
   private final String taskAttemptId;
@@ -95,8 +100,36 @@ public class TaskAttemptInfo extends BaseInfo {
 
     //Parse additional Info
     final JSONObject otherInfoNode = jsonObject.getJSONObject(Constants.OTHER_INFO);
-    startTime = otherInfoNode.optLong(Constants.START_TIME);
-    endTime = otherInfoNode.optLong(Constants.FINISH_TIME);
+
+    long sTime = otherInfoNode.optLong(Constants.START_TIME);
+    long eTime = otherInfoNode.optLong(Constants.FINISH_TIME);
+    if (eTime < sTime) {
+      LOG.warn("TaskAttemptInfo has got wrong start/end values. "
+          + "startTime=" + sTime + ", endTime=" + eTime + ". Will check "
+          + "timestamps in DAG started/finished events");
+
+      // Check if events TASK_ATTEMPT_STARTED, TASK_ATTEMPT_FINISHED can be made use of
+      for(Event event : eventList) {
+        switch (HistoryEventType.valueOf(event.getType())) {
+        case TASK_ATTEMPT_STARTED:
+          sTime = event.getAbsoluteTime();
+          break;
+        case TASK_ATTEMPT_FINISHED:
+          eTime = event.getAbsoluteTime();
+          break;
+        default:
+          break;
+        }
+      }
+
+      if (eTime < sTime) {
+        LOG.warn("TaskAttemptInfo has got wrong start/end values in events as well. "
+            + "startTime=" + sTime + ", endTime=" + eTime);
+      }
+    }
+    startTime = sTime;
+    endTime = eTime;
+
     diagnostics = otherInfoNode.optString(Constants.DIAGNOSTICS);
     creationTime = otherInfoNode.optLong(Constants.CREATION_TIME);
     creationCausalTA = StringInterner.weakIntern(

http://git-wip-us.apache.org/repos/asf/tez/blob/7e3d5461/tez-plugins/tez-history-parser/src/main/java/org/apache/tez/history/parser/datamodel/TaskInfo.java
----------------------------------------------------------------------
diff --git a/tez-plugins/tez-history-parser/src/main/java/org/apache/tez/history/parser/datamodel/TaskInfo.java b/tez-plugins/tez-history-parser/src/main/java/org/apache/tez/history/parser/datamodel/TaskInfo.java
index c6f89d6..fb3f232 100644
--- a/tez-plugins/tez-history-parser/src/main/java/org/apache/tez/history/parser/datamodel/TaskInfo.java
+++ b/tez-plugins/tez-history-parser/src/main/java/org/apache/tez/history/parser/datamodel/TaskInfo.java
@@ -29,8 +29,11 @@ import com.google.common.collect.Multimap;
 import com.google.common.collect.Multimaps;
 import com.google.common.collect.Ordering;
 
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.util.StringInterner;
 import org.apache.tez.dag.api.oldrecords.TaskAttemptState;
+import org.apache.tez.dag.history.HistoryEventType;
 import org.codehaus.jettison.json.JSONException;
 import org.codehaus.jettison.json.JSONObject;
 
@@ -46,6 +49,8 @@ import static org.apache.hadoop.classification.InterfaceStability.Evolving;
 @Evolving
 public class TaskInfo extends BaseInfo {
 
+  private static final Log LOG = LogFactory.getLog(TaskInfo.class);
+
   private final long startTime;
   private final long endTime;
   private final String diagnostics;
@@ -70,8 +75,36 @@ public class TaskInfo extends BaseInfo {
 
     //Parse additional Info
     final JSONObject otherInfoNode = jsonObject.getJSONObject(Constants.OTHER_INFO);
-    startTime = otherInfoNode.optLong(Constants.START_TIME);
-    endTime = otherInfoNode.optLong(Constants.FINISH_TIME);
+
+    long sTime = otherInfoNode.optLong(Constants.START_TIME);
+    long eTime = otherInfoNode.optLong(Constants.FINISH_TIME);
+    if (eTime < sTime) {
+      LOG.warn("Task has got wrong start/end values. "
+          + "startTime=" + sTime + ", endTime=" + eTime + ". Will check "
+          + "timestamps in DAG started/finished events");
+
+      // Check if events TASK_STARTED, TASK_FINISHED can be made use of
+      for(Event event : eventList) {
+        switch (HistoryEventType.valueOf(event.getType())) {
+        case TASK_STARTED:
+          sTime = event.getAbsoluteTime();
+          break;
+        case TASK_FINISHED:
+          eTime = event.getAbsoluteTime();
+          break;
+        default:
+          break;
+        }
+      }
+
+      if (eTime < sTime) {
+        LOG.warn("Task has got wrong start/end values in events as well. "
+            + "startTime=" + sTime + ", endTime=" + eTime);
+      }
+    }
+    startTime = sTime;
+    endTime = eTime;
+
     diagnostics = otherInfoNode.optString(Constants.DIAGNOSTICS);
     successfulAttemptId = StringInterner.weakIntern(
         otherInfoNode.optString(Constants.SUCCESSFUL_ATTEMPT_ID));

http://git-wip-us.apache.org/repos/asf/tez/blob/7e3d5461/tez-plugins/tez-history-parser/src/main/java/org/apache/tez/history/parser/datamodel/VertexInfo.java
----------------------------------------------------------------------
diff --git a/tez-plugins/tez-history-parser/src/main/java/org/apache/tez/history/parser/datamodel/VertexInfo.java b/tez-plugins/tez-history-parser/src/main/java/org/apache/tez/history/parser/datamodel/VertexInfo.java
index 50647fe..0f6831b 100644
--- a/tez-plugins/tez-history-parser/src/main/java/org/apache/tez/history/parser/datamodel/VertexInfo.java
+++ b/tez-plugins/tez-history-parser/src/main/java/org/apache/tez/history/parser/datamodel/VertexInfo.java
@@ -28,8 +28,11 @@ import com.google.common.collect.Multimap;
 import com.google.common.collect.Multimaps;
 import com.google.common.collect.Ordering;
 
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.util.StringInterner;
 import org.apache.tez.dag.api.oldrecords.TaskState;
+import org.apache.tez.dag.history.HistoryEventType;
 import org.codehaus.jettison.json.JSONException;
 import org.codehaus.jettison.json.JSONObject;
 
@@ -46,6 +49,8 @@ import static org.apache.hadoop.classification.InterfaceStability.Evolving;
 @Evolving
 public class VertexInfo extends BaseInfo {
 
+  private static final Log LOG = LogFactory.getLog(VertexInfo.class);
+
   private final String vertexId;
   private final String vertexName;
   private final long finishTime;
@@ -98,9 +103,42 @@ public class VertexInfo extends BaseInfo {
     JSONObject otherInfoNode = jsonObject.getJSONObject(Constants.OTHER_INFO);
     initRequestedTime = otherInfoNode.optLong(Constants.INIT_REQUESTED_TIME);
     startRequestedTime = otherInfoNode.optLong(Constants.START_REQUESTED_TIME);
-    startTime = otherInfoNode.optLong(Constants.START_TIME);
-    initTime = otherInfoNode.optLong(Constants.INIT_TIME);
-    finishTime = otherInfoNode.optLong(Constants.FINISH_TIME);
+
+    long sTime = otherInfoNode.optLong(Constants.START_TIME);
+    long iTime = otherInfoNode.optLong(Constants.INIT_TIME);
+    long eTime = otherInfoNode.optLong(Constants.FINISH_TIME);
+    if (eTime < sTime) {
+      LOG.warn("Vertex has got wrong start/end values. "
+          + "startTime=" + sTime + ", endTime=" + eTime + ". Will check "
+          + "timestamps in DAG started/finished events");
+
+      // Check if events VERTEX_STARTED, VERTEX_FINISHED can be made use of
+      for(Event event : eventList) {
+        switch (HistoryEventType.valueOf(event.getType())) {
+        case VERTEX_INITIALIZED:
+          iTime = event.getAbsoluteTime();
+          break;
+        case VERTEX_STARTED:
+          sTime = event.getAbsoluteTime();
+          break;
+        case VERTEX_FINISHED:
+          eTime = event.getAbsoluteTime();
+          break;
+        default:
+          break;
+        }
+      }
+
+      if (eTime < sTime) {
+        LOG.warn("Vertex has got wrong start/end values in events as well. "
+            + "startTime=" + sTime + ", endTime=" + eTime);
+      }
+    }
+    startTime = sTime;
+    finishTime = eTime;
+    initTime = iTime;
+
+
     diagnostics = otherInfoNode.optString(Constants.DIAGNOSTICS);
     numTasks = otherInfoNode.optInt(Constants.NUM_TASKS);
     failedTasks = otherInfoNode.optInt(Constants.NUM_FAILED_TASKS);

http://git-wip-us.apache.org/repos/asf/tez/blob/7e3d5461/tez-plugins/tez-history-parser/src/test/java/org/apache/tez/history/TestHistoryParser.java
----------------------------------------------------------------------
diff --git a/tez-plugins/tez-history-parser/src/test/java/org/apache/tez/history/TestHistoryParser.java b/tez-plugins/tez-history-parser/src/test/java/org/apache/tez/history/TestHistoryParser.java
index b373f6e..372585b 100644
--- a/tez-plugins/tez-history-parser/src/test/java/org/apache/tez/history/TestHistoryParser.java
+++ b/tez-plugins/tez-history-parser/src/test/java/org/apache/tez/history/TestHistoryParser.java
@@ -249,6 +249,7 @@ public class TestHistoryParser {
         WordCount.TokenProcessor.class.getName()));
     assertTrue(dagInfo.getVertex(SUMMATION).getProcessorClassName()
         .equals(WordCount.SumProcessor.class.getName()));
+    assertTrue(dagInfo.getFinishTime() > dagInfo.getStartTime());
     assertTrue(dagInfo.getEdges().size() == 1);
     EdgeInfo edgeInfo = dagInfo.getEdges().iterator().next();
     assertTrue(edgeInfo.getDataMovementType().
@@ -269,6 +270,7 @@ public class TestHistoryParser {
       assertTrue(vertexInfo.getStartRequestedTime() > 0);
       assertTrue(vertexInfo.getStartTime() > 0);
       assertTrue(vertexInfo.getFinishTime() > 0);
+      assertTrue(vertexInfo.getFinishTime() > vertexInfo.getStartTime());
       long finishTime = 0;
       for (TaskInfo taskInfo : vertexInfo.getTasks()) {
         assertTrue(taskInfo.getNumberOfTaskAttempts() == 1);
@@ -280,6 +282,7 @@ public class TestHistoryParser {
         assertTrue(taskInfo.getSuccessfulTaskAttempts().size() > 0);
         assertTrue(taskInfo.getFailedTaskAttempts().size() == 0);
         assertTrue(taskInfo.getKilledTaskAttempts().size() == 0);
+        assertTrue(taskInfo.getFinishTime() > taskInfo.getStartTime());
         List<TaskAttemptInfo> attempts = taskInfo.getTaskAttempts();
         if (vertexInfo.getVertexName().equals(TOKENIZER)) {
           // get the last task to finish and track its successful attempt
@@ -304,6 +307,7 @@ public class TestHistoryParser {
           assertTrue(attemptInfo.getCreationTime() > 0);
           assertTrue(attemptInfo.getAllocationTime() > 0);
           assertTrue(attemptInfo.getStartTime() > 0);
+          assertTrue(attemptInfo.getFinishTime() > attemptInfo.getStartTime());
         }
       }
       assertTrue(vertexInfo.getLastTaskToFinish() != null);
@@ -748,6 +752,7 @@ public class TestHistoryParser {
       assertTrue(vertexInfo.getFirstTaskToStart() != null);
       assertTrue(vertexInfo.getSucceededTasksCount() > 0);
       assertTrue(vertexInfo.getTasks().size() > 0);
+      assertTrue(vertexInfo.getFinishTime() > vertexInfo.getStartTime());
     }
 
     for (TaskInfo taskInfo : vertexInfo.getTasks()) {
@@ -781,6 +786,7 @@ public class TestHistoryParser {
           taskInfo.getStartTimeInterval() > 0 && taskInfo.getStartTime() > taskInfo.getStartTimeInterval());
       assertTrue(taskInfo.getSuccessfulAttemptId() != null);
       assertTrue(taskInfo.getSuccessfulTaskAttempt() != null);
+      assertTrue(taskInfo.getFinishTime() > taskInfo.getStartTime());
     }
     assertTrue(taskInfo.getTaskId() != null);
 


[13/24] tez git commit: TEZ-3093. addendum patch (rbalamohan)

Posted by sr...@apache.org.
TEZ-3093. addendum patch (rbalamohan)


Project: http://git-wip-us.apache.org/repos/asf/tez/repo
Commit: http://git-wip-us.apache.org/repos/asf/tez/commit/c7397f5d
Tree: http://git-wip-us.apache.org/repos/asf/tez/tree/c7397f5d
Diff: http://git-wip-us.apache.org/repos/asf/tez/diff/c7397f5d

Branch: refs/heads/TEZ-2980
Commit: c7397f5de42bd83f2a479ca1b29ef007cc40cc25
Parents: 12e1aae
Author: Rajesh Balamohan <rb...@apache.org>
Authored: Thu Feb 4 19:04:28 2016 -0800
Committer: Rajesh Balamohan <rb...@apache.org>
Committed: Thu Feb 4 19:04:28 2016 -0800

----------------------------------------------------------------------
 .../org/apache/tez/analyzer/plugins/CriticalPathAnalyzer.java    | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tez/blob/c7397f5d/tez-tools/analyzers/job-analyzer/src/main/java/org/apache/tez/analyzer/plugins/CriticalPathAnalyzer.java
----------------------------------------------------------------------
diff --git a/tez-tools/analyzers/job-analyzer/src/main/java/org/apache/tez/analyzer/plugins/CriticalPathAnalyzer.java b/tez-tools/analyzers/job-analyzer/src/main/java/org/apache/tez/analyzer/plugins/CriticalPathAnalyzer.java
index a7e37ab..88c5134 100644
--- a/tez-tools/analyzers/job-analyzer/src/main/java/org/apache/tez/analyzer/plugins/CriticalPathAnalyzer.java
+++ b/tez-tools/analyzers/job-analyzer/src/main/java/org/apache/tez/analyzer/plugins/CriticalPathAnalyzer.java
@@ -62,7 +62,7 @@ public class CriticalPathAnalyzer extends TezAnalyzerBase implements Analyzer {
   }
 
   public static final String DRAW_SVG = "tez.critical-path-analyzer.draw-svg";
-  public static final String SVG_DIR = "output-dir";
+  public static final String OUTPUT_DIR = "output-dir";
 
   public static class CriticalPathStep {
     public enum EntityType {
@@ -159,7 +159,7 @@ public class CriticalPathAnalyzer extends TezAnalyzerBase implements Analyzer {
     SVGUtils svg = new SVGUtils();
     String outputDir = getOutputDir();
     if (outputDir == null) {
-      outputDir = getConf().get(SVG_DIR);
+      outputDir = getConf().get(OUTPUT_DIR);
     }
     String outputFileName = outputDir + File.separator + dagInfo.getDagId() + ".svg";
     System.out.println("Writing output to: " + outputFileName);


[22/24] tez git commit: TEZ-3029. Add an onError method to service plugin contexts. (sseth)

Posted by sr...@apache.org.
TEZ-3029. Add an onError method to service plugin contexts. (sseth)


Project: http://git-wip-us.apache.org/repos/asf/tez/repo
Commit: http://git-wip-us.apache.org/repos/asf/tez/commit/a812c346
Tree: http://git-wip-us.apache.org/repos/asf/tez/tree/a812c346
Diff: http://git-wip-us.apache.org/repos/asf/tez/diff/a812c346

Branch: refs/heads/TEZ-2980
Commit: a812c3462808e73b8a59e1852ff2547dcbafbf84
Parents: fec46aa
Author: Siddharth Seth <ss...@apache.org>
Authored: Wed Feb 17 13:39:11 2016 -0800
Committer: Siddharth Seth <ss...@apache.org>
Committed: Wed Feb 17 13:39:11 2016 -0800

----------------------------------------------------------------------
 CHANGES.txt                                     |   1 +
 .../api/ContainerLauncherContext.java           |  12 +-
 .../apache/tez/serviceplugins/api/DagInfo.java  |  30 +++
 .../api/ServicePluginContextBase.java           |  49 ++++
 .../serviceplugins/api/ServicePluginError.java  |  48 ++++
 .../api/ServicePluginErrorDefaults.java         |  76 ++++++
 .../api/TaskSchedulerContext.java               |  19 +-
 tez-dag/src/main/java/org/apache/tez/Utils.java |  33 +++
 .../tez/dag/api/client/DAGClientHandler.java    |   5 +-
 .../dag/app/ContainerLauncherContextImpl.java   |  27 ++-
 .../org/apache/tez/dag/app/DAGAppMaster.java    |  24 +-
 .../dag/app/TaskCommunicatorContextImpl.java    |  17 ++
 .../tez/dag/app/TaskCommunicatorManager.java    |  26 ++
 .../app/TaskCommunicatorManagerInterface.java   |   4 +
 .../java/org/apache/tez/dag/app/dag/DAG.java    |   3 +-
 .../tez/dag/app/dag/DAGTerminationCause.java    |   3 +
 .../tez/dag/app/dag/VertexTerminationCause.java |   2 +-
 ...DAGAppMasterEventSchedulingServiceError.java |  15 +-
 .../dag/app/dag/event/DAGEventTerminateDag.java |  38 +++
 .../tez/dag/app/dag/event/DAGEventType.java     |   4 +-
 .../apache/tez/dag/app/dag/impl/DAGImpl.java    |  82 ++++---
 .../apache/tez/dag/app/dag/impl/VertexImpl.java |   2 +-
 .../app/launcher/ContainerLauncherManager.java  |  59 ++++-
 .../dag/app/rm/TaskSchedulerContextImpl.java    |  22 +-
 .../app/rm/TaskSchedulerContextImplWrapper.java |  33 ++-
 .../tez/dag/app/rm/TaskSchedulerManager.java    |  36 ++-
 .../dag/app/rm/YarnTaskSchedulerService.java    |   6 +-
 .../app/rm/YarnTaskSchedulerServiceError.java   |  33 +++
 .../api/TaskCommunicatorContext.java            |  16 +-
 .../dag/api/client/TestDAGClientHandler.java    |   4 +-
 .../apache/tez/dag/app/MockDAGAppMaster.java    |   6 +-
 .../tez/dag/app/TestMockDAGAppMaster.java       |   3 +-
 .../dag/app/TestTaskCommunicatorManager.java    | 136 ++++++++++-
 .../apache/tez/dag/app/dag/impl/TestCommit.java |  87 +++++--
 .../tez/dag/app/dag/impl/TestDAGImpl.java       |  84 +++++--
 .../tez/dag/app/dag/impl/TestVertexImpl.java    |  13 +-
 .../launcher/TestContainerLauncherManager.java  | 101 +++++++-
 .../tez/dag/app/rm/TestTaskScheduler.java       |  19 +-
 .../dag/app/rm/TestTaskSchedulerHelpers.java    |  15 +-
 .../dag/app/rm/TestTaskSchedulerManager.java    | 161 ++++++++++++
 .../tez/dag/helpers/DagInfoImplForTest.java     |  38 +++
 .../tez/dag/app/ErrorPluginConfiguration.java   | 134 ++++++++++
 ...zTestServiceContainerLauncherWithErrors.java |  17 +-
 ...stServiceTaskSchedulerServiceWithErrors.java |  23 +-
 ...ezTestServiceTaskCommunicatorWithErrors.java |  22 +-
 .../tests/TestExternalTezServicesErrors.java    | 243 +++++++++++++++----
 46 files changed, 1584 insertions(+), 247 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tez/blob/a812c346/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index e2f77f6..af643dd 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -4,6 +4,7 @@ Apache Tez Change Log
 Release 0.8.3: Unreleased
 
 INCOMPATIBLE CHANGES
+  TEZ-3029. Add an onError method to service plugin contexts.
 
 ALL CHANGES:
   TEZ-3103. Shuffle can hang when memory to memory merging enabled

http://git-wip-us.apache.org/repos/asf/tez/blob/a812c346/tez-api/src/main/java/org/apache/tez/serviceplugins/api/ContainerLauncherContext.java
----------------------------------------------------------------------
diff --git a/tez-api/src/main/java/org/apache/tez/serviceplugins/api/ContainerLauncherContext.java b/tez-api/src/main/java/org/apache/tez/serviceplugins/api/ContainerLauncherContext.java
index 70a3498..ed1d58f 100644
--- a/tez-api/src/main/java/org/apache/tez/serviceplugins/api/ContainerLauncherContext.java
+++ b/tez-api/src/main/java/org/apache/tez/serviceplugins/api/ContainerLauncherContext.java
@@ -14,15 +14,15 @@
 
 package org.apache.tez.serviceplugins.api;
 
+
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.classification.InterfaceStability;
 import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
 import org.apache.hadoop.yarn.api.records.ContainerId;
-import org.apache.tez.dag.api.UserPayload;
 
 @InterfaceAudience.Public
 @InterfaceStability.Unstable
-public interface ContainerLauncherContext {
+public interface ContainerLauncherContext extends ServicePluginContextBase {
 
   // TODO TEZ-2003 (post) TEZ-2664 Tez abstraction for ContainerId, NodeId, other YARN constructs
 
@@ -77,13 +77,6 @@ public interface ContainerLauncherContext {
   // Lookup APIs
 
   /**
-   * Get the UserPayload that was configured while setting up the launcher
-   *
-   * @return the initially configured user payload
-   */
-  UserPayload getInitialUserPayload();
-
-  /**
    * Get the number of nodes being handled by the specified source
    *
    * @param sourceName the relevant source name
@@ -108,4 +101,5 @@ public interface ContainerLauncherContext {
    *
    */
   Object getTaskCommunicatorMetaInfo(String taskCommName);
+
 }

http://git-wip-us.apache.org/repos/asf/tez/blob/a812c346/tez-api/src/main/java/org/apache/tez/serviceplugins/api/DagInfo.java
----------------------------------------------------------------------
diff --git a/tez-api/src/main/java/org/apache/tez/serviceplugins/api/DagInfo.java b/tez-api/src/main/java/org/apache/tez/serviceplugins/api/DagInfo.java
new file mode 100644
index 0000000..ef73343
--- /dev/null
+++ b/tez-api/src/main/java/org/apache/tez/serviceplugins/api/DagInfo.java
@@ -0,0 +1,30 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tez.serviceplugins.api;
+
+public interface DagInfo {
+
+  /**
+   * Get the index of the dag described by this object
+   * @return a numerical identifier for the DAG. This is unique within the currently running application.
+   */
+  int getIndex();
+
+  /**
+   * Get the name of the dag described by this object
+   * @return the name of the dag
+   */
+  String getName();
+}

http://git-wip-us.apache.org/repos/asf/tez/blob/a812c346/tez-api/src/main/java/org/apache/tez/serviceplugins/api/ServicePluginContextBase.java
----------------------------------------------------------------------
diff --git a/tez-api/src/main/java/org/apache/tez/serviceplugins/api/ServicePluginContextBase.java b/tez-api/src/main/java/org/apache/tez/serviceplugins/api/ServicePluginContextBase.java
new file mode 100644
index 0000000..90a51b2
--- /dev/null
+++ b/tez-api/src/main/java/org/apache/tez/serviceplugins/api/ServicePluginContextBase.java
@@ -0,0 +1,49 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tez.serviceplugins.api;
+
+import javax.annotation.Nonnull;
+import javax.annotation.Nullable;
+
+import org.apache.tez.dag.api.UserPayload;
+
+/**
+ * Base interface for ServicePluginContexts
+ */
+public interface ServicePluginContextBase {
+
+  /**
+   * Get the UserPayload that was configured while setting up the service plugin
+   *
+   * @return the initially configured user payload
+   */
+  UserPayload getInitialUserPayload();
+
+  /**
+   * Get information on the currently executing dag
+   * @return info on the currently running dag, or null if no dag is executing
+   */
+  @Nullable
+  DagInfo getCurrentDagInfo();
+
+  /**
+   * Report an error from the service. This results in the specific DAG being killed.
+   *
+   * @param servicePluginError the error category; must not be null
+   * @param message      A diagnostic message associated with this error
+   * @param dagInfo      the affected dag
+   */
+  void reportError(@Nonnull ServicePluginError servicePluginError, String message, DagInfo dagInfo);
+}

http://git-wip-us.apache.org/repos/asf/tez/blob/a812c346/tez-api/src/main/java/org/apache/tez/serviceplugins/api/ServicePluginError.java
----------------------------------------------------------------------
diff --git a/tez-api/src/main/java/org/apache/tez/serviceplugins/api/ServicePluginError.java b/tez-api/src/main/java/org/apache/tez/serviceplugins/api/ServicePluginError.java
new file mode 100644
index 0000000..932c0fa
--- /dev/null
+++ b/tez-api/src/main/java/org/apache/tez/serviceplugins/api/ServicePluginError.java
@@ -0,0 +1,48 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tez.serviceplugins.api;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.classification.InterfaceStability;
+
+/**
+ * Represents errors from a ServicePlugin. The default implementation {@link ServicePluginErrorDefaults}
+ * lists a basic set of errors.
+ * This can be extended by implementing this interface, if the default set is not adequate.
+ */
+@InterfaceAudience.Public
+@InterfaceStability.Unstable
+public interface ServicePluginError {
+
+  enum ErrorType {
+    TEMPORARY, PERMANENT,
+  }
+
+  /**
+   * Get the enum representation
+   *
+   * @return an enum representation of the ServicePluginError
+   */
+  Enum getEnum();
+
+  /**
+   * Get the type of the error
+   *
+   * @return the type of the error
+   */
+  ErrorType getErrorType();
+
+
+}

http://git-wip-us.apache.org/repos/asf/tez/blob/a812c346/tez-api/src/main/java/org/apache/tez/serviceplugins/api/ServicePluginErrorDefaults.java
----------------------------------------------------------------------
diff --git a/tez-api/src/main/java/org/apache/tez/serviceplugins/api/ServicePluginErrorDefaults.java b/tez-api/src/main/java/org/apache/tez/serviceplugins/api/ServicePluginErrorDefaults.java
new file mode 100644
index 0000000..83a85b5
--- /dev/null
+++ b/tez-api/src/main/java/org/apache/tez/serviceplugins/api/ServicePluginErrorDefaults.java
@@ -0,0 +1,76 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tez.serviceplugins.api;/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.hadoop.classification.InterfaceAudience;
+
+/**
+ * A default set of errors from ServicePlugins
+ *
+ * Errors are marked as fatal ({@link ErrorType#PERMANENT}) or non-fatal ({@link ErrorType#TEMPORARY})
+ * for the Application. Fatal errors cause the AM to go down.
+ *
+ */
+@InterfaceAudience.Public
+public enum ServicePluginErrorDefaults implements ServicePluginError {
+  /**
+   * Indicates that the service is currently unavailable.
+   * This is a temporary error.
+   */
+  SERVICE_UNAVAILABLE(ErrorType.TEMPORARY),
+
+  /**
+   * Indicates that the service is in an inconsistent state. This is a fatal error.
+   */
+  INCONSISTENT_STATE(ErrorType.PERMANENT),
+
+  /**
+   * Other temporary error.
+   */
+  OTHER(ErrorType.TEMPORARY),
+
+  /**
+   * Other fatal error.
+   */
+  OTHER_FATAL(ErrorType.PERMANENT);
+
+  private final ErrorType errorType;
+
+  ServicePluginErrorDefaults(ErrorType errorType) {
+    this.errorType = errorType;
+  }
+
+  @Override
+  public Enum getEnum() {
+    return this;
+  }
+
+  @Override
+  public ErrorType getErrorType() {
+    return errorType;
+  }
+}

http://git-wip-us.apache.org/repos/asf/tez/blob/a812c346/tez-api/src/main/java/org/apache/tez/serviceplugins/api/TaskSchedulerContext.java
----------------------------------------------------------------------
diff --git a/tez-api/src/main/java/org/apache/tez/serviceplugins/api/TaskSchedulerContext.java b/tez-api/src/main/java/org/apache/tez/serviceplugins/api/TaskSchedulerContext.java
index a24061f..d30ada3 100644
--- a/tez-api/src/main/java/org/apache/tez/serviceplugins/api/TaskSchedulerContext.java
+++ b/tez-api/src/main/java/org/apache/tez/serviceplugins/api/TaskSchedulerContext.java
@@ -29,7 +29,6 @@ import org.apache.hadoop.yarn.api.records.FinalApplicationStatus;
 import org.apache.hadoop.yarn.api.records.NodeReport;
 import org.apache.hadoop.yarn.api.records.Resource;
 import org.apache.tez.common.ContainerSignatureMatcher;
-import org.apache.tez.dag.api.UserPayload;
 
 /**
  * Context for a {@link TaskScheduler}
@@ -42,7 +41,7 @@ import org.apache.tez.dag.api.UserPayload;
  */
 @InterfaceAudience.Public
 @InterfaceStability.Unstable
-public interface TaskSchedulerContext {
+public interface TaskSchedulerContext extends ServicePluginContextBase {
 
   class AppFinalStatus {
     public final FinalApplicationStatus exitStatus;
@@ -136,14 +135,6 @@ public interface TaskSchedulerContext {
   );
 
   /**
-   * Indicate to the framework that the scheduler has run into an error. This will cause
-   * the DAG and application to be killed.
-   *
-   * @param t the relevant error
-   */
-  void onError(Throwable t);
-
-  /**
    * Inform the framework that the scheduler has determined that a previously allocated container
    * needs to be preempted
    *
@@ -164,13 +155,6 @@ public interface TaskSchedulerContext {
   // Getters
 
   /**
-   * Get the UserPayload that was configured while setting up the scheduler
-   *
-   * @return the initially configured user payload
-   */
-  UserPayload getInitialUserPayload();
-
-  /**
    * Get the tracking URL for the application. Primarily relevant to YARN
    *
    * @return the trackingUrl for the app
@@ -234,4 +218,5 @@ public interface TaskSchedulerContext {
    * @return the app master state
    */
   AMState getAMState();
+
 }

http://git-wip-us.apache.org/repos/asf/tez/blob/a812c346/tez-dag/src/main/java/org/apache/tez/Utils.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/Utils.java b/tez-dag/src/main/java/org/apache/tez/Utils.java
index 959b536..6f03a67 100644
--- a/tez-dag/src/main/java/org/apache/tez/Utils.java
+++ b/tez-dag/src/main/java/org/apache/tez/Utils.java
@@ -15,7 +15,14 @@
 package org.apache.tez;
 
 import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.yarn.event.Event;
 import org.apache.tez.dag.app.AppContext;
+import org.apache.tez.dag.app.dag.DAG;
+import org.apache.tez.dag.app.dag.DAGTerminationCause;
+import org.apache.tez.dag.app.dag.event.DAGEventTerminateDag;
+import org.apache.tez.dag.records.TezDAGID;
+import org.apache.tez.serviceplugins.api.DagInfo;
+import org.apache.tez.serviceplugins.api.ServicePluginError;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -63,4 +70,30 @@ public class Utils {
     return "[" + schedulerIndex + ":" + name + "]";
   }
 
+  /** Logs a non-fatal service plugin error; terminates the dag named by dagInfo only if it is still the current dag. */
+  public static void processNonFatalServiceErrorReport(String entityString,
+      ServicePluginError servicePluginError, String diagnostics, DagInfo dagInfo,
+      AppContext appContext, String componentName) {
+    String message = "Error reported by " + componentName + " [" + entityString + "]["
+        + servicePluginError + "] " + (diagnostics == null ? "" : diagnostics);
+    if (dagInfo == null) {
+      LOG.warn("No current dag name provided. Not acting on " + message);
+      return;
+    }
+    DAG dag = appContext.getCurrentDAG();
+    if (dag == null || dag.getID().getId() != dagInfo.getIndex()) {
+      // The report does not refer to the current dag: log it rather than dropping it silently.
+      LOG.warn("Dag [" + dagInfo.getName() + "] is not the current dag. Not acting on " + message);
+      return;
+    }
+    // Send a kill message only if it is the same dag.
+    TezDAGID dagId = dag.getID();
+    LOG.warn(message + ", Failing dag: [" + dagInfo.getName() + ", " + dagId + "]");
+    sendEvent(appContext, new DAGEventTerminateDag(dagId, DAGTerminationCause.SERVICE_PLUGIN_ERROR, message));
+  }
+
+  @SuppressWarnings("unchecked")
+  private static void sendEvent(AppContext appContext, Event<?> event) {
+    appContext.getEventHandler().handle(event);
+  }
 }

http://git-wip-us.apache.org/repos/asf/tez/blob/a812c346/tez-dag/src/main/java/org/apache/tez/dag/api/client/DAGClientHandler.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/api/client/DAGClientHandler.java b/tez-dag/src/main/java/org/apache/tez/dag/api/client/DAGClientHandler.java
index 0f674f3..79b9acd 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/api/client/DAGClientHandler.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/api/client/DAGClientHandler.java
@@ -112,7 +112,7 @@ public class DAGClientHandler {
   public void tryKillDAG(String dagIdStr) throws TezException {
     DAG dag = getDAG(dagIdStr);
     LOG.info("Sending client kill to dag: " + dagIdStr);
-    dagAppMaster.tryKillDAG(dag);
+    dagAppMaster.tryKillDAG(dag, "Kill Dag request received from client");
   }
 
   public synchronized String submitDAG(DAGPlan dagPlan,
@@ -120,10 +120,11 @@ public class DAGClientHandler {
     return dagAppMaster.submitDAGToAppMaster(dagPlan, additionalAmResources);
   }
 
+  // Only to be invoked by the DAGClient.
   public synchronized void shutdownAM() throws TezException {
     LOG.info("Received message to shutdown AM");
     if (dagAppMaster != null) {
-      dagAppMaster.shutdownTezAM();
+      dagAppMaster.shutdownTezAM("AM Shutdown request received from client");
     }
   }
 

http://git-wip-us.apache.org/repos/asf/tez/blob/a812c346/tez-dag/src/main/java/org/apache/tez/dag/app/ContainerLauncherContextImpl.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/ContainerLauncherContextImpl.java b/tez-dag/src/main/java/org/apache/tez/dag/app/ContainerLauncherContextImpl.java
index 9434256..7e68675 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/ContainerLauncherContextImpl.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/ContainerLauncherContextImpl.java
@@ -14,6 +14,8 @@
 
 package org.apache.tez.dag.app;
 
+import javax.annotation.Nullable;
+
 import com.google.common.base.Preconditions;
 import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
 import org.apache.hadoop.yarn.api.records.ContainerId;
@@ -21,7 +23,10 @@ import org.apache.tez.common.TezUtilsInternal;
 import org.apache.tez.dag.api.UserPayload;
 import org.apache.tez.dag.app.dag.event.DAGAppMasterEventType;
 import org.apache.tez.dag.app.dag.event.DAGAppMasterEventUserServiceFatalError;
+import org.apache.tez.dag.app.launcher.ContainerLauncherManager;
 import org.apache.tez.serviceplugins.api.ContainerLauncherContext;
+import org.apache.tez.serviceplugins.api.DagInfo;
+import org.apache.tez.serviceplugins.api.ServicePluginError;
 import org.apache.tez.serviceplugins.api.TaskAttemptEndReason;
 import org.apache.tez.dag.app.rm.container.AMContainerEvent;
 import org.apache.tez.dag.app.rm.container.AMContainerEventCompleted;
@@ -39,15 +44,22 @@ public class ContainerLauncherContextImpl implements ContainerLauncherContext {
 
   private static final Logger LOG = LoggerFactory.getLogger(ContainerLauncherContextImpl.class);
   private final AppContext context;
+  private final ContainerLauncherManager containerLauncherManager;
   private final TaskCommunicatorManagerInterface tal;
   private final UserPayload initialUserPayload;
+  private final int containerLauncherIndex;
 
-  public ContainerLauncherContextImpl(AppContext appContext, TaskCommunicatorManagerInterface tal, UserPayload initialUserPayload) {
+  public ContainerLauncherContextImpl(AppContext appContext, ContainerLauncherManager containerLauncherManager,
+                                      TaskCommunicatorManagerInterface tal,
+                                      UserPayload initialUserPayload, int containerLauncherIndex) {
     Preconditions.checkNotNull(appContext, "AppContext cannot be null");
+    Preconditions.checkNotNull(containerLauncherManager, "ContainerLauncherManager cannot be null");
     Preconditions.checkNotNull(tal, "TaskCommunicator cannot be null");
     this.context = appContext;
+    this.containerLauncherManager = containerLauncherManager;
     this.tal = tal;
     this.initialUserPayload = initialUserPayload;
+    this.containerLauncherIndex = containerLauncherIndex;
   }
 
   @Override
@@ -103,6 +115,12 @@ public class ContainerLauncherContextImpl implements ContainerLauncherContext {
     return context.getApplicationAttemptId();
   }
 
+  @Nullable
+  @Override
+  public DagInfo getCurrentDagInfo() {
+    return context.getCurrentDAG();
+  }
+
   @Override
   public Object getTaskCommunicatorMetaInfo(String taskCommName) {
     int taskCommId = context.getTaskCommunicatorIdentifier(taskCommName);
@@ -120,4 +138,11 @@ public class ContainerLauncherContextImpl implements ContainerLauncherContext {
     return null;
   }
 
+  @Override
+  public void reportError(ServicePluginError servicePluginError, String message, DagInfo dagInfo) {
+    Preconditions.checkNotNull(servicePluginError, "ServiceError must be specified");
+    containerLauncherManager.reportError(containerLauncherIndex, servicePluginError, message, dagInfo);
+  }
+
+
 }

http://git-wip-us.apache.org/repos/asf/tez/blob/a812c346/tez-dag/src/main/java/org/apache/tez/dag/app/DAGAppMaster.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/DAGAppMaster.java b/tez-dag/src/main/java/org/apache/tez/dag/app/DAGAppMaster.java
index 579d23f..5ac3800 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/DAGAppMaster.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/DAGAppMaster.java
@@ -72,9 +72,11 @@ import org.apache.tez.dag.api.UserPayload;
 import org.apache.tez.dag.api.records.DAGProtos.AMPluginDescriptorProto;
 import org.apache.tez.dag.api.records.DAGProtos.ConfigurationProto;
 import org.apache.tez.dag.api.records.DAGProtos.TezNamedEntityDescriptorProto;
+import org.apache.tez.dag.app.dag.DAGTerminationCause;
 import org.apache.tez.dag.app.dag.event.DAGAppMasterEventDagCleanup;
 import org.apache.tez.dag.app.dag.event.DAGAppMasterEventUserServiceFatalError;
 import org.apache.tez.dag.app.dag.event.DAGEventInternalError;
+import org.apache.tez.dag.app.dag.event.DAGEventTerminateDag;
 import org.apache.tez.dag.history.events.DAGRecoveredEvent;
 import org.apache.tez.dag.records.TezTaskAttemptID;
 import org.apache.tez.dag.records.TezTaskID;
@@ -711,8 +713,8 @@ public class DAGAppMaster extends AbstractService {
       state = DAGAppMasterState.ERROR;
       errDiagnostics = "Error in the TaskScheduler. Shutting down. ";
       addDiagnostic(errDiagnostics
-          + "Error=" + ExceptionUtils.getStackTrace(schedulingServiceErrorEvent.getThrowable()));
-      LOG.error(errDiagnostics, schedulingServiceErrorEvent.getThrowable());
+          + "Error=" + schedulingServiceErrorEvent.getDiagnosticInfo());
+      LOG.error(errDiagnostics);
       shutdownHandler.shutdown();
       break;
     case TASK_COMMUNICATOR_SERVICE_FATAL_ERROR:
@@ -724,7 +726,7 @@ public class DAGAppMaster extends AbstractService {
       Throwable error = usfe.getError();
       errDiagnostics = "Service Error: " + usfe.getDiagnosticInfo()
           + ", eventType=" + event.getType()
-          + ", exception=" + ExceptionUtils.getStackTrace(usfe.getError());
+          + ", exception=" + (usfe.getError() == null ? "None" : ExceptionUtils.getStackTrace(usfe.getError()));
       LOG.error(errDiagnostics, error);
       addDiagnostic(errDiagnostics);
 
@@ -1291,16 +1293,16 @@ public class DAGAppMaster extends AbstractService {
         + oldState + " new state: " + state);
   }
 
-  public void shutdownTezAM() throws TezException {
+  public void shutdownTezAM(String dagKillmessage) throws TezException {
     sessionStopped.set(true);
     synchronized (this) {
       this.taskSchedulerManager.setShouldUnregisterFlag();
       if (currentDAG != null
           && !currentDAG.isComplete()) {
-        //send a DAG_KILL message
+        //send a DAG_TERMINATE message
         LOG.info("Sending a kill event to the current DAG"
             + ", dagId=" + currentDAG.getID());
-        tryKillDAG(currentDAG);
+        tryKillDAG(currentDAG, dagKillmessage);
       } else {
         LOG.info("No current running DAG, shutting down the AM");
         if (isSession && !state.equals(DAGAppMasterState.ERROR)) {
@@ -1376,13 +1378,13 @@ public class DAGAppMaster extends AbstractService {
   }
 
   @SuppressWarnings("unchecked")
-  public void tryKillDAG(DAG dag) throws TezException {
+  public void tryKillDAG(DAG dag, String message) throws TezException {
     try {
       logDAGKillRequestEvent(dag.getID(), false);
     } catch (IOException e) {
       throw new TezException(e);
     }
-    dispatcher.getEventHandler().handle(new DAGEvent(dag.getID(), DAGEventType.DAG_KILL));
+    dispatcher.getEventHandler().handle(new DAGEventTerminateDag(dag.getID(), DAGTerminationCause.DAG_KILL, message));
   }
   
   private Map<String, LocalResource> getAdditionalLocalResourceDiff(
@@ -2235,10 +2237,10 @@ public class DAGAppMaster extends AbstractService {
     if (currentTime < (lastDAGCompletionTime + sessionTimeoutInterval)) {
       return;
     }
-    LOG.info("Session timed out"
+    String message = "Session timed out"
         + ", lastDAGCompletionTime=" + lastDAGCompletionTime + " ms"
-        + ", sessionTimeoutInterval=" + sessionTimeoutInterval + " ms");
-    shutdownTezAM();
+        + ", sessionTimeoutInterval=" + sessionTimeoutInterval + " ms";
+    shutdownTezAM(message);
   }
 
   public boolean isSession() {

http://git-wip-us.apache.org/repos/asf/tez/blob/a812c346/tez-dag/src/main/java/org/apache/tez/dag/app/TaskCommunicatorContextImpl.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/TaskCommunicatorContextImpl.java b/tez-dag/src/main/java/org/apache/tez/dag/app/TaskCommunicatorContextImpl.java
index 7f88be2..a922f38 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/TaskCommunicatorContextImpl.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/TaskCommunicatorContextImpl.java
@@ -14,6 +14,7 @@
 
 package org.apache.tez.dag.app;
 
+import javax.annotation.Nonnull;
 import javax.annotation.Nullable;
 import java.io.IOException;
 import java.util.Set;
@@ -28,6 +29,8 @@ import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
 import org.apache.hadoop.yarn.api.records.ContainerId;
 import org.apache.tez.dag.api.UserPayload;
 import org.apache.tez.dag.app.rm.container.AMContainer;
+import org.apache.tez.serviceplugins.api.DagInfo;
+import org.apache.tez.serviceplugins.api.ServicePluginError;
 import org.apache.tez.serviceplugins.api.TaskAttemptEndReason;
 import org.apache.tez.serviceplugins.api.TaskCommunicatorContext;
 import org.apache.tez.serviceplugins.api.TaskHeartbeatRequest;
@@ -143,6 +146,7 @@ public class TaskCommunicatorContextImpl implements TaskCommunicatorContext, Ver
         this);
   }
 
+  @SuppressWarnings("deprecation")
   @Override
   public String getCurrentDagName() {
     return getDag().getName();
@@ -153,11 +157,18 @@ public class TaskCommunicatorContextImpl implements TaskCommunicatorContext, Ver
     return context.getApplicationID().toString();
   }
 
+  @SuppressWarnings("deprecation")
   @Override
   public int getCurrentDagIdenitifer() {
     return getDag().getID().getId();
   }
 
+  @Nullable
+  @Override
+  public DagInfo getCurrentDagInfo() {
+    return getDag();
+  }
+
   @Override
   public Iterable<String> getInputVertexNames(String vertexName) {
     Preconditions.checkNotNull(vertexName, "VertexName cannot be null: " + vertexName);
@@ -203,6 +214,12 @@ public class TaskCommunicatorContextImpl implements TaskCommunicatorContext, Ver
   }
 
   @Override
+  public void reportError(@Nonnull ServicePluginError servicePluginError, String message, DagInfo dagInfo) {
+    Preconditions.checkNotNull(servicePluginError, "ServicePluginError must be set");
+    taskCommunicatorManager.reportError(taskCommunicatorIndex, servicePluginError, message, dagInfo);
+  }
+
+  @Override
   public void onStateUpdated(VertexStateUpdate event) {
     taskCommunicatorManager.vertexStateUpdateNotificationReceived(event, taskCommunicatorIndex);
   }

http://git-wip-us.apache.org/repos/asf/tez/blob/a812c346/tez-dag/src/main/java/org/apache/tez/dag/app/TaskCommunicatorManager.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/TaskCommunicatorManager.java b/tez-dag/src/main/java/org/apache/tez/dag/app/TaskCommunicatorManager.java
index a196114..403e1a1 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/TaskCommunicatorManager.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/TaskCommunicatorManager.java
@@ -33,6 +33,8 @@ import org.apache.commons.collections4.ListUtils;
 import org.apache.hadoop.yarn.event.Event;
 import org.apache.tez.Utils;
 import org.apache.tez.dag.api.NamedEntityDescriptor;
+import org.apache.tez.serviceplugins.api.DagInfo;
+import org.apache.tez.serviceplugins.api.ServicePluginError;
 import org.apache.tez.serviceplugins.api.TaskCommunicator;
 import org.apache.tez.dag.api.TezConstants;
 import org.apache.tez.dag.api.UserPayload;
@@ -593,6 +595,30 @@ public class TaskCommunicatorManager extends AbstractService implements
     return taskCommunicators[taskCommIndex];
   }
 
+  @Override
+  public void reportError(int taskCommIndex, ServicePluginError servicePluginError,
+                          String diagnostics,
+                          DagInfo dagInfo) {
+    if (servicePluginError.getErrorType() == ServicePluginError.ErrorType.PERMANENT) {
+      String msg = "Fatal Error reported by TaskCommunicator"
+          + ", communicator=" + Utils.getTaskCommIdentifierString(taskCommIndex, context)
+          + ", servicePluginError=" + servicePluginError
+          + ", diagnostics= " + (diagnostics == null ? "" : diagnostics);
+      LOG.error(msg);
+      sendEvent(
+          new DAGAppMasterEventUserServiceFatalError(
+              DAGAppMasterEventType.TASK_COMMUNICATOR_SERVICE_FATAL_ERROR,
+              msg, null));
+    } else {
+      Utils
+          .processNonFatalServiceErrorReport(
+              Utils.getTaskCommIdentifierString(taskCommIndex, context), servicePluginError,
+              diagnostics,
+              dagInfo, context,
+              "TaskCommunicator");
+    }
+  }
+
   private void pingContainerHeartbeatHandler(ContainerId containerId) {
     containerHeartbeatHandler.pinged(containerId);
   }

http://git-wip-us.apache.org/repos/asf/tez/blob/a812c346/tez-dag/src/main/java/org/apache/tez/dag/app/TaskCommunicatorManagerInterface.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/TaskCommunicatorManagerInterface.java b/tez-dag/src/main/java/org/apache/tez/dag/app/TaskCommunicatorManagerInterface.java
index e07b1a0..e0f9852 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/TaskCommunicatorManagerInterface.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/TaskCommunicatorManagerInterface.java
@@ -20,6 +20,8 @@ package org.apache.tez.dag.app;
 
 import org.apache.hadoop.yarn.api.records.ContainerId;
 import org.apache.tez.serviceplugins.api.ContainerEndReason;
+import org.apache.tez.serviceplugins.api.DagInfo;
+import org.apache.tez.serviceplugins.api.ServicePluginError;
 import org.apache.tez.serviceplugins.api.TaskAttemptEndReason;
 import org.apache.tez.dag.app.dag.DAG;
 import org.apache.tez.dag.app.rm.container.AMContainerTask;
@@ -42,4 +44,6 @@ public interface TaskCommunicatorManagerInterface {
   void dagSubmitted();
 
   TaskCommunicatorWrapper getTaskCommunicator(int taskCommIndex);
+
+  void reportError(int taskCommIndex, ServicePluginError servicePluginError, String diagnostics, DagInfo dagInfo);
 }

http://git-wip-us.apache.org/repos/asf/tez/blob/a812c346/tez-dag/src/main/java/org/apache/tez/dag/app/dag/DAG.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/DAG.java b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/DAG.java
index a01c623..dd96ab2 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/DAG.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/DAG.java
@@ -36,11 +36,12 @@ import org.apache.tez.dag.api.records.DAGProtos.DAGPlan;
 import org.apache.tez.common.security.ACLManager;
 import org.apache.tez.dag.records.TezDAGID;
 import org.apache.tez.dag.records.TezVertexID;
+import org.apache.tez.serviceplugins.api.DagInfo;
 
 /**
  * Main interface to interact with the job.
  */
-public interface DAG {
+public interface DAG extends DagInfo {
 
   TezDAGID getID();
   Map<String, LocalResource> getLocalResources();

http://git-wip-us.apache.org/repos/asf/tez/blob/a812c346/tez-dag/src/main/java/org/apache/tez/dag/app/dag/DAGTerminationCause.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/DAGTerminationCause.java b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/DAGTerminationCause.java
index b6be395..b73cbe6 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/DAGTerminationCause.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/DAGTerminationCause.java
@@ -26,6 +26,9 @@ public enum DAGTerminationCause {
 
   /** DAG was directly killed.   */
   DAG_KILL(DAGState.KILLED),
+
+  /** A service plugin indicated an error */
+  SERVICE_PLUGIN_ERROR(DAGState.FAILED),
   
   /** A vertex failed. */ 
   VERTEX_FAILURE(DAGState.FAILED),

http://git-wip-us.apache.org/repos/asf/tez/blob/a812c346/tez-dag/src/main/java/org/apache/tez/dag/app/dag/VertexTerminationCause.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/VertexTerminationCause.java b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/VertexTerminationCause.java
index 816f85a..49be74d 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/VertexTerminationCause.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/VertexTerminationCause.java
@@ -24,7 +24,7 @@ package org.apache.tez.dag.app.dag;
 public enum VertexTerminationCause {
 
   /** DAG was killed  */
-  DAG_KILL(VertexState.KILLED),
+  DAG_TERMINATED(VertexState.KILLED),
 
   /** Other vertex failed causing DAG to fail thus killing this vertex  */
   OTHER_VERTEX_FAILURE(VertexState.KILLED),

http://git-wip-us.apache.org/repos/asf/tez/blob/a812c346/tez-dag/src/main/java/org/apache/tez/dag/app/dag/event/DAGAppMasterEventSchedulingServiceError.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/event/DAGAppMasterEventSchedulingServiceError.java b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/event/DAGAppMasterEventSchedulingServiceError.java
index 16625df..cf49d20 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/event/DAGAppMasterEventSchedulingServiceError.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/event/DAGAppMasterEventSchedulingServiceError.java
@@ -18,17 +18,18 @@
 
 package org.apache.tez.dag.app.dag.event;
 
-public class DAGAppMasterEventSchedulingServiceError extends DAGAppMasterEvent {
+public class DAGAppMasterEventSchedulingServiceError extends DAGAppMasterEvent
+    implements DiagnosableEvent {
 
-  private final Throwable throwable;
+  private final String diagnostics;
 
-  public DAGAppMasterEventSchedulingServiceError(Throwable t) {
+  public DAGAppMasterEventSchedulingServiceError(String diagnostics) {
     super(DAGAppMasterEventType.SCHEDULING_SERVICE_ERROR);
-    this.throwable = t;
+    this.diagnostics = diagnostics;
   }
 
-  public Throwable getThrowable() {
-    return throwable;
+  @Override
+  public String getDiagnosticInfo() {
+    return diagnostics;
   }
-
 }

http://git-wip-us.apache.org/repos/asf/tez/blob/a812c346/tez-dag/src/main/java/org/apache/tez/dag/app/dag/event/DAGEventTerminateDag.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/event/DAGEventTerminateDag.java b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/event/DAGEventTerminateDag.java
new file mode 100644
index 0000000..1286e11
--- /dev/null
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/event/DAGEventTerminateDag.java
@@ -0,0 +1,38 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tez.dag.app.dag.event;
+
+import org.apache.tez.dag.app.dag.DAGTerminationCause;
+import org.apache.tez.dag.records.TezDAGID;
+
+public class DAGEventTerminateDag extends DAGEvent implements DiagnosableEvent {
+  private final String diagMessage;
+  private final DAGTerminationCause terminationCause;
+
+  public DAGEventTerminateDag(TezDAGID dagId, DAGTerminationCause terminationCause, String message) {
+    super(dagId, DAGEventType.DAG_TERMINATE);
+    this.diagMessage = message;
+    this.terminationCause = terminationCause;
+  }
+
+  @Override
+  public String getDiagnosticInfo() {
+    return diagMessage;
+  }
+
+  public DAGTerminationCause getTerminationCause() {
+    return terminationCause;
+  }
+}

http://git-wip-us.apache.org/repos/asf/tez/blob/a812c346/tez-dag/src/main/java/org/apache/tez/dag/app/dag/event/DAGEventType.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/event/DAGEventType.java b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/event/DAGEventType.java
index ea6a3cc..bf3b30a 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/event/DAGEventType.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/event/DAGEventType.java
@@ -23,8 +23,8 @@ package org.apache.tez.dag.app.dag.event;
  */
 public enum DAGEventType {
 
-  //Producer:Client
-  DAG_KILL,
+  //Producer: ServicePluginManagers , Client (KILL)
+  DAG_TERMINATE,
 
   //Producer:AM
   DAG_INIT,

http://git-wip-us.apache.org/repos/asf/tez/blob/a812c346/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/DAGImpl.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/DAGImpl.java b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/DAGImpl.java
index 88dfe27..a6c6c02 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/DAGImpl.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/DAGImpl.java
@@ -43,7 +43,7 @@ import org.apache.commons.lang.StringUtils;
 import org.apache.commons.lang.exception.ExceptionUtils;
 import org.apache.tez.common.TezUtilsInternal;
 import org.apache.tez.common.counters.LimitExceededException;
-import org.apache.tez.dag.app.dag.event.DAGEventInternalError;
+import org.apache.tez.dag.app.dag.event.DAGEventTerminateDag;
 import org.apache.tez.dag.app.dag.event.DiagnosableEvent;
 import org.apache.tez.state.OnStateChangedCallback;
 import org.apache.tez.state.StateMachineTez;
@@ -253,8 +253,8 @@ public class DAGImpl implements org.apache.tez.dag.app.dag.DAG,
               EnumSet.of(DAGState.INITED, DAGState.FAILED),
               DAGEventType.DAG_INIT,
               new InitTransition())
-          .addTransition(DAGState.NEW, DAGState.KILLED,
-              DAGEventType.DAG_KILL,
+          .addTransition(DAGState.NEW, EnumSet.of(DAGState.KILLED, DAGState.FAILED),
+              DAGEventType.DAG_TERMINATE,
               new KillNewJobTransition())
           .addTransition(DAGState.NEW, DAGState.ERROR,
               DAGEventType.INTERNAL_ERROR,
@@ -269,8 +269,8 @@ public class DAGImpl implements org.apache.tez.dag.app.dag.DAG,
           .addTransition(DAGState.INITED, DAGState.RUNNING,
               DAGEventType.DAG_START,
               new StartTransition())
-          .addTransition(DAGState.INITED, DAGState.KILLED,
-              DAGEventType.DAG_KILL,
+          .addTransition(DAGState.INITED, EnumSet.of(DAGState.KILLED, DAGState.FAILED),
+              DAGEventType.DAG_TERMINATE,
               new KillInitedJobTransition())
           .addTransition(DAGState.INITED, DAGState.ERROR,
               DAGEventType.INTERNAL_ERROR,
@@ -287,7 +287,7 @@ public class DAGImpl implements org.apache.tez.dag.app.dag.DAG,
               DAGEventType.DAG_VERTEX_RERUNNING,
               new VertexReRunningTransition())
           .addTransition(DAGState.RUNNING, DAGState.TERMINATING,
-              DAGEventType.DAG_KILL, new DAGKilledTransition())
+              DAGEventType.DAG_TERMINATE, new DAGKilledTransition())
           .addTransition(DAGState.RUNNING, DAGState.RUNNING,
               DAGEventType.DAG_DIAGNOSTIC_UPDATE,
               DIAGNOSTIC_UPDATE_TRANSITION)
@@ -311,7 +311,7 @@ public class DAGImpl implements org.apache.tez.dag.app.dag.DAG,
               DAGEventType.DAG_COMMIT_COMPLETED,
               COMMIT_COMPLETED_TRANSITION)
           .addTransition(DAGState.COMMITTING, DAGState.TERMINATING, 
-              DAGEventType.DAG_KILL,
+              DAGEventType.DAG_TERMINATE,
               new DAGKilledWhileCommittingTransition())
           .addTransition(
               DAGState.COMMITTING,
@@ -354,7 +354,7 @@ public class DAGImpl implements org.apache.tez.dag.app.dag.DAG,
 
               // Ignore-able events
           .addTransition(DAGState.TERMINATING, DAGState.TERMINATING,
-              EnumSet.of(DAGEventType.DAG_KILL,
+              EnumSet.of(DAGEventType.DAG_TERMINATE,
                          DAGEventType.DAG_VERTEX_RERUNNING,
                          DAGEventType.DAG_SCHEDULER_UPDATE))
 
@@ -370,7 +370,7 @@ public class DAGImpl implements org.apache.tez.dag.app.dag.DAG,
               INTERNAL_ERROR_TRANSITION)
           // Ignore-able events
           .addTransition(DAGState.SUCCEEDED, DAGState.SUCCEEDED,
-              EnumSet.of(DAGEventType.DAG_KILL,
+              EnumSet.of(DAGEventType.DAG_TERMINATE,
                   DAGEventType.DAG_SCHEDULER_UPDATE,
                   DAGEventType.DAG_VERTEX_COMPLETED))
 
@@ -386,7 +386,7 @@ public class DAGImpl implements org.apache.tez.dag.app.dag.DAG,
               INTERNAL_ERROR_TRANSITION)
           // Ignore-able events
           .addTransition(DAGState.FAILED, DAGState.FAILED,
-              EnumSet.of(DAGEventType.DAG_KILL,
+              EnumSet.of(DAGEventType.DAG_TERMINATE,
                   DAGEventType.DAG_START,
                   DAGEventType.DAG_VERTEX_RERUNNING,
                   DAGEventType.DAG_SCHEDULER_UPDATE,
@@ -404,7 +404,7 @@ public class DAGImpl implements org.apache.tez.dag.app.dag.DAG,
               INTERNAL_ERROR_TRANSITION)
           // Ignore-able events
           .addTransition(DAGState.KILLED, DAGState.KILLED,
-              EnumSet.of(DAGEventType.DAG_KILL,
+              EnumSet.of(DAGEventType.DAG_TERMINATE,
                   DAGEventType.DAG_START,
                   DAGEventType.DAG_VERTEX_RERUNNING,
                   DAGEventType.DAG_SCHEDULER_UPDATE,
@@ -415,7 +415,7 @@ public class DAGImpl implements org.apache.tez.dag.app.dag.DAG,
               DAGState.ERROR,
               DAGState.ERROR,
               EnumSet.of(
-                  DAGEventType.DAG_KILL,
+                  DAGEventType.DAG_TERMINATE,
                   DAGEventType.DAG_INIT,
                   DAGEventType.DAG_START,
                   DAGEventType.DAG_VERTEX_COMPLETED,
@@ -1424,6 +1424,11 @@ public class DAGImpl implements org.apache.tez.dag.app.dag.DAG,
   }
 
   @Override
+  public int getIndex() {
+    return dagId.getId();
+  }
+
+  @Override
   public String getName() {
     return dagName;
   }
@@ -1836,28 +1841,41 @@ public class DAGImpl implements org.apache.tez.dag.app.dag.DAG,
     }
   }
 
+  private void addDiagnostics(DiagnosableEvent event) {
+    if (event.getDiagnosticInfo() != null && !event.getDiagnosticInfo().isEmpty()) {
+      addDiagnostic(event.getDiagnosticInfo());
+    }
+  }
+
   // Task-start has been moved out of InitTransition, so this arc simply
   // hardcodes 0 for both map and reduce finished tasks.
-  private static class KillNewJobTransition
-      implements SingleArcTransition<DAGImpl, DAGEvent> {
+  private static class KillNewJobTransition implements
+      MultipleArcTransition<DAGImpl, DAGEvent, DAGState> {
 
     @Override
-    public void transition(DAGImpl dag, DAGEvent dagEvent) {
+    public DAGState transition(DAGImpl dag, DAGEvent dagEvent) {
+      DAGEventTerminateDag event = (DAGEventTerminateDag) dagEvent;
       dag.setFinishTime();
-      dag.trySetTerminationCause(DAGTerminationCause.DAG_KILL);
-      dag.finished(DAGState.KILLED);
+      dag.trySetTerminationCause(event.getTerminationCause());
+      dag.addDiagnostic("Dag received [" + event.getType() + ", " + event.getTerminationCause() +
+          "] in NEW state.");
+      dag.addDiagnostics(event);
+      return dag.finished(event.getTerminationCause().getFinishedState());
     }
 
   }
 
-  private static class KillInitedJobTransition
-      implements SingleArcTransition<DAGImpl, DAGEvent> {
+  private static class KillInitedJobTransition implements
+      MultipleArcTransition<DAGImpl, DAGEvent, DAGState> {
 
     @Override
-    public void transition(DAGImpl dag, DAGEvent dagEvent) {
-      dag.trySetTerminationCause(DAGTerminationCause.DAG_KILL);
-      dag.addDiagnostic("Job received Kill in INITED state.");
-      dag.finished(DAGState.KILLED);
+    public DAGState transition(DAGImpl dag, DAGEvent dagEvent) {
+      DAGEventTerminateDag event = (DAGEventTerminateDag) dagEvent;
+      dag.trySetTerminationCause(event.getTerminationCause());
+      dag.addDiagnostic("Dag received [" + event.getType() + ", " + event.getTerminationCause() +
+          "] in INITED state.");
+      dag.addDiagnostics(event);
+      return dag.finished(event.getTerminationCause().getFinishedState());
     }
 
   }
@@ -1865,11 +1883,14 @@ public class DAGImpl implements org.apache.tez.dag.app.dag.DAG,
   private static class DAGKilledTransition
       implements SingleArcTransition<DAGImpl, DAGEvent> {
     @Override
-    public void transition(DAGImpl job, DAGEvent event) {
-      String msg = "Job received Kill while in RUNNING state.";
+    public void transition(DAGImpl job, DAGEvent dagEvent) {
+      DAGEventTerminateDag event = (DAGEventTerminateDag) dagEvent;
+      String msg = "Dag received [" + event.getType() + ", " + event.getTerminationCause() +
+          "] in RUNNING state.";
       LOG.info(msg);
       job.addDiagnostic(msg);
-      job.enactKill(DAGTerminationCause.DAG_KILL, VertexTerminationCause.DAG_KILL);
+      job.addDiagnostics(event);
+      job.enactKill(event.getTerminationCause(), VertexTerminationCause.DAG_TERMINATED);
       // Commit may happen when dag is still in RUNNING (vertex group commit)
       job.cancelCommits();
       // TODO Metrics
@@ -1883,12 +1904,15 @@ public class DAGImpl implements org.apache.tez.dag.app.dag.DAG,
     implements SingleArcTransition<DAGImpl, DAGEvent> {
 
     @Override
-    public void transition(DAGImpl dag, DAGEvent event) {
-      String diag = "DAG received Kill while in COMMITTING state.";
+    public void transition(DAGImpl dag, DAGEvent dagEvent) {
+      DAGEventTerminateDag event = (DAGEventTerminateDag) dagEvent;
+      String diag = "Dag received [" + event.getType() + ", " + event.getTerminationCause() +
+          "] in COMMITTING state.";
       LOG.info(diag);
       dag.addDiagnostic(diag);
+      dag.addDiagnostics(event);
       dag.cancelCommits();
-      dag.trySetTerminationCause(DAGTerminationCause.DAG_KILL);
+      dag.trySetTerminationCause(event.getTerminationCause());
     }
   }
 

http://git-wip-us.apache.org/repos/asf/tez/blob/a812c346/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/VertexImpl.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/VertexImpl.java b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/VertexImpl.java
index 065974e..c8f217b 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/VertexImpl.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/VertexImpl.java
@@ -3206,7 +3206,7 @@ public class VertexImpl implements org.apache.tez.dag.app.dag.Vertex, EventHandl
       VertexEventTermination vet = (VertexEventTermination) event;
       VertexTerminationCause trigger = vet.getTerminationCause();
       switch(trigger){
-        case DAG_KILL : vertex.tryEnactKill(trigger, TaskTerminationCause.DAG_KILL); break;
+        case DAG_TERMINATED: vertex.tryEnactKill(trigger, TaskTerminationCause.DAG_KILL); break;
         case OWN_TASK_FAILURE: vertex.tryEnactKill(trigger, TaskTerminationCause.OTHER_TASK_FAILURE); break;
         case ROOT_INPUT_INIT_FAILURE:
         case COMMIT_FAILURE:

http://git-wip-us.apache.org/repos/asf/tez/blob/a812c346/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/ContainerLauncherManager.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/ContainerLauncherManager.java b/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/ContainerLauncherManager.java
index 98237c1..250afd8 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/ContainerLauncherManager.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/ContainerLauncherManager.java
@@ -44,6 +44,8 @@ import org.apache.tez.dag.app.TaskCommunicatorManagerInterface;
 import org.apache.tez.dag.app.dag.DAG;
 import org.apache.tez.dag.app.rm.ContainerLauncherEvent;
 import org.apache.tez.dag.app.rm.ContainerLauncherLaunchRequestEvent;
+import org.apache.tez.serviceplugins.api.DagInfo;
+import org.apache.tez.serviceplugins.api.ServicePluginError;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -58,16 +60,9 @@ public class ContainerLauncherManager extends AbstractService
   final ContainerLauncherContext containerLauncherContexts[];
   protected final ServicePluginLifecycleAbstractService[] containerLauncherServiceWrappers;
   private final AppContext appContext;
+  private final boolean isIncompleteCtor;
+
 
-  @VisibleForTesting
-  public ContainerLauncherManager(ContainerLauncher containerLauncher, AppContext context) {
-    super(ContainerLauncherManager.class.getName());
-    this.appContext = context;
-    containerLaunchers = new ContainerLauncherWrapper[] {new ContainerLauncherWrapper(containerLauncher)};
-    containerLauncherContexts = new ContainerLauncherContext[] {containerLauncher.getContext()};
-    containerLauncherServiceWrappers = new ServicePluginLifecycleAbstractService[]{
-        new ServicePluginLifecycleAbstractService<>(containerLauncher)};
-  }
 
   // Accepting conf to setup final parameters, if required.
   public ContainerLauncherManager(AppContext context,
@@ -77,6 +72,7 @@ public class ContainerLauncherManager extends AbstractService
                                   boolean isPureLocalMode) throws TezException {
     super(ContainerLauncherManager.class.getName());
 
+    this.isIncompleteCtor = false;
     this.appContext = context;
     Preconditions.checkArgument(
         containerLauncherDescriptors != null && !containerLauncherDescriptors.isEmpty(),
@@ -89,7 +85,7 @@ public class ContainerLauncherManager extends AbstractService
     for (int i = 0; i < containerLauncherDescriptors.size(); i++) {
       UserPayload userPayload = containerLauncherDescriptors.get(i).getUserPayload();
       ContainerLauncherContext containerLauncherContext =
-          new ContainerLauncherContextImpl(context, taskCommunicatorManagerInterface, userPayload);
+          new ContainerLauncherContextImpl(context, this, taskCommunicatorManagerInterface, userPayload, i);
       containerLauncherContexts[i] = containerLauncherContext;
       containerLaunchers[i] = new ContainerLauncherWrapper(createContainerLauncher(containerLauncherDescriptors.get(i), context,
           containerLauncherContext, taskCommunicatorManagerInterface, workingDirectory, i, isPureLocalMode));
@@ -98,6 +94,25 @@ public class ContainerLauncherManager extends AbstractService
   }
 
   @VisibleForTesting
+  public ContainerLauncherManager(AppContext context) {
+    super(ContainerLauncherManager.class.getName());
+    this.isIncompleteCtor = true;
+    this.appContext = context;
+    containerLaunchers = new ContainerLauncherWrapper[1];
+    containerLauncherContexts = new ContainerLauncherContext[1];
+    containerLauncherServiceWrappers = new ServicePluginLifecycleAbstractService[1];
+  }
+
+  // To be used with the constructor which accepts the AppContext only, and is for testing.
+  @VisibleForTesting
+  public void setContainerLauncher(ContainerLauncher containerLauncher) {
+    Preconditions.checkState(isIncompleteCtor == true, "Can only be used with the Test constructor");
+    containerLaunchers[0] = new ContainerLauncherWrapper(containerLauncher);
+    containerLauncherContexts[0] = containerLauncher.getContext();
+    containerLauncherServiceWrappers[0] = new ServicePluginLifecycleAbstractService<>(containerLauncher);
+  }
+
+  @VisibleForTesting
   ContainerLauncher createContainerLauncher(
       NamedEntityDescriptor containerLauncherDescriptor,
       AppContext context,
@@ -236,6 +251,30 @@ public class ContainerLauncherManager extends AbstractService
     }
   }
 
+  public void reportError(int containerLauncherIndex, ServicePluginError servicePluginError,
+                          String diagnostics,
+                          DagInfo dagInfo) {
+    if (servicePluginError.getErrorType() == ServicePluginError.ErrorType.PERMANENT) {
+      String msg = "Fatal Error reported by ContainerLauncher"
+          + ", containerLauncher=" +
+          Utils.getContainerLauncherIdentifierString(containerLauncherIndex, appContext)
+          + ", servicePluginError=" + servicePluginError
+          + ", diagnostics= " + (diagnostics == null ? "" : diagnostics);
+      LOG.error(msg);
+      sendEvent(
+          new DAGAppMasterEventUserServiceFatalError(
+              DAGAppMasterEventType.CONTAINER_LAUNCHER_SERVICE_FATAL_ERROR,
+              msg, null));
+    } else {
+      Utils
+          .processNonFatalServiceErrorReport(
+              Utils.getContainerLauncherIdentifierString(containerLauncherIndex, appContext),
+              servicePluginError,
+              diagnostics, dagInfo,
+              appContext, "ContainerLauncher");
+    }
+  }
+
   @SuppressWarnings("unchecked")
   private void sendEvent(Event<?> event) {
     appContext.getEventHandler().handle(event);

http://git-wip-us.apache.org/repos/asf/tez/blob/a812c346/tez-dag/src/main/java/org/apache/tez/dag/app/rm/TaskSchedulerContextImpl.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/rm/TaskSchedulerContextImpl.java b/tez-dag/src/main/java/org/apache/tez/dag/app/rm/TaskSchedulerContextImpl.java
index 37aa96b..fb4198b 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/rm/TaskSchedulerContextImpl.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/rm/TaskSchedulerContextImpl.java
@@ -14,10 +14,12 @@
 
 package org.apache.tez.dag.app.rm;
 
+import javax.annotation.Nullable;
 import java.nio.ByteBuffer;
 import java.util.List;
 import java.util.Map;
 
+import com.google.common.base.Preconditions;
 import org.apache.hadoop.yarn.api.records.ApplicationAccessType;
 import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
 import org.apache.hadoop.yarn.api.records.Container;
@@ -29,6 +31,8 @@ import org.apache.tez.common.ContainerSignatureMatcher;
 import org.apache.tez.dag.api.TezUncheckedException;
 import org.apache.tez.dag.api.UserPayload;
 import org.apache.tez.dag.app.AppContext;
+import org.apache.tez.serviceplugins.api.DagInfo;
+import org.apache.tez.serviceplugins.api.ServicePluginError;
 import org.apache.tez.serviceplugins.api.TaskSchedulerContext;
 
 public class TaskSchedulerContextImpl implements TaskSchedulerContext {
@@ -94,11 +98,6 @@ public class TaskSchedulerContextImpl implements TaskSchedulerContext {
   }
 
   @Override
-  public void onError(Throwable t) {
-    taskSchedulerManager.onError(schedulerId, t);
-  }
-
-  @Override
   public float getProgress() {
     return taskSchedulerManager.getProgress(schedulerId);
   }
@@ -139,6 +138,12 @@ public class TaskSchedulerContextImpl implements TaskSchedulerContext {
     return appContext.getApplicationAttemptId();
   }
 
+  @Nullable
+  @Override
+  public DagInfo getCurrentDagInfo() {
+    return appContext.getCurrentDAG();
+  }
+
   @Override
   public String getAppHostName() {
     return appHostName;
@@ -175,4 +180,11 @@ public class TaskSchedulerContextImpl implements TaskSchedulerContext {
         throw new TezUncheckedException("Unexpected state " + appContext.getAMState());
     }
   }
+
+  @Override
+  public void reportError(ServicePluginError servicePluginError, String diagnostics,
+                          DagInfo dagInfo) {
+    Preconditions.checkNotNull(servicePluginError, "ServicePluginError must be specified");
+    taskSchedulerManager.reportError(schedulerId, servicePluginError, diagnostics, dagInfo);
+  }
 }

http://git-wip-us.apache.org/repos/asf/tez/blob/a812c346/tez-dag/src/main/java/org/apache/tez/dag/app/rm/TaskSchedulerContextImplWrapper.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/rm/TaskSchedulerContextImplWrapper.java b/tez-dag/src/main/java/org/apache/tez/dag/app/rm/TaskSchedulerContextImplWrapper.java
index 9e4c8e0..7e1988b 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/rm/TaskSchedulerContextImplWrapper.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/rm/TaskSchedulerContextImplWrapper.java
@@ -18,6 +18,8 @@
 
 package org.apache.tez.dag.app.rm;
 
+import javax.annotation.Nonnull;
+import javax.annotation.Nullable;
 import java.nio.ByteBuffer;
 import java.util.List;
 import java.util.Map;
@@ -37,6 +39,8 @@ import org.apache.hadoop.yarn.api.records.Resource;
 import org.apache.tez.common.ContainerSignatureMatcher;
 import org.apache.tez.dag.api.TezUncheckedException;
 import org.apache.tez.dag.api.UserPayload;
+import org.apache.tez.serviceplugins.api.DagInfo;
+import org.apache.tez.serviceplugins.api.ServicePluginError;
 import org.apache.tez.serviceplugins.api.TaskSchedulerContext;
 
 /**
@@ -97,8 +101,9 @@ class TaskSchedulerContextImplWrapper implements TaskSchedulerContext {
   }
 
   @Override
-  public void onError(Throwable t) {
-    executorService.submit(new OnErrorCallable(real, t));
+  public void reportError(@Nonnull ServicePluginError servicePluginError, String message,
+                          DagInfo dagInfo) {
+    executorService.submit(new ReportErrorCallable(real, servicePluginError, message, dagInfo));
   }
 
   @Override
@@ -156,6 +161,12 @@ class TaskSchedulerContextImplWrapper implements TaskSchedulerContext {
     return real.getApplicationAttemptId();
   }
 
+  @Nullable
+  @Override
+  public DagInfo getCurrentDagInfo() {
+    return real.getCurrentDagInfo();
+  }
+
   @Override
   public String getAppHostName() {
     return real.getAppHostName();
@@ -175,6 +186,7 @@ class TaskSchedulerContextImplWrapper implements TaskSchedulerContext {
   public AMState getAMState() {
     return real.getAMState();
   }
+
   // End of getters which do not need to go through a thread. Underlying implementation
   // does not use locks.
 
@@ -301,19 +313,24 @@ class TaskSchedulerContextImplWrapper implements TaskSchedulerContext {
     }
   }
 
-  static class OnErrorCallable extends TaskSchedulerContextCallbackBase implements
-      Callable<Void> {
+  static class ReportErrorCallable extends TaskSchedulerContextCallbackBase implements Callable<Void> {
 
-    private final Throwable throwable;
+    private final ServicePluginError servicePluginError;
+    private final String message;
+    private final DagInfo dagInfo;
 
-    public OnErrorCallable(TaskSchedulerContext app, Throwable throwable) {
+    public ReportErrorCallable(TaskSchedulerContext app,
+                               ServicePluginError servicePluginError, String message,
+                               DagInfo dagInfo) {
       super(app);
-      this.throwable = throwable;
+      this.servicePluginError = servicePluginError;
+      this.message = message;
+      this.dagInfo = dagInfo;
     }
 
     @Override
     public Void call() throws Exception {
-      app.onError(throwable);
+      app.reportError(servicePluginError, message, dagInfo);
       return null;
     }
   }

http://git-wip-us.apache.org/repos/asf/tez/blob/a812c346/tez-dag/src/main/java/org/apache/tez/dag/app/rm/TaskSchedulerManager.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/rm/TaskSchedulerManager.java b/tez-dag/src/main/java/org/apache/tez/dag/app/rm/TaskSchedulerManager.java
index fa9fb81..5317440 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/rm/TaskSchedulerManager.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/rm/TaskSchedulerManager.java
@@ -38,6 +38,8 @@ import org.apache.tez.dag.api.NamedEntityDescriptor;
 import org.apache.tez.dag.api.TezConstants;
 import org.apache.tez.dag.app.ServicePluginLifecycleAbstractService;
 import org.apache.tez.dag.app.dag.event.DAGAppMasterEventUserServiceFatalError;
+import org.apache.tez.serviceplugins.api.DagInfo;
+import org.apache.tez.serviceplugins.api.ServicePluginError;
 import org.apache.tez.serviceplugins.api.TaskScheduler;
 import org.apache.tez.serviceplugins.api.TaskSchedulerContext;
 import org.apache.tez.serviceplugins.api.TaskSchedulerContext.AppFinalStatus;
@@ -844,9 +846,36 @@ public class TaskSchedulerManager extends AbstractService implements
     return dagAppMaster.getProgress();
   }
 
-  public void onError(int schedulerId, Throwable t) {
-    LOG.info("Error reported by scheduler {} - {}", schedulerId, t);
-    sendEvent(new DAGAppMasterEventSchedulingServiceError(t));
+  public void reportError(int taskSchedulerIndex, ServicePluginError servicePluginError,
+                          String diagnostics,
+                          DagInfo dagInfo) {
+    if (servicePluginError == YarnTaskSchedulerServiceError.RESOURCEMANAGER_ERROR) {
+      LOG.info("Error reported by scheduler {} - {}",
+          Utils.getTaskSchedulerIdentifierString(taskSchedulerIndex, appContext) + ": " +
+              diagnostics);
+      if (taskSchedulerDescriptors[taskSchedulerIndex].getClassName()
+          .equals(YarnTaskSchedulerService.class.getName())) {
+        LOG.warn(
+            "Reporting a SchedulerServiceError to the DAGAppMaster since the error" +
+                " was reported by the default YARN Task Scheduler");
+        sendEvent(new DAGAppMasterEventSchedulingServiceError(diagnostics));
+      }
+    } else if (servicePluginError.getErrorType() == ServicePluginError.ErrorType.PERMANENT) {
+      String msg = "Fatal error reported by TaskScheduler"
+          + ", scheduler=" + Utils.getTaskSchedulerIdentifierString(taskSchedulerIndex, appContext)
+          + ", servicePluginError=" + servicePluginError
+          + ", diagnostics= " + (diagnostics == null ? "" : diagnostics);
+      LOG.error(msg);
+      sendEvent(
+          new DAGAppMasterEventUserServiceFatalError(
+              DAGAppMasterEventType.TASK_SCHEDULER_SERVICE_FATAL_ERROR,
+              msg, null));
+    } else {
+      Utils.processNonFatalServiceErrorReport(
+          Utils.getTaskSchedulerIdentifierString(taskSchedulerIndex, appContext),
+          servicePluginError, diagnostics, dagInfo,
+          appContext, "TaskScheduler");
+    }
   }
 
   public void dagCompleted() {
@@ -964,5 +993,4 @@ public class TaskSchedulerManager extends AbstractService implements
 
     return historyUrl;
   }
-
 }

http://git-wip-us.apache.org/repos/asf/tez/blob/a812c346/tez-dag/src/main/java/org/apache/tez/dag/app/rm/YarnTaskSchedulerService.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/rm/YarnTaskSchedulerService.java b/tez-dag/src/main/java/org/apache/tez/dag/app/rm/YarnTaskSchedulerService.java
index 1f05064..c1c363b 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/rm/YarnTaskSchedulerService.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/rm/YarnTaskSchedulerService.java
@@ -35,7 +35,7 @@ import java.util.concurrent.PriorityBlockingQueue;
 import java.util.concurrent.atomic.AtomicBoolean;
 import java.util.concurrent.atomic.AtomicInteger;
 
-import org.apache.hadoop.yarn.api.records.ContainerState;
+import org.apache.hadoop.util.StringUtils;
 import org.apache.tez.common.TezUtils;
 import org.apache.tez.serviceplugins.api.TaskScheduler;
 import org.apache.tez.serviceplugins.api.TaskSchedulerContext;
@@ -916,7 +916,9 @@ public class YarnTaskSchedulerService extends TaskScheduler
       LOG.error("Got TaskSchedulerError, " + ExceptionUtils.getStackTrace(t));
       return;
     }
-    getContext().onError(t);
+    LOG.error("Got Error from RMClient", t);
+    getContext().reportError(YarnTaskSchedulerServiceError.RESOURCEMANAGER_ERROR, StringUtils.stringifyException(t),
+        null);
   }
 
   @Override

http://git-wip-us.apache.org/repos/asf/tez/blob/a812c346/tez-dag/src/main/java/org/apache/tez/dag/app/rm/YarnTaskSchedulerServiceError.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/rm/YarnTaskSchedulerServiceError.java b/tez-dag/src/main/java/org/apache/tez/dag/app/rm/YarnTaskSchedulerServiceError.java
new file mode 100644
index 0000000..e8017dd
--- /dev/null
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/rm/YarnTaskSchedulerServiceError.java
@@ -0,0 +1,33 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tez.dag.app.rm;
+
+import org.apache.tez.serviceplugins.api.ServicePluginError;
+
+public enum YarnTaskSchedulerServiceError implements ServicePluginError {
+
+  RESOURCEMANAGER_ERROR;
+
+  @Override
+  public Enum getEnum() {
+    return this;
+  }
+
+  @Override
+  public ErrorType getErrorType() {
+    return ErrorType.PERMANENT;
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/tez/blob/a812c346/tez-dag/src/main/java/org/apache/tez/serviceplugins/api/TaskCommunicatorContext.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/serviceplugins/api/TaskCommunicatorContext.java b/tez-dag/src/main/java/org/apache/tez/serviceplugins/api/TaskCommunicatorContext.java
index c55bdbd..c551b09 100644
--- a/tez-dag/src/main/java/org/apache/tez/serviceplugins/api/TaskCommunicatorContext.java
+++ b/tez-dag/src/main/java/org/apache/tez/serviceplugins/api/TaskCommunicatorContext.java
@@ -36,7 +36,6 @@ import org.apache.hadoop.security.Credentials;
 import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
 import org.apache.hadoop.yarn.api.records.ContainerId;
 import org.apache.tez.dag.api.TezException;
-import org.apache.tez.dag.api.UserPayload;
 import org.apache.tez.dag.api.event.VertexState;
 import org.apache.tez.dag.records.TezTaskAttemptID;
 
@@ -44,7 +43,7 @@ import org.apache.tez.dag.records.TezTaskAttemptID;
 // Do not make calls into this from within a held lock.
 
 // TODO TEZ-2003 (post) TEZ-2665. Move to the tez-api module
-public interface TaskCommunicatorContext {
+public interface TaskCommunicatorContext extends ServicePluginContextBase {
 
   // TODO TEZ-2003 (post) TEZ-2666 Enhancements to API
   // - Consolidate usage of IDs
@@ -57,12 +56,6 @@ public interface TaskCommunicatorContext {
   // - Maybe add book-keeping as a helper library, instead of each impl tracking container to task etc.
   // - Handling of containres / tasks which no longer exist in the system (formalized interface instead of a shouldDie notification)
 
-  /**
-   * Get the UserPayload that was configured while setting up the task communicator
-   *
-   * @return the initially configured user payload
-   */
-  UserPayload getInitialUserPayload();
 
   /**
    * Get the application attempt id for the running application. Relevant when running under YARN
@@ -170,11 +163,14 @@ public interface TaskCommunicatorContext {
    */
   void registerForVertexStateUpdates(String vertexName, @Nullable Set<VertexState> stateSet);
 
+  // TODO TEZ-3120 Remove deprecated methods
   /**
    * Get the name of the currently executing dag
    *
    * @return the name of the currently executing dag
+   * @deprecated replaced by {@link TaskCommunicatorContext#getCurrentDagInfo}
    */
+  @Deprecated
   String getCurrentDagName();
 
   /**
@@ -183,10 +179,13 @@ public interface TaskCommunicatorContext {
    */
   String getCurrentAppIdentifier();
 
+  // TODO TEZ-3120 Remove deprecated methods
   /**
    * Get the identifier for the currently executing dag.
    * @return a numerical identifier for the currently running DAG. This is unique within the currently running application.
+   * @deprecated replaced by {@link TaskCommunicatorContext#getCurrentDagInfo}
    */
+  @Deprecated
   int getCurrentDagIdenitifer();
 
   /**
@@ -237,4 +236,5 @@ public interface TaskCommunicatorContext {
    * @return time when the current dag started executing
    */
   long getDagStartTime();
+
 }

http://git-wip-us.apache.org/repos/asf/tez/blob/a812c346/tez-dag/src/test/java/org/apache/tez/dag/api/client/TestDAGClientHandler.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/test/java/org/apache/tez/dag/api/client/TestDAGClientHandler.java b/tez-dag/src/test/java/org/apache/tez/dag/api/client/TestDAGClientHandler.java
index 80414ba..23a5191 100644
--- a/tez-dag/src/test/java/org/apache/tez/dag/api/client/TestDAGClientHandler.java
+++ b/tez-dag/src/test/java/org/apache/tez/dag/api/client/TestDAGClientHandler.java
@@ -112,7 +112,7 @@ public class TestDAGClientHandler {
     }
     dagClientHandler.tryKillDAG("dag_9999_0001_1");
     ArgumentCaptor<DAG> eventCaptor = ArgumentCaptor.forClass(DAG.class);
-    verify(mockDagAM, times(1)).tryKillDAG(eventCaptor.capture());
+    verify(mockDagAM, times(1)).tryKillDAG(eventCaptor.capture(), eq("Kill Dag request received from client"));
     assertEquals(1, eventCaptor.getAllValues().size());
     assertTrue(eventCaptor.getAllValues().get(0) instanceof DAG);
     assertEquals("dag_9999_0001_1",  ((DAG)eventCaptor.getAllValues().get(0)).getID().toString());
@@ -125,7 +125,7 @@ public class TestDAGClientHandler {
     
     // shutdown
     dagClientHandler.shutdownAM();
-    verify(mockDagAM).shutdownTezAM();
+    verify(mockDagAM).shutdownTezAM(eq("AM Shutdown request received from client"));
   }
   
 }

http://git-wip-us.apache.org/repos/asf/tez/blob/a812c346/tez-dag/src/test/java/org/apache/tez/dag/app/MockDAGAppMaster.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/test/java/org/apache/tez/dag/app/MockDAGAppMaster.java b/tez-dag/src/test/java/org/apache/tez/dag/app/MockDAGAppMaster.java
index 08f81fb..b021a36 100644
--- a/tez-dag/src/test/java/org/apache/tez/dag/app/MockDAGAppMaster.java
+++ b/tez-dag/src/test/java/org/apache/tez/dag/app/MockDAGAppMaster.java
@@ -520,10 +520,12 @@ public class MockDAGAppMaster extends DAGAppMaster {
     } catch (IOException e) {
       throw new TezUncheckedException(e);
     }
+    ContainerLauncherManager clManager = new ContainerLauncherManager(getContext());
     ContainerLauncherContext containerLauncherContext =
-        new ContainerLauncherContextImpl(getContext(), getTaskCommunicatorManager(), userPayload);
+        new ContainerLauncherContextImpl(getContext(), clManager, getTaskCommunicatorManager(), userPayload, 0);
     containerLauncher = new MockContainerLauncher(launcherGoFlag, containerLauncherContext);
-    return new ContainerLauncherManager(containerLauncher, getContext());
+    clManager.setContainerLauncher(containerLauncher);
+    return clManager;
   }
 
   @Override

http://git-wip-us.apache.org/repos/asf/tez/blob/a812c346/tez-dag/src/test/java/org/apache/tez/dag/app/TestMockDAGAppMaster.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/test/java/org/apache/tez/dag/app/TestMockDAGAppMaster.java b/tez-dag/src/test/java/org/apache/tez/dag/app/TestMockDAGAppMaster.java
index d5ee67d..74ac51e 100644
--- a/tez-dag/src/test/java/org/apache/tez/dag/app/TestMockDAGAppMaster.java
+++ b/tez-dag/src/test/java/org/apache/tez/dag/app/TestMockDAGAppMaster.java
@@ -854,7 +854,8 @@ public class TestMockDAGAppMaster {
 
     tezClient.submitDAG(dag);
     mockLauncher.waitTillContainersLaunched();
-    mockApp.handle(new DAGAppMasterEventSchedulingServiceError(new RuntimeException("Mock error")));
+    mockApp.handle(new DAGAppMasterEventSchedulingServiceError(
+        org.apache.hadoop.util.StringUtils.stringifyException(new RuntimeException("Mock error"))));
 
     while(!mockApp.getShutdownHandler().wasShutdownInvoked()) {
       Thread.sleep(100);

http://git-wip-us.apache.org/repos/asf/tez/blob/a812c346/tez-dag/src/test/java/org/apache/tez/dag/app/TestTaskCommunicatorManager.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/test/java/org/apache/tez/dag/app/TestTaskCommunicatorManager.java b/tez-dag/src/test/java/org/apache/tez/dag/app/TestTaskCommunicatorManager.java
index 5323928..c7f97d3 100644
--- a/tez-dag/src/test/java/org/apache/tez/dag/app/TestTaskCommunicatorManager.java
+++ b/tez-dag/src/test/java/org/apache/tez/dag/app/TestTaskCommunicatorManager.java
@@ -23,11 +23,13 @@ import static org.mockito.Matchers.eq;
 import static org.mockito.Mockito.RETURNS_DEEP_STUBS;
 import static org.mockito.Mockito.doReturn;
 import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.reset;
 import static org.mockito.Mockito.spy;
 import static org.mockito.Mockito.times;
 import static org.mockito.Mockito.verify;
 import static org.mockito.Mockito.when;
 
+import javax.annotation.Nullable;
 import java.io.IOException;
 import java.lang.reflect.Method;
 import java.net.InetSocketAddress;
@@ -42,6 +44,7 @@ import java.util.concurrent.atomic.AtomicInteger;
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.security.Credentials;
+import org.apache.hadoop.yarn.api.records.ApplicationId;
 import org.apache.hadoop.yarn.api.records.ContainerId;
 import org.apache.hadoop.yarn.api.records.LocalResource;
 import org.apache.hadoop.yarn.api.records.NodeId;
@@ -49,6 +52,11 @@ import org.apache.hadoop.yarn.event.Event;
 import org.apache.hadoop.yarn.event.EventHandler;
 import org.apache.tez.common.TezUtils;
 import org.apache.tez.dag.api.NamedEntityDescriptor;
+import org.apache.tez.dag.app.dag.event.DAGEventTerminateDag;
+import org.apache.tez.dag.helpers.DagInfoImplForTest;
+import org.apache.tez.dag.records.TezDAGID;
+import org.apache.tez.serviceplugins.api.ServicePluginErrorDefaults;
+import org.apache.tez.serviceplugins.api.ServicePluginException;
 import org.apache.tez.serviceplugins.api.TaskCommunicator;
 import org.apache.tez.serviceplugins.api.TaskCommunicatorContext;
 import org.apache.tez.dag.api.TezConstants;
@@ -62,6 +70,7 @@ import org.apache.tez.dag.records.TezTaskAttemptID;
 import org.apache.tez.runtime.api.impl.TaskSpec;
 import org.apache.tez.serviceplugins.api.ContainerEndReason;
 import org.apache.tez.serviceplugins.api.TaskAttemptEndReason;
+import org.apache.tez.serviceplugins.api.TaskCommunicatorDescriptor;
 import org.junit.After;
 import org.junit.Before;
 import org.junit.Test;
@@ -73,7 +82,7 @@ public class TestTaskCommunicatorManager {
 
   @Before
   @After
-  public void reset() {
+  public void resetForNextTest() {
     TaskCommManagerForMultipleCommTest.reset();
   }
 
@@ -233,6 +242,71 @@ public class TestTaskCommunicatorManager {
 
   @SuppressWarnings("unchecked")
   @Test(timeout = 5000)
+  public void testReportFailureFromTaskCommunicator() throws TezException {
+    String dagName = DAG_NAME;
+    EventHandler eventHandler = mock(EventHandler.class);
+    AppContext appContext = mock(AppContext.class, RETURNS_DEEP_STUBS);
+    doReturn("testTaskCommunicator").when(appContext).getTaskCommunicatorName(0);
+    doReturn(eventHandler).when(appContext).getEventHandler();
+
+    DAG dag = mock(DAG.class);
+    TezDAGID dagId = TezDAGID.getInstance(ApplicationId.newInstance(1, 0), DAG_INDEX);
+    doReturn(dagName).when(dag).getName();
+    doReturn(dagId).when(dag).getID();
+    doReturn(dag).when(appContext).getCurrentDAG();
+
+    NamedEntityDescriptor<TaskCommunicatorDescriptor> namedEntityDescriptor =
+        new NamedEntityDescriptor<>("testTaskCommunicator", TaskCommForFailureTest.class.getName());
+    List<NamedEntityDescriptor> list = new LinkedList<>();
+    list.add(namedEntityDescriptor);
+
+
+    TaskCommunicatorManager taskCommManager =
+        new TaskCommunicatorManager(appContext, mock(TaskHeartbeatHandler.class),
+            mock(ContainerHeartbeatHandler.class), list);
+    try {
+      taskCommManager.init(new Configuration());
+      taskCommManager.start();
+
+      taskCommManager.registerRunningContainer(mock(ContainerId.class), 0);
+      ArgumentCaptor<Event> argumentCaptor = ArgumentCaptor.forClass(Event.class);
+      verify(eventHandler, times(1)).handle(argumentCaptor.capture());
+
+      Event rawEvent = argumentCaptor.getValue();
+      assertTrue(rawEvent instanceof DAGEventTerminateDag);
+      DAGEventTerminateDag killEvent = (DAGEventTerminateDag) rawEvent;
+      assertTrue(killEvent.getDiagnosticInfo().contains("ReportError"));
+      assertTrue(killEvent.getDiagnosticInfo()
+          .contains(ServicePluginErrorDefaults.SERVICE_UNAVAILABLE.name()));
+      assertTrue(killEvent.getDiagnosticInfo().contains("[0:testTaskCommunicator]"));
+
+
+      reset(eventHandler);
+
+      taskCommManager.dagComplete(dag);
+
+      argumentCaptor = ArgumentCaptor.forClass(Event.class);
+
+      verify(eventHandler, times(1)).handle(argumentCaptor.capture());
+      rawEvent = argumentCaptor.getValue();
+
+      assertTrue(rawEvent instanceof DAGAppMasterEventUserServiceFatalError);
+      DAGAppMasterEventUserServiceFatalError event =
+          (DAGAppMasterEventUserServiceFatalError) rawEvent;
+      assertEquals(DAGAppMasterEventType.TASK_COMMUNICATOR_SERVICE_FATAL_ERROR, event.getType());
+      assertTrue(event.getDiagnosticInfo().contains("ReportedFatalError"));
+      assertTrue(
+          event.getDiagnosticInfo().contains(ServicePluginErrorDefaults.INCONSISTENT_STATE.name()));
+      assertTrue(event.getDiagnosticInfo().contains("[0:testTaskCommunicator]"));
+
+    } finally {
+      taskCommManager.stop();
+    }
+
+  }
+
+  @SuppressWarnings("unchecked")
+  @Test(timeout = 5000)
   public void testTaskCommunicatorUserError() {
     TaskCommunicatorContextImpl taskCommContext = mock(TaskCommunicatorContextImpl.class);
     TaskCommunicator taskCommunicator = mock(TaskCommunicator.class, new ExceptionAnswer());
@@ -313,7 +387,6 @@ public class TestTaskCommunicatorManager {
     }
   }
 
-
   static class TaskCommManagerForMultipleCommTest extends TaskCommunicatorManager {
 
     // All variables setup as static since methods being overridden are invoked by the ContainerLauncherRouter ctor,
@@ -460,4 +533,63 @@ public class TestTaskCommunicatorManager {
       return null;
     }
   }
+
+  private static final String DAG_NAME = "dagName";
+  private static final int DAG_INDEX = 1;
+  public static class TaskCommForFailureTest extends TaskCommunicator {
+
+    public TaskCommForFailureTest(
+        TaskCommunicatorContext taskCommunicatorContext) {
+      super(taskCommunicatorContext);
+    }
+
+    @Override
+    public void registerRunningContainer(ContainerId containerId, String hostname, int port) throws
+        ServicePluginException {
+      getContext()
+          .reportError(ServicePluginErrorDefaults.SERVICE_UNAVAILABLE, "ReportError", new DagInfoImplForTest(DAG_INDEX, DAG_NAME));
+    }
+
+    @Override
+    public void registerContainerEnd(ContainerId containerId, ContainerEndReason endReason,
+                                     @Nullable String diagnostics) throws ServicePluginException {
+
+    }
+
+    @Override
+    public void registerRunningTaskAttempt(ContainerId containerId, TaskSpec taskSpec,
+                                           Map<String, LocalResource> additionalResources,
+                                           Credentials credentials, boolean credentialsChanged,
+                                           int priority) throws ServicePluginException {
+
+    }
+
+    @Override
+    public void unregisterRunningTaskAttempt(TezTaskAttemptID taskAttemptID,
+                                             TaskAttemptEndReason endReason,
+                                             @Nullable String diagnostics) throws
+        ServicePluginException {
+
+    }
+
+    @Override
+    public InetSocketAddress getAddress() throws ServicePluginException {
+      return null;
+    }
+
+    @Override
+    public void onVertexStateUpdated(VertexStateUpdate stateUpdate) throws ServicePluginException {
+
+    }
+
+    @Override
+    public void dagComplete(int dagIdentifier) throws ServicePluginException {
+      getContext().reportError(ServicePluginErrorDefaults.INCONSISTENT_STATE, "ReportedFatalError", null);
+    }
+
+    @Override
+    public Object getMetaInfo() throws ServicePluginException {
+      return null;
+    }
+  }
 }


[08/24] tez git commit: TEZ-3081. Update tez website for trademarks feedback. (hitesh)

Posted by sr...@apache.org.
TEZ-3081. Update tez website for trademarks feedback. (hitesh)


Project: http://git-wip-us.apache.org/repos/asf/tez/repo
Commit: http://git-wip-us.apache.org/repos/asf/tez/commit/870972d2
Tree: http://git-wip-us.apache.org/repos/asf/tez/tree/870972d2
Diff: http://git-wip-us.apache.org/repos/asf/tez/diff/870972d2

Branch: refs/heads/TEZ-2980
Commit: 870972d2db66e8c89a8a61c4a7930a82e36b59e3
Parents: 3ff360a
Author: Hitesh Shah <hi...@apache.org>
Authored: Fri Jan 29 14:55:06 2016 -0800
Committer: Hitesh Shah <hi...@apache.org>
Committed: Fri Jan 29 14:55:06 2016 -0800

----------------------------------------------------------------------
 CHANGES.txt                                     |  1 +
 docs/src/site/markdown/index.md                 |  7 +++---
 .../site/markdown/releases/apache-tez-0-5-0.md  |  6 ++---
 .../site/markdown/releases/apache-tez-0-5-1.md  |  6 ++---
 .../site/markdown/releases/apache-tez-0-5-2.md  |  6 ++---
 .../site/markdown/releases/apache-tez-0-5-3.md  |  6 ++---
 .../site/markdown/releases/apache-tez-0-5-4.md  |  6 ++---
 .../site/markdown/releases/apache-tez-0-6-0.md  |  6 ++---
 .../site/markdown/releases/apache-tez-0-6-1.md  |  6 ++---
 .../site/markdown/releases/apache-tez-0-6-2.md  |  6 ++---
 .../site/markdown/releases/apache-tez-0-7-0.md  |  6 ++---
 .../markdown/releases/apache-tez-0-8-0-alpha.md |  6 ++---
 .../markdown/releases/apache-tez-0-8-1-alpha.md |  6 ++---
 .../site/markdown/releases/apache-tez-0-8-2.md  |  6 ++---
 docs/src/site/markdown/releases/index.md        | 26 ++++++++++----------
 docs/src/site/site.xml                          |  4 +--
 16 files changed, 56 insertions(+), 54 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tez/blob/870972d2/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index 6570f8b..a550015 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -6,6 +6,7 @@ Release 0.8.3: Unreleased
 INCOMPATIBLE CHANGES
 
 ALL CHANGES:
+  TEZ-3081. Update tez website for trademarks feedback.
   TEZ-3076. Reduce merge memory overhead to support large number of in-memory mapoutputs
   TEZ-3079. Fix tez-tfile parser documentation.
   TEZ-3066. TaskAttemptFinishedEvent ConcurrentModificationException in recovery or history logging services.

http://git-wip-us.apache.org/repos/asf/tez/blob/870972d2/docs/src/site/markdown/index.md
----------------------------------------------------------------------
diff --git a/docs/src/site/markdown/index.md b/docs/src/site/markdown/index.md
index 337d21c..d0f307a 100644
--- a/docs/src/site/markdown/index.md
+++ b/docs/src/site/markdown/index.md
@@ -15,15 +15,15 @@
    limitations under the License.
 -->
 
-<head><title>Welcome to Apache Tez</title></head>
+<head><title>Welcome to Apache Tez&trade;</title></head>
 
 Introduction
 ------------
 
-The Apache Tez project is aimed at building an application framework
+The Apache Tez&trade; project is aimed at building an application framework
 which allows for a complex directed-acyclic-graph of tasks for processing
 data. It is currently built atop
-[Apache Hadoop YARN](http://hadoop.apache.org/docs/current/hadoop-yarn/hadoop-yarn-site/YARN.html)
+[Apache Hadoop YARN](http://hadoop.apache.org/docs/current/hadoop-yarn/hadoop-yarn-site/YARN.html).
 
 The 2 main design themes for Tez are:
 
@@ -45,3 +45,4 @@ multiple MR jobs, now in a single Tez job as shown below.
 ![Flow for a Hive or Pig Query on MapReduce](./images/PigHiveQueryOnMR.png)
 ![Flow for a Hive or Pig Query on Tez](./images/PigHiveQueryOnTez.png)
 
+To download the Apache Tez software, go to the [Releases](./releases/index.html) page.

http://git-wip-us.apache.org/repos/asf/tez/blob/870972d2/docs/src/site/markdown/releases/apache-tez-0-5-0.md
----------------------------------------------------------------------
diff --git a/docs/src/site/markdown/releases/apache-tez-0-5-0.md b/docs/src/site/markdown/releases/apache-tez-0-5-0.md
index 2505845..f206ea6 100644
--- a/docs/src/site/markdown/releases/apache-tez-0-5-0.md
+++ b/docs/src/site/markdown/releases/apache-tez-0-5-0.md
@@ -15,12 +15,12 @@
    limitations under the License.
 -->
 
-<head><title>Apache Tez 0.5.0</title></head>
+<head><title>Apache Tez&trade; 0.5.0</title></head>
 
-Apache Tez 0.5.0
+Apache Tez&trade; 0.5.0
 ----------------
 
-- [Release Artifacts](http://archive.apache.org/dist/tez/0.5.0/)
+- [Download Release Artifacts](http://archive.apache.org/dist/tez/0.5.0/)
 - [Release Notes](0.5.0/release-notes.txt)
 - Documentation
     - [API Javadocs](0.5.0/tez-api-javadocs/index.html) : Documentation for the Tez APIs

http://git-wip-us.apache.org/repos/asf/tez/blob/870972d2/docs/src/site/markdown/releases/apache-tez-0-5-1.md
----------------------------------------------------------------------
diff --git a/docs/src/site/markdown/releases/apache-tez-0-5-1.md b/docs/src/site/markdown/releases/apache-tez-0-5-1.md
index 2d19fee..01417ba 100644
--- a/docs/src/site/markdown/releases/apache-tez-0-5-1.md
+++ b/docs/src/site/markdown/releases/apache-tez-0-5-1.md
@@ -15,12 +15,12 @@
    limitations under the License.
 -->
 
-<head><title>Apache Tez 0.5.1</title></head>
+<head><title>Apache Tez&trade; 0.5.1</title></head>
 
-Apache Tez 0.5.1
+Apache Tez&trade; 0.5.1
 ----------------
 
-- [Release Artifacts](http://archive.apache.org/dist/tez/0.5.1/)
+- [Download Release Artifacts](http://archive.apache.org/dist/tez/0.5.1/)
 - [Release Notes](0.5.1/release-notes.txt)
 - Documentation
     - [API Javadocs](0.5.1/tez-api-javadocs/index.html) : Documentation for the Tez APIs

http://git-wip-us.apache.org/repos/asf/tez/blob/870972d2/docs/src/site/markdown/releases/apache-tez-0-5-2.md
----------------------------------------------------------------------
diff --git a/docs/src/site/markdown/releases/apache-tez-0-5-2.md b/docs/src/site/markdown/releases/apache-tez-0-5-2.md
index 9b15ce8..99ca44b 100644
--- a/docs/src/site/markdown/releases/apache-tez-0-5-2.md
+++ b/docs/src/site/markdown/releases/apache-tez-0-5-2.md
@@ -15,12 +15,12 @@
    limitations under the License.
 -->
 
-<head><title>Apache Tez 0.5.2</title></head>
+<head><title>Apache Tez&trade; 0.5.2</title></head>
 
-Apache Tez 0.5.2
+Apache Tez&trade; 0.5.2
 ----------------
 
-- [Release Artifacts](http://archive.apache.org/dist/tez/0.5.2/)
+- [Download Release Artifacts](http://archive.apache.org/dist/tez/0.5.2/)
 - [Release Notes](0.5.2/release-notes.txt)
 - Documentation (See 0.5.3 documentation)
 

http://git-wip-us.apache.org/repos/asf/tez/blob/870972d2/docs/src/site/markdown/releases/apache-tez-0-5-3.md
----------------------------------------------------------------------
diff --git a/docs/src/site/markdown/releases/apache-tez-0-5-3.md b/docs/src/site/markdown/releases/apache-tez-0-5-3.md
index 6e97f13..7e00fe9 100644
--- a/docs/src/site/markdown/releases/apache-tez-0-5-3.md
+++ b/docs/src/site/markdown/releases/apache-tez-0-5-3.md
@@ -15,12 +15,12 @@
    limitations under the License.
 -->
 
-<head><title>Apache Tez 0.5.3</title></head>
+<head><title>Apache Tez&trade; 0.5.3</title></head>
 
-Apache Tez 0.5.3
+Apache Tez&trade; 0.5.3
 ----------------
 
-- [Release Artifacts](http://archive.apache.org/dist/tez/0.5.3/)
+- [Download Release Artifacts](http://archive.apache.org/dist/tez/0.5.3/)
 - [Release Notes](0.5.3/release-notes.txt)
 - Documentation
     - [API Javadocs](0.5.3/tez-api-javadocs/index.html) : Documentation for the Tez APIs

http://git-wip-us.apache.org/repos/asf/tez/blob/870972d2/docs/src/site/markdown/releases/apache-tez-0-5-4.md
----------------------------------------------------------------------
diff --git a/docs/src/site/markdown/releases/apache-tez-0-5-4.md b/docs/src/site/markdown/releases/apache-tez-0-5-4.md
index f485a38..9d3f96c 100644
--- a/docs/src/site/markdown/releases/apache-tez-0-5-4.md
+++ b/docs/src/site/markdown/releases/apache-tez-0-5-4.md
@@ -15,12 +15,12 @@
    limitations under the License.
 -->
 
-<head><title>Apache Tez 0.5.4</title></head>
+<head><title>Apache Tez&trade; 0.5.4</title></head>
 
-Apache Tez 0.5.4
+Apache Tez&trade; 0.5.4
 ----------------
 
-- [Release Artifacts](http://www.apache.org/dyn/closer.lua/tez/0.5.4/)
+- [Download Release Artifacts](http://www.apache.org/dyn/closer.lua/tez/0.5.4/)
 - [Release Notes](0.5.4/release-notes.txt)
 - Documentation
     - [API Javadocs](0.5.4/tez-api-javadocs/index.html) : Documentation for the Tez APIs

http://git-wip-us.apache.org/repos/asf/tez/blob/870972d2/docs/src/site/markdown/releases/apache-tez-0-6-0.md
----------------------------------------------------------------------
diff --git a/docs/src/site/markdown/releases/apache-tez-0-6-0.md b/docs/src/site/markdown/releases/apache-tez-0-6-0.md
index 6c7c13b..473d03b 100644
--- a/docs/src/site/markdown/releases/apache-tez-0-6-0.md
+++ b/docs/src/site/markdown/releases/apache-tez-0-6-0.md
@@ -15,12 +15,12 @@
    limitations under the License.
 -->
 
-<head><title>Apache Tez 0.6.0</title></head>
+<head><title>Apache Tez&trade; 0.6.0</title></head>
 
-Apache Tez 0.6.0
+Apache Tez&trade; 0.6.0
 ----------------
 
-- [Release Artifacts](http://archive.apache.org/dist/tez/0.6.0/)
+- [Download Release Artifacts](http://archive.apache.org/dist/tez/0.6.0/)
 - [Release Notes](0.6.0/release-notes.txt)
 - Documentation
     - [API Javadocs](0.6.0/tez-api-javadocs/index.html) : Documentation for the Tez APIs

http://git-wip-us.apache.org/repos/asf/tez/blob/870972d2/docs/src/site/markdown/releases/apache-tez-0-6-1.md
----------------------------------------------------------------------
diff --git a/docs/src/site/markdown/releases/apache-tez-0-6-1.md b/docs/src/site/markdown/releases/apache-tez-0-6-1.md
index 020729c..1285477 100644
--- a/docs/src/site/markdown/releases/apache-tez-0-6-1.md
+++ b/docs/src/site/markdown/releases/apache-tez-0-6-1.md
@@ -15,12 +15,12 @@
    limitations under the License.
 -->
 
-<head><title>Apache Tez 0.6.1</title></head>
+<head><title>Apache Tez&trade; 0.6.1</title></head>
 
-Apache Tez 0.6.1
+Apache Tez&trade; 0.6.1
 ----------------
 
-- [Release Artifacts](http://www.apache.org/dyn/closer.lua/tez/0.6.1/)
+- [Download Release Artifacts](http://archive.apache.org/dist/tez/0.6.1/)
 - [Release Notes](0.6.1/release-notes.txt)
 - Documentation
     - [API Javadocs](0.6.1/tez-api-javadocs/index.html) : Documentation for the Tez APIs

http://git-wip-us.apache.org/repos/asf/tez/blob/870972d2/docs/src/site/markdown/releases/apache-tez-0-6-2.md
----------------------------------------------------------------------
diff --git a/docs/src/site/markdown/releases/apache-tez-0-6-2.md b/docs/src/site/markdown/releases/apache-tez-0-6-2.md
index 358035e..5a898e6 100644
--- a/docs/src/site/markdown/releases/apache-tez-0-6-2.md
+++ b/docs/src/site/markdown/releases/apache-tez-0-6-2.md
@@ -15,12 +15,12 @@
    limitations under the License.
 -->
 
-<head><title>Apache Tez 0.6.2</title></head>
+<head><title>Apache Tez&trade; 0.6.2</title></head>
 
-Apache Tez 0.6.2
+Apache Tez&trade; 0.6.2
 ----------------
 
-- [Release Artifacts](http://www.apache.org/dyn/closer.lua/tez/0.6.2/)
+- [Download Release Artifacts](http://www.apache.org/dyn/closer.lua/tez/0.6.2/)
 - [Release Notes](0.6.2/release-notes.txt)
 - Documentation
     - [API Javadocs](0.6.2/tez-api-javadocs/index.html) : Documentation for the Tez APIs

http://git-wip-us.apache.org/repos/asf/tez/blob/870972d2/docs/src/site/markdown/releases/apache-tez-0-7-0.md
----------------------------------------------------------------------
diff --git a/docs/src/site/markdown/releases/apache-tez-0-7-0.md b/docs/src/site/markdown/releases/apache-tez-0-7-0.md
index 80e581e..5c9aa1f 100644
--- a/docs/src/site/markdown/releases/apache-tez-0-7-0.md
+++ b/docs/src/site/markdown/releases/apache-tez-0-7-0.md
@@ -15,12 +15,12 @@
    limitations under the License.
 -->
 
-<head><title>Apache Tez 0.7.0</title></head>
+<head><title>Apache Tez&trade; 0.7.0</title></head>
 
-Apache Tez 0.7.0
+Apache Tez&trade; 0.7.0
 ----------------
 
-- [Release Artifacts](http://www.apache.org/dyn/closer.lua/tez/0.7.0/)
+- [Download Release Artifacts](http://www.apache.org/dyn/closer.lua/tez/0.7.0/)
 - [Release Notes](0.7.0/release-notes.txt)
 - Documentation
     - [API Javadocs](0.7.0/tez-api-javadocs/index.html) : Documentation for the Tez APIs

http://git-wip-us.apache.org/repos/asf/tez/blob/870972d2/docs/src/site/markdown/releases/apache-tez-0-8-0-alpha.md
----------------------------------------------------------------------
diff --git a/docs/src/site/markdown/releases/apache-tez-0-8-0-alpha.md b/docs/src/site/markdown/releases/apache-tez-0-8-0-alpha.md
index f5f2e75..5854ecd 100644
--- a/docs/src/site/markdown/releases/apache-tez-0-8-0-alpha.md
+++ b/docs/src/site/markdown/releases/apache-tez-0-8-0-alpha.md
@@ -15,12 +15,12 @@
    limitations under the License.
 -->
 
-<head><title>Apache Tez 0.8.0-alpha</title></head>
+<head><title>Apache Tez&trade; 0.8.0-alpha</title></head>
 
-Apache Tez 0.8.0-alpha
+Apache Tez&trade; 0.8.0-alpha
 ----------------------
 
-- [Release Artifacts](http://www.apache.org/dyn/closer.lua/tez/0.8.0-alpha/)
+- [Download Release Artifacts](http://archive.apache.org/dist/tez/0.8.0-alpha/)
 - [Release Notes](0.8.0-alpha/release-notes.txt)
 - Documentation
     - [API Javadocs](0.8.0-alpha/tez-api-javadocs/index.html) : Documentation for the Tez APIs

http://git-wip-us.apache.org/repos/asf/tez/blob/870972d2/docs/src/site/markdown/releases/apache-tez-0-8-1-alpha.md
----------------------------------------------------------------------
diff --git a/docs/src/site/markdown/releases/apache-tez-0-8-1-alpha.md b/docs/src/site/markdown/releases/apache-tez-0-8-1-alpha.md
index c00f0d5..37a2d12 100644
--- a/docs/src/site/markdown/releases/apache-tez-0-8-1-alpha.md
+++ b/docs/src/site/markdown/releases/apache-tez-0-8-1-alpha.md
@@ -15,12 +15,12 @@
    limitations under the License.
 -->
 
-<head><title>Apache Tez 0.8.1-alpha</title></head>
+<head><title>Apache Tez&trade; 0.8.1-alpha</title></head>
 
-Apache Tez 0.8.1-alpha
+Apache Tez&trade; 0.8.1-alpha
 ----------------------
 
-- [Release Artifacts](http://www.apache.org/dyn/closer.lua/tez/0.8.1-alpha/)
+- [Download Release Artifacts](http://archive.apache.org/dist/tez/0.8.1-alpha/)
 - [Release Notes](0.8.1-alpha/release-notes.txt)
 - Documentation
     - [API Javadocs](0.8.1-alpha/tez-api-javadocs/index.html) : Documentation for the Tez APIs

http://git-wip-us.apache.org/repos/asf/tez/blob/870972d2/docs/src/site/markdown/releases/apache-tez-0-8-2.md
----------------------------------------------------------------------
diff --git a/docs/src/site/markdown/releases/apache-tez-0-8-2.md b/docs/src/site/markdown/releases/apache-tez-0-8-2.md
index 984e507..7ffe039 100644
--- a/docs/src/site/markdown/releases/apache-tez-0-8-2.md
+++ b/docs/src/site/markdown/releases/apache-tez-0-8-2.md
@@ -15,12 +15,12 @@
    limitations under the License.
 -->
 
-<head><title>Apache Tez 0.8.2</title></head>
+<head><title>Apache Tez&trade; 0.8.2</title></head>
 
-Apache Tez 0.8.2
+Apache Tez&trade; 0.8.2
 ----------------------
 
-- [Release Artifacts](http://www.apache.org/dyn/closer.lua/tez/0.8.2/)
+- [Download Release Artifacts](http://www.apache.org/dyn/closer.lua/tez/0.8.2/)
 - [Release Notes](0.8.2/release-notes.txt)
 - Documentation
     - [API Javadocs](0.8.2/tez-api-javadocs/index.html) : Documentation for the Tez APIs

http://git-wip-us.apache.org/repos/asf/tez/blob/870972d2/docs/src/site/markdown/releases/index.md
----------------------------------------------------------------------
diff --git a/docs/src/site/markdown/releases/index.md b/docs/src/site/markdown/releases/index.md
index 3822e07..5703cf3 100644
--- a/docs/src/site/markdown/releases/index.md
+++ b/docs/src/site/markdown/releases/index.md
@@ -15,19 +15,19 @@
    limitations under the License.
 -->
 
-<head><title>Apache Tez Releases</title></head>
+<head><title>Apache Tez&trade; Releases</title></head>
 
 Releases
 ------------
--   [Apache Tez 0.8.2](./apache-tez-0-8-2.html) (Jan 19, 2016)
--   [Apache Tez 0.8.1-alpha](./apache-tez-0-8-1-alpha.html) (Oct 12, 2015)
--   [Apache Tez 0.8.0-alpha](./apache-tez-0-8-0-alpha.html) (Sep 01, 2015)
--   [Apache Tez 0.7.0](./apache-tez-0-7-0.html) (May 18, 2015)
--   [Apache Tez 0.6.2](./apache-tez-0-6-2.html) (Aug 07, 2015)
--   [Apache Tez 0.6.1](./apache-tez-0-6-1.html) (May 18, 2015)
--   [Apache Tez 0.6.0](./apache-tez-0-6-0.html) (Jan 23, 2015)
--   [Apache Tez 0.5.4](./apache-tez-0-5-4.html) (Jun 26, 2015)
--   [Apache Tez 0.5.3](./apache-tez-0-5-3.html) (Dec 10, 2014)
--   [Apache Tez 0.5.2](./apache-tez-0-5-2.html) (Nov 07, 2014)
--   [Apache Tez 0.5.1](./apache-tez-0-5-1.html) (Oct 08, 2014)
--   [Apache Tez 0.5.0](./apache-tez-0-5-0.html) (Sep 04, 2014)
+-   [Apache Tez&trade; 0.8.2](./apache-tez-0-8-2.html) (Jan 19, 2016)
+-   [Apache Tez&trade; 0.8.1-alpha](./apache-tez-0-8-1-alpha.html) (Oct 12, 2015)
+-   [Apache Tez&trade; 0.8.0-alpha](./apache-tez-0-8-0-alpha.html) (Sep 01, 2015)
+-   [Apache Tez&trade; 0.7.0](./apache-tez-0-7-0.html) (May 18, 2015)
+-   [Apache Tez&trade; 0.6.2](./apache-tez-0-6-2.html) (Aug 07, 2015)
+-   [Apache Tez&trade; 0.6.1](./apache-tez-0-6-1.html) (May 18, 2015)
+-   [Apache Tez&trade; 0.6.0](./apache-tez-0-6-0.html) (Jan 23, 2015)
+-   [Apache Tez&trade; 0.5.4](./apache-tez-0-5-4.html) (Jun 26, 2015)
+-   [Apache Tez&trade; 0.5.3](./apache-tez-0-5-3.html) (Dec 10, 2014)
+-   [Apache Tez&trade; 0.5.2](./apache-tez-0-5-2.html) (Nov 07, 2014)
+-   [Apache Tez&trade; 0.5.1](./apache-tez-0-5-1.html) (Oct 08, 2014)
+-   [Apache Tez&trade; 0.5.0](./apache-tez-0-5-0.html) (Sep 04, 2014)

http://git-wip-us.apache.org/repos/asf/tez/blob/870972d2/docs/src/site/site.xml
----------------------------------------------------------------------
diff --git a/docs/src/site/site.xml b/docs/src/site/site.xml
index dc4cec8..9860575 100644
--- a/docs/src/site/site.xml
+++ b/docs/src/site/site.xml
@@ -94,7 +94,7 @@
     </head>
 
     <breadcrumbs>
-      <item name="Apache Tez" href="http://tez.apache.org/"/>
+      <item name="Apache Tez&trade;" href="http://tez.apache.org/"/>
     </breadcrumbs>
 
     <menu name="Getting Started">
@@ -119,7 +119,7 @@
       <item name="Tez Wiki" href="https://cwiki.apache.org/confluence/display/TEZ"/>
     </menu>
 
-    <menu name="Releases">
+    <menu name="Download Apache Tez&trade; Releases">
       <item name="0.4.1-incubating" href="http://archive.apache.org/dist/incubator/tez/tez-0.4.1-incubating/"/>
       <item name="0.5.4" href="./releases/apache-tez-0-5-4.html"/>
       <item name="0.6.1" href="./releases/apache-tez-0-6-1.html"/>


[02/24] tez git commit: TEZ-2594. Fix LICENSE for missing entries for full and minimal tarballs. (hitesh)

Posted by sr...@apache.org.
TEZ-2594. Fix LICENSE for missing entries for full and minimal tarballs. (hitesh)


Project: http://git-wip-us.apache.org/repos/asf/tez/repo
Commit: http://git-wip-us.apache.org/repos/asf/tez/commit/73e993cb
Tree: http://git-wip-us.apache.org/repos/asf/tez/tree/73e993cb
Diff: http://git-wip-us.apache.org/repos/asf/tez/diff/73e993cb

Branch: refs/heads/TEZ-2980
Commit: 73e993cba52e25c0d126a59439a76b43af9506bc
Parents: e171fdd
Author: Hitesh Shah <hi...@apache.org>
Authored: Wed Jan 20 13:59:53 2016 -0800
Committer: Hitesh Shah <hi...@apache.org>
Committed: Wed Jan 20 13:59:53 2016 -0800

----------------------------------------------------------------------
 CHANGES.txt                         | 1 +
 tez-dist/dist-files/full/LICENSE    | 9 ++++++++-
 tez-dist/dist-files/minimal/LICENSE | 8 ++++++++
 3 files changed, 17 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tez/blob/73e993cb/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index 3b8b016..5d2c446 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -6,6 +6,7 @@ Release 0.8.3: Unreleased
 INCOMPATIBLE CHANGES
 
 ALL CHANGES:
+  TEZ-2594. Fix LICENSE for missing entries for full and minimal tarballs.
   TEZ-3053. Containers timeout if they do not receive a task within the container timeout interval.
   TEZ-2898. tez tools : swimlanes.py is broken.
   TEZ-2937. Can Processor.close() be called after closing inputs and outputs?

http://git-wip-us.apache.org/repos/asf/tez/blob/73e993cb/tez-dist/dist-files/full/LICENSE
----------------------------------------------------------------------
diff --git a/tez-dist/dist-files/full/LICENSE b/tez-dist/dist-files/full/LICENSE
index 3cff5fc..b79eaba 100644
--- a/tez-dist/dist-files/full/LICENSE
+++ b/tez-dist/dist-files/full/LICENSE
@@ -220,6 +220,10 @@ license:
   - tez-common-*.jar
   - tez-dag-*.jar
   - tez-examples-*.jar
+  - tez-ext-service-tests-*.jar
+  - tez-history-parser-*.jar
+  - tez-javadoc-tools-*.jar
+  - tez-job-analyzer-*.jar
   - tez-mapreduce-*.jar
   - tez-mbeans-resource-calculator-*.jar
   - tez-runtime-internals-*.jar
@@ -227,6 +231,7 @@ license:
   - tez-tests-*.jar
   - tez-yarn-timeline-history-*.jar
   - tez-yarn-timeline-history-with-acls-*.jar
+  - hadoop-shim-*.jar
   - avro-*.jar
   - async-http-client-*.jar
   - apacheds-i18n-*.jar
@@ -264,7 +269,7 @@ license:
   - hadoop-yarn-server-web-proxy-*.jar
   - httpclient-*.jar
   - httpcore-*.jar
-  - htrace-*.jar
+  - htrace-core-*.jar
   - jackson-core-asl-*.jar
   - jackson-jaxrs-*.jar
   - jackson-mapper-asl-*.jar
@@ -278,7 +283,9 @@ license:
   - jetty-6.1.*.jar - Copyright 2001-2005 Mort Bay Consulting Pty. Ltd.
   - jetty-utils-6.1.*.jar - Copyright 2001-2005 Mort Bay Consulting Pty. Ltd.
   - log4j-*.jar
+  - metrics-core-*.jar
   - netty-*.jar
+  - RoaringBitmap-*.jar
   - snappy-java-*.jar
   - stax-api-1.0.1.jar
   - xercesImpl-*.jar

http://git-wip-us.apache.org/repos/asf/tez/blob/73e993cb/tez-dist/dist-files/minimal/LICENSE
----------------------------------------------------------------------
diff --git a/tez-dist/dist-files/minimal/LICENSE b/tez-dist/dist-files/minimal/LICENSE
index effbc3d..c89bc24 100644
--- a/tez-dist/dist-files/minimal/LICENSE
+++ b/tez-dist/dist-files/minimal/LICENSE
@@ -220,6 +220,10 @@ license:
   - tez-common-*.jar
   - tez-dag-*.jar
   - tez-examples-*.jar
+  - tez-ext-service-tests-*.jar
+  - tez-history-parser-*.jar
+  - tez-javadoc-tools-*.jar
+  - tez-job-analyzer-*.jar
   - tez-mapreduce-*.jar
   - tez-mbeans-resource-calculator-*.jar
   - tez-runtime-internals-*.jar
@@ -227,6 +231,8 @@ license:
   - tez-tests-*.jar
   - tez-yarn-timeline-history-*.jar
   - tez-yarn-timeline-history-with-acls-*.jar
+  - hadoop-shim-*.jar
+  - async-http-client-*.jar
   - commons-cli-*.jar
   - commons-collections-*.jar
   - commons-io-*.jar 
@@ -242,6 +248,8 @@ license:
   - jettison-*.jar - Copyright 2006 Envoi Solutions LLC
   - jetty-6.1.*.jar 
   - jetty-utils-6.1.*.jar 
+  - metrics-core-*.jar
+  - RoaringBitmap-*.jar
 
 -----------------------------------------------------------------------
 Dual Licensed under Common Development And Disribution License (CDDL - Version 1.1)


[14/24] tez git commit: Fixing CHANGES.txt (rbalamohan)

Posted by sr...@apache.org.
Fixing CHANGES.txt (rbalamohan)


Project: http://git-wip-us.apache.org/repos/asf/tez/repo
Commit: http://git-wip-us.apache.org/repos/asf/tez/commit/f352cfb4
Tree: http://git-wip-us.apache.org/repos/asf/tez/tree/f352cfb4
Diff: http://git-wip-us.apache.org/repos/asf/tez/diff/f352cfb4

Branch: refs/heads/TEZ-2980
Commit: f352cfb4d69ed0a9dc1db090293bddd92072d679
Parents: c7397f5
Author: Rajesh Balamohan <rb...@apache.org>
Authored: Thu Feb 4 19:11:00 2016 -0800
Committer: Rajesh Balamohan <rb...@apache.org>
Committed: Thu Feb 4 19:11:00 2016 -0800

----------------------------------------------------------------------
 CHANGES.txt | 2 ++
 1 file changed, 2 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tez/blob/f352cfb4/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index 2e484eb..4d7ae6b 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -328,6 +328,8 @@ INCOMPATIBLE CHANGES
   TEZ-2949. Allow duplicate dag names within session for Tez.
 
 ALL CHANGES
+  TEZ-3093. CriticalPathAnalyzer should be accessible via zeppelin.
+  TEZ-3089. TaskConcurrencyAnalyzer can return negative task count with very large jobs.
   TEZ-2307. Possible wrong error message when submitting new dag.
   TEZ-3076. Reduce merge memory overhead to support large number of in-memory mapoutputs
   TEZ-3066. TaskAttemptFinishedEvent ConcurrentModificationException in recovery or history logging services.


[23/24] tez git commit: TEZ-3117. Deadlock in Edge and Vertex code (bikas)

Posted by sr...@apache.org.
TEZ-3117. Deadlock in Edge and Vertex code (bikas)


Project: http://git-wip-us.apache.org/repos/asf/tez/repo
Commit: http://git-wip-us.apache.org/repos/asf/tez/commit/de3a0748
Tree: http://git-wip-us.apache.org/repos/asf/tez/tree/de3a0748
Diff: http://git-wip-us.apache.org/repos/asf/tez/diff/de3a0748

Branch: refs/heads/TEZ-2980
Commit: de3a0748ff19b5ced87050596d088bdb573cae05
Parents: a812c34
Author: Bikas Saha <bi...@apache.org>
Authored: Wed Feb 17 17:48:55 2016 -0800
Committer: Bikas Saha <bi...@apache.org>
Committed: Wed Feb 17 17:48:55 2016 -0800

----------------------------------------------------------------------
 CHANGES.txt                                     |  2 +
 .../org/apache/tez/dag/app/dag/impl/Edge.java   | 59 ++++++++++++--------
 2 files changed, 37 insertions(+), 24 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tez/blob/de3a0748/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index af643dd..d10b47a 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -7,6 +7,7 @@ INCOMPATIBLE CHANGES
   TEZ-3029. Add an onError method to service plugin contexts.
 
 ALL CHANGES:
+  TEZ-3117. Deadlock in Edge and Vertex code
   TEZ-3103. Shuffle can hang when memory to memory merging enabled
   TEZ-3107. tez-tools: Log warn msgs in case ATS has wrong values (e.g startTime > finishTime).
   TEZ-3104. Tez fails on Bzip2 intermediate output format on hadoop 2.7.1 and earlier
@@ -334,6 +335,7 @@ INCOMPATIBLE CHANGES
   TEZ-2949. Allow duplicate dag names within session for Tez.
 
 ALL CHANGES
+  TEZ-3117. Deadlock in Edge and Vertex code
   TEZ-3103. Shuffle can hang when memory to memory merging enabled
   TEZ-3107. tez-tools: Log warn msgs in case ATS has wrong values (e.g startTime > finishTime).
   TEZ-3104. Tez fails on Bzip2 intermediate output format on hadoop 2.7.1 and earlier

http://git-wip-us.apache.org/repos/asf/tez/blob/de3a0748/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/Edge.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/Edge.java b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/Edge.java
index 0be7790..bb4d319 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/Edge.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/Edge.java
@@ -174,19 +174,24 @@ public class Edge {
             + getEdgeInfo(), e);
       }
     }
-    destinationMetaInfo = new EventMetaData(EventProducerConsumerType.INPUT, 
-        destinationVertex.getName(), 
-        sourceVertex.getName(), 
-        null);
+    synchronized (this) {
+      destinationMetaInfo = new EventMetaData(EventProducerConsumerType.INPUT,
+              destinationVertex.getName(),
+              sourceVertex.getName(),
+              null);
+    }
   }
 
-  public synchronized void setEdgeProperty(EdgeProperty newEdgeProperty) throws AMUserCodeException {
-    this.edgeProperty = newEdgeProperty;
-    boolean wasUnInitialized = (edgeManager == null);
-    try {
-      createEdgeManager();
-    } catch (TezException e) {
-      throw new AMUserCodeException(Source.EdgeManager, e);
+  public void setEdgeProperty(EdgeProperty newEdgeProperty) throws AMUserCodeException {
+    boolean wasUnInitialized;
+    synchronized (this) {
+      this.edgeProperty = newEdgeProperty;
+      wasUnInitialized = (edgeManager == null);
+      try {
+        createEdgeManager();
+      } catch (TezException e) {
+        throw new AMUserCodeException(Source.EdgeManager, e);
+      }
     }
     initialize();
     if (wasUnInitialized) {
@@ -199,7 +204,7 @@ public class Edge {
   
   // Test only method for creating specific scenarios
   @VisibleForTesting
-  synchronized void setCustomEdgeManager(EdgeManagerPluginDescriptor descriptor)
+  void setCustomEdgeManager(EdgeManagerPluginDescriptor descriptor)
       throws AMUserCodeException {
     EdgeProperty modifiedEdgeProperty =
         EdgeProperty.create(descriptor,
@@ -210,22 +215,28 @@ public class Edge {
     setEdgeProperty(modifiedEdgeProperty);
   }
   
-  public synchronized void routingToBegin() throws AMUserCodeException {
-    if (edgeManagerContext.getDestinationVertexNumTasks() == 0) {
-      routingNeeded = false;
-    } else if (edgeManagerContext.getDestinationVertexNumTasks() < 0) {
-      throw new TezUncheckedException(
-          "Internal error. Not expected to route events to a destination until parallelism is determined" +
-          " sourceVertex=" + sourceVertex.getLogIdentifier() +
-          " edgeManager=" + edgeManager.getClass().getName());
+  public void routingToBegin() throws AMUserCodeException {
+    int numDestTasks = edgeManagerContext.getDestinationVertexNumTasks();
+    synchronized (this) {
+      if (numDestTasks == 0) {
+        routingNeeded = false;
+      } else if (numDestTasks < 0) {
+        throw new TezUncheckedException(
+                "Internal error. Not expected to route events to a destination until parallelism is determined" +
+                        " sourceVertex=" + sourceVertex.getLogIdentifier() +
+                        " edgeManager=" + edgeManager.getClass().getName());
+      }
+      if (edgeManager instanceof EdgeManagerPluginOnDemand) {
+        onDemandRouting = true;
+      }
     }
-    if (edgeManager instanceof EdgeManagerPluginOnDemand) {
-      onDemandRouting = true;
+
+    if (onDemandRouting) {
       try {
-        ((EdgeManagerPluginOnDemand)edgeManager).prepareForRouting();
+        ((EdgeManagerPluginOnDemand) edgeManager).prepareForRouting();
       } catch (Exception e) {
         throw new AMUserCodeException(Source.EdgeManager,
-            "Fail to prepareForRouting " + getEdgeInfo(), e);
+                "Fail to prepareForRouting " + getEdgeInfo(), e);
       }
     }
     


[07/24] tez git commit: TEZ-3076. Reduce merge memory overhead to support large number of in-memory mapoutputs (jeagles)

Posted by sr...@apache.org.
TEZ-3076. Reduce merge memory overhead to support large number of in-memory mapoutputs (jeagles)


Project: http://git-wip-us.apache.org/repos/asf/tez/repo
Commit: http://git-wip-us.apache.org/repos/asf/tez/commit/3ff360aa
Tree: http://git-wip-us.apache.org/repos/asf/tez/tree/3ff360aa
Diff: http://git-wip-us.apache.org/repos/asf/tez/diff/3ff360aa

Branch: refs/heads/TEZ-2980
Commit: 3ff360aa18373f2b4aa03648de905c690ce5a180
Parents: 7e636a5
Author: Jonathan Eagles <je...@yahoo-inc.com>
Authored: Fri Jan 29 13:43:59 2016 -0600
Committer: Jonathan Eagles <je...@yahoo-inc.com>
Committed: Fri Jan 29 13:43:59 2016 -0600

----------------------------------------------------------------------
 CHANGES.txt                                     |   2 +
 .../tez/dag/app/dag/impl/TestDAGRecovery.java   |   1 -
 .../library/common/InputAttemptIdentifier.java  |  28 ++---
 .../common/shuffle/DiskFetchedInput.java        |   2 +-
 .../library/common/shuffle/ShuffleUtils.java    |   2 +-
 .../impl/ShuffleInputEventHandlerImpl.java      |   3 +-
 .../common/shuffle/impl/ShuffleManager.java     |  23 ++---
 .../FetchedInputAllocatorOrderedGrouped.java    |   3 +
 .../shuffle/orderedgrouped/InMemoryReader.java  | 102 ++++++++++++++++++-
 .../shuffle/orderedgrouped/MapOutput.java       |  32 +++---
 .../shuffle/orderedgrouped/MergeManager.java    |  11 +-
 .../ShuffleInputEventHandlerOrderedGrouped.java |   3 +-
 .../orderedgrouped/ShuffleScheduler.java        |  29 +++---
 .../runtime/library/common/sort/impl/IFile.java |   6 +-
 .../library/common/shuffle/TestFetcher.java     |  27 +++--
 .../impl/TestShuffleInputEventHandlerImpl.java  |  11 +-
 .../shuffle/orderedgrouped/TestFetcher.java     |  29 +++---
 ...tShuffleInputEventHandlerOrderedGrouped.java |  17 ++--
 .../orderedgrouped/TestShuffleScheduler.java    |  65 ++++++------
 19 files changed, 238 insertions(+), 158 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tez/blob/3ff360aa/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index d69390c..6570f8b 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -6,6 +6,7 @@ Release 0.8.3: Unreleased
 INCOMPATIBLE CHANGES
 
 ALL CHANGES:
+  TEZ-3076. Reduce merge memory overhead to support large number of in-memory mapoutputs
   TEZ-3079. Fix tez-tfile parser documentation.
   TEZ-3066. TaskAttemptFinishedEvent ConcurrentModificationException in recovery or history logging services.
   TEZ-3036. Tez AM can hang on startup with no indication of error
@@ -322,6 +323,7 @@ INCOMPATIBLE CHANGES
   TEZ-2949. Allow duplicate dag names within session for Tez.
 
 ALL CHANGES
+  TEZ-3076. Reduce merge memory overhead to support large number of in-memory mapoutputs
   TEZ-3066. TaskAttemptFinishedEvent ConcurrentModificationException in recovery or history logging services.
   TEZ-3036. Tez AM can hang on startup with no indication of error
   TEZ-3052. Task internal error due to Invalid event: T_ATTEMPT_FAILED at FAILED

http://git-wip-us.apache.org/repos/asf/tez/blob/3ff360aa/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestDAGRecovery.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestDAGRecovery.java b/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestDAGRecovery.java
index 3a602bc..6be682d 100644
--- a/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestDAGRecovery.java
+++ b/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestDAGRecovery.java
@@ -132,7 +132,6 @@ import org.apache.tez.runtime.api.impl.EventMetaData;
 import org.apache.tez.runtime.api.impl.EventMetaData.EventProducerConsumerType;
 import org.apache.tez.runtime.api.impl.TaskSpec;
 import org.apache.tez.runtime.api.impl.TezEvent;
-import org.apache.tez.runtime.library.common.InputIdentifier;
 import org.junit.After;
 import org.junit.Assert;
 import org.junit.Before;

http://git-wip-us.apache.org/repos/asf/tez/blob/3ff360aa/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/InputAttemptIdentifier.java
----------------------------------------------------------------------
diff --git a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/InputAttemptIdentifier.java b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/InputAttemptIdentifier.java
index d70942c..cc9c6ea 100644
--- a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/InputAttemptIdentifier.java
+++ b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/InputAttemptIdentifier.java
@@ -27,7 +27,7 @@ import org.apache.tez.dag.api.TezUncheckedException;
 @Private
 public class InputAttemptIdentifier {
 
-  private final InputIdentifier inputIdentifier;
+  private final int inputIdentifier;
   private final int attemptNumber;
   private final String pathComponent;
   private final boolean shared;
@@ -49,18 +49,18 @@ public class InputAttemptIdentifier {
   private final int spillEventId;
 
   public InputAttemptIdentifier(int inputIndex, int attemptNumber) {
-    this(new InputIdentifier(inputIndex), attemptNumber, null);
+    this(inputIndex, attemptNumber, null);
   }
 
-  public InputAttemptIdentifier(InputIdentifier inputIdentifier, int attemptNumber, String pathComponent) {
+  public InputAttemptIdentifier(int inputIdentifier, int attemptNumber, String pathComponent) {
     this(inputIdentifier, attemptNumber, pathComponent, false);
   }
 
-  public InputAttemptIdentifier(InputIdentifier inputIdentifier, int attemptNumber, String pathComponent, boolean shared) {
+  public InputAttemptIdentifier(int inputIdentifier, int attemptNumber, String pathComponent, boolean shared) {
     this(inputIdentifier, attemptNumber, pathComponent, shared, SPILL_INFO.FINAL_MERGE_ENABLED, -1);
   }
 
-  public InputAttemptIdentifier(InputIdentifier inputIdentifier, int attemptNumber, String pathComponent,
+  public InputAttemptIdentifier(int inputIdentifier, int attemptNumber, String pathComponent,
       boolean shared, SPILL_INFO fetchTypeInfo, int spillEventId) {
     this.inputIdentifier = inputIdentifier;
     this.attemptNumber = attemptNumber;
@@ -74,15 +74,7 @@ public class InputAttemptIdentifier {
     }
   }
 
-  public InputAttemptIdentifier(int taskIndex, int attemptNumber, String pathComponent) {
-    this(new InputIdentifier(taskIndex), attemptNumber, pathComponent);
-  }
-
-  public InputAttemptIdentifier(int taskIndex, int attemptNumber, String pathComponent, boolean shared) {
-    this(new InputIdentifier(taskIndex), attemptNumber, pathComponent, shared);
-  }
-
-  public InputIdentifier getInputIdentifier() {
+  public int getInputIdentifier() {
     return this.inputIdentifier;
   }
 
@@ -117,8 +109,7 @@ public class InputAttemptIdentifier {
     final int prime = 31;
     int result = 1;
     result = prime * result + attemptNumber;
-    result = prime * result
-        + ((inputIdentifier == null) ? 0 : inputIdentifier.hashCode());
+    result = prime * result + inputIdentifier;
     return result;
   }
 
@@ -133,10 +124,7 @@ public class InputAttemptIdentifier {
     InputAttemptIdentifier other = (InputAttemptIdentifier) obj;
     if (attemptNumber != other.attemptNumber)
       return false;
-    if (inputIdentifier == null) {
-      if (other.inputIdentifier != null)
-        return false;
-    } else if (!inputIdentifier.equals(other.inputIdentifier))
+    if (inputIdentifier != other.inputIdentifier)
       return false;
     // do not compare pathComponent as they may not always be present
     return true;

http://git-wip-us.apache.org/repos/asf/tez/blob/3ff360aa/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/DiskFetchedInput.java
----------------------------------------------------------------------
diff --git a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/DiskFetchedInput.java b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/DiskFetchedInput.java
index dfad39d..c873af7 100644
--- a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/DiskFetchedInput.java
+++ b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/DiskFetchedInput.java
@@ -50,7 +50,7 @@ public class DiskFetchedInput extends FetchedInput {
 
     this.localFS = FileSystem.getLocal(conf).getRaw();
     this.outputPath = filenameAllocator.getInputFileForWrite(
-        this.inputAttemptIdentifier.getInputIdentifier().getInputIndex(), this
+        this.inputAttemptIdentifier.getInputIdentifier(), this
             .inputAttemptIdentifier.getSpillEventId(), actualSize);
     // Files are not clobbered due to the id being appended to the outputPath in the tmpPath,
     // otherwise fetches for the same task but from different attempts would clobber each other.

http://git-wip-us.apache.org/repos/asf/tez/blob/3ff360aa/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/ShuffleUtils.java
----------------------------------------------------------------------
diff --git a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/ShuffleUtils.java b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/ShuffleUtils.java
index 431ba38..e8bf6ae 100644
--- a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/ShuffleUtils.java
+++ b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/ShuffleUtils.java
@@ -498,7 +498,7 @@ public class ShuffleUtils {
   private static String toShortString(InputAttemptIdentifier inputAttemptIdentifier) {
     StringBuilder sb = new StringBuilder();
     sb.append("{");
-    sb.append(inputAttemptIdentifier.getInputIdentifier().getInputIndex());
+    sb.append(inputAttemptIdentifier.getInputIdentifier());
     sb.append(", ").append(inputAttemptIdentifier.getAttemptNumber());
     sb.append(", ").append(inputAttemptIdentifier.getPathComponent());
     if (inputAttemptIdentifier.getFetchTypeInfo()

http://git-wip-us.apache.org/repos/asf/tez/blob/3ff360aa/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/impl/ShuffleInputEventHandlerImpl.java
----------------------------------------------------------------------
diff --git a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/impl/ShuffleInputEventHandlerImpl.java b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/impl/ShuffleInputEventHandlerImpl.java
index 8fb1568..adc3432 100644
--- a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/impl/ShuffleInputEventHandlerImpl.java
+++ b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/impl/ShuffleInputEventHandlerImpl.java
@@ -37,7 +37,6 @@ import org.apache.tez.runtime.api.InputContext;
 import org.apache.tez.runtime.api.events.DataMovementEvent;
 import org.apache.tez.runtime.api.events.InputFailedEvent;
 import org.apache.tez.runtime.library.common.InputAttemptIdentifier;
-import org.apache.tez.runtime.library.common.InputIdentifier;
 import org.apache.tez.runtime.library.common.shuffle.FetchedInputAllocator;
 import org.apache.tez.runtime.library.common.shuffle.ShuffleEventHandler;
 import org.apache.tez.runtime.library.common.shuffle.ShuffleUtils;
@@ -176,7 +175,7 @@ public class ShuffleInputEventHandlerImpl implements ShuffleEventHandler {
       InputAttemptIdentifier.SPILL_INFO spillInfo = (lastEvent) ? InputAttemptIdentifier.SPILL_INFO
           .FINAL_UPDATE : InputAttemptIdentifier.SPILL_INFO.INCREMENTAL_UPDATE;
       srcAttemptIdentifier =
-          new InputAttemptIdentifier(new InputIdentifier(dmEvent.getTargetIndex()), dmEvent
+          new InputAttemptIdentifier(dmEvent.getTargetIndex(), dmEvent
               .getVersion(), pathComponent, isShared, spillInfo, spillEventId);
     } else {
       srcAttemptIdentifier =

http://git-wip-us.apache.org/repos/asf/tez/blob/3ff360aa/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/impl/ShuffleManager.java
----------------------------------------------------------------------
diff --git a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/impl/ShuffleManager.java b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/impl/ShuffleManager.java
index b3e050a..7f2054b 100644
--- a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/impl/ShuffleManager.java
+++ b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/impl/ShuffleManager.java
@@ -67,7 +67,6 @@ import org.apache.tez.runtime.api.InputContext;
 import org.apache.tez.runtime.api.events.InputReadErrorEvent;
 import org.apache.tez.runtime.library.api.TezRuntimeConfiguration;
 import org.apache.tez.runtime.library.common.InputAttemptIdentifier;
-import org.apache.tez.runtime.library.common.InputIdentifier;
 import org.apache.tez.runtime.library.common.TezRuntimeUtils;
 import org.apache.tez.runtime.library.common.shuffle.FetchResult;
 import org.apache.tez.runtime.library.common.shuffle.FetchedInput;
@@ -111,7 +110,7 @@ public class ShuffleManager implements FetcherCallback {
   
   private final BlockingQueue<FetchedInput> completedInputs;
   private final AtomicBoolean inputReadyNotificationSent = new AtomicBoolean(false);
-  private final Set<InputIdentifier> completedInputSet;
+  private final Set<Integer> completedInputSet;
   private final ConcurrentMap<String, InputHost> knownSrcHosts;
   private final BlockingQueue<InputHost> pendingHosts;
   private final Set<InputAttemptIdentifier> obsoletedInputs;
@@ -171,7 +170,7 @@ public class ShuffleManager implements FetcherCallback {
 
   //To track shuffleInfo events when finalMerge is disabled OR pipelined shuffle is enabled in source.
   @VisibleForTesting
-  final Map<InputIdentifier, ShuffleEventInfo> shuffleInfoEventsMap;
+  final Map<Integer, ShuffleEventInfo> shuffleInfoEventsMap;
 
   // TODO More counters - FetchErrors, speed?
   
@@ -205,7 +204,7 @@ public class ShuffleManager implements FetcherCallback {
     
     this.srcNameTrimmed = TezUtilsInternal.cleanVertexName(inputContext.getSourceVertexName());
   
-    completedInputSet = Collections.newSetFromMap(new ConcurrentHashMap<InputIdentifier, Boolean>(numInputs));
+    completedInputSet = Collections.newSetFromMap(new ConcurrentHashMap<Integer, Boolean>(numInputs));
     /**
      * In case of pipelined shuffle, it is possible to get multiple FetchedInput per attempt.
      * We do not know upfront the number of spills from source.
@@ -266,7 +265,7 @@ public class ShuffleManager implements FetcherCallback {
 
     Arrays.sort(this.localDisks);
 
-    shuffleInfoEventsMap = new ConcurrentHashMap<InputIdentifier, ShuffleEventInfo>();
+    shuffleInfoEventsMap = new ConcurrentHashMap<Integer, ShuffleEventInfo>();
 
     LOG.info(srcNameTrimmed + ": numInputs=" + numInputs + ", compressionCodec="
         + (codec == null ? "NoCompressionCodec" : codec.getClass().getName()) + ", numFetchers="
@@ -479,7 +478,7 @@ public class ShuffleManager implements FetcherCallback {
       return;
     }
 
-    InputIdentifier inputIdentifier = srcAttemptIdentifier.getInputIdentifier();
+    int inputIdentifier = srcAttemptIdentifier.getInputIdentifier();
     if (shuffleInfoEventsMap.get(inputIdentifier) == null) {
       shuffleInfoEventsMap.put(inputIdentifier, new ShuffleEventInfo(srcAttemptIdentifier));
     }
@@ -501,7 +500,7 @@ public class ShuffleManager implements FetcherCallback {
 
   public void addCompletedInputWithNoData(
       InputAttemptIdentifier srcAttemptIdentifier) {
-    InputIdentifier inputIdentifier = srcAttemptIdentifier.getInputIdentifier();
+    int inputIdentifier = srcAttemptIdentifier.getInputIdentifier();
     if (LOG.isDebugEnabled()) {
       LOG.debug("No input data exists for SrcTask: " + inputIdentifier + ". Marking as complete.");
     }
@@ -558,7 +557,7 @@ public class ShuffleManager implements FetcherCallback {
 
 
     ShuffleEventInfo(InputAttemptIdentifier input) {
-      this.id = input.getInputIdentifier().getInputIndex() + "_" + input.getAttemptNumber();
+      this.id = input.getInputIdentifier() + "_" + input.getAttemptNumber();
       this.eventsProcessed = new BitSet();
       this.attemptNum = input.getAttemptNumber();
     }
@@ -594,7 +593,7 @@ public class ShuffleManager implements FetcherCallback {
   public void fetchSucceeded(String host, InputAttemptIdentifier srcAttemptIdentifier,
       FetchedInput fetchedInput, long fetchedBytes, long decompressedLength, long copyDuration)
       throws IOException {
-    InputIdentifier inputIdentifier = srcAttemptIdentifier.getInputIdentifier();
+    int inputIdentifier = srcAttemptIdentifier.getInputIdentifier();
 
     // Count irrespective of whether this is a copy of an already fetched input
     lock.lock();
@@ -706,7 +705,7 @@ public class ShuffleManager implements FetcherCallback {
       return;
     }
 
-    InputIdentifier inputIdentifier = srcAttemptIdentifier.getInputIdentifier();
+    int inputIdentifier = srcAttemptIdentifier.getInputIdentifier();
     ShuffleEventInfo eventInfo = shuffleInfoEventsMap.get(inputIdentifier);
 
     //for empty partition case
@@ -769,9 +768,9 @@ public class ShuffleManager implements FetcherCallback {
         "Fetch failure while fetching from "
             + TezRuntimeUtils.getTaskAttemptIdentifier(
             inputContext.getSourceVertexName(),
-            srcAttemptIdentifier.getInputIdentifier().getInputIndex(),
+            srcAttemptIdentifier.getInputIdentifier(),
             srcAttemptIdentifier.getAttemptNumber()),
-        srcAttemptIdentifier.getInputIdentifier().getInputIndex(),
+        srcAttemptIdentifier.getInputIdentifier(),
         srcAttemptIdentifier.getAttemptNumber());
 
     List<Event> failedEvents = Lists.newArrayListWithCapacity(1);

http://git-wip-us.apache.org/repos/asf/tez/blob/3ff360aa/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/FetchedInputAllocatorOrderedGrouped.java
----------------------------------------------------------------------
diff --git a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/FetchedInputAllocatorOrderedGrouped.java b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/FetchedInputAllocatorOrderedGrouped.java
index ec1f8eb..7276f74 100644
--- a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/FetchedInputAllocatorOrderedGrouped.java
+++ b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/FetchedInputAllocatorOrderedGrouped.java
@@ -16,6 +16,7 @@ package org.apache.tez.runtime.library.common.shuffle.orderedgrouped;
 
 import java.io.IOException;
 
+import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.io.FileChunk;
 import org.apache.tez.runtime.library.common.InputAttemptIdentifier;
 
@@ -29,6 +30,8 @@ public interface FetchedInputAllocatorOrderedGrouped {
 
   void closeInMemoryFile(MapOutput mapOutput);
 
+  FileSystem getLocalFileSystem();
+
   void closeOnDiskFile(FileChunk file);
 
   void unreserve(long bytes);

http://git-wip-us.apache.org/repos/asf/tez/blob/3ff360aa/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/InMemoryReader.java
----------------------------------------------------------------------
diff --git a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/InMemoryReader.java b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/InMemoryReader.java
index 75c552e..7860377 100644
--- a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/InMemoryReader.java
+++ b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/InMemoryReader.java
@@ -18,6 +18,7 @@
 
 package org.apache.tez.runtime.library.common.shuffle.orderedgrouped;
 
+import java.io.ByteArrayInputStream;
 import java.io.DataInput;
 import java.io.File;
 import java.io.FileOutputStream;
@@ -37,9 +38,103 @@ import org.apache.tez.runtime.library.common.sort.impl.IFile.Reader;
 @InterfaceStability.Unstable
 public class InMemoryReader extends Reader {
 
+  private static class ByteArrayDataInput extends ByteArrayInputStream implements DataInput {
+
+    public ByteArrayDataInput(byte buf[], int offset, int length) {
+      super(buf, offset, length);
+    }
+
+    public void reset(byte[] input, int start, int length) {
+      this.buf = input;
+      this.count = start+length;
+      this.mark = start;
+      this.pos = start;
+    }
+
+    public byte[] getData() { return buf; }
+    public int getPosition() { return pos; }
+    public int getLength() { return count; }
+    public int getMark() { return mark; }
+
+    @Override
+    public void readFully(byte[] b) throws IOException {
+      throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public void readFully(byte[] b, int off, int len) throws IOException {
+      throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public int skipBytes(int n) throws IOException {
+      throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public boolean readBoolean() throws IOException {
+      throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public byte readByte() throws IOException {
+      return (byte)read();
+    }
+
+    @Override
+    public int readUnsignedByte() throws IOException {
+      throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public short readShort() throws IOException {
+      throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public int readUnsignedShort() throws IOException {
+      throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public char readChar() throws IOException {
+      throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public int readInt() throws IOException {
+      throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public long readLong() throws IOException {
+      throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public float readFloat() throws IOException {
+      throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public double readDouble() throws IOException {
+      throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public String readLine() throws IOException {
+      throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public String readUTF() throws IOException {
+      throw new UnsupportedOperationException();
+    }
+  }
+
   private final InputAttemptIdentifier taskAttemptId;
   private final MergeManager merger;
-  DataInputBuffer memDataIn = new DataInputBuffer();
+  ByteArrayDataInput memDataIn;
   private int start;
   private int length;
   private int originalKeyPos;
@@ -49,12 +144,12 @@ public class InMemoryReader extends Reader {
       int length)
       throws IOException {
     super(null, length - start, null, null, null, false, 0, -1);
-    this.merger = merger;
     this.taskAttemptId = taskAttemptId;
+    this.merger = merger;
 
     buffer = data;
     bufferSize = (int) length;
-    memDataIn.reset(buffer, start, length);
+    memDataIn = new ByteArrayDataInput(buffer, start, length);
     this.start = start;
     this.length = length;
   }
@@ -160,7 +255,6 @@ public class InMemoryReader extends Reader {
 
   public void close() {
     // Release
-    dataIn = null;
     buffer = null;
     // Inform the MergeManager
     if (merger != null) {

http://git-wip-us.apache.org/repos/asf/tez/blob/3ff360aa/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/MapOutput.java
----------------------------------------------------------------------
diff --git a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/MapOutput.java b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/MapOutput.java
index f19cd55..7e3d983 100644
--- a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/MapOutput.java
+++ b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/MapOutput.java
@@ -47,17 +47,14 @@ class MapOutput {
   private final int id;
   private final Type type;
   private InputAttemptIdentifier attemptIdentifier;
-  private final long size;
 
   private final boolean primaryMapOutput;
   private final FetchedInputAllocatorOrderedGrouped callback;
 
   // MEMORY
-  private final byte[] memory;
   private BoundedByteArrayOutputStream byteStream;
 
   // DISK
-  private final FileSystem localFS;
   private final Path tmpOutputPath;
   private final FileChunk outputPath;
   private OutputStream disk;
@@ -71,18 +68,13 @@ class MapOutput {
     this.callback = callback;
     this.primaryMapOutput = primaryMapOutput;
 
-    this.localFS = fs;
-    this.size = size;
-
     // Other type specific values
 
     if (type == Type.MEMORY) {
       // since we are passing an int from createMemoryMapOutput, its safe to cast to int
       this.byteStream = new BoundedByteArrayOutputStream((int)size);
-      this.memory = byteStream.getBuffer();
     } else {
       this.byteStream = null;
-      this.memory = null;
     }
 
     this.tmpOutputPath = tmpOutputPath;
@@ -97,7 +89,6 @@ class MapOutput {
     } else {
       this.outputPath = null;
     }
-
   }
 
   public static MapOutput createDiskMapOutput(InputAttemptIdentifier attemptIdentifier,
@@ -107,7 +98,7 @@ class MapOutput {
       IOException {
     FileSystem fs = FileSystem.getLocal(conf).getRaw();
     Path outputpath = mapOutputFile.getInputFileForWrite(
-        attemptIdentifier.getInputIdentifier().getInputIndex(), attemptIdentifier.getSpillEventId(), size);
+        attemptIdentifier.getInputIdentifier(), attemptIdentifier.getSpillEventId(), size);
     // Files are not clobbered due to the id being appended to the outputPath in the tmpPath,
     // otherwise fetches for the same task but from different attempts would clobber each other.
     Path tmpOuputPath = outputpath.suffix(String.valueOf(fetcher));
@@ -115,7 +106,7 @@ class MapOutput {
 
     MapOutput mapOutput = new MapOutput(Type.DISK, attemptIdentifier, callback, size, outputpath, offset,
         primaryMapOutput, fs, tmpOuputPath);
-    mapOutput.disk = mapOutput.localFS.create(tmpOuputPath);
+    mapOutput.disk = fs.create(tmpOuputPath);
 
     return mapOutput;
   }
@@ -160,7 +151,7 @@ class MapOutput {
   }
 
   public byte[] getMemory() {
-    return memory;
+    return byteStream.getBuffer();
   }
 
   public BoundedByteArrayOutputStream getArrayStream() {
@@ -180,14 +171,19 @@ class MapOutput {
   }
 
   public long getSize() {
-    return size;
+    if (type == Type.MEMORY) {
+      return byteStream.getLimit();
+    } else if (type == Type.DISK || type == Type.DISK_DIRECT) {
+      return outputPath.getLength();
+    }
+    return -1;
   }
 
   public void commit() throws IOException {
     if (type == Type.MEMORY) {
       callback.closeInMemoryFile(this);
     } else if (type == Type.DISK) {
-      localFS.rename(tmpOutputPath, outputPath.getPath());
+      callback.getLocalFileSystem().rename(tmpOutputPath, outputPath.getPath());
       callback.closeOnDiskFile(outputPath);
     } else if (type == Type.DISK_DIRECT) {
       callback.closeOnDiskFile(outputPath);
@@ -198,10 +194,10 @@ class MapOutput {
   
   public void abort() {
     if (type == Type.MEMORY) {
-      callback.unreserve(memory.length);
+      callback.unreserve(byteStream.getBuffer().length);
     } else if (type == Type.DISK) {
       try {
-        localFS.delete(tmpOutputPath, false);
+        callback.getLocalFileSystem().delete(tmpOutputPath, true);
       } catch (IOException ie) {
         LOG.info("failure to clean up " + tmpOutputPath, ie);
       }
@@ -223,9 +219,9 @@ class MapOutput {
         return 0;
       }
       
-      if (o1.size < o2.size) {
+      if (o1.getSize() < o2.getSize()) {
         return -1;
-      } else if (o1.size > o2.size) {
+      } else if (o1.getSize() > o2.getSize()) {
         return 1;
       }
       

http://git-wip-us.apache.org/repos/asf/tez/blob/3ff360aa/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/MergeManager.java
----------------------------------------------------------------------
diff --git a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/MergeManager.java b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/MergeManager.java
index 61ff338..dfa509f 100644
--- a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/MergeManager.java
+++ b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/MergeManager.java
@@ -503,6 +503,11 @@ public class MergeManager implements FetchedInputAllocatorOrderedGrouped {
   }
 
   @Override
+  public FileSystem getLocalFileSystem() {
+    return localFS;
+  }
+
+  @Override
   public synchronized void closeOnDiskFile(FileChunk file) {
     //including only path & offset for valdiations.
     for (FileChunk fileChunk : onDiskMapOutputs) {
@@ -726,7 +731,7 @@ public class MergeManager implements FetchedInputAllocatorOrderedGrouped {
       // All disk writes done by this merge are overhead - due to the lack of
       // adequate memory to keep all segments in memory.
       outputPath = mapOutputFile.getInputFileForWrite(
-          srcTaskIdentifier.getInputIdentifier().getInputIndex(), srcTaskIdentifier.getSpillEventId(),
+          srcTaskIdentifier.getInputIdentifier(), srcTaskIdentifier.getSpillEventId(),
           mergeOutputSize).suffix(Constants.MERGED_OUTPUT_PREFIX);
 
       Writer writer = null;
@@ -863,7 +868,7 @@ public class MergeManager implements FetchedInputAllocatorOrderedGrouped {
       if (file0.isLocalFile()) {
         // This is setup the same way a type DISK MapOutput is setup when fetching.
         namePart = mapOutputFile.getSpillFileName(
-            file0.getInputAttemptIdentifier().getInputIdentifier().getInputIndex(),
+            file0.getInputAttemptIdentifier().getInputIdentifier(),
             file0.getInputAttemptIdentifier().getSpillEventId());
       } else {
         namePart = file0.getPath().getName().toString();
@@ -1032,7 +1037,7 @@ public class MergeManager implements FetchedInputAllocatorOrderedGrouped {
     long inMemToDiskBytes = 0;
     boolean mergePhaseFinished = false;
     if (inMemoryMapOutputs.size() > 0) {
-      int srcTaskId = inMemoryMapOutputs.get(0).getAttemptIdentifier().getInputIdentifier().getInputIndex();
+      int srcTaskId = inMemoryMapOutputs.get(0).getAttemptIdentifier().getInputIdentifier();
       inMemToDiskBytes = createInMemorySegments(inMemoryMapOutputs,
                                                 memDiskSegments,
                                                 this.postMergeMemLimit);

http://git-wip-us.apache.org/repos/asf/tez/blob/3ff360aa/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/ShuffleInputEventHandlerOrderedGrouped.java
----------------------------------------------------------------------
diff --git a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/ShuffleInputEventHandlerOrderedGrouped.java b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/ShuffleInputEventHandlerOrderedGrouped.java
index f8c9553..6e6d967 100644
--- a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/ShuffleInputEventHandlerOrderedGrouped.java
+++ b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/ShuffleInputEventHandlerOrderedGrouped.java
@@ -37,7 +37,6 @@ import org.apache.tez.runtime.api.InputContext;
 import org.apache.tez.runtime.api.events.DataMovementEvent;
 import org.apache.tez.runtime.api.events.InputFailedEvent;
 import org.apache.tez.runtime.library.common.InputAttemptIdentifier;
-import org.apache.tez.runtime.library.common.InputIdentifier;
 import org.apache.tez.runtime.library.common.shuffle.ShuffleUtils;
 import org.apache.tez.runtime.library.shuffle.impl.ShuffleUserPayloads.DataMovementEventPayloadProto;
 
@@ -170,7 +169,7 @@ public class ShuffleInputEventHandlerOrderedGrouped implements ShuffleEventHandl
       InputAttemptIdentifier.SPILL_INFO info = (lastEvent) ? InputAttemptIdentifier.SPILL_INFO
           .FINAL_UPDATE : InputAttemptIdentifier.SPILL_INFO.INCREMENTAL_UPDATE;
       srcAttemptIdentifier =
-          new InputAttemptIdentifier(new InputIdentifier(dmEvent.getTargetIndex()), dmEvent
+          new InputAttemptIdentifier(dmEvent.getTargetIndex(), dmEvent
               .getVersion(), pathComponent, false, info, spillEventId);
     } else {
       srcAttemptIdentifier =

http://git-wip-us.apache.org/repos/asf/tez/blob/3ff360aa/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/ShuffleScheduler.java
----------------------------------------------------------------------
diff --git a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/ShuffleScheduler.java b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/ShuffleScheduler.java
index dcfb274..8cba2a6 100644
--- a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/ShuffleScheduler.java
+++ b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/ShuffleScheduler.java
@@ -71,7 +71,6 @@ import org.apache.tez.runtime.api.InputContext;
 import org.apache.tez.runtime.api.events.InputReadErrorEvent;
 import org.apache.tez.runtime.library.api.TezRuntimeConfiguration;
 import org.apache.tez.runtime.library.common.InputAttemptIdentifier;
-import org.apache.tez.runtime.library.common.InputIdentifier;
 import org.apache.tez.runtime.library.common.TezRuntimeUtils;
 import org.apache.tez.runtime.library.common.shuffle.ShuffleUtils;
 import org.apache.tez.runtime.library.common.shuffle.orderedgrouped.MapOutput.Type;
@@ -111,7 +110,7 @@ class ShuffleScheduler {
   //To track shuffleInfo events when finalMerge is disabled in source or pipelined shuffle is
   // enabled in source.
   @VisibleForTesting
-  final Map<InputIdentifier, ShuffleEventInfo> pipelinedShuffleInfoEventsMap;
+  final Map<Integer, ShuffleEventInfo> pipelinedShuffleInfoEventsMap;
 
   @VisibleForTesting
   final Set<MapHost> pendingHosts = new HashSet<MapHost>();
@@ -349,7 +348,7 @@ class ShuffleScheduler {
     this.firstEventReceived = inputContext.getCounters().findCounter(TaskCounter.FIRST_EVENT_RECEIVED);
     this.lastEventReceived = inputContext.getCounters().findCounter(TaskCounter.LAST_EVENT_RECEIVED);
 
-    pipelinedShuffleInfoEventsMap = new HashMap<InputIdentifier, ShuffleEventInfo>();
+    pipelinedShuffleInfoEventsMap = new HashMap<Integer, ShuffleEventInfo>();
     LOG.info("ShuffleScheduler running for sourceVertex: "
         + inputContext.getSourceVertexName() + " with configuration: "
         + "maxFetchFailuresBeforeReporting=" + maxFetchFailuresBeforeReporting
@@ -429,7 +428,7 @@ class ShuffleScheduler {
 
 
     ShuffleEventInfo(InputAttemptIdentifier input) {
-      this.id = input.getInputIdentifier().getInputIndex() + "_" + input.getAttemptNumber();
+      this.id = input.getInputIdentifier() + "_" + input.getAttemptNumber();
       this.eventsProcessed = new BitSet();
       this.attemptNum = input.getAttemptNumber();
     }
@@ -467,7 +466,7 @@ class ShuffleScheduler {
                                          ) throws IOException {
 
     inputContext.notifyProgress();
-    if (!isInputFinished(srcAttemptIdentifier.getInputIdentifier().getInputIndex())) {
+    if (!isInputFinished(srcAttemptIdentifier.getInputIdentifier())) {
       if (!isLocalFetch) {
         /**
          * Reset it only when it is a non-local-disk copy.
@@ -505,10 +504,10 @@ class ShuffleScheduler {
        */
       if (!srcAttemptIdentifier.canRetrieveInputInChunks()) {
         remainingMaps.decrementAndGet();
-        setInputFinished(srcAttemptIdentifier.getInputIdentifier().getInputIndex());
+        setInputFinished(srcAttemptIdentifier.getInputIdentifier());
         numFetchedSpills++;
       } else {
-        InputIdentifier inputIdentifier = srcAttemptIdentifier.getInputIdentifier();
+        int inputIdentifier = srcAttemptIdentifier.getInputIdentifier();
         //Allow only one task attempt to proceed.
         if (!validateInputAttemptForPipelinedShuffle(srcAttemptIdentifier)) {
           return;
@@ -533,7 +532,7 @@ class ShuffleScheduler {
         //check if we downloaded all spills pertaining to this InputAttemptIdentifier
         if (eventInfo.isDone()) {
           remainingMaps.decrementAndGet();
-          setInputFinished(inputIdentifier.getInputIndex());
+          setInputFinished(inputIdentifier);
           pipelinedShuffleInfoEventsMap.remove(inputIdentifier);
           if (LOG.isTraceEnabled()) {
             LOG.trace("Removing : " + srcAttemptIdentifier + ", pending: " +
@@ -560,7 +559,7 @@ class ShuffleScheduler {
       if (LOG.isDebugEnabled()) {
         LOG.debug("src task: "
             + TezRuntimeUtils.getTaskAttemptIdentifier(
-                inputContext.getSourceVertexName(), srcAttemptIdentifier.getInputIdentifier().getInputIndex(),
+                inputContext.getSourceVertexName(), srcAttemptIdentifier.getInputIdentifier(),
                 srcAttemptIdentifier.getAttemptNumber()) + " done");
       }
     } else {
@@ -679,7 +678,7 @@ class ShuffleScheduler {
       String errorMsg = "Failed " + attemptFailures + " times trying to "
           + "download from " + TezRuntimeUtils.getTaskAttemptIdentifier(
           inputContext.getSourceVertexName(),
-          srcAttempt.getInputIdentifier().getInputIndex(),
+          srcAttempt.getInputIdentifier(),
           srcAttempt.getAttemptNumber()) + ". threshold=" + abortFailureLimit;
       IOException ioe = new IOException(errorMsg);
       // Shuffle knows how to deal with failures post shutdown via the onFailure hook
@@ -738,15 +737,15 @@ class ShuffleScheduler {
         srcNameTrimmed + ": " + "Reporting fetch failure for InputIdentifier: "
             + srcAttempt + " taskAttemptIdentifier: " + TezRuntimeUtils
             .getTaskAttemptIdentifier(inputContext.getSourceVertexName(),
-                srcAttempt.getInputIdentifier().getInputIndex(),
+                srcAttempt.getInputIdentifier(),
                 srcAttempt.getAttemptNumber()) + " to AM.");
     List<Event> failedEvents = Lists.newArrayListWithCapacity(1);
     failedEvents.add(InputReadErrorEvent.create(
         "Fetch failure for " + TezRuntimeUtils
             .getTaskAttemptIdentifier(inputContext.getSourceVertexName(),
-                srcAttempt.getInputIdentifier().getInputIndex(),
+                srcAttempt.getInputIdentifier(),
                 srcAttempt.getAttemptNumber()) + " to jobtracker.",
-        srcAttempt.getInputIdentifier().getInputIndex(),
+        srcAttempt.getInputIdentifier(),
         srcAttempt.getAttemptNumber()));
 
     inputContext.sendEvents(failedEvents);
@@ -1014,7 +1013,7 @@ class ShuffleScheduler {
   
   private boolean inputShouldBeConsumed(InputAttemptIdentifier id) {
     return (!obsoleteInputs.contains(id) && 
-             !isInputFinished(id.getInputIdentifier().getInputIndex()));
+             !isInputFinished(id.getInputIdentifier()));
   }
 
   public synchronized List<InputAttemptIdentifier> getMapsForHost(MapHost host) {
@@ -1029,7 +1028,7 @@ class ShuffleScheduler {
       // This may be removed after TEZ-914
       InputAttemptIdentifier id = listItr.next();
       if (inputShouldBeConsumed(id)) {
-        Integer inputNumber = Integer.valueOf(id.getInputIdentifier().getInputIndex());
+        Integer inputNumber = Integer.valueOf(id.getInputIdentifier());
         List<InputAttemptIdentifier> oldIdList = dedupedList.get(inputNumber);
 
         if (oldIdList == null || oldIdList.isEmpty()) {

http://git-wip-us.apache.org/repos/asf/tez/blob/3ff360aa/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/sort/impl/IFile.java
----------------------------------------------------------------------
diff --git a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/sort/impl/IFile.java b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/sort/impl/IFile.java
index 20f44dd..a99eb5e 100644
--- a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/sort/impl/IFile.java
+++ b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/sort/impl/IFile.java
@@ -496,7 +496,7 @@ public class IFile {
 
     protected byte[] buffer = null;
     protected int bufferSize = DEFAULT_BUFFER_SIZE;
-    protected DataInputStream dataIn;
+    protected DataInputStream dataIn = null;
 
     protected int recNo = 1;
     protected int originalKeyLength;
@@ -583,7 +583,9 @@ public class IFile {
         this.in = null;
       }
 
-      this.dataIn = new DataInputStream(this.in);
+      if (in != null) {
+        this.dataIn = new DataInputStream(this.in);
+      }
       this.readRecordsCounter = readsCounter;
       this.bytesReadCounter = bytesReadCounter;
       this.fileLength = length;

http://git-wip-us.apache.org/repos/asf/tez/blob/3ff360aa/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/shuffle/TestFetcher.java
----------------------------------------------------------------------
diff --git a/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/shuffle/TestFetcher.java b/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/shuffle/TestFetcher.java
index 917dbcb..0aa112e 100644
--- a/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/shuffle/TestFetcher.java
+++ b/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/shuffle/TestFetcher.java
@@ -46,7 +46,6 @@ import org.apache.hadoop.yarn.api.records.ApplicationId;
 import org.apache.tez.dag.api.TezConfiguration;
 import org.apache.tez.runtime.library.api.TezRuntimeConfiguration;
 import org.apache.tez.runtime.library.common.InputAttemptIdentifier;
-import org.apache.tez.runtime.library.common.InputIdentifier;
 import org.apache.tez.runtime.library.common.sort.impl.TezIndexRecord;
 import org.junit.Assert;
 import org.junit.Test;
@@ -236,36 +235,36 @@ public class TestFetcher {
   @Test(timeout=5000)
   public void testInputAttemptIdentifierMap() {
     InputAttemptIdentifier[] srcAttempts = {
-        new InputAttemptIdentifier(new InputIdentifier(0), 1, InputAttemptIdentifier.PATH_PREFIX + "pathComponent_0",
+        new InputAttemptIdentifier(0, 1, InputAttemptIdentifier.PATH_PREFIX + "pathComponent_0",
             false, InputAttemptIdentifier.SPILL_INFO.INCREMENTAL_UPDATE, 0),
             //duplicate entry
-        new InputAttemptIdentifier(new InputIdentifier(0), 1, InputAttemptIdentifier.PATH_PREFIX + "pathComponent_0",
+        new InputAttemptIdentifier(0, 1, InputAttemptIdentifier.PATH_PREFIX + "pathComponent_0",
             false, InputAttemptIdentifier.SPILL_INFO.INCREMENTAL_UPDATE, 0),
         // pipeline shuffle based identifiers, with multiple attempts
-        new InputAttemptIdentifier(new InputIdentifier(1), 1, InputAttemptIdentifier.PATH_PREFIX + "pathComponent_1",
+        new InputAttemptIdentifier(1, 1, InputAttemptIdentifier.PATH_PREFIX + "pathComponent_1",
             false, InputAttemptIdentifier.SPILL_INFO.INCREMENTAL_UPDATE, 0),
-        new InputAttemptIdentifier(new InputIdentifier(1), 2, InputAttemptIdentifier.PATH_PREFIX + "pathComponent_1",
+        new InputAttemptIdentifier(1, 2, InputAttemptIdentifier.PATH_PREFIX + "pathComponent_1",
             false, InputAttemptIdentifier.SPILL_INFO.INCREMENTAL_UPDATE, 0),
-        new InputAttemptIdentifier(new InputIdentifier(1), 1, InputAttemptIdentifier.PATH_PREFIX + "pathComponent_2",
+        new InputAttemptIdentifier(1, 1, InputAttemptIdentifier.PATH_PREFIX + "pathComponent_2",
             false, InputAttemptIdentifier.SPILL_INFO.INCREMENTAL_UPDATE, 1),
-        new InputAttemptIdentifier(new InputIdentifier(1), 1, InputAttemptIdentifier.PATH_PREFIX + "pathComponent_3",
+        new InputAttemptIdentifier(1, 1, InputAttemptIdentifier.PATH_PREFIX + "pathComponent_3",
             false, InputAttemptIdentifier.SPILL_INFO.FINAL_UPDATE, 2),
-        new InputAttemptIdentifier(new InputIdentifier(2), 1, InputAttemptIdentifier.PATH_PREFIX + "pathComponent_3",
+        new InputAttemptIdentifier(2, 1, InputAttemptIdentifier.PATH_PREFIX + "pathComponent_3",
             false, InputAttemptIdentifier.SPILL_INFO.FINAL_MERGE_ENABLED, 0)
     };
     InputAttemptIdentifier[] expectedSrcAttempts = {
-        new InputAttemptIdentifier(new InputIdentifier(0), 1, InputAttemptIdentifier.PATH_PREFIX + "pathComponent_0",
+        new InputAttemptIdentifier(0, 1, InputAttemptIdentifier.PATH_PREFIX + "pathComponent_0",
             false, InputAttemptIdentifier.SPILL_INFO.INCREMENTAL_UPDATE, 0),
         // pipeline shuffle based identifiers
-        new InputAttemptIdentifier(new InputIdentifier(1), 1, InputAttemptIdentifier.PATH_PREFIX + "pathComponent_1",
+        new InputAttemptIdentifier(1, 1, InputAttemptIdentifier.PATH_PREFIX + "pathComponent_1",
             false, InputAttemptIdentifier.SPILL_INFO.INCREMENTAL_UPDATE, 0),
-        new InputAttemptIdentifier(new InputIdentifier(1), 2, InputAttemptIdentifier.PATH_PREFIX + "pathComponent_1",
+        new InputAttemptIdentifier(1, 2, InputAttemptIdentifier.PATH_PREFIX + "pathComponent_1",
             false, InputAttemptIdentifier.SPILL_INFO.INCREMENTAL_UPDATE, 0),
-        new InputAttemptIdentifier(new InputIdentifier(1), 1, InputAttemptIdentifier.PATH_PREFIX + "pathComponent_2",
+        new InputAttemptIdentifier(1, 1, InputAttemptIdentifier.PATH_PREFIX + "pathComponent_2",
             false, InputAttemptIdentifier.SPILL_INFO.INCREMENTAL_UPDATE, 1),
-        new InputAttemptIdentifier(new InputIdentifier(1), 1, InputAttemptIdentifier.PATH_PREFIX + "pathComponent_3",
+        new InputAttemptIdentifier(1, 1, InputAttemptIdentifier.PATH_PREFIX + "pathComponent_3",
             false, InputAttemptIdentifier.SPILL_INFO.FINAL_UPDATE, 2),
-        new InputAttemptIdentifier(new InputIdentifier(2), 1, InputAttemptIdentifier.PATH_PREFIX + "pathComponent_3",
+        new InputAttemptIdentifier(2, 1, InputAttemptIdentifier.PATH_PREFIX + "pathComponent_3",
             false, InputAttemptIdentifier.SPILL_INFO.FINAL_MERGE_ENABLED, 0)
     };
     TezConfiguration conf = new TezConfiguration();

http://git-wip-us.apache.org/repos/asf/tez/blob/3ff360aa/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/shuffle/impl/TestShuffleInputEventHandlerImpl.java
----------------------------------------------------------------------
diff --git a/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/shuffle/impl/TestShuffleInputEventHandlerImpl.java b/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/shuffle/impl/TestShuffleInputEventHandlerImpl.java
index c452898..5bbf0fb 100644
--- a/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/shuffle/impl/TestShuffleInputEventHandlerImpl.java
+++ b/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/shuffle/impl/TestShuffleInputEventHandlerImpl.java
@@ -53,7 +53,6 @@ import org.apache.tez.runtime.api.ExecutionContext;
 import org.apache.tez.runtime.api.InputContext;
 import org.apache.tez.runtime.api.events.DataMovementEvent;
 import org.apache.tez.runtime.library.common.InputAttemptIdentifier;
-import org.apache.tez.runtime.library.common.InputIdentifier;
 import org.apache.tez.runtime.library.common.shuffle.FetchedInputAllocator;
 import org.apache.tez.runtime.library.common.shuffle.ShuffleUtils;
 import org.apache.tez.runtime.library.shuffle.impl.ShuffleUserPayloads.DataMovementEventPayloadProto;
@@ -215,7 +214,7 @@ public class TestShuffleInputEventHandlerImpl {
     Event dme = createDataMovementEvent(true, 0, 1, 0, false, new BitSet(), 4, 0);
     handler.handleEvents(Collections.singletonList(dme));
 
-    InputAttemptIdentifier expectedId1 = new InputAttemptIdentifier(new InputIdentifier(1), 0,
+    InputAttemptIdentifier expectedId1 = new InputAttemptIdentifier(1, 0,
         PATH_COMPONENT, false, InputAttemptIdentifier.SPILL_INFO.INCREMENTAL_UPDATE, 0);
     verify(shuffleManager, times(1)).addKnownInput(eq(HOST), eq(PORT), eq(expectedId1), eq(0));
 
@@ -223,7 +222,7 @@ public class TestShuffleInputEventHandlerImpl {
     dme = createDataMovementEvent(true, 0, 1, 1, false, new BitSet(), 4, 0);
     handler.handleEvents(Collections.singletonList(dme));
 
-    InputAttemptIdentifier expectedId2 = new InputAttemptIdentifier(new InputIdentifier(1), 0,
+    InputAttemptIdentifier expectedId2 = new InputAttemptIdentifier(1, 0,
         PATH_COMPONENT, false, InputAttemptIdentifier.SPILL_INFO.INCREMENTAL_UPDATE, 1);
     verify(shuffleManager, times(2)).addKnownInput(eq(HOST), eq(PORT), eq(expectedId2), eq(0));
 
@@ -252,7 +251,7 @@ public class TestShuffleInputEventHandlerImpl {
     Event dme = createDataMovementEvent(true, 0, 1, 0, false, new BitSet(), 4, 1);
     handler.handleEvents(Collections.singletonList(dme));
 
-    InputAttemptIdentifier expected = new InputAttemptIdentifier(new InputIdentifier(1), 1,
+    InputAttemptIdentifier expected = new InputAttemptIdentifier(1, 1,
         PATH_COMPONENT, false, InputAttemptIdentifier.SPILL_INFO.INCREMENTAL_UPDATE, 1);
     verify(shuffleManager, times(1)).addKnownInput(eq(HOST), eq(PORT), eq(expected), eq(0));
 
@@ -283,14 +282,14 @@ public class TestShuffleInputEventHandlerImpl {
     Event dme = createDataMovementEvent(true, 0, 1, 0, false, bitSet, 4, 0);
     handler.handleEvents(Collections.singletonList(dme));
 
-    InputAttemptIdentifier expected = new InputAttemptIdentifier(new InputIdentifier(1), 0,
+    InputAttemptIdentifier expected = new InputAttemptIdentifier(1, 0,
         PATH_COMPONENT, false, InputAttemptIdentifier.SPILL_INFO.INCREMENTAL_UPDATE, 0);
     verify(shuffleManager, times(1)).addCompletedInputWithNoData(expected);
 
     //0--> 1 with spill id 1 (attemptNum 0)
     handler.handleEvents(Collections.singletonList(dme));
     dme = createDataMovementEvent(true, 0, 1, 1, false, new BitSet(), 4, 0);
-    expected = new InputAttemptIdentifier(new InputIdentifier(1), 0,
+    expected = new InputAttemptIdentifier(1, 0,
         PATH_COMPONENT, false, InputAttemptIdentifier.SPILL_INFO.INCREMENTAL_UPDATE, 1);
     verify(shuffleManager, times(2)).addCompletedInputWithNoData(expected);
 

http://git-wip-us.apache.org/repos/asf/tez/blob/3ff360aa/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/TestFetcher.java
----------------------------------------------------------------------
diff --git a/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/TestFetcher.java b/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/TestFetcher.java
index faa2d31..20fb9a9 100644
--- a/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/TestFetcher.java
+++ b/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/TestFetcher.java
@@ -53,7 +53,6 @@ import com.google.common.collect.Lists;
 import org.apache.tez.http.HttpConnection;
 import org.apache.tez.http.HttpConnectionParams;
 import org.apache.tez.common.counters.TezCounter;
-import org.apache.tez.runtime.library.common.InputIdentifier;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.apache.hadoop.conf.Configuration;
@@ -116,7 +115,7 @@ public class TestFetcher {
     doReturn("src vertex").when(inputContext).getSourceVertexName();
 
     MapHost mapHost = new MapHost(0, HOST + ":" + PORT, "baseurl");
-    InputAttemptIdentifier inputAttemptIdentifier = new InputAttemptIdentifier(new InputIdentifier(0), 0, "attempt");
+    InputAttemptIdentifier inputAttemptIdentifier = new InputAttemptIdentifier(0, 0, "attempt");
     mapHost.addKnownMap(inputAttemptIdentifier);
     List<InputAttemptIdentifier> mapsForHost = Lists.newArrayList(inputAttemptIdentifier);
     doReturn(mapsForHost).when(scheduler).getMapsForHost(mapHost);
@@ -484,36 +483,36 @@ public class TestFetcher {
   @Test(timeout = 5000)
   public void testInputAttemptIdentifierMap() {
     InputAttemptIdentifier[] srcAttempts = {
-      new InputAttemptIdentifier(new InputIdentifier(0), 1, InputAttemptIdentifier.PATH_PREFIX + "pathComponent_0",
+      new InputAttemptIdentifier(0, 1, InputAttemptIdentifier.PATH_PREFIX + "pathComponent_0",
           false, InputAttemptIdentifier.SPILL_INFO.INCREMENTAL_UPDATE, 0),
           //duplicate entry
-      new InputAttemptIdentifier(new InputIdentifier(0), 1, InputAttemptIdentifier.PATH_PREFIX + "pathComponent_0",
+      new InputAttemptIdentifier(0, 1, InputAttemptIdentifier.PATH_PREFIX + "pathComponent_0",
           false, InputAttemptIdentifier.SPILL_INFO.INCREMENTAL_UPDATE, 0),
       // pipeline shuffle based identifiers, with multiple attempts
-      new InputAttemptIdentifier(new InputIdentifier(1), 1, InputAttemptIdentifier.PATH_PREFIX + "pathComponent_1",
+      new InputAttemptIdentifier(1, 1, InputAttemptIdentifier.PATH_PREFIX + "pathComponent_1",
           false, InputAttemptIdentifier.SPILL_INFO.INCREMENTAL_UPDATE, 0),
-      new InputAttemptIdentifier(new InputIdentifier(1), 2, InputAttemptIdentifier.PATH_PREFIX + "pathComponent_1",
+      new InputAttemptIdentifier(1, 2, InputAttemptIdentifier.PATH_PREFIX + "pathComponent_1",
           false, InputAttemptIdentifier.SPILL_INFO.INCREMENTAL_UPDATE, 0),
-      new InputAttemptIdentifier(new InputIdentifier(1), 1, InputAttemptIdentifier.PATH_PREFIX + "pathComponent_2",
+      new InputAttemptIdentifier(1, 1, InputAttemptIdentifier.PATH_PREFIX + "pathComponent_2",
           false, InputAttemptIdentifier.SPILL_INFO.INCREMENTAL_UPDATE, 1),
-      new InputAttemptIdentifier(new InputIdentifier(1), 1, InputAttemptIdentifier.PATH_PREFIX + "pathComponent_3",
+      new InputAttemptIdentifier(1, 1, InputAttemptIdentifier.PATH_PREFIX + "pathComponent_3",
           false, InputAttemptIdentifier.SPILL_INFO.FINAL_UPDATE, 2),
-      new InputAttemptIdentifier(new InputIdentifier(2), 1, InputAttemptIdentifier.PATH_PREFIX + "pathComponent_3",
+      new InputAttemptIdentifier(2, 1, InputAttemptIdentifier.PATH_PREFIX + "pathComponent_3",
           false, InputAttemptIdentifier.SPILL_INFO.FINAL_MERGE_ENABLED, 0)
     };
     InputAttemptIdentifier[] expectedSrcAttempts = {
-      new InputAttemptIdentifier(new InputIdentifier(0), 1, InputAttemptIdentifier.PATH_PREFIX + "pathComponent_0",
+      new InputAttemptIdentifier(0, 1, InputAttemptIdentifier.PATH_PREFIX + "pathComponent_0",
           false, InputAttemptIdentifier.SPILL_INFO.INCREMENTAL_UPDATE, 0),
       // pipeline shuffle based identifiers
-      new InputAttemptIdentifier(new InputIdentifier(1), 1, InputAttemptIdentifier.PATH_PREFIX + "pathComponent_1",
+      new InputAttemptIdentifier(1, 1, InputAttemptIdentifier.PATH_PREFIX + "pathComponent_1",
           false, InputAttemptIdentifier.SPILL_INFO.INCREMENTAL_UPDATE, 0),
-      new InputAttemptIdentifier(new InputIdentifier(1), 2, InputAttemptIdentifier.PATH_PREFIX + "pathComponent_1",
+      new InputAttemptIdentifier(1, 2, InputAttemptIdentifier.PATH_PREFIX + "pathComponent_1",
           false, InputAttemptIdentifier.SPILL_INFO.INCREMENTAL_UPDATE, 0),
-      new InputAttemptIdentifier(new InputIdentifier(1), 1, InputAttemptIdentifier.PATH_PREFIX + "pathComponent_2",
+      new InputAttemptIdentifier(1, 1, InputAttemptIdentifier.PATH_PREFIX + "pathComponent_2",
           false, InputAttemptIdentifier.SPILL_INFO.INCREMENTAL_UPDATE, 1),
-      new InputAttemptIdentifier(new InputIdentifier(1), 1, InputAttemptIdentifier.PATH_PREFIX + "pathComponent_3",
+      new InputAttemptIdentifier(1, 1, InputAttemptIdentifier.PATH_PREFIX + "pathComponent_3",
           false, InputAttemptIdentifier.SPILL_INFO.FINAL_UPDATE, 2),
-      new InputAttemptIdentifier(new InputIdentifier(2), 1, InputAttemptIdentifier.PATH_PREFIX + "pathComponent_3",
+      new InputAttemptIdentifier(2, 1, InputAttemptIdentifier.PATH_PREFIX + "pathComponent_3",
           false, InputAttemptIdentifier.SPILL_INFO.FINAL_MERGE_ENABLED, 0)
     };
 

http://git-wip-us.apache.org/repos/asf/tez/blob/3ff360aa/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/TestShuffleInputEventHandlerOrderedGrouped.java
----------------------------------------------------------------------
diff --git a/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/TestShuffleInputEventHandlerOrderedGrouped.java b/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/TestShuffleInputEventHandlerOrderedGrouped.java
index 88a1d20..de066fe 100644
--- a/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/TestShuffleInputEventHandlerOrderedGrouped.java
+++ b/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/TestShuffleInputEventHandlerOrderedGrouped.java
@@ -17,7 +17,6 @@ import org.apache.tez.runtime.api.events.DataMovementEvent;
 import org.apache.tez.runtime.api.events.InputFailedEvent;
 import org.apache.tez.runtime.api.impl.ExecutionContextImpl;
 import org.apache.tez.runtime.library.common.InputAttemptIdentifier;
-import org.apache.tez.runtime.library.common.InputIdentifier;
 import org.apache.tez.runtime.library.shuffle.impl.ShuffleUserPayloads;
 import org.junit.Before;
 import org.junit.Test;
@@ -165,7 +164,7 @@ public class TestShuffleInputEventHandlerOrderedGrouped {
     int inputIdx = 0;
     Event dme1 = createDataMovementEvent(attemptNum, inputIdx, null, false, true, true, 0);
     InputAttemptIdentifier id1 =
-        new InputAttemptIdentifier(new InputIdentifier(inputIdx), attemptNum,
+        new InputAttemptIdentifier(inputIdx, attemptNum,
             PATH_COMPONENT, false, InputAttemptIdentifier.SPILL_INFO.INCREMENTAL_UPDATE, 0);
     handler.handleEvents(Collections.singletonList(dme1));
     String baseUri = handler.getBaseURI(HOST, PORT, attemptNum).toString();
@@ -176,7 +175,7 @@ public class TestShuffleInputEventHandlerOrderedGrouped {
     //Send final_update event.
     Event dme2 = createDataMovementEvent(attemptNum, inputIdx, null, false, true, false, 1);
     InputAttemptIdentifier id2 =
-        new InputAttemptIdentifier(new InputIdentifier(inputIdx), attemptNum,
+        new InputAttemptIdentifier(inputIdx, attemptNum,
             PATH_COMPONENT, false, InputAttemptIdentifier.SPILL_INFO.FINAL_UPDATE, 1);
     handler.handleEvents(Collections.singletonList(dme2));
     baseUri = handler.getBaseURI(HOST, PORT, attemptNum).toString();
@@ -202,14 +201,14 @@ public class TestShuffleInputEventHandlerOrderedGrouped {
     inputIdx = 1;
     Event dme3 = createDataMovementEvent(attemptNum, inputIdx, null, false, true,
         true, 1);
-    InputAttemptIdentifier id3 = new InputAttemptIdentifier(new InputIdentifier(inputIdx),
+    InputAttemptIdentifier id3 = new InputAttemptIdentifier(inputIdx,
         attemptNum, PATH_COMPONENT, false, InputAttemptIdentifier.SPILL_INFO.INCREMENTAL_UPDATE,
         0);
     handler.handleEvents(Collections.singletonList(dme3));
     //Send final_update event (empty partition directly invoking copySucceeded).
-    InputAttemptIdentifier id4 = new InputAttemptIdentifier(new InputIdentifier(inputIdx),
+    InputAttemptIdentifier id4 = new InputAttemptIdentifier(inputIdx,
         attemptNum, PATH_COMPONENT, false, InputAttemptIdentifier.SPILL_INFO.FINAL_UPDATE, 1);
-    assertTrue(!scheduler.isInputFinished(id4.getInputIdentifier().getInputIndex()));
+    assertTrue(!scheduler.isInputFinished(id4.getInputIdentifier()));
     scheduler.copySucceeded(id4, null, 0, 0, 0, null, false);
     assertTrue(!scheduler.isDone()); //we haven't downloaded another id yet
     //Let the incremental event pass
@@ -229,7 +228,7 @@ public class TestShuffleInputEventHandlerOrderedGrouped {
     handler.handleEvents(Collections.singletonList(dme1));
 
     InputAttemptIdentifier id1 =
-        new InputAttemptIdentifier(new InputIdentifier(inputIdx), attemptNum,
+        new InputAttemptIdentifier(inputIdx, attemptNum,
             PATH_COMPONENT, false, InputAttemptIdentifier.SPILL_INFO.INCREMENTAL_UPDATE, 0);
 
     verify(scheduler, times(1)).addKnownMapOutput(eq(HOST), eq(PORT), eq(1), eq(baseUri), eq(id1));
@@ -243,7 +242,7 @@ public class TestShuffleInputEventHandlerOrderedGrouped {
     handler.handleEvents(Collections.singletonList(dme2));
 
     InputAttemptIdentifier id2 =
-        new InputAttemptIdentifier(new InputIdentifier(inputIdx), attemptNum,
+        new InputAttemptIdentifier(inputIdx, attemptNum,
             PATH_COMPONENT, false, InputAttemptIdentifier.SPILL_INFO.INCREMENTAL_UPDATE, 0);
     verify(scheduler, times(1)).reportExceptionForInput(any(IOException.class));
   }
@@ -329,4 +328,4 @@ public class TestShuffleInputEventHandlerOrderedGrouped {
     }
     return TezCommonUtils.compressByteArrayToByteString(TezUtilsInternal.toByteArray(bitSet));
   }
-}
\ No newline at end of file
+}

http://git-wip-us.apache.org/repos/asf/tez/blob/3ff360aa/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/TestShuffleScheduler.java
----------------------------------------------------------------------
diff --git a/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/TestShuffleScheduler.java b/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/TestShuffleScheduler.java
index 1a6c3be..f7ef309 100644
--- a/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/TestShuffleScheduler.java
+++ b/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/TestShuffleScheduler.java
@@ -50,7 +50,6 @@ import org.apache.tez.runtime.api.InputContext;
 import org.apache.tez.runtime.api.impl.ExecutionContextImpl;
 import org.apache.tez.runtime.library.api.TezRuntimeConfiguration;
 import org.apache.tez.runtime.library.common.InputAttemptIdentifier;
-import org.apache.tez.runtime.library.common.InputIdentifier;
 import org.junit.Test;
 import org.mockito.invocation.InvocationOnMock;
 import org.mockito.stubbing.Answer;
@@ -89,7 +88,7 @@ public class TestShuffleScheduler {
       // Schedule all copies.
       for (int i = 0; i < numInputs; i++) {
         InputAttemptIdentifier inputAttemptIdentifier =
-            new InputAttemptIdentifier(new InputIdentifier(i), 0, "attempt_");
+            new InputAttemptIdentifier(i, 0, "attempt_");
         scheduler.addKnownMapOutput("host" + i, 10000, 1, "hostUrl", inputAttemptIdentifier);
         identifiers[i] = inputAttemptIdentifier;
       }
@@ -134,7 +133,7 @@ public class TestShuffleScheduler {
 
       for (int i = 0; i < numInputs; i++) {
         InputAttemptIdentifier inputAttemptIdentifier =
-            new InputAttemptIdentifier(new InputIdentifier(i), 0, "attempt_");
+            new InputAttemptIdentifier(i, 0, "attempt_");
         scheduler.addKnownMapOutput("host" + i, 10000, 1, "hostUrl", inputAttemptIdentifier);
         identifiers[i] = inputAttemptIdentifier;
       }
@@ -191,7 +190,7 @@ public class TestShuffleScheduler {
     //Generate 320 events
     for (int i = 0; i < 320; i++) {
       InputAttemptIdentifier inputAttemptIdentifier =
-          new InputAttemptIdentifier(new InputIdentifier(i), 0, "attempt_");
+          new InputAttemptIdentifier(i, 0, "attempt_");
       scheduler.addKnownMapOutput("host" + (i % totalProducerNodes),
           10000, i, "hostUrl", inputAttemptIdentifier);
     }
@@ -199,7 +198,7 @@ public class TestShuffleScheduler {
     //100 succeeds
     for (int i = 0; i < 100; i++) {
       InputAttemptIdentifier inputAttemptIdentifier =
-          new InputAttemptIdentifier(new InputIdentifier(i), 0, "attempt_");
+          new InputAttemptIdentifier(i, 0, "attempt_");
       MapOutput mapOutput = MapOutput
           .createMemoryMapOutput(inputAttemptIdentifier, mock(FetchedInputAllocatorOrderedGrouped.class),
               100, false);
@@ -210,14 +209,14 @@ public class TestShuffleScheduler {
     //99 fails
     for (int i = 100; i < 199; i++) {
       InputAttemptIdentifier inputAttemptIdentifier =
-          new InputAttemptIdentifier(new InputIdentifier(i), 0, "attempt_");
+          new InputAttemptIdentifier(i, 0, "attempt_");
       scheduler.copyFailed(inputAttemptIdentifier, new MapHost(i, "host" + (i % totalProducerNodes)
           + ":" + 10000, ""), false, true, false);
     }
 
 
     InputAttemptIdentifier inputAttemptIdentifier =
-        new InputAttemptIdentifier(new InputIdentifier(200), 0, "attempt_");
+        new InputAttemptIdentifier(200, 0, "attempt_");
 
     //Should fail here and report exception as reducer is not healthy
     scheduler.copyFailed(inputAttemptIdentifier, new MapHost(200, "host" + (200 %
@@ -260,7 +259,7 @@ public class TestShuffleScheduler {
     //Generate 0-200 events
     for (int i = 0; i < 200; i++) {
       InputAttemptIdentifier inputAttemptIdentifier =
-          new InputAttemptIdentifier(new InputIdentifier(i), 0, "attempt_");
+          new InputAttemptIdentifier(i, 0, "attempt_");
       scheduler.addKnownMapOutput("host" + (i % totalProducerNodes),
           10000, i, "hostUrl", inputAttemptIdentifier);
     }
@@ -269,7 +268,7 @@ public class TestShuffleScheduler {
     //Generate 200-320 events with empty partitions
     for (int i = 200; i < 320; i++) {
       InputAttemptIdentifier inputAttemptIdentifier =
-          new InputAttemptIdentifier(new InputIdentifier(i), 0, "attempt_");
+          new InputAttemptIdentifier(i, 0, "attempt_");
       scheduler.copySucceeded(inputAttemptIdentifier, null, 0, 0, 0, null, true);
     }
     //120 are successful. so remaining is 200
@@ -279,7 +278,7 @@ public class TestShuffleScheduler {
     //200 pending to be downloaded. Download 190.
     for (int i = 0; i < 190; i++) {
       InputAttemptIdentifier inputAttemptIdentifier =
-          new InputAttemptIdentifier(new InputIdentifier(i), 0, "attempt_");
+          new InputAttemptIdentifier(i, 0, "attempt_");
       MapOutput mapOutput = MapOutput
           .createMemoryMapOutput(inputAttemptIdentifier, mock(FetchedInputAllocatorOrderedGrouped.class),
               100, false);
@@ -292,7 +291,7 @@ public class TestShuffleScheduler {
     //10 fails
     for (int i = 190; i < 200; i++) {
       InputAttemptIdentifier inputAttemptIdentifier =
-          new InputAttemptIdentifier(new InputIdentifier(i), 0, "attempt_");
+          new InputAttemptIdentifier(i, 0, "attempt_");
       scheduler.copyFailed(inputAttemptIdentifier, new MapHost(i, "host" + (i % totalProducerNodes)
           + ":" + 10000, ""), false, true, false);
     }
@@ -304,7 +303,7 @@ public class TestShuffleScheduler {
     scheduler.lastProgressTime = System.currentTimeMillis() - 250000;
 
     InputAttemptIdentifier inputAttemptIdentifier =
-        new InputAttemptIdentifier(new InputIdentifier(190), 0, "attempt_");
+        new InputAttemptIdentifier(190, 0, "attempt_");
     scheduler.copyFailed(inputAttemptIdentifier, new MapHost(190, "host" +
         (190 % totalProducerNodes)
         + ":" + 10000, ""), false, true, false);
@@ -317,7 +316,7 @@ public class TestShuffleScheduler {
     //fail to download 50 more times across attempts
     for (int i = 190; i < 200; i++) {
       inputAttemptIdentifier =
-          new InputAttemptIdentifier(new InputIdentifier(i), 0, "attempt_");
+          new InputAttemptIdentifier(i, 0, "attempt_");
       scheduler.copyFailed(inputAttemptIdentifier, new MapHost(i, "host" + (i % totalProducerNodes)
           + ":" + 10000, ""), false, true, false);
       scheduler.copyFailed(inputAttemptIdentifier, new MapHost(i, "host" + (i % totalProducerNodes)
@@ -338,7 +337,7 @@ public class TestShuffleScheduler {
     //fail another 30
     for (int i = 110; i < 120; i++) {
       inputAttemptIdentifier =
-          new InputAttemptIdentifier(new InputIdentifier(i), 0, "attempt_");
+          new InputAttemptIdentifier(i, 0, "attempt_");
       scheduler.copyFailed(inputAttemptIdentifier, new MapHost(i, "host" + (i % totalProducerNodes)
           + ":" + 10000, ""), false, true, false);
       scheduler.copyFailed(inputAttemptIdentifier, new MapHost(i, "host" + (i % totalProducerNodes)
@@ -376,7 +375,7 @@ public class TestShuffleScheduler {
     //Generate 320 events
     for (int i = 0; i < 320; i++) {
       InputAttemptIdentifier inputAttemptIdentifier =
-          new InputAttemptIdentifier(new InputIdentifier(i), 0, "attempt_");
+          new InputAttemptIdentifier(i, 0, "attempt_");
       scheduler.addKnownMapOutput("host" + (i % totalProducerNodes),
           10000, i, "hostUrl", inputAttemptIdentifier);
     }
@@ -384,7 +383,7 @@ public class TestShuffleScheduler {
     //319 succeeds
     for (int i = 0; i < 319; i++) {
       InputAttemptIdentifier inputAttemptIdentifier =
-          new InputAttemptIdentifier(new InputIdentifier(i), 0, "attempt_");
+          new InputAttemptIdentifier(i, 0, "attempt_");
       MapOutput mapOutput = MapOutput
           .createMemoryMapOutput(inputAttemptIdentifier, mock(FetchedInputAllocatorOrderedGrouped.class),
               100, false);
@@ -394,7 +393,7 @@ public class TestShuffleScheduler {
 
     //1 fails (last fetch)
     InputAttemptIdentifier inputAttemptIdentifier =
-        new InputAttemptIdentifier(new InputIdentifier(319), 0, "attempt_");
+        new InputAttemptIdentifier(319, 0, "attempt_");
     scheduler.copyFailed(inputAttemptIdentifier, new MapHost(319, "host" + (319 % totalProducerNodes)
         + ":" + 10000, ""), false, true, false);
 
@@ -441,7 +440,7 @@ public class TestShuffleScheduler {
     //Generate 320 events
     for (int i = 0; i < 320; i++) {
       InputAttemptIdentifier inputAttemptIdentifier =
-          new InputAttemptIdentifier(new InputIdentifier(i), 0, "attempt_");
+          new InputAttemptIdentifier(i, 0, "attempt_");
       scheduler.addKnownMapOutput("host" + (i % totalProducerNodes),
           10000, i, "hostUrl", inputAttemptIdentifier);
     }
@@ -449,7 +448,7 @@ public class TestShuffleScheduler {
     //Tasks fail in 20% of nodes 3 times, but are able to proceed further
     for (int i = 0; i < 64; i++) {
       InputAttemptIdentifier inputAttemptIdentifier =
-          new InputAttemptIdentifier(new InputIdentifier(i), 0, "attempt_");
+          new InputAttemptIdentifier(i, 0, "attempt_");
 
       scheduler.copyFailed(inputAttemptIdentifier, new MapHost(i, "host" + (i %
           totalProducerNodes) + ":" + 10000, ""), false, true, false);
@@ -470,7 +469,7 @@ public class TestShuffleScheduler {
       //319 succeeds
     for (int i = 64; i < 319; i++) {
       InputAttemptIdentifier inputAttemptIdentifier =
-          new InputAttemptIdentifier(new InputIdentifier(i), 0, "attempt_");
+          new InputAttemptIdentifier(i, 0, "attempt_");
       MapOutput mapOutput = MapOutput
           .createMemoryMapOutput(inputAttemptIdentifier, mock(FetchedInputAllocatorOrderedGrouped.class),
               100, false);
@@ -480,7 +479,7 @@ public class TestShuffleScheduler {
 
     //1 fails (last fetch)
     InputAttemptIdentifier inputAttemptIdentifier =
-        new InputAttemptIdentifier(new InputIdentifier(319), 0, "attempt_");
+        new InputAttemptIdentifier(319, 0, "attempt_");
     scheduler.copyFailed(inputAttemptIdentifier, new MapHost(319, "host" + (319 % totalProducerNodes)
         + ":" + 10000, ""), false, true, false);
 
@@ -536,7 +535,7 @@ public class TestShuffleScheduler {
     //Generate 319 events (last event has not arrived)
     for (int i = 0; i < 319; i++) {
       InputAttemptIdentifier inputAttemptIdentifier =
-          new InputAttemptIdentifier(new InputIdentifier(i), 0, "attempt_");
+          new InputAttemptIdentifier(i, 0, "attempt_");
       scheduler.addKnownMapOutput("host" + (i % totalProducerNodes),
           10000, i, "hostUrl", inputAttemptIdentifier);
     }
@@ -544,7 +543,7 @@ public class TestShuffleScheduler {
     //318 succeeds
     for (int i = 0; i < 319; i++) {
       InputAttemptIdentifier inputAttemptIdentifier =
-          new InputAttemptIdentifier(new InputIdentifier(i), 0, "attempt_");
+          new InputAttemptIdentifier(i, 0, "attempt_");
       MapOutput mapOutput = MapOutput
           .createMemoryMapOutput(inputAttemptIdentifier, mock(FetchedInputAllocatorOrderedGrouped.class),
               100, false);
@@ -554,7 +553,7 @@ public class TestShuffleScheduler {
 
     //1 fails (last fetch)
     InputAttemptIdentifier inputAttemptIdentifier =
-        new InputAttemptIdentifier(new InputIdentifier(318), 0, "attempt_");
+        new InputAttemptIdentifier(318, 0, "attempt_");
     scheduler.copyFailed(inputAttemptIdentifier, new MapHost(318, "host" + (318 % totalProducerNodes)
         + ":" + 10000, ""), false, true, false);
 
@@ -615,7 +614,7 @@ public class TestShuffleScheduler {
     //Generate 320 events (last event has not arrived)
     for (int i = 0; i < 320; i++) {
       InputAttemptIdentifier inputAttemptIdentifier =
-          new InputAttemptIdentifier(new InputIdentifier(i), 0, "attempt_");
+          new InputAttemptIdentifier(i, 0, "attempt_");
       scheduler.addKnownMapOutput("host" + (i % totalProducerNodes),
           10000, i, "hostUrl", inputAttemptIdentifier);
     }
@@ -623,7 +622,7 @@ public class TestShuffleScheduler {
     //10 succeeds
     for (int i = 0; i < 10; i++) {
       InputAttemptIdentifier inputAttemptIdentifier =
-          new InputAttemptIdentifier(new InputIdentifier(i), 0, "attempt_");
+          new InputAttemptIdentifier(i, 0, "attempt_");
       MapOutput mapOutput = MapOutput
           .createMemoryMapOutput(inputAttemptIdentifier, mock(FetchedInputAllocatorOrderedGrouped.class),
               100, false);
@@ -634,7 +633,7 @@ public class TestShuffleScheduler {
     //5 fetches fail once
     for (int i = 10; i < 15; i++) {
       InputAttemptIdentifier inputAttemptIdentifier =
-          new InputAttemptIdentifier(new InputIdentifier(i), 0, "attempt_");
+          new InputAttemptIdentifier(i, 0, "attempt_");
       scheduler.copyFailed(inputAttemptIdentifier, new MapHost(i, "host" + (i % totalProducerNodes)
           + ":" + 10000, ""), false, true, false);
     }
@@ -648,7 +647,7 @@ public class TestShuffleScheduler {
     //5 fetches fail repeatedly
     for (int i = 10; i < 15; i++) {
       InputAttemptIdentifier inputAttemptIdentifier =
-          new InputAttemptIdentifier(new InputIdentifier(i), 0, "attempt_");
+          new InputAttemptIdentifier(i, 0, "attempt_");
       scheduler.copyFailed(inputAttemptIdentifier, new MapHost(i, "host" + (i % totalProducerNodes)
           + ":" + 10000, ""), false, true, false);
       scheduler.copyFailed(inputAttemptIdentifier, new MapHost(i, "host" + (i % totalProducerNodes)
@@ -691,7 +690,7 @@ public class TestShuffleScheduler {
     //Generate 320 events
     for (int i = 0; i < 320; i++) {
       InputAttemptIdentifier inputAttemptIdentifier =
-          new InputAttemptIdentifier(new InputIdentifier(i), 0, "attempt_");
+          new InputAttemptIdentifier(i, 0, "attempt_");
       scheduler.addKnownMapOutput("host" + (i % totalProducerNodes), 10000, i,
           "hostUrl", inputAttemptIdentifier);
     }
@@ -699,7 +698,7 @@ public class TestShuffleScheduler {
     //100 succeeds
     for (int i = 0; i < 100; i++) {
       InputAttemptIdentifier inputAttemptIdentifier =
-          new InputAttemptIdentifier(new InputIdentifier(i), 0, "attempt_");
+          new InputAttemptIdentifier(i, 0, "attempt_");
       MapOutput mapOutput = MapOutput
           .createMemoryMapOutput(inputAttemptIdentifier, mock(FetchedInputAllocatorOrderedGrouped.class),
               100, false);
@@ -711,7 +710,7 @@ public class TestShuffleScheduler {
     //99 fails
     for (int i = 100; i < 199; i++) {
       InputAttemptIdentifier inputAttemptIdentifier =
-          new InputAttemptIdentifier(new InputIdentifier(i), 0, "attempt_");
+          new InputAttemptIdentifier(i, 0, "attempt_");
       scheduler.copyFailed(inputAttemptIdentifier,
           new MapHost(i, "host" + (i % totalProducerNodes) + ":" + 10000, ""),
           false, true, false);
@@ -754,7 +753,7 @@ public class TestShuffleScheduler {
     final ShuffleSchedulerForTest scheduler = createScheduler(startTime, 1, shuffle);
 
     InputAttemptIdentifier inputAttemptIdentifier =
-        new InputAttemptIdentifier(new InputIdentifier(0), 0, "attempt_");
+        new InputAttemptIdentifier(0, 0, "attempt_");
     scheduler.addKnownMapOutput("host0", 10000, 0, "hostUrl", inputAttemptIdentifier);
 
     assertTrue(scheduler.pendingHosts.size() == 1);
@@ -801,7 +800,7 @@ public class TestShuffleScheduler {
 
       for (int i = 0; i < numInputs; i++) {
         InputAttemptIdentifier inputAttemptIdentifier =
-            new InputAttemptIdentifier(new InputIdentifier(i), 0, "attempt_");
+            new InputAttemptIdentifier(i, 0, "attempt_");
         scheduler.addKnownMapOutput("host" + i, 10000, 1, "hostUrl", inputAttemptIdentifier);
         identifiers[i] = inputAttemptIdentifier;
       }


[10/24] tez git commit: TEZ-2307. Possible wrong error message when submitting new dag (zjffdu)

Posted by sr...@apache.org.
TEZ-2307. Possible wrong error message when submitting new dag (zjffdu)


Project: http://git-wip-us.apache.org/repos/asf/tez/repo
Commit: http://git-wip-us.apache.org/repos/asf/tez/commit/235841f7
Tree: http://git-wip-us.apache.org/repos/asf/tez/tree/235841f7
Diff: http://git-wip-us.apache.org/repos/asf/tez/diff/235841f7

Branch: refs/heads/TEZ-2980
Commit: 235841f77ebf88994c8d7af189cf1000aedbd69f
Parents: 72f5616
Author: Jeff Zhang <zj...@apache.org>
Authored: Tue Feb 2 13:21:45 2016 +0800
Committer: Jeff Zhang <zj...@apache.org>
Committed: Tue Feb 2 15:51:20 2016 +0800

----------------------------------------------------------------------
 CHANGES.txt                                     |  2 ++
 .../org/apache/tez/dag/app/DAGAppMaster.java    | 33 ++++++++++++++------
 2 files changed, 26 insertions(+), 9 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tez/blob/235841f7/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index 6bff146..c4c04e8 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -6,6 +6,7 @@ Release 0.8.3: Unreleased
 INCOMPATIBLE CHANGES
 
 ALL CHANGES:
+  TEZ-2307. Possible wrong error message when submitting new dag
   TEZ-2974. Tez tools: TFileRecordReader in tez-tools should support reading >2 GB tfiles.
   TEZ-3081. Update tez website for trademarks feedback.
   TEZ-3076. Reduce merge memory overhead to support large number of in-memory mapoutputs
@@ -325,6 +326,7 @@ INCOMPATIBLE CHANGES
   TEZ-2949. Allow duplicate dag names within session for Tez.
 
 ALL CHANGES
+  TEZ-2307. Possible wrong error message when submitting new dag.
   TEZ-3076. Reduce merge memory overhead to support large number of in-memory mapoutputs
   TEZ-3066. TaskAttemptFinishedEvent ConcurrentModificationException in recovery or history logging services.
   TEZ-3036. Tez AM can hang on startup with no indication of error

http://git-wip-us.apache.org/repos/asf/tez/blob/235841f7/tez-dag/src/main/java/org/apache/tez/dag/app/DAGAppMaster.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/DAGAppMaster.java b/tez-dag/src/main/java/org/apache/tez/dag/app/DAGAppMaster.java
index c16bdb9..579d23f 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/DAGAppMaster.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/DAGAppMaster.java
@@ -280,6 +280,7 @@ public class DAGAppMaster extends AbstractService {
   private final UserGroupInformation appMasterUgi;
 
   private AtomicBoolean sessionStopped = new AtomicBoolean(false);
+  private final Object idleStateLock = new Object();
   private long sessionTimeoutInterval;
   private long lastDAGCompletionTime;
   private Timer dagSubmissionTimer;
@@ -811,7 +812,6 @@ public class DAGAppMaster extends AbstractService {
             // Leaving the taskSchedulerEventHandler here for now. Doesn't generate new events.
             // However, eventually it needs to be moved out.
             this.taskSchedulerManager.dagCompleted();
-            state = DAGAppMasterState.IDLE;
           } else {
             LOG.info("Session shutting down now.");
             this.taskSchedulerManager.setShouldUnregisterFlag();
@@ -851,6 +851,10 @@ public class DAGAppMaster extends AbstractService {
       TezDAGID.clearCache();
       LOG.info("Completed cleanup for DAG: name=" + cleanupEvent.getDag().getName() + ", with id=" +
           cleanupEvent.getDag().getID());
+      synchronized (idleStateLock) {
+        state = DAGAppMasterState.IDLE;
+        idleStateLock.notify();
+      }
       break;
     case NEW_DAG_SUBMITTED:
       // Inform sub-components that a new DAG has been submitted.
@@ -1331,21 +1335,33 @@ public class DAGAppMaster extends AbstractService {
       throw new SessionNotRunning("AM unable to accept new DAG submissions."
           + " In the process of shutting down");
     }
+
+    // dag is in cleanup when dag state is completed but AM state is still RUNNING
+    synchronized (idleStateLock) {
+      while (currentDAG != null && currentDAG.isComplete() && state == DAGAppMasterState.RUNNING) {
+        try {
+          LOG.info("wait for previous dag cleanup");
+          idleStateLock.wait();
+        } catch (InterruptedException e) {
+          throw new TezException(e);
+        }
+      }
+    }
+
     synchronized (this) {
       if (this.versionMismatch) {
         throw new TezException("Unable to accept DAG submissions as the ApplicationMaster is"
             + " incompatible with the client. " + versionMismatchDiagnostics);
       }
-      if (currentDAG != null
-          && !state.equals(DAGAppMasterState.IDLE)) {
-        throw new TezException("App master already running a DAG");
-      }
       if (state.equals(DAGAppMasterState.ERROR)
-          || sessionStopped.get()) {
+              || sessionStopped.get()) {
         throw new SessionNotRunning("AM unable to accept new DAG submissions."
-            + " In the process of shutting down");
+                + " In the process of shutting down");
+      }
+      if (currentDAG != null
+          && !currentDAG.isComplete()) {
+        throw new TezException("App master already running a DAG");
       }
-
       // RPC server runs in the context of the job user as it was started in
       // the job user's UGI context
       LOG.info("Starting DAG submitted via RPC: " + dagPlan.getName());
@@ -2445,7 +2461,6 @@ public class DAGAppMaster extends AbstractService {
     }
 
     startDAGExecution(newDAG, lrDiff);
-
     // set state after curDag is set
     this.state = DAGAppMasterState.RUNNING;
   }


[06/24] tez git commit: TEZ-3079. Fix tez-tfile parser documentation (rbalamohan)

Posted by sr...@apache.org.
TEZ-3079. Fix tez-tfile parser documentation (rbalamohan)


Project: http://git-wip-us.apache.org/repos/asf/tez/repo
Commit: http://git-wip-us.apache.org/repos/asf/tez/commit/7e636a5e
Tree: http://git-wip-us.apache.org/repos/asf/tez/tree/7e636a5e
Diff: http://git-wip-us.apache.org/repos/asf/tez/diff/7e636a5e

Branch: refs/heads/TEZ-2980
Commit: 7e636a5e9661bde62cc0cb4364ad4febc659dcb3
Parents: 2bf27de
Author: Rajesh Balamohan <rb...@apache.org>
Authored: Fri Jan 29 20:25:39 2016 +0530
Committer: Rajesh Balamohan <rb...@apache.org>
Committed: Fri Jan 29 20:25:39 2016 +0530

----------------------------------------------------------------------
 CHANGES.txt                           | 1 +
 tez-tools/tez-tfile-parser/README.txt | 4 +++-
 2 files changed, 4 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tez/blob/7e636a5e/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index 91c86f8..d69390c 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -6,6 +6,7 @@ Release 0.8.3: Unreleased
 INCOMPATIBLE CHANGES
 
 ALL CHANGES:
+  TEZ-3079. Fix tez-tfile parser documentation.
   TEZ-3066. TaskAttemptFinishedEvent ConcurrentModificationException in recovery or history logging services.
   TEZ-3036. Tez AM can hang on startup with no indication of error
   TEZ-3052. Task internal error due to Invalid event: T_ATTEMPT_FAILED at FAILED

http://git-wip-us.apache.org/repos/asf/tez/blob/7e636a5e/tez-tools/tez-tfile-parser/README.txt
----------------------------------------------------------------------
diff --git a/tez-tools/tez-tfile-parser/README.txt b/tez-tools/tez-tfile-parser/README.txt
index 5b18767..f82380d 100644
--- a/tez-tools/tez-tfile-parser/README.txt
+++ b/tez-tools/tez-tfile-parser/README.txt
@@ -16,7 +16,9 @@ set pig.splitCombination false;
 set tez.grouping.min-size 52428800;
 set tez.grouping.max-size 52428800;
 
-register 'tfile-parser-1.0-SNAPSHOT.jar';
+/* Register all tez jars. Replace $TEZ_HOME, $TEZ_TFILE_DIR with absolute path */
+register '$TEZ_HOME/*.jar';
+register '$TEZ_TFILE_DIR/tfile-parser-1.0-SNAPSHOT.jar';
 raw = load '/app-logs/root/logs/application_1411511669099_0769/*' using org.apache.tez.tools.TFileLoader() as (machine:chararray, key:chararray, line:chararray);
 filterByLine = FILTER raw BY (key MATCHES '.*container_1411511669099_0769_01_000001.*')
                    AND (line MATCHES '.*Shuffle.*');


[11/24] tez git commit: TEZ-3089. TaskConcurrencyAnalyzer can return negative task count with very large jobs (rbalamohan)

Posted by sr...@apache.org.
TEZ-3089. TaskConcurrencyAnalyzer can return negative task count with very large jobs (rbalamohan)


Project: http://git-wip-us.apache.org/repos/asf/tez/repo
Commit: http://git-wip-us.apache.org/repos/asf/tez/commit/89bc6abf
Tree: http://git-wip-us.apache.org/repos/asf/tez/tree/89bc6abf
Diff: http://git-wip-us.apache.org/repos/asf/tez/diff/89bc6abf

Branch: refs/heads/TEZ-2980
Commit: 89bc6abf6c3caddb6224a19b7b47010b7bca4eff
Parents: 235841f
Author: Rajesh Balamohan <rb...@apache.org>
Authored: Tue Feb 2 03:56:46 2016 -0800
Committer: Rajesh Balamohan <rb...@apache.org>
Committed: Tue Feb 2 03:56:46 2016 -0800

----------------------------------------------------------------------
 CHANGES.txt                                     |  1 +
 .../plugins/TaskConcurrencyAnalyzer.java        | 24 ++++++++++++++++++--
 2 files changed, 23 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tez/blob/89bc6abf/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index c4c04e8..b7bb98a 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -6,6 +6,7 @@ Release 0.8.3: Unreleased
 INCOMPATIBLE CHANGES
 
 ALL CHANGES:
+  TEZ-3089. TaskConcurrencyAnalyzer can return negative task count with very large jobs.
   TEZ-2307. Possible wrong error message when submitting new dag
   TEZ-2974. Tez tools: TFileRecordReader in tez-tools should support reading >2 GB tfiles.
   TEZ-3081. Update tez website for trademarks feedback.

http://git-wip-us.apache.org/repos/asf/tez/blob/89bc6abf/tez-tools/analyzers/job-analyzer/src/main/java/org/apache/tez/analyzer/plugins/TaskConcurrencyAnalyzer.java
----------------------------------------------------------------------
diff --git a/tez-tools/analyzers/job-analyzer/src/main/java/org/apache/tez/analyzer/plugins/TaskConcurrencyAnalyzer.java b/tez-tools/analyzers/job-analyzer/src/main/java/org/apache/tez/analyzer/plugins/TaskConcurrencyAnalyzer.java
index 070294f..72f3b36 100644
--- a/tez-tools/analyzers/job-analyzer/src/main/java/org/apache/tez/analyzer/plugins/TaskConcurrencyAnalyzer.java
+++ b/tez-tools/analyzers/job-analyzer/src/main/java/org/apache/tez/analyzer/plugins/TaskConcurrencyAnalyzer.java
@@ -80,8 +80,28 @@ public class TaskConcurrencyAnalyzer extends TezAnalyzerBase implements Analyzer
        */
       TreeMultiset<TimeInfo> timeInfoSet = TreeMultiset.create(new Comparator<TimeInfo>() {
         @Override public int compare(TimeInfo o1, TimeInfo o2) {
-          return (o1.timestamp < o2.timestamp) ? -1 :
-              ((o1.timestamp == o2.timestamp) ? 0 : 1);
+          if (o1.timestamp < o2.timestamp) {
+            return -1;
+          }
+
+          if (o1.timestamp > o2.timestamp) {
+            return 1;
+          }
+
+          if (o1.timestamp == o2.timestamp) {
+            //check event type
+            if (o1.eventType.equals(o2.eventType)) {
+              return 0;
+            }
+
+            if (o1.eventType.equals(EventType.START)
+                && o2.eventType.equals(EventType.FINISH)) {
+              return -1;
+            } else {
+              return 1;
+            }
+          }
+          return 0;
         }
       });
 


[15/24] tez git commit: TEZ-3090. MRInput should make dagIdentifier, vertexIdentifier, etc available to the InputFormat jobConf. (Siddharth Seth)

Posted by sr...@apache.org.
TEZ-3090. MRInput should make dagIdentifier, vertexIdentifier, etc
available to the InputFormat jobConf. (Siddharth Seth)


Project: http://git-wip-us.apache.org/repos/asf/tez/repo
Commit: http://git-wip-us.apache.org/repos/asf/tez/commit/99c85d3f
Tree: http://git-wip-us.apache.org/repos/asf/tez/tree/99c85d3f
Diff: http://git-wip-us.apache.org/repos/asf/tez/diff/99c85d3f

Branch: refs/heads/TEZ-2980
Commit: 99c85d3f95ac9bbee9c507c4efdc2757ea5b8542
Parents: f352cfb
Author: Siddharth Seth <ss...@apache.org>
Authored: Wed Feb 10 20:37:50 2016 -0800
Committer: Siddharth Seth <ss...@apache.org>
Committed: Wed Feb 10 20:37:50 2016 -0800

----------------------------------------------------------------------
 CHANGES.txt                                     |   1 +
 .../tez/mapreduce/hadoop/MRInputHelpers.java    | 124 +++++++++++++++
 .../org/apache/tez/mapreduce/input/MRInput.java |  16 +-
 .../tez/mapreduce/input/base/MRInputBase.java   |  12 ++
 .../apache/tez/mapreduce/input/TestMRInput.java | 151 +++++++++++++++++++
 .../tez/mapreduce/input/TestMultiMRInput.java   |   2 +
 6 files changed, 305 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tez/blob/99c85d3f/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index 4d7ae6b..c769843 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -6,6 +6,7 @@ Release 0.8.3: Unreleased
 INCOMPATIBLE CHANGES
 
 ALL CHANGES:
+  TEZ-3090. MRInput should make dagIdentifier, vertexIdentifier, etc available to the InputFormat jobConf.
   TEZ-3093. CriticalPathAnalyzer should be accessible via zeppelin.
   TEZ-3089. TaskConcurrencyAnalyzer can return negative task count with very large jobs.
   TEZ-2307. Possible wrong error message when submitting new dag

http://git-wip-us.apache.org/repos/asf/tez/blob/99c85d3f/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/hadoop/MRInputHelpers.java
----------------------------------------------------------------------
diff --git a/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/hadoop/MRInputHelpers.java b/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/hadoop/MRInputHelpers.java
index 30e4a8c..325e7b2 100644
--- a/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/hadoop/MRInputHelpers.java
+++ b/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/hadoop/MRInputHelpers.java
@@ -35,6 +35,7 @@ import com.google.common.collect.Iterables;
 import com.google.common.collect.Lists;
 import com.google.protobuf.ByteString;
 
+import org.apache.tez.runtime.api.InputContext;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.apache.hadoop.classification.InterfaceAudience;
@@ -707,4 +708,127 @@ public class MRInputHelpers {
     return UserPayload.create(userPayloadBuilder.build().toByteString().asReadOnlyByteBuffer());
   }
 
+
+  private static String getStringProperty(Configuration conf, String propertyName) {
+    Preconditions.checkNotNull(conf, "Configuration must be provided");
+    String propertyString = conf.get(propertyName);
+    Preconditions.checkNotNull(propertyString,
+        "Property " + propertyName + " not found in provided configuration");
+    return propertyString;
+  }
+
+  private static int getIntProperty(Configuration conf, String propertyName) {
+    return Integer.parseInt(getStringProperty(conf, propertyName));
+  }
+
+  /**
+   * @see {@link InputContext#getDagIdentifier}
+   * @param conf configuration instance
+   * @return dag index
+   */
+  @Public
+  public static int getDagIndex(Configuration conf) {
+    return getIntProperty(conf, MRInput.TEZ_MAPREDUCE_DAG_INDEX);
+  }
+
+  /**
+   * @see {@link InputContext#getTaskVertexIndex}
+   * @param conf configuration instance
+   * @return vertex index
+   */
+  @Public
+  public static int getVertexIndex(Configuration conf) {
+    return getIntProperty(conf, MRInput.TEZ_MAPREDUCE_VERTEX_INDEX);
+  }
+
+  /**
+   * @see {@link InputContext#getTaskIndex}
+   * @param conf configuration instance
+   * @return task index
+   */
+  @Public
+  public static int getTaskIndex(Configuration conf) {
+    return getIntProperty(conf, MRInput.TEZ_MAPREDUCE_TASK_INDEX);
+  }
+
+  /**
+   * @see {@link InputContext#getTaskAttemptNumber}
+   * @param conf configuration instance
+   * @return task attempt index
+   */
+  @Public
+  public static int getTaskAttemptIndex(Configuration conf) {
+    return getIntProperty(conf, MRInput.TEZ_MAPREDUCE_TASK_ATTEMPT_INDEX);
+  }
+
+  /**
+   * @see {@link InputContext#getInputIndex}
+   * @param conf configuration instance
+   * @return input index
+   */
+  @Public
+  public static int getInputIndex(Configuration conf) {
+    return getIntProperty(conf, MRInput.TEZ_MAPREDUCE_INPUT_INDEX);
+  }
+
+  /**
+   * @see {@link InputContext#getDAGName}
+   * @param conf configuration instance
+   * @return dag name
+   */
+  @Public
+  public static String getDagName(Configuration conf) {
+    return getStringProperty(conf, MRInput.TEZ_MAPREDUCE_DAG_NAME);
+  }
+
+  /**
+   * @see {@link InputContext#getTaskVertexName}
+   * @param conf configuration instance
+   * @return vertex name
+   */
+  @Public
+  public static String getVertexName(Configuration conf) {
+    return getStringProperty(conf, MRInput.TEZ_MAPREDUCE_VERTEX_NAME);
+  }
+
+  /**
+   * @see {@link InputContext#getSourceVertexName}
+   * @param conf configuration instance
+   * @return source name
+   */
+  @Public
+  public static String getInputName(Configuration conf) {
+    return getStringProperty(conf, MRInput.TEZ_MAPREDUCE_INPUT_NAME);
+  }
+
+  /**
+   * @see {@link InputContext#getApplicationId}
+   * @param conf configuration instance
+   * @return applicationId as a string
+   */
+  @Public
+  public static String getApplicationIdString(Configuration conf) {
+    return getStringProperty(conf, MRInput.TEZ_MAPREDUCE_APPLICATION_ID);
+  }
+
+  /**
+   * @see {@link InputContext#getUniqueIdentifier}
+   * @param conf configuration instance
+   * @return unique identifier for the input
+   */
+  @Public
+  public static String getUniqueIdentifier(Configuration conf) {
+    return getStringProperty(conf, MRInput.TEZ_MAPREDUCE_UNIQUE_IDENTIFIER);
+  }
+
+  /**
+   * @see {@link InputContext#getDAGAttemptNumber}
+   * @param conf configuration instance
+   * @return attempt number
+   */
+  @Public
+  public static int getDagAttemptNumber(Configuration conf) {
+    return getIntProperty(conf, MRInput.TEZ_MAPREDUCE_DAG_ATTEMPT_NUMBER);
+  }
+
 }

http://git-wip-us.apache.org/repos/asf/tez/blob/99c85d3f/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/input/MRInput.java
----------------------------------------------------------------------
diff --git a/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/input/MRInput.java b/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/input/MRInput.java
index b68d135..4a4ba86 100644
--- a/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/input/MRInput.java
+++ b/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/input/MRInput.java
@@ -82,7 +82,21 @@ import com.google.common.collect.Lists;
  */
 @Public
 public class MRInput extends MRInputBase {
-  
+
+  @Private public static final String TEZ_MAPREDUCE_DAG_INDEX = "tez.mapreduce.dag.index";
+  @Private public static final String TEZ_MAPREDUCE_DAG_NAME = "tez.mapreduce.dag.name";
+  @Private public static final String TEZ_MAPREDUCE_VERTEX_INDEX = "tez.mapreduce.vertex.index";
+  @Private public static final String TEZ_MAPREDUCE_VERTEX_NAME = "tez.mapreduce.vertex.name";
+  @Private public static final String TEZ_MAPREDUCE_TASK_INDEX = "tez.mapreduce.task.index";
+  @Private public static final String TEZ_MAPREDUCE_TASK_ATTEMPT_INDEX = "tez.mapreduce.task.attempt.index";
+  @Private public static final String TEZ_MAPREDUCE_INPUT_INDEX = "tez.mapreduce.input.index";
+  @Private public static final String TEZ_MAPREDUCE_INPUT_NAME = "tez.mapreduce.input.name";
+  @Private public static final String TEZ_MAPREDUCE_APPLICATION_ID = "tez.mapreduce.application.id";
+  @Private public static final String TEZ_MAPREDUCE_UNIQUE_IDENTIFIER = "tez.mapreduce.unique.identifier";
+  @Private public static final String TEZ_MAPREDUCE_DAG_ATTEMPT_NUMBER = "tez.mapreduce.dag.attempt.number";
+
+
+
   /**
    * Helper class to configure {@link MRInput}
    *

http://git-wip-us.apache.org/repos/asf/tez/blob/99c85d3f/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/input/base/MRInputBase.java
----------------------------------------------------------------------
diff --git a/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/input/base/MRInputBase.java b/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/input/base/MRInputBase.java
index e4aa7e2..230f55e 100644
--- a/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/input/base/MRInputBase.java
+++ b/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/input/base/MRInputBase.java
@@ -32,6 +32,7 @@ import org.apache.tez.common.counters.TaskCounter;
 import org.apache.tez.common.counters.TezCounter;
 import org.apache.tez.mapreduce.hadoop.MRInputHelpers;
 import org.apache.tez.mapreduce.hadoop.MRJobConfig;
+import org.apache.tez.mapreduce.input.MRInput;
 import org.apache.tez.mapreduce.protos.MRRuntimeProtos;
 import org.apache.tez.runtime.api.AbstractLogicalInput;
 import org.apache.tez.runtime.api.Event;
@@ -96,6 +97,17 @@ public abstract class MRInputBase extends AbstractLogicalInput {
         taskAttemptId.toString());
     jobConf.setInt(MRJobConfig.APPLICATION_ATTEMPT_ID,
         getContext().getDAGAttemptNumber());
+    jobConf.setInt(MRInput.TEZ_MAPREDUCE_DAG_INDEX, getContext().getDagIdentifier());
+    jobConf.setInt(MRInput.TEZ_MAPREDUCE_VERTEX_INDEX, getContext().getTaskVertexIndex());
+    jobConf.setInt(MRInput.TEZ_MAPREDUCE_TASK_INDEX, getContext().getTaskIndex());
+    jobConf.setInt(MRInput.TEZ_MAPREDUCE_TASK_ATTEMPT_INDEX, getContext().getTaskAttemptNumber());
+    jobConf.set(MRInput.TEZ_MAPREDUCE_DAG_NAME, getContext().getDAGName());
+    jobConf.set(MRInput.TEZ_MAPREDUCE_VERTEX_NAME, getContext().getTaskVertexName());
+    jobConf.setInt(MRInput.TEZ_MAPREDUCE_INPUT_INDEX, getContext().getInputIndex());
+    jobConf.set(MRInput.TEZ_MAPREDUCE_INPUT_NAME, getContext().getSourceVertexName());
+    jobConf.set(MRInput.TEZ_MAPREDUCE_APPLICATION_ID, getContext().getApplicationId().toString());
+    jobConf.set(MRInput.TEZ_MAPREDUCE_UNIQUE_IDENTIFIER, getContext().getUniqueIdentifier());
+    jobConf.setInt(MRInput.TEZ_MAPREDUCE_DAG_ATTEMPT_NUMBER, getContext().getDAGAttemptNumber());
 
     this.inputRecordCounter = getContext().getCounters().findCounter(
         TaskCounter.INPUT_RECORDS_PROCESSED);

http://git-wip-us.apache.org/repos/asf/tez/blob/99c85d3f/tez-mapreduce/src/test/java/org/apache/tez/mapreduce/input/TestMRInput.java
----------------------------------------------------------------------
diff --git a/tez-mapreduce/src/test/java/org/apache/tez/mapreduce/input/TestMRInput.java b/tez-mapreduce/src/test/java/org/apache/tez/mapreduce/input/TestMRInput.java
index 448b90c..b42ef25 100644
--- a/tez-mapreduce/src/test/java/org/apache/tez/mapreduce/input/TestMRInput.java
+++ b/tez-mapreduce/src/test/java/org/apache/tez/mapreduce/input/TestMRInput.java
@@ -14,6 +14,7 @@
 
 package org.apache.tez.mapreduce.input;
 
+import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.assertTrue;
 import static org.junit.Assert.fail;
@@ -22,17 +23,28 @@ import static org.mockito.Mockito.mock;
 import static org.mockito.Mockito.times;
 import static org.mockito.Mockito.verify;
 
+import java.io.DataInput;
+import java.io.DataOutput;
 import java.io.IOException;
 import java.util.LinkedList;
 import java.util.List;
+import java.util.concurrent.atomic.AtomicBoolean;
 
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.mapred.InputFormat;
+import org.apache.hadoop.mapred.InputSplit;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.RecordReader;
+import org.apache.hadoop.mapred.Reporter;
 import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
 import org.apache.hadoop.yarn.api.records.ApplicationId;
 import org.apache.tez.common.counters.TezCounters;
 import org.apache.tez.dag.api.DataSourceDescriptor;
+import org.apache.tez.mapreduce.hadoop.MRInputHelpers;
+import org.apache.tez.mapreduce.protos.MRRuntimeProtos;
 import org.apache.tez.runtime.api.Event;
 import org.apache.tez.runtime.api.InputContext;
+import org.apache.tez.runtime.api.events.InputDataInformationEvent;
 import org.junit.Test;
 
 public class TestMRInput {
@@ -47,6 +59,10 @@ public class TestMRInput {
     ApplicationId applicationId = ApplicationId.newInstance(1000, 1);
     doReturn(dsd.getInputDescriptor().getUserPayload()).when(inputContext).getUserPayload();
     doReturn(applicationId).when(inputContext).getApplicationId();
+    doReturn("dagName").when(inputContext).getDAGName();
+    doReturn("vertexName").when(inputContext).getTaskVertexName();
+    doReturn("inputName").when(inputContext).getSourceVertexName();
+    doReturn("uniqueIdentifier").when(inputContext).getUniqueIdentifier();
     doReturn(1).when(inputContext).getTaskIndex();
     doReturn(1).when(inputContext).getTaskAttemptNumber();
     doReturn(new TezCounters()).when(inputContext).getCounters();
@@ -69,4 +85,139 @@ public class TestMRInput {
       assertTrue(e instanceof IllegalStateException);
     }
   }
+
+  private static final String TEST_ATTRIBUTES_DAG_NAME = "dagName";
+  private static final String TEST_ATTRIBUTES_VERTEX_NAME = "vertexName";
+  private static final String TEST_ATTRIBUTES_INPUT_NAME = "inputName";
+  private static final ApplicationId TEST_ATTRIBUTES_APPLICATION_ID = ApplicationId.newInstance(0, 0);
+  private static final String TEST_ATTRIBUTES_UNIQUE_IDENTIFIER = "uniqueId";
+  private static final int TEST_ATTRIBUTES_DAG_INDEX = 1000;
+  private static final int TEST_ATTRIBUTES_VERTEX_INDEX = 2000;
+  private static final int TEST_ATTRIBUTES_TASK_INDEX = 3000;
+  private static final int TEST_ATTRIBUTES_TASK_ATTEMPT_INDEX = 4000;
+  private static final int TEST_ATTRIBUTES_INPUT_INDEX = 5000;
+  private static final int TEST_ATTRIBUTES_DAG_ATTEMPT_NUMBER = 6000;
+
+  @Test(timeout = 5000)
+  public void testAttributesInJobConf() throws Exception {
+    InputContext inputContext = mock(InputContext.class);
+    doReturn(TEST_ATTRIBUTES_DAG_INDEX).when(inputContext).getDagIdentifier();
+    doReturn(TEST_ATTRIBUTES_VERTEX_INDEX).when(inputContext).getTaskVertexIndex();
+    doReturn(TEST_ATTRIBUTES_TASK_INDEX).when(inputContext).getTaskIndex();
+    doReturn(TEST_ATTRIBUTES_TASK_ATTEMPT_INDEX).when(inputContext).getTaskAttemptNumber();
+    doReturn(TEST_ATTRIBUTES_INPUT_INDEX).when(inputContext).getInputIndex();
+    doReturn(TEST_ATTRIBUTES_DAG_ATTEMPT_NUMBER).when(inputContext).getDAGAttemptNumber();
+    doReturn(TEST_ATTRIBUTES_DAG_NAME).when(inputContext).getDAGName();
+    doReturn(TEST_ATTRIBUTES_VERTEX_NAME).when(inputContext).getTaskVertexName();
+    doReturn(TEST_ATTRIBUTES_INPUT_NAME).when(inputContext).getSourceVertexName();
+    doReturn(TEST_ATTRIBUTES_APPLICATION_ID).when(inputContext).getApplicationId();
+    doReturn(TEST_ATTRIBUTES_UNIQUE_IDENTIFIER).when(inputContext).getUniqueIdentifier();
+
+
+    DataSourceDescriptor dsd = MRInput.createConfigBuilder(new Configuration(false),
+        TestInputFormat.class).groupSplits(false).build();
+
+    doReturn(dsd.getInputDescriptor().getUserPayload()).when(inputContext).getUserPayload();
+    doReturn(new TezCounters()).when(inputContext).getCounters();
+
+
+    MRInput mrInput = new MRInput(inputContext, 1);
+    mrInput.initialize();
+
+    MRRuntimeProtos.MRSplitProto splitProto =
+        MRRuntimeProtos.MRSplitProto.newBuilder().setSplitClassName(TestInputSplit.class.getName())
+            .build();
+    InputDataInformationEvent diEvent = InputDataInformationEvent
+        .createWithSerializedPayload(0, splitProto.toByteString().asReadOnlyByteBuffer());
+
+    List<Event> events = new LinkedList<>();
+    events.add(diEvent);
+    mrInput.handleEvents(events);
+    assertTrue(TestInputFormat.invoked.get());
+  }
+
+
+  /**
+   * Test InputFormat whose getRecordReader() asserts that the Tez attributes can be read back from the JobConf.
+   */
+  static class TestInputFormat implements InputFormat {
+
+    private static final AtomicBoolean invoked = new AtomicBoolean(false);
+
+    @Override
+    public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
+      return null;
+    }
+
+    @Override
+    public RecordReader getRecordReader(InputSplit split, JobConf job, Reporter reporter) throws
+        IOException {
+      assertEquals(TEST_ATTRIBUTES_DAG_NAME, MRInputHelpers.getDagName(job));
+      assertEquals(TEST_ATTRIBUTES_VERTEX_NAME, MRInputHelpers.getVertexName(job));
+      assertEquals(TEST_ATTRIBUTES_INPUT_NAME, MRInputHelpers.getInputName(job));
+      assertEquals(TEST_ATTRIBUTES_DAG_INDEX, MRInputHelpers.getDagIndex(job));
+      assertEquals(TEST_ATTRIBUTES_VERTEX_INDEX, MRInputHelpers.getVertexIndex(job));
+      assertEquals(TEST_ATTRIBUTES_APPLICATION_ID.toString(), MRInputHelpers.getApplicationIdString(job));
+      assertEquals(TEST_ATTRIBUTES_UNIQUE_IDENTIFIER, MRInputHelpers.getUniqueIdentifier(job));
+      assertEquals(TEST_ATTRIBUTES_TASK_INDEX, MRInputHelpers.getTaskIndex(job));
+      assertEquals(TEST_ATTRIBUTES_TASK_ATTEMPT_INDEX, MRInputHelpers.getTaskAttemptIndex(job));
+      assertEquals(TEST_ATTRIBUTES_INPUT_INDEX, MRInputHelpers.getInputIndex(job));
+      assertEquals(TEST_ATTRIBUTES_DAG_ATTEMPT_NUMBER, MRInputHelpers.getDagAttemptNumber(job));
+      invoked.set(true);
+      return new RecordReader() {
+        @Override
+        public boolean next(Object key, Object value) throws IOException {
+          return false;
+        }
+
+        @Override
+        public Object createKey() {
+          return null;
+        }
+
+        @Override
+        public Object createValue() {
+          return null;
+        }
+
+        @Override
+        public long getPos() throws IOException {
+          return 0;
+        }
+
+        @Override
+        public void close() throws IOException {
+
+        }
+
+        @Override
+        public float getProgress() throws IOException {
+          return 0;
+        }
+      };
+    }
+  }
+
+  public static class TestInputSplit implements InputSplit {
+
+    @Override
+    public long getLength() throws IOException {
+      return 0;
+    }
+
+    @Override
+    public String[] getLocations() throws IOException {
+      return new String[0];
+    }
+
+    @Override
+    public void write(DataOutput out) throws IOException {
+
+    }
+
+    @Override
+    public void readFields(DataInput in) throws IOException {
+
+    }
+  }
 }

http://git-wip-us.apache.org/repos/asf/tez/blob/99c85d3f/tez-mapreduce/src/test/java/org/apache/tez/mapreduce/input/TestMultiMRInput.java
----------------------------------------------------------------------
diff --git a/tez-mapreduce/src/test/java/org/apache/tez/mapreduce/input/TestMultiMRInput.java b/tez-mapreduce/src/test/java/org/apache/tez/mapreduce/input/TestMultiMRInput.java
index db5643e..1733bfc 100644
--- a/tez-mapreduce/src/test/java/org/apache/tez/mapreduce/input/TestMultiMRInput.java
+++ b/tez-mapreduce/src/test/java/org/apache/tez/mapreduce/input/TestMultiMRInput.java
@@ -33,6 +33,7 @@ import java.util.LinkedHashMap;
 import java.util.LinkedList;
 import java.util.List;
 import java.util.Random;
+import java.util.UUID;
 
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -312,6 +313,7 @@ public class TestMultiMRInput {
     doReturn(1).when(inputContext).getTaskAttemptNumber();
     doReturn(1).when(inputContext).getTaskIndex();
     doReturn(1).when(inputContext).getTaskVertexIndex();
+    doReturn(UUID.randomUUID().toString()).when(inputContext).getUniqueIdentifier();
     doReturn("taskVertexName").when(inputContext).getTaskVertexName();
     doReturn(UserPayload.create(ByteBuffer.wrap(payload))).when(inputContext).getUserPayload();
     return inputContext;


[03/24] tez git commit: TEZ-3052. Task internal error due to Invalid event: T_ATTEMPT_FAILED at FAILED (jlowe)

Posted by sr...@apache.org.
TEZ-3052. Task internal error due to Invalid event: T_ATTEMPT_FAILED at FAILED (jlowe)


Project: http://git-wip-us.apache.org/repos/asf/tez/repo
Commit: http://git-wip-us.apache.org/repos/asf/tez/commit/ca447ba5
Tree: http://git-wip-us.apache.org/repos/asf/tez/tree/ca447ba5
Diff: http://git-wip-us.apache.org/repos/asf/tez/diff/ca447ba5

Branch: refs/heads/TEZ-2980
Commit: ca447ba5c940d0ec8520166646695c49f2cd9dc3
Parents: 73e993c
Author: Jason Lowe <jl...@apache.org>
Authored: Thu Jan 21 18:57:52 2016 +0000
Committer: Jason Lowe <jl...@apache.org>
Committed: Thu Jan 21 18:57:52 2016 +0000

----------------------------------------------------------------------
 CHANGES.txt                                     |  2 +
 .../apache/tez/dag/app/dag/impl/TaskImpl.java   |  4 +-
 .../tez/dag/app/dag/impl/TestTaskImpl.java      | 73 ++++++++++++++++++++
 3 files changed, 78 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tez/blob/ca447ba5/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index 5d2c446..bec7dd4 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -6,6 +6,7 @@ Release 0.8.3: Unreleased
 INCOMPATIBLE CHANGES
 
 ALL CHANGES:
+  TEZ-3052. Task internal error due to Invalid event: T_ATTEMPT_FAILED at FAILED
   TEZ-2594. Fix LICENSE for missing entries for full and minimal tarballs.
   TEZ-3053. Containers timeout if they do not receive a task within the container timeout interval.
   TEZ-2898. tez tools : swimlanes.py is broken.
@@ -318,6 +319,7 @@ INCOMPATIBLE CHANGES
   TEZ-2949. Allow duplicate dag names within session for Tez.
 
 ALL CHANGES
+  TEZ-3052. Task internal error due to Invalid event: T_ATTEMPT_FAILED at FAILED
   TEZ-2937. Can Processor.close() be called after closing inputs and outputs?
   TEZ-3037. History URL should be set regardless of which history logging service is enabled.
   TEZ-3032. DAG start time getting logged using system time instead of recorded time in startTime field.

http://git-wip-us.apache.org/repos/asf/tez/blob/ca447ba5/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/TaskImpl.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/TaskImpl.java b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/TaskImpl.java
index c00d674..9ec7ce8 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/TaskImpl.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/TaskImpl.java
@@ -256,7 +256,9 @@ public class TaskImpl implements Task, EventHandler<TaskEvent> {
             TaskEventType.T_TERMINATE,
             TaskEventType.T_SCHEDULE,
             TaskEventType.T_ADD_SPEC_ATTEMPT,
-            TaskEventType.T_ATTEMPT_KILLED))
+            TaskEventType.T_ATTEMPT_FAILED,
+            TaskEventType.T_ATTEMPT_KILLED,
+            TaskEventType.T_ATTEMPT_SUCCEEDED))
 
     // Transitions from KILLED state
     // Ignorable event: T_ATTEMPT_KILLED

http://git-wip-us.apache.org/repos/asf/tez/blob/ca447ba5/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestTaskImpl.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestTaskImpl.java b/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestTaskImpl.java
index 0414c99..1274378 100644
--- a/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestTaskImpl.java
+++ b/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestTaskImpl.java
@@ -46,6 +46,7 @@ import org.apache.hadoop.yarn.event.EventHandler;
 import org.apache.hadoop.yarn.util.Clock;
 import org.apache.hadoop.yarn.util.SystemClock;
 import org.apache.tez.dag.api.TaskLocationHint;
+import org.apache.tez.dag.api.TezConfiguration;
 import org.apache.tez.dag.api.oldrecords.TaskAttemptState;
 import org.apache.tez.dag.api.oldrecords.TaskState;
 import org.apache.tez.dag.app.AppContext;
@@ -661,6 +662,78 @@ public class TestTaskImpl {
     assertTrue(mockTask.getDiagnostics().get(0).contains(TaskAttemptTerminationCause.TERMINATED_AT_SHUTDOWN.name()));
   }
 
+  @Test(timeout = 20000)
+  public void testFailedThenSpeculativeFailed() {
+    conf.setInt(TezConfiguration.TEZ_AM_TASK_MAX_FAILED_ATTEMPTS, 1);
+    mockTask = new MockTaskImpl(vertexId, partition,
+        eventHandler, conf, taskCommunicatorManagerInterface, clock,
+        taskHeartbeatHandler, appContext, leafVertex,
+        taskResource, containerContext, mock(Vertex.class));
+    TezTaskID taskId = getNewTaskID();
+    scheduleTaskAttempt(taskId);
+    MockTaskAttemptImpl firstAttempt = mockTask.getLastAttempt();
+    launchTaskAttempt(firstAttempt.getID());
+    updateAttemptState(firstAttempt, TaskAttemptState.RUNNING);
+
+    // Add a speculative task attempt
+    mockTask.handle(new TaskEventTAUpdate(mockTask.getLastAttempt().getID(),
+        TaskEventType.T_ADD_SPEC_ATTEMPT));
+    MockTaskAttemptImpl specAttempt = mockTask.getLastAttempt();
+    launchTaskAttempt(specAttempt.getID());
+    updateAttemptState(specAttempt, TaskAttemptState.RUNNING);
+    assertEquals(2, mockTask.getAttemptList().size());
+
+    // Fail the first attempt
+    updateAttemptState(firstAttempt, TaskAttemptState.FAILED);
+    mockTask.handle(new TaskEventTAUpdate(firstAttempt.getID(),
+        TaskEventType.T_ATTEMPT_FAILED));
+    assertEquals(TaskState.FAILED, mockTask.getState());
+    assertEquals(2, mockTask.getAttemptList().size());
+
+    // Now fail the speculative attempt
+    updateAttemptState(specAttempt, TaskAttemptState.FAILED);
+    mockTask.handle(new TaskEventTAUpdate(specAttempt.getID(),
+        TaskEventType.T_ATTEMPT_FAILED));
+    assertEquals(TaskState.FAILED, mockTask.getState());
+    assertEquals(2, mockTask.getAttemptList().size());
+  }
+
+  @Test(timeout = 20000)
+  public void testFailedThenSpeculativeSucceeded() {
+    conf.setInt(TezConfiguration.TEZ_AM_TASK_MAX_FAILED_ATTEMPTS, 1);
+    mockTask = new MockTaskImpl(vertexId, partition,
+        eventHandler, conf, taskCommunicatorManagerInterface, clock,
+        taskHeartbeatHandler, appContext, leafVertex,
+        taskResource, containerContext, mock(Vertex.class));
+    TezTaskID taskId = getNewTaskID();
+    scheduleTaskAttempt(taskId);
+    MockTaskAttemptImpl firstAttempt = mockTask.getLastAttempt();
+    launchTaskAttempt(firstAttempt.getID());
+    updateAttemptState(firstAttempt, TaskAttemptState.RUNNING);
+
+    // Add a speculative task attempt
+    mockTask.handle(new TaskEventTAUpdate(mockTask.getLastAttempt().getID(),
+        TaskEventType.T_ADD_SPEC_ATTEMPT));
+    MockTaskAttemptImpl specAttempt = mockTask.getLastAttempt();
+    launchTaskAttempt(specAttempt.getID());
+    updateAttemptState(specAttempt, TaskAttemptState.RUNNING);
+    assertEquals(2, mockTask.getAttemptList().size());
+
+    // Fail the first attempt
+    updateAttemptState(firstAttempt, TaskAttemptState.FAILED);
+    mockTask.handle(new TaskEventTAUpdate(firstAttempt.getID(),
+        TaskEventType.T_ATTEMPT_FAILED));
+    assertEquals(TaskState.FAILED, mockTask.getState());
+    assertEquals(2, mockTask.getAttemptList().size());
+
+    // Now succeed the speculative attempt
+    updateAttemptState(specAttempt, TaskAttemptState.SUCCEEDED);
+    mockTask.handle(new TaskEventTAUpdate(specAttempt.getID(),
+        TaskEventType.T_ATTEMPT_SUCCEEDED));
+    assertEquals(TaskState.FAILED, mockTask.getState());
+    assertEquals(2, mockTask.getAttemptList().size());
+  }
+
   // TODO Add test to validate the correct commit attempt.
 
 


[09/24] tez git commit: TEZ-2974. Tez tools: TFileRecordReader in tez-tools should support reading >2 GB tfiles (rbalamohan)

Posted by sr...@apache.org.
TEZ-2974. Tez tools: TFileRecordReader in tez-tools should support reading >2 GB tfiles (rbalamohan)


Project: http://git-wip-us.apache.org/repos/asf/tez/repo
Commit: http://git-wip-us.apache.org/repos/asf/tez/commit/72f56163
Tree: http://git-wip-us.apache.org/repos/asf/tez/tree/72f56163
Diff: http://git-wip-us.apache.org/repos/asf/tez/diff/72f56163

Branch: refs/heads/TEZ-2980
Commit: 72f561639f828f2d8a815d52460e44fe2ea56d3a
Parents: 870972d
Author: Rajesh Balamohan <rb...@apache.org>
Authored: Mon Feb 1 16:44:11 2016 -0800
Committer: Rajesh Balamohan <rb...@apache.org>
Committed: Mon Feb 1 16:44:11 2016 -0800

----------------------------------------------------------------------
 CHANGES.txt                                     |  1 +
 .../java/org/apache/tez/tools/TFileLoader.java  | 91 ++++++--------------
 .../org/apache/tez/tools/TFileRecordReader.java | 68 +++++++++++----
 3 files changed, 80 insertions(+), 80 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tez/blob/72f56163/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index a550015..6bff146 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -6,6 +6,7 @@ Release 0.8.3: Unreleased
 INCOMPATIBLE CHANGES
 
 ALL CHANGES:
+  TEZ-2974. Tez tools: TFileRecordReader in tez-tools should support reading >2 GB tfiles.
   TEZ-3081. Update tez website for trademarks feedback.
   TEZ-3076. Reduce merge memory overhead to support large number of in-memory mapoutputs
   TEZ-3079. Fix tez-tfile parser documentation.

http://git-wip-us.apache.org/repos/asf/tez/blob/72f56163/tez-tools/tez-tfile-parser/src/main/java/org/apache/tez/tools/TFileLoader.java
----------------------------------------------------------------------
diff --git a/tez-tools/tez-tfile-parser/src/main/java/org/apache/tez/tools/TFileLoader.java b/tez-tools/tez-tfile-parser/src/main/java/org/apache/tez/tools/TFileLoader.java
index 7fbcbf6..18e9940 100644
--- a/tez-tools/tez-tfile-parser/src/main/java/org/apache/tez/tools/TFileLoader.java
+++ b/tez-tools/tez-tfile-parser/src/main/java/org/apache/tez/tools/TFileLoader.java
@@ -19,16 +19,11 @@
 package org.apache.tez.tools;
 
 import com.google.common.base.Objects;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.mapreduce.InputFormat;
 import org.apache.hadoop.mapreduce.InputSplit;
 import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.mapreduce.OutputFormat;
 import org.apache.hadoop.mapreduce.RecordReader;
-import org.apache.hadoop.mapreduce.RecordWriter;
 import org.apache.hadoop.mapreduce.TaskAttemptContext;
 import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
 import org.apache.pig.Expression;
@@ -36,17 +31,15 @@ import org.apache.pig.FileInputLoadFunc;
 import org.apache.pig.LoadMetadata;
 import org.apache.pig.ResourceSchema;
 import org.apache.pig.ResourceStatistics;
-import org.apache.pig.StoreFunc;
-import org.apache.pig.StoreFuncInterface;
 import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigFileInputFormat;
 import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigSplit;
 import org.apache.pig.data.Tuple;
 import org.apache.pig.data.TupleFactory;
 import org.apache.pig.impl.util.Utils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
-import java.io.BufferedReader;
 import java.io.IOException;
-import java.io.StringReader;
 import java.util.regex.Pattern;
 
 /**
@@ -57,74 +50,46 @@ public class TFileLoader extends FileInputLoadFunc implements LoadMetadata {
 
   private static final Logger LOG = LoggerFactory.getLogger(TFileLoader.class);
 
-  private TFileRecordReader recReader = null;
+  protected TFileRecordReader recReader = null;
 
-  private BufferedReader bufReader;
   private Text currentKey;
   private final TupleFactory tupleFactory = TupleFactory.getInstance();
 
   private final Pattern PATTERN = Pattern.compile(":");
 
-  /**
-   * We get one complete TFile per KV read.
-   * Add a BufferedReader so that we can scan a line at a time.
-   *
-   * @throws java.io.IOException
-   * @throws InterruptedException
-   */
-  //TODO: tasks can sometime throw OOM when single TFile is way too large. Adjust mem accordinly.
-  private void setupReader() throws IOException, InterruptedException {
-    if (recReader.nextKeyValue() && bufReader == null) {
-      currentKey = recReader.getCurrentKey();
-      Text val = recReader.getCurrentValue();
-      bufReader = new BufferedReader(new StringReader(val.toString()));
-    }
-  }
-
   @Override
   public Tuple getNext() throws IOException {
     try {
-      String line = readLine();
-      if (line != null) {
-        //machine, key, line
-        Tuple tuple = tupleFactory.newTuple(3);
-        if (currentKey != null) {
-          String[] data = PATTERN.split(currentKey.toString());
-          if (data == null || data.length != 2) {
-            LOG.warn("unable to parse " + currentKey.toString());
-            return null;
-          }
-          tuple.set(0, data[0]);
-          tuple.set(1, data[1]);
-        } else {
-          tuple.set(0, "");
-          tuple.set(1, "");
+      if (!recReader.nextKeyValue()) {
+        return null;
+      }
+
+      currentKey = recReader.getCurrentKey();
+      String line = recReader.getCurrentValue().toString();
+      if (LOG.isDebugEnabled()) {
+        LOG.debug("currentKey: " + currentKey
+                + ", line=" + line);
+      }
+      //Tuple would be of format: machine, key, line
+      Tuple tuple = tupleFactory.newTuple(3);
+      if (currentKey != null) {
+        String[] data = PATTERN.split(currentKey.toString());
+        if (data == null || data.length != 2) {
+          LOG.warn("unable to parse " + currentKey.toString());
+          return null;
         }
-        tuple.set(2, line); //line
-        return tuple;
+        tuple.set(0, data[0]);
+        tuple.set(1, data[1]);
+      } else {
+        tuple.set(0, "");
+        tuple.set(1, "");
       }
-    } catch (IOException e) {
-      return null;
+      //set the line field
+      tuple.set(2, line);
+      return tuple;
     } catch (InterruptedException e) {
       return null;
     }
-    return null;
-  }
-
-  private String readLine() throws IOException, InterruptedException {
-    String line = null;
-    if (bufReader == null) {
-      setupReader();
-    }
-    line = bufReader.readLine();
-    if (line == null) { //end of stream. Move to the next reader
-      bufReader = null;
-      setupReader();
-      if (bufReader != null) {
-        line = bufReader.readLine();
-      }
-    }
-    return line;
   }
 
   public static class TFileInputFormat extends

http://git-wip-us.apache.org/repos/asf/tez/blob/72f56163/tez-tools/tez-tfile-parser/src/main/java/org/apache/tez/tools/TFileRecordReader.java
----------------------------------------------------------------------
diff --git a/tez-tools/tez-tfile-parser/src/main/java/org/apache/tez/tools/TFileRecordReader.java b/tez-tools/tez-tfile-parser/src/main/java/org/apache/tez/tools/TFileRecordReader.java
index 70c0ee1..4d6c0f2 100644
--- a/tez-tools/tez-tfile-parser/src/main/java/org/apache/tez/tools/TFileRecordReader.java
+++ b/tez-tools/tez-tfile-parser/src/main/java/org/apache/tez/tools/TFileRecordReader.java
@@ -18,6 +18,7 @@
 
 package org.apache.tez.tools;
 
+import com.google.common.annotations.VisibleForTesting;
 import org.apache.commons.io.IOUtils;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -32,10 +33,14 @@ import org.apache.hadoop.mapreduce.RecordReader;
 import org.apache.hadoop.mapreduce.TaskAttemptContext;
 import org.apache.hadoop.mapreduce.lib.input.FileSplit;
 
+import java.io.BufferedReader;
+import java.io.EOFException;
 import java.io.IOException;
+import java.io.InputStreamReader;
 
 /**
- * Simple record reader which reads the TFile and emits it as key, value pair
+ * Simple record reader which reads a TFile and emits its entries as key, value pairs.
+ * If a value spans multiple lines, it is emitted one line at a time.
  */
 public class TFileRecordReader extends RecordReader<Text, Text> {
 
@@ -43,17 +48,22 @@ public class TFileRecordReader extends RecordReader<Text, Text> {
 
   private long start, end;
 
-  private Path splitPath;
+  @VisibleForTesting
+  protected Path splitPath;
   private FSDataInputStream fin;
-  private TFile.Reader reader;
-  private TFile.Reader.Scanner scanner;
+
+  @VisibleForTesting
+  protected TFile.Reader reader;
+  @VisibleForTesting
+  protected TFile.Reader.Scanner scanner;
 
   private Text key = new Text();
   private Text value = new Text();
 
-  private BytesWritable valueBytesWritable = new BytesWritable();
   private BytesWritable keyBytesWritable = new BytesWritable();
 
+  private BufferedReader currentValueReader;
+
   @Override public void initialize(InputSplit split, TaskAttemptContext context)
       throws IOException, InterruptedException {
     FileSplit fileSplit = (FileSplit) split;
@@ -69,22 +79,46 @@ public class TFileRecordReader extends RecordReader<Text, Text> {
     scanner = reader.createScannerByByteRange(start, fileSplit.getLength());
   }
 
+  private void populateKV(TFile.Reader.Scanner.Entry entry) throws IOException {
+    entry.getKey(keyBytesWritable);
+    // splitPath contains the machine name. Create the key as splitPath name + ":" + realKey
+    String keyStr = new StringBuilder()
+        .append(splitPath.getName()).append(":")
+        .append(new String(keyBytesWritable.getBytes()))
+        .toString();
+
+    /**
+     * In certain cases, values can be huge (files > 2 GB). Reading the
+     * value as a stream handles such cases better than loading it whole.
+     */
+    currentValueReader = new BufferedReader(
+        new InputStreamReader(entry.getValueStream()));
+    key.set(keyStr);
+    String line = currentValueReader.readLine();
+    value.set((line == null) ? "" : line);
+  }
+
   @Override public boolean nextKeyValue() throws IOException, InterruptedException {
-    valueBytesWritable.setSize(0);
-    if (!scanner.advance()) {
+    if (currentValueReader != null) {
+      // Still in the current entry; keep reading its value line by line
+      String line = currentValueReader.readLine();
+      if (line != null) {
+        value.set(line);
+        return true;
+      } else {
+        // All lines of the current value stream have been read. Move to the next KV.
+        scanner.advance();
+      }
+    }
+
+    try {
+      populateKV(scanner.entry());
+      return true;
+    } catch(EOFException eofException) {
+      key = null;
       value = null;
       return false;
     }
-    TFile.Reader.Scanner.Entry entry = scanner.entry();
-    //populate key, value
-    entry.getKey(keyBytesWritable);
-    StringBuilder k = new StringBuilder();
-    //split path contains the machine name. Create the key as splitPath + realKey
-    k.append(splitPath.getName()).append(":").append(new String(keyBytesWritable.getBytes()));
-    key.set(k.toString());
-    entry.getValue(valueBytesWritable);
-    value.set(valueBytesWritable.getBytes());
-    return true;
   }
 
   @Override public Text getCurrentKey() throws IOException, InterruptedException {